1 // Copyright © 2024 Institute of Software, CAS. All rights reserved. 2 // 3 // Copyright © 2019 Intel Corporation 4 // 5 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 6 // 7 // Copyright © 2020, Microsoft Corporation 8 // 9 // Copyright 2018-2019 CrowdStrike, Inc. 10 // 11 // 12 13 use std::any::Any; 14 use std::collections::HashMap; 15 #[cfg(target_arch = "x86_64")] 16 use std::fs::File; 17 #[cfg(target_arch = "x86_64")] 18 use std::os::unix::io::AsRawFd; 19 #[cfg(feature = "tdx")] 20 use std::os::unix::io::RawFd; 21 use std::result; 22 #[cfg(target_arch = "x86_64")] 23 use std::sync::atomic::{AtomicBool, Ordering}; 24 use std::sync::{Arc, Mutex, RwLock}; 25 26 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; 27 use vmm_sys_util::eventfd::EventFd; 28 29 #[cfg(target_arch = "aarch64")] 30 use crate::aarch64::gic::KvmGicV3Its; 31 #[cfg(target_arch = "aarch64")] 32 pub use crate::aarch64::{ 33 check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit, 34 VcpuKvmState, 35 }; 36 #[cfg(target_arch = "aarch64")] 37 use crate::arch::aarch64::gic::{Vgic, VgicConfig}; 38 #[cfg(target_arch = "riscv64")] 39 use crate::arch::riscv64::aia::{Vaia, VaiaConfig}; 40 #[cfg(target_arch = "riscv64")] 41 use crate::riscv64::aia::KvmAiaImsics; 42 #[cfg(target_arch = "riscv64")] 43 pub use crate::riscv64::{ 44 aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register, 45 VcpuKvmState, 46 }; 47 use crate::vm::{self, InterruptSourceConfig, VmOps}; 48 #[cfg(target_arch = "aarch64")] 49 use crate::{arm64_core_reg_id, offset_of}; 50 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType}; 51 #[cfg(target_arch = "riscv64")] 52 use crate::{offset_of, riscv64_reg_id}; 53 // x86_64 dependencies 54 #[cfg(target_arch = "x86_64")] 55 pub mod x86_64; 56 #[cfg(target_arch = "aarch64")] 57 use aarch64::{RegList, Register}; 58 #[cfg(target_arch = "x86_64")] 59 use kvm_bindings::{ 60 kvm_enable_cap, kvm_msr_entry, MsrList, 
KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, 61 KVM_GUESTDBG_USE_HW_BP, 62 }; 63 #[cfg(target_arch = "riscv64")] 64 use riscv64::{RegList, Register}; 65 #[cfg(target_arch = "x86_64")] 66 use x86_64::check_required_kvm_extensions; 67 #[cfg(target_arch = "x86_64")] 68 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState}; 69 70 #[cfg(target_arch = "x86_64")] 71 use crate::arch::x86::{ 72 CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS, 73 }; 74 #[cfg(target_arch = "x86_64")] 75 use crate::ClockData; 76 use crate::{ 77 CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion, 78 USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, 79 }; 80 // aarch64 dependencies 81 #[cfg(target_arch = "aarch64")] 82 pub mod aarch64; 83 // riscv64 dependencies 84 #[cfg(target_arch = "riscv64")] 85 pub mod riscv64; 86 #[cfg(target_arch = "aarch64")] 87 use std::mem; 88 89 /// 90 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms 91 /// 92 #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] 93 pub use kvm_bindings::kvm_vcpu_events as VcpuEvents; 94 pub use kvm_bindings::{ 95 kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice, 96 kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, 97 kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region, 98 KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, 99 KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, 100 }; 101 #[cfg(target_arch = "aarch64")] 102 use kvm_bindings::{ 103 kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, 104 KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, 105 KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, 106 
KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, 107 }; 108 #[cfg(target_arch = "riscv64")] 109 use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE}; 110 #[cfg(feature = "tdx")] 111 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; 112 pub use kvm_ioctls::{Cap, Kvm, VcpuExit}; 113 use thiserror::Error; 114 use vfio_ioctls::VfioDeviceFd; 115 #[cfg(feature = "tdx")] 116 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr}; 117 pub use {kvm_bindings, kvm_ioctls}; 118 119 #[cfg(target_arch = "x86_64")] 120 const KVM_CAP_SGX_ATTRIBUTE: u32 = 196; 121 122 #[cfg(target_arch = "x86_64")] 123 use vmm_sys_util::ioctl_io_nr; 124 #[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))] 125 use vmm_sys_util::ioctl_ioc_nr; 126 127 #[cfg(target_arch = "x86_64")] 128 ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); 129 130 #[cfg(feature = "tdx")] 131 const KVM_EXIT_TDX: u32 = 50; 132 #[cfg(feature = "tdx")] 133 const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002; 134 #[cfg(feature = "tdx")] 135 const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004; 136 #[cfg(feature = "tdx")] 137 const TDG_VP_VMCALL_SUCCESS: u64 = 0; 138 #[cfg(feature = "tdx")] 139 const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000; 140 141 #[cfg(feature = "tdx")] 142 ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); 143 144 #[cfg(feature = "tdx")] 145 #[repr(u32)] 146 enum TdxCommand { 147 Capabilities = 0, 148 InitVm, 149 InitVcpu, 150 InitMemRegion, 151 Finalize, 152 } 153 154 #[cfg(feature = "tdx")] 155 pub enum TdxExitDetails { 156 GetQuote, 157 SetupEventNotifyInterrupt, 158 } 159 160 #[cfg(feature = "tdx")] 161 pub enum TdxExitStatus { 162 Success, 163 InvalidOperand, 164 } 165 166 #[cfg(feature = "tdx")] 167 const TDX_MAX_NR_CPUID_CONFIGS: usize = 6; 168 169 #[cfg(feature = "tdx")] 170 #[repr(C)] 171 #[derive(Debug, Default)] 172 pub struct TdxCpuidConfig { 173 pub leaf: u32, 174 pub sub_leaf: u32, 175 
pub eax: u32, 176 pub ebx: u32, 177 pub ecx: u32, 178 pub edx: u32, 179 } 180 181 #[cfg(feature = "tdx")] 182 #[repr(C)] 183 #[derive(Debug, Default)] 184 pub struct TdxCapabilities { 185 pub attrs_fixed0: u64, 186 pub attrs_fixed1: u64, 187 pub xfam_fixed0: u64, 188 pub xfam_fixed1: u64, 189 pub nr_cpuid_configs: u32, 190 pub padding: u32, 191 pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS], 192 } 193 194 #[cfg(feature = "tdx")] 195 #[derive(Copy, Clone)] 196 pub struct KvmTdxExit { 197 pub type_: u32, 198 pub pad: u32, 199 pub u: KvmTdxExitU, 200 } 201 202 #[cfg(feature = "tdx")] 203 #[repr(C)] 204 #[derive(Copy, Clone)] 205 pub union KvmTdxExitU { 206 pub vmcall: KvmTdxExitVmcall, 207 } 208 209 #[cfg(feature = "tdx")] 210 #[repr(C)] 211 #[derive(Debug, Default, Copy, Clone, PartialEq)] 212 pub struct KvmTdxExitVmcall { 213 pub type_: u64, 214 pub subfunction: u64, 215 pub reg_mask: u64, 216 pub in_r12: u64, 217 pub in_r13: u64, 218 pub in_r14: u64, 219 pub in_r15: u64, 220 pub in_rbx: u64, 221 pub in_rdi: u64, 222 pub in_rsi: u64, 223 pub in_r8: u64, 224 pub in_r9: u64, 225 pub in_rdx: u64, 226 pub status_code: u64, 227 pub out_r11: u64, 228 pub out_r12: u64, 229 pub out_r13: u64, 230 pub out_r14: u64, 231 pub out_r15: u64, 232 pub out_rbx: u64, 233 pub out_rdi: u64, 234 pub out_rsi: u64, 235 pub out_r8: u64, 236 pub out_r9: u64, 237 pub out_rdx: u64, 238 } 239 240 impl From<kvm_userspace_memory_region> for UserMemoryRegion { 241 fn from(region: kvm_userspace_memory_region) -> Self { 242 let mut flags = USER_MEMORY_REGION_READ; 243 if region.flags & KVM_MEM_READONLY == 0 { 244 flags |= USER_MEMORY_REGION_WRITE; 245 } 246 if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 { 247 flags |= USER_MEMORY_REGION_LOG_DIRTY; 248 } 249 250 UserMemoryRegion { 251 slot: region.slot, 252 guest_phys_addr: region.guest_phys_addr, 253 memory_size: region.memory_size, 254 userspace_addr: region.userspace_addr, 255 flags, 256 } 257 } 258 } 259 260 impl 
From<UserMemoryRegion> for kvm_userspace_memory_region { 261 fn from(region: UserMemoryRegion) -> Self { 262 assert!( 263 region.flags & USER_MEMORY_REGION_READ != 0, 264 "KVM mapped memory is always readable" 265 ); 266 267 let mut flags = 0; 268 if region.flags & USER_MEMORY_REGION_WRITE == 0 { 269 flags |= KVM_MEM_READONLY; 270 } 271 if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 { 272 flags |= KVM_MEM_LOG_DIRTY_PAGES; 273 } 274 275 kvm_userspace_memory_region { 276 slot: region.slot, 277 guest_phys_addr: region.guest_phys_addr, 278 memory_size: region.memory_size, 279 userspace_addr: region.userspace_addr, 280 flags, 281 } 282 } 283 } 284 285 impl From<kvm_mp_state> for MpState { 286 fn from(s: kvm_mp_state) -> Self { 287 MpState::Kvm(s) 288 } 289 } 290 291 impl From<MpState> for kvm_mp_state { 292 fn from(ms: MpState) -> Self { 293 match ms { 294 MpState::Kvm(s) => s, 295 /* Needed in case other hypervisors are enabled */ 296 #[allow(unreachable_patterns)] 297 _ => panic!("CpuState is not valid"), 298 } 299 } 300 } 301 302 impl From<kvm_ioctls::IoEventAddress> for IoEventAddress { 303 fn from(a: kvm_ioctls::IoEventAddress) -> Self { 304 match a { 305 kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x), 306 kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x), 307 } 308 } 309 } 310 311 impl From<IoEventAddress> for kvm_ioctls::IoEventAddress { 312 fn from(a: IoEventAddress) -> Self { 313 match a { 314 IoEventAddress::Pio(x) => Self::Pio(x), 315 IoEventAddress::Mmio(x) => Self::Mmio(x), 316 } 317 } 318 } 319 320 impl From<VcpuKvmState> for CpuState { 321 fn from(s: VcpuKvmState) -> Self { 322 CpuState::Kvm(s) 323 } 324 } 325 326 impl From<CpuState> for VcpuKvmState { 327 fn from(s: CpuState) -> Self { 328 match s { 329 CpuState::Kvm(s) => s, 330 /* Needed in case other hypervisors are enabled */ 331 #[allow(unreachable_patterns)] 332 _ => panic!("CpuState is not valid"), 333 } 334 } 335 } 336 337 #[cfg(target_arch = "x86_64")] 338 impl From<kvm_clock_data> 
for ClockData { 339 fn from(d: kvm_clock_data) -> Self { 340 ClockData::Kvm(d) 341 } 342 } 343 344 #[cfg(target_arch = "x86_64")] 345 impl From<ClockData> for kvm_clock_data { 346 fn from(ms: ClockData) -> Self { 347 match ms { 348 ClockData::Kvm(s) => s, 349 /* Needed in case other hypervisors are enabled */ 350 #[allow(unreachable_patterns)] 351 _ => panic!("CpuState is not valid"), 352 } 353 } 354 } 355 356 #[cfg(not(target_arch = "riscv64"))] 357 impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters { 358 fn from(s: kvm_bindings::kvm_regs) -> Self { 359 crate::StandardRegisters::Kvm(s) 360 } 361 } 362 363 #[cfg(not(target_arch = "riscv64"))] 364 impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs { 365 fn from(e: crate::StandardRegisters) -> Self { 366 match e { 367 crate::StandardRegisters::Kvm(e) => e, 368 /* Needed in case other hypervisors are enabled */ 369 #[allow(unreachable_patterns)] 370 _ => panic!("StandardRegisters are not valid"), 371 } 372 } 373 } 374 375 #[cfg(target_arch = "riscv64")] 376 impl From<kvm_bindings::kvm_riscv_core> for crate::StandardRegisters { 377 fn from(s: kvm_bindings::kvm_riscv_core) -> Self { 378 crate::StandardRegisters::Kvm(s) 379 } 380 } 381 382 #[cfg(target_arch = "riscv64")] 383 impl From<crate::StandardRegisters> for kvm_bindings::kvm_riscv_core { 384 fn from(e: crate::StandardRegisters) -> Self { 385 match e { 386 crate::StandardRegisters::Kvm(e) => e, 387 /* Needed in case other hypervisors are enabled */ 388 #[allow(unreachable_patterns)] 389 _ => panic!("StandardRegisters are not valid"), 390 } 391 } 392 } 393 394 impl From<kvm_irq_routing_entry> for IrqRoutingEntry { 395 fn from(s: kvm_irq_routing_entry) -> Self { 396 IrqRoutingEntry::Kvm(s) 397 } 398 } 399 400 impl From<IrqRoutingEntry> for kvm_irq_routing_entry { 401 fn from(e: IrqRoutingEntry) -> Self { 402 match e { 403 IrqRoutingEntry::Kvm(e) => e, 404 /* Needed in case other hypervisors are enabled */ 405 #[allow(unreachable_patterns)] 406 
_ => panic!("IrqRoutingEntry is not valid"), 407 } 408 } 409 } 410 411 struct KvmDirtyLogSlot { 412 slot: u32, 413 guest_phys_addr: u64, 414 memory_size: u64, 415 userspace_addr: u64, 416 } 417 418 /// Wrapper over KVM VM ioctls. 419 pub struct KvmVm { 420 fd: Arc<VmFd>, 421 #[cfg(target_arch = "x86_64")] 422 msrs: Vec<MsrEntry>, 423 dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>, 424 } 425 426 impl KvmVm { 427 /// 428 /// Creates an emulated device in the kernel. 429 /// 430 /// See the documentation for `KVM_CREATE_DEVICE`. 431 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> { 432 let device_fd = self 433 .fd 434 .create_device(device) 435 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 436 Ok(VfioDeviceFd::new_from_kvm(device_fd)) 437 } 438 /// Checks if a particular `Cap` is available. 439 pub fn check_extension(&self, c: Cap) -> bool { 440 self.fd.check_extension(c) 441 } 442 } 443 444 /// Implementation of Vm trait for KVM 445 /// 446 /// # Examples 447 /// 448 /// ``` 449 /// # use hypervisor::kvm::KvmHypervisor; 450 /// # use std::sync::Arc; 451 /// let kvm = KvmHypervisor::new().unwrap(); 452 /// let hypervisor = Arc::new(kvm); 453 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 454 /// ``` 455 impl vm::Vm for KvmVm { 456 #[cfg(target_arch = "x86_64")] 457 /// 458 /// Sets the address of the one-page region in the VM's address space. 459 /// 460 fn set_identity_map_address(&self, address: u64) -> vm::Result<()> { 461 self.fd 462 .set_identity_map_address(address) 463 .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into())) 464 } 465 466 #[cfg(target_arch = "x86_64")] 467 /// 468 /// Sets the address of the three-page region in the VM's address space. 
469 /// 470 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 471 self.fd 472 .set_tss_address(offset) 473 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 474 } 475 476 #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] 477 /// 478 /// Creates an in-kernel interrupt controller. 479 /// 480 fn create_irq_chip(&self) -> vm::Result<()> { 481 self.fd 482 .create_irq_chip() 483 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 484 } 485 486 /// 487 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 488 /// 489 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 490 self.fd 491 .register_irqfd(fd, gsi) 492 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 493 } 494 495 /// 496 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 497 /// 498 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 499 self.fd 500 .unregister_irqfd(fd, gsi) 501 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 502 } 503 504 /// 505 /// Creates a VcpuFd object from a vcpu RawFd. 506 /// 507 fn create_vcpu( 508 &self, 509 id: u8, 510 vm_ops: Option<Arc<dyn VmOps>>, 511 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 512 let fd = self 513 .fd 514 .create_vcpu(id as u64) 515 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 516 let vcpu = KvmVcpu { 517 fd: Arc::new(Mutex::new(fd)), 518 #[cfg(target_arch = "x86_64")] 519 msrs: self.msrs.clone(), 520 vm_ops, 521 #[cfg(target_arch = "x86_64")] 522 hyperv_synic: AtomicBool::new(false), 523 }; 524 Ok(Arc::new(vcpu)) 525 } 526 527 #[cfg(target_arch = "aarch64")] 528 /// 529 /// Creates a virtual GIC device. 
530 /// 531 fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> { 532 let gic_device = KvmGicV3Its::new(self, config) 533 .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?; 534 Ok(Arc::new(Mutex::new(gic_device))) 535 } 536 537 #[cfg(target_arch = "riscv64")] 538 /// 539 /// Creates a virtual AIA device. 540 /// 541 fn create_vaia(&self, config: VaiaConfig) -> vm::Result<Arc<Mutex<dyn Vaia>>> { 542 let aia_device = KvmAiaImsics::new(self, config) 543 .map_err(|e| vm::HypervisorVmError::CreateVaia(anyhow!("Vaia error {:?}", e)))?; 544 Ok(Arc::new(Mutex::new(aia_device))) 545 } 546 547 /// 548 /// Registers an event to be signaled whenever a certain address is written to. 549 /// 550 fn register_ioevent( 551 &self, 552 fd: &EventFd, 553 addr: &IoEventAddress, 554 datamatch: Option<vm::DataMatch>, 555 ) -> vm::Result<()> { 556 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 557 if let Some(dm) = datamatch { 558 match dm { 559 vm::DataMatch::DataMatch32(kvm_dm32) => self 560 .fd 561 .register_ioevent(fd, addr, kvm_dm32) 562 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 563 vm::DataMatch::DataMatch64(kvm_dm64) => self 564 .fd 565 .register_ioevent(fd, addr, kvm_dm64) 566 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 567 } 568 } else { 569 self.fd 570 .register_ioevent(fd, addr, NoDatamatch) 571 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 572 } 573 } 574 575 /// 576 /// Unregisters an event from a certain address it has been previously registered to. 
577 /// 578 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 579 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 580 self.fd 581 .unregister_ioevent(fd, addr, NoDatamatch) 582 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 583 } 584 585 /// 586 /// Constructs a routing entry 587 /// 588 fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry { 589 match &config { 590 InterruptSourceConfig::MsiIrq(cfg) => { 591 let mut kvm_route = kvm_irq_routing_entry { 592 gsi, 593 type_: KVM_IRQ_ROUTING_MSI, 594 ..Default::default() 595 }; 596 597 kvm_route.u.msi.address_lo = cfg.low_addr; 598 kvm_route.u.msi.address_hi = cfg.high_addr; 599 kvm_route.u.msi.data = cfg.data; 600 601 if self.check_extension(crate::kvm::Cap::MsiDevid) { 602 // On AArch64, there is limitation on the range of the 'devid', 603 // it cannot be greater than 65536 (the max of u16). 604 // 605 // BDF cannot be used directly, because 'segment' is in high 606 // 16 bits. The layout of the u32 BDF is: 607 // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --| 608 // | segment | bus | device | function | 609 // 610 // Now that we support 1 bus only in a segment, we can build a 611 // 'devid' by replacing the 'bus' bits with the low 8 bits of 612 // 'segment' data. 613 // This way we can resolve the range checking problem and give 614 // different `devid` to all the devices. Limitation is that at 615 // most 256 segments can be supported. 
616 // 617 let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff; 618 619 kvm_route.flags = KVM_MSI_VALID_DEVID; 620 kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid; 621 } 622 kvm_route.into() 623 } 624 InterruptSourceConfig::LegacyIrq(cfg) => { 625 let mut kvm_route = kvm_irq_routing_entry { 626 gsi, 627 type_: KVM_IRQ_ROUTING_IRQCHIP, 628 ..Default::default() 629 }; 630 kvm_route.u.irqchip.irqchip = cfg.irqchip; 631 kvm_route.u.irqchip.pin = cfg.pin; 632 633 kvm_route.into() 634 } 635 } 636 } 637 638 /// 639 /// Sets the GSI routing table entries, overwriting any previously set 640 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 641 /// 642 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 643 let mut irq_routing = 644 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 645 irq_routing[0].nr = entries.len() as u32; 646 irq_routing[0].flags = 0; 647 let entries: Vec<kvm_irq_routing_entry> = entries 648 .iter() 649 .map(|entry| match entry { 650 IrqRoutingEntry::Kvm(e) => *e, 651 #[allow(unreachable_patterns)] 652 _ => panic!("IrqRoutingEntry type is wrong"), 653 }) 654 .collect(); 655 656 // SAFETY: irq_routing initialized with entries.len() and now it is being turned into 657 // entries_slice with entries.len() again. It is guaranteed to be large enough to hold 658 // everything from entries. 
659 unsafe { 660 let entries_slice: &mut [kvm_irq_routing_entry] = 661 irq_routing[0].entries.as_mut_slice(entries.len()); 662 entries_slice.copy_from_slice(&entries); 663 } 664 665 self.fd 666 .set_gsi_routing(&irq_routing[0]) 667 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 668 } 669 670 /// 671 /// Creates a memory region structure that can be used with {create/remove}_user_memory_region 672 /// 673 fn make_user_memory_region( 674 &self, 675 slot: u32, 676 guest_phys_addr: u64, 677 memory_size: u64, 678 userspace_addr: u64, 679 readonly: bool, 680 log_dirty_pages: bool, 681 ) -> UserMemoryRegion { 682 kvm_userspace_memory_region { 683 slot, 684 guest_phys_addr, 685 memory_size, 686 userspace_addr, 687 flags: if readonly { KVM_MEM_READONLY } else { 0 } 688 | if log_dirty_pages { 689 KVM_MEM_LOG_DIRTY_PAGES 690 } else { 691 0 692 }, 693 } 694 .into() 695 } 696 697 /// 698 /// Creates a guest physical memory region. 699 /// 700 fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 701 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 702 703 if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 { 704 if (region.flags & KVM_MEM_READONLY) != 0 { 705 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!( 706 "Error creating regions with both 'dirty-pages-log' and 'read-only'." 707 ))); 708 } 709 710 // Keep track of the regions that need dirty pages log 711 self.dirty_log_slots.write().unwrap().insert( 712 region.slot, 713 KvmDirtyLogSlot { 714 slot: region.slot, 715 guest_phys_addr: region.guest_phys_addr, 716 memory_size: region.memory_size, 717 userspace_addr: region.userspace_addr, 718 }, 719 ); 720 721 // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`. 722 // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`. 723 region.flags = 0; 724 } 725 726 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
727 unsafe { 728 self.fd 729 .set_user_memory_region(region) 730 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into())) 731 } 732 } 733 734 /// 735 /// Removes a guest physical memory region. 736 /// 737 fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 738 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 739 740 // Remove the corresponding entry from "self.dirty_log_slots" if needed 741 self.dirty_log_slots.write().unwrap().remove(®ion.slot); 742 743 // Setting the size to 0 means "remove" 744 region.memory_size = 0; 745 // SAFETY: Safe because guest regions are guaranteed not to overlap. 746 unsafe { 747 self.fd 748 .set_user_memory_region(region) 749 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into())) 750 } 751 } 752 753 /// 754 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 755 /// 756 #[cfg(target_arch = "aarch64")] 757 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 758 self.fd 759 .get_preferred_target(kvi) 760 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 761 } 762 763 #[cfg(target_arch = "x86_64")] 764 fn enable_split_irq(&self) -> vm::Result<()> { 765 // Create split irqchip 766 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 767 // are not. 
768 let mut cap = kvm_enable_cap { 769 cap: KVM_CAP_SPLIT_IRQCHIP, 770 ..Default::default() 771 }; 772 cap.args[0] = NUM_IOAPIC_PINS as u64; 773 self.fd 774 .enable_cap(&cap) 775 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 776 Ok(()) 777 } 778 779 #[cfg(target_arch = "x86_64")] 780 fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> { 781 let mut cap = kvm_enable_cap { 782 cap: KVM_CAP_SGX_ATTRIBUTE, 783 ..Default::default() 784 }; 785 cap.args[0] = file.as_raw_fd() as u64; 786 self.fd 787 .enable_cap(&cap) 788 .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?; 789 Ok(()) 790 } 791 792 /// Retrieve guest clock. 793 #[cfg(target_arch = "x86_64")] 794 fn get_clock(&self) -> vm::Result<ClockData> { 795 Ok(self 796 .fd 797 .get_clock() 798 .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))? 799 .into()) 800 } 801 802 /// Set guest clock. 803 #[cfg(target_arch = "x86_64")] 804 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 805 let data = (*data).into(); 806 self.fd 807 .set_clock(&data) 808 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 809 } 810 811 /// Create a device that is used for passthrough 812 fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> { 813 let mut vfio_dev = kvm_create_device { 814 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 815 fd: 0, 816 flags: 0, 817 }; 818 819 self.create_device(&mut vfio_dev) 820 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 821 } 822 823 /// 824 /// Start logging dirty pages 825 /// 826 fn start_dirty_log(&self) -> vm::Result<()> { 827 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 828 for (_, s) in dirty_log_slots.iter() { 829 let region = kvm_userspace_memory_region { 830 slot: s.slot, 831 guest_phys_addr: s.guest_phys_addr, 832 memory_size: s.memory_size, 833 userspace_addr: s.userspace_addr, 834 flags: KVM_MEM_LOG_DIRTY_PAGES, 835 }; 836 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
837 unsafe { 838 self.fd 839 .set_user_memory_region(region) 840 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 841 } 842 } 843 844 Ok(()) 845 } 846 847 /// 848 /// Stop logging dirty pages 849 /// 850 fn stop_dirty_log(&self) -> vm::Result<()> { 851 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 852 for (_, s) in dirty_log_slots.iter() { 853 let region = kvm_userspace_memory_region { 854 slot: s.slot, 855 guest_phys_addr: s.guest_phys_addr, 856 memory_size: s.memory_size, 857 userspace_addr: s.userspace_addr, 858 flags: 0, 859 }; 860 // SAFETY: Safe because guest regions are guaranteed not to overlap. 861 unsafe { 862 self.fd 863 .set_user_memory_region(region) 864 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 865 } 866 } 867 868 Ok(()) 869 } 870 871 /// 872 /// Get dirty pages bitmap (one bit per page) 873 /// 874 fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> { 875 self.fd 876 .get_dirty_log(slot, memory_size as usize) 877 .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into())) 878 } 879 880 /// 881 /// Initialize TDX for this VM 882 /// 883 #[cfg(feature = "tdx")] 884 fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> { 885 const TDX_ATTR_SEPT_VE_DISABLE: usize = 28; 886 887 let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 888 cpuid.iter().map(|e| (*e).into()).collect(); 889 cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default()); 890 891 #[repr(C)] 892 struct TdxInitVm { 893 attributes: u64, 894 max_vcpus: u32, 895 padding: u32, 896 mrconfigid: [u64; 6], 897 mrowner: [u64; 6], 898 mrownerconfig: [u64; 6], 899 cpuid_nent: u32, 900 cpuid_padding: u32, 901 cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256], 902 } 903 let data = TdxInitVm { 904 attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE, 905 max_vcpus, 906 padding: 0, 907 mrconfigid: [0; 6], 908 mrowner: [0; 6], 909 mrownerconfig: [0; 6], 910 cpuid_nent: cpuid.len() as u32, 911 
cpuid_padding: 0, 912 cpuid_entries: cpuid.as_slice().try_into().unwrap(), 913 }; 914 915 tdx_command( 916 &self.fd.as_raw_fd(), 917 TdxCommand::InitVm, 918 0, 919 &data as *const _ as u64, 920 ) 921 .map_err(vm::HypervisorVmError::InitializeTdx) 922 } 923 924 /// 925 /// Finalize the TDX setup for this VM 926 /// 927 #[cfg(feature = "tdx")] 928 fn tdx_finalize(&self) -> vm::Result<()> { 929 tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0) 930 .map_err(vm::HypervisorVmError::FinalizeTdx) 931 } 932 933 /// 934 /// Initialize memory regions for the TDX VM 935 /// 936 #[cfg(feature = "tdx")] 937 fn tdx_init_memory_region( 938 &self, 939 host_address: u64, 940 guest_address: u64, 941 size: u64, 942 measure: bool, 943 ) -> vm::Result<()> { 944 #[repr(C)] 945 struct TdxInitMemRegion { 946 host_address: u64, 947 guest_address: u64, 948 pages: u64, 949 } 950 let data = TdxInitMemRegion { 951 host_address, 952 guest_address, 953 pages: size / 4096, 954 }; 955 956 tdx_command( 957 &self.fd.as_raw_fd(), 958 TdxCommand::InitMemRegion, 959 u32::from(measure), 960 &data as *const _ as u64, 961 ) 962 .map_err(vm::HypervisorVmError::InitMemRegionTdx) 963 } 964 965 /// Downcast to the underlying KvmVm type 966 fn as_any(&self) -> &dyn Any { 967 self 968 } 969 } 970 971 #[cfg(feature = "tdx")] 972 fn tdx_command( 973 fd: &RawFd, 974 command: TdxCommand, 975 flags: u32, 976 data: u64, 977 ) -> std::result::Result<(), std::io::Error> { 978 #[repr(C)] 979 struct TdxIoctlCmd { 980 command: TdxCommand, 981 flags: u32, 982 data: u64, 983 error: u64, 984 unused: u64, 985 } 986 let cmd = TdxIoctlCmd { 987 command, 988 flags, 989 data, 990 error: 0, 991 unused: 0, 992 }; 993 // SAFETY: FFI call. All input parameters are valid. 
994 let ret = unsafe { 995 ioctl_with_val( 996 fd, 997 KVM_MEMORY_ENCRYPT_OP(), 998 &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong, 999 ) 1000 }; 1001 1002 if ret < 0 { 1003 return Err(std::io::Error::last_os_error()); 1004 } 1005 Ok(()) 1006 } 1007 1008 /// Wrapper over KVM system ioctls. 1009 pub struct KvmHypervisor { 1010 kvm: Kvm, 1011 } 1012 1013 impl KvmHypervisor { 1014 #[cfg(target_arch = "x86_64")] 1015 /// 1016 /// Retrieve the list of MSRs supported by the hypervisor. 1017 /// 1018 fn get_msr_list(&self) -> hypervisor::Result<MsrList> { 1019 self.kvm 1020 .get_msr_index_list() 1021 .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into())) 1022 } 1023 } 1024 1025 /// Enum for KVM related error 1026 #[derive(Debug, Error)] 1027 pub enum KvmError { 1028 #[error("Capability missing: {0:?}")] 1029 CapabilityMissing(Cap), 1030 } 1031 1032 pub type KvmResult<T> = result::Result<T, KvmError>; 1033 1034 impl KvmHypervisor { 1035 /// Create a hypervisor based on Kvm 1036 #[allow(clippy::new_ret_no_self)] 1037 pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> { 1038 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 1039 let api_version = kvm_obj.get_api_version(); 1040 1041 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 1042 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 1043 } 1044 1045 Ok(Arc::new(KvmHypervisor { kvm: kvm_obj })) 1046 } 1047 1048 /// Check if the hypervisor is available 1049 pub fn is_available() -> hypervisor::Result<bool> { 1050 match std::fs::metadata("/dev/kvm") { 1051 Ok(_) => Ok(true), 1052 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), 1053 Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck( 1054 err.into(), 1055 )), 1056 } 1057 } 1058 } 1059 1060 /// Implementation of Hypervisor trait for KVM 1061 /// 1062 /// # Examples 1063 /// 1064 /// ``` 1065 /// # use hypervisor::kvm::KvmHypervisor; 1066 /// # use 
std::sync::Arc; 1067 /// let kvm = KvmHypervisor::new().unwrap(); 1068 /// let hypervisor = Arc::new(kvm); 1069 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1070 /// ``` 1071 impl hypervisor::Hypervisor for KvmHypervisor { 1072 /// 1073 /// Returns the type of the hypervisor 1074 /// 1075 fn hypervisor_type(&self) -> HypervisorType { 1076 HypervisorType::Kvm 1077 } 1078 1079 /// Create a KVM vm object of a specific VM type and return the object as Vm trait object 1080 /// 1081 /// # Examples 1082 /// 1083 /// ``` 1084 /// # use hypervisor::kvm::KvmHypervisor; 1085 /// use hypervisor::kvm::KvmVm; 1086 /// let hypervisor = KvmHypervisor::new().unwrap(); 1087 /// let vm = hypervisor.create_vm_with_type(0).unwrap(); 1088 /// ``` 1089 fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1090 let fd: VmFd; 1091 loop { 1092 match self.kvm.create_vm_with_type(vm_type) { 1093 Ok(res) => fd = res, 1094 Err(e) => { 1095 if e.errno() == libc::EINTR { 1096 // If the error returned is EINTR, which means the 1097 // ioctl has been interrupted, we have to retry as 1098 // this can't be considered as a regular error. 
1099 continue; 1100 } else { 1101 return Err(hypervisor::HypervisorError::VmCreate(e.into())); 1102 } 1103 } 1104 } 1105 break; 1106 } 1107 1108 let vm_fd = Arc::new(fd); 1109 1110 #[cfg(target_arch = "x86_64")] 1111 { 1112 let msr_list = self.get_msr_list()?; 1113 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 1114 let mut msrs: Vec<MsrEntry> = vec![ 1115 MsrEntry { 1116 ..Default::default() 1117 }; 1118 num_msrs 1119 ]; 1120 let indices = msr_list.as_slice(); 1121 for (pos, index) in indices.iter().enumerate() { 1122 msrs[pos].index = *index; 1123 } 1124 1125 Ok(Arc::new(KvmVm { 1126 fd: vm_fd, 1127 msrs, 1128 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1129 })) 1130 } 1131 1132 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] 1133 { 1134 Ok(Arc::new(KvmVm { 1135 fd: vm_fd, 1136 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1137 })) 1138 } 1139 } 1140 1141 /// Create a KVM vm object and return the object as Vm trait object 1142 /// 1143 /// # Examples 1144 /// 1145 /// ``` 1146 /// # use hypervisor::kvm::KvmHypervisor; 1147 /// use hypervisor::kvm::KvmVm; 1148 /// let hypervisor = KvmHypervisor::new().unwrap(); 1149 /// let vm = hypervisor.create_vm().unwrap(); 1150 /// ``` 1151 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1152 #[allow(unused_mut)] 1153 let mut vm_type: u64 = 0; // Create with default platform type 1154 1155 // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA 1156 // size from the host and use that when creating the VM, which may 1157 // avoid unnecessary VM creation failures. 
1158 #[cfg(target_arch = "aarch64")] 1159 if self.kvm.check_extension(Cap::ArmVmIPASize) { 1160 vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap(); 1161 } 1162 1163 self.create_vm_with_type(vm_type) 1164 } 1165 1166 fn check_required_extensions(&self) -> hypervisor::Result<()> { 1167 check_required_kvm_extensions(&self.kvm) 1168 .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into())) 1169 } 1170 1171 #[cfg(target_arch = "x86_64")] 1172 /// 1173 /// X86 specific call to get the system supported CPUID values. 1174 /// 1175 fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> { 1176 let kvm_cpuid = self 1177 .kvm 1178 .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) 1179 .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?; 1180 1181 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1182 1183 Ok(v) 1184 } 1185 1186 #[cfg(target_arch = "aarch64")] 1187 /// 1188 /// Retrieve AArch64 host maximum IPA size supported by KVM. 
1189 /// 1190 fn get_host_ipa_limit(&self) -> i32 { 1191 self.kvm.get_host_ipa_limit() 1192 } 1193 1194 /// 1195 /// Retrieve TDX capabilities 1196 /// 1197 #[cfg(feature = "tdx")] 1198 fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> { 1199 let data = TdxCapabilities { 1200 nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32, 1201 ..Default::default() 1202 }; 1203 1204 tdx_command( 1205 &self.kvm.as_raw_fd(), 1206 TdxCommand::Capabilities, 1207 0, 1208 &data as *const _ as u64, 1209 ) 1210 .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?; 1211 1212 Ok(data) 1213 } 1214 1215 #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] 1216 /// 1217 /// Get the number of supported hardware breakpoints 1218 /// 1219 fn get_guest_debug_hw_bps(&self) -> usize { 1220 #[cfg(target_arch = "x86_64")] 1221 { 1222 4 1223 } 1224 #[cfg(target_arch = "aarch64")] 1225 { 1226 self.kvm.get_guest_debug_hw_bps() as usize 1227 } 1228 } 1229 1230 /// Get maximum number of vCPUs 1231 fn get_max_vcpus(&self) -> u32 { 1232 self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32 1233 } 1234 } 1235 1236 /// Vcpu struct for KVM 1237 pub struct KvmVcpu { 1238 fd: Arc<Mutex<VcpuFd>>, 1239 #[cfg(target_arch = "x86_64")] 1240 msrs: Vec<MsrEntry>, 1241 vm_ops: Option<Arc<dyn vm::VmOps>>, 1242 #[cfg(target_arch = "x86_64")] 1243 hyperv_synic: AtomicBool, 1244 } 1245 1246 /// Implementation of Vcpu trait for KVM 1247 /// 1248 /// # Examples 1249 /// 1250 /// ``` 1251 /// # use hypervisor::kvm::KvmHypervisor; 1252 /// # use std::sync::Arc; 1253 /// let kvm = KvmHypervisor::new().unwrap(); 1254 /// let hypervisor = Arc::new(kvm); 1255 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1256 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 1257 /// ``` 1258 impl cpu::Vcpu for KvmVcpu { 1259 /// 1260 /// Returns StandardRegisters with default value set 1261 /// 1262 fn create_standard_regs(&self) -> StandardRegisters { 1263 #[cfg(any(target_arch 
= "x86_64", target_arch = "aarch64"))] 1264 { 1265 kvm_bindings::kvm_regs::default().into() 1266 } 1267 #[cfg(target_arch = "riscv64")] 1268 { 1269 kvm_bindings::kvm_riscv_core::default().into() 1270 } 1271 } 1272 #[cfg(target_arch = "x86_64")] 1273 /// 1274 /// Returns the vCPU general purpose registers. 1275 /// 1276 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1277 Ok(self 1278 .fd 1279 .lock() 1280 .unwrap() 1281 .get_regs() 1282 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))? 1283 .into()) 1284 } 1285 1286 /// 1287 /// Returns the vCPU general purpose registers. 1288 /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG` 1289 /// is used to get registers one by one. 1290 /// 1291 #[cfg(target_arch = "aarch64")] 1292 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1293 let mut state = kvm_regs::default(); 1294 let mut off = offset_of!(user_pt_regs, regs); 1295 // There are 31 user_pt_regs: 1296 // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 1297 // These actually are the general-purpose registers of the Armv8-a 1298 // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register). 1299 for i in 0..31 { 1300 let mut bytes = [0_u8; 8]; 1301 self.fd 1302 .lock() 1303 .unwrap() 1304 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1305 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1306 state.regs.regs[i] = u64::from_le_bytes(bytes); 1307 off += std::mem::size_of::<u64>(); 1308 } 1309 1310 // We are now entering the "Other register" section of the ARMv8-a architecture. 1311 // First one, stack pointer. 
1312 let off = offset_of!(user_pt_regs, sp); 1313 let mut bytes = [0_u8; 8]; 1314 self.fd 1315 .lock() 1316 .unwrap() 1317 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1318 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1319 state.regs.sp = u64::from_le_bytes(bytes); 1320 1321 // Second one, the program counter. 1322 let off = offset_of!(user_pt_regs, pc); 1323 let mut bytes = [0_u8; 8]; 1324 self.fd 1325 .lock() 1326 .unwrap() 1327 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1328 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1329 state.regs.pc = u64::from_le_bytes(bytes); 1330 1331 // Next is the processor state. 1332 let off = offset_of!(user_pt_regs, pstate); 1333 let mut bytes = [0_u8; 8]; 1334 self.fd 1335 .lock() 1336 .unwrap() 1337 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1338 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1339 state.regs.pstate = u64::from_le_bytes(bytes); 1340 1341 // The stack pointer associated with EL1 1342 let off = offset_of!(kvm_regs, sp_el1); 1343 let mut bytes = [0_u8; 8]; 1344 self.fd 1345 .lock() 1346 .unwrap() 1347 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1348 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1349 state.sp_el1 = u64::from_le_bytes(bytes); 1350 1351 // Exception Link Register for EL1, when taking an exception to EL1, this register 1352 // holds the address to which to return afterwards. 1353 let off = offset_of!(kvm_regs, elr_el1); 1354 let mut bytes = [0_u8; 8]; 1355 self.fd 1356 .lock() 1357 .unwrap() 1358 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1359 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1360 state.elr_el1 = u64::from_le_bytes(bytes); 1361 1362 // Saved Program Status Registers, there are 5 of them used in the kernel. 
1363 let mut off = offset_of!(kvm_regs, spsr); 1364 for i in 0..KVM_NR_SPSR as usize { 1365 let mut bytes = [0_u8; 8]; 1366 self.fd 1367 .lock() 1368 .unwrap() 1369 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1370 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1371 state.spsr[i] = u64::from_le_bytes(bytes); 1372 off += std::mem::size_of::<u64>(); 1373 } 1374 1375 // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel: 1376 // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 1377 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1378 for i in 0..32 { 1379 let mut bytes = [0_u8; 16]; 1380 self.fd 1381 .lock() 1382 .unwrap() 1383 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes) 1384 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1385 state.fp_regs.vregs[i] = u128::from_le_bytes(bytes); 1386 off += mem::size_of::<u128>(); 1387 } 1388 1389 // Floating-point Status Register 1390 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1391 let mut bytes = [0_u8; 4]; 1392 self.fd 1393 .lock() 1394 .unwrap() 1395 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1396 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1397 state.fp_regs.fpsr = u32::from_le_bytes(bytes); 1398 1399 // Floating-point Control Register 1400 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1401 let mut bytes = [0_u8; 4]; 1402 self.fd 1403 .lock() 1404 .unwrap() 1405 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1406 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1407 state.fp_regs.fpcr = u32::from_le_bytes(bytes); 1408 Ok(state.into()) 1409 } 1410 1411 #[cfg(target_arch = "riscv64")] 1412 /// 1413 /// Returns the RISC-V vCPU core registers. 
1414 /// The `KVM_GET_REGS` ioctl is not available on RISC-V 64-bit, 1415 /// `KVM_GET_ONE_REG` is used to get registers one by one. 1416 /// 1417 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1418 let mut state = kvm_riscv_core::default(); 1419 1420 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc); 1421 let mut bytes = [0_u8; 8]; 1422 self.fd 1423 .lock() 1424 .unwrap() 1425 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1426 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1427 state.regs.pc = u64::from_le_bytes(bytes); 1428 1429 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, ra); 1430 let mut bytes = [0_u8; 8]; 1431 self.fd 1432 .lock() 1433 .unwrap() 1434 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1435 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1436 state.regs.ra = u64::from_le_bytes(bytes); 1437 1438 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, sp); 1439 let mut bytes = [0_u8; 8]; 1440 self.fd 1441 .lock() 1442 .unwrap() 1443 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1444 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1445 state.regs.sp = u64::from_le_bytes(bytes); 1446 1447 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, gp); 1448 let mut bytes = [0_u8; 8]; 1449 self.fd 1450 .lock() 1451 .unwrap() 1452 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1453 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1454 state.regs.gp = u64::from_le_bytes(bytes); 1455 1456 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, tp); 1457 let mut bytes = [0_u8; 8]; 1458 self.fd 1459 .lock() 1460 .unwrap() 1461 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1462 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1463 state.regs.tp = u64::from_le_bytes(bytes); 1464 1465 let off = 
offset_of!(kvm_riscv_core, regs, user_regs_struct, t0); 1466 let mut bytes = [0_u8; 8]; 1467 self.fd 1468 .lock() 1469 .unwrap() 1470 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1471 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1472 state.regs.t0 = u64::from_le_bytes(bytes); 1473 1474 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t1); 1475 let mut bytes = [0_u8; 8]; 1476 self.fd 1477 .lock() 1478 .unwrap() 1479 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1480 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1481 state.regs.t1 = u64::from_le_bytes(bytes); 1482 1483 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t2); 1484 let mut bytes = [0_u8; 8]; 1485 self.fd 1486 .lock() 1487 .unwrap() 1488 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1489 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1490 state.regs.t2 = u64::from_le_bytes(bytes); 1491 1492 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s0); 1493 let mut bytes = [0_u8; 8]; 1494 self.fd 1495 .lock() 1496 .unwrap() 1497 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1498 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1499 state.regs.s0 = u64::from_le_bytes(bytes); 1500 1501 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s1); 1502 let mut bytes = [0_u8; 8]; 1503 self.fd 1504 .lock() 1505 .unwrap() 1506 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1507 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1508 state.regs.s1 = u64::from_le_bytes(bytes); 1509 1510 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0); 1511 let mut bytes = [0_u8; 8]; 1512 self.fd 1513 .lock() 1514 .unwrap() 1515 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1516 .map_err(|e| 
cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1517 state.regs.a0 = u64::from_le_bytes(bytes); 1518 1519 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1); 1520 let mut bytes = [0_u8; 8]; 1521 self.fd 1522 .lock() 1523 .unwrap() 1524 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1525 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1526 state.regs.a1 = u64::from_le_bytes(bytes); 1527 1528 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a2); 1529 let mut bytes = [0_u8; 8]; 1530 self.fd 1531 .lock() 1532 .unwrap() 1533 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1534 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1535 state.regs.a2 = u64::from_le_bytes(bytes); 1536 1537 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a3); 1538 let mut bytes = [0_u8; 8]; 1539 self.fd 1540 .lock() 1541 .unwrap() 1542 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1543 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1544 state.regs.a3 = u64::from_le_bytes(bytes); 1545 1546 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a4); 1547 let mut bytes = [0_u8; 8]; 1548 self.fd 1549 .lock() 1550 .unwrap() 1551 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1552 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1553 state.regs.a4 = u64::from_le_bytes(bytes); 1554 1555 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a5); 1556 let mut bytes = [0_u8; 8]; 1557 self.fd 1558 .lock() 1559 .unwrap() 1560 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1561 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1562 state.regs.a5 = u64::from_le_bytes(bytes); 1563 1564 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a6); 1565 let mut bytes = [0_u8; 8]; 1566 self.fd 1567 .lock() 1568 .unwrap() 1569 
.get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1570 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1571 state.regs.a6 = u64::from_le_bytes(bytes); 1572 1573 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a7); 1574 let mut bytes = [0_u8; 8]; 1575 self.fd 1576 .lock() 1577 .unwrap() 1578 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1579 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1580 state.regs.a7 = u64::from_le_bytes(bytes); 1581 1582 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s2); 1583 let mut bytes = [0_u8; 8]; 1584 self.fd 1585 .lock() 1586 .unwrap() 1587 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1588 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1589 state.regs.s2 = u64::from_le_bytes(bytes); 1590 1591 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s3); 1592 let mut bytes = [0_u8; 8]; 1593 self.fd 1594 .lock() 1595 .unwrap() 1596 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1597 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1598 state.regs.s3 = u64::from_le_bytes(bytes); 1599 1600 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s4); 1601 let mut bytes = [0_u8; 8]; 1602 self.fd 1603 .lock() 1604 .unwrap() 1605 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1606 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1607 state.regs.s4 = u64::from_le_bytes(bytes); 1608 1609 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s5); 1610 let mut bytes = [0_u8; 8]; 1611 self.fd 1612 .lock() 1613 .unwrap() 1614 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1615 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1616 state.regs.s5 = u64::from_le_bytes(bytes); 1617 1618 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s6); 
1619 let mut bytes = [0_u8; 8]; 1620 self.fd 1621 .lock() 1622 .unwrap() 1623 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1624 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1625 state.regs.s6 = u64::from_le_bytes(bytes); 1626 1627 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s7); 1628 let mut bytes = [0_u8; 8]; 1629 self.fd 1630 .lock() 1631 .unwrap() 1632 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1633 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1634 state.regs.s7 = u64::from_le_bytes(bytes); 1635 1636 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s8); 1637 let mut bytes = [0_u8; 8]; 1638 self.fd 1639 .lock() 1640 .unwrap() 1641 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1642 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1643 state.regs.s8 = u64::from_le_bytes(bytes); 1644 1645 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s9); 1646 let mut bytes = [0_u8; 8]; 1647 self.fd 1648 .lock() 1649 .unwrap() 1650 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1651 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1652 state.regs.s9 = u64::from_le_bytes(bytes); 1653 1654 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s10); 1655 let mut bytes = [0_u8; 8]; 1656 self.fd 1657 .lock() 1658 .unwrap() 1659 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1660 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1661 state.regs.s10 = u64::from_le_bytes(bytes); 1662 1663 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s11); 1664 let mut bytes = [0_u8; 8]; 1665 self.fd 1666 .lock() 1667 .unwrap() 1668 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1669 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1670 state.regs.s11 = 
u64::from_le_bytes(bytes); 1671 1672 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t3); 1673 let mut bytes = [0_u8; 8]; 1674 self.fd 1675 .lock() 1676 .unwrap() 1677 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1678 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1679 state.regs.t3 = u64::from_le_bytes(bytes); 1680 1681 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t4); 1682 let mut bytes = [0_u8; 8]; 1683 self.fd 1684 .lock() 1685 .unwrap() 1686 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1687 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1688 state.regs.t4 = u64::from_le_bytes(bytes); 1689 1690 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t5); 1691 let mut bytes = [0_u8; 8]; 1692 self.fd 1693 .lock() 1694 .unwrap() 1695 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1696 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1697 state.regs.t5 = u64::from_le_bytes(bytes); 1698 1699 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t6); 1700 let mut bytes = [0_u8; 8]; 1701 self.fd 1702 .lock() 1703 .unwrap() 1704 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1705 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1706 state.regs.t6 = u64::from_le_bytes(bytes); 1707 1708 let off = offset_of!(kvm_riscv_core, mode); 1709 let mut bytes = [0_u8; 8]; 1710 self.fd 1711 .lock() 1712 .unwrap() 1713 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1714 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1715 state.mode = u64::from_le_bytes(bytes); 1716 1717 Ok(state.into()) 1718 } 1719 1720 #[cfg(target_arch = "x86_64")] 1721 /// 1722 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 
1723 /// 1724 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1725 let regs = (*regs).into(); 1726 self.fd 1727 .lock() 1728 .unwrap() 1729 .set_regs(®s) 1730 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 1731 } 1732 1733 /// 1734 /// Sets the vCPU general purpose registers. 1735 /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG` 1736 /// is used to set registers one by one. 1737 /// 1738 #[cfg(target_arch = "aarch64")] 1739 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1740 // The function follows the exact identical order from `state`. Look there 1741 // for some additional info on registers. 1742 let kvm_regs_state: kvm_regs = (*state).into(); 1743 let mut off = offset_of!(user_pt_regs, regs); 1744 for i in 0..31 { 1745 self.fd 1746 .lock() 1747 .unwrap() 1748 .set_one_reg( 1749 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1750 &kvm_regs_state.regs.regs[i].to_le_bytes(), 1751 ) 1752 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1753 off += std::mem::size_of::<u64>(); 1754 } 1755 1756 let off = offset_of!(user_pt_regs, sp); 1757 self.fd 1758 .lock() 1759 .unwrap() 1760 .set_one_reg( 1761 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1762 &kvm_regs_state.regs.sp.to_le_bytes(), 1763 ) 1764 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1765 1766 let off = offset_of!(user_pt_regs, pc); 1767 self.fd 1768 .lock() 1769 .unwrap() 1770 .set_one_reg( 1771 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1772 &kvm_regs_state.regs.pc.to_le_bytes(), 1773 ) 1774 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1775 1776 let off = offset_of!(user_pt_regs, pstate); 1777 self.fd 1778 .lock() 1779 .unwrap() 1780 .set_one_reg( 1781 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1782 &kvm_regs_state.regs.pstate.to_le_bytes(), 1783 ) 1784 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1785 1786 let off = 
offset_of!(kvm_regs, sp_el1); 1787 self.fd 1788 .lock() 1789 .unwrap() 1790 .set_one_reg( 1791 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1792 &kvm_regs_state.sp_el1.to_le_bytes(), 1793 ) 1794 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1795 1796 let off = offset_of!(kvm_regs, elr_el1); 1797 self.fd 1798 .lock() 1799 .unwrap() 1800 .set_one_reg( 1801 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1802 &kvm_regs_state.elr_el1.to_le_bytes(), 1803 ) 1804 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1805 1806 let mut off = offset_of!(kvm_regs, spsr); 1807 for i in 0..KVM_NR_SPSR as usize { 1808 self.fd 1809 .lock() 1810 .unwrap() 1811 .set_one_reg( 1812 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1813 &kvm_regs_state.spsr[i].to_le_bytes(), 1814 ) 1815 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1816 off += std::mem::size_of::<u64>(); 1817 } 1818 1819 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1820 for i in 0..32 { 1821 self.fd 1822 .lock() 1823 .unwrap() 1824 .set_one_reg( 1825 arm64_core_reg_id!(KVM_REG_SIZE_U128, off), 1826 &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(), 1827 ) 1828 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1829 off += mem::size_of::<u128>(); 1830 } 1831 1832 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1833 self.fd 1834 .lock() 1835 .unwrap() 1836 .set_one_reg( 1837 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1838 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(), 1839 ) 1840 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1841 1842 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1843 self.fd 1844 .lock() 1845 .unwrap() 1846 .set_one_reg( 1847 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1848 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(), 1849 ) 1850 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1851 
Ok(()) 1852 } 1853 1854 #[cfg(target_arch = "riscv64")] 1855 /// 1856 /// Sets the RISC-V vCPU core registers. 1857 /// The `KVM_SET_REGS` ioctl is not available on RISC-V 64-bit, 1858 /// `KVM_SET_ONE_REG` is used to set registers one by one. 1859 /// 1860 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1861 // The function follows the exact identical order from `state`. Look there 1862 // for some additional info on registers. 1863 let kvm_regs_state: kvm_riscv_core = (*state).into(); 1864 1865 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc); 1866 self.fd 1867 .lock() 1868 .unwrap() 1869 .set_one_reg( 1870 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1871 &kvm_regs_state.regs.pc.to_le_bytes(), 1872 ) 1873 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1874 1875 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, ra); 1876 self.fd 1877 .lock() 1878 .unwrap() 1879 .set_one_reg( 1880 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1881 &kvm_regs_state.regs.ra.to_le_bytes(), 1882 ) 1883 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1884 1885 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, sp); 1886 self.fd 1887 .lock() 1888 .unwrap() 1889 .set_one_reg( 1890 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1891 &kvm_regs_state.regs.sp.to_le_bytes(), 1892 ) 1893 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1894 1895 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, gp); 1896 self.fd 1897 .lock() 1898 .unwrap() 1899 .set_one_reg( 1900 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1901 &kvm_regs_state.regs.gp.to_le_bytes(), 1902 ) 1903 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1904 1905 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, tp); 1906 self.fd 1907 .lock() 1908 .unwrap() 1909 .set_one_reg( 1910 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1911 &kvm_regs_state.regs.tp.to_le_bytes(), 1912 ) 1913 
.map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1914 1915 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t0); 1916 self.fd 1917 .lock() 1918 .unwrap() 1919 .set_one_reg( 1920 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1921 &kvm_regs_state.regs.t0.to_le_bytes(), 1922 ) 1923 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1924 1925 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t1); 1926 self.fd 1927 .lock() 1928 .unwrap() 1929 .set_one_reg( 1930 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1931 &kvm_regs_state.regs.t1.to_le_bytes(), 1932 ) 1933 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1934 1935 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t2); 1936 self.fd 1937 .lock() 1938 .unwrap() 1939 .set_one_reg( 1940 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1941 &kvm_regs_state.regs.t2.to_le_bytes(), 1942 ) 1943 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1944 1945 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s0); 1946 self.fd 1947 .lock() 1948 .unwrap() 1949 .set_one_reg( 1950 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1951 &kvm_regs_state.regs.s0.to_le_bytes(), 1952 ) 1953 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1954 1955 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s1); 1956 self.fd 1957 .lock() 1958 .unwrap() 1959 .set_one_reg( 1960 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1961 &kvm_regs_state.regs.s1.to_le_bytes(), 1962 ) 1963 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1964 1965 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0); 1966 self.fd 1967 .lock() 1968 .unwrap() 1969 .set_one_reg( 1970 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1971 &kvm_regs_state.regs.a0.to_le_bytes(), 1972 ) 1973 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1974 1975 let off = offset_of!(kvm_riscv_core, regs, 
user_regs_struct, a1); 1976 self.fd 1977 .lock() 1978 .unwrap() 1979 .set_one_reg( 1980 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1981 &kvm_regs_state.regs.a1.to_le_bytes(), 1982 ) 1983 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1984 1985 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a2); 1986 self.fd 1987 .lock() 1988 .unwrap() 1989 .set_one_reg( 1990 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1991 &kvm_regs_state.regs.a2.to_le_bytes(), 1992 ) 1993 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1994 1995 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a3); 1996 self.fd 1997 .lock() 1998 .unwrap() 1999 .set_one_reg( 2000 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2001 &kvm_regs_state.regs.a3.to_le_bytes(), 2002 ) 2003 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2004 2005 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a4); 2006 self.fd 2007 .lock() 2008 .unwrap() 2009 .set_one_reg( 2010 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2011 &kvm_regs_state.regs.a4.to_le_bytes(), 2012 ) 2013 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2014 2015 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a5); 2016 self.fd 2017 .lock() 2018 .unwrap() 2019 .set_one_reg( 2020 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2021 &kvm_regs_state.regs.a5.to_le_bytes(), 2022 ) 2023 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2024 2025 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a6); 2026 self.fd 2027 .lock() 2028 .unwrap() 2029 .set_one_reg( 2030 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2031 &kvm_regs_state.regs.a6.to_le_bytes(), 2032 ) 2033 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2034 2035 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a7); 2036 self.fd 2037 .lock() 2038 .unwrap() 2039 .set_one_reg( 2040 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 
2041 &kvm_regs_state.regs.a7.to_le_bytes(), 2042 ) 2043 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2044 2045 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s2); 2046 self.fd 2047 .lock() 2048 .unwrap() 2049 .set_one_reg( 2050 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2051 &kvm_regs_state.regs.s2.to_le_bytes(), 2052 ) 2053 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2054 2055 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s3); 2056 self.fd 2057 .lock() 2058 .unwrap() 2059 .set_one_reg( 2060 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2061 &kvm_regs_state.regs.s3.to_le_bytes(), 2062 ) 2063 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2064 2065 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s4); 2066 self.fd 2067 .lock() 2068 .unwrap() 2069 .set_one_reg( 2070 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2071 &kvm_regs_state.regs.s4.to_le_bytes(), 2072 ) 2073 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2074 2075 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s5); 2076 self.fd 2077 .lock() 2078 .unwrap() 2079 .set_one_reg( 2080 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2081 &kvm_regs_state.regs.s5.to_le_bytes(), 2082 ) 2083 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2084 2085 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s6); 2086 self.fd 2087 .lock() 2088 .unwrap() 2089 .set_one_reg( 2090 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2091 &kvm_regs_state.regs.s6.to_le_bytes(), 2092 ) 2093 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2094 2095 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s7); 2096 self.fd 2097 .lock() 2098 .unwrap() 2099 .set_one_reg( 2100 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2101 &kvm_regs_state.regs.s7.to_le_bytes(), 2102 ) 2103 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2104 
2105 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s8); 2106 self.fd 2107 .lock() 2108 .unwrap() 2109 .set_one_reg( 2110 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2111 &kvm_regs_state.regs.s8.to_le_bytes(), 2112 ) 2113 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2114 2115 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s9); 2116 self.fd 2117 .lock() 2118 .unwrap() 2119 .set_one_reg( 2120 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2121 &kvm_regs_state.regs.s9.to_le_bytes(), 2122 ) 2123 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2124 2125 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s10); 2126 self.fd 2127 .lock() 2128 .unwrap() 2129 .set_one_reg( 2130 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2131 &kvm_regs_state.regs.s10.to_le_bytes(), 2132 ) 2133 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2134 2135 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s11); 2136 self.fd 2137 .lock() 2138 .unwrap() 2139 .set_one_reg( 2140 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2141 &kvm_regs_state.regs.s11.to_le_bytes(), 2142 ) 2143 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2144 2145 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t3); 2146 self.fd 2147 .lock() 2148 .unwrap() 2149 .set_one_reg( 2150 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2151 &kvm_regs_state.regs.t3.to_le_bytes(), 2152 ) 2153 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2154 2155 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t4); 2156 self.fd 2157 .lock() 2158 .unwrap() 2159 .set_one_reg( 2160 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 2161 &kvm_regs_state.regs.t4.to_le_bytes(), 2162 ) 2163 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2164 2165 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t5); 2166 self.fd 2167 .lock() 2168 .unwrap() 2169 
.set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
                &kvm_regs_state.regs.t5.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t6);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
                &kvm_regs_state.regs.t6.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        let off = offset_of!(kvm_riscv_core, mode);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
                &kvm_regs_state.mode.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    /// The value read from the vCPU fd is converted into the
    /// hypervisor-agnostic `SpecialRegisters` type. A failure of the
    /// underlying call is reported as `HypervisorCpuError::GetSpecialRegs`.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    /// `sregs` is converted to the KVM representation before being written.
    /// A failure of the underlying call is reported as
    /// `HypervisorCpuError::SetSpecialRegs`.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    /// A failure of the underlying call is reported as
    /// `HypervisorCpuError::GetFloatingPointRegs`.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
2242 /// 2243 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 2244 let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into(); 2245 self.fd 2246 .lock() 2247 .unwrap() 2248 .set_fpu(&fpu) 2249 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 2250 } 2251 2252 #[cfg(target_arch = "x86_64")] 2253 /// 2254 /// X86 specific call to setup the CPUID registers. 2255 /// 2256 fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> { 2257 let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 2258 cpuid.iter().map(|e| (*e).into()).collect(); 2259 let kvm_cpuid = <CpuId>::from_entries(&cpuid) 2260 .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?; 2261 2262 self.fd 2263 .lock() 2264 .unwrap() 2265 .set_cpuid2(&kvm_cpuid) 2266 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 2267 } 2268 2269 #[cfg(target_arch = "x86_64")] 2270 /// 2271 /// X86 specific call to enable HyperV SynIC 2272 /// 2273 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 2274 // Update the information about Hyper-V SynIC being enabled and 2275 // emulated as it will influence later which MSRs should be saved. 2276 self.hyperv_synic.store(true, Ordering::Release); 2277 2278 let cap = kvm_enable_cap { 2279 cap: KVM_CAP_HYPERV_SYNIC, 2280 ..Default::default() 2281 }; 2282 self.fd 2283 .lock() 2284 .unwrap() 2285 .enable_cap(&cap) 2286 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into())) 2287 } 2288 2289 /// 2290 /// X86 specific call to retrieve the CPUID registers. 
2291 /// 2292 #[cfg(target_arch = "x86_64")] 2293 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> { 2294 let kvm_cpuid = self 2295 .fd 2296 .lock() 2297 .unwrap() 2298 .get_cpuid2(num_entries) 2299 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?; 2300 2301 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 2302 2303 Ok(v) 2304 } 2305 2306 #[cfg(target_arch = "x86_64")] 2307 /// 2308 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 2309 /// 2310 fn get_lapic(&self) -> cpu::Result<LapicState> { 2311 Ok(self 2312 .fd 2313 .lock() 2314 .unwrap() 2315 .get_lapic() 2316 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))? 2317 .into()) 2318 } 2319 2320 #[cfg(target_arch = "x86_64")] 2321 /// 2322 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 2323 /// 2324 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 2325 let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into(); 2326 self.fd 2327 .lock() 2328 .unwrap() 2329 .set_lapic(&klapic) 2330 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 2331 } 2332 2333 #[cfg(target_arch = "x86_64")] 2334 /// 2335 /// Returns the model-specific registers (MSR) for this vCPU. 2336 /// 2337 fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> { 2338 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 2339 let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 2340 let succ = self 2341 .fd 2342 .lock() 2343 .unwrap() 2344 .get_msrs(&mut kvm_msrs) 2345 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?; 2346 2347 msrs[..succ].copy_from_slice( 2348 &kvm_msrs.as_slice()[..succ] 2349 .iter() 2350 .map(|e| (*e).into()) 2351 .collect::<Vec<MsrEntry>>(), 2352 ); 2353 2354 Ok(succ) 2355 } 2356 2357 #[cfg(target_arch = "x86_64")] 2358 /// 2359 /// Setup the model-specific registers (MSR) for this vCPU. 
2360 /// Returns the number of MSR entries actually written. 2361 /// 2362 fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> { 2363 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 2364 let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 2365 self.fd 2366 .lock() 2367 .unwrap() 2368 .set_msrs(&kvm_msrs) 2369 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 2370 } 2371 2372 /// 2373 /// Returns the vcpu's current "multiprocessing state". 2374 /// 2375 fn get_mp_state(&self) -> cpu::Result<MpState> { 2376 Ok(self 2377 .fd 2378 .lock() 2379 .unwrap() 2380 .get_mp_state() 2381 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))? 2382 .into()) 2383 } 2384 2385 /// 2386 /// Sets the vcpu's current "multiprocessing state". 2387 /// 2388 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 2389 self.fd 2390 .lock() 2391 .unwrap() 2392 .set_mp_state(mp_state.into()) 2393 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 2394 } 2395 2396 #[cfg(target_arch = "x86_64")] 2397 /// 2398 /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl. 2399 /// 2400 fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> { 2401 let tr = self 2402 .fd 2403 .lock() 2404 .unwrap() 2405 .translate_gva(gva) 2406 .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?; 2407 // tr.valid is set if the GVA is mapped to valid GPA. 2408 match tr.valid { 2409 0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!( 2410 "Invalid GVA: {:#x}", 2411 gva 2412 ))), 2413 _ => Ok((tr.physical_address, 0)), 2414 } 2415 } 2416 2417 /// 2418 /// Triggers the running of the current virtual CPU returning an exit reason. 
2419 /// 2420 fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 2421 match self.fd.lock().unwrap().run() { 2422 Ok(run) => match run { 2423 #[cfg(target_arch = "x86_64")] 2424 VcpuExit::IoIn(addr, data) => { 2425 if let Some(vm_ops) = &self.vm_ops { 2426 return vm_ops 2427 .pio_read(addr.into(), data) 2428 .map(|_| cpu::VmExit::Ignore) 2429 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 2430 } 2431 2432 Ok(cpu::VmExit::Ignore) 2433 } 2434 #[cfg(target_arch = "x86_64")] 2435 VcpuExit::IoOut(addr, data) => { 2436 if let Some(vm_ops) = &self.vm_ops { 2437 return vm_ops 2438 .pio_write(addr.into(), data) 2439 .map(|_| cpu::VmExit::Ignore) 2440 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 2441 } 2442 2443 Ok(cpu::VmExit::Ignore) 2444 } 2445 #[cfg(target_arch = "x86_64")] 2446 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)), 2447 #[cfg(target_arch = "x86_64")] 2448 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset), 2449 2450 #[cfg(target_arch = "aarch64")] 2451 VcpuExit::SystemEvent(event_type, flags) => { 2452 use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; 2453 // On Aarch64, when the VM is shutdown, run() returns 2454 // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN 2455 if event_type == KVM_SYSTEM_EVENT_RESET { 2456 Ok(cpu::VmExit::Reset) 2457 } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN { 2458 Ok(cpu::VmExit::Shutdown) 2459 } else { 2460 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 2461 "Unexpected system event with type 0x{:x}, flags 0x{:x?}", 2462 event_type, 2463 flags 2464 ))) 2465 } 2466 } 2467 2468 VcpuExit::MmioRead(addr, data) => { 2469 if let Some(vm_ops) = &self.vm_ops { 2470 return vm_ops 2471 .mmio_read(addr, data) 2472 .map(|_| cpu::VmExit::Ignore) 2473 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 2474 } 2475 2476 Ok(cpu::VmExit::Ignore) 2477 } 2478 VcpuExit::MmioWrite(addr, data) => { 2479 if let Some(vm_ops) = 
&self.vm_ops { 2480 return vm_ops 2481 .mmio_write(addr, data) 2482 .map(|_| cpu::VmExit::Ignore) 2483 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 2484 } 2485 2486 Ok(cpu::VmExit::Ignore) 2487 } 2488 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv), 2489 #[cfg(feature = "tdx")] 2490 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx), 2491 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug), 2492 2493 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 2494 "Unexpected exit reason on vcpu run: {:?}", 2495 r 2496 ))), 2497 }, 2498 2499 Err(ref e) => match e.errno() { 2500 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 2501 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 2502 "VCPU error {:?}", 2503 e 2504 ))), 2505 }, 2506 } 2507 } 2508 2509 #[cfg(target_arch = "x86_64")] 2510 /// 2511 /// Let the guest know that it has been paused, which prevents from 2512 /// potential soft lockups when being resumed. 2513 /// 2514 fn notify_guest_clock_paused(&self) -> cpu::Result<()> { 2515 if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() { 2516 // Linux kernel returns -EINVAL if the PV clock isn't yet initialised 2517 // which could be because we're still in firmware or the guest doesn't 2518 // use KVM clock. 2519 if e.errno() != libc::EINVAL { 2520 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into())); 2521 } 2522 } 2523 2524 Ok(()) 2525 } 2526 2527 #[cfg(not(target_arch = "riscv64"))] 2528 /// 2529 /// Sets debug registers to set hardware breakpoints and/or enable single step. 
2530 /// 2531 fn set_guest_debug( 2532 &self, 2533 addrs: &[vm_memory::GuestAddress], 2534 singlestep: bool, 2535 ) -> cpu::Result<()> { 2536 let mut dbg = kvm_guest_debug { 2537 #[cfg(target_arch = "x86_64")] 2538 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP, 2539 #[cfg(target_arch = "aarch64")] 2540 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW, 2541 ..Default::default() 2542 }; 2543 if singlestep { 2544 dbg.control |= KVM_GUESTDBG_SINGLESTEP; 2545 } 2546 2547 // Set the debug registers. 2548 // Here we assume that the number of addresses do not exceed what 2549 // `Hypervisor::get_guest_debug_hw_bps()` specifies. 2550 #[cfg(target_arch = "x86_64")] 2551 { 2552 // Set bits 9 and 10. 2553 // bit 9: GE (global exact breakpoint enable) flag. 2554 // bit 10: always 1. 2555 dbg.arch.debugreg[7] = 0x0600; 2556 2557 for (i, addr) in addrs.iter().enumerate() { 2558 dbg.arch.debugreg[i] = addr.0; 2559 // Set global breakpoint enable flag 2560 dbg.arch.debugreg[7] |= 2 << (i * 2); 2561 } 2562 } 2563 #[cfg(target_arch = "aarch64")] 2564 { 2565 for (i, addr) in addrs.iter().enumerate() { 2566 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2): 2567 // bit 0: 1 (Enabled) 2568 // bit 1~2: 0b11 (PMC = EL1/EL0) 2569 // bit 5~8: 0b1111 (BAS = AArch64) 2570 // others: 0 2571 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64; 2572 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3): 2573 // bit 2~52: VA[2:52] 2574 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0; 2575 } 2576 } 2577 self.fd 2578 .lock() 2579 .unwrap() 2580 .set_guest_debug(&dbg) 2581 .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into())) 2582 } 2583 2584 #[cfg(target_arch = "aarch64")] 2585 fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> { 2586 self.fd 2587 .lock() 2588 .unwrap() 2589 .vcpu_init(kvi) 2590 .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into())) 2591 } 2592 2593 #[cfg(target_arch = "aarch64")] 2594 fn vcpu_finalize(&self, feature: i32) -> 
cpu::Result<()> { 2595 self.fd 2596 .lock() 2597 .unwrap() 2598 .vcpu_finalize(&feature) 2599 .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into())) 2600 } 2601 2602 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] 2603 /// 2604 /// Gets a list of the guest registers that are supported for the 2605 /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 2606 /// 2607 fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> { 2608 self.fd 2609 .lock() 2610 .unwrap() 2611 .get_reg_list(reg_list) 2612 .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into())) 2613 } 2614 2615 /// 2616 /// Gets the value of a system register 2617 /// 2618 #[cfg(target_arch = "aarch64")] 2619 fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> { 2620 // 2621 // Arm Architecture Reference Manual defines the encoding of 2622 // AArch64 system registers, see 2623 // https://developer.arm.com/documentation/ddi0487 (chapter D12). 2624 // While KVM defines another ID for each AArch64 system register, 2625 // which is used in calling `KVM_G/SET_ONE_REG` to access a system 2626 // register of a guest. 2627 // A mapping exists between the Arm standard encoding and the KVM ID. 2628 // This function takes the standard u32 ID as input parameter, converts 2629 // it to the corresponding KVM ID, and call `KVM_GET_ONE_REG` API to 2630 // get the value of the system parameter. 
2631 // 2632 let id: u64 = KVM_REG_ARM64 2633 | KVM_REG_SIZE_U64 2634 | KVM_REG_ARM64_SYSREG as u64 2635 | ((((sys_reg) >> 5) 2636 & (KVM_REG_ARM64_SYSREG_OP0_MASK 2637 | KVM_REG_ARM64_SYSREG_OP1_MASK 2638 | KVM_REG_ARM64_SYSREG_CRN_MASK 2639 | KVM_REG_ARM64_SYSREG_CRM_MASK 2640 | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64); 2641 let mut bytes = [0_u8; 8]; 2642 self.fd 2643 .lock() 2644 .unwrap() 2645 .get_one_reg(id, &mut bytes) 2646 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?; 2647 Ok(u64::from_le_bytes(bytes)) 2648 } 2649 2650 /// 2651 /// Gets the value of a non-core register 2652 /// 2653 #[cfg(target_arch = "riscv64")] 2654 fn get_non_core_reg(&self, _non_core_reg: u32) -> cpu::Result<u64> { 2655 unimplemented!() 2656 } 2657 2658 /// 2659 /// Configure core registers for a given CPU. 2660 /// 2661 #[cfg(target_arch = "aarch64")] 2662 fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { 2663 #[allow(non_upper_case_globals)] 2664 // PSR (Processor State Register) bits. 2665 // Taken from arch/arm64/include/uapi/asm/ptrace.h. 2666 const PSR_MODE_EL1h: u64 = 0x0000_0005; 2667 const PSR_F_BIT: u64 = 0x0000_0040; 2668 const PSR_I_BIT: u64 = 0x0000_0080; 2669 const PSR_A_BIT: u64 = 0x0000_0100; 2670 const PSR_D_BIT: u64 = 0x0000_0200; 2671 // Taken from arch/arm64/kvm/inject_fault.c. 2672 const PSTATE_FAULT_BITS_64: u64 = 2673 PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT; 2674 2675 let kreg_off = offset_of!(kvm_regs, regs); 2676 2677 // Get the register index of the PSTATE (Processor State) register. 2678 let pstate = offset_of!(user_pt_regs, pstate) + kreg_off; 2679 self.fd 2680 .lock() 2681 .unwrap() 2682 .set_one_reg( 2683 arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate), 2684 &PSTATE_FAULT_BITS_64.to_le_bytes(), 2685 ) 2686 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 2687 2688 // Other vCPUs are powered off initially awaiting PSCI wakeup. 
2689 if cpu_id == 0 { 2690 // Setting the PC (Processor Counter) to the current program address (kernel address). 2691 let pc = offset_of!(user_pt_regs, pc) + kreg_off; 2692 self.fd 2693 .lock() 2694 .unwrap() 2695 .set_one_reg( 2696 arm64_core_reg_id!(KVM_REG_SIZE_U64, pc), 2697 &boot_ip.to_le_bytes(), 2698 ) 2699 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 2700 2701 // Last mandatory thing to set -> the address pointing to the FDT (also called DTB). 2702 // "The device tree blob (dtb) must be placed on an 8-byte boundary and must 2703 // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt. 2704 // We are choosing to place it the end of DRAM. See `get_fdt_addr`. 2705 let regs0 = offset_of!(user_pt_regs, regs) + kreg_off; 2706 self.fd 2707 .lock() 2708 .unwrap() 2709 .set_one_reg( 2710 arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0), 2711 &fdt_start.to_le_bytes(), 2712 ) 2713 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 2714 } 2715 Ok(()) 2716 } 2717 2718 #[cfg(target_arch = "riscv64")] 2719 /// 2720 /// Configure registers for a given RISC-V CPU. 2721 /// 2722 fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { 2723 // Setting the A0 () to the hartid of this CPU. 2724 let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0); 2725 self.fd 2726 .lock() 2727 .unwrap() 2728 .set_one_reg( 2729 riscv64_reg_id!(KVM_REG_RISCV_CORE, a0), 2730 &u64::from(cpu_id).to_le_bytes(), 2731 ) 2732 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2733 2734 // Setting the PC (Processor Counter) to the current program address (kernel address). 
2735 let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc); 2736 self.fd 2737 .lock() 2738 .unwrap() 2739 .set_one_reg( 2740 riscv64_reg_id!(KVM_REG_RISCV_CORE, pc), 2741 &boot_ip.to_le_bytes(), 2742 ) 2743 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2744 2745 // Last mandatory thing to set -> the address pointing to the FDT (also called DTB). 2746 // "The device tree blob (dtb) must be placed on an 8-byte boundary and must 2747 // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt. 2748 let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1); 2749 self.fd 2750 .lock() 2751 .unwrap() 2752 .set_one_reg( 2753 riscv64_reg_id!(KVM_REG_RISCV_CORE, a1), 2754 &fdt_start.to_le_bytes(), 2755 ) 2756 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 2757 2758 Ok(()) 2759 } 2760 2761 #[cfg(target_arch = "x86_64")] 2762 /// 2763 /// Get the current CPU state 2764 /// 2765 /// Ordering requirements: 2766 /// 2767 /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify 2768 /// vCPU/LAPIC state. As such, it must be done before most everything 2769 /// else, otherwise we cannot restore everything and expect it to work. 2770 /// 2771 /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are 2772 /// still running. 2773 /// 2774 /// KVM_GET_LAPIC may change state of LAPIC before returning it. 2775 /// 2776 /// GET_VCPU_EVENTS should probably be last to save. The code looks as 2777 /// it might as well be affected by internal state modifications of the 2778 /// GET ioctls. 2779 /// 2780 /// SREGS saves/restores a pending interrupt, similar to what 2781 /// VCPU_EVENTS also does. 2782 /// 2783 /// GET_MSRS requires a prepopulated data structure to do something 2784 /// meaningful. For SET_MSRS it will then contain good data. 
2785 /// 2786 /// # Example 2787 /// 2788 /// ```rust 2789 /// # use hypervisor::kvm::KvmHypervisor; 2790 /// # use std::sync::Arc; 2791 /// let kvm = KvmHypervisor::new().unwrap(); 2792 /// let hv = Arc::new(kvm); 2793 /// let vm = hv.create_vm().expect("new VM fd creation failed"); 2794 /// vm.enable_split_irq().unwrap(); 2795 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 2796 /// let state = vcpu.state().unwrap(); 2797 /// ``` 2798 fn state(&self) -> cpu::Result<CpuState> { 2799 let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?; 2800 let mp_state = self.get_mp_state()?.into(); 2801 let regs = self.get_regs()?; 2802 let sregs = self.get_sregs()?; 2803 let xsave = self.get_xsave()?; 2804 let xcrs = self.get_xcrs()?; 2805 let lapic_state = self.get_lapic()?; 2806 let fpu = self.get_fpu()?; 2807 2808 // Try to get all MSRs based on the list previously retrieved from KVM. 2809 // If the number of MSRs obtained from GET_MSRS is different from the 2810 // expected amount, we fallback onto a slower method by getting MSRs 2811 // by chunks. This is the only way to make sure we try to get as many 2812 // MSRs as possible, even if some MSRs are not supported. 2813 let mut msr_entries = self.msrs.clone(); 2814 2815 // Save extra MSRs if the Hyper-V synthetic interrupt controller is 2816 // emulated. 
2817 if self.hyperv_synic.load(Ordering::Acquire) { 2818 let hyperv_synic_msrs = vec![ 2819 0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084, 2820 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096, 2821 0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d, 2822 0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 2823 0x400000b5, 0x400000b6, 0x400000b7, 2824 ]; 2825 for index in hyperv_synic_msrs { 2826 let msr = kvm_msr_entry { 2827 index, 2828 ..Default::default() 2829 }; 2830 msr_entries.push(msr.into()); 2831 } 2832 } 2833 2834 let expected_num_msrs = msr_entries.len(); 2835 let num_msrs = self.get_msrs(&mut msr_entries)?; 2836 let msrs = if num_msrs != expected_num_msrs { 2837 let mut faulty_msr_index = num_msrs; 2838 let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec(); 2839 2840 loop { 2841 warn!( 2842 "Detected faulty MSR 0x{:x} while getting MSRs", 2843 msr_entries[faulty_msr_index].index 2844 ); 2845 2846 // Skip the first bad MSR 2847 let start_pos = faulty_msr_index + 1; 2848 2849 let mut sub_msr_entries = msr_entries[start_pos..].to_vec(); 2850 let num_msrs = self.get_msrs(&mut sub_msr_entries)?; 2851 2852 msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]); 2853 2854 if num_msrs == sub_msr_entries.len() { 2855 break; 2856 } 2857 2858 faulty_msr_index = start_pos + num_msrs; 2859 } 2860 2861 msr_entries_tmp 2862 } else { 2863 msr_entries 2864 }; 2865 2866 let vcpu_events = self.get_vcpu_events()?; 2867 let tsc_khz = self.tsc_khz()?; 2868 2869 Ok(VcpuKvmState { 2870 cpuid, 2871 msrs, 2872 vcpu_events, 2873 regs: regs.into(), 2874 sregs: sregs.into(), 2875 fpu, 2876 lapic_state, 2877 xsave, 2878 xcrs, 2879 mp_state, 2880 tsc_khz, 2881 } 2882 .into()) 2883 } 2884 2885 /// 2886 /// Get the current AArch64 CPU state 2887 /// 2888 #[cfg(target_arch = "aarch64")] 2889 fn state(&self) -> cpu::Result<CpuState> { 2890 let 
mut state = VcpuKvmState { 2891 mp_state: self.get_mp_state()?.into(), 2892 ..Default::default() 2893 }; 2894 // Get core registers 2895 state.core_regs = self.get_regs()?.into(); 2896 2897 // Get systerm register 2898 // Call KVM_GET_REG_LIST to get all registers available to the guest. 2899 // For ArmV8 there are around 500 registers. 2900 let mut sys_regs: Vec<Register> = Vec::new(); 2901 let mut reg_list = RegList::new(500).unwrap(); 2902 self.fd 2903 .lock() 2904 .unwrap() 2905 .get_reg_list(&mut reg_list) 2906 .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?; 2907 2908 // At this point reg_list should contain: core registers and system 2909 // registers. 2910 // The register list contains the number of registers and their ids. We 2911 // will be needing to call KVM_GET_ONE_REG on each id in order to save 2912 // all of them. We carve out from the list the core registers which are 2913 // represented in the kernel by kvm_regs structure and for which we can 2914 // calculate the id based on the offset in the structure. 2915 reg_list.retain(|regid| is_system_register(*regid)); 2916 2917 // Now, for the rest of the registers left in the previously fetched 2918 // register list, we are simply calling KVM_GET_ONE_REG. 
2919 let indices = reg_list.as_slice(); 2920 for index in indices.iter() { 2921 let mut bytes = [0_u8; 8]; 2922 self.fd 2923 .lock() 2924 .unwrap() 2925 .get_one_reg(*index, &mut bytes) 2926 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?; 2927 sys_regs.push(kvm_bindings::kvm_one_reg { 2928 id: *index, 2929 addr: u64::from_le_bytes(bytes), 2930 }); 2931 } 2932 2933 state.sys_regs = sys_regs; 2934 2935 Ok(state.into()) 2936 } 2937 2938 #[cfg(target_arch = "riscv64")] 2939 /// 2940 /// Get the current RISC-V 64-bit CPU state 2941 /// 2942 fn state(&self) -> cpu::Result<CpuState> { 2943 let mut state = VcpuKvmState { 2944 mp_state: self.get_mp_state()?.into(), 2945 ..Default::default() 2946 }; 2947 // Get core registers 2948 state.core_regs = self.get_regs()?.into(); 2949 2950 // Get non-core register 2951 // Call KVM_GET_REG_LIST to get all registers available to the guest. 2952 // For RISC-V 64-bit there are around 200 registers. 2953 let mut sys_regs: Vec<Register> = Vec::new(); 2954 let mut reg_list = RegList::new(200).unwrap(); 2955 self.fd 2956 .lock() 2957 .unwrap() 2958 .get_reg_list(&mut reg_list) 2959 .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?; 2960 2961 // At this point reg_list should contain: 2962 // - core registers 2963 // - config registers 2964 // - timer registers 2965 // - control and status registers 2966 // - AIA control and status registers 2967 // - smstateen control and status registers 2968 // - sbi_sta control and status registers. 2969 // 2970 // The register list contains the number of registers and their ids. We 2971 // will be needing to call KVM_GET_ONE_REG on each id in order to save 2972 // all of them. We carve out from the list the core registers which are 2973 // represented in the kernel by `kvm_riscv_core` structure and for which 2974 // we can calculate the id based on the offset in the structure. 
2975 reg_list.retain(|regid| is_non_core_register(*regid)); 2976 2977 // Now, for the rest of the registers left in the previously fetched 2978 // register list, we are simply calling KVM_GET_ONE_REG. 2979 let indices = reg_list.as_slice(); 2980 for index in indices.iter() { 2981 let mut bytes = [0_u8; 8]; 2982 self.fd 2983 .lock() 2984 .unwrap() 2985 .get_one_reg(*index, &mut bytes) 2986 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?; 2987 sys_regs.push(kvm_bindings::kvm_one_reg { 2988 id: *index, 2989 addr: u64::from_le_bytes(bytes), 2990 }); 2991 } 2992 2993 state.non_core_regs = sys_regs; 2994 2995 Ok(state.into()) 2996 } 2997 2998 #[cfg(target_arch = "x86_64")] 2999 /// 3000 /// Restore the previously saved CPU state 3001 /// 3002 /// Ordering requirements: 3003 /// 3004 /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are 3005 /// still running. 3006 /// 3007 /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so 3008 /// if we ever change the BSP, we have to do that before restoring anything. 3009 /// The same seems to be true for CPUID stuff. 3010 /// 3011 /// SREGS saves/restores a pending interrupt, similar to what 3012 /// VCPU_EVENTS also does. 3013 /// 3014 /// SET_REGS clears pending exceptions unconditionally, thus, it must be 3015 /// done before SET_VCPU_EVENTS, which restores it. 3016 /// 3017 /// SET_LAPIC must come after SET_SREGS, because the latter restores 3018 /// the apic base msr. 3019 /// 3020 /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR 3021 /// only restores successfully, when the LAPIC is correctly configured. 
3022 /// 3023 /// Arguments: CpuState 3024 /// # Example 3025 /// 3026 /// ```rust 3027 /// # use hypervisor::kvm::KvmHypervisor; 3028 /// # use std::sync::Arc; 3029 /// let kvm = KvmHypervisor::new().unwrap(); 3030 /// let hv = Arc::new(kvm); 3031 /// let vm = hv.create_vm().expect("new VM fd creation failed"); 3032 /// vm.enable_split_irq().unwrap(); 3033 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 3034 /// let state = vcpu.state().unwrap(); 3035 /// vcpu.set_state(&state).unwrap(); 3036 /// ``` 3037 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 3038 let state: VcpuKvmState = state.clone().into(); 3039 self.set_cpuid2(&state.cpuid)?; 3040 self.set_mp_state(state.mp_state.into())?; 3041 self.set_regs(&state.regs.into())?; 3042 self.set_sregs(&state.sregs.into())?; 3043 self.set_xsave(&state.xsave)?; 3044 self.set_xcrs(&state.xcrs)?; 3045 self.set_lapic(&state.lapic_state)?; 3046 self.set_fpu(&state.fpu)?; 3047 3048 if let Some(freq) = state.tsc_khz { 3049 self.set_tsc_khz(freq)?; 3050 } 3051 3052 // Try to set all MSRs previously stored. 3053 // If the number of MSRs set from SET_MSRS is different from the 3054 // expected amount, we fallback onto a slower method by setting MSRs 3055 // by chunks. This is the only way to make sure we try to set as many 3056 // MSRs as possible, even if some MSRs are not supported. 
3057 let expected_num_msrs = state.msrs.len(); 3058 let num_msrs = self.set_msrs(&state.msrs)?; 3059 if num_msrs != expected_num_msrs { 3060 let mut faulty_msr_index = num_msrs; 3061 3062 loop { 3063 warn!( 3064 "Detected faulty MSR 0x{:x} while setting MSRs", 3065 state.msrs[faulty_msr_index].index 3066 ); 3067 3068 // Skip the first bad MSR 3069 let start_pos = faulty_msr_index + 1; 3070 3071 let sub_msr_entries = state.msrs[start_pos..].to_vec(); 3072 3073 let num_msrs = self.set_msrs(&sub_msr_entries)?; 3074 3075 if num_msrs == sub_msr_entries.len() { 3076 break; 3077 } 3078 3079 faulty_msr_index = start_pos + num_msrs; 3080 } 3081 } 3082 3083 self.set_vcpu_events(&state.vcpu_events)?; 3084 3085 Ok(()) 3086 } 3087 3088 /// 3089 /// Restore the previously saved AArch64 CPU state 3090 /// 3091 #[cfg(target_arch = "aarch64")] 3092 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 3093 let state: VcpuKvmState = state.clone().into(); 3094 // Set core registers 3095 self.set_regs(&state.core_regs.into())?; 3096 // Set system registers 3097 for reg in &state.sys_regs { 3098 self.fd 3099 .lock() 3100 .unwrap() 3101 .set_one_reg(reg.id, ®.addr.to_le_bytes()) 3102 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?; 3103 } 3104 3105 self.set_mp_state(state.mp_state.into())?; 3106 3107 Ok(()) 3108 } 3109 3110 #[cfg(target_arch = "riscv64")] 3111 /// 3112 /// Restore the previously saved RISC-V 64-bit CPU state 3113 /// 3114 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 3115 let state: VcpuKvmState = state.clone().into(); 3116 // Set core registers 3117 self.set_regs(&state.core_regs.into())?; 3118 // Set system registers 3119 for reg in &state.non_core_regs { 3120 self.fd 3121 .lock() 3122 .unwrap() 3123 .set_one_reg(reg.id, ®.addr.to_le_bytes()) 3124 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?; 3125 } 3126 3127 self.set_mp_state(state.mp_state.into())?; 3128 3129 Ok(()) 3130 } 3131 3132 /// 3133 /// Initialize TDX 
/// for this CPU
    ///
    /// Issues the `TdxCommand::InitVcpu` command on this vCPU's raw file
    /// descriptor, passing `hob_address` as the command data. A failure is
    /// wrapped in `HypervisorCpuError::InitializeTdx`.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        let vcpu_fd = self.fd.lock().unwrap();
        let outcome = tdx_command(
            &vcpu_fd.as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        );
        outcome.map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state
    ///
    /// `exit` is converted into the representation expected by
    /// `set_kvm_immediate_exit` and written through the locked vCPU fd.
    ///
    fn set_immediate_exit(&self, exit: bool) {
        let requested = exit.into();
        self.fd.lock().unwrap().set_kvm_immediate_exit(requested);
    }

    ///
    /// Returns the details about TDX exit reason
    ///
    /// The exit payload of the vCPU's `kvm_run` area is reinterpreted as a
    /// `KvmTdxExit`. Only vmcalls whose `type_` is zero and whose subfunction
    /// is one of the two handled constants are recognised; anything else
    /// yields `HypervisorCpuError::UnknownTdxVmCall`.
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut vcpu_fd = self.fd.as_ref().lock().unwrap();
        let run = vcpu_fd.get_kvm_run();
        let exit_ptr =
            (&mut run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1 as *mut KvmTdxExit;
        // SAFETY: accessing a union field in a valid structure
        let vmcall = unsafe { &mut (*exit_ptr).u.vmcall };

        // The status is set to "invalid operand" before the call is inspected.
        // NOTE(review): presumably `set_tdx_status` overrides it once the exit
        // has been handled; confirm against the caller.
        vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if vmcall.type_ == 0 {
            match vmcall.subfunction {
                TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
                TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                    Ok(TdxExitDetails::SetupEventNotifyInterrupt)
                }
                _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
            }
        } else {
            Err(cpu::HypervisorCpuError::UnknownTdxVmCall)
        }
    }

    ///
    /// Set the status code for TDX exit
    ///
    /// Maps `status` onto the matching `TDG_VP_VMCALL_*` constant and stores
    /// it in the vmcall section of the vCPU's `kvm_run` area.
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };

        let mut vcpu_fd = self.fd.as_ref().lock().unwrap();
        let run = vcpu_fd.get_kvm_run();
        let exit_ptr =
            (&mut run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1 as *mut KvmTdxExit;
        // SAFETY: accessing a union field in a valid structure
        let vmcall = unsafe { &mut (*exit_ptr).u.vmcall };

        vmcall.status_code = status_code;
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    /// Every entry is built with `msr!` except `MSR_IA32_MISC_ENABLE` and
    /// `MSR_MTRRdefType`, which are built with `msr_data!` and an explicit
    /// value.
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        vec![
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
    }

    ///
    /// Report whether the vCPU accepts the PMUv3 init device attribute
    ///
    /// Returns `true` when `has_device_attr` succeeds for the
    /// `KVM_ARM_VCPU_PMU_V3_CTRL` / `KVM_ARM_VCPU_PMU_V3_INIT` attribute.
    ///
    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let pmu_init_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let vcpu_fd = self.fd.lock().unwrap();
        vcpu_fd.has_device_attr(&pmu_init_attr).is_ok()
    }

    ///
    /// Configure the PMU interrupt and initialize the PMU
    ///
    /// The IRQ attribute is set first, then the init attribute. Any failure
    /// is reported as `HypervisorCpuError::InitializePmu`; the underlying
    /// error is discarded.
    ///
    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let pmu_irq_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            // The attribute carries the address of `irq`, which stays alive
            // for the whole function.
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        let pmu_init_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };

        // Order matters: IRQ first, init second. The lock is taken per call.
        for attr in [&pmu_irq_attr, &pmu_init_attr] {
            self.fd
                .lock()
                .unwrap()
                .set_device_attr(attr)
                .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        }

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
    /// An `EIO` failure is reported as `Ok(None)`; any other failure becomes
    /// `HypervisorCpuError::GetTscKhz`.
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Ok(khz) => Ok(Some(khz)),
            Err(e) if e.errno() == libc::EIO => Ok(None),
            Err(e) => Err(cpu::HypervisorCpuError::GetTscKhz(e.into())),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    /// An `EIO` failure is treated as success; any other failure becomes
    /// `HypervisorCpuError::SetTscKhz`.
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) if e.errno() != libc::EIO => {
                Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
            }
            _ => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    /// An `EIO` failure is treated as success; any other failure becomes
    /// `HypervisorCpuError::Nmi`.
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) if e.errno() != libc::EIO => Err(cpu::HypervisorCpuError::Nmi(e.into())),
            _ => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    /// The raw value returned by the vCPU fd is converted into an
    /// `XsaveState`; a failure is wrapped in
    /// `HypervisorCpuError::GetXsaveState`.
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        let raw_xsave = self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|err| cpu::HypervisorCpuError::GetXsaveState(err.into()))?;
        Ok(raw_xsave.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
3337 /// 3338 fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> { 3339 let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into(); 3340 self.fd 3341 .lock() 3342 .unwrap() 3343 .set_xsave(&xsave) 3344 .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) 3345 } 3346 3347 #[cfg(target_arch = "x86_64")] 3348 /// 3349 /// X86 specific call that returns the vcpu's current "xcrs". 3350 /// 3351 fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> { 3352 self.fd 3353 .lock() 3354 .unwrap() 3355 .get_xcrs() 3356 .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into())) 3357 } 3358 3359 #[cfg(target_arch = "x86_64")] 3360 /// 3361 /// X86 specific call that sets the vcpu's current "xcrs". 3362 /// 3363 fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> { 3364 self.fd 3365 .lock() 3366 .unwrap() 3367 .set_xcrs(xcrs) 3368 .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into())) 3369 } 3370 3371 #[cfg(target_arch = "x86_64")] 3372 /// 3373 /// Returns currently pending exceptions, interrupts, and NMIs as well as related 3374 /// states of the vcpu. 3375 /// 3376 fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> { 3377 self.fd 3378 .lock() 3379 .unwrap() 3380 .get_vcpu_events() 3381 .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into())) 3382 } 3383 3384 #[cfg(target_arch = "x86_64")] 3385 /// 3386 /// Sets pending exceptions, interrupts, and NMIs as well as related states 3387 /// of the vcpu. 
3388 /// 3389 fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> { 3390 self.fd 3391 .lock() 3392 .unwrap() 3393 .set_vcpu_events(events) 3394 .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into())) 3395 } 3396 } 3397 3398 #[cfg(test)] 3399 mod tests { 3400 #[test] 3401 #[cfg(target_arch = "riscv64")] 3402 fn test_get_and_set_regs() { 3403 use super::*; 3404 3405 let kvm = KvmHypervisor::new().unwrap(); 3406 let hypervisor = Arc::new(kvm); 3407 let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 3408 let vcpu0 = vm.create_vcpu(0, None).unwrap(); 3409 3410 let core_regs = StandardRegisters::from(kvm_riscv_core { 3411 regs: user_regs_struct { 3412 pc: 0x00, 3413 ra: 0x01, 3414 sp: 0x02, 3415 gp: 0x03, 3416 tp: 0x04, 3417 t0: 0x05, 3418 t1: 0x06, 3419 t2: 0x07, 3420 s0: 0x08, 3421 s1: 0x09, 3422 a0: 0x0a, 3423 a1: 0x0b, 3424 a2: 0x0c, 3425 a3: 0x0d, 3426 a4: 0x0e, 3427 a5: 0x0f, 3428 a6: 0x10, 3429 a7: 0x11, 3430 s2: 0x12, 3431 s3: 0x13, 3432 s4: 0x14, 3433 s5: 0x15, 3434 s6: 0x16, 3435 s7: 0x17, 3436 s8: 0x18, 3437 s9: 0x19, 3438 s10: 0x1a, 3439 s11: 0x1b, 3440 t3: 0x1c, 3441 t4: 0x1d, 3442 t5: 0x1e, 3443 t6: 0x1f, 3444 }, 3445 mode: 0x00, 3446 }); 3447 3448 vcpu0.set_regs(&core_regs).unwrap(); 3449 assert_eq!(vcpu0.get_regs().unwrap(), core_regs); 3450 } 3451 } 3452