// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
    MPIDR_EL1,
};
use crate::cpu;
use crate::device;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, VmmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset__of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
#[cfg(target_arch = "aarch64")]
use std::convert::TryInto;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
#[cfg(target_arch = "x86_64")]
use vm_memory::Address;
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::NUM_IOAPIC_PINS;
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::{
    check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
};
#[cfg(target_arch = "x86_64")]
pub use x86_64::{
    CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
    Xsave, CPUID_FLAG_VALID_INDEX,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
#[cfg(feature = "tdx")]
use kvm_bindings::KVMIO;
pub use kvm_bindings::{
    kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
    kvm_userspace_memory_region, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
    KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
use thiserror::Error;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_expr, ioctl_ioc_nr, ioctl_iowr_nr};
///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
    kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
    kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
    kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
    kvm_ioctls::VcpuExit,
};

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
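
// Note on the macro invocation above: `ioctl_iowr_nr!` from vmm-sys-util
// generates a function `KVM_MEMORY_ENCRYPT_OP()` returning the encoded ioctl
// request number, the Rust counterpart of the kernel's
// _IOWR(KVMIO, 0xba, unsigned long).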

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    #[allow(dead_code)]
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}

pub use KvmVmState as VmState;
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    state: KvmVmState,
}

///
/// Implementation of Vm trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.state().unwrap();
///
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region used for the TSS in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<Box<dyn VmmOps>>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vc = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: vc,
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vmmops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }
    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
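
    // Illustrative use of `register_ioevent` above (all values hypothetical):
    // have an EventFd signaled only when the guest writes the 32-bit value
    // 0x42 to a given MMIO address.
    //
    //     let event = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    //     let addr = IoEventAddress::Mmio(0xd000_0000);
    //     vm.register_ioevent(&event, &addr, Some(vm::DataMatch::DataMatch32(0x42)))
    //         .unwrap();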
    ///
    /// Unregisters an event from the address it has previously been registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }
    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;

        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> MemoryRegion {
        MemoryRegion {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
    }
    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(user_memory_region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }
    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        let mut region = user_memory_region;

        // Setting the size to 0 means "remove".
        region.memory_size = 0;
        // Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }
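
    // Illustrative pairing of the helpers above (hypothetical values): map
    // 1 MiB of host memory at guest physical address 0 with dirty-page logging
    // enabled. `host_addr` would come from a real allocation in practice.
    //
    //     let region = vm.make_user_memory_region(0, 0, 1 << 20, host_addr, false, true);
    //     assert_eq!(region.flags, KVM_MEM_LOG_DIRTY_PAGES);
    //     vm.create_user_memory_region(region).unwrap();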
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = KvmDevice { fd };
        Ok(Arc::new(device))
    }
    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on the underlying host.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Set TSS
        self.fd
            .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        // Create split irqchip.
        // Only the local APIC is emulated in-kernel; the PICs and the IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        self.fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        self.fd
            .set_clock(data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }
    /// Checks if a particular `Cap` is available.
    fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
    /// Create a device that is used for passthrough.
    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data.
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(self.state)
    }
    ///
    /// Set the VM state.
    ///
    fn set_state(&self, _state: VmState) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Get the dirty pages bitmap (one bit per page).
    ///
    fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &CpuId, max_vcpus: u32) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitVm {
            max_vcpus: u32,
            reserved: u32,
            attributes: u64,
            cpuid: u64,
        }
        let data = TdxInitVm {
            max_vcpus,
            reserved: 0,
            attributes: 1, // TDX1_TD_ATTRIBUTE_DEBUG,
            cpuid: cpuid.as_fam_struct_ptr() as u64,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            // The region is described to the TDX module in 4 KiB pages.
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            if measure { 1 } else { 0 },
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }
}
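
// Illustrative follow-up to `get_dirty_log` above (hypothetical slot and
// size): each set bit marks one dirty 4 KiB page, so a popcount over the
// returned words gives the dirty page count.
//
//     let bitmap = vm.get_dirty_log(0, 1 << 20).unwrap();
//     let dirty_pages: u32 = bitmap.iter().map(|word| word.count_ones()).sum();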

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    metadata: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        metadata: u32,
        data: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        metadata,
        data,
    };
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
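
// A minimal sketch of how the wrapper above is driven (hypothetical `vm_fd`);
// the real call sites are `tdx_init`, `tdx_init_memory_region` and
// `tdx_finalize` in this file.
//
//     tdx_command(&vm_fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
//         .map_err(vm::HypervisorVmError::FinalizeTdx)?;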

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}
/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}
pub type KvmResult<T> = result::Result<T, KvmError>;
impl KvmHypervisor {
    /// Create a hypervisor based on KVM
    pub fn new() -> hypervisor::Result<KvmHypervisor> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(KvmHypervisor { kvm: kvm_obj })
    }
}
/// Implementation of Hypervisor trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for KvmHypervisor {
    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap(); // 0: default platform type
    ///
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, the ioctl was
                        // interrupted and we have to retry; this is not a
                        // regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs).unwrap();
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve the AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }
}
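
// End-to-end sketch tying the pieces together (requires /dev/kvm; guest
// memory setup and error handling elided):
//
//     let kvm = KvmHypervisor::new().unwrap();
//     let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
//     let vm = hv.create_vm().unwrap();
//     let vcpu = vm.create_vcpu(0, None).unwrap();
//     match vcpu.run().unwrap() {
//         cpu::VmExit::Reset => { /* guest requested a reset */ }
//         _ => { /* handle other exit reasons */ }
//     }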
/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: VcpuFd,
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}
/// Implementation of Vcpu trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// vcpu.get_mp_state().unwrap();
///
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
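
    // Illustrative round-trip over the accessors above (x86_64; `vcpu` is
    // hypothetical): move the instruction pointer while leaving every other
    // register untouched.
    //
    //     let mut regs = vcpu.get_regs().unwrap();
    //     regs.rip = 0x1000;
    //     vcpu.set_regs(&regs).unwrap();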
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
        self.fd
            .set_cpuid2(cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
        self.fd
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }
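
    // Illustrative sketch for the MSR accessors above (x86_64; `vcpu` is
    // hypothetical): read a single MSR, here IA32_TSC (index 0x10), by
    // pre-populating the entry list with the wanted index.
    //
    //     let mut msrs = MsrEntries::from_entries(&[kvm_msr_entry {
    //         index: 0x10,
    //         ..Default::default()
    //     }])
    //     .unwrap();
    //     assert_eq!(vcpu.get_msrs(&mut msrs).unwrap(), 1);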
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        self.fd
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .set_mp_state(mp_state)
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    ///
    /// Triggers the running of the current virtual CPU, returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
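
    // Flow note for `run` above: when a `VmmOps` handler was registered at
    // vCPU creation, port and MMIO accesses are forwarded to it (`pio_read`,
    // `pio_write`, `mmio_read`, `mmio_write`) and the caller only sees
    // `VmExit::Ignore`; without a handler, the raw exit is returned for the
    // caller to emulate.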
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Lets the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        self.fd
            .kvmclock_ctrl()
            .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
    }
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }
    ///
    /// Sets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
        self.fd
            .set_one_reg(reg_id, data)
            .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
    }
    ///
    /// Gets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(reg_id)
            .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
    }
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }
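
    // Illustrative expansion of the id scheme used by the register
    // save/restore below (aarch64; `vcpu` hypothetical): a core register id is
    // built from its size and its byte offset inside the kernel's `kvm_regs`
    // layout, e.g. for x0:
    //
    //     let off = offset__of!(user_pt_regs, regs);
    //     let x0 = vcpu
    //         .get_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
    //         .unwrap();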
    ///
    /// Save the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
        let mut off = offset__of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These actually are the general-purpose registers of the Armv8-A
        // architecture (i.e. x0-x30 when used as 64-bit registers, or w0-w30
        // when used as 32-bit registers).
        for i in 0..31 {
            state.regs.regs[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "other registers" section of the Armv8-A architecture.
        // First one, the stack pointer.
        let off = offset__of!(user_pt_regs, sp);
        state.regs.sp = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Second one, the program counter.
        let off = offset__of!(user_pt_regs, pc);
        state.regs.pc = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Next is the processor state.
        let off = offset__of!(user_pt_regs, pstate);
        state.regs.pstate = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // The stack pointer associated with EL1.
        let off = offset__of!(kvm_regs, sp_el1);
        state.sp_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Exception Link Register for EL1: when taking an exception to EL1,
        // this register holds the address to return to afterwards.
        let off = offset__of!(kvm_regs, elr_el1);
        state.elr_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Saved Program Status Registers; there are 5 of them used in the kernel.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            state.spsr[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to the floating point registers, which are stored in
        // the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            state.fp_regs.vregs[i][0] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        state.fp_regs.fpsr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;

        // Floating-point Control Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        state.fp_regs.fpcr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;
        Ok(())
    }
    ///
    /// Restore the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // This function follows the exact same register order as
        // `core_registers`; look there for additional details on each register.
        let mut off = offset__of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    state.regs.regs[i],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset__of!(user_pt_regs, sp);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(user_pt_regs, pc);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(user_pt_regs, pstate);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, sp_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, elr_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    state.fp_regs.vregs[i][0],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpsr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpcr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }
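
    // Illustrative pairing of the save/restore halves above (aarch64; `vcpu`
    // hypothetical):
    //
    //     let mut regs = StandardRegisters::default();
    //     vcpu.core_registers(&mut regs).unwrap();
    //     vcpu.set_core_registers(&regs).unwrap();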
    ///
    /// Save the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list contains both core registers and system
        // registers. The list holds the number of registers and their ids; we
        // need to call KVM_GET_ONE_REG on each id in order to save all of
        // them. We carve the core registers out of the list: they are
        // represented in the kernel by the kvm_regs structure, so their ids
        // can be computed from offsets into that structure instead.
        reg_list.retain(|regid| *regid != 0);
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the system registers left in the fetched register list, we
        // simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            state.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: self
                    .fd
                    .get_one_reg(*index)
                    .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
            });
        }

        Ok(())
    }
    ///
    /// Restore the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
        for reg in state {
            self.fd
                .set_one_reg(reg.id, reg.addr)
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }
        Ok(())
    }
    ///
    /// Read the MPIDR - Multiprocessor Affinity Register.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn read_mpidr(&self) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(MPIDR_EL1)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
    }
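
    // Illustrative decode of the value returned by `read_mpidr` above
    // (aarch64; `vcpu` hypothetical): the low byte of MPIDR_EL1 is the Aff0
    // affinity field, one common choice for a per-core index.
    //
    //     let mpidr = vcpu.read_mpidr().unwrap();
    //     let aff0 = mpidr & 0xff;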
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before almost everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change the state of the LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as if
    /// it might be affected by internal state modifications of the other GET
    /// ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back to a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5,
                0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr).unwrap();
            }
        }

        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]).unwrap();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]).unwrap();
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
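
    // Worked example of the MSR fallback used by `state` above (and mirrored
    // in `set_state` below): if 5 MSRs are requested and entry 2 cannot be
    // read, GET_MSRS returns 2. Entries 0 and 1 are kept, index 2 is skipped
    // as faulty, and the loop retries from index 3 until the remaining tail
    // is fetched in full.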
    ///
    /// Get the current AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = CpuState {
            mp_state: self.get_mp_state()?,
            mpidr: self.read_mpidr()?,
            ..Default::default()
        };
        self.core_registers(&mut state.core_regs)?;
        self.system_registers(&mut state.sys_regs)?;

        Ok(state)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back to a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries =
                    MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]).unwrap();
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
    ///
    /// Restore the previously saved AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_core_registers(&state.core_regs)?;
        self.set_system_registers(&state.sys_regs)?;
        self.set_mp_state(state.mp_state)?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
            .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }
}
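
// Illustrative snapshot/restore cycle honoring the ordering contracts
// documented on `state`/`set_state` above (`vcpu` hypothetical):
//
//     let saved = vcpu.state().unwrap();
//     // ... pause or recreate the vCPU, e.g. on a migration target ...
//     vcpu.set_state(&saved).unwrap();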

/// Device struct for KVM
pub struct KvmDevice {
    fd: DeviceFd,
}

impl device::Device for KvmDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}

impl AsRawFd for KvmDevice {
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}