// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
    MPIDR_EL1,
};
use crate::cpu;
use crate::device;
use crate::hypervisor;
use crate::vm::{self, VmmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset__of};
use arc_swap::ArcSwapOption;
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
#[cfg(target_arch = "x86_64")]
use vm_memory::Address;
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;

#[cfg(target_arch = "x86_64")]
use x86_64::{
    check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
};

#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};

#[cfg(target_arch = "x86_64")]
pub use x86_64::{
    CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
    Xsave, CPUID_FLAG_VALID_INDEX,
};

#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::NUM_IOAPIC_PINS;

// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
    KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(target_arch = "aarch64")]
use std::mem;

pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
    kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
    kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
    kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
    kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
    kvm_ioctls::VcpuExit,
};
/// Empty placeholder for KVM VM state: there is currently nothing VM-wide to
/// snapshot here (see `state()`/`set_state()` below, which are no-ops).
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}

pub use KvmVmState as VmState;
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    // Shared handle to the VM file descriptor; cloned into each vCPU path.
    fd: Arc<VmFd>,
    // Template of MSR indices supported by the host, copied into each vCPU.
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    state: KvmVmState,
    // Optional callback interface into the VMM, swappable at runtime.
    vmmops: ArcSwapOption<Box<dyn vm::VmmOps>>,
}

// Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
103 fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> { 104 let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>(); 105 let mut v = Vec::with_capacity(rounded_size); 106 v.resize_with(rounded_size, T::default); 107 v 108 } 109 110 // The kvm API has many structs that resemble the following `Foo` structure: 111 // 112 // ``` 113 // #[repr(C)] 114 // struct Foo { 115 // some_data: u32 116 // entries: __IncompleteArrayField<__u32>, 117 // } 118 // ``` 119 // 120 // In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not 121 // include any space for `entries`. To make the allocation large enough while still being aligned 122 // for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used 123 // as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous 124 // with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries. 125 use std::mem::size_of; 126 fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> { 127 let element_space = count * size_of::<F>(); 128 let vec_size_bytes = size_of::<T>() + element_space; 129 vec_with_size_in_bytes(vec_size_bytes) 130 } 131 132 /// 133 /// Implementation of Vm trait for KVM 134 /// Example: 135 /// #[cfg(feature = "kvm")] 136 /// extern crate hypervisor 137 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 138 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 139 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 140 /// vm.set/get().unwrap() 141 /// 142 impl vm::Vm for KvmVm { 143 #[cfg(target_arch = "x86_64")] 144 /// 145 /// Sets the address of the three-page region in the VM's address space. 
146 /// 147 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 148 self.fd 149 .set_tss_address(offset) 150 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 151 } 152 /// 153 /// Creates an in-kernel interrupt controller. 154 /// 155 fn create_irq_chip(&self) -> vm::Result<()> { 156 self.fd 157 .create_irq_chip() 158 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 159 } 160 /// 161 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 162 /// 163 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 164 self.fd 165 .register_irqfd(fd, gsi) 166 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 167 } 168 /// 169 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 170 /// 171 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 172 self.fd 173 .unregister_irqfd(fd, gsi) 174 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 175 } 176 /// 177 /// Creates a VcpuFd object from a vcpu RawFd. 178 /// 179 fn create_vcpu(&self, id: u8) -> vm::Result<Arc<dyn cpu::Vcpu>> { 180 let vc = self 181 .fd 182 .create_vcpu(id) 183 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 184 let vcpu = KvmVcpu { 185 fd: vc, 186 #[cfg(target_arch = "x86_64")] 187 msrs: self.msrs.clone(), 188 vmmops: self.vmmops.clone(), 189 #[cfg(target_arch = "x86_64")] 190 hyperv_synic: AtomicBool::new(false), 191 }; 192 Ok(Arc::new(vcpu)) 193 } 194 /// 195 /// Registers an event to be signaled whenever a certain address is written to. 
196 /// 197 fn register_ioevent( 198 &self, 199 fd: &EventFd, 200 addr: &IoEventAddress, 201 datamatch: Option<vm::DataMatch>, 202 ) -> vm::Result<()> { 203 if let Some(dm) = datamatch { 204 match dm { 205 vm::DataMatch::DataMatch32(kvm_dm32) => self 206 .fd 207 .register_ioevent(fd, addr, kvm_dm32) 208 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 209 vm::DataMatch::DataMatch64(kvm_dm64) => self 210 .fd 211 .register_ioevent(fd, addr, kvm_dm64) 212 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 213 } 214 } else { 215 self.fd 216 .register_ioevent(fd, addr, NoDatamatch) 217 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 218 } 219 } 220 /// 221 /// Unregisters an event from a certain address it has been previously registered to. 222 /// 223 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 224 self.fd 225 .unregister_ioevent(fd, addr, NoDatamatch) 226 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 227 } 228 /// 229 /// Sets the GSI routing table entries, overwriting any previously set 230 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 
231 /// 232 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 233 let mut irq_routing = 234 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 235 irq_routing[0].nr = entries.len() as u32; 236 irq_routing[0].flags = 0; 237 238 unsafe { 239 let entries_slice: &mut [kvm_irq_routing_entry] = 240 irq_routing[0].entries.as_mut_slice(entries.len()); 241 entries_slice.copy_from_slice(&entries); 242 } 243 244 self.fd 245 .set_gsi_routing(&irq_routing[0]) 246 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 247 } 248 /// 249 /// Creates a memory region structure that can be used with set_user_memory_region 250 /// 251 fn make_user_memory_region( 252 &self, 253 slot: u32, 254 guest_phys_addr: u64, 255 memory_size: u64, 256 userspace_addr: u64, 257 readonly: bool, 258 ) -> MemoryRegion { 259 MemoryRegion { 260 slot, 261 guest_phys_addr, 262 memory_size, 263 userspace_addr, 264 flags: if readonly { KVM_MEM_READONLY } else { 0 }, 265 } 266 } 267 /// 268 /// Creates/modifies a guest physical memory slot. 269 /// 270 fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> { 271 // Safe because guest regions are guaranteed not to overlap. 272 unsafe { 273 self.fd 274 .set_user_memory_region(user_memory_region) 275 .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into())) 276 } 277 } 278 /// 279 /// Creates an emulated device in the kernel. 280 /// 281 /// See the documentation for `KVM_CREATE_DEVICE`. 282 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> { 283 let fd = self 284 .fd 285 .create_device(device) 286 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 287 let device = KvmDevice { fd }; 288 Ok(Arc::new(device)) 289 } 290 /// 291 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 
292 /// 293 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 294 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 295 self.fd 296 .get_preferred_target(kvi) 297 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 298 } 299 #[cfg(target_arch = "x86_64")] 300 fn enable_split_irq(&self) -> vm::Result<()> { 301 // Set TSS 302 self.fd 303 .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize) 304 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 305 // Create split irqchip 306 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 307 // are not. 308 let mut cap: kvm_enable_cap = Default::default(); 309 cap.cap = KVM_CAP_SPLIT_IRQCHIP; 310 cap.args[0] = NUM_IOAPIC_PINS as u64; 311 self.fd 312 .enable_cap(&cap) 313 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 314 Ok(()) 315 } 316 /// Retrieve guest clock. 317 #[cfg(target_arch = "x86_64")] 318 fn get_clock(&self) -> vm::Result<ClockData> { 319 self.fd 320 .get_clock() 321 .map_err(|e| vm::HypervisorVmError::GetClock(e.into())) 322 } 323 /// Set guest clock. 324 #[cfg(target_arch = "x86_64")] 325 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 326 self.fd 327 .set_clock(data) 328 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 329 } 330 /// Checks if a particular `Cap` is available. 331 fn check_extension(&self, c: Cap) -> bool { 332 self.fd.check_extension(c) 333 } 334 /// Create a device that is used for passthrough 335 fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> { 336 let mut vfio_dev = kvm_create_device { 337 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 338 fd: 0, 339 flags: 0, 340 }; 341 342 self.create_device(&mut vfio_dev) 343 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 344 } 345 /// 346 /// Get the Vm state. 
Return VM specific data 347 /// 348 fn state(&self) -> vm::Result<VmState> { 349 Ok(self.state) 350 } 351 /// 352 /// Set the VM state 353 /// 354 fn set_state(&self, _state: VmState) -> vm::Result<()> { 355 Ok(()) 356 } 357 358 /// 359 /// Set the VmmOps interface 360 /// 361 fn set_vmmops(&self, vmmops: Box<dyn VmmOps>) -> vm::Result<()> { 362 self.vmmops.store(Some(Arc::new(vmmops))); 363 Ok(()) 364 } 365 } 366 /// Wrapper over KVM system ioctls. 367 pub struct KvmHypervisor { 368 kvm: Kvm, 369 } 370 /// Enum for KVM related error 371 #[derive(Debug)] 372 pub enum KvmError { 373 CapabilityMissing(Cap), 374 } 375 pub type KvmResult<T> = result::Result<T, KvmError>; 376 impl KvmHypervisor { 377 /// Create a hypervisor based on Kvm 378 pub fn new() -> hypervisor::Result<KvmHypervisor> { 379 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 380 let api_version = kvm_obj.get_api_version(); 381 382 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 383 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 384 } 385 386 Ok(KvmHypervisor { kvm: kvm_obj }) 387 } 388 } 389 /// Implementation of Hypervisor trait for KVM 390 /// Example: 391 /// #[cfg(feature = "kvm")] 392 /// extern crate hypervisor 393 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 394 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 395 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 396 /// 397 impl hypervisor::Hypervisor for KvmHypervisor { 398 /// Create a KVM vm object and return the object as Vm trait object 399 /// Example 400 /// # extern crate hypervisor; 401 /// # use hypervisor::KvmHypervisor; 402 /// use hypervisor::KvmVm; 403 /// let hypervisor = KvmHypervisor::new().unwrap(); 404 /// let vm = hypervisor.create_vm().unwrap() 405 /// 406 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 407 let fd: VmFd; 408 loop { 409 match self.kvm.create_vm() { 410 Ok(res) => fd = 
res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // Pre-build an MSR entry table containing every MSR index the
            // host KVM supports; values are filled in later via get/set_msrs.
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs);
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
                vmmops: ArcSwapOption::from(None),
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
                vmmops: ArcSwapOption::from(None),
            }))
        }
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        // NOTE(review): this panics (expect) instead of returning Err when a
        // capability is missing — confirm this fail-fast behavior is intended.
        check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities");
        Ok(())
    }

    ///
    /// Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure.
    ///
    fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> {
        self.kvm
            .get_vcpu_mmap_size()
            .map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into()))
    }
    ///
    /// Gets the recommended maximum number of VCPUs per VM.
    ///
    fn get_max_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_max_vcpus())
    }
    ///
    /// Gets the recommended number of VCPUs per VM.
    ///
    fn get_nr_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_nr_vcpus())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Checks if a particular `Cap` is available.
    ///
    fn check_capability(&self, c: Cap) -> bool {
        self.kvm.check_extension(c)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}
/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: VcpuFd,
    // Per-vCPU copy of the host-supported MSR index table.
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    // Optional callback interface into the VMM for PIO/MMIO exits.
    vmmops: ArcSwapOption<Box<dyn vm::VmmOps>>,
    // Set once Hyper-V SynIC is enabled; influences which MSRs get saved.
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}
/// Implementation of Vcpu trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
        self.fd
            .set_cpuid2(cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::SeqCst);

        let mut cap: kvm_enable_cap = Default::default();
        cap.cap = KVM_CAP_HYPERV_SYNIC;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSynIC(e.into()))
    }
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
        self.fd
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        self.fd
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .set_mp_state(mp_state)
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(&xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    ///
    /// Triggers the running of the current virtual CPU returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        // Dispatch on the KVM exit reason. PIO/MMIO exits are forwarded to
        // the registered VmmOps handler when one is present; otherwise they
        // are surfaced to the caller. Unhandled reasons become errors.
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vmmops) = self.vmmops.load_full() {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vmmops) = self.vmmops.load_full() {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On Aarch64, when the VM is shutdown, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_SHUTDOWN
                        || event_type == KVM_SYSTEM_EVENT_RESET
                    {
                        Ok(cpu::VmExit::Reset)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vmmops) = self.vmmops.load_full() {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vmmops) = self.vmmops.load_full() {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            // EAGAIN/EINTR mean the ioctl was interrupted, not a real failure.
            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents from
    /// potential soft lockups when being resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        self.fd
            .kvmclock_ctrl()
            .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
    }
    /// Initializes this vCPU via the `KVM_ARM_VCPU_INIT` ioctl.
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }
    ///
    /// Sets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
        self.fd
            .set_one_reg(reg_id, data)
            .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
    }
    ///
    /// Gets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(reg_id)
            .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
    }
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }
    ///
    /// Save the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
        // Register ids are derived from byte offsets into the kernel's
        // kvm_regs layout, so the order of reads below mirrors that struct.
        let mut off = offset__of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These actually are the general-purpose registers of the Armv8-a
        // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
        for i in 0..31 {
            state.regs.regs[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the ARMv8-a architecture.
        // First one, stack pointer.
        let off = offset__of!(user_pt_regs, sp);
        state.regs.sp = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Second one, the program counter.
        let off = offset__of!(user_pt_regs, pc);
        state.regs.pc = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Next is the processor state.
        let off = offset__of!(user_pt_regs, pstate);
        state.regs.pstate = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // The stack pointer associated with EL1
        let off = offset__of!(kvm_regs, sp_el1);
        state.sp_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Exception Link Register for EL1, when taking an exception to EL1, this register
        // holds the address to which to return afterwards.
        let off = offset__of!(kvm_regs, elr_el1);
        state.elr_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Saved Program Status Registers, there are 5 of them used in the kernel.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            state.spsr[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        // NOTE(review): get_one_reg() yields a u64, so only the low 64 bits of
        // each 128-bit vreg are captured here — confirm this is intentional.
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            state.fp_regs.vregs[i][0] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        state.fp_regs.fpsr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;

        // Floating-point Control Register
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        state.fp_regs.fpcr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;
        Ok(())
    }
    ///
    /// Restore the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact identical order from `state`. Look there
        // for some additional info on registers.
955 let mut off = offset__of!(user_pt_regs, regs); 956 for i in 0..31 { 957 self.fd 958 .set_one_reg( 959 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 960 state.regs.regs[i], 961 ) 962 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 963 off += std::mem::size_of::<u64>(); 964 } 965 966 let off = offset__of!(user_pt_regs, sp); 967 self.fd 968 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp) 969 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 970 971 let off = offset__of!(user_pt_regs, pc); 972 self.fd 973 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc) 974 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 975 976 let off = offset__of!(user_pt_regs, pstate); 977 self.fd 978 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate) 979 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 980 981 let off = offset__of!(kvm_regs, sp_el1); 982 self.fd 983 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1) 984 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 985 986 let off = offset__of!(kvm_regs, elr_el1); 987 self.fd 988 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1) 989 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 990 991 let mut off = offset__of!(kvm_regs, spsr); 992 for i in 0..KVM_NR_SPSR as usize { 993 self.fd 994 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i]) 995 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 996 off += std::mem::size_of::<u64>(); 997 } 998 999 let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs); 1000 for i in 0..32 { 1001 self.fd 1002 .set_one_reg( 1003 arm64_core_reg_id!(KVM_REG_SIZE_U128, off), 1004 state.fp_regs.vregs[i][0], 1005 ) 1006 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1007 off += mem::size_of::<u128>(); 1008 } 1009 1010 let off 
= offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpsr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpcr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }
    ///
    /// Save the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For ArmV8 there are around 500 registers.
        let mut reg_list = RegList::new(512);
        self.fd
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers. The register list contains the number of registers and
        // their ids. We will be needing to call KVM_GET_ONE_REG on each id in
        // order to save all of them. We carve out from the list the core
        // registers, which are represented in the kernel by the kvm_regs
        // structure and for which we can calculate the id based on the offset
        // in the structure.

        // Drop empty slots, then sort the register ids so the saved state has
        // a deterministic order.
        // FIX: the previous code called `reg_list.as_slice().to_vec()
        // .sort_unstable()`, which sorted a temporary copy and immediately
        // discarded it — a no-op. Sort the list in place instead.
        reg_list.retain(|regid| *regid != 0);
        reg_list.as_mut_slice().sort_unstable();

        // Keep only the system registers; core registers are saved separately
        // by core_registers() through offsets into kvm_regs.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we are simply calling KVM_GET_ONE_REG.
        // The cap of 231 entries preserves the original `_pos > 230` break —
        // TODO(review): confirm why the list is truncated at this size.
        let indices = reg_list.as_slice();
        for index in indices.iter().take(231) {
            state.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: self
                    .fd
                    .get_one_reg(*index)
                    .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
            });
        }

        Ok(())
    }
    ///
    /// Restore the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
        // Each Register carries the id/addr pair that KVM_SET_ONE_REG expects.
        for reg in state {
            self.fd
                .set_one_reg(reg.id, reg.addr)
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }
        Ok(())
    }
    ///
    /// Read the MPIDR - Multiprocessor Affinity Register.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn read_mpidr(&self) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(MPIDR_EL1)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful.
    /// For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        // Per the ordering requirements above: MP state is fetched first,
        // VCPU events last.
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fallback onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::SeqCst) {
            // SynIC-related MSR indices in the Hyper-V range (0x4000_00xx) —
            // presumably SCONTROL/SIEFP/SIMP/EOM, SINTx and STIMERx pairs;
            // TODO(review): confirm each index against the Hyper-V TLFS.
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5,
                0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                // push() only fails when the FAM wrapper exceeds its capacity
                // bound; that would be an internal bug, hence unwrap.
                msr_entries.push(msr).unwrap();
            }
        }

        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            // GET_MSRS returned fewer entries than requested: it stops at the
            // first MSR the kernel rejects. Skip that entry and retry with the
            // remainder of the list, until an entire sub-slice succeeds.
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]);

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                // Resume right after the faulty entry.
                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                // Accumulate whatever the kernel did return for this chunk.
                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                // The whole remaining chunk succeeded: we are done.
                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = CpuState::default();
        // Get this vCPUs multiprocessing state.
        state.mp_state = self.get_mp_state()?;
        // Core registers (via kvm_regs offsets) and system registers
        // (via KVM_GET_REG_LIST) are saved separately.
        self.core_registers(&mut state.core_regs)?;
        self.system_registers(&mut state.sys_regs)?;
        state.mpidr = self.read_mpidr()?;

        Ok(state)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully, when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // The call order below follows the constraints documented above
        // (REGS before VCPU_EVENTS, SREGS before LAPIC, LAPIC before MSRS).
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fallback onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            // SET_MSRS stops at the first MSR the kernel rejects: skip the
            // faulty entry and retry with the remainder of the list, until an
            // entire sub-slice is accepted.
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                // Resume right after the faulty entry.
                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries = MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // Restore core registers first, then system registers, then the
        // multiprocessing state.
        self.set_core_registers(&state.core_regs)?;
        self.set_system_registers(&state.sys_regs)?;
        self.set_mp_state(state.mp_state)?;

        Ok(())
    }
}

/// Device struct for KVM
///
/// Thin wrapper around a kvm-ioctls `DeviceFd` implementing the hypervisor
/// crate's generic `device::Device` trait.
pub struct KvmDevice {
    fd: DeviceFd,
}

impl device::Device for KvmDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}

impl AsRawFd for KvmDevice {
    /// Expose the raw file descriptor of the underlying KVM device.
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}