// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
    MPIDR_EL1,
};
use crate::cpu;
use crate::device;
use crate::hypervisor;
use crate::vm::{self, VmmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset__of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
#[cfg(target_arch = "x86_64")]
use vm_memory::Address;
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::NUM_IOAPIC_PINS;
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::{
    check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
};
#[cfg(target_arch = "x86_64")]
pub use x86_64::{
    CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
    Xsave, CPUID_FLAG_VALID_INDEX,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
    kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY,
    KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
    KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
    kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
    kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
    kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
    kvm_ioctls::VcpuExit,
};
// Empty marker: KVM currently exposes no VM-global state worth snapshotting
// here, but the type keeps the Vm trait's state()/set_state() interface uniform.
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}

pub use KvmVmState as VmState;
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    // Shared handle to the kernel's VM file descriptor.
    fd: Arc<VmFd>,
    // Template MSR entry list (indices of all host-supported MSRs, data
    // zeroed); cloned into each vCPU at creation time (see create_vcpu).
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    state: KvmVmState,
}

// Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {
    // Round up so the allocation covers at least `size_in_bytes` bytes.
    let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>();
    let mut v = Vec::with_capacity(rounded_size);
    v.resize_with(rounded_size, T::default);
    v
}

// The kvm API has many structs that resemble the following `Foo` structure:
//
// ```
// #[repr(C)]
// struct Foo {
//    some_data: u32
//    entries: __IncompleteArrayField<__u32>,
// }
// ```
//
// In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not
// include any space for `entries`. To make the allocation large enough while still being aligned
// for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used
// as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous
// with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries.
use std::mem::size_of;
// `T` is the header struct, `F` the element type of its flexible-array tail.
fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> {
    let element_space = count * size_of::<F>();
    let vec_size_bytes = size_of::<T>() + element_space;
    vec_with_size_in_bytes(vec_size_bytes)
}

///
/// Implementation of Vm trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
162 /// 163 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 164 self.fd 165 .unregister_irqfd(fd, gsi) 166 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 167 } 168 /// 169 /// Creates a VcpuFd object from a vcpu RawFd. 170 /// 171 fn create_vcpu( 172 &self, 173 id: u8, 174 vmmops: Option<Arc<Box<dyn VmmOps>>>, 175 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 176 let vc = self 177 .fd 178 .create_vcpu(id as u64) 179 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 180 let vcpu = KvmVcpu { 181 fd: vc, 182 #[cfg(target_arch = "x86_64")] 183 msrs: self.msrs.clone(), 184 vmmops, 185 #[cfg(target_arch = "x86_64")] 186 hyperv_synic: AtomicBool::new(false), 187 }; 188 Ok(Arc::new(vcpu)) 189 } 190 /// 191 /// Registers an event to be signaled whenever a certain address is written to. 192 /// 193 fn register_ioevent( 194 &self, 195 fd: &EventFd, 196 addr: &IoEventAddress, 197 datamatch: Option<vm::DataMatch>, 198 ) -> vm::Result<()> { 199 if let Some(dm) = datamatch { 200 match dm { 201 vm::DataMatch::DataMatch32(kvm_dm32) => self 202 .fd 203 .register_ioevent(fd, addr, kvm_dm32) 204 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 205 vm::DataMatch::DataMatch64(kvm_dm64) => self 206 .fd 207 .register_ioevent(fd, addr, kvm_dm64) 208 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 209 } 210 } else { 211 self.fd 212 .register_ioevent(fd, addr, NoDatamatch) 213 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 214 } 215 } 216 /// 217 /// Unregisters an event from a certain address it has been previously registered to. 218 /// 219 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 220 self.fd 221 .unregister_ioevent(fd, addr, NoDatamatch) 222 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 223 } 224 /// 225 /// Sets the GSI routing table entries, overwriting any previously set 226 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 
227 /// 228 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 229 let mut irq_routing = 230 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 231 irq_routing[0].nr = entries.len() as u32; 232 irq_routing[0].flags = 0; 233 234 unsafe { 235 let entries_slice: &mut [kvm_irq_routing_entry] = 236 irq_routing[0].entries.as_mut_slice(entries.len()); 237 entries_slice.copy_from_slice(&entries); 238 } 239 240 self.fd 241 .set_gsi_routing(&irq_routing[0]) 242 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 243 } 244 /// 245 /// Creates a memory region structure that can be used with set_user_memory_region 246 /// 247 fn make_user_memory_region( 248 &self, 249 slot: u32, 250 guest_phys_addr: u64, 251 memory_size: u64, 252 userspace_addr: u64, 253 readonly: bool, 254 log_dirty_pages: bool, 255 ) -> MemoryRegion { 256 MemoryRegion { 257 slot, 258 guest_phys_addr, 259 memory_size, 260 userspace_addr, 261 flags: if readonly { KVM_MEM_READONLY } else { 0 } 262 | if log_dirty_pages { 263 KVM_MEM_LOG_DIRTY_PAGES 264 } else { 265 0 266 }, 267 } 268 } 269 /// 270 /// Creates/modifies a guest physical memory slot. 271 /// 272 fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> { 273 // Safe because guest regions are guaranteed not to overlap. 274 unsafe { 275 self.fd 276 .set_user_memory_region(user_memory_region) 277 .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into())) 278 } 279 } 280 /// 281 /// Creates an emulated device in the kernel. 282 /// 283 /// See the documentation for `KVM_CREATE_DEVICE`. 284 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> { 285 let fd = self 286 .fd 287 .create_device(device) 288 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 289 let device = KvmDevice { fd }; 290 Ok(Arc::new(device)) 291 } 292 /// 293 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Set TSS
        self.fd
            .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        self.fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        self.fd
            .set_clock(data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }
    /// Checks if a particular `Cap` is available.
    fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(self.state)
    }
    ///
    /// Set the VM state
    ///
    // No-op today: KvmVmState is empty, so there is nothing to restore.
    fn set_state(&self, _state: VmState) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}
/// Enum for KVM related error
#[derive(Debug)]
pub enum KvmError {
    CapabilityMissing(Cap),
}
pub type KvmResult<T> = result::Result<T, KvmError>;
impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    pub fn new() -> hypervisor::Result<KvmHypervisor> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        // Refuse to run against a kernel speaking a different KVM API version.
        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(KvmHypervisor { kvm: kvm_obj })
    }
}
/// Implementation of Hypervisor trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for KvmHypervisor {
    /// Create a KVM vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap()
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // Pre-build the MSR entry template (index-only, data zeroed)
            // from the host-supported MSR index list; each vCPU clones it.
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs);
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
            }))
        }
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        // NOTE(review): this panics via expect() instead of returning Err
        // when a capability is missing — confirm the hard failure is intended.
        check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities");
        Ok(())
    }

    ///
    /// Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure.
    ///
    fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> {
        self.kvm
            .get_vcpu_mmap_size()
            .map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into()))
    }
    ///
    /// Gets the recommended maximum number of VCPUs per VM.
    ///
    fn get_max_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_max_vcpus())
    }
    ///
    /// Gets the recommended number of VCPUs per VM.
    ///
    fn get_nr_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_nr_vcpus())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Checks if a particular `Cap` is available.
    ///
    fn check_capability(&self, c: Cap) -> bool {
        self.kvm.check_extension(c)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}
/// Vcpu struct for KVM
pub struct KvmVcpu {
    // Kernel handle for this vCPU.
    fd: VcpuFd,
    // Per-vCPU copy of the MSR index template built in create_vm().
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    // Optional callbacks for PIO/MMIO exits handled by the VMM.
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
    // Whether Hyper-V SynIC was enabled for this vCPU (see enable_hyperv_synic).
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}
/// Implementation of Vcpu trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
542 /// 543 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 544 self.fd 545 .set_regs(regs) 546 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 547 } 548 #[cfg(target_arch = "x86_64")] 549 /// 550 /// Returns the vCPU special registers. 551 /// 552 fn get_sregs(&self) -> cpu::Result<SpecialRegisters> { 553 self.fd 554 .get_sregs() 555 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into())) 556 } 557 #[cfg(target_arch = "x86_64")] 558 /// 559 /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. 560 /// 561 fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> { 562 self.fd 563 .set_sregs(sregs) 564 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 565 } 566 #[cfg(target_arch = "x86_64")] 567 /// 568 /// Returns the floating point state (FPU) from the vCPU. 569 /// 570 fn get_fpu(&self) -> cpu::Result<FpuState> { 571 self.fd 572 .get_fpu() 573 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into())) 574 } 575 #[cfg(target_arch = "x86_64")] 576 /// 577 /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct. 578 /// 579 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 580 self.fd 581 .set_fpu(fpu) 582 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 583 } 584 #[cfg(target_arch = "x86_64")] 585 /// 586 /// X86 specific call to setup the CPUID registers. 587 /// 588 fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> { 589 self.fd 590 .set_cpuid2(cpuid) 591 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 592 } 593 #[cfg(target_arch = "x86_64")] 594 /// 595 /// X86 specific call to enable HyperV SynIC 596 /// 597 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 598 // Update the information about Hyper-V SynIC being enabled and 599 // emulated as it will influence later which MSRs should be saved. 
600 self.hyperv_synic.store(true, Ordering::Release); 601 602 let cap = kvm_enable_cap { 603 cap: KVM_CAP_HYPERV_SYNIC, 604 ..Default::default() 605 }; 606 self.fd 607 .enable_cap(&cap) 608 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSynIC(e.into())) 609 } 610 /// 611 /// X86 specific call to retrieve the CPUID registers. 612 /// 613 #[cfg(target_arch = "x86_64")] 614 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> { 615 self.fd 616 .get_cpuid2(num_entries) 617 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into())) 618 } 619 #[cfg(target_arch = "x86_64")] 620 /// 621 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 622 /// 623 fn get_lapic(&self) -> cpu::Result<LapicState> { 624 self.fd 625 .get_lapic() 626 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into())) 627 } 628 #[cfg(target_arch = "x86_64")] 629 /// 630 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 631 /// 632 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 633 self.fd 634 .set_lapic(klapic) 635 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 636 } 637 #[cfg(target_arch = "x86_64")] 638 /// 639 /// Returns the model-specific registers (MSR) for this vCPU. 640 /// 641 fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> { 642 self.fd 643 .get_msrs(msrs) 644 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into())) 645 } 646 #[cfg(target_arch = "x86_64")] 647 /// 648 /// Setup the model-specific registers (MSR) for this vCPU. 649 /// Returns the number of MSR entries actually written. 650 /// 651 fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> { 652 self.fd 653 .set_msrs(msrs) 654 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 655 } 656 /// 657 /// Returns the vcpu's current "multiprocessing state". 
658 /// 659 fn get_mp_state(&self) -> cpu::Result<MpState> { 660 self.fd 661 .get_mp_state() 662 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into())) 663 } 664 /// 665 /// Sets the vcpu's current "multiprocessing state". 666 /// 667 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 668 self.fd 669 .set_mp_state(mp_state) 670 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 671 } 672 #[cfg(target_arch = "x86_64")] 673 /// 674 /// X86 specific call that returns the vcpu's current "xsave struct". 675 /// 676 fn get_xsave(&self) -> cpu::Result<Xsave> { 677 self.fd 678 .get_xsave() 679 .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into())) 680 } 681 #[cfg(target_arch = "x86_64")] 682 /// 683 /// X86 specific call that sets the vcpu's current "xsave struct". 684 /// 685 fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> { 686 self.fd 687 .set_xsave(xsave) 688 .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) 689 } 690 #[cfg(target_arch = "x86_64")] 691 /// 692 /// X86 specific call that returns the vcpu's current "xcrs". 693 /// 694 fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> { 695 self.fd 696 .get_xcrs() 697 .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into())) 698 } 699 #[cfg(target_arch = "x86_64")] 700 /// 701 /// X86 specific call that sets the vcpu's current "xcrs". 702 /// 703 fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> { 704 self.fd 705 .set_xcrs(&xcrs) 706 .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into())) 707 } 708 /// 709 /// Triggers the running of the current virtual CPU returning an exit reason. 
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    // When the VMM registered PIO callbacks, let it handle the
                    // exit and report Ignore; otherwise surface it to the caller.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On Aarch64, when the VM is shutdown, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                // EAGAIN/EINTR from KVM_RUN are benign interruptions; tell the
                // caller to simply re-enter the vCPU loop.
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents from
    /// potential soft lockups when being resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        self.fd
            .kvmclock_ctrl()
            .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
    }
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }
    ///
    /// Sets the value of one register for this vCPU.
834 /// 835 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 836 fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> { 837 self.fd 838 .set_one_reg(reg_id, data) 839 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into())) 840 } 841 /// 842 /// Gets the value of one register for this vCPU. 843 /// 844 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 845 fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> { 846 self.fd 847 .get_one_reg(reg_id) 848 .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into())) 849 } 850 /// 851 /// Gets a list of the guest registers that are supported for the 852 /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 853 /// 854 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 855 fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> { 856 self.fd 857 .get_reg_list(reg_list) 858 .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into())) 859 } 860 /// 861 /// Save the state of the core registers. 862 /// 863 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 864 fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> { 865 let mut off = offset__of!(user_pt_regs, regs); 866 // There are 31 user_pt_regs: 867 // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 868 // These actually are the general-purpose registers of the Armv8-a 869 // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register). 870 for i in 0..31 { 871 state.regs.regs[i] = self 872 .fd 873 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off)) 874 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 875 off += std::mem::size_of::<u64>(); 876 } 877 878 // We are now entering the "Other register" section of the ARMv8-a architecture. 879 // First one, stack pointer. 
880 let off = offset__of!(user_pt_regs, sp); 881 state.regs.sp = self 882 .fd 883 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off)) 884 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 885 886 // Second one, the program counter. 887 let off = offset__of!(user_pt_regs, pc); 888 state.regs.pc = self 889 .fd 890 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off)) 891 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 892 893 // Next is the processor state. 894 let off = offset__of!(user_pt_regs, pstate); 895 state.regs.pstate = self 896 .fd 897 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off)) 898 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 899 900 // The stack pointer associated with EL1 901 let off = offset__of!(kvm_regs, sp_el1); 902 state.sp_el1 = self 903 .fd 904 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off)) 905 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 906 907 // Exception Link Register for EL1, when taking an exception to EL1, this register 908 // holds the address to which to return afterwards. 909 let off = offset__of!(kvm_regs, elr_el1); 910 state.elr_el1 = self 911 .fd 912 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off)) 913 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 914 915 // Saved Program Status Registers, there are 5 of them used in the kernel. 
        // Saved Program Status Registers: one u64 slot per exception level.
        // `off` walks the `spsr` array inside `kvm_regs`, one u64 at a time.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            state.spsr[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers which are stored in the
        // user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        //
        // NOTE(review): each V register is 128 bits (the id is built with
        // KVM_REG_SIZE_U128 and `off` advances by size_of::<u128>()), but only
        // the low u64 returned by `get_one_reg` is stored in vregs[i][0].
        // The upper 64 bits of each vreg appear to be lost across
        // save/restore — TODO confirm against the kvm-ioctls get_one_reg API.
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            state.fp_regs.vregs[i][0] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register (32-bit; truncate the u64 ioctl value).
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        state.fp_regs.fpsr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;

        // Floating-point Control Register (32-bit; truncate the u64 ioctl value).
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        state.fp_regs.fpcr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;
        Ok(())
    }
    ///
    /// Restore the state of the core registers.
    ///
    /// Writes every field of `state` back into the guest via KVM_SET_ONE_REG,
    /// mirroring the read order used by `core_registers`: general-purpose
    /// regs, sp, pc, pstate, sp_el1, elr_el1, spsr[], then the FP/SIMD state.
    /// Register ids are derived from the field offsets inside `kvm_regs` /
    /// `user_pt_regs` via `arm64_core_reg_id!`.
    ///
    /// Errors are mapped to `cpu::HypervisorCpuError::SetCoreRegister`.
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact identical order from `state`. Look there
        // for some additional info on registers.
        // x0..x30: 31 general-purpose registers, one u64 each.
        let mut off = offset__of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    state.regs.regs[i],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Stack pointer.
        let off = offset__of!(user_pt_regs, sp);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Program counter.
        let off = offset__of!(user_pt_regs, pc);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Processor state (flags, exception level, ...).
        let off = offset__of!(user_pt_regs, pstate);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // EL1 stack pointer.
        let off = offset__of!(kvm_regs, sp_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // EL1 exception link register.
        let off = offset__of!(kvm_regs, elr_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Saved Program Status Registers.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // FP/SIMD V registers. NOTE(review): as on the save path, only the low
        // u64 (vregs[i][0]) of each 128-bit register is written back.
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    state.fp_regs.vregs[i][0],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpsr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Floating-point Control Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpcr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }
    ///
    /// Save the state of the system registers.
    ///
    /// Fetches the full register list from KVM, filters it down to system
    /// registers, and appends one `kvm_one_reg` (id + value) per register to
    /// `state` via KVM_GET_ONE_REG.
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
        // Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are
        // around 500 registers.
        let mut reg_list = RegList::new(512);
        self.fd
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system registers.
        // The register list contains the number of registers and their ids. We will be needing to
        // call KVM_GET_ONE_REG on each id in order to save all of them. We carve out from the list
        // the core registers which are represented in the kernel by kvm_regs structure and for which
        // we can calculate the id based on the offset in the structure.

        // Drop the unused zero-filled tail of the 512-entry allocation.
        reg_list.retain(|regid| *regid != 0);
        // NOTE(review): this sorts a temporary clone (`to_vec()` copies the
        // slice) and discards the result — it has no effect on `reg_list`.
        // Dead statement; kept as-is pending a deliberate fix.
        reg_list.as_slice().to_vec().sort_unstable();

        // Keep only system registers; core registers are handled separately
        // by `core_registers`/`set_core_registers`.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched register list, we are
        // simply calling KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for (_pos, index) in indices.iter().enumerate() {
            // NOTE(review): magic cutoff — only the first 231 system registers
            // are saved; any remainder is silently dropped. TODO confirm the
            // rationale (snapshot size limit?) and replace with a named
            // constant or remove.
            if _pos > 230 {
                break;
            }
            // One KVM_GET_ONE_REG call per register id; save (id, value) pairs.
            state.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: self
                    .fd
                    .get_one_reg(*index)
                    .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
            });
        }

        Ok(())
    }
    ///
    /// Restore the state of the system registers.
    ///
    /// Replays each (id, value) pair captured by `system_registers` through
    /// KVM_SET_ONE_REG, stopping at the first failure.
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
        for reg in state {
            self.fd
                .set_one_reg(reg.id, reg.addr)
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }
        Ok(())
    }
    ///
    /// Read the MPIDR - Multiprocessor Affinity Register.
    ///
    /// Single KVM_GET_ONE_REG call with the precomputed MPIDR_EL1 register id.
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn read_mpidr(&self) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(MPIDR_EL1)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        // MP state first (see ordering notes above), then the rest of the
        // register/LAPIC/FPU state via the corresponding GET ioctl wrappers.
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fallback onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            // Hyper-V SynIC MSR indices (HV_X64_MSR_* range).
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5,
                0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr).unwrap();
            }
        }

        // get_msrs returns how many entries were actually filled in; a short
        // count means the entry right after the filled prefix is unsupported.
        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            // Slow path: keep the good prefix, skip the faulty entry, and
            // retry with the remaining suffix until a whole chunk succeeds.
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]);

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                // Skip the faulty MSR and query everything after it.
                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                // Accumulate whatever was successfully retrieved.
                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                // The whole remaining chunk succeeded: done.
                if num_msrs == expected_num_msrs {
                    break;
                }

                // Another faulty MSR inside the chunk; its index is relative
                // to the original list: chunk start + filled count.
                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        // Saved last on purpose — see ordering notes in the doc comment.
        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
    ///
    /// Get the current AArch64 CPU state
    ///
    /// Captures MP state and MPIDR first, then fills in the core and system
    /// registers through the helpers above.
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = CpuState {
            mp_state: self.get_mp_state()?,
            mpidr: self.read_mpidr()?,
            ..Default::default()
        };
        self.core_registers(&mut state.core_regs)?;
        self.system_registers(&mut state.sys_regs)?;

        Ok(state)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully, when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // The SET ioctl sequence below is order-sensitive; see the ordering
        // notes in this function's doc comment before reordering anything.
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fallback onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            // Slow path: set_msrs stopped at an unsupported entry; skip it
            // and retry with the remaining suffix until a chunk fully sets.
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                // Skip the faulty MSR and set everything after it.
                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries = MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == expected_num_msrs {
                    break;
                }

                // Next faulty index, relative to the original list.
                faulty_msr_index = start_pos + num_msrs;
            }
        }

        // Restored last — SET_VCPU_EVENTS reinstates pending exceptions that
        // SET_REGS cleared (see doc comment).
        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    /// Mirrors the aarch64 `state()`: core registers, then system registers,
    /// then MP state.
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_core_registers(&state.core_regs)?;
        self.set_system_registers(&state.sys_regs)?;
        self.set_mp_state(state.mp_state)?;

        Ok(())
    }
}

/// Device struct for KVM
///
/// Thin wrapper over a KVM `DeviceFd` (created via KVM_CREATE_DEVICE)
/// implementing the hypervisor-agnostic `device::Device` trait.
pub struct KvmDevice {
    fd: DeviceFd,
}

impl device::Device for KvmDevice {
    ///
    /// Set device attribute
    ///
    /// Forwards to the KVM_SET_DEVICE_ATTR ioctl on the wrapped fd.
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    /// Forwards to the KVM_GET_DEVICE_ATTR ioctl; `attr` is filled in place.
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}

impl AsRawFd for KvmDevice {
    // Expose the underlying device fd (e.g. for irqfd/mmap plumbing).
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}