// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
    MPIDR_EL1,
};
use crate::cpu;
use crate::device;
use crate::hypervisor;
use crate::vm::{self, VmmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset__of};
use anyhow::anyhow;
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
#[cfg(target_arch = "x86_64")]
use log::warn;
use serde_derive::{Deserialize, Serialize};
use std::mem::size_of;
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
#[cfg(target_arch = "x86_64")]
use vm_memory::Address;
use vmm_sys_util::eventfd::EventFd;

// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;

#[cfg(target_arch = "x86_64")]
use x86_64::{
    check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
};

#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};

#[cfg(target_arch = "x86_64")]
pub use x86_64::{
    CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
    Xsave, CPUID_FLAG_VALID_INDEX,
};

#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::NUM_IOAPIC_PINS;

// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
    KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(target_arch = "aarch64")]
use std::mem;

pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
    kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY,
    KVM_MSI_VALID_DEVID,
};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
    kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
    kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
    kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
    kvm_ioctls::VcpuExit,
};

#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}

pub use KvmVmState as VmState;

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    state: KvmVmState,
}

// Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`.
fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {
    let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>();
    let mut v = Vec::with_capacity(rounded_size);
    v.resize_with(rounded_size, T::default);
    v
}

// The kvm API has many structs that resemble the following `Foo` structure:
//
// ```
// #[repr(C)]
// struct Foo {
//    some_data: u32,
//    entries: __IncompleteArrayField<__u32>,
// }
// ```
//
// In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would
// not include any space for `entries`. To make the allocation large enough while still being
// aligned for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually
// be used as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be
// contiguous with `Foo`. This function is used to make the `Vec<Foo>` with enough space for
// `count` entries.
fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> {
    let element_space = count * size_of::<F>();
    let vec_size_bytes = size_of::<T>() + element_space;
    vec_with_size_in_bytes(vec_size_bytes)
}
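
// A small sanity check (added) for the two sizing helpers above. The element
// types used here (a `u64` header with `u32` entries, plus the real
// `kvm_irq_routing`/`kvm_irq_routing_entry` pair) are illustrative choices,
// not part of the public API.
#[cfg(test)]
mod sizing_tests {
    use super::*;
    use std::mem::size_of;

    #[test]
    fn vec_is_large_enough() {
        // 9 bytes rounds up to two u64 elements (16 bytes).
        let v: Vec<u64> = vec_with_size_in_bytes(9);
        assert_eq!(v.len(), 2);

        // Room for one u64 "header" plus four trailing u32 "entries".
        let v: Vec<u64> = vec_with_array_field::<u64, u32>(4);
        assert!(v.len() * size_of::<u64>() >= size_of::<u64>() + 4 * size_of::<u32>());

        // The pattern this module actually relies on: a `kvm_irq_routing`
        // header followed by 8 contiguous `kvm_irq_routing_entry` elements,
        // as built in `set_gsi_routing` below.
        let routing = vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(8);
        assert!(
            routing.len() * size_of::<kvm_irq_routing>()
                >= size_of::<kvm_irq_routing>() + 8 * size_of::<kvm_irq_routing_entry>()
        );
    }
}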

///
/// Implementation of Vm trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap();
///
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<Box<dyn VmmOps>>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vc = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: vc,
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vmmops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }
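
    // Note (added): in `register_ioevent` below, a `DataMatch32`/`DataMatch64`
    // value asks KVM to signal the eventfd only when the guest writes that
    // exact 32-bit/64-bit value to the registered address, while `NoDatamatch`
    // triggers the eventfd on every write to the address.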
    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    ///
    /// Unregisters an event from a certain address it has previously been registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }
    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;

        // Safe because irq_routing[0] was allocated with enough trailing space
        // for `entries.len()` routing entries, contiguous with the header.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Creates a memory region structure that can be used with set_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> MemoryRegion {
        MemoryRegion {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
    }
    ///
    /// Creates/modifies a guest physical memory slot.
    ///
    fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(user_memory_region)
                .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into()))
        }
    }
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = KvmDevice { fd };
        Ok(Arc::new(device))
    }
    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on the underlying host.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Set TSS
        self.fd
            .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        // Create the split irqchip. Only the local APIC is emulated in-kernel;
        // the PICs and the IOAPIC are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        self.fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        self.fd
            .set_clock(data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }
    /// Checks if a particular `Cap` is available.
    fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
    /// Create a device that is used for passthrough.
    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }
    ///
    /// Get the Vm state. Returns VM-specific data.
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(self.state)
    }
    ///
    /// Set the VM state.
    ///
    fn set_state(&self, _state: VmState) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Get the dirty pages bitmap (one bit per page).
    ///
    fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
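
// An illustrative end-to-end use (added) of the `vm::Vm` implementation above:
// create the hypervisor, create a VM, then describe and register one guest
// memory slot backed by an anonymous host mapping. Slot 0, GPA 0x1000 and the
// 64 KiB size are arbitrary example values. The test is ignored by default
// because it needs a host with a usable /dev/kvm.
#[cfg(test)]
mod vm_usage_tests {
    use super::*;
    use crate::hypervisor::Hypervisor;
    use crate::vm::Vm;

    #[test]
    #[ignore]
    fn create_vm_and_register_memory() {
        let kvm = KvmHypervisor::new().unwrap();
        let vm = kvm.create_vm().unwrap();

        let size = 0x10000usize;
        // Safe because we request a fresh anonymous private mapping with
        // valid arguments and check the result before using it.
        let host_addr = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                size,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            )
        };
        assert_ne!(host_addr, libc::MAP_FAILED);

        let region =
            vm.make_user_memory_region(0, 0x1000, size as u64, host_addr as u64, false, true);
        assert_eq!(region.flags, KVM_MEM_LOG_DIRTY_PAGES);
        vm.set_user_memory_region(region).unwrap();
    }
}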

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

/// Enum for KVM-related errors
#[derive(Debug)]
pub enum KvmError {
    CapabilityMissing(Cap),
}
pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on KVM
    pub fn new() -> hypervisor::Result<KvmHypervisor> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(KvmHypervisor { kvm: kvm_obj })
    }
}

/// Implementation of Hypervisor trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for KvmHypervisor {
    /// Create a KVM vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // Pre-populate the MSR index list once per VM so that every vCPU
            // created from it can reuse it for MSR save/restore.
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs);
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
            }))
        }
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities");
        Ok(())
    }

    ///
    /// Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure.
    ///
    fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> {
        self.kvm
            .get_vcpu_mmap_size()
            .map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into()))
    }
    ///
    /// Gets the recommended maximum number of VCPUs per VM.
    ///
    fn get_max_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_max_vcpus())
    }
    ///
    /// Gets the recommended number of VCPUs per VM.
    ///
    fn get_nr_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_nr_vcpus())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Checks if a particular `Cap` is available.
    ///
    fn check_capability(&self, c: Cap) -> bool {
        self.kvm.check_extension(c)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}
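
// Illustration (added): querying host-wide facts through the `Hypervisor`
// trait implemented above. `nent` and `nmsrs` are the counter fields of the
// underlying kvm-bindings FAM structs. Ignored by default since it needs a
// host with a usable /dev/kvm.
#[cfg(test)]
#[cfg(target_arch = "x86_64")]
mod hypervisor_query_tests {
    use super::KvmHypervisor;
    use crate::hypervisor::Hypervisor;

    #[test]
    #[ignore]
    fn query_cpuid_and_msr_list() {
        let hv = KvmHypervisor::new().unwrap();

        // Supported CPUID leaves, as consumed by `KvmVcpu::set_cpuid2`.
        let cpuid = hv.get_cpuid().unwrap();
        assert!(cpuid.as_fam_struct_ref().nent > 0);

        // The MSR index list that drives MSR save/restore in `KvmVcpu`.
        let msr_list = hv.get_msr_list().unwrap();
        assert!(msr_list.as_fam_struct_ref().nmsrs > 0);
    }
}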

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: VcpuFd,
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// vcpu.get/set().unwrap();
///
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
        self.fd
            .set_cpuid2(cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable the Hyper-V SynIC.
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated, as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSynIC(e.into()))
    }
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
        self.fd
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        self.fd
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .set_mp_state(mp_state)
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    ///
    /// Triggers the running of the current virtual CPU and returns an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                // The run ioctl was merely interrupted; let the caller retry.
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Lets the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        self.fd
            .kvmclock_ctrl()
            .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
    }
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }
    ///
    /// Sets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
        self.fd
            .set_one_reg(reg_id, data)
            .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
    }
    ///
    /// Gets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(reg_id)
            .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
    }
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }
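
    // Note (added): the `arm64_core_reg_id!` macro used throughout the two
    // functions below is expected to build the 64-bit id that
    // KVM_GET_ONE_REG/KVM_SET_ONE_REG take for a core register: it combines
    // KVM_REG_ARM64, a KVM_REG_SIZE_* constant and KVM_REG_ARM_CORE with the
    // offset of the field inside the kernel's `kvm_regs` structure, which is
    // why the code walks `offset__of!` values instead of hard-coding ids.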
    ///
    /// Save the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
        let mut off = offset__of!(user_pt_regs, regs);
        // There are 31 general-purpose registers in user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These are the general-purpose registers of the Armv8-a architecture
        // (i.e. x0-x30 when used as 64-bit registers, or w0-w30 when used as
        // 32-bit registers).
        for i in 0..31 {
            state.regs.regs[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other registers" section of the Armv8-a
        // architecture. First one, the stack pointer.
        let off = offset__of!(user_pt_regs, sp);
        state.regs.sp = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Second one, the program counter.
        let off = offset__of!(user_pt_regs, pc);
        state.regs.pc = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Next is the processor state.
        let off = offset__of!(user_pt_regs, pstate);
        state.regs.pstate = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // The stack pointer associated with EL1.
        let off = offset__of!(kvm_regs, sp_el1);
        state.sp_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // The Exception Link Register for EL1: when taking an exception to
        // EL1, this register holds the address to return to afterwards.
        let off = offset__of!(kvm_regs, elr_el1);
        state.elr_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // The Saved Program Status Registers; there are 5 of them used in the kernel.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            state.spsr[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to the floating point registers, which are stored in
        // the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            state.fp_regs.vregs[i][0] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        state.fp_regs.fpsr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;

        // Floating-point Control Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        state.fp_regs.fpcr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;
        Ok(())
    }
    ///
    /// Restore the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function restores the registers in exactly the same order as the
        // save path above; look there for additional info on each register.
        let mut off = offset__of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    state.regs.regs[i],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset__of!(user_pt_regs, sp);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(user_pt_regs, pc);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(user_pt_regs, pstate);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, sp_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, elr_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    state.fp_regs.vregs[i][0],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpsr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpcr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }
    ///
    /// Save the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For ArmV8 there are around 500 registers.
        let mut reg_list = RegList::new(512);
        self.fd
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain the core registers and the
        // system registers. The register list contains the number of registers
        // and their ids. We will need to call KVM_GET_ONE_REG on each id in
        // order to save all of them. We carve out from the list the core
        // registers, which are represented in the kernel by the kvm_regs
        // structure and for which we can calculate the id based on the offset
        // in that structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the system registers left in the previously fetched list,
        // we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            state.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: self
                    .fd
                    .get_one_reg(*index)
                    .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
            });
        }

        Ok(())
    }
    ///
    /// Restore the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
        for reg in state {
            self.fd
                .set_one_reg(reg.id, reg.addr)
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }
        Ok(())
    }
    ///
    /// Read the MPIDR - Multiprocessor Affinity Register.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn read_mpidr(&self) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(MPIDR_EL1)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save: judging by the code,
    /// it may well be affected by internal state modifications of the other
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
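        //
        // Worked example (added, hypothetical numbers): if 100 MSRs are
        // expected but GET_MSRS returns 30, entry 30 is the faulty one;
        // entries 0..30 are kept and the loop below retries from entry 31
        // with a shorter list, repeating until a whole sub-list reads back.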
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5,
                0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr).unwrap();
            }
        }

        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]);

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = CpuState {
            mp_state: self.get_mp_state()?,
            mpidr: self.read_mpidr()?,
            ..Default::default()
        };
        self.core_registers(&mut state.core_regs)?;
        self.system_registers(&mut state.sys_regs)?;

        Ok(state)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// can only be restored successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries = MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_core_registers(&state.core_regs)?;
        self.set_system_registers(&state.sys_regs)?;
        self.set_mp_state(state.mp_state)?;

        Ok(())
    }
}

/// Device struct for KVM
pub struct KvmDevice {
    fd: DeviceFd,
}

impl device::Device for KvmDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}

impl AsRawFd for KvmDevice {
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}