1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 // Copyright © 2020, Microsoft Corporation 6 // 7 // Copyright 2018-2019 CrowdStrike, Inc. 8 // 9 // 10 11 #[cfg(target_arch = "aarch64")] 12 pub use crate::aarch64::{check_required_kvm_extensions, VcpuInit, VcpuKvmState as CpuState}; 13 use crate::cpu; 14 use crate::device; 15 use crate::hypervisor; 16 use crate::vm; 17 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; 18 use serde_derive::{Deserialize, Serialize}; 19 use std::os::unix::io::{AsRawFd, RawFd}; 20 use std::result; 21 use std::sync::Arc; 22 #[cfg(target_arch = "x86_64")] 23 use vm_memory::Address; 24 use vmm_sys_util::eventfd::EventFd; 25 // x86_64 dependencies 26 #[cfg(target_arch = "x86_64")] 27 pub mod x86_64; 28 29 #[cfg(target_arch = "x86_64")] 30 use x86_64::{ 31 check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS, 32 }; 33 34 #[cfg(target_arch = "x86_64")] 35 pub use x86_64::{ 36 CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState, 37 Xsave, CPUID_FLAG_VALID_INDEX, 38 }; 39 40 #[cfg(target_arch = "x86_64")] 41 use kvm_bindings::{kvm_enable_cap, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP}; 42 43 #[cfg(target_arch = "x86_64")] 44 use crate::arch::x86::NUM_IOAPIC_PINS; 45 46 // aarch64 dependencies 47 #[cfg(target_arch = "aarch64")] 48 pub mod aarch64; 49 50 pub use kvm_bindings; 51 pub use kvm_bindings::{ 52 kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry, 53 kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, 54 }; 55 pub use kvm_ioctls; 56 pub use kvm_ioctls::{Cap, Kvm}; 57 58 /// 59 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms 60 /// 61 pub use { 62 kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice, 63 kvm_bindings::kvm_device_attr as DeviceAttr, 64 kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState, 65 kvm_bindings::kvm_userspace_memory_region as MemoryRegion, 66 kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress, 67 kvm_ioctls::VcpuExit, 68 }; 69 #[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)] 70 pub struct KvmVmState {} 71 72 pub use KvmVmState as VmState; 73 /// Wrapper over KVM VM ioctls. 74 pub struct KvmVm { 75 fd: Arc<VmFd>, 76 #[cfg(target_arch = "x86_64")] 77 msrs: MsrEntries, 78 state: KvmVmState, 79 } 80 81 // Returns a `Vec<T>` with a size in bytes at least as large as `size_in_bytes`. 82 fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> { 83 let rounded_size = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>(); 84 let mut v = Vec::with_capacity(rounded_size); 85 v.resize_with(rounded_size, T::default); 86 v 87 } 88 89 // The kvm API has many structs that resemble the following `Foo` structure: 90 // 91 // ``` 92 // #[repr(C)] 93 // struct Foo { 94 // some_data: u32 95 // entries: __IncompleteArrayField<__u32>, 96 // } 97 // ``` 98 // 99 // In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not 100 // include any space for `entries`. To make the allocation large enough while still being aligned 101 // for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used 102 // as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous 103 // with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries. 104 use std::mem::size_of; 105 fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> { 106 let element_space = count * size_of::<F>(); 107 let vec_size_bytes = size_of::<T>() + element_space; 108 vec_with_size_in_bytes(vec_size_bytes) 109 } 110 111 /// 112 /// Implementation of Vm trait for KVM 113 /// Example: 114 /// #[cfg(feature = "kvm")] 115 /// extern crate hypervisor 116 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 117 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 118 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 119 /// vm.set/get().unwrap() 120 /// 121 impl vm::Vm for KvmVm { 122 #[cfg(target_arch = "x86_64")] 123 /// 124 /// Sets the address of the three-page region in the VM's address space. 125 /// 126 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 127 self.fd 128 .set_tss_address(offset) 129 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 130 } 131 /// 132 /// Creates an in-kernel interrupt controller. 133 /// 134 fn create_irq_chip(&self) -> vm::Result<()> { 135 self.fd 136 .create_irq_chip() 137 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 138 } 139 /// 140 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 141 /// 142 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 143 self.fd 144 .register_irqfd(fd, gsi) 145 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 146 } 147 /// 148 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 149 /// 150 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 151 self.fd 152 .unregister_irqfd(fd, gsi) 153 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 154 } 155 /// 156 /// Creates a VcpuFd object from a vcpu RawFd. 157 /// 158 fn create_vcpu(&self, id: u8) -> vm::Result<Arc<dyn cpu::Vcpu>> { 159 let vc = self 160 .fd 161 .create_vcpu(id) 162 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 163 let vcpu = KvmVcpu { 164 fd: vc, 165 #[cfg(target_arch = "x86_64")] 166 msrs: self.msrs.clone(), 167 }; 168 Ok(Arc::new(vcpu)) 169 } 170 /// 171 /// Registers an event to be signaled whenever a certain address is written to. 172 /// 173 fn register_ioevent( 174 &self, 175 fd: &EventFd, 176 addr: &IoEventAddress, 177 datamatch: Option<vm::DataMatch>, 178 ) -> vm::Result<()> { 179 if let Some(dm) = datamatch { 180 match dm { 181 vm::DataMatch::DataMatch32(kvm_dm32) => self 182 .fd 183 .register_ioevent(fd, addr, kvm_dm32) 184 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 185 vm::DataMatch::DataMatch64(kvm_dm64) => self 186 .fd 187 .register_ioevent(fd, addr, kvm_dm64) 188 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 189 } 190 } else { 191 self.fd 192 .register_ioevent(fd, addr, NoDatamatch) 193 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 194 } 195 } 196 /// 197 /// Unregisters an event from a certain address it has been previously registered to. 198 /// 199 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 200 self.fd 201 .unregister_ioevent(fd, addr, NoDatamatch) 202 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 203 } 204 /// 205 /// Sets the GSI routing table entries, overwriting any previously set 206 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 207 /// 208 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 209 let mut irq_routing = 210 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 211 irq_routing[0].nr = entries.len() as u32; 212 irq_routing[0].flags = 0; 213 214 unsafe { 215 let entries_slice: &mut [kvm_irq_routing_entry] = 216 irq_routing[0].entries.as_mut_slice(entries.len()); 217 entries_slice.copy_from_slice(&entries); 218 } 219 220 self.fd 221 .set_gsi_routing(&irq_routing[0]) 222 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 223 } 224 /// 225 /// Creates a memory region structure that can be used with set_user_memory_region 226 /// 227 fn make_user_memory_region( 228 &self, 229 slot: u32, 230 guest_phys_addr: u64, 231 memory_size: u64, 232 userspace_addr: u64, 233 readonly: bool, 234 ) -> MemoryRegion { 235 MemoryRegion { 236 slot, 237 guest_phys_addr, 238 memory_size, 239 userspace_addr, 240 flags: if readonly { KVM_MEM_READONLY } else { 0 }, 241 } 242 } 243 /// 244 /// Creates/modifies a guest physical memory slot. 245 /// 246 fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> { 247 // Safe because guest regions are guaranteed not to overlap. 248 unsafe { 249 self.fd 250 .set_user_memory_region(user_memory_region) 251 .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into())) 252 } 253 } 254 /// 255 /// Creates an emulated device in the kernel. 256 /// 257 /// See the documentation for `KVM_CREATE_DEVICE`. 258 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> { 259 let fd = self 260 .fd 261 .create_device(device) 262 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 263 let device = KvmDevice { fd }; 264 Ok(Arc::new(device)) 265 } 266 /// 267 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 268 /// 269 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 270 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 271 self.fd 272 .get_preferred_target(kvi) 273 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 274 } 275 #[cfg(target_arch = "x86_64")] 276 fn enable_split_irq(&self) -> vm::Result<()> { 277 // Set TSS 278 self.fd 279 .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize) 280 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 281 // Create split irqchip 282 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 283 // are not. 284 let mut cap: kvm_enable_cap = Default::default(); 285 cap.cap = KVM_CAP_SPLIT_IRQCHIP; 286 cap.args[0] = NUM_IOAPIC_PINS as u64; 287 self.fd 288 .enable_cap(&cap) 289 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 290 Ok(()) 291 } 292 /// Retrieve guest clock. 293 #[cfg(target_arch = "x86_64")] 294 fn get_clock(&self) -> vm::Result<ClockData> { 295 self.fd 296 .get_clock() 297 .map_err(|e| vm::HypervisorVmError::GetClock(e.into())) 298 } 299 /// Set guest clock. 300 #[cfg(target_arch = "x86_64")] 301 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 302 self.fd 303 .set_clock(data) 304 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 305 } 306 /// Checks if a particular `Cap` is available. 307 fn check_extension(&self, c: Cap) -> bool { 308 self.fd.check_extension(c) 309 } 310 /// Create a device that is used for passthrough 311 fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> { 312 let mut vfio_dev = kvm_create_device { 313 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 314 fd: 0, 315 flags: 0, 316 }; 317 318 self.create_device(&mut vfio_dev) 319 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 320 } 321 /// 322 /// Get the Vm state. Return VM specific data 323 /// 324 fn state(&self) -> vm::Result<VmState> { 325 Ok(self.state) 326 } 327 /// 328 /// Set the VM state 329 /// 330 fn set_state(&self, _state: &VmState) -> vm::Result<()> { 331 Ok(()) 332 } 333 } 334 /// Wrapper over KVM system ioctls. 335 pub struct KvmHypervisor { 336 kvm: Kvm, 337 } 338 /// Enum for KVM related error 339 #[derive(Debug)] 340 pub enum KvmError { 341 CapabilityMissing(Cap), 342 } 343 pub type KvmResult<T> = result::Result<T, KvmError>; 344 impl KvmHypervisor { 345 /// Create a hypervisor based on Kvm 346 pub fn new() -> hypervisor::Result<KvmHypervisor> { 347 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 348 let api_version = kvm_obj.get_api_version(); 349 350 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 351 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 352 } 353 354 Ok(KvmHypervisor { kvm: kvm_obj }) 355 } 356 } 357 /// Implementation of Hypervisor trait for KVM 358 /// Example: 359 /// #[cfg(feature = "kvm")] 360 /// extern crate hypervisor 361 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 362 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 363 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 364 /// 365 impl hypervisor::Hypervisor for KvmHypervisor { 366 /// Create a KVM vm object and return the object as Vm trait object 367 /// Example 368 /// # extern crate hypervisor; 369 /// # use hypervisor::KvmHypervisor; 370 /// use hypervisor::KvmVm; 371 /// let hypervisor = KvmHypervisor::new().unwrap(); 372 /// let vm = hypervisor.create_vm().unwrap() 373 /// 374 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 375 let fd: VmFd; 376 loop { 377 match self.kvm.create_vm() { 378 Ok(res) => fd = res, 379 Err(e) => { 380 if e.errno() == libc::EINTR { 381 // If the error returned is EINTR, which means the 382 // ioctl has been interrupted, we have to retry as 383 // this can't be considered as a regular error. 384 continue; 385 } else { 386 return Err(hypervisor::HypervisorError::VmCreate(e.into())); 387 } 388 } 389 } 390 break; 391 } 392 393 let vm_fd = Arc::new(fd); 394 395 #[cfg(target_arch = "x86_64")] 396 { 397 let msr_list = self.get_msr_list()?; 398 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 399 let mut msrs = MsrEntries::new(num_msrs); 400 let indices = msr_list.as_slice(); 401 let msr_entries = msrs.as_mut_slice(); 402 for (pos, index) in indices.iter().enumerate() { 403 msr_entries[pos].index = *index; 404 } 405 406 Ok(Arc::new(KvmVm { 407 fd: vm_fd, 408 msrs, 409 state: VmState {}, 410 })) 411 } 412 413 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 414 { 415 Ok(Arc::new(KvmVm { 416 fd: vm_fd, 417 state: VmState {}, 418 })) 419 } 420 } 421 422 fn check_required_extensions(&self) -> hypervisor::Result<()> { 423 check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities"); 424 Ok(()) 425 } 426 427 /// 428 /// Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure. 429 /// 430 fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> { 431 self.kvm 432 .get_vcpu_mmap_size() 433 .map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into())) 434 } 435 /// 436 /// Gets the recommended maximum number of VCPUs per VM. 437 /// 438 fn get_max_vcpus(&self) -> hypervisor::Result<usize> { 439 Ok(self.kvm.get_max_vcpus()) 440 } 441 /// 442 /// Gets the recommended number of VCPUs per VM. 443 /// 444 fn get_nr_vcpus(&self) -> hypervisor::Result<usize> { 445 Ok(self.kvm.get_nr_vcpus()) 446 } 447 #[cfg(target_arch = "x86_64")] 448 /// 449 /// Checks if a particular `Cap` is available. 450 /// 451 fn check_capability(&self, c: Cap) -> bool { 452 self.kvm.check_extension(c) 453 } 454 #[cfg(target_arch = "x86_64")] 455 /// 456 /// X86 specific call to get the system supported CPUID values. 457 /// 458 fn get_cpuid(&self) -> hypervisor::Result<CpuId> { 459 self.kvm 460 .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) 461 .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into())) 462 } 463 #[cfg(target_arch = "x86_64")] 464 /// 465 /// Retrieve the list of MSRs supported by KVM. 466 /// 467 fn get_msr_list(&self) -> hypervisor::Result<MsrList> { 468 self.kvm 469 .get_msr_index_list() 470 .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into())) 471 } 472 } 473 /// Vcpu struct for KVM 474 pub struct KvmVcpu { 475 fd: VcpuFd, 476 #[cfg(target_arch = "x86_64")] 477 msrs: MsrEntries, 478 } 479 /// Implementation of Vcpu trait for KVM 480 /// Example: 481 /// #[cfg(feature = "kvm")] 482 /// extern crate hypervisor 483 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 484 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 485 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 486 /// let vcpu = vm.create_vcpu(0).unwrap(); 487 /// vcpu.get/set().unwrap() 488 /// 489 impl cpu::Vcpu for KvmVcpu { 490 #[cfg(target_arch = "x86_64")] 491 /// 492 /// Returns the vCPU general purpose registers. 493 /// 494 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 495 self.fd 496 .get_regs() 497 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into())) 498 } 499 #[cfg(target_arch = "x86_64")] 500 /// 501 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 502 /// 503 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 504 self.fd 505 .set_regs(regs) 506 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 507 } 508 #[cfg(target_arch = "x86_64")] 509 /// 510 /// Returns the vCPU special registers. 511 /// 512 fn get_sregs(&self) -> cpu::Result<SpecialRegisters> { 513 self.fd 514 .get_sregs() 515 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into())) 516 } 517 #[cfg(target_arch = "x86_64")] 518 /// 519 /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. 520 /// 521 fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> { 522 self.fd 523 .set_sregs(sregs) 524 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 525 } 526 #[cfg(target_arch = "x86_64")] 527 /// 528 /// Returns the floating point state (FPU) from the vCPU. 529 /// 530 fn get_fpu(&self) -> cpu::Result<FpuState> { 531 self.fd 532 .get_fpu() 533 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into())) 534 } 535 #[cfg(target_arch = "x86_64")] 536 /// 537 /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct. 538 /// 539 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 540 self.fd 541 .set_fpu(fpu) 542 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 543 } 544 #[cfg(target_arch = "x86_64")] 545 /// 546 /// X86 specific call to setup the CPUID registers. 547 /// 548 fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> { 549 self.fd 550 .set_cpuid2(cpuid) 551 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 552 } 553 #[cfg(target_arch = "x86_64")] 554 /// 555 /// X86 specific call to enable HyperV SynIC 556 /// 557 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 558 let mut cap: kvm_enable_cap = Default::default(); 559 cap.cap = KVM_CAP_HYPERV_SYNIC; 560 self.fd 561 .enable_cap(&cap) 562 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSynIC(e.into())) 563 } 564 /// 565 /// X86 specific call to retrieve the CPUID registers. 566 /// 567 #[cfg(target_arch = "x86_64")] 568 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> { 569 self.fd 570 .get_cpuid2(num_entries) 571 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into())) 572 } 573 #[cfg(target_arch = "x86_64")] 574 /// 575 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 576 /// 577 fn get_lapic(&self) -> cpu::Result<LapicState> { 578 self.fd 579 .get_lapic() 580 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into())) 581 } 582 #[cfg(target_arch = "x86_64")] 583 /// 584 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 585 /// 586 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 587 self.fd 588 .set_lapic(klapic) 589 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 590 } 591 #[cfg(target_arch = "x86_64")] 592 /// 593 /// Returns the model-specific registers (MSR) for this vCPU. 594 /// 595 fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> { 596 self.fd 597 .get_msrs(msrs) 598 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into())) 599 } 600 #[cfg(target_arch = "x86_64")] 601 /// 602 /// Setup the model-specific registers (MSR) for this vCPU. 603 /// Returns the number of MSR entries actually written. 604 /// 605 fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> { 606 self.fd 607 .set_msrs(msrs) 608 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 609 } 610 /// 611 /// Returns the vcpu's current "multiprocessing state". 612 /// 613 fn get_mp_state(&self) -> cpu::Result<MpState> { 614 self.fd 615 .get_mp_state() 616 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into())) 617 } 618 /// 619 /// Sets the vcpu's current "multiprocessing state". 620 /// 621 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 622 self.fd 623 .set_mp_state(mp_state) 624 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 625 } 626 #[cfg(target_arch = "x86_64")] 627 /// 628 /// X86 specific call that returns the vcpu's current "xsave struct". 629 /// 630 fn get_xsave(&self) -> cpu::Result<Xsave> { 631 self.fd 632 .get_xsave() 633 .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into())) 634 } 635 #[cfg(target_arch = "x86_64")] 636 /// 637 /// X86 specific call that sets the vcpu's current "xsave struct". 638 /// 639 fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> { 640 self.fd 641 .set_xsave(xsave) 642 .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) 643 } 644 #[cfg(target_arch = "x86_64")] 645 /// 646 /// X86 specific call that returns the vcpu's current "xcrs". 647 /// 648 fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> { 649 self.fd 650 .get_xcrs() 651 .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into())) 652 } 653 #[cfg(target_arch = "x86_64")] 654 /// 655 /// X86 specific call that sets the vcpu's current "xcrs". 656 /// 657 fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> { 658 self.fd 659 .set_xcrs(&xcrs) 660 .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into())) 661 } 662 /// 663 /// Triggers the running of the current virtual CPU returning an exit reason. 664 /// 665 fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 666 match self.fd.run() { 667 Ok(run) => match run { 668 #[cfg(target_arch = "x86_64")] 669 VcpuExit::IoIn(addr, data) => Ok(cpu::VmExit::IoIn(addr, data)), 670 #[cfg(target_arch = "x86_64")] 671 VcpuExit::IoOut(addr, data) => Ok(cpu::VmExit::IoOut(addr, data)), 672 #[cfg(target_arch = "x86_64")] 673 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)), 674 #[cfg(target_arch = "x86_64")] 675 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset), 676 677 #[cfg(target_arch = "aarch64")] 678 VcpuExit::SystemEvent(event_type, flags) => { 679 use kvm_bindings::KVM_SYSTEM_EVENT_SHUTDOWN; 680 // On Aarch64, when the VM is shutdown, run() returns 681 // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN 682 if event_type == KVM_SYSTEM_EVENT_SHUTDOWN { 683 Ok(cpu::VmExit::Reset) 684 } else { 685 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 686 "Unexpected system event with type 0x{:x}, flags 0x{:x}", 687 event_type, 688 flags 689 ))) 690 } 691 } 692 693 VcpuExit::MmioRead(addr, data) => Ok(cpu::VmExit::MmioRead(addr, data)), 694 VcpuExit::MmioWrite(addr, data) => Ok(cpu::VmExit::MmioWrite(addr, data)), 695 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv), 696 697 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 698 "Unexpected exit reason on vcpu run: {:?}", 699 r 700 ))), 701 }, 702 703 Err(ref e) => match e.errno() { 704 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 705 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 706 "VCPU error {:?}", 707 e 708 ))), 709 }, 710 } 711 } 712 #[cfg(target_arch = "x86_64")] 713 /// 714 /// Returns currently pending exceptions, interrupts, and NMIs as well as related 715 /// states of the vcpu. 716 /// 717 fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> { 718 self.fd 719 .get_vcpu_events() 720 .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into())) 721 } 722 #[cfg(target_arch = "x86_64")] 723 /// 724 /// Sets pending exceptions, interrupts, and NMIs as well as related states 725 /// of the vcpu. 726 /// 727 fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> { 728 self.fd 729 .set_vcpu_events(events) 730 .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into())) 731 } 732 #[cfg(target_arch = "x86_64")] 733 /// 734 /// Let the guest know that it has been paused, which prevents from 735 /// potential soft lockups when being resumed. 736 /// 737 fn notify_guest_clock_paused(&self) -> cpu::Result<()> { 738 self.fd 739 .kvmclock_ctrl() 740 .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into())) 741 } 742 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 743 fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> { 744 self.fd 745 .vcpu_init(kvi) 746 .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into())) 747 } 748 /// 749 /// Sets the value of one register for this vCPU. 750 /// 751 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 752 fn set_one_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> { 753 self.fd 754 .set_one_reg(reg_id, data) 755 .map_err(|e| cpu::HypervisorCpuError::SetOneReg(e.into())) 756 } 757 /// 758 /// Gets the value of one register for this vCPU. 759 /// 760 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 761 fn get_one_reg(&self, reg_id: u64) -> cpu::Result<u64> { 762 self.fd 763 .get_one_reg(reg_id) 764 .map_err(|e| cpu::HypervisorCpuError::GetOneReg(e.into())) 765 } 766 #[cfg(target_arch = "x86_64")] 767 /// 768 /// Get the current CPU state 769 /// 770 /// Ordering requirements: 771 /// 772 /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify 773 /// vCPU/LAPIC state. As such, it must be done before most everything 774 /// else, otherwise we cannot restore everything and expect it to work. 775 /// 776 /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are 777 /// still running. 778 /// 779 /// KVM_GET_LAPIC may change state of LAPIC before returning it. 780 /// 781 /// GET_VCPU_EVENTS should probably be last to save. The code looks as 782 /// it might as well be affected by internal state modifications of the 783 /// GET ioctls. 784 /// 785 /// SREGS saves/restores a pending interrupt, similar to what 786 /// VCPU_EVENTS also does. 787 /// 788 /// GET_MSRS requires a pre-populated data structure to do something 789 /// meaningful. For SET_MSRS it will then contain good data. 790 /// 791 /// # Example 792 /// 793 /// ```rust 794 /// # extern crate hypervisor; 795 /// # use hypervisor::KvmHypervisor; 796 /// # use std::sync::Arc; 797 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 798 /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 799 /// let vm = hv.create_vm().expect("new VM fd creation failed"); 800 /// vm.enable_split_irq().unwrap(); 801 /// let vcpu = vm.create_vcpu(0).unwrap(); 802 /// let state = vcpu.state().unwrap(); 803 /// ``` 804 fn state(&self) -> cpu::Result<CpuState> { 805 let mp_state = self.get_mp_state()?; 806 let regs = self.get_regs()?; 807 let sregs = self.get_sregs()?; 808 let xsave = self.get_xsave()?; 809 let xcrs = self.get_xcrs()?; 810 let lapic_state = self.get_lapic()?; 811 let fpu = self.get_fpu()?; 812 813 // Try to get all MSRs based on the list previously retrieved from KVM. 814 // If the number of MSRs obtained from GET_MSRS is different from the 815 // expected amount, we fallback onto a slower method by getting MSRs 816 // by chunks. This is the only way to make sure we try to get as many 817 // MSRs as possible, even if some MSRs are not supported. 818 let mut msr_entries = self.msrs.clone(); 819 let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize; 820 let num_msrs = self.get_msrs(&mut msr_entries)?; 821 let msrs = if num_msrs != expected_num_msrs { 822 let mut faulty_msr_index = num_msrs; 823 let mut msr_entries_tmp = 824 MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]); 825 826 loop { 827 warn!( 828 "Detected faulty MSR 0x{:x} while getting MSRs", 829 msr_entries.as_slice()[faulty_msr_index].index 830 ); 831 832 let start_pos = faulty_msr_index + 1; 833 let mut sub_msr_entries = 834 MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]); 835 let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize; 836 let num_msrs = self.get_msrs(&mut sub_msr_entries)?; 837 838 for i in 0..num_msrs { 839 msr_entries_tmp 840 .push(sub_msr_entries.as_slice()[i]) 841 .map_err(|e| { 842 cpu::HypervisorCpuError::GetMsrEntries(anyhow!( 843 "Failed adding MSR entries: {:?}", 844 e 845 )) 846 })?; 847 } 848 849 if num_msrs == expected_num_msrs { 850 break; 851 } 852 853 faulty_msr_index = start_pos + num_msrs; 854 } 855 856 msr_entries_tmp 857 } else { 858 msr_entries 859 }; 860 861 let vcpu_events = self.get_vcpu_events()?; 862 863 Ok(CpuState { 864 msrs, 865 vcpu_events, 866 regs, 867 sregs, 868 fpu, 869 lapic_state, 870 xsave, 871 xcrs, 872 mp_state, 873 }) 874 } 875 #[cfg(target_arch = "aarch64")] 876 fn state(&self) -> cpu::Result<CpuState> { 877 unimplemented!(); 878 } 879 #[cfg(target_arch = "x86_64")] 880 /// 881 /// Restore the previously saved CPU state 882 /// 883 /// Ordering requirements: 884 /// 885 /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are 886 /// still running. 887 /// 888 /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so 889 /// if we ever change the BSP, we have to do that before restoring anything. 890 /// The same seems to be true for CPUID stuff. 891 /// 892 /// SREGS saves/restores a pending interrupt, similar to what 893 /// VCPU_EVENTS also does. 894 /// 895 /// SET_REGS clears pending exceptions unconditionally, thus, it must be 896 /// done before SET_VCPU_EVENTS, which restores it. 897 /// 898 /// SET_LAPIC must come after SET_SREGS, because the latter restores 899 /// the apic base msr. 900 /// 901 /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR 902 /// only restores successfully, when the LAPIC is correctly configured. 903 /// 904 /// Arguments: CpuState 905 /// # Example 906 /// 907 /// ```rust 908 /// # extern crate hypervisor; 909 /// # use hypervisor::KvmHypervisor; 910 /// # use std::sync::Arc; 911 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 912 /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 913 /// let vm = hv.create_vm().expect("new VM fd creation failed"); 914 /// vm.enable_split_irq().unwrap(); 915 /// let vcpu = vm.create_vcpu(0).unwrap(); 916 /// let state = vcpu.state().unwrap(); 917 /// vcpu.set_state(&state).unwrap(); 918 /// ``` 919 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 920 self.set_mp_state(state.mp_state)?; 921 self.set_regs(&state.regs)?; 922 self.set_sregs(&state.sregs)?; 923 self.set_xsave(&state.xsave)?; 924 self.set_xcrs(&state.xcrs)?; 925 self.set_lapic(&state.lapic_state)?; 926 self.set_fpu(&state.fpu)?; 927 928 // Try to set all MSRs previously stored. 929 // If the number of MSRs set from SET_MSRS is different from the 930 // expected amount, we fallback onto a slower method by setting MSRs 931 // by chunks. This is the only way to make sure we try to set as many 932 // MSRs as possible, even if some MSRs are not supported. 933 let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize; 934 let num_msrs = self.set_msrs(&state.msrs)?; 935 if num_msrs != expected_num_msrs { 936 let mut faulty_msr_index = num_msrs; 937 938 loop { 939 warn!( 940 "Detected faulty MSR 0x{:x} while setting MSRs", 941 state.msrs.as_slice()[faulty_msr_index].index 942 ); 943 944 let start_pos = faulty_msr_index + 1; 945 let sub_msr_entries = MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]); 946 let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize; 947 let num_msrs = self.set_msrs(&sub_msr_entries)?; 948 949 if num_msrs == expected_num_msrs { 950 break; 951 } 952 953 faulty_msr_index = start_pos + num_msrs; 954 } 955 } 956 957 self.set_vcpu_events(&state.vcpu_events)?; 958 959 Ok(()) 960 } 961 #[allow(unused_variables)] 962 #[cfg(target_arch = "aarch64")] 963 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 964 warn!("CPU state was not restored"); 965 Ok(()) 966 } 967 } 968 969 /// Device struct for KVM 970 pub struct KvmDevice { 971 fd: DeviceFd, 972 } 973 974 impl device::Device for KvmDevice { 975 /// 976 /// Set device attribute 977 /// 978 fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> { 979 self.fd 980 .set_device_attr(attr) 981 .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into())) 982 } 983 } 984 985 impl AsRawFd for KvmDevice { 986 fn as_raw_fd(&self) -> RawFd { 987 self.fd.as_raw_fd() 988 } 989 } 990