xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision a6b839b35c139ccd38be9b032115e3463933d2d3)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 // Copyright © 2020, Microsoft Corporation
6 //
7 // Copyright 2018-2019 CrowdStrike, Inc.
8 //
9 //
10 
11 #[cfg(target_arch = "aarch64")]
12 pub use crate::aarch64::{
13     check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
14     MPIDR_EL1,
15 };
16 use crate::cpu;
17 use crate::device;
18 use crate::hypervisor;
19 use crate::vm::{self, VmmOps};
20 #[cfg(target_arch = "aarch64")]
21 use crate::{arm64_core_reg_id, offset__of};
22 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
23 use serde_derive::{Deserialize, Serialize};
24 use std::os::unix::io::{AsRawFd, RawFd};
25 use std::result;
26 #[cfg(target_arch = "x86_64")]
27 use std::sync::atomic::{AtomicBool, Ordering};
28 use std::sync::Arc;
29 #[cfg(target_arch = "x86_64")]
30 use vm_memory::Address;
31 use vmm_sys_util::eventfd::EventFd;
32 // x86_64 dependencies
33 #[cfg(target_arch = "x86_64")]
34 pub mod x86_64;
35 
36 #[cfg(target_arch = "x86_64")]
37 use x86_64::{
38     check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
39 };
40 
41 #[cfg(target_arch = "aarch64")]
42 use aarch64::{RegList, Register, StandardRegisters};
43 
44 #[cfg(target_arch = "x86_64")]
45 pub use x86_64::{
46     CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
47     Xsave, CPUID_FLAG_VALID_INDEX,
48 };
49 
50 #[cfg(target_arch = "x86_64")]
51 use kvm_bindings::{
52     kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
53 };
54 
55 #[cfg(target_arch = "x86_64")]
56 use crate::arch::x86::NUM_IOAPIC_PINS;
57 
58 // aarch64 dependencies
59 #[cfg(target_arch = "aarch64")]
60 pub mod aarch64;
61 #[cfg(target_arch = "aarch64")]
62 use kvm_bindings::{
63     kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
64     KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
65 };
66 #[cfg(target_arch = "aarch64")]
67 use std::mem;
68 
69 pub use kvm_bindings;
70 pub use kvm_bindings::{
71     kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
72     kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY,
73     KVM_MSI_VALID_DEVID,
74 };
75 pub use kvm_ioctls;
76 pub use kvm_ioctls::{Cap, Kvm};
77 
78 ///
79 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms
80 ///
81 pub use {
82     kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
83     kvm_bindings::kvm_device_attr as DeviceAttr,
84     kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
85     kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
86     kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
87     kvm_ioctls::VcpuExit,
88 };
/// KVM-specific VM state.
///
/// Currently empty: `KvmVm::state()`/`set_state()` have nothing to save or
/// restore yet. The Serialize/Deserialize derives keep it wire-compatible
/// (presumably for snapshot/restore plumbing — TODO confirm with callers).
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}
91 
92 pub use KvmVmState as VmState;
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    // Shared handle to the kernel VM object; all `vm::Vm` methods go
    // through this fd.
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    // Template of host-supported MSR indices, built in
    // `KvmHypervisor::create_vm()` and cloned into each vCPU on creation.
    msrs: MsrEntries,
    // Placeholder state returned by `state()`; carries no data today.
    state: KvmVmState,
}
100 
use std::mem::size_of;

// Returns a `Vec<T>` whose backing allocation spans at least `size_in_bytes`
// bytes. `T` must not be zero-sized (the element count is derived by dividing
// by `size_of::<T>()`).
fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {
    // Number of `T` elements needed to cover `size_in_bytes`, rounding up.
    let element_count = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>();
    let mut storage = Vec::new();
    storage.resize_with(element_count, T::default);
    storage
}

// Many kvm API structs end in a flexible array member, e.g.:
//
// ```
// #[repr(C)]
// struct Foo {
//    some_data: u32
//    entries: __IncompleteArrayField<__u32>,
// }
// ```
//
// `size_of::<Foo>()` alone is too small for such a struct because it reserves
// no room for `entries`. To get an allocation that is both large enough and
// correctly aligned for `Foo`, we allocate a `Vec<Foo>`: element 0 is used as
// the actual `Foo`, and the remaining elements supply the contiguous storage
// that `entries` indexes into. This helper builds such a `Vec<Foo>` with room
// for `count` trailing `F` entries.
fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> {
    // One `T` header plus `count` trailing `F` entries, in one contiguous
    // allocation.
    vec_with_size_in_bytes(size_of::<T>() + count * size_of::<F>())
}
130 
///
/// Implementation of the `Vm` trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set_state(state).unwrap(); vm.state().unwrap();
///
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    /// `vmmops`, when provided, lets `KvmVcpu::run()` service PIO/MMIO exits
    /// through VMM callbacks instead of returning them to the caller.
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<Box<dyn VmmOps>>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vc = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: vc,
            // Each vCPU gets its own copy of the MSR index template built
            // when the VM was created.
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vmmops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }
    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    /// With a `datamatch`, the event only fires when the written value matches
    /// the 32- or 64-bit pattern; without one, any write to `addr` signals `fd`.
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }
    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        // `kvm_irq_routing` ends in a flexible array member: allocate one
        // header plus room for `entries.len()` trailing entries.
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;

        // Safe because the allocation above reserved space for exactly
        // `entries.len()` entries contiguous with the header element.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Creates a memory region structure that can be used with set_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> MemoryRegion {
        MemoryRegion {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            // Translate the two booleans into their KVM flag bits.
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
    }
    ///
    /// Creates/modifies a guest physical memory slot.
    ///
    fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(user_memory_region)
                .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into()))
        }
    }
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = KvmDevice { fd };
        Ok(Arc::new(device))
    }
    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Enables the split irqchip model: the local APIC is emulated in-kernel
    /// while PIC/IOAPIC emulation is left to userspace.
    ///
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Set TSS
        self.fd
            .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        self.fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        self.fd
            .set_clock(data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }
    /// Checks if a particular `Cap` is available.
    fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        // A VFIO device is the kernel-side anchor for device passthrough.
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(self.state)
    }
    ///
    /// Set the VM state
    ///
    /// No-op for KVM: `KvmVmState` carries no data to restore.
    fn set_state(&self, _state: VmState) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    // Handle to the KVM subsystem (`/dev/kvm`), used for system-scoped
    // queries and for creating VM fds.
    kvm: Kvm,
}
/// Enum for KVM related error
#[derive(Debug)]
pub enum KvmError {
    /// A required KVM capability (`Cap`) is missing on the host.
    CapabilityMissing(Cap),
}
/// Result type for KVM-specific fallible operations.
pub type KvmResult<T> = result::Result<T, KvmError>;
388 impl KvmHypervisor {
389     /// Create a hypervisor based on Kvm
390     pub fn new() -> hypervisor::Result<KvmHypervisor> {
391         let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
392         let api_version = kvm_obj.get_api_version();
393 
394         if api_version != kvm_bindings::KVM_API_VERSION as i32 {
395             return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
396         }
397 
398         Ok(KvmHypervisor { kvm: kvm_obj })
399     }
400 }
/// Implementation of the `Hypervisor` trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for KvmHypervisor {
    /// Create a KVM vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap()
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // Build the MSR index template once per VM: one entry per
            // host-supported MSR, data left zeroed. Each vCPU clones this
            // list on creation (presumably for MSR save/restore — see
            // `KvmVcpu::msrs`).
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs);
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
            }))
        }
    }

    ///
    /// Verifies that the host exposes every KVM capability this crate needs.
    ///
    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        // NOTE(review): a missing capability panics via `expect` rather than
        // returning Err — fail-fast at startup, but worth confirming this is
        // intended given the Result return type.
        check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities");
        Ok(())
    }

    ///
    ///  Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure.
    ///
    fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> {
        self.kvm
            .get_vcpu_mmap_size()
            .map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into()))
    }
    ///
    /// Gets the recommended maximum number of VCPUs per VM.
    ///
    fn get_max_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_max_vcpus())
    }
    ///
    /// Gets the recommended number of VCPUs per VM.
    ///
    fn get_nr_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_nr_vcpus())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Checks if a particular `Cap` is available.
    ///
    fn check_capability(&self, c: Cap) -> bool {
        self.kvm.check_extension(c)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}
/// Vcpu struct for KVM
pub struct KvmVcpu {
    // Handle to the kernel vCPU object backing this vCPU.
    fd: VcpuFd,
    #[cfg(target_arch = "x86_64")]
    // Per-vCPU copy of the host MSR index template (cloned from
    // `KvmVm::msrs` in `create_vcpu`).
    msrs: MsrEntries,
    // Optional VMM callbacks; when present, `run()` services PIO/MMIO exits
    // through them instead of returning the exit to the caller.
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
    #[cfg(target_arch = "x86_64")]
    // Records whether Hyper-V SynIC was requested via
    // `enable_hyperv_synic()`.
    hyperv_synic: AtomicBool,
}
/// Implementation of the `Vcpu` trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor;
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// vcpu.get_regs().unwrap(); vcpu.set_regs(&regs).unwrap();
///
536 impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    /// Failures from the `KVM_GET_REGS` ioctl surface as `GetStandardRegs`.
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    /// Failures surface as `SetStandardRegs`.
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers (`KVM_GET_SREGS`).
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    /// Failures surface as `SetSpecialRegs`.
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU (`KVM_GET_FPU`).
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers (`KVM_SET_CPUID2`).
    ///
    fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
        self.fd
            .set_cpuid2(cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        // Note: the flag is set unconditionally, even if enable_cap below
        // fails.
        self.hyperv_synic.store(true, Ordering::Release);

        // Ask KVM to enable SynIC emulation for this vCPU.
        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSynIC(e.into()))
    }
    ///
    /// X86 specific call to retrieve the CPUID registers (`KVM_GET_CPUID2`).
    ///
    /// `num_entries` is the capacity passed through to the kernel for the
    /// returned CPUID entry list.
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
        self.fd
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    /// Backed by the `KVM_GET_LAPIC` ioctl.
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    /// Backed by the `KVM_SET_LAPIC` ioctl.
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    /// `msrs` lists the requested indices on input and receives the values;
    /// the returned `usize` is the number of entries the kernel filled in.
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    /// Backed by the `KVM_SET_MSRS` ioctl.
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state" (`KVM_GET_MP_STATE`).
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        self.fd
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
    }
    ///
    /// Sets the vcpu's current "multiprocessing state" (`KVM_SET_MP_STATE`).
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .set_mp_state(mp_state)
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct"
    /// (`KVM_GET_XSAVE`).
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct"
    /// (`KVM_SET_XSAVE`).
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs"
    /// (`KVM_GET_XCRS`).
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
706     #[cfg(target_arch = "x86_64")]
707     ///
708     /// X86 specific call that sets the vcpu's current "xcrs".
709     ///
710     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
711         self.fd
712             .set_xcrs(&xcrs)
713             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
714     }
    ///
    /// Triggers the running of the current virtual CPU returning an exit reason.
    ///
    /// Exits that a registered `VmmOps` can service (PIO and MMIO accesses)
    /// are handled in place and reported as `VmExit::Ignore`; other exits are
    /// mapped to a `cpu::VmExit` variant or an error.
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    // Service the port read through the VMM callback if one
                    // is registered, swallowing the exit.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On Aarch64, when the VM is shutdown, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                // EAGAIN/EINTR are benign interruptions: report Ignore so the
                // caller simply runs the vCPU again.
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    /// Backed by the `KVM_GET_VCPU_EVENTS` ioctl.
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    /// Backed by the `KVM_SET_VCPU_EVENTS` ioctl.
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents from
    /// potential soft lockups when being resumed.
    ///
    /// Backed by the `KVM_KVMCLOCK_CTRL` ioctl.
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        self.fd
            .kvmclock_ctrl()
            .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
    }
833     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
834     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
835         self.fd
836             .vcpu_init(kvi)
837             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
838     }
839     ///
840     /// Sets the value of one register for this vCPU.
841     ///
842     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
843     fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
844         self.fd
845             .set_one_reg(reg_id, data)
846             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
847     }
848     ///
849     /// Gets the value of one register for this vCPU.
850     ///
851     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
852     fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
853         self.fd
854             .get_one_reg(reg_id)
855             .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
856     }
857     ///
858     /// Gets a list of the guest registers that are supported for the
859     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
860     ///
861     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
862     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
863         self.fd
864             .get_reg_list(reg_list)
865             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
866     }
867     ///
868     /// Save the state of the core registers.
869     ///
870     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
871     fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
872         let mut off = offset__of!(user_pt_regs, regs);
873         // There are 31 user_pt_regs:
874         // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
875         // These actually are the general-purpose registers of the Armv8-a
876         // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
877         for i in 0..31 {
878             state.regs.regs[i] = self
879                 .fd
880                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
881                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
882             off += std::mem::size_of::<u64>();
883         }
884 
885         // We are now entering the "Other register" section of the ARMv8-a architecture.
886         // First one, stack pointer.
887         let off = offset__of!(user_pt_regs, sp);
888         state.regs.sp = self
889             .fd
890             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
891             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
892 
893         // Second one, the program counter.
894         let off = offset__of!(user_pt_regs, pc);
895         state.regs.pc = self
896             .fd
897             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
898             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
899 
900         // Next is the processor state.
901         let off = offset__of!(user_pt_regs, pstate);
902         state.regs.pstate = self
903             .fd
904             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
905             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
906 
907         // The stack pointer associated with EL1
908         let off = offset__of!(kvm_regs, sp_el1);
909         state.sp_el1 = self
910             .fd
911             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
912             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
913 
914         // Exception Link Register for EL1, when taking an exception to EL1, this register
915         // holds the address to which to return afterwards.
916         let off = offset__of!(kvm_regs, elr_el1);
917         state.elr_el1 = self
918             .fd
919             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
920             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
921 
922         // Saved Program Status Registers, there are 5 of them used in the kernel.
923         let mut off = offset__of!(kvm_regs, spsr);
924         for i in 0..KVM_NR_SPSR as usize {
925             state.spsr[i] = self
926                 .fd
927                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
928                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
929             off += std::mem::size_of::<u64>();
930         }
931 
932         // Now moving on to floting point registers which are stored in the user_fpsimd_state in the kernel:
933         // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
934         let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
935         for i in 0..32 {
936             state.fp_regs.vregs[i][0] = self
937                 .fd
938                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
939                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
940             off += mem::size_of::<u128>();
941         }
942 
943         // Floating-point Status Register
944         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
945         state.fp_regs.fpsr = self
946             .fd
947             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
948             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
949             as u32;
950 
951         // Floating-point Control Register
952         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
953         state.fp_regs.fpcr = self
954             .fd
955             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
956             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
957             as u32;
958         Ok(())
959     }
960     ///
961     /// Restore the state of the core registers.
962     ///
963     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
964     fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
965         // The function follows the exact identical order from `state`. Look there
966         // for some additional info on registers.
967         let mut off = offset__of!(user_pt_regs, regs);
968         for i in 0..31 {
969             self.fd
970                 .set_one_reg(
971                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
972                     state.regs.regs[i],
973                 )
974                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
975             off += std::mem::size_of::<u64>();
976         }
977 
978         let off = offset__of!(user_pt_regs, sp);
979         self.fd
980             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
981             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
982 
983         let off = offset__of!(user_pt_regs, pc);
984         self.fd
985             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
986             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
987 
988         let off = offset__of!(user_pt_regs, pstate);
989         self.fd
990             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
991             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
992 
993         let off = offset__of!(kvm_regs, sp_el1);
994         self.fd
995             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
996             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
997 
998         let off = offset__of!(kvm_regs, elr_el1);
999         self.fd
1000             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
1001             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1002 
1003         let mut off = offset__of!(kvm_regs, spsr);
1004         for i in 0..KVM_NR_SPSR as usize {
1005             self.fd
1006                 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
1007                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1008             off += std::mem::size_of::<u64>();
1009         }
1010 
1011         let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
1012         for i in 0..32 {
1013             self.fd
1014                 .set_one_reg(
1015                     arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
1016                     state.fp_regs.vregs[i][0],
1017                 )
1018                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1019             off += mem::size_of::<u128>();
1020         }
1021 
1022         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
1023         self.fd
1024             .set_one_reg(
1025                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1026                 state.fp_regs.fpsr as u64,
1027             )
1028             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1029 
1030         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
1031         self.fd
1032             .set_one_reg(
1033                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1034                 state.fp_regs.fpcr as u64,
1035             )
1036             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1037         Ok(())
1038     }
1039     ///
1040     /// Save the state of the system registers.
1041     ///
1042     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1043     fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
1044         // Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are
1045         // around 500 registers.
1046         let mut reg_list = RegList::new(512);
1047         self.fd
1048             .get_reg_list(&mut reg_list)
1049             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
1050 
1051         // At this point reg_list should contain: core registers and system registers.
1052         // The register list contains the number of registers and their ids. We will be needing to
1053         // call KVM_GET_ONE_REG on each id in order to save all of them. We carve out from the list
1054         // the core registers which are represented in the kernel by kvm_regs structure and for which
1055         // we can calculate the id based on the offset in the structure.
1056 
1057         reg_list.retain(|regid| *regid != 0);
1058         reg_list.as_slice().to_vec().sort_unstable();
1059 
1060         reg_list.retain(|regid| is_system_register(*regid));
1061 
1062         // Now, for the rest of the registers left in the previously fetched register list, we are
1063         // simply calling KVM_GET_ONE_REG.
1064         let indices = reg_list.as_slice();
1065         for (_pos, index) in indices.iter().enumerate() {
1066             if _pos > 230 {
1067                 break;
1068             }
1069             state.push(kvm_bindings::kvm_one_reg {
1070                 id: *index,
1071                 addr: self
1072                     .fd
1073                     .get_one_reg(*index)
1074                     .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
1075             });
1076         }
1077 
1078         Ok(())
1079     }
1080     ///
1081     /// Restore the state of the system registers.
1082     ///
1083     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1084     fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
1085         for reg in state {
1086             self.fd
1087                 .set_one_reg(reg.id, reg.addr)
1088                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
1089         }
1090         Ok(())
1091     }
1092     ///
1093     /// Read the MPIDR - Multiprocessor Affinity Register.
1094     ///
1095     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1096     fn read_mpidr(&self) -> cpu::Result<u64> {
1097         self.fd
1098             .get_one_reg(MPIDR_EL1)
1099             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
1100     }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        // The collection order below implements the ordering requirements
        // documented above (MP state first, VCPU events last). Do not reorder
        // these calls without revisiting those requirements.
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fallback onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            // Hyper-V synthetic MSR indices (0x4000_00xx range).
            // NOTE(review): presumably these match the MSRs exposed when
            // KVM_CAP_HYPERV_SYNIC is enabled — confirm against the Hyper-V
            // TLFS before modifying this list.
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5,
                0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                // push() only fails if the FAM wrapper capacity is exceeded;
                // treated here as a fatal invariant violation.
                msr_entries.push(msr).unwrap();
            }
        }

        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            // GET_MSRS stopped early: entry `num_msrs` is the first faulty
            // index. Keep everything fetched so far, then repeatedly skip the
            // faulty entry and retry from the one after it.
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]);

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                // Retry on the tail that follows the faulty MSR.
                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                // Accumulate whatever was successfully fetched this round.
                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                // The entire remaining tail was read: we are done.
                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        // VCPU events are fetched last, per the ordering requirements above.
        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
1232     ///
1233     /// Get the current AArch64 CPU state
1234     ///
1235     #[cfg(target_arch = "aarch64")]
1236     fn state(&self) -> cpu::Result<CpuState> {
1237         let mut state = CpuState {
1238             mp_state: self.get_mp_state()?,
1239             mpidr: self.read_mpidr()?,
1240             ..Default::default()
1241         };
1242         self.core_registers(&mut state.core_regs)?;
1243         self.system_registers(&mut state.sys_regs)?;
1244 
1245         Ok(state)
1246     }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully, when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // The restore order below implements the ordering requirements
        // documented above (CPUID/MP state first, REGS before VCPU_EVENTS,
        // SREGS before LAPIC, LAPIC before MSRS). Do not reorder these calls
        // without revisiting those requirements.
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fallback onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            // SET_MSRS stopped early: entry `num_msrs` is the first faulty
            // index. Repeatedly skip the faulty entry and retry from the one
            // after it until the whole tail has been applied.
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                // Retry on the tail that follows the faulty MSR.
                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries = MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                // The entire remaining tail was applied: we are done.
                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        // VCPU events must be restored last, per the ordering above.
        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
1330     ///
1331     /// Restore the previously saved AArch64 CPU state
1332     ///
1333     #[cfg(target_arch = "aarch64")]
1334     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1335         self.set_core_registers(&state.core_regs)?;
1336         self.set_system_registers(&state.sys_regs)?;
1337         self.set_mp_state(state.mp_state)?;
1338 
1339         Ok(())
1340     }
1341 }
1342 
/// Device struct for KVM
pub struct KvmDevice {
    // Underlying kvm-ioctls `DeviceFd`, used by the `device::Device`
    // implementation below for the get/set attribute ioctls.
    fd: DeviceFd,
}
1347 
1348 impl device::Device for KvmDevice {
1349     ///
1350     /// Set device attribute
1351     ///
1352     fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
1353         self.fd
1354             .set_device_attr(attr)
1355             .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
1356     }
1357     ///
1358     /// Get device attribute
1359     ///
1360     fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
1361         self.fd
1362             .get_device_attr(attr)
1363             .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
1364     }
1365 }
1366 
1367 impl AsRawFd for KvmDevice {
1368     fn as_raw_fd(&self) -> RawFd {
1369         self.fd.as_raw_fd()
1370     }
1371 }
1372