xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision 07a09eda274b0cee025abd82efb7c0fff7d939db)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 // Copyright © 2020, Microsoft Corporation
6 //
7 // Copyright 2018-2019 CrowdStrike, Inc.
8 //
9 //
10 
11 #[cfg(target_arch = "aarch64")]
12 pub use crate::aarch64::{
13     check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
14     MPIDR_EL1,
15 };
16 use crate::cpu;
17 use crate::device;
18 use crate::hypervisor;
19 use crate::vm::{self, VmmOps};
20 #[cfg(target_arch = "aarch64")]
21 use crate::{arm64_core_reg_id, offset__of};
22 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
23 use serde_derive::{Deserialize, Serialize};
24 use std::os::unix::io::{AsRawFd, RawFd};
25 use std::result;
26 #[cfg(target_arch = "x86_64")]
27 use std::sync::atomic::{AtomicBool, Ordering};
28 use std::sync::Arc;
29 #[cfg(target_arch = "x86_64")]
30 use vm_memory::Address;
31 use vmm_sys_util::eventfd::EventFd;
32 // x86_64 dependencies
33 #[cfg(target_arch = "x86_64")]
34 pub mod x86_64;
35 #[cfg(target_arch = "x86_64")]
36 use crate::arch::x86::NUM_IOAPIC_PINS;
37 #[cfg(target_arch = "aarch64")]
38 use aarch64::{RegList, Register, StandardRegisters};
39 #[cfg(target_arch = "x86_64")]
40 use kvm_bindings::{
41     kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
42 };
43 #[cfg(target_arch = "x86_64")]
44 use x86_64::{
45     check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
46 };
47 #[cfg(target_arch = "x86_64")]
48 pub use x86_64::{
49     CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
50     Xsave, CPUID_FLAG_VALID_INDEX,
51 };
52 // aarch64 dependencies
53 #[cfg(target_arch = "aarch64")]
54 pub mod aarch64;
55 pub use kvm_bindings;
56 pub use kvm_bindings::{
57     kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
58     kvm_userspace_memory_region, KVM_IRQ_ROUTING_MSI, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY,
59     KVM_MSI_VALID_DEVID,
60 };
61 #[cfg(target_arch = "aarch64")]
62 use kvm_bindings::{
63     kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
64     KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
65 };
66 pub use kvm_ioctls;
67 pub use kvm_ioctls::{Cap, Kvm};
68 #[cfg(target_arch = "aarch64")]
69 use std::mem;
70 
71 ///
72 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms
73 ///
74 pub use {
75     kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
76     kvm_bindings::kvm_device_attr as DeviceAttr,
77     kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
78     kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
79     kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
80     kvm_ioctls::VcpuExit,
81 };
/// KVM-specific VM state. Currently empty; kept as a serializable placeholder
/// so the generic `Vm::state()`/`set_state()` API has a concrete type.
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}
84 
85 pub use KvmVmState as VmState;
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    // Kernel VM file descriptor all VM-level ioctls are issued against.
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    // MSR entry list pre-populated with the host-supported MSR indices at
    // VM creation; a copy is handed to every vCPU in `create_vcpu()`.
    msrs: MsrEntries,
    // Opaque VM state returned by `state()` (currently empty).
    state: KvmVmState,
}
93 
// Returns a `Vec<T>` whose backing storage is at least `size_in_bytes` bytes,
// with every element default-initialized.
fn vec_with_size_in_bytes<T: Default>(size_in_bytes: usize) -> Vec<T> {
    // Number of `T` elements needed to cover `size_in_bytes`, rounding up.
    let element_count = (size_in_bytes + size_of::<T>() - 1) / size_of::<T>();
    std::iter::repeat_with(T::default)
        .take(element_count)
        .collect()
}
101 
102 // The kvm API has many structs that resemble the following `Foo` structure:
103 //
104 // ```
105 // #[repr(C)]
106 // struct Foo {
107 //    some_data: u32
108 //    entries: __IncompleteArrayField<__u32>,
109 // }
110 // ```
111 //
112 // In order to allocate such a structure, `size_of::<Foo>()` would be too small because it would not
113 // include any space for `entries`. To make the allocation large enough while still being aligned
114 // for `Foo`, a `Vec<Foo>` is created. Only the first element of `Vec<Foo>` would actually be used
115 // as a `Foo`. The remaining memory in the `Vec<Foo>` is for `entries`, which must be contiguous
116 // with `Foo`. This function is used to make the `Vec<Foo>` with enough space for `count` entries.
117 use std::mem::size_of;
118 fn vec_with_array_field<T: Default, F>(count: usize) -> Vec<T> {
119     let element_space = count * size_of::<F>();
120     let vec_size_bytes = size_of::<T>() + element_space;
121     vec_with_size_in_bytes(vec_size_bytes)
122 }
123 
124 ///
125 /// Implementation of Vm trait for KVM
126 /// Example:
127 /// #[cfg(feature = "kvm")]
128 /// extern crate hypervisor
129 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
130 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
131 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
132 /// vm.set/get().unwrap()
133 ///
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<Box<dyn VmmOps>>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vc = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: vc,
            // Each vCPU gets its own copy of the VM's host-supported MSR list.
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vmmops,
            // Hyper-V SynIC starts disabled; flipped by enable_hyperv_synic().
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }
    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        // With a datamatch, the eventfd only fires when the guest writes the
        // exact 32- or 64-bit value; without one, any write triggers it.
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }
    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        // `kvm_irq_routing` ends in a flexible array member, so allocate a
        // Vec with room for the header plus all entries; only element 0 is
        // used as the header (see `vec_with_array_field`).
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;

        // SAFETY: the allocation above reserves space for exactly
        // `entries.len()` trailing entries contiguous with the header, which
        // is the invariant `as_mut_slice` relies on.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            // NOTE(review): `&entries` is a needless double reference
            // (`entries` is already a slice); `copy_from_slice(entries)`
            // would be the idiomatic form (clippy::needless_borrow).
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Creates a memory region structure that can be used with set_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> MemoryRegion {
        MemoryRegion {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            // OR together the optional flags; 0 means read-write with no
            // dirty-page logging.
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
    }
    ///
    /// Creates/modifies a guest physical memory slot.
    ///
    fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(user_memory_region)
                .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into()))
        }
    }
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = KvmDevice { fd };
        Ok(Arc::new(device))
    }
    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }
    ///
    /// Enables split-irqchip operation: sets the TSS address and asks KVM to
    /// emulate only the local APIC in kernel.
    ///
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Set TSS
        self.fd
            .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        // args[0] tells KVM how many IOAPIC pins userspace will emulate.
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        self.fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        self.fd
            .set_clock(data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }
    /// Checks if a particular `Cap` is available.
    fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        // VFIO device: the kernel-side anchor for device passthrough.
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(self.state)
    }
    ///
    /// Set the VM state
    ///
    fn set_state(&self, _state: VmState) -> vm::Result<()> {
        // KvmVmState carries no data yet, so there is nothing to restore.
        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    // System-level KVM handle used for VM creation and capability queries.
    kvm: Kvm,
}
/// Enum for KVM related error
#[derive(Debug)]
pub enum KvmError {
    /// A KVM capability required by the VMM is missing on the host.
    CapabilityMissing(Cap),
}
/// Convenience alias for results of KVM-specific operations.
pub type KvmResult<T> = result::Result<T, KvmError>;
381 impl KvmHypervisor {
382     /// Create a hypervisor based on Kvm
383     pub fn new() -> hypervisor::Result<KvmHypervisor> {
384         let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
385         let api_version = kvm_obj.get_api_version();
386 
387         if api_version != kvm_bindings::KVM_API_VERSION as i32 {
388             return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
389         }
390 
391         Ok(KvmHypervisor { kvm: kvm_obj })
392     }
393 }
394 /// Implementation of Hypervisor trait for KVM
395 /// Example:
396 /// #[cfg(feature = "kvm")]
397 /// extern crate hypervisor
398 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
399 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
400 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
401 ///
impl hypervisor::Hypervisor for KvmHypervisor {
    /// Create a KVM vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap()
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // Pre-build the MSR entry list from the host-supported MSR
            // indices so every vCPU created from this VM can reuse it.
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs);
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
            }))
        }
    }

    /// Checks that the KVM extensions this VMM depends on are all present.
    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        // NOTE(review): `expect` panics on a missing capability even though
        // this function returns a Result; consider mapping the error into
        // the hypervisor error type instead — confirm no caller relies on
        // the panic before changing.
        check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities");
        Ok(())
    }

    ///
    ///  Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure.
    ///
    fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> {
        self.kvm
            .get_vcpu_mmap_size()
            .map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into()))
    }
    ///
    /// Gets the recommended maximum number of VCPUs per VM.
    ///
    fn get_max_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_max_vcpus())
    }
    ///
    /// Gets the recommended number of VCPUs per VM.
    ///
    fn get_nr_vcpus(&self) -> hypervisor::Result<usize> {
        Ok(self.kvm.get_nr_vcpus())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Checks if a particular `Cap` is available.
    ///
    fn check_capability(&self, c: Cap) -> bool {
        self.kvm.check_extension(c)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}
/// Vcpu struct for KVM
pub struct KvmVcpu {
    // Kernel vCPU file descriptor all vCPU ioctls are issued against.
    fd: VcpuFd,
    #[cfg(target_arch = "x86_64")]
    // Copy of the VM's MSR entry list (host-supported MSR indices).
    msrs: MsrEntries,
    // Optional VMM callbacks used by run() to handle PIO/MMIO exits inline.
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
    #[cfg(target_arch = "x86_64")]
    // Set when enable_hyperv_synic() is called; influences which MSRs are
    // saved later.
    hyperv_synic: AtomicBool,
}
519 /// Implementation of Vcpu trait for KVM
520 /// Example:
521 /// #[cfg(feature = "kvm")]
522 /// extern crate hypervisor
523 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
524 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
525 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
526 /// let vcpu = vm.create_vcpu(0, None).unwrap();
527 /// vcpu.get/set().unwrap()
528 ///
529 impl cpu::Vcpu for KvmVcpu {
530     #[cfg(target_arch = "x86_64")]
531     ///
532     /// Returns the vCPU general purpose registers.
533     ///
534     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
535         self.fd
536             .get_regs()
537             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
538     }
539     #[cfg(target_arch = "x86_64")]
540     ///
541     /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
542     ///
543     fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
544         self.fd
545             .set_regs(regs)
546             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
547     }
548     #[cfg(target_arch = "x86_64")]
549     ///
550     /// Returns the vCPU special registers.
551     ///
552     fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
553         self.fd
554             .get_sregs()
555             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
556     }
557     #[cfg(target_arch = "x86_64")]
558     ///
559     /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
560     ///
561     fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
562         self.fd
563             .set_sregs(sregs)
564             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
565     }
566     #[cfg(target_arch = "x86_64")]
567     ///
568     /// Returns the floating point state (FPU) from the vCPU.
569     ///
570     fn get_fpu(&self) -> cpu::Result<FpuState> {
571         self.fd
572             .get_fpu()
573             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
574     }
575     #[cfg(target_arch = "x86_64")]
576     ///
577     /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct.
578     ///
579     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
580         self.fd
581             .set_fpu(fpu)
582             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
583     }
584     #[cfg(target_arch = "x86_64")]
585     ///
586     /// X86 specific call to setup the CPUID registers.
587     ///
588     fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
589         self.fd
590             .set_cpuid2(cpuid)
591             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
592     }
593     #[cfg(target_arch = "x86_64")]
594     ///
595     /// X86 specific call to enable HyperV SynIC
596     ///
597     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
598         // Update the information about Hyper-V SynIC being enabled and
599         // emulated as it will influence later which MSRs should be saved.
600         self.hyperv_synic.store(true, Ordering::Release);
601 
602         let cap = kvm_enable_cap {
603             cap: KVM_CAP_HYPERV_SYNIC,
604             ..Default::default()
605         };
606         self.fd
607             .enable_cap(&cap)
608             .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSynIC(e.into()))
609     }
610     ///
611     /// X86 specific call to retrieve the CPUID registers.
612     ///
613     #[cfg(target_arch = "x86_64")]
614     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
615         self.fd
616             .get_cpuid2(num_entries)
617             .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
618     }
619     #[cfg(target_arch = "x86_64")]
620     ///
621     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
622     ///
623     fn get_lapic(&self) -> cpu::Result<LapicState> {
624         self.fd
625             .get_lapic()
626             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
627     }
628     #[cfg(target_arch = "x86_64")]
629     ///
630     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
631     ///
632     fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
633         self.fd
634             .set_lapic(klapic)
635             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
636     }
637     #[cfg(target_arch = "x86_64")]
638     ///
639     /// Returns the model-specific registers (MSR) for this vCPU.
640     ///
641     fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
642         self.fd
643             .get_msrs(msrs)
644             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
645     }
646     #[cfg(target_arch = "x86_64")]
647     ///
648     /// Setup the model-specific registers (MSR) for this vCPU.
649     /// Returns the number of MSR entries actually written.
650     ///
651     fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
652         self.fd
653             .set_msrs(msrs)
654             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
655     }
656     ///
657     /// Returns the vcpu's current "multiprocessing state".
658     ///
659     fn get_mp_state(&self) -> cpu::Result<MpState> {
660         self.fd
661             .get_mp_state()
662             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
663     }
664     ///
665     /// Sets the vcpu's current "multiprocessing state".
666     ///
667     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
668         self.fd
669             .set_mp_state(mp_state)
670             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
671     }
672     #[cfg(target_arch = "x86_64")]
673     ///
674     /// X86 specific call that returns the vcpu's current "xsave struct".
675     ///
676     fn get_xsave(&self) -> cpu::Result<Xsave> {
677         self.fd
678             .get_xsave()
679             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
680     }
681     #[cfg(target_arch = "x86_64")]
682     ///
683     /// X86 specific call that sets the vcpu's current "xsave struct".
684     ///
685     fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
686         self.fd
687             .set_xsave(xsave)
688             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
689     }
690     #[cfg(target_arch = "x86_64")]
691     ///
692     /// X86 specific call that returns the vcpu's current "xcrs".
693     ///
694     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
695         self.fd
696             .get_xcrs()
697             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
698     }
699     #[cfg(target_arch = "x86_64")]
700     ///
701     /// X86 specific call that sets the vcpu's current "xcrs".
702     ///
703     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
704         self.fd
705             .set_xcrs(&xcrs)
706             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
707     }
    ///
    /// Triggers the running of the current virtual CPU returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    // If VMM callbacks were supplied, service the port read
                    // directly and report the exit as handled (Ignore).
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    // Same fast path for port writes.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On Aarch64, when the VM is shutdown, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    // MMIO reads can likewise be handled inline by the VMM.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            // EAGAIN/EINTR mean the run ioctl was interrupted; the caller
            // should simply run the vCPU again.
            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
796     #[cfg(target_arch = "x86_64")]
797     ///
798     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
799     /// states of the vcpu.
800     ///
801     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
802         self.fd
803             .get_vcpu_events()
804             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
805     }
806     #[cfg(target_arch = "x86_64")]
807     ///
808     /// Sets pending exceptions, interrupts, and NMIs as well as related states
809     /// of the vcpu.
810     ///
811     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
812         self.fd
813             .set_vcpu_events(events)
814             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
815     }
816     #[cfg(target_arch = "x86_64")]
817     ///
818     /// Let the guest know that it has been paused, which prevents from
819     /// potential soft lockups when being resumed.
820     ///
821     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
822         self.fd
823             .kvmclock_ctrl()
824             .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
825     }
826     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
827     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
828         self.fd
829             .vcpu_init(kvi)
830             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
831     }
832     ///
833     /// Sets the value of one register for this vCPU.
834     ///
835     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
836     fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
837         self.fd
838             .set_one_reg(reg_id, data)
839             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
840     }
841     ///
842     /// Gets the value of one register for this vCPU.
843     ///
844     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
845     fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
846         self.fd
847             .get_one_reg(reg_id)
848             .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
849     }
850     ///
851     /// Gets a list of the guest registers that are supported for the
852     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
853     ///
854     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
855     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
856         self.fd
857             .get_reg_list(reg_list)
858             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
859     }
860     ///
861     /// Save the state of the core registers.
862     ///
863     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
864     fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
865         let mut off = offset__of!(user_pt_regs, regs);
866         // There are 31 user_pt_regs:
867         // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
868         // These actually are the general-purpose registers of the Armv8-a
869         // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
870         for i in 0..31 {
871             state.regs.regs[i] = self
872                 .fd
873                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
874                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
875             off += std::mem::size_of::<u64>();
876         }
877 
878         // We are now entering the "Other register" section of the ARMv8-a architecture.
879         // First one, stack pointer.
880         let off = offset__of!(user_pt_regs, sp);
881         state.regs.sp = self
882             .fd
883             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
884             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
885 
886         // Second one, the program counter.
887         let off = offset__of!(user_pt_regs, pc);
888         state.regs.pc = self
889             .fd
890             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
891             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
892 
893         // Next is the processor state.
894         let off = offset__of!(user_pt_regs, pstate);
895         state.regs.pstate = self
896             .fd
897             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
898             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
899 
900         // The stack pointer associated with EL1
901         let off = offset__of!(kvm_regs, sp_el1);
902         state.sp_el1 = self
903             .fd
904             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
905             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
906 
907         // Exception Link Register for EL1, when taking an exception to EL1, this register
908         // holds the address to which to return afterwards.
909         let off = offset__of!(kvm_regs, elr_el1);
910         state.elr_el1 = self
911             .fd
912             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
913             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
914 
915         // Saved Program Status Registers, there are 5 of them used in the kernel.
916         let mut off = offset__of!(kvm_regs, spsr);
917         for i in 0..KVM_NR_SPSR as usize {
918             state.spsr[i] = self
919                 .fd
920                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
921                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
922             off += std::mem::size_of::<u64>();
923         }
924 
925         // Now moving on to floting point registers which are stored in the user_fpsimd_state in the kernel:
926         // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
927         let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
928         for i in 0..32 {
929             state.fp_regs.vregs[i][0] = self
930                 .fd
931                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
932                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
933             off += mem::size_of::<u128>();
934         }
935 
936         // Floating-point Status Register
937         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
938         state.fp_regs.fpsr = self
939             .fd
940             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
941             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
942             as u32;
943 
944         // Floating-point Control Register
945         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
946         state.fp_regs.fpcr = self
947             .fd
948             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
949             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
950             as u32;
951         Ok(())
952     }
953     ///
954     /// Restore the state of the core registers.
955     ///
956     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
957     fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
958         // The function follows the exact identical order from `state`. Look there
959         // for some additional info on registers.
960         let mut off = offset__of!(user_pt_regs, regs);
961         for i in 0..31 {
962             self.fd
963                 .set_one_reg(
964                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
965                     state.regs.regs[i],
966                 )
967                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
968             off += std::mem::size_of::<u64>();
969         }
970 
971         let off = offset__of!(user_pt_regs, sp);
972         self.fd
973             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
974             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
975 
976         let off = offset__of!(user_pt_regs, pc);
977         self.fd
978             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
979             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
980 
981         let off = offset__of!(user_pt_regs, pstate);
982         self.fd
983             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
984             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
985 
986         let off = offset__of!(kvm_regs, sp_el1);
987         self.fd
988             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
989             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
990 
991         let off = offset__of!(kvm_regs, elr_el1);
992         self.fd
993             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
994             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
995 
996         let mut off = offset__of!(kvm_regs, spsr);
997         for i in 0..KVM_NR_SPSR as usize {
998             self.fd
999                 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
1000                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1001             off += std::mem::size_of::<u64>();
1002         }
1003 
1004         let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
1005         for i in 0..32 {
1006             self.fd
1007                 .set_one_reg(
1008                     arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
1009                     state.fp_regs.vregs[i][0],
1010                 )
1011                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1012             off += mem::size_of::<u128>();
1013         }
1014 
1015         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
1016         self.fd
1017             .set_one_reg(
1018                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1019                 state.fp_regs.fpsr as u64,
1020             )
1021             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1022 
1023         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
1024         self.fd
1025             .set_one_reg(
1026                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1027                 state.fp_regs.fpcr as u64,
1028             )
1029             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1030         Ok(())
1031     }
1032     ///
1033     /// Save the state of the system registers.
1034     ///
1035     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1036     fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
1037         // Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are
1038         // around 500 registers.
1039         let mut reg_list = RegList::new(512);
1040         self.fd
1041             .get_reg_list(&mut reg_list)
1042             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
1043 
1044         // At this point reg_list should contain: core registers and system registers.
1045         // The register list contains the number of registers and their ids. We will be needing to
1046         // call KVM_GET_ONE_REG on each id in order to save all of them. We carve out from the list
1047         // the core registers which are represented in the kernel by kvm_regs structure and for which
1048         // we can calculate the id based on the offset in the structure.
1049 
1050         reg_list.retain(|regid| *regid != 0);
1051         reg_list.as_slice().to_vec().sort_unstable();
1052 
1053         reg_list.retain(|regid| is_system_register(*regid));
1054 
1055         // Now, for the rest of the registers left in the previously fetched register list, we are
1056         // simply calling KVM_GET_ONE_REG.
1057         let indices = reg_list.as_slice();
1058         for (_pos, index) in indices.iter().enumerate() {
1059             if _pos > 230 {
1060                 break;
1061             }
1062             state.push(kvm_bindings::kvm_one_reg {
1063                 id: *index,
1064                 addr: self
1065                     .fd
1066                     .get_one_reg(*index)
1067                     .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
1068             });
1069         }
1070 
1071         Ok(())
1072     }
1073     ///
1074     /// Restore the state of the system registers.
1075     ///
1076     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1077     fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
1078         for reg in state {
1079             self.fd
1080                 .set_one_reg(reg.id, reg.addr)
1081                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
1082         }
1083         Ok(())
1084     }
1085     ///
1086     /// Read the MPIDR - Multiprocessor Affinity Register.
1087     ///
1088     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1089     fn read_mpidr(&self) -> cpu::Result<u64> {
1090         self.fd
1091             .get_one_reg(MPIDR_EL1)
1092             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
1093     }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        // Collect each piece of state in the order mandated above
        // (MP state first, VCPU events last).
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fallback onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            // Synthetic MSR indices in the Hyper-V range (0x4000_00xx).
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5,
                0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                // NOTE(review): push fails if the FAM wrapper is at capacity;
                // the unwrap assumes there is always room — confirm.
                msr_entries.push(msr).unwrap();
            }
        }

        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            // GET_MSRS stopped early: the entry at `num_msrs` is the faulty
            // one. Keep the good prefix and retry the remainder chunk by
            // chunk, skipping one faulty MSR per iteration.
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]);

            loop {
                // Loop invariant: `faulty_msr_index` points at an MSR the
                // previous GET_MSRS call could not read, so it is in bounds.
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                // Retry everything after the faulty entry.
                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                // Append whatever this chunk managed to read.
                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                // The whole chunk was read: no faulty MSRs remain.
                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        // VCPU events are retrieved last (see ordering notes above).
        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
1225     ///
1226     /// Get the current AArch64 CPU state
1227     ///
1228     #[cfg(target_arch = "aarch64")]
1229     fn state(&self) -> cpu::Result<CpuState> {
1230         let mut state = CpuState {
1231             mp_state: self.get_mp_state()?,
1232             mpidr: self.read_mpidr()?,
1233             ..Default::default()
1234         };
1235         self.core_registers(&mut state.core_regs)?;
1236         self.system_registers(&mut state.sys_regs)?;
1237 
1238         Ok(state)
1239     }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully, when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // Restore in the order mandated above (CPUID first, LAPIC after
        // SREGS, MSRs after LAPIC, VCPU events last).
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fallback onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            // SET_MSRS stopped early: the entry at `num_msrs` is the faulty
            // one. Retry everything after it, chunk by chunk, skipping one
            // faulty MSR per iteration (same scheme as in `state()`).
            let mut faulty_msr_index = num_msrs;

            loop {
                // Loop invariant: `faulty_msr_index` points at an MSR the
                // previous SET_MSRS call could not write, so it is in bounds.
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries = MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]);
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                // The whole chunk was written: no faulty MSRs remain.
                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
1323     ///
1324     /// Restore the previously saved AArch64 CPU state
1325     ///
1326     #[cfg(target_arch = "aarch64")]
1327     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1328         self.set_core_registers(&state.core_regs)?;
1329         self.set_system_registers(&state.sys_regs)?;
1330         self.set_mp_state(state.mp_state)?;
1331 
1332         Ok(())
1333     }
1334 }
1335 
/// Device struct for KVM
///
/// Thin wrapper around the `DeviceFd` handle obtained from KVM, used to get
/// and set attributes of an in-kernel device.
pub struct KvmDevice {
    // File descriptor of the in-kernel KVM device.
    fd: DeviceFd,
}
1340 
1341 impl device::Device for KvmDevice {
1342     ///
1343     /// Set device attribute
1344     ///
1345     fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
1346         self.fd
1347             .set_device_attr(attr)
1348             .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
1349     }
1350     ///
1351     /// Get device attribute
1352     ///
1353     fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
1354         self.fd
1355             .get_device_attr(attr)
1356             .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
1357     }
1358 }
1359 
1360 impl AsRawFd for KvmDevice {
1361     fn as_raw_fd(&self) -> RawFd {
1362         self.fd.as_raw_fd()
1363     }
1364 }
1365