// Copyright © 2024 Institute of Software, CAS. All rights reserved.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, RwLock};

use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use vmm_sys_util::eventfd::EventFd;

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{check_required_kvm_extensions, is_system_register, VcpuKvmState};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
#[cfg(target_arch = "riscv64")]
use crate::arch::riscv64::aia::{Vaia, VaiaConfig};
#[cfg(target_arch = "riscv64")]
use crate::riscv64::aia::KvmAiaImsics;
#[cfg(target_arch = "riscv64")]
pub use crate::riscv64::{
    aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register,
    VcpuKvmState,
};
use crate::vm::{self, InterruptSourceConfig, VmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use crate::{cpu, hypervisor, HypervisorType};
#[cfg(target_arch = "riscv64")]
use crate::{offset_of, riscv64_reg_id};
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
// riscv64 dependencies
#[cfg(target_arch = "riscv64")]
pub mod riscv64;
#[cfg(target_arch = "aarch64")]
use std::mem;

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
pub use kvm_bindings::kvm_vcpu_events as VcpuEvents;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice,
    kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(target_arch = "riscv64")]
use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls::{Cap, Kvm, VcpuExit};
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_iowr_nr};
pub use {kvm_bindings, kvm_ioctls};

#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::regs;
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
use crate::RegList;

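// `KVM_CAP_SGX_ATTRIBUTE` is presumably not exported by kvm-bindings yet, so
// the capability number (from the kernel's kvm.h) is defined locally.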
#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;

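// Defines the `KVM_NMI` ioctl number (`_IO(KVMIO, 0x9a)`), used to inject a
// non-maskable interrupt into a vCPU.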
#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
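// TDG.VP.VMCALL leaf function numbers and return codes from the TDX
// Guest-Host Communication Interface (GHCI).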
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

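// Subcommand codes passed to the `KVM_MEMORY_ENCRYPT_OP` ioctl to drive the
// TDX VM setup sequence.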
#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

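// Userspace view of the TDX exit information embedded in `kvm_run` by the
// TDX-enabled kernel; the layout is assumed to match the kernel's C definition.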
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

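// Conversions between KVM's memory region representation and the
// hypervisor-agnostic `UserMemoryRegion`. Note that KVM-mapped memory is
// always readable; `KVM_MEM_READONLY` only removes write access.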
impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_one_reg> for crate::Register {
    fn from(s: kvm_bindings::kvm_one_reg) -> Self {
        crate::Register::Kvm(s)
    }
}

impl From<crate::Register> for kvm_bindings::kvm_one_reg {
    fn from(e: crate::Register) -> Self {
        match e {
            crate::Register::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("Register is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<kvm_bindings::kvm_vcpu_init> for crate::VcpuInit {
    fn from(s: kvm_bindings::kvm_vcpu_init) -> Self {
        crate::VcpuInit::Kvm(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for kvm_bindings::kvm_vcpu_init {
    fn from(e: crate::VcpuInit) -> Self {
        match e {
            crate::VcpuInit::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<kvm_bindings::RegList> for crate::RegList {
    fn from(s: kvm_bindings::RegList) -> Self {
        crate::RegList::Kvm(s)
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<crate::RegList> for kvm_bindings::RegList {
    fn from(e: crate::RegList) -> Self {
        match e {
            crate::RegList::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

#[cfg(target_arch = "riscv64")]
impl From<kvm_bindings::kvm_riscv_core> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_riscv_core) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(target_arch = "riscv64")]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_riscv_core {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

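// Bookkeeping for memory slots that require dirty-page logging, so that
// `KVM_MEM_LOG_DIRTY_PAGES` can be toggled in `start_dirty_log` and
// `stop_dirty_log`.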
struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let fd = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: Arc::new(Mutex::new(fd)),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Creates a virtual AIA device.
    ///
    fn create_vaia(&self, config: VaiaConfig) -> vm::Result<Arc<Mutex<dyn Vaia>>> {
        let aia_device = KvmAiaImsics::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVaia(anyhow!("Vaia error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(aia_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64, there is a limitation on the range of the
                    // 'devid': it must fit in a u16, i.e. it cannot be greater
                    // than 65535.
                    //
                    // The BDF cannot be used directly, because the 'segment' is
                    // in the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |     segment     |     bus    |   device   |  function  |
                    //
                    // Since we support only one bus per segment, we can build a
                    // 'devid' by replacing the 'bus' bits with the low 8 bits
                    // of the 'segment' data.
                    // This way we can resolve the range checking problem and
                    // give a different `devid` to all the devices. The
                    // limitation is that at most 256 segments can be supported.
                    //
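                    // For example, a (hypothetical) devid of 0x0002_0010
                    // (segment 0x0002, bus 0x00, devfn 0x10) becomes
                    // ((0x0002_0010 & 0x00ff_0000) >> 8) | (0x0002_0010 & 0xff)
                    // = 0x0210.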
                    let modified_devid = ((cfg.devid & 0x00ff_0000) >> 8) | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        let irq_routing =
            kvm_bindings::fam_wrappers::KvmIrqRouting::from_entries(&entries).unwrap();

        self.fd
            .set_gsi_routing(&irq_routing)
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut crate::VcpuInit) -> vm::Result<()> {
        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .get_preferred_target(&mut kvm_kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))?;
        *kvi = kvm_kvi.into();
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
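        // Bit 28 of the TD attributes word; setting it disables injection of
        // #VE on EPT violations (SEPT_VE_DISABLE).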
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

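// Issues a TDX subcommand by wrapping it in the structure expected by the
// `KVM_MEMORY_ENCRYPT_OP` ioctl on the given KVM fd.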
#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    ///
    /// Create a Vm of a specific type using the underlying hypervisor, passing memory size
    /// Return a hypervisor-agnostic Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type_and_memory(0).unwrap();
    /// ```
    fn create_vm_with_type_and_memory(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: u64,
    ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        self.create_vm_with_type(vm_type)
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, meaning the ioctl was
                        // interrupted, we have to retry since this can't be
                        // considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    fn create_standard_regs(&self) -> StandardRegisters {
        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
        {
            kvm_bindings::kvm_regs::default().into()
        }
        #[cfg(target_arch = "riscv64")]
        {
            kvm_bindings::kvm_riscv_core::default().into()
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These are the general-purpose registers of the Armv8-a architecture
        // (i.e. x0-x30 when used as 64-bit registers, or w0-w30 as 32-bit registers).
1363 for i in 0..31 {
1364 let mut bytes = [0_u8; 8];
1365 self.fd
1366 .lock()
1367 .unwrap()
1368 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1369 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1370 state.regs.regs[i] = u64::from_le_bytes(bytes);
1371 off += std::mem::size_of::<u64>();
1372 }
1373
1374 // We are now entering the "Other register" section of the ARMv8-a architecture.
1375 // First one, stack pointer.
1376 let off = offset_of!(user_pt_regs, sp);
1377 let mut bytes = [0_u8; 8];
1378 self.fd
1379 .lock()
1380 .unwrap()
1381 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1382 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1383 state.regs.sp = u64::from_le_bytes(bytes);
1384
1385 // Second one, the program counter.
1386 let off = offset_of!(user_pt_regs, pc);
1387 let mut bytes = [0_u8; 8];
1388 self.fd
1389 .lock()
1390 .unwrap()
1391 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1392 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1393 state.regs.pc = u64::from_le_bytes(bytes);
1394
1395 // Next is the processor state.
1396 let off = offset_of!(user_pt_regs, pstate);
1397 let mut bytes = [0_u8; 8];
1398 self.fd
1399 .lock()
1400 .unwrap()
1401 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1402 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1403 state.regs.pstate = u64::from_le_bytes(bytes);
1404
1405 // The stack pointer associated with EL1
1406 let off = offset_of!(kvm_regs, sp_el1);
1407 let mut bytes = [0_u8; 8];
1408 self.fd
1409 .lock()
1410 .unwrap()
1411 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1412 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1413 state.sp_el1 = u64::from_le_bytes(bytes);
1414
1415 // Exception Link Register for EL1, when taking an exception to EL1, this register
1416 // holds the address to which to return afterwards.
1417 let off = offset_of!(kvm_regs, elr_el1);
1418 let mut bytes = [0_u8; 8];
1419 self.fd
1420 .lock()
1421 .unwrap()
1422 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1423 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1424 state.elr_el1 = u64::from_le_bytes(bytes);
1425
1426 // Saved Program Status Registers, there are 5 of them used in the kernel.
1427 let mut off = offset_of!(kvm_regs, spsr);
1428 for i in 0..KVM_NR_SPSR as usize {
1429 let mut bytes = [0_u8; 8];
1430 self.fd
1431 .lock()
1432 .unwrap()
1433 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1434 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1435 state.spsr[i] = u64::from_le_bytes(bytes);
1436 off += std::mem::size_of::<u64>();
1437 }
1438
1439 // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
1440 // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
1441 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1442 for i in 0..32 {
1443 let mut bytes = [0_u8; 16];
1444 self.fd
1445 .lock()
1446 .unwrap()
1447 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
1448 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1449 state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
1450 off += mem::size_of::<u128>();
1451 }
1452
1453 // Floating-point Status Register
1454 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1455 let mut bytes = [0_u8; 4];
1456 self.fd
1457 .lock()
1458 .unwrap()
1459 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1460 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1461 state.fp_regs.fpsr = u32::from_le_bytes(bytes);
1462
1463 // Floating-point Control Register
1464 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1465 let mut bytes = [0_u8; 4];
1466 self.fd
1467 .lock()
1468 .unwrap()
1469 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1470 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1471 state.fp_regs.fpcr = u32::from_le_bytes(bytes);
1472 Ok(state.into())
1473 }
1474
1475 #[cfg(target_arch = "riscv64")]
1476 ///
1477 /// Returns the RISC-V vCPU core registers.
1478 /// The `KVM_GET_REGS` ioctl is not available on RISC-V 64-bit,
1479 /// `KVM_GET_ONE_REG` is used to get registers one by one.
1480 ///
get_regs(&self) -> cpu::Result<StandardRegisters>1481 fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1482 let mut state = kvm_riscv_core::default();
1483
1484 /// Macro used to extract RISC-V register data from KVM Vcpu according
1485 /// to `$reg_name` provided to `state`.
1486 macro_rules! riscv64_get_one_reg_from_vcpu {
1487 (mode) => {
1488 let off = offset_of!(kvm_riscv_core, mode);
1489 let mut bytes = [0_u8; 8];
1490 self.fd
1491 .lock()
1492 .unwrap()
1493 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1494 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1495 state.mode = u64::from_le_bytes(bytes);
1496 };
1497 ($reg_name:ident) => {
1498 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
1499 let mut bytes = [0_u8; 8];
1500 self.fd
1501 .lock()
1502 .unwrap()
1503 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1504 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1505 state.regs.$reg_name = u64::from_le_bytes(bytes);
1506 };
1507 }
1508
1509 riscv64_get_one_reg_from_vcpu!(pc);
1510 riscv64_get_one_reg_from_vcpu!(ra);
1511 riscv64_get_one_reg_from_vcpu!(sp);
1512 riscv64_get_one_reg_from_vcpu!(gp);
1513 riscv64_get_one_reg_from_vcpu!(tp);
1514 riscv64_get_one_reg_from_vcpu!(t0);
1515 riscv64_get_one_reg_from_vcpu!(t1);
1516 riscv64_get_one_reg_from_vcpu!(t2);
1517 riscv64_get_one_reg_from_vcpu!(s0);
1518 riscv64_get_one_reg_from_vcpu!(s1);
1519 riscv64_get_one_reg_from_vcpu!(a0);
1520 riscv64_get_one_reg_from_vcpu!(a1);
1521 riscv64_get_one_reg_from_vcpu!(a2);
1522 riscv64_get_one_reg_from_vcpu!(a3);
1523 riscv64_get_one_reg_from_vcpu!(a4);
1524 riscv64_get_one_reg_from_vcpu!(a5);
1525 riscv64_get_one_reg_from_vcpu!(a6);
1526 riscv64_get_one_reg_from_vcpu!(a7);
1527 riscv64_get_one_reg_from_vcpu!(s2);
1528 riscv64_get_one_reg_from_vcpu!(s3);
1529 riscv64_get_one_reg_from_vcpu!(s4);
1530 riscv64_get_one_reg_from_vcpu!(s5);
1531 riscv64_get_one_reg_from_vcpu!(s6);
1532 riscv64_get_one_reg_from_vcpu!(s7);
1533 riscv64_get_one_reg_from_vcpu!(s8);
1534 riscv64_get_one_reg_from_vcpu!(s9);
1535 riscv64_get_one_reg_from_vcpu!(s10);
1536 riscv64_get_one_reg_from_vcpu!(s11);
1537 riscv64_get_one_reg_from_vcpu!(t3);
1538 riscv64_get_one_reg_from_vcpu!(t4);
1539 riscv64_get_one_reg_from_vcpu!(t5);
1540 riscv64_get_one_reg_from_vcpu!(t6);
1541 riscv64_get_one_reg_from_vcpu!(mode);
1542
1543 Ok(state.into())
1544 }
1545
1546 #[cfg(target_arch = "x86_64")]
1547 ///
1548 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
1549 ///
set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()>1550 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
1551 let regs = (*regs).into();
1552 self.fd
1553 .lock()
1554 .unwrap()
1555 .set_regs(®s)
1556 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
1557 }
1558
1559 ///
1560 /// Sets the vCPU general purpose registers.
1561 /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG`
1562 /// is used to set registers one by one.
1563 ///
1564 #[cfg(target_arch = "aarch64")]
set_regs(&self, state: &StandardRegisters) -> cpu::Result<()>1565 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
1566 // The function follows the exact identical order from `state`. Look there
1567 // for some additional info on registers.
1568 let kvm_regs_state: kvm_regs = (*state).into();
1569 let mut off = offset_of!(user_pt_regs, regs);
1570 for i in 0..31 {
1571 self.fd
1572 .lock()
1573 .unwrap()
1574 .set_one_reg(
1575 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1576 &kvm_regs_state.regs.regs[i].to_le_bytes(),
1577 )
1578 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1579 off += std::mem::size_of::<u64>();
1580 }
1581
1582 let off = offset_of!(user_pt_regs, sp);
1583 self.fd
1584 .lock()
1585 .unwrap()
1586 .set_one_reg(
1587 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1588 &kvm_regs_state.regs.sp.to_le_bytes(),
1589 )
1590 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1591
1592 let off = offset_of!(user_pt_regs, pc);
1593 self.fd
1594 .lock()
1595 .unwrap()
1596 .set_one_reg(
1597 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1598 &kvm_regs_state.regs.pc.to_le_bytes(),
1599 )
1600 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1601
1602 let off = offset_of!(user_pt_regs, pstate);
1603 self.fd
1604 .lock()
1605 .unwrap()
1606 .set_one_reg(
1607 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1608 &kvm_regs_state.regs.pstate.to_le_bytes(),
1609 )
1610 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1611
1612 let off = offset_of!(kvm_regs, sp_el1);
1613 self.fd
1614 .lock()
1615 .unwrap()
1616 .set_one_reg(
1617 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1618 &kvm_regs_state.sp_el1.to_le_bytes(),
1619 )
1620 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1621
1622 let off = offset_of!(kvm_regs, elr_el1);
1623 self.fd
1624 .lock()
1625 .unwrap()
1626 .set_one_reg(
1627 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1628 &kvm_regs_state.elr_el1.to_le_bytes(),
1629 )
1630 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1631
1632 let mut off = offset_of!(kvm_regs, spsr);
1633 for i in 0..KVM_NR_SPSR as usize {
1634 self.fd
1635 .lock()
1636 .unwrap()
1637 .set_one_reg(
1638 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1639 &kvm_regs_state.spsr[i].to_le_bytes(),
1640 )
1641 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1642 off += std::mem::size_of::<u64>();
1643 }
1644
1645 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1646 for i in 0..32 {
1647 self.fd
1648 .lock()
1649 .unwrap()
1650 .set_one_reg(
1651 arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
1652 &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(),
1653 )
1654 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1655 off += mem::size_of::<u128>();
1656 }
1657
1658 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1659 self.fd
1660 .lock()
1661 .unwrap()
1662 .set_one_reg(
1663 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1664 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(),
1665 )
1666 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1667
1668 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1669 self.fd
1670 .lock()
1671 .unwrap()
1672 .set_one_reg(
1673 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1674 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(),
1675 )
1676 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1677 Ok(())
1678 }
1679
1680 #[cfg(target_arch = "riscv64")]
1681 ///
1682 /// Sets the RISC-V vCPU core registers.
1683 /// The `KVM_SET_REGS` ioctl is not available on RISC-V 64-bit,
1684 /// `KVM_SET_ONE_REG` is used to set registers one by one.
1685 ///
set_regs(&self, state: &StandardRegisters) -> cpu::Result<()>1686 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
1687 // The function follows the exact identical order from `state`. Look there
1688 // for some additional info on registers.
1689 let kvm_regs_state: kvm_riscv_core = (*state).into();
1690
1691 /// Macro used to set value of specific RISC-V `$reg_name` stored in
1692 /// `state` to KVM Vcpu.
1693 macro_rules! riscv64_set_one_reg_to_vcpu {
1694 (mode) => {
1695 let off = offset_of!(kvm_riscv_core, mode);
1696 self.fd
1697 .lock()
1698 .unwrap()
1699 .set_one_reg(
1700 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1701 &kvm_regs_state.mode.to_le_bytes(),
1702 )
1703 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1704 };
1705 ($reg_name:ident) => {
1706 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
1707 self.fd
1708 .lock()
1709 .unwrap()
1710 .set_one_reg(
1711 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1712 &kvm_regs_state.regs.$reg_name.to_le_bytes(),
1713 )
1714 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1715 };
1716 }

        riscv64_set_one_reg_to_vcpu!(pc);
        riscv64_set_one_reg_to_vcpu!(ra);
        riscv64_set_one_reg_to_vcpu!(sp);
        riscv64_set_one_reg_to_vcpu!(gp);
        riscv64_set_one_reg_to_vcpu!(tp);
        riscv64_set_one_reg_to_vcpu!(t0);
        riscv64_set_one_reg_to_vcpu!(t1);
        riscv64_set_one_reg_to_vcpu!(t2);
        riscv64_set_one_reg_to_vcpu!(s0);
        riscv64_set_one_reg_to_vcpu!(s1);
        riscv64_set_one_reg_to_vcpu!(a0);
        riscv64_set_one_reg_to_vcpu!(a1);
        riscv64_set_one_reg_to_vcpu!(a2);
        riscv64_set_one_reg_to_vcpu!(a3);
        riscv64_set_one_reg_to_vcpu!(a4);
        riscv64_set_one_reg_to_vcpu!(a5);
        riscv64_set_one_reg_to_vcpu!(a6);
        riscv64_set_one_reg_to_vcpu!(a7);
        riscv64_set_one_reg_to_vcpu!(s2);
        riscv64_set_one_reg_to_vcpu!(s3);
        riscv64_set_one_reg_to_vcpu!(s4);
        riscv64_set_one_reg_to_vcpu!(s5);
        riscv64_set_one_reg_to_vcpu!(s6);
        riscv64_set_one_reg_to_vcpu!(s7);
        riscv64_set_one_reg_to_vcpu!(s8);
        riscv64_set_one_reg_to_vcpu!(s9);
        riscv64_set_one_reg_to_vcpu!(s10);
        riscv64_set_one_reg_to_vcpu!(s11);
        riscv64_set_one_reg_to_vcpu!(t3);
        riscv64_set_one_reg_to_vcpu!(t4);
        riscv64_set_one_reg_to_vcpu!(t5);
        riscv64_set_one_reg_to_vcpu!(t6);
        riscv64_set_one_reg_to_vcpu!(mode);

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating-point state (FPU) of the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the floating-point state (FPU) of the vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call to set up the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .lock()
            .unwrap()
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call to enable the Hyper-V SynIC.
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated, as it will influence which MSRs should be saved later.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .lock()
            .unwrap()
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }

    ///
    /// X86-specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .fd
            .lock()
            .unwrap()
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_lapic(&klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSRs) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        let succ = self
            .fd
            .lock()
            .unwrap()
            .get_msrs(&mut kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &kvm_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets up the model-specific registers (MSRs) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
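    /// # Example (sketch)
    ///
    /// A minimal illustration of the partial-write contract; `vcpu` and
    /// `entries` are hypothetical:
    ///
    /// ```ignore
    /// // If, say, the second of three entries is unsupported, KVM stops there
    /// // and `set_msrs` returns Ok(1); the caller may skip the faulty entry
    /// // and retry the rest (see `set_state` below for exactly this pattern).
    /// let written = vcpu.set_msrs(&entries)?;
    /// assert!(written <= entries.len());
    /// ```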
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        self.fd
            .lock()
            .unwrap()
            .set_msrs(&kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    ///
    /// Returns the vCPU's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
            .into())
    }

    ///
    /// Sets the vCPU's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_mp_state(mp_state.into())
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translates a guest virtual address to a guest physical address using
    /// the `KVM_TRANSLATE` ioctl.
    ///
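    /// # Example (sketch)
    ///
    /// A hypothetical lookup; the GVA value is illustrative and must be
    /// mapped by the guest for the call to succeed:
    ///
    /// ```ignore
    /// let (gpa, _flags) = vcpu.translate_gva(0x1000, 0)?;
    /// println!("GVA 0x1000 -> GPA {gpa:#x}");
    /// ```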
    fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
        let tr = self
            .fd
            .lock()
            .unwrap()
            .translate_gva(gva)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
        // tr.valid is set if the GVA is mapped to a valid GPA.
        match tr.valid {
            0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
                "Invalid GVA: {:#x}",
                gva
            ))),
            _ => Ok((tr.physical_address, 0)),
        }
    }

    ///
    /// Triggers the running of the current virtual CPU, returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.lock().unwrap().run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
                #[cfg(feature = "tdx")]
                VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
                VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Lets the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
            // The Linux kernel returns -EINVAL if the PV clock isn't yet
            // initialised, which could be because we're still in firmware or
            // because the guest doesn't use the KVM clock.
            if e.errno() != libc::EINVAL {
                return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
            }
        }

        Ok(())
    }

    #[cfg(not(target_arch = "riscv64"))]
    ///
    /// Sets debug registers to install hardware breakpoints and/or enable single step.
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses does not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
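        // Worked example (illustrative values): with two breakpoint addresses,
        // the x86_64 block below yields
        // DR7 = 0x0600 | (2 << 0) | (2 << 2) = 0x060a,
        // i.e. the global-enable bits for DR0 and DR1 on top of GE and bit 10.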
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                dbg.arch.debugreg[i] = addr.0;
                // Set the global breakpoint enable flag
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_get_finalized_features(&self) -> i32 {
        kvm_bindings::KVM_ARM_VCPU_SVE as i32
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_set_processor_features(
        &self,
        vm: &Arc<dyn crate::Vm>,
        kvi: &mut crate::VcpuInit,
        id: u8,
    ) -> cpu::Result<()> {
        use std::arch::is_aarch64_feature_detected;
        #[allow(clippy::nonminimal_bool)]
        let sve_supported =
            is_aarch64_feature_detected!("sve") || is_aarch64_feature_detected!("sve2");

        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();

        // We already checked that the capability is supported.
        kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2;
        if vm
            .as_any()
            .downcast_ref::<crate::kvm::KvmVm>()
            .unwrap()
            .check_extension(Cap::ArmPmuV3)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
        }

        if sve_supported
            && vm
                .as_any()
                .downcast_ref::<crate::kvm::KvmVm>()
                .unwrap()
                .check_extension(Cap::ArmSve)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_SVE;
        }

        // Non-boot cpus are powered off initially.
        if id > 0 {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
        }

        *kvi = kvm_kvi.into();

        Ok(())
    }

    ///
    /// Returns a VcpuInit with default values set.
    ///
    #[cfg(target_arch = "aarch64")]
    fn create_vcpu_init(&self) -> crate::VcpuInit {
        kvm_bindings::kvm_vcpu_init::default().into()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &crate::VcpuInit) -> cpu::Result<()> {
        let kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(&kvm_kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_finalize(&feature)
            .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into()))
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    ///
    /// Gets the list of guest registers that are supported by the
    /// `KVM_GET_ONE_REG`/`KVM_SET_ONE_REG` calls.
    ///
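    /// # Example (sketch)
    ///
    /// A hypothetical call; the capacity of 500 mirrors what the AArch64
    /// `state()` implementation below uses:
    ///
    /// ```ignore
    /// let mut reg_list = RegList::new(500).unwrap();
    /// vcpu.get_reg_list(&mut reg_list)?;
    /// // Each returned id can be passed to KVM_GET_ONE_REG/KVM_SET_ONE_REG.
    /// ```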
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        let mut kvm_reg_list: kvm_bindings::RegList = reg_list.clone().into();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut kvm_reg_list)
            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetRegList(e.into()))?;
        *reg_list = kvm_reg_list.into();
        Ok(())
    }

    ///
    /// Gets the value of a system register
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12).
        // KVM defines its own ID for each AArch64 system register, which is
        // used when calling `KVM_GET/SET_ONE_REG` to access a system register
        // of a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 ID as input parameter, converts
        // it to the corresponding KVM ID, and calls the `KVM_GET_ONE_REG` API
        // to get the value of the system register.
        //
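        // Put differently (descriptive note): the standard encoding carries
        // the Op0/Op1/CRn/CRm/Op2 fields five bits higher than the KVM ID
        // does, so shifting right by 5 and masking with the five
        // KVM_REG_ARM64_SYSREG_*_MASK values keeps exactly those fields,
        // while the KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG
        // prefix marks the result as a 64-bit AArch64 system-register ID.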
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Gets the value of a non-core register
    ///
    #[cfg(target_arch = "riscv64")]
    fn get_non_core_reg(&self, _non_core_reg: u32) -> cpu::Result<u64> {
        unimplemented!()
    }

    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register offset of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &regs::PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Configure registers for a given RISC-V CPU.
    ///
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        // Set the A0 register to the hartid of this CPU.
        let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a0),
                &u64::from(cpu_id).to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Set the PC (Program Counter) to the current program address (kernel address).
        let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, pc),
                &boot_ip.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
        // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
        // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt.
        let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a1),
                &fdt_start.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before almost everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change the state of the LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks like
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back to a slower method by getting MSRs
        // in chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
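        // Illustrative walk-through: if entry 3 of 10 cannot be read,
        // GET_MSRS returns 3; the fallback below keeps entries 0..3, skips
        // the faulty entry 3, and retries from entry 4, repeating until one
        // call reads its whole remaining sub-slice.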
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }

    ///
    /// Get the current AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers.
        state.core_regs = self.get_regs()?.into();

        // Get system registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For ArmV8 there are around 500 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain the core registers and the
        // system registers.
        // The register list contains the number of registers and their ids.
        // We will need to call KVM_GET_ONE_REG on each id in order to save
        // all of them. We carve out from the list the core registers, which
        // are represented in the kernel by the kvm_regs structure and for
        // which we can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Get the current RISC-V 64-bit CPU state.
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers.
        state.core_regs = self.get_regs()?.into();

        // Get non-core registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For RISC-V 64-bit there are around 200 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(200).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain:
        // - core registers
        // - config registers
        // - timer registers
        // - control and status registers
        // - AIA control and status registers
        // - smstateen control and status registers
        // - sbi_sta control and status registers.
        //
        // The register list contains the number of registers and their ids.
        // We will need to call KVM_GET_ONE_REG on each id in order to save
        // all of them. We carve out from the list the core registers, which
        // are represented in the kernel by the `kvm_riscv_core` structure and
        // for which we can calculate the id based on the offset in the
        // structure.
        reg_list.retain(|regid| is_non_core_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.non_core_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores them.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// is only restored successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set by SET_MSRS is different from the
        // expected amount, we fall back to a slower method by setting MSRs
        // in chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers.
        self.set_regs(&state.core_regs.into())?;
        // Set system registers.
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Restore the previously saved RISC-V 64-bit CPU state.
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers.
        self.set_regs(&state.core_regs.into())?;
        // Set non-core registers.
        for reg in &state.non_core_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state.
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }

    ///
    /// Returns the details about the TDX exit reason.
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for the TDX exit.
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU.
    ///
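    /// # Example (sketch)
    ///
    /// A hypothetical use during vCPU bring-up; whether every boot entry is
    /// writable depends on the host:
    ///
    /// ```ignore
    /// let entries = vcpu.boot_msr_entries();
    /// let written = vcpu.set_msrs(&entries)?;
    /// assert_eq!(written, entries.len());
    /// ```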
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC, if available.
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC, if available.
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger an NMI interrupt.
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that returns the vCPU's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that sets the vCPU's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        // SAFETY: Here we trust the kernel not to read past the end of the
        // kvm_xsave struct when calling the kvm-ioctl library function.
        unsafe {
            self.fd
                .lock()
                .unwrap()
                .set_xsave(&xsave)
                .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that returns the vCPU's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that sets the vCPU's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs, as well as
    /// related states of the vCPU.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs, as well as related
    /// states of the vCPU.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

#[cfg(test)]
mod tests {
    #[test]
    #[cfg(target_arch = "riscv64")]
    fn test_get_and_set_regs() {
        use super::*;

        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        let core_regs = StandardRegisters::from(kvm_riscv_core {
            regs: user_regs_struct {
                pc: 0x00,
                ra: 0x01,
                sp: 0x02,
                gp: 0x03,
                tp: 0x04,
                t0: 0x05,
                t1: 0x06,
                t2: 0x07,
                s0: 0x08,
                s1: 0x09,
                a0: 0x0a,
                a1: 0x0b,
                a2: 0x0c,
                a3: 0x0d,
                a4: 0x0e,
                a5: 0x0f,
                a6: 0x10,
                a7: 0x11,
                s2: 0x12,
                s3: 0x13,
                s4: 0x14,
                s5: 0x15,
                s6: 0x16,
                s7: 0x17,
                s8: 0x18,
                s9: 0x19,
                s10: 0x1a,
                s11: 0x1b,
                t3: 0x1c,
                t4: 0x1d,
                t5: 0x1e,
                t6: 0x1f,
            },
            mode: 0x00,
        });

        vcpu0.set_regs(&core_regs).unwrap();
        assert_eq!(vcpu0.get_regs().unwrap(), core_regs);
    }
}