// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
    MPIDR_EL1,
};
use crate::cpu;
use crate::device;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, VmmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset__of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::os::unix::io::{AsRawFd, RawFd};
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
#[cfg(target_arch = "x86_64")]
use vm_memory::Address;
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::NUM_IOAPIC_PINS;
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::{
    check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters, KVM_TSS_ADDRESS,
};
#[cfg(target_arch = "x86_64")]
pub use x86_64::{
    CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
    Xsave, CPUID_FLAG_VALID_INDEX,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
#[cfg(feature = "tdx")]
use kvm_bindings::KVMIO;
pub use kvm_bindings::{
    kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
    kvm_userspace_memory_region, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
    KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_expr, ioctl_ioc_nr, ioctl_iowr_nr};

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
    kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
    kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
    kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
    kvm_ioctls::VcpuExit,
};

// Defines the KVM_MEMORY_ENCRYPT_OP() ioctl number used by `tdx_command` below.
#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

/// Command codes carried in the payload of the `KVM_MEMORY_ENCRYPT_OP` ioctl
/// (see `tdx_command`). The discriminant values are part of the kernel ABI.
#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    #[allow(dead_code)]
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

/// KVM-specific VM state. Currently empty: `state()` returns it and
/// `set_state()` accepts it so the save/restore API has a concrete type,
/// but no data needs to round-trip today.
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}

pub use KvmVmState as VmState;

/// Wrapper over KVM VM ioctls.
107 pub struct KvmVm { 108 fd: Arc<VmFd>, 109 #[cfg(target_arch = "x86_64")] 110 msrs: MsrEntries, 111 state: KvmVmState, 112 } 113 114 /// 115 /// Implementation of Vm trait for KVM 116 /// Example: 117 /// #[cfg(feature = "kvm")] 118 /// extern crate hypervisor 119 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 120 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 121 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 122 /// vm.set/get().unwrap() 123 /// 124 impl vm::Vm for KvmVm { 125 #[cfg(target_arch = "x86_64")] 126 /// 127 /// Sets the address of the three-page region in the VM's address space. 128 /// 129 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 130 self.fd 131 .set_tss_address(offset) 132 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 133 } 134 /// 135 /// Creates an in-kernel interrupt controller. 136 /// 137 fn create_irq_chip(&self) -> vm::Result<()> { 138 self.fd 139 .create_irq_chip() 140 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 141 } 142 /// 143 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 144 /// 145 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 146 self.fd 147 .register_irqfd(fd, gsi) 148 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 149 } 150 /// 151 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 152 /// 153 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 154 self.fd 155 .unregister_irqfd(fd, gsi) 156 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 157 } 158 /// 159 /// Creates a VcpuFd object from a vcpu RawFd. 
160 /// 161 fn create_vcpu( 162 &self, 163 id: u8, 164 vmmops: Option<Arc<Box<dyn VmmOps>>>, 165 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 166 let vc = self 167 .fd 168 .create_vcpu(id as u64) 169 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 170 let vcpu = KvmVcpu { 171 fd: vc, 172 #[cfg(target_arch = "x86_64")] 173 msrs: self.msrs.clone(), 174 vmmops, 175 #[cfg(target_arch = "x86_64")] 176 hyperv_synic: AtomicBool::new(false), 177 }; 178 Ok(Arc::new(vcpu)) 179 } 180 /// 181 /// Registers an event to be signaled whenever a certain address is written to. 182 /// 183 fn register_ioevent( 184 &self, 185 fd: &EventFd, 186 addr: &IoEventAddress, 187 datamatch: Option<vm::DataMatch>, 188 ) -> vm::Result<()> { 189 if let Some(dm) = datamatch { 190 match dm { 191 vm::DataMatch::DataMatch32(kvm_dm32) => self 192 .fd 193 .register_ioevent(fd, addr, kvm_dm32) 194 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 195 vm::DataMatch::DataMatch64(kvm_dm64) => self 196 .fd 197 .register_ioevent(fd, addr, kvm_dm64) 198 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 199 } 200 } else { 201 self.fd 202 .register_ioevent(fd, addr, NoDatamatch) 203 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 204 } 205 } 206 /// 207 /// Unregisters an event from a certain address it has been previously registered to. 208 /// 209 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 210 self.fd 211 .unregister_ioevent(fd, addr, NoDatamatch) 212 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 213 } 214 /// 215 /// Sets the GSI routing table entries, overwriting any previously set 216 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 
217 /// 218 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 219 let mut irq_routing = 220 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 221 irq_routing[0].nr = entries.len() as u32; 222 irq_routing[0].flags = 0; 223 224 unsafe { 225 let entries_slice: &mut [kvm_irq_routing_entry] = 226 irq_routing[0].entries.as_mut_slice(entries.len()); 227 entries_slice.copy_from_slice(&entries); 228 } 229 230 self.fd 231 .set_gsi_routing(&irq_routing[0]) 232 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 233 } 234 /// 235 /// Creates a memory region structure that can be used with set_user_memory_region 236 /// 237 fn make_user_memory_region( 238 &self, 239 slot: u32, 240 guest_phys_addr: u64, 241 memory_size: u64, 242 userspace_addr: u64, 243 readonly: bool, 244 log_dirty_pages: bool, 245 ) -> MemoryRegion { 246 MemoryRegion { 247 slot, 248 guest_phys_addr, 249 memory_size, 250 userspace_addr, 251 flags: if readonly { KVM_MEM_READONLY } else { 0 } 252 | if log_dirty_pages { 253 KVM_MEM_LOG_DIRTY_PAGES 254 } else { 255 0 256 }, 257 } 258 } 259 /// 260 /// Creates/modifies a guest physical memory slot. 261 /// 262 fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> { 263 // Safe because guest regions are guaranteed not to overlap. 264 unsafe { 265 self.fd 266 .set_user_memory_region(user_memory_region) 267 .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into())) 268 } 269 } 270 /// 271 /// Creates an emulated device in the kernel. 272 /// 273 /// See the documentation for `KVM_CREATE_DEVICE`. 274 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> { 275 let fd = self 276 .fd 277 .create_device(device) 278 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 279 let device = KvmDevice { fd }; 280 Ok(Arc::new(device)) 281 } 282 /// 283 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 
284 /// 285 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 286 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 287 self.fd 288 .get_preferred_target(kvi) 289 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 290 } 291 #[cfg(target_arch = "x86_64")] 292 fn enable_split_irq(&self) -> vm::Result<()> { 293 // Set TSS 294 self.fd 295 .set_tss_address(KVM_TSS_ADDRESS.raw_value() as usize) 296 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 297 // Create split irqchip 298 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 299 // are not. 300 let mut cap = kvm_enable_cap { 301 cap: KVM_CAP_SPLIT_IRQCHIP, 302 ..Default::default() 303 }; 304 cap.args[0] = NUM_IOAPIC_PINS as u64; 305 self.fd 306 .enable_cap(&cap) 307 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 308 Ok(()) 309 } 310 /// Retrieve guest clock. 311 #[cfg(target_arch = "x86_64")] 312 fn get_clock(&self) -> vm::Result<ClockData> { 313 self.fd 314 .get_clock() 315 .map_err(|e| vm::HypervisorVmError::GetClock(e.into())) 316 } 317 /// Set guest clock. 318 #[cfg(target_arch = "x86_64")] 319 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 320 self.fd 321 .set_clock(data) 322 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 323 } 324 /// Checks if a particular `Cap` is available. 325 fn check_extension(&self, c: Cap) -> bool { 326 self.fd.check_extension(c) 327 } 328 /// Create a device that is used for passthrough 329 fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> { 330 let mut vfio_dev = kvm_create_device { 331 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 332 fd: 0, 333 flags: 0, 334 }; 335 336 self.create_device(&mut vfio_dev) 337 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 338 } 339 /// 340 /// Get the Vm state. 
Return VM specific data 341 /// 342 fn state(&self) -> vm::Result<VmState> { 343 Ok(self.state) 344 } 345 /// 346 /// Set the VM state 347 /// 348 fn set_state(&self, _state: VmState) -> vm::Result<()> { 349 Ok(()) 350 } 351 352 /// 353 /// Get dirty pages bitmap (one bit per page) 354 /// 355 fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> { 356 self.fd 357 .get_dirty_log(slot, memory_size as usize) 358 .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into())) 359 } 360 361 /// 362 /// Initialize TDX for this VM 363 /// 364 #[cfg(feature = "tdx")] 365 fn tdx_init(&self, cpuid: &CpuId, max_vcpus: u32) -> vm::Result<()> { 366 #[repr(C)] 367 struct TdxInitVm { 368 max_vcpus: u32, 369 reserved: u32, 370 attributes: u64, 371 cpuid: u64, 372 } 373 let data = TdxInitVm { 374 max_vcpus, 375 reserved: 0, 376 attributes: 0, 377 cpuid: cpuid.as_fam_struct_ptr() as u64, 378 }; 379 380 tdx_command( 381 &self.fd.as_raw_fd(), 382 TdxCommand::InitVm, 383 0, 384 &data as *const _ as u64, 385 ) 386 .map_err(vm::HypervisorVmError::InitializeTdx) 387 } 388 389 /// 390 /// Finalize the TDX setup for this VM 391 /// 392 #[cfg(feature = "tdx")] 393 fn tdx_finalize(&self) -> vm::Result<()> { 394 tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0) 395 .map_err(vm::HypervisorVmError::FinalizeTdx) 396 } 397 398 /// 399 /// Initialize memory regions for the TDX VM 400 /// 401 #[cfg(feature = "tdx")] 402 fn tdx_init_memory_region( 403 &self, 404 host_address: u64, 405 guest_address: u64, 406 size: u64, 407 measure: bool, 408 ) -> vm::Result<()> { 409 #[repr(C)] 410 struct TdxInitMemRegion { 411 host_address: u64, 412 guest_address: u64, 413 pages: u64, 414 } 415 let data = TdxInitMemRegion { 416 host_address, 417 guest_address, 418 pages: size / 4096, 419 }; 420 421 tdx_command( 422 &self.fd.as_raw_fd(), 423 TdxCommand::InitMemRegion, 424 if measure { 1 } else { 0 }, 425 &data as *const _ as u64, 426 ) 427 
.map_err(vm::HypervisorVmError::InitMemRegionTdx) 428 } 429 } 430 431 #[cfg(feature = "tdx")] 432 fn tdx_command( 433 fd: &RawFd, 434 command: TdxCommand, 435 metadata: u32, 436 data: u64, 437 ) -> std::result::Result<(), std::io::Error> { 438 #[repr(C)] 439 struct TdxIoctlCmd { 440 command: TdxCommand, 441 metadata: u32, 442 data: u64, 443 } 444 let cmd = TdxIoctlCmd { 445 command, 446 metadata, 447 data, 448 }; 449 let ret = unsafe { 450 ioctl_with_val( 451 fd, 452 KVM_MEMORY_ENCRYPT_OP(), 453 &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong, 454 ) 455 }; 456 457 if ret < 0 { 458 return Err(std::io::Error::last_os_error()); 459 } 460 Ok(()) 461 } 462 463 /// Wrapper over KVM system ioctls. 464 pub struct KvmHypervisor { 465 kvm: Kvm, 466 } 467 /// Enum for KVM related error 468 #[derive(Debug)] 469 pub enum KvmError { 470 CapabilityMissing(Cap), 471 } 472 pub type KvmResult<T> = result::Result<T, KvmError>; 473 impl KvmHypervisor { 474 /// Create a hypervisor based on Kvm 475 pub fn new() -> hypervisor::Result<KvmHypervisor> { 476 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 477 let api_version = kvm_obj.get_api_version(); 478 479 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 480 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 481 } 482 483 Ok(KvmHypervisor { kvm: kvm_obj }) 484 } 485 } 486 /// Implementation of Hypervisor trait for KVM 487 /// Example: 488 /// #[cfg(feature = "kvm")] 489 /// extern crate hypervisor 490 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 491 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 492 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 493 /// 494 impl hypervisor::Hypervisor for KvmHypervisor { 495 /// Create a KVM vm object of a specific VM type and return the object as Vm trait object 496 /// Example 497 /// # extern crate hypervisor; 498 /// # use hypervisor::KvmHypervisor; 499 /// use 
hypervisor::KvmVm; 500 /// let hypervisor = KvmHypervisor::new().unwrap(); 501 /// let vm = hypervisor.create_vm_with_type(KvmVmType::LegacyVm).unwrap() 502 /// 503 fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> { 504 let fd: VmFd; 505 loop { 506 match self.kvm.create_vm_with_type(vm_type) { 507 Ok(res) => fd = res, 508 Err(e) => { 509 if e.errno() == libc::EINTR { 510 // If the error returned is EINTR, which means the 511 // ioctl has been interrupted, we have to retry as 512 // this can't be considered as a regular error. 513 continue; 514 } else { 515 return Err(hypervisor::HypervisorError::VmCreate(e.into())); 516 } 517 } 518 } 519 break; 520 } 521 522 let vm_fd = Arc::new(fd); 523 524 #[cfg(target_arch = "x86_64")] 525 { 526 let msr_list = self.get_msr_list()?; 527 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 528 let mut msrs = MsrEntries::new(num_msrs).unwrap(); 529 let indices = msr_list.as_slice(); 530 let msr_entries = msrs.as_mut_slice(); 531 for (pos, index) in indices.iter().enumerate() { 532 msr_entries[pos].index = *index; 533 } 534 535 Ok(Arc::new(KvmVm { 536 fd: vm_fd, 537 msrs, 538 state: VmState {}, 539 })) 540 } 541 542 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 543 { 544 Ok(Arc::new(KvmVm { 545 fd: vm_fd, 546 state: VmState {}, 547 })) 548 } 549 } 550 551 /// Create a KVM vm object and return the object as Vm trait object 552 /// Example 553 /// # extern crate hypervisor; 554 /// # use hypervisor::KvmHypervisor; 555 /// use hypervisor::KvmVm; 556 /// let hypervisor = KvmHypervisor::new().unwrap(); 557 /// let vm = hypervisor.create_vm().unwrap() 558 /// 559 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 560 self.create_vm_with_type(0) // Create with default platform type 561 } 562 563 fn check_required_extensions(&self) -> hypervisor::Result<()> { 564 check_required_kvm_extensions(&self.kvm).expect("Missing KVM capabilities"); 565 Ok(()) 566 } 567 568 /// 569 /// 
Returns the size of the memory mapping required to use the vcpu's `kvm_run` structure. 570 /// 571 fn get_vcpu_mmap_size(&self) -> hypervisor::Result<usize> { 572 self.kvm 573 .get_vcpu_mmap_size() 574 .map_err(|e| hypervisor::HypervisorError::GetVcpuMmap(e.into())) 575 } 576 /// 577 /// Gets the recommended maximum number of VCPUs per VM. 578 /// 579 fn get_max_vcpus(&self) -> hypervisor::Result<usize> { 580 Ok(self.kvm.get_max_vcpus()) 581 } 582 /// 583 /// Gets the recommended number of VCPUs per VM. 584 /// 585 fn get_nr_vcpus(&self) -> hypervisor::Result<usize> { 586 Ok(self.kvm.get_nr_vcpus()) 587 } 588 #[cfg(target_arch = "x86_64")] 589 /// 590 /// Checks if a particular `Cap` is available. 591 /// 592 fn check_capability(&self, c: Cap) -> bool { 593 self.kvm.check_extension(c) 594 } 595 #[cfg(target_arch = "x86_64")] 596 /// 597 /// X86 specific call to get the system supported CPUID values. 598 /// 599 fn get_cpuid(&self) -> hypervisor::Result<CpuId> { 600 self.kvm 601 .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) 602 .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into())) 603 } 604 #[cfg(target_arch = "x86_64")] 605 /// 606 /// Retrieve the list of MSRs supported by KVM. 
607 /// 608 fn get_msr_list(&self) -> hypervisor::Result<MsrList> { 609 self.kvm 610 .get_msr_index_list() 611 .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into())) 612 } 613 } 614 /// Vcpu struct for KVM 615 pub struct KvmVcpu { 616 fd: VcpuFd, 617 #[cfg(target_arch = "x86_64")] 618 msrs: MsrEntries, 619 vmmops: Option<Arc<Box<dyn vm::VmmOps>>>, 620 #[cfg(target_arch = "x86_64")] 621 hyperv_synic: AtomicBool, 622 } 623 /// Implementation of Vcpu trait for KVM 624 /// Example: 625 /// #[cfg(feature = "kvm")] 626 /// extern crate hypervisor 627 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 628 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 629 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 630 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 631 /// vcpu.get/set().unwrap() 632 /// 633 impl cpu::Vcpu for KvmVcpu { 634 #[cfg(target_arch = "x86_64")] 635 /// 636 /// Returns the vCPU general purpose registers. 637 /// 638 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 639 self.fd 640 .get_regs() 641 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into())) 642 } 643 #[cfg(target_arch = "x86_64")] 644 /// 645 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 646 /// 647 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 648 self.fd 649 .set_regs(regs) 650 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 651 } 652 #[cfg(target_arch = "x86_64")] 653 /// 654 /// Returns the vCPU special registers. 655 /// 656 fn get_sregs(&self) -> cpu::Result<SpecialRegisters> { 657 self.fd 658 .get_sregs() 659 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into())) 660 } 661 #[cfg(target_arch = "x86_64")] 662 /// 663 /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. 
664 /// 665 fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> { 666 self.fd 667 .set_sregs(sregs) 668 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 669 } 670 #[cfg(target_arch = "x86_64")] 671 /// 672 /// Returns the floating point state (FPU) from the vCPU. 673 /// 674 fn get_fpu(&self) -> cpu::Result<FpuState> { 675 self.fd 676 .get_fpu() 677 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into())) 678 } 679 #[cfg(target_arch = "x86_64")] 680 /// 681 /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct. 682 /// 683 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 684 self.fd 685 .set_fpu(fpu) 686 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 687 } 688 #[cfg(target_arch = "x86_64")] 689 /// 690 /// X86 specific call to setup the CPUID registers. 691 /// 692 fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> { 693 self.fd 694 .set_cpuid2(cpuid) 695 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 696 } 697 #[cfg(target_arch = "x86_64")] 698 /// 699 /// X86 specific call to enable HyperV SynIC 700 /// 701 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 702 // Update the information about Hyper-V SynIC being enabled and 703 // emulated as it will influence later which MSRs should be saved. 704 self.hyperv_synic.store(true, Ordering::Release); 705 706 let cap = kvm_enable_cap { 707 cap: KVM_CAP_HYPERV_SYNIC, 708 ..Default::default() 709 }; 710 self.fd 711 .enable_cap(&cap) 712 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into())) 713 } 714 /// 715 /// X86 specific call to retrieve the CPUID registers. 
716 /// 717 #[cfg(target_arch = "x86_64")] 718 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> { 719 self.fd 720 .get_cpuid2(num_entries) 721 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into())) 722 } 723 #[cfg(target_arch = "x86_64")] 724 /// 725 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 726 /// 727 fn get_lapic(&self) -> cpu::Result<LapicState> { 728 self.fd 729 .get_lapic() 730 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into())) 731 } 732 #[cfg(target_arch = "x86_64")] 733 /// 734 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 735 /// 736 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 737 self.fd 738 .set_lapic(klapic) 739 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 740 } 741 #[cfg(target_arch = "x86_64")] 742 /// 743 /// Returns the model-specific registers (MSR) for this vCPU. 744 /// 745 fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> { 746 self.fd 747 .get_msrs(msrs) 748 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into())) 749 } 750 #[cfg(target_arch = "x86_64")] 751 /// 752 /// Setup the model-specific registers (MSR) for this vCPU. 753 /// Returns the number of MSR entries actually written. 754 /// 755 fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> { 756 self.fd 757 .set_msrs(msrs) 758 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 759 } 760 /// 761 /// Returns the vcpu's current "multiprocessing state". 762 /// 763 fn get_mp_state(&self) -> cpu::Result<MpState> { 764 self.fd 765 .get_mp_state() 766 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into())) 767 } 768 /// 769 /// Sets the vcpu's current "multiprocessing state". 
770 /// 771 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 772 self.fd 773 .set_mp_state(mp_state) 774 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 775 } 776 #[cfg(target_arch = "x86_64")] 777 /// 778 /// X86 specific call that returns the vcpu's current "xsave struct". 779 /// 780 fn get_xsave(&self) -> cpu::Result<Xsave> { 781 self.fd 782 .get_xsave() 783 .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into())) 784 } 785 #[cfg(target_arch = "x86_64")] 786 /// 787 /// X86 specific call that sets the vcpu's current "xsave struct". 788 /// 789 fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> { 790 self.fd 791 .set_xsave(xsave) 792 .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into())) 793 } 794 #[cfg(target_arch = "x86_64")] 795 /// 796 /// X86 specific call that returns the vcpu's current "xcrs". 797 /// 798 fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> { 799 self.fd 800 .get_xcrs() 801 .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into())) 802 } 803 #[cfg(target_arch = "x86_64")] 804 /// 805 /// X86 specific call that sets the vcpu's current "xcrs". 806 /// 807 fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> { 808 self.fd 809 .set_xcrs(&xcrs) 810 .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into())) 811 } 812 /// 813 /// Triggers the running of the current virtual CPU returning an exit reason. 
    ///
    /// Runs the vCPU once via `KVM_RUN` and translates the raw KVM exit into a
    /// `cpu::VmExit`. When `vmmops` callbacks are registered, PIO and MMIO
    /// exits are emulated through them and reported as `VmExit::Ignore`.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    // Prefer the VMM's port-read emulation when callbacks exist.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoIn(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    // Prefer the VMM's port-write emulation when callbacks exist.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::IoOut(addr, data))
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On Aarch64, when the VM is shutdown, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    // MMIO reads can be emulated by the VMM on any architecture.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioRead(addr, data))
                }
                VcpuExit::MmioWrite(addr, data) => {
                    // MMIO writes can be emulated by the VMM on any architecture.
                    if let Some(vmmops) = &self.vmmops {
                        return vmmops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::MmioWrite(addr, data))
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),

                // Any other exit reason is unexpected at this layer.
                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                // EAGAIN/EINTR mean the run was interrupted (signal or
                // immediate-exit request); not an error, just retry later.
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents from
    /// potential soft lockups when being resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        self.fd
            .kvmclock_ctrl()
            .map_err(|e| cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()))
    }
    /// Initializes the vCPU with the target/features requested in `kvi`
    /// (the `KVM_ARM_VCPU_INIT` ioctl).
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }
    ///
    /// Sets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
        self.fd
            .set_one_reg(reg_id, data)
            .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
    }
    ///
    /// Gets the value of one register for this vCPU.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
        self.fd
            .get_one_reg(reg_id)
            .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
    }
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }
    ///
    /// Save the state of the core registers.
    /// Walks the kernel's `kvm_regs` layout field by field, computing each
    /// register id from its byte offset in the structure.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
        let mut off = offset__of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These actually are the general-purpose registers of the Armv8-a
        // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
        for i in 0..31 {
            state.regs.regs[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the ARMv8-a architecture.
        // First one, stack pointer.
        let off = offset__of!(user_pt_regs, sp);
        state.regs.sp = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Second one, the program counter.
        let off = offset__of!(user_pt_regs, pc);
        state.regs.pc = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Next is the processor state.
        let off = offset__of!(user_pt_regs, pstate);
        state.regs.pstate = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // The stack pointer associated with EL1
        let off = offset__of!(kvm_regs, sp_el1);
        state.sp_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Exception Link Register for EL1, when taking an exception to EL1, this register
        // holds the address to which to return afterwards.
        let off = offset__of!(kvm_regs, elr_el1);
        state.elr_el1 = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;

        // Saved Program Status Registers, there are 5 of them used in the kernel.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            state.spsr[i] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floting point registers which are stored in the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            // NOTE(review): only the low 64 bits of each 128-bit vreg are
            // filled here (index [0]); get_one_reg returns a u64 even though
            // the register id is declared 128-bit wide — confirm intent.
            state.fp_regs.vregs[i][0] = self
                .fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        state.fp_regs.fpsr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;

        // Floating-point Control Register
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        state.fp_regs.fpcr = self
            .fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
            as u32;
        Ok(())
    }
    ///
    /// Restore the state of the core registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact identical order from `state`. Look there
        // for some additional info on registers.
        let mut off = offset__of!(user_pt_regs, regs);
        // x0-x30 general-purpose registers.
        for i in 0..31 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    state.regs.regs[i],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // Stack pointer.
        let off = offset__of!(user_pt_regs, sp);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Program counter.
        let off = offset__of!(user_pt_regs, pc);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Processor state.
        let off = offset__of!(user_pt_regs, pstate);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // EL1 stack pointer.
        let off = offset__of!(kvm_regs, sp_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // EL1 exception link register.
        let off = offset__of!(kvm_regs, elr_el1);
        self.fd
            .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Saved Program Status Registers.
        let mut off = offset__of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        // FP/SIMD vector registers (mirrors the save path in core_registers).
        let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    state.fp_regs.vregs[i][0],
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpsr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Floating-point Control Register.
        let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                state.fp_regs.fpcr as u64,
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }
    ///
    /// Save the state of the system registers.
    ///
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
        // Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are
        // around 500 registers.
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system registers.
        // The register list contains the number of registers and their ids. We will be needing to
        // call KVM_GET_ONE_REG on each id in order to save all of them. We carve out from the list
        // the core registers which are represented in the kernel by kvm_regs structure and for which
        // we can calculate the id based on the offset in the structure.

        reg_list.retain(|regid| *regid != 0);
        // NOTE(review): this sorts a temporary copy (`to_vec()`) and discards
        // the result — `reg_list` itself is left unsorted, so this line is a
        // no-op. Probably meant to sort in place; confirm intent before
        // relying on any ordering downstream.
        reg_list.as_slice().to_vec().sort_unstable();

        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched register list, we are
        // simply calling KVM_GET_ONE_REG.
1161 let indices = reg_list.as_slice(); 1162 for (_pos, index) in indices.iter().enumerate() { 1163 if _pos > 230 { 1164 break; 1165 } 1166 state.push(kvm_bindings::kvm_one_reg { 1167 id: *index, 1168 addr: self 1169 .fd 1170 .get_one_reg(*index) 1171 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?, 1172 }); 1173 } 1174 1175 Ok(()) 1176 } 1177 /// 1178 /// Restore the state of the system registers. 1179 /// 1180 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 1181 fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> { 1182 for reg in state { 1183 self.fd 1184 .set_one_reg(reg.id, reg.addr) 1185 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?; 1186 } 1187 Ok(()) 1188 } 1189 /// 1190 /// Read the MPIDR - Multiprocessor Affinity Register. 1191 /// 1192 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] 1193 fn read_mpidr(&self) -> cpu::Result<u64> { 1194 self.fd 1195 .get_one_reg(MPIDR_EL1) 1196 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into())) 1197 } 1198 #[cfg(target_arch = "x86_64")] 1199 /// 1200 /// Get the current CPU state 1201 /// 1202 /// Ordering requirements: 1203 /// 1204 /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify 1205 /// vCPU/LAPIC state. As such, it must be done before most everything 1206 /// else, otherwise we cannot restore everything and expect it to work. 1207 /// 1208 /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are 1209 /// still running. 1210 /// 1211 /// KVM_GET_LAPIC may change state of LAPIC before returning it. 1212 /// 1213 /// GET_VCPU_EVENTS should probably be last to save. The code looks as 1214 /// it might as well be affected by internal state modifications of the 1215 /// GET ioctls. 1216 /// 1217 /// SREGS saves/restores a pending interrupt, similar to what 1218 /// VCPU_EVENTS also does. 1219 /// 1220 /// GET_MSRS requires a pre-populated data structure to do something 1221 /// meaningful. 
For SET_MSRS it will then contain good data. 1222 /// 1223 /// # Example 1224 /// 1225 /// ```rust 1226 /// # extern crate hypervisor; 1227 /// # use hypervisor::KvmHypervisor; 1228 /// # use std::sync::Arc; 1229 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 1230 /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 1231 /// let vm = hv.create_vm().expect("new VM fd creation failed"); 1232 /// vm.enable_split_irq().unwrap(); 1233 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 1234 /// let state = vcpu.state().unwrap(); 1235 /// ``` 1236 fn state(&self) -> cpu::Result<CpuState> { 1237 let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?; 1238 let mp_state = self.get_mp_state()?; 1239 let regs = self.get_regs()?; 1240 let sregs = self.get_sregs()?; 1241 let xsave = self.get_xsave()?; 1242 let xcrs = self.get_xcrs()?; 1243 let lapic_state = self.get_lapic()?; 1244 let fpu = self.get_fpu()?; 1245 1246 // Try to get all MSRs based on the list previously retrieved from KVM. 1247 // If the number of MSRs obtained from GET_MSRS is different from the 1248 // expected amount, we fallback onto a slower method by getting MSRs 1249 // by chunks. This is the only way to make sure we try to get as many 1250 // MSRs as possible, even if some MSRs are not supported. 1251 let mut msr_entries = self.msrs.clone(); 1252 1253 // Save extra MSRs if the Hyper-V synthetic interrupt controller is 1254 // emulated. 
1255 if self.hyperv_synic.load(Ordering::Acquire) { 1256 let hyperv_synic_msrs = vec![ 1257 0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084, 1258 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096, 1259 0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d, 1260 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4, 0x400000b5, 1261 0x400000b6, 0x400000b7, 1262 ]; 1263 for index in hyperv_synic_msrs { 1264 let msr = kvm_msr_entry { 1265 index, 1266 ..Default::default() 1267 }; 1268 msr_entries.push(msr).unwrap(); 1269 } 1270 } 1271 1272 let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize; 1273 let num_msrs = self.get_msrs(&mut msr_entries)?; 1274 let msrs = if num_msrs != expected_num_msrs { 1275 let mut faulty_msr_index = num_msrs; 1276 let mut msr_entries_tmp = 1277 MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]).unwrap(); 1278 1279 loop { 1280 warn!( 1281 "Detected faulty MSR 0x{:x} while getting MSRs", 1282 msr_entries.as_slice()[faulty_msr_index].index 1283 ); 1284 1285 let start_pos = faulty_msr_index + 1; 1286 let mut sub_msr_entries = 1287 MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]).unwrap(); 1288 let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize; 1289 let num_msrs = self.get_msrs(&mut sub_msr_entries)?; 1290 1291 for i in 0..num_msrs { 1292 msr_entries_tmp 1293 .push(sub_msr_entries.as_slice()[i]) 1294 .map_err(|e| { 1295 cpu::HypervisorCpuError::GetMsrEntries(anyhow!( 1296 "Failed adding MSR entries: {:?}", 1297 e 1298 )) 1299 })?; 1300 } 1301 1302 if num_msrs == expected_num_msrs { 1303 break; 1304 } 1305 1306 faulty_msr_index = start_pos + num_msrs; 1307 } 1308 1309 msr_entries_tmp 1310 } else { 1311 msr_entries 1312 }; 1313 1314 let vcpu_events = self.get_vcpu_events()?; 1315 1316 Ok(CpuState { 1317 cpuid, 1318 msrs, 1319 vcpu_events, 1320 regs, 1321 sregs, 
1322 fpu, 1323 lapic_state, 1324 xsave, 1325 xcrs, 1326 mp_state, 1327 }) 1328 } 1329 /// 1330 /// Get the current AArch64 CPU state 1331 /// 1332 #[cfg(target_arch = "aarch64")] 1333 fn state(&self) -> cpu::Result<CpuState> { 1334 let mut state = CpuState { 1335 mp_state: self.get_mp_state()?, 1336 mpidr: self.read_mpidr()?, 1337 ..Default::default() 1338 }; 1339 self.core_registers(&mut state.core_regs)?; 1340 self.system_registers(&mut state.sys_regs)?; 1341 1342 Ok(state) 1343 } 1344 #[cfg(target_arch = "x86_64")] 1345 /// 1346 /// Restore the previously saved CPU state 1347 /// 1348 /// Ordering requirements: 1349 /// 1350 /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are 1351 /// still running. 1352 /// 1353 /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so 1354 /// if we ever change the BSP, we have to do that before restoring anything. 1355 /// The same seems to be true for CPUID stuff. 1356 /// 1357 /// SREGS saves/restores a pending interrupt, similar to what 1358 /// VCPU_EVENTS also does. 1359 /// 1360 /// SET_REGS clears pending exceptions unconditionally, thus, it must be 1361 /// done before SET_VCPU_EVENTS, which restores it. 1362 /// 1363 /// SET_LAPIC must come after SET_SREGS, because the latter restores 1364 /// the apic base msr. 1365 /// 1366 /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR 1367 /// only restores successfully, when the LAPIC is correctly configured. 
1368 /// 1369 /// Arguments: CpuState 1370 /// # Example 1371 /// 1372 /// ```rust 1373 /// # extern crate hypervisor; 1374 /// # use hypervisor::KvmHypervisor; 1375 /// # use std::sync::Arc; 1376 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap(); 1377 /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm); 1378 /// let vm = hv.create_vm().expect("new VM fd creation failed"); 1379 /// vm.enable_split_irq().unwrap(); 1380 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 1381 /// let state = vcpu.state().unwrap(); 1382 /// vcpu.set_state(&state).unwrap(); 1383 /// ``` 1384 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 1385 self.set_cpuid2(&state.cpuid)?; 1386 self.set_mp_state(state.mp_state)?; 1387 self.set_regs(&state.regs)?; 1388 self.set_sregs(&state.sregs)?; 1389 self.set_xsave(&state.xsave)?; 1390 self.set_xcrs(&state.xcrs)?; 1391 self.set_lapic(&state.lapic_state)?; 1392 self.set_fpu(&state.fpu)?; 1393 1394 // Try to set all MSRs previously stored. 1395 // If the number of MSRs set from SET_MSRS is different from the 1396 // expected amount, we fallback onto a slower method by setting MSRs 1397 // by chunks. This is the only way to make sure we try to set as many 1398 // MSRs as possible, even if some MSRs are not supported. 
1399 let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize; 1400 let num_msrs = self.set_msrs(&state.msrs)?; 1401 if num_msrs != expected_num_msrs { 1402 let mut faulty_msr_index = num_msrs; 1403 1404 loop { 1405 warn!( 1406 "Detected faulty MSR 0x{:x} while setting MSRs", 1407 state.msrs.as_slice()[faulty_msr_index].index 1408 ); 1409 1410 let start_pos = faulty_msr_index + 1; 1411 let sub_msr_entries = 1412 MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]).unwrap(); 1413 let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize; 1414 let num_msrs = self.set_msrs(&sub_msr_entries)?; 1415 1416 if num_msrs == expected_num_msrs { 1417 break; 1418 } 1419 1420 faulty_msr_index = start_pos + num_msrs; 1421 } 1422 } 1423 1424 self.set_vcpu_events(&state.vcpu_events)?; 1425 1426 Ok(()) 1427 } 1428 /// 1429 /// Restore the previously saved AArch64 CPU state 1430 /// 1431 #[cfg(target_arch = "aarch64")] 1432 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 1433 self.set_core_registers(&state.core_regs)?; 1434 self.set_system_registers(&state.sys_regs)?; 1435 self.set_mp_state(state.mp_state)?; 1436 1437 Ok(()) 1438 } 1439 1440 /// 1441 /// Initialize TDX for this CPU 1442 /// 1443 #[cfg(feature = "tdx")] 1444 fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> { 1445 tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address) 1446 .map_err(cpu::HypervisorCpuError::InitializeTdx) 1447 } 1448 } 1449 1450 /// Device struct for KVM 1451 pub struct KvmDevice { 1452 fd: DeviceFd, 1453 } 1454 1455 impl device::Device for KvmDevice { 1456 /// 1457 /// Set device attribute 1458 /// 1459 fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> { 1460 self.fd 1461 .set_device_attr(attr) 1462 .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into())) 1463 } 1464 /// 1465 /// Get device attribute 1466 /// 1467 fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> { 1468 
self.fd 1469 .get_device_attr(attr) 1470 .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into())) 1471 } 1472 } 1473 1474 impl AsRawFd for KvmDevice { 1475 fn as_raw_fd(&self) -> RawFd { 1476 self.fd.as_raw_fd() 1477 } 1478 } 1479