// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//

use std::collections::BTreeMap;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use std::io::Write;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use std::mem::size_of;
use std::os::unix::thread::JoinHandleExt;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use std::{cmp, io, result, thread};

#[cfg(not(target_arch = "riscv64"))]
use acpi_tables::sdt::Sdt;
use acpi_tables::{aml, Aml};
use anyhow::anyhow;
#[cfg(target_arch = "x86_64")]
use arch::x86_64::get_x2apic_id;
use arch::{EntryPoint, NumaNodes};
#[cfg(target_arch = "aarch64")]
use devices::gic::Gic;
use devices::interrupt_controller::InterruptController;
#[cfg(all(target_arch = "aarch64", feature = "guest_debug"))]
use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use gdbstub_arch::x86::reg::{X86SegmentRegs, X86_64CoreRegs as CoreRegs};
#[cfg(all(target_arch = "aarch64", feature = "guest_debug"))]
use hypervisor::arch::aarch64::regs::{ID_AA64MMFR0_EL1, TCR_EL1, TTBR1_EL1};
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use hypervisor::arch::x86::msr_index;
#[cfg(target_arch = "x86_64")]
use hypervisor::arch::x86::CpuIdEntry;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use hypervisor::arch::x86::MsrEntry;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use hypervisor::arch::x86::SpecialRegisters;
#[cfg(feature = "tdx")]
use hypervisor::kvm::{TdxExitDetails, TdxExitStatus};
#[cfg(target_arch = "x86_64")]
use hypervisor::CpuVendor;
#[cfg(feature = "kvm")]
use hypervisor::HypervisorType;
#[cfg(feature = "guest_debug")]
use hypervisor::StandardRegisters;
use hypervisor::{CpuState, HypervisorCpuError, VmExit, VmOps};
use libc::{c_void, siginfo_t};
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use linux_loader::elf::Elf64_Nhdr;
use seccompiler::{apply_filter, SeccompAction};
use thiserror::Error;
use tracer::trace_scoped;
use vm_device::BusDevice;
#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use vm_memory::ByteValued;
#[cfg(feature = "guest_debug")]
use vm_memory::{Bytes, GuestAddressSpace};
use vm_memory::{GuestAddress, GuestMemoryAtomic};
use vm_migration::{
    snapshot_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable,
    Transportable,
};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::signal::{register_signal_handler, SIGRTMIN};
use zerocopy::{FromBytes, Immutable, IntoBytes};

#[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
use crate::coredump::{
    CpuElf64Writable, CpuSegment, CpuState as DumpCpusState, DumpState, Elf64Writable,
    GuestDebuggableError, NoteDescType, X86_64ElfPrStatus, X86_64UserRegs, COREDUMP_NAME_SIZE,
    NT_PRSTATUS,
};
#[cfg(feature = "guest_debug")]
use crate::gdb::{get_raw_tid, Debuggable, DebuggableError};
#[cfg(target_arch = "x86_64")]
use crate::memory_manager::MemoryManager;
use crate::seccomp_filters::{get_seccomp_filter, Thread};
#[cfg(target_arch = "x86_64")]
use crate::vm::physical_bits;
use crate::vm_config::CpusConfig;
use crate::{GuestMemoryMmap, CPU_MANAGER_SNAPSHOT_ID};

#[cfg(all(target_arch = "aarch64", feature = "guest_debug"))]
/// Extract the specified bits of a 64-bit integer.
/// For example, to extract 2 bits from offset 1 (zero based) of `6u64`,
/// the following expression should return 3 (`0b11`):
/// `extract_bits_64!(0b0000_0110u64, 1, 2)`
///
macro_rules! extract_bits_64 {
    ($value: tt, $offset: tt, $length: tt) => {
        ($value >> $offset) & (!0u64 >> (64 - $length))
    };
}

#[cfg(all(target_arch = "aarch64", feature = "guest_debug"))]
macro_rules! extract_bits_64_without_offset {
    ($value: tt, $length: tt) => {
        $value & (!0u64 >> (64 - $length))
    };
}

pub const CPU_MANAGER_ACPI_SIZE: usize = 0xc;

#[derive(Debug, Error)]
pub enum Error {
    #[error("Error creating vCPU")]
    VcpuCreate(#[source] anyhow::Error),

    #[error("Error running vCPU")]
    VcpuRun(#[source] anyhow::Error),

    #[error("Error spawning vCPU thread")]
    VcpuSpawn(#[source] io::Error),

    #[error("Error generating common CPUID")]
    CommonCpuId(#[source] arch::Error),

    #[error("Error configuring vCPU")]
    VcpuConfiguration(#[source] arch::Error),

    #[error("Still pending removed vCPU")]
    VcpuPendingRemovedVcpu,

    #[cfg(target_arch = "aarch64")]
    #[error("Error fetching preferred target")]
    VcpuArmPreferredTarget(#[source] hypervisor::HypervisorVmError),

    #[cfg(target_arch = "aarch64")]
    #[error("Error setting vCPU processor features")]
    VcpuSetProcessorFeatures(#[source] hypervisor::HypervisorCpuError),

    #[cfg(target_arch = "aarch64")]
    #[error("Error initialising vCPU")]
    VcpuArmInit(#[source] hypervisor::HypervisorCpuError),

    #[cfg(target_arch = "aarch64")]
    #[error("Error finalising vCPU")]
    VcpuArmFinalize(#[source] hypervisor::HypervisorCpuError),

    #[cfg(target_arch = "aarch64")]
    #[error("Error initialising GICR base address")]
    VcpuSetGicrBaseAddr(#[source] hypervisor::HypervisorCpuError),

    #[error("Failed to join on vCPU threads: {0:?}")]
    ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),

    #[error("Error adding CpuManager to MMIO bus")]
    BusError(#[source] vm_device::BusError),

    #[error("Requested zero vCPUs")]
    DesiredVCpuCountIsZero,

    #[error("Requested vCPUs exceed maximum")]
    DesiredVCpuCountExceedsMax,

    #[error("Cannot create seccomp filter")]
    CreateSeccompFilter(#[source] seccompiler::Error),

    #[error("Cannot apply seccomp filter")]
    ApplySeccompFilter(#[source] seccompiler::Error),

    #[error("Error starting vCPU after restore")]
    StartRestoreVcpu(#[source] anyhow::Error),

    #[error("Unexpected VmExit")]
    UnexpectedVmExit,

    #[error("Failed to allocate MMIO address for CpuManager")]
    AllocateMmmioAddress,

    #[cfg(feature = "tdx")]
    #[error("Error initializing TDX")]
    InitializeTdx(#[source] hypervisor::HypervisorCpuError),

    #[cfg(target_arch = "aarch64")]
    #[error("Error initializing PMU")]
    InitPmu(#[source] hypervisor::HypervisorCpuError),

    #[cfg(feature = "guest_debug")]
    #[error("Error during CPU debug")]
    CpuDebug(#[source]
    hypervisor::HypervisorCpuError),

    #[cfg(feature = "guest_debug")]
    #[error("Error translating virtual address")]
    TranslateVirtualAddress(#[source] anyhow::Error),

    #[cfg(target_arch = "x86_64")]
    #[error("Error setting up AMX")]
    AmxEnable(#[source] anyhow::Error),

    #[error("Maximum number of vCPUs exceeds host limit")]
    MaximumVcpusExceeded,

    #[cfg(feature = "sev_snp")]
    #[error("Failed to set sev control register")]
    SetSevControlRegister(#[source] hypervisor::HypervisorCpuError),

    #[cfg(target_arch = "x86_64")]
    #[error("Failed to inject NMI")]
    NmiError(#[source] hypervisor::HypervisorCpuError),
}
pub type Result<T> = result::Result<T, Error>;

#[cfg(target_arch = "x86_64")]
#[allow(dead_code)]
#[repr(C, packed)]
#[derive(IntoBytes, Immutable, FromBytes)]
struct LocalX2Apic {
    pub r#type: u8,
    pub length: u8,
    pub _reserved: u16,
    pub apic_id: u32,
    pub flags: u32,
    pub processor_id: u32,
}

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct Ioapic {
    pub r#type: u8,
    pub length: u8,
    pub ioapic_id: u8,
    _reserved: u8,
    pub apic_address: u32,
    pub gsi_base: u32,
}

#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
#[repr(C, packed)]
#[derive(IntoBytes, Immutable, FromBytes)]
struct GicC {
    pub r#type: u8,
    pub length: u8,
    pub reserved0: u16,
    pub cpu_interface_number: u32,
    pub uid: u32,
    pub flags: u32,
    pub parking_version: u32,
    pub performance_interrupt: u32,
    pub parked_address: u64,
    pub base_address: u64,
    pub gicv_base_address: u64,
    pub gich_base_address: u64,
    pub vgic_interrupt: u32,
    pub gicr_base_address: u64,
    pub mpidr: u64,
    pub proc_power_effi_class: u8,
    pub reserved1: u8,
    pub spe_overflow_interrupt: u16,
}

#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
#[repr(C, packed)]
#[derive(IntoBytes, Immutable, FromBytes)]
struct GicD {
    pub r#type: u8,
    pub length: u8,
    pub reserved0: u16,
    pub gic_id: u32,
    pub base_address: u64,
    pub global_irq_base: u32,
    pub version: u8,
    pub reserved1: [u8; 3],
}

#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
#[repr(C, packed)]
#[derive(IntoBytes, Immutable, FromBytes)]
struct GicR {
    pub r#type: u8,
    pub length: u8,
    pub reserved: u16,
    pub base_address: u64,
    pub range_length: u32,
}

#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
#[repr(C, packed)]
#[derive(IntoBytes, Immutable, FromBytes)]
struct GicIts {
    pub r#type: u8,
    pub length: u8,
    pub reserved0: u16,
    pub translation_id: u32,
    pub base_address: u64,
    pub reserved1: u32,
}

#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
#[repr(C, packed)]
#[derive(IntoBytes, Immutable, FromBytes)]
struct ProcessorHierarchyNode {
    pub r#type: u8,
    pub length: u8,
    pub reserved: u16,
    pub flags: u32,
    pub parent: u32,
    pub acpi_processor_id: u32,
    pub num_private_resources: u32,
}

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct InterruptSourceOverride {
    pub r#type: u8,
    pub length: u8,
    pub bus: u8,
    pub source: u8,
    pub gsi: u32,
    pub flags: u16,
}

#[cfg(all(target_arch = "x86_64", feature =
"guest_debug"))] 331 macro_rules! round_up { 332 ($n:expr,$d:expr) => { 333 (($n / ($d + 1)) + 1) * $d 334 }; 335 } 336 337 /// A wrapper around creating and using a kvm-based VCPU. 338 pub struct Vcpu { 339 // The hypervisor abstracted CPU. 340 vcpu: Arc<dyn hypervisor::Vcpu>, 341 id: u8, 342 #[cfg(target_arch = "aarch64")] 343 mpidr: u64, 344 saved_state: Option<CpuState>, 345 #[cfg(target_arch = "x86_64")] 346 vendor: CpuVendor, 347 } 348 349 impl Vcpu { 350 /// Constructs a new VCPU for `vm`. 351 /// 352 /// # Arguments 353 /// 354 /// * `id` - Represents the CPU number between [0, max vcpus). 355 /// * `vm` - The virtual machine this vcpu will get attached to. 356 /// * `vm_ops` - Optional object for exit handling. 357 /// * `cpu_vendor` - CPU vendor as reported by __cpuid(0x0) 358 pub fn new( 359 id: u8, 360 apic_id: u8, 361 vm: &Arc<dyn hypervisor::Vm>, 362 vm_ops: Option<Arc<dyn VmOps>>, 363 #[cfg(target_arch = "x86_64")] cpu_vendor: CpuVendor, 364 ) -> Result<Self> { 365 let vcpu = vm 366 .create_vcpu(apic_id, vm_ops) 367 .map_err(|e| Error::VcpuCreate(e.into()))?; 368 // Initially the cpuid per vCPU is the one supported by this VM. 369 Ok(Vcpu { 370 vcpu, 371 id, 372 #[cfg(target_arch = "aarch64")] 373 mpidr: 0, 374 saved_state: None, 375 #[cfg(target_arch = "x86_64")] 376 vendor: cpu_vendor, 377 }) 378 } 379 380 /// Configures a vcpu and should be called once per vcpu when created. 381 /// 382 /// # Arguments 383 /// 384 /// * `kernel_entry_point` - Kernel entry point address in guest memory and boot protocol used. 385 /// * `guest_memory` - Guest memory. 386 /// * `cpuid` - (x86_64) CpuId, wrapper over the `kvm_cpuid2` structure. 387 pub fn configure( 388 &mut self, 389 #[cfg(target_arch = "aarch64")] vm: &Arc<dyn hypervisor::Vm>, 390 boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>, 391 #[cfg(target_arch = "x86_64")] cpuid: Vec<CpuIdEntry>, 392 #[cfg(target_arch = "x86_64")] kvm_hyperv: bool, 393 #[cfg(target_arch = "x86_64")] topology: Option<(u8, u8, u8)>, 394 ) -> Result<()> { 395 #[cfg(target_arch = "aarch64")] 396 { 397 self.init(vm)?; 398 self.mpidr = arch::configure_vcpu(&self.vcpu, self.id, boot_setup) 399 .map_err(Error::VcpuConfiguration)?; 400 } 401 #[cfg(target_arch = "riscv64")] 402 arch::configure_vcpu(&self.vcpu, self.id, boot_setup).map_err(Error::VcpuConfiguration)?; 403 info!("Configuring vCPU: cpu_id = {}", self.id); 404 #[cfg(target_arch = "x86_64")] 405 arch::configure_vcpu( 406 &self.vcpu, 407 self.id, 408 boot_setup, 409 cpuid, 410 kvm_hyperv, 411 self.vendor, 412 topology, 413 ) 414 .map_err(Error::VcpuConfiguration)?; 415 416 Ok(()) 417 } 418 419 /// Gets the MPIDR register value. 420 #[cfg(target_arch = "aarch64")] 421 pub fn get_mpidr(&self) -> u64 { 422 self.mpidr 423 } 424 425 /// Gets the saved vCPU state. 426 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] 427 pub fn get_saved_state(&self) -> Option<CpuState> { 428 self.saved_state.clone() 429 } 430 431 /// Initializes an aarch64 specific vcpu for booting Linux. 432 #[cfg(target_arch = "aarch64")] 433 pub fn init(&self, vm: &Arc<dyn hypervisor::Vm>) -> Result<()> { 434 use std::arch::is_aarch64_feature_detected; 435 #[allow(clippy::nonminimal_bool)] 436 let sve_supported = 437 is_aarch64_feature_detected!("sve") || is_aarch64_feature_detected!("sve2"); 438 let mut kvi = self.vcpu.create_vcpu_init(); 439 440 // This reads back the kernel's preferred target type. 
        vm.get_preferred_target(&mut kvi)
            .map_err(Error::VcpuArmPreferredTarget)?;

        self.vcpu
            .vcpu_set_processor_features(vm, &mut kvi, self.id)
            .map_err(Error::VcpuSetProcessorFeatures)?;

        self.vcpu.vcpu_init(&kvi).map_err(Error::VcpuArmInit)?;

        if sve_supported {
            let finalized_features = self.vcpu.vcpu_get_finalized_features();
            self.vcpu
                .vcpu_finalize(finalized_features)
                .map_err(Error::VcpuArmFinalize)?;
        }
        Ok(())
    }

    /// Runs the VCPU until it exits, returning the reason.
    ///
    /// Note that the state of the VCPU and associated VM must be set up first for this to do
    /// anything useful.
    pub fn run(&self) -> std::result::Result<VmExit, HypervisorCpuError> {
        self.vcpu.run()
    }

    #[cfg(feature = "sev_snp")]
    pub fn set_sev_control_register(&self, vmsa_pfn: u64) -> Result<()> {
        self.vcpu
            .set_sev_control_register(vmsa_pfn)
            .map_err(Error::SetSevControlRegister)
    }

    ///
    /// Sets the vCPU's GIC redistributor base address.
    ///
    #[cfg(target_arch = "aarch64")]
    pub fn set_gic_redistributor_addr(
        &self,
        base_redist_addr: u64,
        redist_size: u64,
    ) -> Result<()> {
        let gicr_base = base_redist_addr + (arch::layout::GIC_V3_REDIST_SIZE * self.id as u64);
        assert!(gicr_base + arch::layout::GIC_V3_REDIST_SIZE <= base_redist_addr + redist_size);
        self.vcpu
            .set_gic_redistributor_addr(gicr_base)
            .map_err(Error::VcpuSetGicrBaseAddr)?;
        Ok(())
    }
}

impl Pausable for Vcpu {}
impl Snapshottable for Vcpu {
    fn id(&self) -> String {
        self.id.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let saved_state = self
            .vcpu
            .state()
            .map_err(|e| MigratableError::Snapshot(anyhow!("Could not get vCPU state {:?}", e)))?;

        self.saved_state = Some(saved_state.clone());

        Ok(Snapshot::from_data(SnapshotData::new_from_state(
            &saved_state,
        )?))
    }
}

pub struct CpuManager {
    config: CpusConfig,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    interrupt_controller: Option<Arc<Mutex<dyn InterruptController>>>,
    #[cfg(target_arch = "x86_64")]
    cpuid: Vec<CpuIdEntry>,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    vm: Arc<dyn hypervisor::Vm>,
    vcpus_kill_signalled: Arc<AtomicBool>,
    vcpus_pause_signalled: Arc<AtomicBool>,
    vcpus_kick_signalled: Arc<AtomicBool>,
    exit_evt: EventFd,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    reset_evt: EventFd,
    #[cfg(feature = "guest_debug")]
    vm_debug_evt: EventFd,
    vcpu_states: Vec<VcpuState>,
    selected_cpu: u8,
    vcpus: Vec<Arc<Mutex<Vcpu>>>,
    seccomp_action: SeccompAction,
    vm_ops: Arc<dyn VmOps>,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    acpi_address: Option<GuestAddress>,
    proximity_domain_per_cpu: BTreeMap<u8, u32>,
    affinity: BTreeMap<u8, Vec<usize>>,
    dynamic: bool,
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    #[cfg(feature = "sev_snp")]
    sev_snp_enabled: bool,
}

const CPU_ENABLE_FLAG: usize = 0;
const CPU_INSERTING_FLAG: usize = 1;
const CPU_REMOVING_FLAG: usize = 2;
const CPU_EJECT_FLAG: usize = 3;

const CPU_STATUS_OFFSET: u64 = 4;
const CPU_SELECTION_OFFSET: u64 = 0;

impl BusDevice for CpuManager {
    fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
        // The Linux kernel, quite reasonably, doesn't zero the
        // memory it gives us.
        data.fill(0);

        match offset {
            CPU_SELECTION_OFFSET => {
                data[0] = self.selected_cpu;
            }
            CPU_STATUS_OFFSET => {
                if self.selected_cpu < self.max_vcpus() {
                    let state = &self.vcpu_states[usize::from(self.selected_cpu)];
                    if state.active() {
                        data[0] |= 1 << CPU_ENABLE_FLAG;
                    }
                    if state.inserting {
                        data[0] |= 1 << CPU_INSERTING_FLAG;
                    }
                    if state.removing {
                        data[0] |= 1 << CPU_REMOVING_FLAG;
                    }
                } else {
                    warn!("Out of range vCPU id: {}", self.selected_cpu);
                }
            }
            _ => {
                warn!(
                    "Unexpected offset for accessing CPU manager device: {:#}",
                    offset
                );
            }
        }
    }

    fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            CPU_SELECTION_OFFSET => {
                self.selected_cpu = data[0];
            }
            CPU_STATUS_OFFSET => {
                if self.selected_cpu < self.max_vcpus() {
                    let state = &mut self.vcpu_states[usize::from(self.selected_cpu)];
                    // The ACPI code writes back a 1 to acknowledge the insertion
                    if (data[0] & (1 << CPU_INSERTING_FLAG) == 1 << CPU_INSERTING_FLAG)
                        && state.inserting
                    {
                        state.inserting = false;
                    }
                    // Ditto for removal
                    if (data[0] & (1 << CPU_REMOVING_FLAG) == 1 << CPU_REMOVING_FLAG)
                        && state.removing
                    {
                        state.removing = false;
                    }
                    // Trigger removal of vCPU
                    if data[0] & (1 << CPU_EJECT_FLAG) == 1 << CPU_EJECT_FLAG {
                        if let Err(e) = self.remove_vcpu(self.selected_cpu) {
                            error!("Error removing vCPU: {:?}", e);
                        }
                    }
                } else {
                    warn!("Out of range vCPU id: {}", self.selected_cpu);
                }
            }
            _ => {
                warn!(
                    "Unexpected offset for accessing CPU manager device: {:#}",
                    offset
                );
            }
        }
        None
    }
}

#[derive(Default)]
struct VcpuState {
    inserting: bool,
    removing: bool,
    pending_removal: Arc<AtomicBool>,
    handle: Option<thread::JoinHandle<()>>,
    kill: Arc<AtomicBool>,
    vcpu_run_interrupted: Arc<AtomicBool>,
    paused: Arc<AtomicBool>,
}

impl VcpuState {
    fn active(&self) -> bool {
        self.handle.is_some()
    }

    fn signal_thread(&self) {
        if let Some(handle) = self.handle.as_ref() {
            loop {
                // SAFETY: FFI call with correct arguments
                unsafe {
                    libc::pthread_kill(handle.as_pthread_t() as _, SIGRTMIN());
                }
                if self.vcpu_run_interrupted.load(Ordering::SeqCst) {
                    break;
                } else {
                    // This is more effective than thread::yield_now() at
                    // avoiding a priority inversion with the vCPU thread
                    thread::sleep(std::time::Duration::from_millis(1));
                }
            }
        }
    }

    fn join_thread(&mut self) -> Result<()> {
        if let Some(handle) = self.handle.take() {
            handle.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }

    fn unpark_thread(&self) {
        if let Some(handle) = self.handle.as_ref() {
            handle.thread().unpark()
        }
    }
}

impl CpuManager {
    #[allow(unused_variables)]
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        config: &CpusConfig,
        vm: Arc<dyn hypervisor::Vm>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        #[cfg(feature = "guest_debug")] vm_debug_evt: EventFd,
        hypervisor: &Arc<dyn hypervisor::Hypervisor>,
        seccomp_action: SeccompAction,
        vm_ops: Arc<dyn VmOps>,
        #[cfg(feature = "tdx")] tdx_enabled: bool,
        numa_nodes: &NumaNodes,
        #[cfg(feature = "sev_snp")] sev_snp_enabled: bool,
    ) -> Result<Arc<Mutex<CpuManager>>> {
        if u32::from(config.max_vcpus) > hypervisor.get_max_vcpus() {
            return Err(Error::MaximumVcpusExceeded);
        }

        let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
        vcpu_states.resize_with(usize::from(config.max_vcpus), VcpuState::default);
        let hypervisor_type = hypervisor.hypervisor_type();
        #[cfg(target_arch = "x86_64")]
        let cpu_vendor = hypervisor.get_cpu_vendor();

        #[cfg(target_arch = "x86_64")]
        if config.features.amx {
            const ARCH_GET_XCOMP_GUEST_PERM: usize = 0x1024;
            const ARCH_REQ_XCOMP_GUEST_PERM: usize = 0x1025;
            const XFEATURE_XTILEDATA: usize = 18;
            const XFEATURE_XTILEDATA_MASK: usize = 1 << XFEATURE_XTILEDATA;

            // SAFETY: the syscall is only modifying kernel internal
            // data structures that the kernel is itself expected to safeguard.
            let amx_tile = unsafe {
                libc::syscall(
                    libc::SYS_arch_prctl,
                    ARCH_REQ_XCOMP_GUEST_PERM,
                    XFEATURE_XTILEDATA,
                )
            };

            if amx_tile != 0 {
                return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported")));
            } else {
                let mask: usize = 0;
                // SAFETY: the mask being modified (not marked mutable, as it is only
                // modified inside the unsafe block, which is permitted) isn't in use elsewhere.
                let result = unsafe {
                    libc::syscall(libc::SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &mask)
                };
                if result != 0 || (mask & XFEATURE_XTILEDATA_MASK) != XFEATURE_XTILEDATA_MASK {
                    return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported")));
                }
            }
        }

        let proximity_domain_per_cpu: BTreeMap<u8, u32> = {
            let mut cpu_list = Vec::new();
            for (proximity_domain, numa_node) in numa_nodes.iter() {
                for cpu in numa_node.cpus.iter() {
                    cpu_list.push((*cpu, *proximity_domain))
                }
            }
            cpu_list
        }
        .into_iter()
        .collect();

        let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() {
            cpu_affinity
                .iter()
                .map(|a| (a.vcpu, a.host_cpus.clone()))
                .collect()
        } else {
            BTreeMap::new()
        };

        #[cfg(feature = "tdx")]
        let dynamic = !tdx_enabled;
        #[cfg(not(feature = "tdx"))]
        let dynamic = true;

        Ok(Arc::new(Mutex::new(CpuManager {
            config: config.clone(),
            interrupt_controller: None,
            #[cfg(target_arch = "x86_64")]
            cpuid: Vec::new(),
            vm,
            vcpus_kill_signalled: Arc::new(AtomicBool::new(false)),
            vcpus_pause_signalled: Arc::new(AtomicBool::new(false)),
            vcpus_kick_signalled: Arc::new(AtomicBool::new(false)),
            vcpu_states,
            exit_evt,
            reset_evt,
            #[cfg(feature = "guest_debug")]
            vm_debug_evt,
            selected_cpu: 0,
            vcpus: Vec::with_capacity(usize::from(config.max_vcpus)),
            seccomp_action,
            vm_ops,
            acpi_address: None,
            proximity_domain_per_cpu,
            affinity,
            dynamic,
            hypervisor: hypervisor.clone(),
            #[cfg(feature = "sev_snp")]
            sev_snp_enabled,
        })))
    }

    #[cfg(target_arch = "x86_64")]
    pub fn populate_cpuid(
        &mut self,
        memory_manager: &Arc<Mutex<MemoryManager>>,
        hypervisor: &Arc<dyn hypervisor::Hypervisor>,
        #[cfg(feature = "tdx")] tdx: bool,
    ) -> Result<()> {
        let sgx_epc_sections = memory_manager
            .lock()
            .unwrap()
            .sgx_epc_region()
            .as_ref()
            .map(|sgx_epc_region| sgx_epc_region.epc_sections().values().cloned().collect());

        self.cpuid = {
            let phys_bits = physical_bits(hypervisor, self.config.max_phys_bits);
            arch::generate_common_cpuid(
                hypervisor,
                &arch::CpuidConfig {
                    sgx_epc_sections,
                    phys_bits,
                    kvm_hyperv: self.config.kvm_hyperv,
                    #[cfg(feature = "tdx")]
                    tdx,
                    amx: self.config.features.amx,
                },
            )
            .map_err(Error::CommonCpuId)?
        };

        Ok(())
    }

    fn create_vcpu(&mut self, cpu_id: u8, snapshot: Option<Snapshot>) -> Result<Arc<Mutex<Vcpu>>> {
        info!("Creating vCPU: cpu_id = {}", cpu_id);

        #[cfg(target_arch = "x86_64")]
        let topology = self.get_vcpu_topology();
        #[cfg(target_arch = "x86_64")]
        let x2apic_id = arch::x86_64::get_x2apic_id(cpu_id as u32, topology);
        #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
        let x2apic_id = cpu_id as u32;

        let mut vcpu = Vcpu::new(
            cpu_id,
            x2apic_id as u8,
            &self.vm,
            Some(self.vm_ops.clone()),
            #[cfg(target_arch = "x86_64")]
            self.hypervisor.get_cpu_vendor(),
        )?;

        if let Some(snapshot) = snapshot {
            // AArch64 vCPUs should be initialized after being created.
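            // (Hedged note: on aarch64 the underlying hypervisor generally requires the
            // vCPU-init step to be performed before register state can be written, which
            // is why init() is called here ahead of the set_state() restore below.)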
            #[cfg(target_arch = "aarch64")]
            vcpu.init(&self.vm)?;

            let state: CpuState = snapshot.to_state().map_err(|e| {
                Error::VcpuCreate(anyhow!("Could not get vCPU state from snapshot {:?}", e))
            })?;
            vcpu.vcpu
                .set_state(&state)
                .map_err(|e| Error::VcpuCreate(anyhow!("Could not set the vCPU state {:?}", e)))?;

            vcpu.saved_state = Some(state);
        }

        let vcpu = Arc::new(Mutex::new(vcpu));

        // Adding vCPU to the CpuManager's vCPU list.
        self.vcpus.push(vcpu.clone());

        Ok(vcpu)
    }

    pub fn configure_vcpu(
        &self,
        vcpu: Arc<Mutex<Vcpu>>,
        boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>,
    ) -> Result<()> {
        let mut vcpu = vcpu.lock().unwrap();

        #[cfg(feature = "sev_snp")]
        if self.sev_snp_enabled {
            if let Some((kernel_entry_point, _)) = boot_setup {
                vcpu.set_sev_control_register(
                    kernel_entry_point.entry_addr.0 / crate::igvm::HV_PAGE_SIZE,
                )?;
            }

            // The traditional way of configuring a vCPU doesn't work for SEV-SNP guests.
            // All the vCPU configuration for a SEV-SNP guest is provided via the VMSA.
            return Ok(());
        }

        #[cfg(target_arch = "x86_64")]
        assert!(!self.cpuid.is_empty());

        #[cfg(target_arch = "x86_64")]
        let topology = self.config.topology.clone().map_or_else(
            || Some((1, self.boot_vcpus(), 1)),
            |t| Some((t.threads_per_core, t.cores_per_die, t.dies_per_package)),
        );
        #[cfg(target_arch = "x86_64")]
        vcpu.configure(
            boot_setup,
            self.cpuid.clone(),
            self.config.kvm_hyperv,
            topology,
        )?;

        #[cfg(target_arch = "aarch64")]
        vcpu.configure(&self.vm, boot_setup)?;

        #[cfg(target_arch = "riscv64")]
        vcpu.configure(boot_setup)?;

        Ok(())
    }

    /// Only create new vCPUs if there aren't any inactive ones to reuse
    fn create_vcpus(
        &mut self,
        desired_vcpus: u8,
        snapshot: Option<Snapshot>,
    ) -> Result<Vec<Arc<Mutex<Vcpu>>>> {
        let mut vcpus: Vec<Arc<Mutex<Vcpu>>> = vec![];
        info!(
            "Request to create new vCPUs: desired = {}, max = {}, allocated = {}, present = {}",
            desired_vcpus,
            self.config.max_vcpus,
            self.vcpus.len(),
            self.present_vcpus()
        );

        if desired_vcpus > self.config.max_vcpus {
            return Err(Error::DesiredVCpuCountExceedsMax);
        }

        // Only create vCPUs in excess of all the allocated vCPUs.
        for cpu_id in self.vcpus.len() as u8..desired_vcpus {
            vcpus.push(self.create_vcpu(
                cpu_id,
                // TODO: The special format of the CPU id can be removed once
                // ready to break live upgrade.
                snapshot_from_id(snapshot.as_ref(), cpu_id.to_string().as_str()),
            )?);
        }

        Ok(vcpus)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn init_pmu(&self, irq: u32) -> Result<bool> {
        for cpu in self.vcpus.iter() {
            let cpu = cpu.lock().unwrap();
            // Check if the PMU attribute is available; if not, log the information.
            if cpu.vcpu.has_pmu_support() {
                cpu.vcpu.init_pmu(irq).map_err(Error::InitPmu)?;
            } else {
                debug!(
                    "PMU attribute is not supported in vCPU{}, skip PMU init!",
                    cpu.id
                );
                return Ok(false);
            }
        }

        Ok(true)
    }

    pub fn vcpus(&self) -> Vec<Arc<Mutex<Vcpu>>> {
        self.vcpus.clone()
    }

    fn start_vcpu(
        &mut self,
        vcpu: Arc<Mutex<Vcpu>>,
        vcpu_id: u8,
        vcpu_thread_barrier: Arc<Barrier>,
        inserting: bool,
    ) -> Result<()> {
        let reset_evt = self.reset_evt.try_clone().unwrap();
        let exit_evt = self.exit_evt.try_clone().unwrap();
        #[cfg(feature = "kvm")]
        let hypervisor_type = self.hypervisor.hypervisor_type();
        #[cfg(feature = "guest_debug")]
        let vm_debug_evt = self.vm_debug_evt.try_clone().unwrap();
        let panic_exit_evt = self.exit_evt.try_clone().unwrap();
        let vcpu_kill_signalled = self.vcpus_kill_signalled.clone();
        let vcpu_pause_signalled = self.vcpus_pause_signalled.clone();
        let vcpu_kick_signalled = self.vcpus_kick_signalled.clone();

        let vcpu_kill = self.vcpu_states[usize::from(vcpu_id)].kill.clone();
        let vcpu_run_interrupted = self.vcpu_states[usize::from(vcpu_id)]
            .vcpu_run_interrupted
            .clone();
        let panic_vcpu_run_interrupted = vcpu_run_interrupted.clone();
        let vcpu_paused = self.vcpu_states[usize::from(vcpu_id)].paused.clone();

        // Prepare the CPU set the current vCPU is expected to run on.
        let cpuset = self.affinity.get(&vcpu_id).map(|host_cpus| {
            // SAFETY: all zeros is a valid pattern
            let mut cpuset: libc::cpu_set_t = unsafe { std::mem::zeroed() };
            // SAFETY: FFI call, trivially safe
            unsafe { libc::CPU_ZERO(&mut cpuset) };
            for host_cpu in host_cpus {
                // SAFETY: FFI call, trivially safe
                unsafe { libc::CPU_SET(*host_cpu, &mut cpuset) };
            }
            cpuset
        });

        // Retrieve seccomp filter for vcpu thread
        let vcpu_seccomp_filter = get_seccomp_filter(
            &self.seccomp_action,
            Thread::Vcpu,
            self.hypervisor.hypervisor_type(),
        )
        .map_err(Error::CreateSeccompFilter)?;

        #[cfg(target_arch = "x86_64")]
        let interrupt_controller_clone = self.interrupt_controller.as_ref().cloned();

        info!("Starting vCPU: cpu_id = {}", vcpu_id);

        let handle = Some(
            thread::Builder::new()
                .name(format!("vcpu{vcpu_id}"))
                .spawn(move || {
                    // Schedule the thread to run on the expected CPU set
                    if let Some(cpuset) = cpuset.as_ref() {
                        // SAFETY: FFI call with correct arguments
                        let ret = unsafe {
                            libc::sched_setaffinity(
                                0,
                                std::mem::size_of::<libc::cpu_set_t>(),
                                cpuset as *const libc::cpu_set_t,
                            )
                        };

                        if ret != 0 {
                            error!(
                                "Failed scheduling the vCPU {} on the expected CPU set: {}",
                                vcpu_id,
                                io::Error::last_os_error()
                            );
                            return;
                        }
                    }

                    // Apply seccomp filter for vcpu thread.
                    if !vcpu_seccomp_filter.is_empty() {
                        if let Err(e) =
                            apply_filter(&vcpu_seccomp_filter).map_err(Error::ApplySeccompFilter)
                        {
                            error!("Error applying seccomp filter: {:?}", e);
                            return;
                        }
                    }
                    extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {}
                    // This uses an async signal safe handler to kill the vcpu handles.
                    register_signal_handler(SIGRTMIN(), handle_signal)
                        .expect("Failed to register vcpu signal handler");
                    // Block until all CPUs are ready.
                    vcpu_thread_barrier.wait();

                    std::panic::catch_unwind(move || {
                        loop {
                            // If we are being told to pause, we park the thread
                            // until the pause boolean is toggled.
                            // The resume operation is responsible for toggling
                            // the boolean and unparking the thread.
                            // We enter a loop because park() could spuriously
                            // return. We will then park() again unless the
                            // pause boolean has been toggled.

                            // Need to use Ordering::SeqCst as we have multiple
                            // loads and stores to different atomics and we need
                            // to see them in a consistent order in all threads.

                            if vcpu_pause_signalled.load(Ordering::SeqCst) {
                                // As a pause can be caused by PIO & MMIO exits, we need to ensure they are
                                // completed by returning to KVM_RUN. From the kernel docs:
                                //
                                // For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
                                // KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
                                // operations are complete (and guest state is consistent) only after userspace
                                // has re-entered the kernel with KVM_RUN. The kernel side will first finish
                                // incomplete operations and then check for pending signals.
                                // The pending state of the operation is not preserved in state which is
                                // visible to userspace, thus userspace should ensure that the operation is
                                // completed before performing a live migration. Userspace can re-enter the
                                // guest with an unmasked signal pending or with the immediate_exit field set
                                // to complete pending operations without allowing any further instructions
                                // to be executed.

                                #[cfg(feature = "kvm")]
                                if matches!(hypervisor_type, HypervisorType::Kvm) {
                                    vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(true);
                                    if !matches!(vcpu.lock().unwrap().run(), Ok(VmExit::Ignore)) {
                                        error!("Unexpected VM exit on \"immediate_exit\" run");
                                        break;
                                    }
                                    vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(false);
                                }

                                vcpu_run_interrupted.store(true, Ordering::SeqCst);

                                vcpu_paused.store(true, Ordering::SeqCst);
                                while vcpu_pause_signalled.load(Ordering::SeqCst) {
                                    thread::park();
                                }
                                vcpu_run_interrupted.store(false, Ordering::SeqCst);
                            }

                            if vcpu_kick_signalled.load(Ordering::SeqCst) {
                                vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                #[cfg(target_arch = "x86_64")]
                                match vcpu.lock().as_ref().unwrap().vcpu.nmi() {
                                    Ok(()) => {},
                                    Err(e) => {
                                        error!("Error when injecting NMI: {}", e);
                                        break;
                                    }
                                }
                            }

                            // We've been told to terminate
                            if vcpu_kill_signalled.load(Ordering::SeqCst)
                                || vcpu_kill.load(Ordering::SeqCst)
                            {
                                vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                break;
                            }

                            #[cfg(feature = "tdx")]
                            let mut vcpu = vcpu.lock().unwrap();
                            #[cfg(not(feature = "tdx"))]
                            let vcpu = vcpu.lock().unwrap();
                            // vcpu.run() returns false on a triple-fault so trigger a reset
                            match vcpu.run() {
                                Ok(run) => match run {
                                    #[cfg(feature = "kvm")]
                                    VmExit::Debug => {
                                        info!("VmExit::Debug");
                                        #[cfg(feature = "guest_debug")]
                                        {
                                            vcpu_pause_signalled.store(true, Ordering::SeqCst);
                                            let raw_tid = get_raw_tid(vcpu_id as usize);
                                            vm_debug_evt.write(raw_tid as u64).unwrap();
                                        }
                                    }
                                    #[cfg(target_arch = "x86_64")]
                                    VmExit::IoapicEoi(vector) => {
                                        if let Some(interrupt_controller) =
                                            &interrupt_controller_clone
                                        {
                                            interrupt_controller
                                                .lock()
                                                .unwrap()
                                                .end_of_interrupt(vector);
                                        }
                                    }
                                    VmExit::Ignore => {}
                                    VmExit::Hyperv => {}
                                    VmExit::Reset => {
                                        info!("VmExit::Reset");
                                        vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                        reset_evt.write(1).unwrap();
                                        break;
                                    }
                                    VmExit::Shutdown => {
                                        info!("VmExit::Shutdown");
                                        vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                        exit_evt.write(1).unwrap();
                                        break;
                                    }
                                    #[cfg(feature = "tdx")]
                                    VmExit::Tdx => {
                                        if let Some(vcpu) = Arc::get_mut(&mut vcpu.vcpu) {
                                            match vcpu.get_tdx_exit_details() {
                                                Ok(details) => match details {
                                                    TdxExitDetails::GetQuote => warn!("TDG_VP_VMCALL_GET_QUOTE not supported"),
                                                    TdxExitDetails::SetupEventNotifyInterrupt => {
                                                        warn!("TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT not supported")
                                                    }
                                                },
                                                Err(e) => error!("Unexpected TDX VMCALL: {}", e),
                                            }
                                            vcpu.set_tdx_status(TdxExitStatus::InvalidOperand);
                                        } else {
                                            // We should never reach this code, as
                                            // getting here would mean the design of
                                            // the code is wrong.
                                            unreachable!("Couldn't get a mutable reference from Arc<dyn Vcpu> as there are multiple instances");
                                        }
                                    }
                                },

                                Err(e) => {
                                    error!("VCPU generated error: {:?}", Error::VcpuRun(e.into()));
                                    vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                    exit_evt.write(1).unwrap();
                                    break;
                                }
                            }

                            // We've been told to terminate
                            if vcpu_kill_signalled.load(Ordering::SeqCst)
                                || vcpu_kill.load(Ordering::SeqCst)
                            {
                                vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                break;
                            }
                        }
                    })
                    .or_else(|_| {
                        panic_vcpu_run_interrupted.store(true, Ordering::SeqCst);
                        error!("vCPU thread panicked");
                        panic_exit_evt.write(1)
                    })
                    .ok();
                })
                .map_err(Error::VcpuSpawn)?,
        );

        // On hotplug calls into this function, entry_point is None. It is for
        // those hotplug CPU additions that we need to set the inserting flag.
        self.vcpu_states[usize::from(vcpu_id)].handle = handle;
        self.vcpu_states[usize::from(vcpu_id)].inserting = inserting;

        Ok(())
    }

    /// Start up as many vCPU threads as needed to reach `desired_vcpus`
    fn activate_vcpus(
        &mut self,
        desired_vcpus: u8,
        inserting: bool,
        paused: Option<bool>,
    ) -> Result<()> {
        if desired_vcpus > self.config.max_vcpus {
            return Err(Error::DesiredVCpuCountExceedsMax);
        }

        let vcpu_thread_barrier = Arc::new(Barrier::new(
            (desired_vcpus - self.present_vcpus() + 1) as usize,
        ));

        if let Some(paused) = paused {
            self.vcpus_pause_signalled.store(paused, Ordering::SeqCst);
        }

        info!(
            "Starting vCPUs: desired = {}, allocated = {}, present = {}, paused = {}",
            desired_vcpus,
            self.vcpus.len(),
            self.present_vcpus(),
            self.vcpus_pause_signalled.load(Ordering::SeqCst)
        );

        // This reuses any inactive vCPUs as well as any that were newly created
        for vcpu_id in self.present_vcpus()..desired_vcpus {
            let vcpu = Arc::clone(&self.vcpus[vcpu_id as usize]);
            self.start_vcpu(vcpu, vcpu_id, vcpu_thread_barrier.clone(), inserting)?;
        }

        // Unblock all CPU threads.
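        // (Note: the barrier was created above with `desired_vcpus - present_vcpus + 1`
        // participants: one per newly started vCPU thread plus this management thread.
        // Each spawned thread waits on it right after registering its signal handler,
        // so this final wait() releases them all at once. For example, growing from 2
        // present to 4 desired vCPUs yields a barrier of 3 participants.)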
        vcpu_thread_barrier.wait();
        Ok(())
    }

    fn mark_vcpus_for_removal(&mut self, desired_vcpus: u8) {
        // Mark vCPUs for removal, actual removal happens on ejection
        for cpu_id in desired_vcpus..self.present_vcpus() {
            self.vcpu_states[usize::from(cpu_id)].removing = true;
            self.vcpu_states[usize::from(cpu_id)]
                .pending_removal
                .store(true, Ordering::SeqCst);
        }
    }

    pub fn check_pending_removed_vcpu(&mut self) -> bool {
        for state in self.vcpu_states.iter() {
            if state.active() && state.pending_removal.load(Ordering::SeqCst) {
                return true;
            }
        }
        false
    }

    fn remove_vcpu(&mut self, cpu_id: u8) -> Result<()> {
        info!("Removing vCPU: cpu_id = {}", cpu_id);
        let state = &mut self.vcpu_states[usize::from(cpu_id)];
        state.kill.store(true, Ordering::SeqCst);
        state.signal_thread();
        state.join_thread()?;
        state.handle = None;

        // Once the thread has exited, clear the "kill" flag so that it can be reused
        state.kill.store(false, Ordering::SeqCst);
        state.pending_removal.store(false, Ordering::SeqCst);

        Ok(())
    }

    pub fn create_boot_vcpus(
        &mut self,
        snapshot: Option<Snapshot>,
    ) -> Result<Vec<Arc<Mutex<Vcpu>>>> {
        trace_scoped!("create_boot_vcpus");

        self.create_vcpus(self.boot_vcpus(), snapshot)
    }

    // Starts all the vCPUs that the VM is booting with. Blocks until all vCPUs are running.
    pub fn start_boot_vcpus(&mut self, paused: bool) -> Result<()> {
        self.activate_vcpus(self.boot_vcpus(), false, Some(paused))
    }

    pub fn start_restored_vcpus(&mut self) -> Result<()> {
        self.activate_vcpus(self.vcpus.len() as u8, false, Some(true))
            .map_err(|e| {
                Error::StartRestoreVcpu(anyhow!("Failed to start restored vCPUs: {:#?}", e))
            })?;

        Ok(())
    }

    pub fn resize(&mut self, desired_vcpus: u8) -> Result<bool> {
        if desired_vcpus.cmp(&self.present_vcpus()) == cmp::Ordering::Equal {
            return Ok(false);
        }

        if !self.dynamic {
            return Ok(false);
        }

        if desired_vcpus < 1 {
            return Err(Error::DesiredVCpuCountIsZero);
        }

        if self.check_pending_removed_vcpu() {
            return Err(Error::VcpuPendingRemovedVcpu);
        }

        match desired_vcpus.cmp(&self.present_vcpus()) {
            cmp::Ordering::Greater => {
                let vcpus = self.create_vcpus(desired_vcpus, None)?;
                for vcpu in vcpus {
                    self.configure_vcpu(vcpu, None)?
                }
                self.activate_vcpus(desired_vcpus, true, None)?;
                Ok(true)
            }
            cmp::Ordering::Less => {
                self.mark_vcpus_for_removal(desired_vcpus);
                Ok(true)
            }
            _ => Ok(false),
        }
    }

    pub fn shutdown(&mut self) -> Result<()> {
        // Tell the vCPUs to stop themselves next time they go through the loop
        self.vcpus_kill_signalled.store(true, Ordering::SeqCst);

        // Toggle the vCPUs pause boolean
        self.vcpus_pause_signalled.store(false, Ordering::SeqCst);

        // Unpark all the VCPU threads.
        for state in self.vcpu_states.iter() {
            state.unpark_thread();
        }

        // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads
        // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set
        // above.
        for state in self.vcpu_states.iter() {
            state.signal_thread();
        }

        // Wait for all the threads to finish.
        // This removes the state from the vector.
        for mut state in self.vcpu_states.drain(..) {
            state.join_thread()?;
        }

        Ok(())
    }

    #[cfg(feature = "tdx")]
    pub fn initialize_tdx(&self, hob_address: u64) -> Result<()> {
        for vcpu in &self.vcpus {
            vcpu.lock()
                .unwrap()
                .vcpu
                .tdx_init(hob_address)
                .map_err(Error::InitializeTdx)?;
        }
        Ok(())
    }

    pub fn boot_vcpus(&self) -> u8 {
        self.config.boot_vcpus
    }

    pub fn max_vcpus(&self) -> u8 {
        self.config.max_vcpus
    }

    #[cfg(target_arch = "x86_64")]
    pub fn common_cpuid(&self) -> Vec<CpuIdEntry> {
        assert!(!self.cpuid.is_empty());
        self.cpuid.clone()
    }

    fn present_vcpus(&self) -> u8 {
        self.vcpu_states
            .iter()
            .fold(0, |acc, state| acc + state.active() as u8)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn get_mpidrs(&self) -> Vec<u64> {
        self.vcpus
            .iter()
            .map(|cpu| cpu.lock().unwrap().get_mpidr())
            .collect()
    }

    #[cfg(target_arch = "aarch64")]
    pub fn get_saved_states(&self) -> Vec<CpuState> {
        self.vcpus
            .iter()
            .map(|cpu| cpu.lock().unwrap().get_saved_state().unwrap())
            .collect()
    }

    pub fn get_vcpu_topology(&self) -> Option<(u8, u8, u8)> {
        self.config
            .topology
            .clone()
            .map(|t| (t.threads_per_core, t.cores_per_die, t.packages))
    }

    #[cfg(not(target_arch = "riscv64"))]
    pub fn create_madt(&self) -> Sdt {
        use crate::acpi;
        // This is also checked in the commandline parsing.
        assert!(self.config.boot_vcpus <= self.config.max_vcpus);

        let mut madt = Sdt::new(*b"APIC", 44, 5, *b"CLOUDH", *b"CHMADT ", 1);
        #[cfg(target_arch = "x86_64")]
        {
            madt.write(36, arch::layout::APIC_START.0);

            for cpu in 0..self.config.max_vcpus {
                let x2apic_id = get_x2apic_id(cpu.into(), self.get_vcpu_topology());

                let lapic = LocalX2Apic {
                    r#type: acpi::ACPI_X2APIC_PROCESSOR,
                    length: 16,
                    processor_id: cpu.into(),
                    apic_id: x2apic_id,
                    flags: if cpu < self.config.boot_vcpus {
                        1 << MADT_CPU_ENABLE_FLAG
                    } else {
                        0
                    } | (1 << MADT_CPU_ONLINE_CAPABLE_FLAG),
                    _reserved: 0,
                };
                madt.append(lapic);
            }

            madt.append(Ioapic {
                r#type: acpi::ACPI_APIC_IO,
                length: 12,
                ioapic_id: 0,
                apic_address: arch::layout::IOAPIC_START.0 as u32,
                gsi_base: 0,
                ..Default::default()
            });

            madt.append(InterruptSourceOverride {
                r#type: acpi::ACPI_APIC_XRUPT_OVERRIDE,
                length: 10,
                bus: 0,
                source: 4,
                gsi: 4,
                flags: 0,
            });
        }

        #[cfg(target_arch = "aarch64")]
        {
            /* Notes:
             * Ignore Local Interrupt Controller Address at byte offset 36 of MADT table.
             */

            // See section 5.2.12.14 GIC CPU Interface (GICC) Structure in ACPI spec.
            for cpu in 0..self.config.boot_vcpus {
                let vcpu = &self.vcpus[cpu as usize];
                let mpidr = vcpu.lock().unwrap().get_mpidr();
                /* ARMv8 MPIDR format:
                     Bits [63:40] Must be zero
                     Bits [39:32] Aff3 : Match Aff3 of target processor MPIDR
                     Bits [31:24] Must be zero
                     Bits [23:16] Aff2 : Match Aff2 of target processor MPIDR
                     Bits [15:8] Aff1 : Match Aff1 of target processor MPIDR
                     Bits [7:0] Aff0 : Match Aff0 of target processor MPIDR
                */
                let mpidr_mask = 0xff_00ff_ffff;
                let gicc = GicC {
                    r#type: acpi::ACPI_APIC_GENERIC_CPU_INTERFACE,
                    length: 80,
                    reserved0: 0,
                    cpu_interface_number: cpu as u32,
                    uid: cpu as u32,
                    flags: 1,
                    parking_version: 0,
                    performance_interrupt: 0,
                    parked_address: 0,
                    base_address: 0,
                    gicv_base_address: 0,
                    gich_base_address: 0,
                    vgic_interrupt: 0,
                    gicr_base_address: 0,
                    mpidr: mpidr & mpidr_mask,
                    proc_power_effi_class: 0,
                    reserved1: 0,
                    spe_overflow_interrupt: 0,
                };

                madt.append(gicc);
            }
            let vgic_config = Gic::create_default_config(self.config.boot_vcpus.into());

            // GIC Distributor structure. See section 5.2.12.15 in ACPI spec.
            let gicd = GicD {
                r#type: acpi::ACPI_APIC_GENERIC_DISTRIBUTOR,
                length: 24,
                reserved0: 0,
                gic_id: 0,
                base_address: vgic_config.dist_addr,
                global_irq_base: 0,
                version: 3,
                reserved1: [0; 3],
            };
            madt.append(gicd);

            // See 5.2.12.17 GIC Redistributor (GICR) Structure in ACPI spec.
            let gicr = GicR {
                r#type: acpi::ACPI_APIC_GENERIC_REDISTRIBUTOR,
                length: 16,
                reserved: 0,
                base_address: vgic_config.redists_addr,
                range_length: vgic_config.redists_size as u32,
            };
            madt.append(gicr);

            // See 5.2.12.18 GIC Interrupt Translation Service (ITS) Structure in ACPI spec.
            let gicits = GicIts {
                r#type: acpi::ACPI_APIC_GENERIC_TRANSLATOR,
                length: 20,
                reserved0: 0,
                translation_id: 0,
                base_address: vgic_config.msi_addr,
                reserved1: 0,
            };
            madt.append(gicits);

            madt.update_checksum();
        }

        madt
    }

    #[cfg(target_arch = "aarch64")]
    pub fn create_pptt(&self) -> Sdt {
        let pptt_start = 0;
        let mut cpus = 0;
        let mut uid = 0;
        // If topology is not specified, the default setting is:
        // 1 package, multiple cores, 1 thread per core.
        // This is also the behavior when PPTT is missing.
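        // (Worked example of the default below: with max_vcpus = 8 and no explicit
        // topology, get_vcpu_topology() is None and the tuple becomes (1, 8, 1), i.e.
        // one package of eight single-threaded cores, so the PPTT ends up with one
        // cluster node and eight leaf processor nodes.)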
        let (threads_per_core, cores_per_package, packages) =
            self.get_vcpu_topology().unwrap_or((1, self.max_vcpus(), 1));

        let mut pptt = Sdt::new(*b"PPTT", 36, 2, *b"CLOUDH", *b"CHPPTT ", 1);

        for cluster_idx in 0..packages {
            if cpus < self.config.boot_vcpus as usize {
                let cluster_offset = pptt.len() - pptt_start;
                let cluster_hierarchy_node = ProcessorHierarchyNode {
                    r#type: 0,
                    length: 20,
                    reserved: 0,
                    flags: 0x2,
                    parent: 0,
                    acpi_processor_id: cluster_idx as u32,
                    num_private_resources: 0,
                };
                pptt.append(cluster_hierarchy_node);

                for core_idx in 0..cores_per_package {
                    let core_offset = pptt.len() - pptt_start;

                    if threads_per_core > 1 {
                        let core_hierarchy_node = ProcessorHierarchyNode {
                            r#type: 0,
                            length: 20,
                            reserved: 0,
                            flags: 0x2,
                            parent: cluster_offset as u32,
                            acpi_processor_id: core_idx as u32,
                            num_private_resources: 0,
                        };
                        pptt.append(core_hierarchy_node);

                        for _thread_idx in 0..threads_per_core {
                            let thread_hierarchy_node = ProcessorHierarchyNode {
                                r#type: 0,
                                length: 20,
                                reserved: 0,
                                flags: 0xE,
                                parent: core_offset as u32,
                                acpi_processor_id: uid as u32,
                                num_private_resources: 0,
                            };
                            pptt.append(thread_hierarchy_node);
                            uid += 1;
                        }
                    } else {
                        let thread_hierarchy_node = ProcessorHierarchyNode {
                            r#type: 0,
                            length: 20,
                            reserved: 0,
                            flags: 0xA,
                            parent: cluster_offset as u32,
                            acpi_processor_id: uid as u32,
                            num_private_resources: 0,
                        };
                        pptt.append(thread_hierarchy_node);
                        uid += 1;
                    }
                }
                cpus += (cores_per_package * threads_per_core) as usize;
            }
        }

        pptt.update_checksum();
        pptt
    }

    #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
    fn create_standard_regs(&self, cpu_id: u8) -> StandardRegisters {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .create_standard_regs()
    }

    #[cfg(feature = "guest_debug")]
    fn get_regs(&self, cpu_id: u8) -> Result<StandardRegisters> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_regs()
            .map_err(Error::CpuDebug)
    }

    #[cfg(feature = "guest_debug")]
    fn set_regs(&self, cpu_id: u8, regs: &StandardRegisters) -> Result<()> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .set_regs(regs)
            .map_err(Error::CpuDebug)
    }

    #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
    fn get_sregs(&self, cpu_id: u8) -> Result<SpecialRegisters> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_sregs()
            .map_err(Error::CpuDebug)
    }

    #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
    fn set_sregs(&self, cpu_id: u8, sregs: &SpecialRegisters) -> Result<()> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .set_sregs(sregs)
            .map_err(Error::CpuDebug)
    }

    #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))]
    fn translate_gva(
        &self,
        _guest_memory: &GuestMemoryAtomic<GuestMemoryMmap>,
        cpu_id: u8,
        gva: u64,
    ) -> Result<u64> {
        let (gpa, _) = self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .translate_gva(gva, /* flags: unused */ 0)
            .map_err(|e| Error::TranslateVirtualAddress(e.into()))?;
        Ok(gpa)
    }

    ///
    /// On AArch64, the `translate_gva` API is not provided by KVM. We implement
    /// it in the VMM by walking through the translation tables.
    ///
    /// Address translation is a big topic; here we only focus on the scenario
    /// that happens in the VMM while debugging the kernel. This `translate_gva`
    /// implementation is restricted to:
    /// - Exception Level 1
    /// - Translating the high address range only (kernel space)
    ///
    /// This implementation supports the following Armv8-A features related to
    /// address translation:
    /// - FEAT_LPA
    /// - FEAT_LVA
    /// - FEAT_LPA2
    ///
    #[cfg(all(target_arch = "aarch64", feature = "guest_debug"))]
    fn translate_gva(
        &self,
        guest_memory: &GuestMemoryAtomic<GuestMemoryMmap>,
        cpu_id: u8,
        gva: u64,
    ) -> Result<u64> {
        let tcr_el1: u64 = self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_sys_reg(TCR_EL1)
            .map_err(|e| Error::TranslateVirtualAddress(e.into()))?;
        let ttbr1_el1: u64 = self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_sys_reg(TTBR1_EL1)
            .map_err(|e| Error::TranslateVirtualAddress(e.into()))?;
        let id_aa64mmfr0_el1: u64 = self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_sys_reg(ID_AA64MMFR0_EL1)
            .map_err(|e| Error::TranslateVirtualAddress(e.into()))?;

        // Bit 55 of the VA determines the range, high (0xFFFxxx...)
        // or low (0x000xxx...).
        let high_range = extract_bits_64!(gva, 55, 1);
        if high_range == 0 {
            info!("VA (0x{:x}) range is not supported!", gva);
            return Ok(gva);
        }

        // High range size offset
        let tsz = extract_bits_64!(tcr_el1, 16, 6);
        // Granule size
        let tg = extract_bits_64!(tcr_el1, 30, 2);
        // Indication of 48-bit (0) or 52-bit (1) output addressing for FEAT_LPA2
        let ds = extract_bits_64!(tcr_el1, 59, 1);

        if tsz == 0 {
            info!("VA translation is not ready!");
            return Ok(gva);
        }

        // VA size is determined by TCR_EL1.T1SZ
        let va_size = 64 - tsz;
        // Number of bits in the VA consumed at each level of translation
        let stride = match tg {
            3 => 13, // 64KB granule size
            1 => 11, // 16KB granule size
            _ => 9,  // 4KB, default
        };
        // Starting level of the walk
        let mut level = 4 - (va_size - 4) / stride;

        // PA or IPA size is determined
        let tcr_ips = extract_bits_64!(tcr_el1, 32, 3);
        let pa_range = extract_bits_64_without_offset!(id_aa64mmfr0_el1, 4);
        // The IPA size in TCR_EL1 and the PA Range in ID_AA64MMFR0_EL1 should match.
        // To be safe, we use the minimum value if they are different.
        let pa_range = std::cmp::min(tcr_ips, pa_range);
        // PA size in bits
        let pa_size = match pa_range {
            0 => 32,
            1 => 36,
            2 => 40,
            3 => 42,
            4 => 44,
            5 => 48,
            6 => 52,
            _ => {
                return Err(Error::TranslateVirtualAddress(anyhow!(format!(
                    "PA range not supported {pa_range}"
                ))))
            }
        };

        let indexmask_grainsize = (!0u64) >> (64 - (stride + 3));
        let mut indexmask = (!0u64) >> (64 - (va_size - (stride * (4 - level))));
        // If FEAT_LPA2 is present, the translation table descriptor holds
        // 50 bits of the table address of the next level.
        // Otherwise, it is 48 bits.
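        // (Worked example, assuming the common 4KB granule with a 48-bit VA: stride = 9,
        // so indexmask_grainsize = (!0u64) >> (64 - 12) = 0xFFF, and with ds == 0 the
        // mask below evaluates to (!0u64 >> 16) & !0xFFF = 0x0000_FFFF_FFFF_F000,
        // i.e. descriptor bits [47:12], the next-level table address.)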
        let descaddrmask = if ds == 1 {
            !0u64 >> (64 - 50) // mask with the 50 least significant bits
        } else {
            !0u64 >> (64 - 48) // mask with the 48 least significant bits
        };
        let descaddrmask = descaddrmask & !indexmask_grainsize;

        // Translation table base address
        let mut descaddr: u64 = extract_bits_64_without_offset!(ttbr1_el1, 48);
        // In the case of FEAT_LPA and FEAT_LPA2, the initial translation table
        // address bits [48:51] come from TTBR1_EL1 bits [2:5].
        if pa_size == 52 {
            descaddr |= extract_bits_64!(ttbr1_el1, 2, 4) << 48;
        }

        // Loop through the tables at each level
        loop {
            // Table offset for the current level
            let table_offset: u64 = (gva >> (stride * (4 - level))) & indexmask;
            descaddr |= table_offset;
            descaddr &= !7u64;

            let mut buf = [0; 8];
            guest_memory
                .memory()
                .read(&mut buf, GuestAddress(descaddr))
                .map_err(|e| Error::TranslateVirtualAddress(e.into()))?;
            let descriptor = u64::from_le_bytes(buf);

            descaddr = descriptor & descaddrmask;
            // In the case of FEAT_LPA, the next-level translation table address
            // bits [48:51] come from bits [12:15] of the current descriptor.
            // For FEAT_LPA2, the next-level translation table address
            // bits [50:51] come from bits [8:9] of the current descriptor, and
            // bits [48:49] come from bits [48:49] of the descriptor, which were
            // handled previously.
            if pa_size == 52 {
                if ds == 1 {
                    // FEAT_LPA2
                    descaddr |= extract_bits_64!(descriptor, 8, 2) << 50;
                } else {
                    // FEAT_LPA
                    descaddr |= extract_bits_64!(descriptor, 12, 4) << 48;
                }
            }

            if (descriptor & 2) != 0 && (level < 3) {
                // This is a table entry. Go down to the next level.
                level += 1;
                indexmask = indexmask_grainsize;
                continue;
            }

            break;
        }

        // We have reached either:
        // - a page entry at level 3 or
        // - a block entry at level 1 or 2
        let page_size = 1u64 << ((stride * (4 - level)) + 3);
        descaddr &= !(page_size - 1);
        descaddr |= gva & (page_size - 1);

        Ok(descaddr)
    }

    pub(crate) fn set_acpi_address(&mut self, acpi_address: GuestAddress) {
        self.acpi_address = Some(acpi_address);
    }

    pub(crate) fn set_interrupt_controller(
        &mut self,
        interrupt_controller: Arc<Mutex<dyn InterruptController>>,
    ) {
        self.interrupt_controller = Some(interrupt_controller);
    }

    pub(crate) fn vcpus_kill_signalled(&self) -> &Arc<AtomicBool> {
        &self.vcpus_kill_signalled
    }

    #[cfg(feature = "igvm")]
    pub(crate) fn get_cpuid_leaf(
        &self,
        cpu_id: u8,
        eax: u32,
        ecx: u32,
        xfem: u64,
        xss: u64,
    ) -> Result<[u32; 4]> {
        let leaf_info = self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_cpuid_values(eax, ecx, xfem, xss)
            .unwrap();
        Ok(leaf_info)
    }

    #[cfg(feature = "sev_snp")]
    pub(crate) fn sev_snp_enabled(&self) -> bool {
        self.sev_snp_enabled
    }

    pub(crate) fn nmi(&self) -> Result<()> {
        self.vcpus_kick_signalled.store(true, Ordering::SeqCst);

        for state in self.vcpu_states.iter() {
            state.signal_thread();
        }

        self.vcpus_kick_signalled.store(false, Ordering::SeqCst);

        Ok(())
    }
}

struct Cpu {
    cpu_id: u8,
    proximity_domain: u32,
    dynamic: bool,
    #[cfg(target_arch = "x86_64")]
    topology: Option<(u8, u8, u8)>,
}

#[cfg(target_arch = "x86_64")]
const MADT_CPU_ENABLE_FLAG: usize = 0;

#[cfg(target_arch = "x86_64")]
const MADT_CPU_ONLINE_CAPABLE_FLAG: usize = 1;

impl Cpu {
    #[cfg(target_arch = "x86_64")]
    fn generate_mat(&self) -> Vec<u8> {
        let x2apic_id = arch::x86_64::get_x2apic_id(self.cpu_id.into(), self.topology);

        let lapic = LocalX2Apic {
            r#type: crate::acpi::ACPI_X2APIC_PROCESSOR,
            length: 16,
            processor_id: self.cpu_id.into(),
            apic_id: x2apic_id,
            flags: 1 << MADT_CPU_ENABLE_FLAG,
            _reserved: 0,
        };

        let mut mat_data: Vec<u8> = vec![0; std::mem::size_of_val(&lapic)];
        // SAFETY: mat_data is large enough to hold lapic
        unsafe { *(mat_data.as_mut_ptr() as *mut LocalX2Apic) = lapic };

        mat_data
    }
}

impl Aml for Cpu {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "x86_64")]
        let mat_data: Vec<u8> = self.generate_mat();
        #[allow(clippy::if_same_then_else)]
        if self.dynamic {
            aml::Device::new(
                format!("C{:03X}", self.cpu_id).as_str().into(),
                vec![
                    &aml::Name::new("_HID".into(), &"ACPI0007"),
                    &aml::Name::new("_UID".into(), &self.cpu_id),
                    // Currently, AArch64 cannot support the following fields.
                    /*
                    _STA return value:
                    Bit [0] – Set if the device is present.
                    Bit [1] – Set if the device is enabled and decoding its resources.
                    Bit [2] – Set if the device should be shown in the UI.
                    Bit [3] – Set if the device is functioning properly (cleared if device failed its diagnostics).
                    Bit [4] – Set if the battery is present.
                    Bits [31:5] – Reserved (must be cleared).
                    */
                    #[cfg(target_arch = "x86_64")]
                    &aml::Method::new(
                        "_STA".into(),
                        0,
                        false,
                        // Call into CSTA method which will interrogate device
                        vec![&aml::Return::new(&aml::MethodCall::new(
                            "CSTA".into(),
                            vec![&self.cpu_id],
                        ))],
                    ),
                    &aml::Method::new(
                        "_PXM".into(),
                        0,
                        false,
                        vec![&aml::Return::new(&self.proximity_domain)],
                    ),
                    // The Linux kernel expects every CPU device to have a _MAT entry
                    // containing the LAPIC for this processor with the enabled bit set,
                    // even if it is disabled in the MADT (non-boot CPU)
                    #[cfg(target_arch = "x86_64")]
                    &aml::Name::new("_MAT".into(), &aml::BufferData::new(mat_data)),
                    // Trigger CPU ejection
                    #[cfg(target_arch = "x86_64")]
                    &aml::Method::new(
                        "_EJ0".into(),
                        1,
                        false,
                        // Call into CEJ0 method which will actually eject device
                        vec![&aml::MethodCall::new("CEJ0".into(), vec![&self.cpu_id])],
                    ),
                ],
            )
            .to_aml_bytes(sink);
        } else {
            aml::Device::new(
                format!("C{:03X}", self.cpu_id).as_str().into(),
                vec![
                    &aml::Name::new("_HID".into(), &"ACPI0007"),
                    &aml::Name::new("_UID".into(), &self.cpu_id),
                    #[cfg(target_arch = "x86_64")]
                    &aml::Method::new(
                        "_STA".into(),
                        0,
                        false,
                        // Mark the CPU present; see the CSTA implementation
                        vec![&aml::Return::new(&0xfu8)],
                    ),
                    &aml::Method::new(
                        "_PXM".into(),
                        0,
                        false,
                        vec![&aml::Return::new(&self.proximity_domain)],
                    ),
                    // The Linux kernel expects every CPU device to have a _MAT entry
                    // containing the LAPIC for this processor with the enabled bit set,
                    // even if it is disabled in the MADT (non-boot CPU)
                    #[cfg(target_arch = "x86_64")]
                    &aml::Name::new("_MAT".into(), &aml::BufferData::new(mat_data)),
                ],
            )
            .to_aml_bytes(sink);
        }
    }
}

struct CpuNotify {
    cpu_id: u8,
}

impl Aml for CpuNotify {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        let object = aml::Path::new(&format!("C{:03X}", self.cpu_id));
        aml::If::new(
            &aml::Equal::new(&aml::Arg(0), &self.cpu_id),
            vec![&aml::Notify::new(&object, &aml::Arg(1))],
        )
        .to_aml_bytes(sink)
    }
}

struct CpuMethods {
    max_vcpus: u8,
    dynamic: bool,
}

impl Aml for CpuMethods {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        if self.dynamic {
            // CPU status method
            aml::Method::new(
                "CSTA".into(),
                1,
                true,
                vec![
                    // Take lock defined above
                    &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff),
                    // Write CPU number (in first argument) to I/O port via field
                    &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Arg(0)),
                    &aml::Store::new(&aml::Local(0), &aml::ZERO),
                    // Check if CPEN bit is set, if so make the local variable 0xf (see _STA for details of meaning)
                    &aml::If::new(
                        &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CPEN"), &aml::ONE),
                        vec![&aml::Store::new(&aml::Local(0), &0xfu8)],
                    ),
                    // Release lock
                    &aml::Release::new("\\_SB_.PRES.CPLK".into()),
                    // Return 0 or 0xf
                    &aml::Return::new(&aml::Local(0)),
                ],
            )
            .to_aml_bytes(sink);

            let mut cpu_notifies = Vec::new();
            for cpu_id in 0..self.max_vcpus {
                cpu_notifies.push(CpuNotify { cpu_id });
            }

            let mut cpu_notifies_refs: Vec<&dyn Aml> = Vec::new();
            for cpu_id in
0..self.max_vcpus { 2101 cpu_notifies_refs.push(&cpu_notifies[usize::from(cpu_id)]); 2102 } 2103 2104 aml::Method::new("CTFY".into(), 2, true, cpu_notifies_refs).to_aml_bytes(sink); 2105 2106 aml::Method::new( 2107 "CEJ0".into(), 2108 1, 2109 true, 2110 vec![ 2111 &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff), 2112 // Write CPU number (in first argument) to I/O port via field 2113 &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Arg(0)), 2114 // Set CEJ0 bit 2115 &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CEJ0"), &aml::ONE), 2116 &aml::Release::new("\\_SB_.PRES.CPLK".into()), 2117 ], 2118 ) 2119 .to_aml_bytes(sink); 2120 2121 aml::Method::new( 2122 "CSCN".into(), 2123 0, 2124 true, 2125 vec![ 2126 // Take lock defined above 2127 &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff), 2128 &aml::Store::new(&aml::Local(0), &aml::ZERO), 2129 &aml::While::new( 2130 &aml::LessThan::new(&aml::Local(0), &self.max_vcpus), 2131 vec![ 2132 // Write CPU number (in first argument) to I/O port via field 2133 &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Local(0)), 2134 // Check if CINS bit is set 2135 &aml::If::new( 2136 &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CINS"), &aml::ONE), 2137 // Notify device if it is 2138 vec![ 2139 &aml::MethodCall::new( 2140 "CTFY".into(), 2141 vec![&aml::Local(0), &aml::ONE], 2142 ), 2143 // Reset CINS bit 2144 &aml::Store::new( 2145 &aml::Path::new("\\_SB_.PRES.CINS"), 2146 &aml::ONE, 2147 ), 2148 ], 2149 ), 2150 // Check if CRMV bit is set 2151 &aml::If::new( 2152 &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CRMV"), &aml::ONE), 2153 // Notify device if it is (with the eject constant 0x3) 2154 vec![ 2155 &aml::MethodCall::new( 2156 "CTFY".into(), 2157 vec![&aml::Local(0), &3u8], 2158 ), 2159 // Reset CRMV bit 2160 &aml::Store::new( 2161 &aml::Path::new("\\_SB_.PRES.CRMV"), 2162 &aml::ONE, 2163 ), 2164 ], 2165 ), 2166 &aml::Add::new(&aml::Local(0), &aml::Local(0), &aml::ONE), 2167 ], 2168 ), 2169 // Release lock 2170 &aml::Release::new("\\_SB_.PRES.CPLK".into()), 2171 ], 2172 ) 2173 .to_aml_bytes(sink) 2174 } else { 2175 aml::Method::new("CSCN".into(), 0, true, vec![]).to_aml_bytes(sink) 2176 } 2177 } 2178 } 2179 2180 impl Aml for CpuManager { 2181 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 2182 #[cfg(target_arch = "x86_64")] 2183 if let Some(acpi_address) = self.acpi_address { 2184 // CPU hotplug controller 2185 aml::Device::new( 2186 "_SB_.PRES".into(), 2187 vec![ 2188 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")), 2189 &aml::Name::new("_UID".into(), &"CPU Hotplug Controller"), 2190 // Mutex to protect concurrent access as we write to choose CPU and then read back status 2191 &aml::Mutex::new("CPLK".into(), 0), 2192 &aml::Name::new( 2193 "_CRS".into(), 2194 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( 2195 aml::AddressSpaceCacheable::NotCacheable, 2196 true, 2197 acpi_address.0, 2198 acpi_address.0 + CPU_MANAGER_ACPI_SIZE as u64 - 1, 2199 None, 2200 )]), 2201 ), 2202 // OpRegion and Fields map MMIO range into individual field values 2203 &aml::OpRegion::new( 2204 "PRST".into(), 2205 aml::OpRegionSpace::SystemMemory, 2206 &(acpi_address.0 as usize), 2207 &CPU_MANAGER_ACPI_SIZE, 2208 ), 2209 &aml::Field::new( 2210 "PRST".into(), 2211 aml::FieldAccessType::Byte, 2212 aml::FieldLockRule::NoLock, 2213 aml::FieldUpdateRule::WriteAsZeroes, 2214 vec![ 2215 aml::FieldEntry::Reserved(32), 2216 aml::FieldEntry::Named(*b"CPEN", 1), 2217 aml::FieldEntry::Named(*b"CINS", 1), 2218 
                            aml::FieldEntry::Named(*b"CRMV", 1),
                            aml::FieldEntry::Named(*b"CEJ0", 1),
                            aml::FieldEntry::Reserved(4),
                            aml::FieldEntry::Named(*b"CCMD", 8),
                        ],
                    ),
                    &aml::Field::new(
                        "PRST".into(),
                        aml::FieldAccessType::DWord,
                        aml::FieldLockRule::NoLock,
                        aml::FieldUpdateRule::Preserve,
                        vec![
                            aml::FieldEntry::Named(*b"CSEL", 32),
                            aml::FieldEntry::Reserved(32),
                            aml::FieldEntry::Named(*b"CDAT", 32),
                        ],
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // CPU devices
        let hid = aml::Name::new("_HID".into(), &"ACPI0010");
        let uid = aml::Name::new("_CID".into(), &aml::EISAName::new("PNP0A05"));
        // Bundle methods together under a common object
        let methods = CpuMethods {
            max_vcpus: self.config.max_vcpus,
            dynamic: self.dynamic,
        };
        let mut cpu_data_inner: Vec<&dyn Aml> = vec![&hid, &uid, &methods];

        #[cfg(target_arch = "x86_64")]
        let topology = self.get_vcpu_topology();
        let mut cpu_devices = Vec::new();
        for cpu_id in 0..self.config.max_vcpus {
            let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0);
            let cpu_device = Cpu {
                cpu_id,
                proximity_domain,
                dynamic: self.dynamic,
                #[cfg(target_arch = "x86_64")]
                topology,
            };

            cpu_devices.push(cpu_device);
        }

        for cpu_device in cpu_devices.iter() {
            cpu_data_inner.push(cpu_device);
        }

        aml::Device::new("_SB_.CPUS".into(), cpu_data_inner).to_aml_bytes(sink)
    }
}

impl Pausable for CpuManager {
    fn pause(&mut self) -> std::result::Result<(), MigratableError> {
        // Tell the vCPUs to pause themselves the next time they exit
        self.vcpus_pause_signalled.store(true, Ordering::SeqCst);

        // Signal the spawned threads (vCPUs and console signal handler). For the vCPU threads
        // this will interrupt the KVM_RUN ioctl(), allowing the loop to check the boolean set
        // above.
        for state in self.vcpu_states.iter() {
            state.signal_thread();
        }

        for vcpu in self.vcpus.iter() {
            let mut vcpu = vcpu.lock().unwrap();
            vcpu.pause()?;
            #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
            if !self.config.kvm_hyperv {
                vcpu.vcpu.notify_guest_clock_paused().map_err(|e| {
                    MigratableError::Pause(anyhow!(
                        "Could not notify guest it has been paused {:?}",
                        e
                    ))
                })?;
            }
        }

        // Each vCPU thread changes its paused state before parking. Wait here for every
        // activated vCPU to change its state, to ensure they have all parked.
        for state in self.vcpu_states.iter() {
            if state.active() {
                while !state.paused.load(Ordering::SeqCst) {
                    // To avoid a priority inversion with the vCPU thread
                    thread::sleep(std::time::Duration::from_millis(1));
                }
            }
        }

        Ok(())
    }

    fn resume(&mut self) -> std::result::Result<(), MigratableError> {
        for vcpu in self.vcpus.iter() {
            vcpu.lock().unwrap().resume()?;
        }

        // Clear the vCPUs' pause boolean
        self.vcpus_pause_signalled.store(false, Ordering::SeqCst);

        // Unpark all the vCPU threads.
        // Once unparked, the next thing they will do is check the pause boolean.
        // Since it is now false, they will exit their pause loop and re-enter the
        // guest.
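        // Each per-vCPU `paused` flag is also cleared before unparking its thread, so the
        // next call to pause() waits on a freshly reset value rather than the state left
        // over from this pause/resume cycle.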
2326 for state in self.vcpu_states.iter() { 2327 state.paused.store(false, Ordering::SeqCst); 2328 state.unpark_thread(); 2329 } 2330 Ok(()) 2331 } 2332 } 2333 2334 impl Snapshottable for CpuManager { 2335 fn id(&self) -> String { 2336 CPU_MANAGER_SNAPSHOT_ID.to_string() 2337 } 2338 2339 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 2340 let mut cpu_manager_snapshot = Snapshot::default(); 2341 2342 // The CpuManager snapshot is a collection of all vCPUs snapshots. 2343 for vcpu in &self.vcpus { 2344 let mut vcpu = vcpu.lock().unwrap(); 2345 cpu_manager_snapshot.add_snapshot(vcpu.id(), vcpu.snapshot()?); 2346 } 2347 2348 Ok(cpu_manager_snapshot) 2349 } 2350 } 2351 2352 impl Transportable for CpuManager {} 2353 impl Migratable for CpuManager {} 2354 2355 #[cfg(feature = "guest_debug")] 2356 impl Debuggable for CpuManager { 2357 #[cfg(feature = "kvm")] 2358 fn set_guest_debug( 2359 &self, 2360 cpu_id: usize, 2361 addrs: &[GuestAddress], 2362 singlestep: bool, 2363 ) -> std::result::Result<(), DebuggableError> { 2364 self.vcpus[cpu_id] 2365 .lock() 2366 .unwrap() 2367 .vcpu 2368 .set_guest_debug(addrs, singlestep) 2369 .map_err(DebuggableError::SetDebug) 2370 } 2371 2372 fn debug_pause(&mut self) -> std::result::Result<(), DebuggableError> { 2373 Ok(()) 2374 } 2375 2376 fn debug_resume(&mut self) -> std::result::Result<(), DebuggableError> { 2377 Ok(()) 2378 } 2379 2380 #[cfg(target_arch = "x86_64")] 2381 fn read_regs(&self, cpu_id: usize) -> std::result::Result<CoreRegs, DebuggableError> { 2382 // General registers: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15 2383 let gregs = self 2384 .get_regs(cpu_id as u8) 2385 .map_err(DebuggableError::ReadRegs)?; 2386 let regs = [ 2387 gregs.get_rax(), 2388 gregs.get_rbx(), 2389 gregs.get_rcx(), 2390 gregs.get_rdx(), 2391 gregs.get_rsi(), 2392 gregs.get_rdi(), 2393 gregs.get_rbp(), 2394 gregs.get_rsp(), 2395 gregs.get_r8(), 2396 gregs.get_r9(), 2397 gregs.get_r10(), 2398 gregs.get_r11(), 2399 gregs.get_r12(), 2400 gregs.get_r13(), 2401 gregs.get_r14(), 2402 gregs.get_r15(), 2403 ]; 2404 2405 // GDB exposes 32-bit eflags instead of 64-bit rflags. 
        // https://github.com/bminor/binutils-gdb/blob/master/gdb/features/i386/64bit-core.xml
        let eflags = gregs.get_rflags() as u32;
        let rip = gregs.get_rip();

        // Segment registers: CS, SS, DS, ES, FS, GS
        let sregs = self
            .get_sregs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        let segments = X86SegmentRegs {
            cs: sregs.cs.selector as u32,
            ss: sregs.ss.selector as u32,
            ds: sregs.ds.selector as u32,
            es: sregs.es.selector as u32,
            fs: sregs.fs.selector as u32,
            gs: sregs.gs.selector as u32,
        };

        // TODO: Add other registers

        Ok(CoreRegs {
            regs,
            eflags,
            rip,
            segments,
            ..Default::default()
        })
    }

    #[cfg(target_arch = "aarch64")]
    fn read_regs(&self, cpu_id: usize) -> std::result::Result<CoreRegs, DebuggableError> {
        let gregs = self
            .get_regs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        Ok(CoreRegs {
            x: gregs.get_regs(),
            sp: gregs.get_sp(),
            pc: gregs.get_pc(),
            ..Default::default()
        })
    }

    #[cfg(target_arch = "x86_64")]
    fn write_regs(
        &self,
        cpu_id: usize,
        regs: &CoreRegs,
    ) -> std::result::Result<(), DebuggableError> {
        let orig_gregs = self
            .get_regs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        let mut gregs = self.create_standard_regs(cpu_id as u8);
        gregs.set_rax(regs.regs[0]);
        gregs.set_rbx(regs.regs[1]);
        gregs.set_rcx(regs.regs[2]);
        gregs.set_rdx(regs.regs[3]);
        gregs.set_rsi(regs.regs[4]);
        gregs.set_rdi(regs.regs[5]);
        gregs.set_rbp(regs.regs[6]);
        gregs.set_rsp(regs.regs[7]);
        gregs.set_r8(regs.regs[8]);
        gregs.set_r9(regs.regs[9]);
        gregs.set_r10(regs.regs[10]);
        gregs.set_r11(regs.regs[11]);
        gregs.set_r12(regs.regs[12]);
        gregs.set_r13(regs.regs[13]);
        gregs.set_r14(regs.regs[14]);
        gregs.set_r15(regs.regs[15]);
        gregs.set_rip(regs.rip);
        // Update only the lower 32 bits of rflags.
        gregs.set_rflags((orig_gregs.get_rflags() & !(u32::MAX as u64)) | (regs.eflags as u64));

        self.set_regs(cpu_id as u8, &gregs)
            .map_err(DebuggableError::WriteRegs)?;

        // Segment registers: CS, SS, DS, ES, FS, GS
        // Since GDB only cares about the selectors, we call get_sregs() first.
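        // Only the 16-bit selectors are patched below (GDB hands them over as 32-bit
        // values); the cached base/limit/attribute fields returned by get_sregs() are
        // left untouched.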
2482 let mut sregs = self 2483 .get_sregs(cpu_id as u8) 2484 .map_err(DebuggableError::ReadRegs)?; 2485 sregs.cs.selector = regs.segments.cs as u16; 2486 sregs.ss.selector = regs.segments.ss as u16; 2487 sregs.ds.selector = regs.segments.ds as u16; 2488 sregs.es.selector = regs.segments.es as u16; 2489 sregs.fs.selector = regs.segments.fs as u16; 2490 sregs.gs.selector = regs.segments.gs as u16; 2491 2492 self.set_sregs(cpu_id as u8, &sregs) 2493 .map_err(DebuggableError::WriteRegs)?; 2494 2495 // TODO: Add other registers 2496 2497 Ok(()) 2498 } 2499 2500 #[cfg(target_arch = "aarch64")] 2501 fn write_regs( 2502 &self, 2503 cpu_id: usize, 2504 regs: &CoreRegs, 2505 ) -> std::result::Result<(), DebuggableError> { 2506 let mut gregs = self 2507 .get_regs(cpu_id as u8) 2508 .map_err(DebuggableError::ReadRegs)?; 2509 2510 gregs.set_regs(regs.x); 2511 gregs.set_sp(regs.sp); 2512 gregs.set_pc(regs.pc); 2513 2514 self.set_regs(cpu_id as u8, &gregs) 2515 .map_err(DebuggableError::WriteRegs)?; 2516 2517 Ok(()) 2518 } 2519 2520 fn read_mem( 2521 &self, 2522 guest_memory: &GuestMemoryAtomic<GuestMemoryMmap>, 2523 cpu_id: usize, 2524 vaddr: GuestAddress, 2525 len: usize, 2526 ) -> std::result::Result<Vec<u8>, DebuggableError> { 2527 let mut buf = vec![0; len]; 2528 let mut total_read = 0_u64; 2529 2530 while total_read < len as u64 { 2531 let gaddr = vaddr.0 + total_read; 2532 let paddr = match self.translate_gva(guest_memory, cpu_id as u8, gaddr) { 2533 Ok(paddr) => paddr, 2534 Err(_) if gaddr == u64::MIN => gaddr, // Silently return GVA as GPA if GVA == 0. 2535 Err(e) => return Err(DebuggableError::TranslateGva(e)), 2536 }; 2537 let psize = arch::PAGE_SIZE as u64; 2538 let read_len = std::cmp::min(len as u64 - total_read, psize - (paddr & (psize - 1))); 2539 guest_memory 2540 .memory() 2541 .read( 2542 &mut buf[total_read as usize..total_read as usize + read_len as usize], 2543 GuestAddress(paddr), 2544 ) 2545 .map_err(DebuggableError::ReadMem)?; 2546 total_read += read_len; 2547 } 2548 Ok(buf) 2549 } 2550 2551 fn write_mem( 2552 &self, 2553 guest_memory: &GuestMemoryAtomic<GuestMemoryMmap>, 2554 cpu_id: usize, 2555 vaddr: &GuestAddress, 2556 data: &[u8], 2557 ) -> std::result::Result<(), DebuggableError> { 2558 let mut total_written = 0_u64; 2559 2560 while total_written < data.len() as u64 { 2561 let gaddr = vaddr.0 + total_written; 2562 let paddr = match self.translate_gva(guest_memory, cpu_id as u8, gaddr) { 2563 Ok(paddr) => paddr, 2564 Err(_) if gaddr == u64::MIN => gaddr, // Silently return GVA as GPA if GVA == 0. 
2565 Err(e) => return Err(DebuggableError::TranslateGva(e)), 2566 }; 2567 let psize = arch::PAGE_SIZE as u64; 2568 let write_len = std::cmp::min( 2569 data.len() as u64 - total_written, 2570 psize - (paddr & (psize - 1)), 2571 ); 2572 guest_memory 2573 .memory() 2574 .write( 2575 &data[total_written as usize..total_written as usize + write_len as usize], 2576 GuestAddress(paddr), 2577 ) 2578 .map_err(DebuggableError::WriteMem)?; 2579 total_written += write_len; 2580 } 2581 Ok(()) 2582 } 2583 2584 fn active_vcpus(&self) -> usize { 2585 self.present_vcpus() as usize 2586 } 2587 } 2588 2589 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 2590 impl Elf64Writable for CpuManager {} 2591 2592 #[cfg(all(target_arch = "x86_64", feature = "guest_debug"))] 2593 impl CpuElf64Writable for CpuManager { 2594 fn cpu_write_elf64_note( 2595 &mut self, 2596 dump_state: &DumpState, 2597 ) -> std::result::Result<(), GuestDebuggableError> { 2598 let mut coredump_file = dump_state.file.as_ref().unwrap(); 2599 for vcpu in &self.vcpus { 2600 let note_size = self.get_note_size(NoteDescType::Elf, 1); 2601 let mut pos: usize = 0; 2602 let mut buf = vec![0; note_size as usize]; 2603 let descsz = size_of::<X86_64ElfPrStatus>(); 2604 let vcpu_id = vcpu.lock().unwrap().id; 2605 2606 let note = Elf64_Nhdr { 2607 n_namesz: COREDUMP_NAME_SIZE, 2608 n_descsz: descsz as u32, 2609 n_type: NT_PRSTATUS, 2610 }; 2611 2612 let bytes: &[u8] = note.as_slice(); 2613 buf.splice(0.., bytes.to_vec()); 2614 pos += round_up!(size_of::<Elf64_Nhdr>(), 4); 2615 buf.resize(pos + 4, 0); 2616 buf.splice(pos.., "CORE".to_string().into_bytes()); 2617 2618 pos += round_up!(COREDUMP_NAME_SIZE as usize, 4); 2619 buf.resize(pos + 32 + 4, 0); 2620 let pid = vcpu_id as u64; 2621 let bytes: &[u8] = pid.as_slice(); 2622 buf.splice(pos + 32.., bytes.to_vec()); /* pr_pid */ 2623 2624 pos += descsz - size_of::<X86_64UserRegs>() - size_of::<u64>(); 2625 2626 let orig_rax: u64 = 0; 2627 let gregs = self.vcpus[usize::from(vcpu_id)] 2628 .lock() 2629 .unwrap() 2630 .vcpu 2631 .get_regs() 2632 .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?; 2633 2634 let regs1 = [ 2635 gregs.get_r15(), 2636 gregs.get_r14(), 2637 gregs.get_r13(), 2638 gregs.get_r12(), 2639 gregs.get_rbp(), 2640 gregs.get_rbx(), 2641 gregs.get_r11(), 2642 gregs.get_r10(), 2643 ]; 2644 let regs2 = [ 2645 gregs.get_r9(), 2646 gregs.get_r8(), 2647 gregs.get_rax(), 2648 gregs.get_rcx(), 2649 gregs.get_rdx(), 2650 gregs.get_rsi(), 2651 gregs.get_rdi(), 2652 orig_rax, 2653 ]; 2654 2655 let sregs = self.vcpus[usize::from(vcpu_id)] 2656 .lock() 2657 .unwrap() 2658 .vcpu 2659 .get_sregs() 2660 .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?; 2661 2662 debug!( 2663 "rip 0x{:x} rsp 0x{:x} gs 0x{:x} cs 0x{:x} ss 0x{:x} ds 0x{:x}", 2664 gregs.get_rip(), 2665 gregs.get_rsp(), 2666 sregs.gs.base, 2667 sregs.cs.selector, 2668 sregs.ss.selector, 2669 sregs.ds.selector, 2670 ); 2671 2672 let regs = X86_64UserRegs { 2673 regs1, 2674 regs2, 2675 rip: gregs.get_rip(), 2676 cs: sregs.cs.selector as u64, 2677 eflags: gregs.get_rflags(), 2678 rsp: gregs.get_rsp(), 2679 ss: sregs.ss.selector as u64, 2680 fs_base: sregs.fs.base, 2681 gs_base: sregs.gs.base, 2682 ds: sregs.ds.selector as u64, 2683 es: sregs.es.selector as u64, 2684 fs: sregs.fs.selector as u64, 2685 gs: sregs.gs.selector as u64, 2686 }; 2687 2688 // let bytes: &[u8] = unsafe { any_as_u8_slice(®s) }; 2689 let bytes: &[u8] = regs.as_slice(); 2690 buf.resize(note_size as usize, 0); 2691 
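            // At this point `buf` holds the note header, the 4-byte aligned "CORE" name and
            // the start of the prstatus descriptor (pr_pid was stored at offset 32 into the
            // descriptor above); `pos` points at the register area, sized so that one
            // trailing 8-byte field of the descriptor remains after it. Copy the registers
            // in, then pad the buffer back to the full note size.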
buf.splice(pos.., bytes.to_vec()); 2692 buf.resize(note_size as usize, 0); 2693 2694 coredump_file 2695 .write(&buf) 2696 .map_err(GuestDebuggableError::CoredumpFile)?; 2697 } 2698 2699 Ok(()) 2700 } 2701 2702 fn cpu_write_vmm_note( 2703 &mut self, 2704 dump_state: &DumpState, 2705 ) -> std::result::Result<(), GuestDebuggableError> { 2706 let mut coredump_file = dump_state.file.as_ref().unwrap(); 2707 for vcpu in &self.vcpus { 2708 let note_size = self.get_note_size(NoteDescType::Vmm, 1); 2709 let mut pos: usize = 0; 2710 let mut buf = vec![0; note_size as usize]; 2711 let descsz = size_of::<DumpCpusState>(); 2712 let vcpu_id = vcpu.lock().unwrap().id; 2713 2714 let note = Elf64_Nhdr { 2715 n_namesz: COREDUMP_NAME_SIZE, 2716 n_descsz: descsz as u32, 2717 n_type: 0, 2718 }; 2719 2720 let bytes: &[u8] = note.as_slice(); 2721 buf.splice(0.., bytes.to_vec()); 2722 pos += round_up!(size_of::<Elf64_Nhdr>(), 4); 2723 2724 buf.resize(pos + 4, 0); 2725 buf.splice(pos.., "QEMU".to_string().into_bytes()); 2726 2727 pos += round_up!(COREDUMP_NAME_SIZE as usize, 4); 2728 2729 let gregs = self.vcpus[usize::from(vcpu_id)] 2730 .lock() 2731 .unwrap() 2732 .vcpu 2733 .get_regs() 2734 .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?; 2735 2736 let regs1 = [ 2737 gregs.get_rax(), 2738 gregs.get_rbx(), 2739 gregs.get_rcx(), 2740 gregs.get_rdx(), 2741 gregs.get_rsi(), 2742 gregs.get_rdi(), 2743 gregs.get_rsp(), 2744 gregs.get_rbp(), 2745 ]; 2746 2747 let regs2 = [ 2748 gregs.get_r8(), 2749 gregs.get_r9(), 2750 gregs.get_r10(), 2751 gregs.get_r11(), 2752 gregs.get_r12(), 2753 gregs.get_r13(), 2754 gregs.get_r14(), 2755 gregs.get_r15(), 2756 ]; 2757 2758 let sregs = self.vcpus[usize::from(vcpu_id)] 2759 .lock() 2760 .unwrap() 2761 .vcpu 2762 .get_sregs() 2763 .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?; 2764 2765 let mut msrs = vec![MsrEntry { 2766 index: msr_index::MSR_KERNEL_GS_BASE, 2767 ..Default::default() 2768 }]; 2769 2770 self.vcpus[vcpu_id as usize] 2771 .lock() 2772 .unwrap() 2773 .vcpu 2774 .get_msrs(&mut msrs) 2775 .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get msr failed")))?; 2776 let kernel_gs_base = msrs[0].data; 2777 2778 let cs = CpuSegment::new(sregs.cs); 2779 let ds = CpuSegment::new(sregs.ds); 2780 let es = CpuSegment::new(sregs.es); 2781 let fs = CpuSegment::new(sregs.fs); 2782 let gs = CpuSegment::new(sregs.gs); 2783 let ss = CpuSegment::new(sregs.ss); 2784 let ldt = CpuSegment::new(sregs.ldt); 2785 let tr = CpuSegment::new(sregs.tr); 2786 let gdt = CpuSegment::new_from_table(sregs.gdt); 2787 let idt = CpuSegment::new_from_table(sregs.idt); 2788 let cr = [sregs.cr0, sregs.cr8, sregs.cr2, sregs.cr3, sregs.cr4]; 2789 let regs = DumpCpusState { 2790 version: 1, 2791 size: size_of::<DumpCpusState>() as u32, 2792 regs1, 2793 regs2, 2794 rip: gregs.get_rip(), 2795 rflags: gregs.get_rflags(), 2796 cs, 2797 ds, 2798 es, 2799 fs, 2800 gs, 2801 ss, 2802 ldt, 2803 tr, 2804 gdt, 2805 idt, 2806 cr, 2807 kernel_gs_base, 2808 }; 2809 2810 let bytes: &[u8] = regs.as_slice(); 2811 buf.resize(note_size as usize, 0); 2812 buf.splice(pos.., bytes.to_vec()); 2813 buf.resize(note_size as usize, 0); 2814 2815 coredump_file 2816 .write(&buf) 2817 .map_err(GuestDebuggableError::CoredumpFile)?; 2818 } 2819 2820 Ok(()) 2821 } 2822 } 2823 2824 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 2825 #[cfg(test)] 2826 mod tests { 2827 use arch::layout::{BOOT_STACK_POINTER, ZERO_PAGE_START}; 2828 use arch::x86_64::interrupts::*; 2829 use 
arch::x86_64::regs::*;
    use hypervisor::arch::x86::{FpuState, LapicState};
    use hypervisor::StandardRegisters;
    use linux_loader::loader::bootparam::setup_header;

    #[test]
    fn test_setlint() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        hv.check_required_extensions().unwrap();
        // Calling get_lapic will fail if there is no irqchip beforehand.
        vm.create_irq_chip().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let klapic_before: LapicState = vcpu.get_lapic().unwrap();

        // Compute the values that are expected to represent LVT0 and LVT1.
        let lint0 = klapic_before.get_klapic_reg(APIC_LVT0);
        let lint1 = klapic_before.get_klapic_reg(APIC_LVT1);
        let lint0_mode_expected = set_apic_delivery_mode(lint0, APIC_MODE_EXTINT);
        let lint1_mode_expected = set_apic_delivery_mode(lint1, APIC_MODE_NMI);

        set_lint(&vcpu).unwrap();

        // Compute the values that represent LVT0 and LVT1 after set_lint.
        let klapic_actual: LapicState = vcpu.get_lapic().unwrap();
        let lint0_mode_actual = klapic_actual.get_klapic_reg(APIC_LVT0);
        let lint1_mode_actual = klapic_actual.get_klapic_reg(APIC_LVT1);
        assert_eq!(lint0_mode_expected, lint0_mode_actual);
        assert_eq!(lint1_mode_expected, lint1_mode_actual);
    }

    #[test]
    fn test_setup_fpu() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();
        setup_fpu(&vcpu).unwrap();

        let expected_fpu: FpuState = FpuState {
            fcw: 0x37f,
            mxcsr: 0x1f80,
            ..Default::default()
        };
        let actual_fpu: FpuState = vcpu.get_fpu().unwrap();
        // TODO: auto-generate kvm related structures with PartialEq on.
        assert_eq!(expected_fpu.fcw, actual_fpu.fcw);
        // Setting the mxcsr register from FpuState inside setup_fpu does not influence anything.
        // See 'kvm_arch_vcpu_ioctl_set_fpu' from arch/x86/kvm/x86.c.
        // The mxcsr will stay 0 and the assert below would fail. Decide whether we should
        // remove it altogether.
        // assert!(expected_fpu.mxcsr == actual_fpu.mxcsr);
    }

    #[test]
    fn test_setup_msrs() {
        use hypervisor::arch::x86::{msr_index, MsrEntry};

        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();
        setup_msrs(&vcpu).unwrap();

        // This test will check against the last MSR entry configured (the tenth one).
        // See create_msr_entries for details.
        let mut msrs = vec![MsrEntry {
            index: msr_index::MSR_IA32_MISC_ENABLE,
            ..Default::default()
        }];

        // get_msrs returns the number of MSRs it succeeded in reading. We only want to read
        // one in this test.
        let read_msrs = vcpu.get_msrs(&mut msrs).unwrap();
        assert_eq!(read_msrs, 1);

        // Official entries that were set up when we did setup_msrs. We need to assert that the
        // tenth one (i.e. the one with index msr_index::MSR_IA32_MISC_ENABLE) has the data we
        // expect.
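        // The assertion below relies on boot_msr_entries() describing the same boot-time MSR
        // list that setup_msrs() programmed (with MSR_IA32_MISC_ENABLE as entry index 9), so
        // that entry can be compared directly against the value read back through get_msrs().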
2906 let entry_vec = vcpu.boot_msr_entries(); 2907 assert_eq!(entry_vec.as_slice()[9], msrs.as_slice()[0]); 2908 } 2909 2910 #[test] 2911 fn test_setup_regs_for_pvh() { 2912 let hv = hypervisor::new().unwrap(); 2913 let vm = hv.create_vm().expect("new VM fd creation failed"); 2914 let vcpu = vm.create_vcpu(0, None).unwrap(); 2915 2916 let mut expected_regs: StandardRegisters = vcpu.create_standard_regs(); 2917 expected_regs.set_rflags(0x0000000000000002u64); 2918 expected_regs.set_rbx(arch::layout::PVH_INFO_START.0); 2919 expected_regs.set_rip(1); 2920 2921 setup_regs( 2922 &vcpu, 2923 arch::EntryPoint { 2924 entry_addr: vm_memory::GuestAddress(expected_regs.get_rip()), 2925 setup_header: None, 2926 }, 2927 ) 2928 .unwrap(); 2929 2930 let actual_regs: StandardRegisters = vcpu.get_regs().unwrap(); 2931 assert_eq!(actual_regs, expected_regs); 2932 } 2933 2934 #[test] 2935 fn test_setup_regs_for_bzimage() { 2936 let hv = hypervisor::new().unwrap(); 2937 let vm = hv.create_vm().expect("new VM fd creation failed"); 2938 let vcpu = vm.create_vcpu(0, None).unwrap(); 2939 2940 let mut expected_regs: StandardRegisters = vcpu.create_standard_regs(); 2941 expected_regs.set_rflags(0x0000000000000002u64); 2942 expected_regs.set_rip(1); 2943 expected_regs.set_rsp(BOOT_STACK_POINTER.0); 2944 expected_regs.set_rsi(ZERO_PAGE_START.0); 2945 2946 setup_regs( 2947 &vcpu, 2948 arch::EntryPoint { 2949 entry_addr: vm_memory::GuestAddress(expected_regs.get_rip()), 2950 setup_header: Some(setup_header { 2951 ..Default::default() 2952 }), 2953 }, 2954 ) 2955 .unwrap(); 2956 2957 let actual_regs: StandardRegisters = vcpu.get_regs().unwrap(); 2958 assert_eq!(actual_regs, expected_regs); 2959 } 2960 } 2961 2962 #[cfg(target_arch = "aarch64")] 2963 #[cfg(test)] 2964 mod tests { 2965 #[cfg(feature = "kvm")] 2966 use std::mem; 2967 2968 use arch::layout; 2969 use hypervisor::arch::aarch64::regs::MPIDR_EL1; 2970 #[cfg(feature = "kvm")] 2971 use hypervisor::kvm::aarch64::is_system_register; 2972 #[cfg(feature = "kvm")] 2973 use hypervisor::kvm::kvm_bindings::{ 2974 user_pt_regs, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG, KVM_REG_ARM_CORE, KVM_REG_SIZE_U64, 2975 }; 2976 use hypervisor::HypervisorCpuError; 2977 #[cfg(feature = "kvm")] 2978 use hypervisor::{arm64_core_reg_id, offset_of}; 2979 2980 #[test] 2981 fn test_setup_regs() { 2982 let hv = hypervisor::new().unwrap(); 2983 let vm = hv.create_vm().unwrap(); 2984 let vcpu = vm.create_vcpu(0, None).unwrap(); 2985 2986 // Must fail when vcpu is not initialized yet. 2987 vcpu.setup_regs(0, 0x0, layout::FDT_START.0).unwrap_err(); 2988 2989 let mut kvi = vcpu.create_vcpu_init(); 2990 vm.get_preferred_target(&mut kvi).unwrap(); 2991 vcpu.vcpu_init(&kvi).unwrap(); 2992 2993 vcpu.setup_regs(0, 0x0, layout::FDT_START.0).unwrap(); 2994 } 2995 2996 #[test] 2997 fn test_read_mpidr() { 2998 let hv = hypervisor::new().unwrap(); 2999 let vm = hv.create_vm().unwrap(); 3000 let vcpu = vm.create_vcpu(0, None).unwrap(); 3001 let mut kvi = vcpu.create_vcpu_init(); 3002 vm.get_preferred_target(&mut kvi).unwrap(); 3003 3004 // Must fail when vcpu is not initialized yet. 
3005 vcpu.get_sys_reg(MPIDR_EL1).unwrap_err(); 3006 3007 vcpu.vcpu_init(&kvi).unwrap(); 3008 assert_eq!(vcpu.get_sys_reg(MPIDR_EL1).unwrap(), 0x80000000); 3009 } 3010 3011 #[cfg(feature = "kvm")] 3012 #[test] 3013 fn test_is_system_register() { 3014 let offset = offset_of!(user_pt_regs, pc); 3015 let regid = arm64_core_reg_id!(KVM_REG_SIZE_U64, offset); 3016 assert!(!is_system_register(regid)); 3017 let regid = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG as u64; 3018 assert!(is_system_register(regid)); 3019 } 3020 3021 #[test] 3022 fn test_save_restore_core_regs() { 3023 let hv = hypervisor::new().unwrap(); 3024 let vm = hv.create_vm().unwrap(); 3025 let vcpu = vm.create_vcpu(0, None).unwrap(); 3026 let mut kvi = vcpu.create_vcpu_init(); 3027 vm.get_preferred_target(&mut kvi).unwrap(); 3028 3029 fn hypervisor_cpu_error_to_raw_os_error(error: &anyhow::Error) -> libc::c_int { 3030 let cause = error.chain().next().expect("should have root cause"); 3031 cause 3032 .downcast_ref::<vmm_sys_util::errno::Error>() 3033 .unwrap_or_else(|| panic!("should be io::Error but is: {cause:?}")) 3034 .errno() as libc::c_int 3035 } 3036 3037 // test get_regs 3038 { 3039 let error = vcpu 3040 .get_regs() 3041 .expect_err("should fail as vCPU is not initialized"); 3042 let io_error_raw = if let HypervisorCpuError::GetAarchCoreRegister(error) = error { 3043 hypervisor_cpu_error_to_raw_os_error(&error) 3044 } else { 3045 panic!("get_regs() must fail with error HypervisorCpuError::GetAarchCoreRegister"); 3046 }; 3047 assert_eq!(io_error_raw, libc::ENOEXEC); 3048 } 3049 3050 // test set_regs 3051 let mut state = vcpu.create_standard_regs(); 3052 { 3053 let error = vcpu 3054 .set_regs(&state) 3055 .expect_err("should fail as vCPU is not initialized"); 3056 let io_error_raw = if let HypervisorCpuError::SetAarchCoreRegister(error) = error { 3057 hypervisor_cpu_error_to_raw_os_error(&error) 3058 } else { 3059 panic!("set_regs() must fail with error HypervisorCpuError::SetAarchCoreRegister"); 3060 }; 3061 assert_eq!(io_error_raw, libc::ENOEXEC); 3062 } 3063 3064 vcpu.vcpu_init(&kvi).unwrap(); 3065 state = vcpu.get_regs().unwrap(); 3066 assert_eq!(state.get_pstate(), 0x3C5); 3067 3068 vcpu.set_regs(&state).unwrap(); 3069 } 3070 3071 #[test] 3072 fn test_get_set_mpstate() { 3073 let hv = hypervisor::new().unwrap(); 3074 let vm = hv.create_vm().unwrap(); 3075 let vcpu = vm.create_vcpu(0, None).unwrap(); 3076 let mut kvi = vcpu.create_vcpu_init(); 3077 vm.get_preferred_target(&mut kvi).unwrap(); 3078 3079 let state = vcpu.get_mp_state().unwrap(); 3080 vcpu.set_mp_state(state).unwrap(); 3081 } 3082 } 3083
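// An illustrative sketch (not exercised by the VMM itself): the AArch64 GVA-to-GPA walk
// above relies on two pieces of bit arithmetic, extracting a bit field from a 64-bit value
// and building a mask of the N least significant bits. The hypothetical helper below
// re-derives that arithmetic with plain shifts so the intent of `extract_bits_64!` and the
// `!0u64 >> (64 - N)` masks can be checked in isolation.
#[cfg(test)]
mod bit_arithmetic_sketch {
    /// Extract `length` bits of `value` starting at `offset`, mirroring `extract_bits_64!`.
    fn extract_bits(value: u64, offset: u64, length: u64) -> u64 {
        (value >> offset) & (!0u64 >> (64 - length))
    }

    #[test]
    fn field_extraction_is_a_shift_and_mask() {
        // Eight bits taken from offset 4 of 0xABCD are 0xBC.
        assert_eq!(extract_bits(0xABCD_u64, 4, 8), 0xBC);
        // Four bits from offset 2, shifted up to [51:48], as done for FEAT_LPA above.
        let ttbr1_el1 = 0b1111u64 << 2;
        assert_eq!(extract_bits(ttbr1_el1, 2, 4) << 48, 0xFu64 << 48);
    }

    #[test]
    fn low_bit_masks_cover_48_and_50_bit_addresses() {
        assert_eq!(!0u64 >> (64 - 48), 0x0000_FFFF_FFFF_FFFF);
        assert_eq!(!0u64 >> (64 - 50), 0x0003_FFFF_FFFF_FFFF);
    }
}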