// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//

use crate::config::CpusConfig;
#[cfg(feature = "guest_debug")]
use crate::coredump::{
    CpuElf64Writable, CpuSegment, CpuState as DumpCpusState, DumpState, Elf64Writable,
    GuestDebuggableError, NoteDescType, X86_64ElfPrStatus, X86_64UserRegs, COREDUMP_NAME_SIZE,
    NT_PRSTATUS,
};
use crate::device_manager::DeviceManager;
#[cfg(feature = "gdb")]
use crate::gdb::{get_raw_tid, Debuggable, DebuggableError};
use crate::memory_manager::MemoryManager;
use crate::seccomp_filters::{get_seccomp_filter, Thread};
#[cfg(target_arch = "x86_64")]
use crate::vm::physical_bits;
use crate::GuestMemoryMmap;
use crate::CPU_MANAGER_SNAPSHOT_ID;
use acpi_tables::{aml, aml::Aml, sdt::Sdt};
use anyhow::anyhow;
use arch::EntryPoint;
use arch::NumaNodes;
use devices::interrupt_controller::InterruptController;
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
use gdbstub_arch::x86::reg::{X86SegmentRegs, X86_64CoreRegs};
#[cfg(feature = "guest_debug")]
use hypervisor::arch::x86::msr_index;
#[cfg(target_arch = "x86_64")]
use hypervisor::arch::x86::CpuIdEntry;
#[cfg(feature = "guest_debug")]
use hypervisor::arch::x86::MsrEntry;
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
use hypervisor::arch::x86::{SpecialRegisters, StandardRegisters};
#[cfg(target_arch = "aarch64")]
use hypervisor::kvm::kvm_bindings;
#[cfg(feature = "tdx")]
use hypervisor::kvm::{TdxExitDetails, TdxExitStatus};
use hypervisor::{CpuState, HypervisorCpuError, HypervisorType, VmExit, VmOps};
use libc::{c_void, siginfo_t};
#[cfg(feature = "guest_debug")]
use linux_loader::elf::Elf64_Nhdr;
use seccompiler::{apply_filter, SeccompAction};
use std::collections::BTreeMap;
#[cfg(feature = "guest_debug")]
use std::io::Write;
#[cfg(feature = "guest_debug")]
use std::mem::size_of;
use std::os::unix::thread::JoinHandleExt;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use std::{cmp, io, result, thread};
use thiserror::Error;
use vm_device::BusDevice;
#[cfg(feature = "guest_debug")]
use vm_memory::ByteValued;
#[cfg(feature = "gdb")]
use vm_memory::{Bytes, GuestAddressSpace};
use vm_memory::{GuestAddress, GuestMemoryAtomic};
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable,
    Transportable,
};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::signal::{register_signal_handler, SIGRTMIN};

pub const CPU_MANAGER_ACPI_SIZE: usize = 0xc;

#[derive(Debug, Error)]
pub enum Error {
    #[error("Error creating vCPU: {0}")]
    VcpuCreate(#[source] anyhow::Error),

    #[error("Error running vCPU: {0}")]
    VcpuRun(#[source] anyhow::Error),

    #[error("Error spawning vCPU thread: {0}")]
    VcpuSpawn(#[source] io::Error),

    #[error("Error generating common CPUID: {0}")]
    CommonCpuId(#[source] arch::Error),

    #[error("Error configuring vCPU: {0}")]
    VcpuConfiguration(#[source] arch::Error),

    #[cfg(target_arch =
"aarch64")] 97 #[error("Error fetching preferred target: {0}")] 98 VcpuArmPreferredTarget(#[source] hypervisor::HypervisorVmError), 99 100 #[cfg(target_arch = "aarch64")] 101 #[error("Error initialising vCPU: {0}")] 102 VcpuArmInit(#[source] hypervisor::HypervisorCpuError), 103 104 #[error("Failed to join on vCPU threads: {0:?}")] 105 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 106 107 #[error("Error adding CpuManager to MMIO bus: {0}")] 108 BusError(#[source] vm_device::BusError), 109 110 #[error("Requested vCPUs exceed maximum")] 111 DesiredVCpuCountExceedsMax, 112 113 #[error("Cannot create seccomp filter: {0}")] 114 CreateSeccompFilter(#[source] seccompiler::Error), 115 116 #[error("Cannot apply seccomp filter: {0}")] 117 ApplySeccompFilter(#[source] seccompiler::Error), 118 119 #[error("Error starting vCPU after restore: {0}")] 120 StartRestoreVcpu(#[source] anyhow::Error), 121 122 #[error("Unexpected VmExit")] 123 UnexpectedVmExit, 124 125 #[error("Failed to allocate MMIO address for CpuManager")] 126 AllocateMmmioAddress, 127 128 #[cfg(feature = "tdx")] 129 #[error("Error initializing TDX: {0}")] 130 InitializeTdx(#[source] hypervisor::HypervisorCpuError), 131 132 #[cfg(target_arch = "aarch64")] 133 #[error("Error initializing PMU: {0}")] 134 InitPmu(#[source] hypervisor::HypervisorCpuError), 135 136 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 137 #[error("Error during CPU debug: {0}")] 138 CpuDebug(#[source] hypervisor::HypervisorCpuError), 139 140 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 141 #[error("Error translating virtual address: {0}")] 142 TranslateVirtualAddress(#[source] hypervisor::HypervisorCpuError), 143 144 #[cfg(all(feature = "amx", target_arch = "x86_64"))] 145 #[error("Error setting up AMX: {0}")] 146 AmxEnable(#[source] anyhow::Error), 147 } 148 pub type Result<T> = result::Result<T, Error>; 149 150 #[cfg(target_arch = "x86_64")] 151 #[allow(dead_code)] 152 #[repr(packed)] 153 struct LocalApic { 154 pub r#type: u8, 155 pub length: u8, 156 pub processor_id: u8, 157 pub apic_id: u8, 158 pub flags: u32, 159 } 160 161 #[allow(dead_code)] 162 #[repr(packed)] 163 #[derive(Default)] 164 struct Ioapic { 165 pub r#type: u8, 166 pub length: u8, 167 pub ioapic_id: u8, 168 _reserved: u8, 169 pub apic_address: u32, 170 pub gsi_base: u32, 171 } 172 173 #[cfg(target_arch = "aarch64")] 174 #[allow(dead_code)] 175 #[repr(packed)] 176 struct GicC { 177 pub r#type: u8, 178 pub length: u8, 179 pub reserved0: u16, 180 pub cpu_interface_number: u32, 181 pub uid: u32, 182 pub flags: u32, 183 pub parking_version: u32, 184 pub performance_interrupt: u32, 185 pub parked_address: u64, 186 pub base_address: u64, 187 pub gicv_base_address: u64, 188 pub gich_base_address: u64, 189 pub vgic_interrupt: u32, 190 pub gicr_base_address: u64, 191 pub mpidr: u64, 192 pub proc_power_effi_class: u8, 193 pub reserved1: u8, 194 pub spe_overflow_interrupt: u16, 195 } 196 197 #[cfg(target_arch = "aarch64")] 198 #[allow(dead_code)] 199 #[repr(packed)] 200 struct GicD { 201 pub r#type: u8, 202 pub length: u8, 203 pub reserved0: u16, 204 pub gic_id: u32, 205 pub base_address: u64, 206 pub global_irq_base: u32, 207 pub version: u8, 208 pub reserved1: [u8; 3], 209 } 210 211 #[cfg(target_arch = "aarch64")] 212 #[allow(dead_code)] 213 #[repr(packed)] 214 struct GicR { 215 pub r#type: u8, 216 pub length: u8, 217 pub reserved: u16, 218 pub base_address: u64, 219 pub range_length: u32, 220 } 221 222 #[cfg(target_arch = "aarch64")] 223 #[allow(dead_code)] 224 
#[repr(packed)] 225 struct GicIts { 226 pub r#type: u8, 227 pub length: u8, 228 pub reserved0: u16, 229 pub translation_id: u32, 230 pub base_address: u64, 231 pub reserved1: u32, 232 } 233 234 #[cfg(target_arch = "aarch64")] 235 #[allow(dead_code)] 236 #[repr(packed)] 237 struct ProcessorHierarchyNode { 238 pub r#type: u8, 239 pub length: u8, 240 pub reserved: u16, 241 pub flags: u32, 242 pub parent: u32, 243 pub acpi_processor_id: u32, 244 pub num_private_resources: u32, 245 } 246 247 #[allow(dead_code)] 248 #[repr(packed)] 249 #[derive(Default)] 250 struct InterruptSourceOverride { 251 pub r#type: u8, 252 pub length: u8, 253 pub bus: u8, 254 pub source: u8, 255 pub gsi: u32, 256 pub flags: u16, 257 } 258 259 #[cfg(feature = "guest_debug")] 260 macro_rules! round_up { 261 ($n:expr,$d:expr) => { 262 (($n / ($d + 1)) + 1) * $d 263 }; 264 } 265 266 /// A wrapper around creating and using a kvm-based VCPU. 267 pub struct Vcpu { 268 // The hypervisor abstracted CPU. 269 vcpu: Arc<dyn hypervisor::Vcpu>, 270 id: u8, 271 #[cfg(target_arch = "aarch64")] 272 mpidr: u64, 273 saved_state: Option<CpuState>, 274 } 275 276 impl Vcpu { 277 /// Constructs a new VCPU for `vm`. 278 /// 279 /// # Arguments 280 /// 281 /// * `id` - Represents the CPU number between [0, max vcpus). 282 /// * `vm` - The virtual machine this vcpu will get attached to. 283 /// * `vm_ops` - Optional object for exit handling. 284 pub fn new( 285 id: u8, 286 vm: &Arc<dyn hypervisor::Vm>, 287 vm_ops: Option<Arc<dyn VmOps>>, 288 ) -> Result<Self> { 289 let vcpu = vm 290 .create_vcpu(id, vm_ops) 291 .map_err(|e| Error::VcpuCreate(e.into()))?; 292 // Initially the cpuid per vCPU is the one supported by this VM. 293 Ok(Vcpu { 294 vcpu, 295 id, 296 #[cfg(target_arch = "aarch64")] 297 mpidr: 0, 298 saved_state: None, 299 }) 300 } 301 302 /// Configures a vcpu and should be called once per vcpu when created. 303 /// 304 /// # Arguments 305 /// 306 /// * `kernel_entry_point` - Kernel entry point address in guest memory and boot protocol used. 307 /// * `vm_memory` - Guest memory. 308 /// * `cpuid` - (x86_64) CpuId, wrapper over the `kvm_cpuid2` structure. 309 pub fn configure( 310 &mut self, 311 #[cfg(target_arch = "aarch64")] vm: &Arc<dyn hypervisor::Vm>, 312 kernel_entry_point: Option<EntryPoint>, 313 #[cfg(target_arch = "x86_64")] vm_memory: &GuestMemoryAtomic<GuestMemoryMmap>, 314 #[cfg(target_arch = "x86_64")] cpuid: Vec<CpuIdEntry>, 315 #[cfg(target_arch = "x86_64")] kvm_hyperv: bool, 316 ) -> Result<()> { 317 #[cfg(target_arch = "aarch64")] 318 { 319 self.init(vm)?; 320 self.mpidr = arch::configure_vcpu(&self.vcpu, self.id, kernel_entry_point) 321 .map_err(Error::VcpuConfiguration)?; 322 } 323 info!("Configuring vCPU: cpu_id = {}", self.id); 324 #[cfg(target_arch = "x86_64")] 325 arch::configure_vcpu( 326 &self.vcpu, 327 self.id, 328 kernel_entry_point, 329 vm_memory, 330 cpuid, 331 kvm_hyperv, 332 ) 333 .map_err(Error::VcpuConfiguration)?; 334 335 Ok(()) 336 } 337 338 /// Gets the MPIDR register value. 339 #[cfg(target_arch = "aarch64")] 340 pub fn get_mpidr(&self) -> u64 { 341 self.mpidr 342 } 343 344 /// Gets the saved vCPU state. 345 #[cfg(target_arch = "aarch64")] 346 pub fn get_saved_state(&self) -> Option<CpuState> { 347 self.saved_state.clone() 348 } 349 350 /// Initializes an aarch64 specific vcpu for booting Linux. 
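    /// The vCPU is initialised to the host's preferred target with PSCI 0.2
    /// and PMUv3 enabled; non-boot vCPUs start powered off.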
351 #[cfg(target_arch = "aarch64")] 352 pub fn init(&self, vm: &Arc<dyn hypervisor::Vm>) -> Result<()> { 353 let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); 354 355 // This reads back the kernel's preferred target type. 356 vm.get_preferred_target(&mut kvi) 357 .map_err(Error::VcpuArmPreferredTarget)?; 358 // We already checked that the capability is supported. 359 kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2; 360 kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3; 361 // Non-boot cpus are powered off initially. 362 if self.id > 0 { 363 kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF; 364 } 365 self.vcpu.vcpu_init(&kvi).map_err(Error::VcpuArmInit) 366 } 367 368 /// Runs the VCPU until it exits, returning the reason. 369 /// 370 /// Note that the state of the VCPU and associated VM must be setup first for this to do 371 /// anything useful. 372 pub fn run(&self) -> std::result::Result<VmExit, HypervisorCpuError> { 373 self.vcpu.run() 374 } 375 } 376 377 const VCPU_SNAPSHOT_ID: &str = "vcpu"; 378 impl Pausable for Vcpu {} 379 impl Snapshottable for Vcpu { 380 fn id(&self) -> String { 381 VCPU_SNAPSHOT_ID.to_string() 382 } 383 384 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 385 let saved_state = self 386 .vcpu 387 .state() 388 .map_err(|e| MigratableError::Pause(anyhow!("Could not get vCPU state {:?}", e)))?; 389 390 let mut vcpu_snapshot = Snapshot::new(&format!("{:03}", self.id)); 391 vcpu_snapshot.add_data_section(SnapshotDataSection::new_from_state( 392 VCPU_SNAPSHOT_ID, 393 &saved_state, 394 )?); 395 396 self.saved_state = Some(saved_state); 397 398 Ok(vcpu_snapshot) 399 } 400 401 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 402 let saved_state: CpuState = snapshot.to_state(VCPU_SNAPSHOT_ID)?; 403 404 self.vcpu 405 .set_state(&saved_state) 406 .map_err(|e| MigratableError::Pause(anyhow!("Could not set the vCPU state {:?}", e)))?; 407 408 self.saved_state = Some(saved_state); 409 410 Ok(()) 411 } 412 } 413 414 pub struct CpuManager { 415 hypervisor_type: HypervisorType, 416 config: CpusConfig, 417 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 418 interrupt_controller: Option<Arc<Mutex<dyn InterruptController>>>, 419 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 420 vm_memory: GuestMemoryAtomic<GuestMemoryMmap>, 421 #[cfg(target_arch = "x86_64")] 422 cpuid: Vec<CpuIdEntry>, 423 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 424 vm: Arc<dyn hypervisor::Vm>, 425 vcpus_kill_signalled: Arc<AtomicBool>, 426 vcpus_pause_signalled: Arc<AtomicBool>, 427 exit_evt: EventFd, 428 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 429 reset_evt: EventFd, 430 #[cfg(feature = "gdb")] 431 vm_debug_evt: EventFd, 432 vcpu_states: Vec<VcpuState>, 433 selected_cpu: u8, 434 vcpus: Vec<Arc<Mutex<Vcpu>>>, 435 seccomp_action: SeccompAction, 436 vm_ops: Arc<dyn VmOps>, 437 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 438 acpi_address: Option<GuestAddress>, 439 proximity_domain_per_cpu: BTreeMap<u8, u32>, 440 affinity: BTreeMap<u8, Vec<u8>>, 441 dynamic: bool, 442 } 443 444 const CPU_ENABLE_FLAG: usize = 0; 445 const CPU_INSERTING_FLAG: usize = 1; 446 const CPU_REMOVING_FLAG: usize = 2; 447 const CPU_EJECT_FLAG: usize = 3; 448 449 const CPU_STATUS_OFFSET: u64 = 4; 450 const CPU_SELECTION_OFFSET: u64 = 0; 451 452 impl BusDevice for CpuManager { 453 fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { 454 // The Linux kernel, 
quite reasonably, doesn't zero the memory it gives us. 455 data.fill(0); 456 457 match offset { 458 CPU_SELECTION_OFFSET => { 459 data[0] = self.selected_cpu; 460 } 461 CPU_STATUS_OFFSET => { 462 if self.selected_cpu < self.max_vcpus() { 463 let state = &self.vcpu_states[usize::from(self.selected_cpu)]; 464 if state.active() { 465 data[0] |= 1 << CPU_ENABLE_FLAG; 466 } 467 if state.inserting { 468 data[0] |= 1 << CPU_INSERTING_FLAG; 469 } 470 if state.removing { 471 data[0] |= 1 << CPU_REMOVING_FLAG; 472 } 473 } else { 474 warn!("Out of range vCPU id: {}", self.selected_cpu); 475 } 476 } 477 _ => { 478 warn!( 479 "Unexpected offset for accessing CPU manager device: {:#}", 480 offset 481 ); 482 } 483 } 484 } 485 486 fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> { 487 match offset { 488 CPU_SELECTION_OFFSET => { 489 self.selected_cpu = data[0]; 490 } 491 CPU_STATUS_OFFSET => { 492 if self.selected_cpu < self.max_vcpus() { 493 let state = &mut self.vcpu_states[usize::from(self.selected_cpu)]; 494 // The ACPI code writes back a 1 to acknowledge the insertion 495 if (data[0] & (1 << CPU_INSERTING_FLAG) == 1 << CPU_INSERTING_FLAG) 496 && state.inserting 497 { 498 state.inserting = false; 499 } 500 // Ditto for removal 501 if (data[0] & (1 << CPU_REMOVING_FLAG) == 1 << CPU_REMOVING_FLAG) 502 && state.removing 503 { 504 state.removing = false; 505 } 506 // Trigger removal of vCPU 507 if data[0] & (1 << CPU_EJECT_FLAG) == 1 << CPU_EJECT_FLAG { 508 if let Err(e) = self.remove_vcpu(self.selected_cpu) { 509 error!("Error removing vCPU: {:?}", e); 510 } 511 } 512 } else { 513 warn!("Out of range vCPU id: {}", self.selected_cpu); 514 } 515 } 516 _ => { 517 warn!( 518 "Unexpected offset for accessing CPU manager device: {:#}", 519 offset 520 ); 521 } 522 } 523 None 524 } 525 } 526 527 #[derive(Default)] 528 struct VcpuState { 529 inserting: bool, 530 removing: bool, 531 handle: Option<thread::JoinHandle<()>>, 532 kill: Arc<AtomicBool>, 533 vcpu_run_interrupted: Arc<AtomicBool>, 534 } 535 536 impl VcpuState { 537 fn active(&self) -> bool { 538 self.handle.is_some() 539 } 540 541 fn signal_thread(&self) { 542 if let Some(handle) = self.handle.as_ref() { 543 loop { 544 unsafe { 545 libc::pthread_kill(handle.as_pthread_t() as _, SIGRTMIN()); 546 } 547 if self.vcpu_run_interrupted.load(Ordering::SeqCst) { 548 break; 549 } else { 550 // This is more effective than thread::yield_now() at 551 // avoiding a priority inversion with the vCPU thread 552 thread::sleep(std::time::Duration::from_millis(1)); 553 } 554 } 555 } 556 } 557 558 fn join_thread(&mut self) -> Result<()> { 559 if let Some(handle) = self.handle.take() { 560 handle.join().map_err(Error::ThreadCleanup)? 
        }

        Ok(())
    }

    fn unpark_thread(&self) {
        if let Some(handle) = self.handle.as_ref() {
            handle.thread().unpark()
        }
    }
}

impl CpuManager {
    #[allow(unused_variables)]
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        config: &CpusConfig,
        device_manager: &Arc<Mutex<DeviceManager>>,
        memory_manager: &Arc<Mutex<MemoryManager>>,
        vm: Arc<dyn hypervisor::Vm>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        #[cfg(feature = "gdb")] vm_debug_evt: EventFd,
        hypervisor: Arc<dyn hypervisor::Hypervisor>,
        seccomp_action: SeccompAction,
        vm_ops: Arc<dyn VmOps>,
        #[cfg(feature = "tdx")] tdx_enabled: bool,
        numa_nodes: &NumaNodes,
    ) -> Result<Arc<Mutex<CpuManager>>> {
        let guest_memory = memory_manager.lock().unwrap().guest_memory();
        let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
        vcpu_states.resize_with(usize::from(config.max_vcpus), VcpuState::default);
        let hypervisor_type = hypervisor.hypervisor_type();

        #[cfg(target_arch = "x86_64")]
        let sgx_epc_sections = memory_manager
            .lock()
            .unwrap()
            .sgx_epc_region()
            .as_ref()
            .map(|sgx_epc_region| sgx_epc_region.epc_sections().values().cloned().collect());
        #[cfg(target_arch = "x86_64")]
        let cpuid = {
            let phys_bits = physical_bits(config.max_phys_bits);
            arch::generate_common_cpuid(
                hypervisor,
                config
                    .topology
                    .clone()
                    .map(|t| (t.threads_per_core, t.cores_per_die, t.dies_per_package)),
                sgx_epc_sections,
                phys_bits,
                config.kvm_hyperv,
                #[cfg(feature = "tdx")]
                tdx_enabled,
            )
            .map_err(Error::CommonCpuId)?
        };
        #[cfg(all(feature = "amx", target_arch = "x86_64"))]
        if config.features.amx {
            const ARCH_GET_XCOMP_GUEST_PERM: usize = 0x1024;
            const ARCH_REQ_XCOMP_GUEST_PERM: usize = 0x1025;
            const XFEATURE_XTILEDATA: usize = 18;
            const XFEATURE_XTILEDATA_MASK: usize = 1 << XFEATURE_XTILEDATA;

            // This is safe as the syscall is only modifying kernel internal
            // data structures that the kernel is itself expected to safeguard.
            let amx_tile = unsafe {
                libc::syscall(
                    libc::SYS_arch_prctl,
                    ARCH_REQ_XCOMP_GUEST_PERM,
                    XFEATURE_XTILEDATA,
                )
            };

            if amx_tile != 0 {
                return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported")));
            } else {
                // This is safe as the mask is only modified from within the unsafe
                // block (hence not marked mutable) and isn't in use elsewhere.
641 let mask: usize = 0; 642 let result = unsafe { 643 libc::syscall(libc::SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &mask) 644 }; 645 if result != 0 || (mask & XFEATURE_XTILEDATA_MASK) != XFEATURE_XTILEDATA_MASK { 646 return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported"))); 647 } 648 } 649 } 650 651 let device_manager = device_manager.lock().unwrap(); 652 653 let proximity_domain_per_cpu: BTreeMap<u8, u32> = { 654 let mut cpu_list = Vec::new(); 655 for (proximity_domain, numa_node) in numa_nodes.iter() { 656 for cpu in numa_node.cpus.iter() { 657 cpu_list.push((*cpu, *proximity_domain)) 658 } 659 } 660 cpu_list 661 } 662 .into_iter() 663 .collect(); 664 665 let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() { 666 cpu_affinity 667 .iter() 668 .map(|a| (a.vcpu, a.host_cpus.clone())) 669 .collect() 670 } else { 671 BTreeMap::new() 672 }; 673 674 #[cfg(feature = "tdx")] 675 let dynamic = !tdx_enabled; 676 #[cfg(not(feature = "tdx"))] 677 let dynamic = true; 678 679 let acpi_address = if dynamic { 680 Some( 681 device_manager 682 .allocator() 683 .lock() 684 .unwrap() 685 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 686 .ok_or(Error::AllocateMmmioAddress)?, 687 ) 688 } else { 689 None 690 }; 691 692 let cpu_manager = Arc::new(Mutex::new(CpuManager { 693 hypervisor_type, 694 config: config.clone(), 695 interrupt_controller: device_manager.interrupt_controller().clone(), 696 vm_memory: guest_memory, 697 #[cfg(target_arch = "x86_64")] 698 cpuid, 699 vm, 700 vcpus_kill_signalled: Arc::new(AtomicBool::new(false)), 701 vcpus_pause_signalled: Arc::new(AtomicBool::new(false)), 702 vcpu_states, 703 exit_evt, 704 reset_evt, 705 #[cfg(feature = "gdb")] 706 vm_debug_evt, 707 selected_cpu: 0, 708 vcpus: Vec::with_capacity(usize::from(config.max_vcpus)), 709 seccomp_action, 710 vm_ops, 711 acpi_address, 712 proximity_domain_per_cpu, 713 affinity, 714 dynamic, 715 })); 716 717 if let Some(acpi_address) = acpi_address { 718 device_manager 719 .mmio_bus() 720 .insert( 721 cpu_manager.clone(), 722 acpi_address.0, 723 CPU_MANAGER_ACPI_SIZE as u64, 724 ) 725 .map_err(Error::BusError)?; 726 } 727 728 Ok(cpu_manager) 729 } 730 731 fn create_vcpu( 732 &mut self, 733 cpu_id: u8, 734 entry_point: Option<EntryPoint>, 735 snapshot: Option<Snapshot>, 736 ) -> Result<()> { 737 info!("Creating vCPU: cpu_id = {}", cpu_id); 738 739 let mut vcpu = Vcpu::new(cpu_id, &self.vm, Some(self.vm_ops.clone()))?; 740 741 if let Some(snapshot) = snapshot { 742 // AArch64 vCPUs should be initialized after created. 743 #[cfg(target_arch = "aarch64")] 744 vcpu.init(&self.vm)?; 745 746 vcpu.restore(snapshot).expect("Failed to restore vCPU"); 747 } else { 748 #[cfg(target_arch = "x86_64")] 749 vcpu.configure( 750 entry_point, 751 &self.vm_memory, 752 self.cpuid.clone(), 753 self.config.kvm_hyperv, 754 ) 755 .expect("Failed to configure vCPU"); 756 757 #[cfg(target_arch = "aarch64")] 758 vcpu.configure(&self.vm, entry_point) 759 .expect("Failed to configure vCPU"); 760 } 761 762 // Adding vCPU to the CpuManager's vCPU list. 
763 let vcpu = Arc::new(Mutex::new(vcpu)); 764 self.vcpus.push(vcpu); 765 766 Ok(()) 767 } 768 769 /// Only create new vCPUs if there aren't any inactive ones to reuse 770 fn create_vcpus(&mut self, desired_vcpus: u8, entry_point: Option<EntryPoint>) -> Result<()> { 771 info!( 772 "Request to create new vCPUs: desired = {}, max = {}, allocated = {}, present = {}", 773 desired_vcpus, 774 self.config.max_vcpus, 775 self.vcpus.len(), 776 self.present_vcpus() 777 ); 778 779 if desired_vcpus > self.config.max_vcpus { 780 return Err(Error::DesiredVCpuCountExceedsMax); 781 } 782 783 // Only create vCPUs in excess of all the allocated vCPUs. 784 for cpu_id in self.vcpus.len() as u8..desired_vcpus { 785 self.create_vcpu(cpu_id, entry_point, None)?; 786 } 787 788 Ok(()) 789 } 790 791 #[cfg(target_arch = "aarch64")] 792 pub fn init_pmu(&self, irq: u32) -> Result<bool> { 793 for cpu in self.vcpus.iter() { 794 let cpu = cpu.lock().unwrap(); 795 // Check if PMU attr is available, if not, log the information. 796 if cpu.vcpu.has_pmu_support() { 797 cpu.vcpu.init_pmu(irq).map_err(Error::InitPmu)?; 798 } else { 799 debug!( 800 "PMU attribute is not supported in vCPU{}, skip PMU init!", 801 cpu.id 802 ); 803 return Ok(false); 804 } 805 } 806 807 Ok(true) 808 } 809 810 fn start_vcpu( 811 &mut self, 812 vcpu: Arc<Mutex<Vcpu>>, 813 vcpu_id: u8, 814 vcpu_thread_barrier: Arc<Barrier>, 815 inserting: bool, 816 ) -> Result<()> { 817 let reset_evt = self.reset_evt.try_clone().unwrap(); 818 let exit_evt = self.exit_evt.try_clone().unwrap(); 819 #[cfg(feature = "gdb")] 820 let vm_debug_evt = self.vm_debug_evt.try_clone().unwrap(); 821 let panic_exit_evt = self.exit_evt.try_clone().unwrap(); 822 let vcpu_kill_signalled = self.vcpus_kill_signalled.clone(); 823 let vcpu_pause_signalled = self.vcpus_pause_signalled.clone(); 824 825 let vcpu_kill = self.vcpu_states[usize::from(vcpu_id)].kill.clone(); 826 let vcpu_run_interrupted = self.vcpu_states[usize::from(vcpu_id)] 827 .vcpu_run_interrupted 828 .clone(); 829 let panic_vcpu_run_interrupted = vcpu_run_interrupted.clone(); 830 831 // Prepare the CPU set the current vCPU is expected to run onto. 832 let cpuset = self.affinity.get(&vcpu_id).map(|host_cpus| { 833 let mut cpuset: libc::cpu_set_t = unsafe { std::mem::zeroed() }; 834 unsafe { libc::CPU_ZERO(&mut cpuset) }; 835 for host_cpu in host_cpus { 836 unsafe { libc::CPU_SET(*host_cpu as usize, &mut cpuset) }; 837 } 838 cpuset 839 }); 840 841 // Retrieve seccomp filter for vcpu thread 842 let vcpu_seccomp_filter = 843 get_seccomp_filter(&self.seccomp_action, Thread::Vcpu, self.hypervisor_type) 844 .map_err(Error::CreateSeccompFilter)?; 845 846 #[cfg(target_arch = "x86_64")] 847 let interrupt_controller_clone = self.interrupt_controller.as_ref().cloned(); 848 849 info!("Starting vCPU: cpu_id = {}", vcpu_id); 850 851 let handle = Some( 852 thread::Builder::new() 853 .name(format!("vcpu{}", vcpu_id)) 854 .spawn(move || { 855 // Schedule the thread to run on the expected CPU set 856 if let Some(cpuset) = cpuset.as_ref() { 857 let ret = unsafe { 858 libc::sched_setaffinity( 859 0, 860 std::mem::size_of::<libc::cpu_set_t>(), 861 cpuset as *const libc::cpu_set_t, 862 ) 863 }; 864 865 if ret != 0 { 866 error!( 867 "Failed scheduling the vCPU {} on the expected CPU set: {}", 868 vcpu_id, 869 io::Error::last_os_error() 870 ); 871 return; 872 } 873 } 874 875 // Apply seccomp filter for vcpu thread. 
876 if !vcpu_seccomp_filter.is_empty() { 877 if let Err(e) = 878 apply_filter(&vcpu_seccomp_filter).map_err(Error::ApplySeccompFilter) 879 { 880 error!("Error applying seccomp filter: {:?}", e); 881 return; 882 } 883 } 884 extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {} 885 // This uses an async signal safe handler to kill the vcpu handles. 886 register_signal_handler(SIGRTMIN(), handle_signal) 887 .expect("Failed to register vcpu signal handler"); 888 // Block until all CPUs are ready. 889 vcpu_thread_barrier.wait(); 890 891 std::panic::catch_unwind(move || { 892 loop { 893 // If we are being told to pause, we park the thread 894 // until the pause boolean is toggled. 895 // The resume operation is responsible for toggling 896 // the boolean and unpark the thread. 897 // We enter a loop because park() could spuriously 898 // return. We will then park() again unless the 899 // pause boolean has been toggled. 900 901 // Need to use Ordering::SeqCst as we have multiple 902 // loads and stores to different atomics and we need 903 // to see them in a consistent order in all threads 904 905 if vcpu_pause_signalled.load(Ordering::SeqCst) { 906 // As a pause can be caused by PIO & MMIO exits then we need to ensure they are 907 // completed by returning to KVM_RUN. From the kernel docs: 908 // 909 // For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN, 910 // KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding 911 // operations are complete (and guest state is consistent) only after userspace 912 // has re-entered the kernel with KVM_RUN. The kernel side will first finish 913 // incomplete operations and then check for pending signals. 914 // The pending state of the operation is not preserved in state which is 915 // visible to userspace, thus userspace should ensure that the operation is 916 // completed before performing a live migration. Userspace can re-enter the 917 // guest with an unmasked signal pending or with the immediate_exit field set 918 // to complete pending operations without allowing any further instructions 919 // to be executed. 
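                            // With KVM this is handled below by setting immediate_exit and
                            // re-entering KVM_RUN once, so the kernel can finish any pending
                            // PIO/MMIO emulation before this thread parks.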
920 921 #[cfg(feature = "kvm")] 922 { 923 vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(true); 924 if !matches!(vcpu.lock().unwrap().run(), Ok(VmExit::Ignore)) { 925 error!("Unexpected VM exit on \"immediate_exit\" run"); 926 break; 927 } 928 vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(false); 929 } 930 931 vcpu_run_interrupted.store(true, Ordering::SeqCst); 932 while vcpu_pause_signalled.load(Ordering::SeqCst) { 933 thread::park(); 934 } 935 vcpu_run_interrupted.store(false, Ordering::SeqCst); 936 } 937 938 // We've been told to terminate 939 if vcpu_kill_signalled.load(Ordering::SeqCst) 940 || vcpu_kill.load(Ordering::SeqCst) 941 { 942 vcpu_run_interrupted.store(true, Ordering::SeqCst); 943 break; 944 } 945 946 #[cfg(feature = "tdx")] 947 let mut vcpu = vcpu.lock().unwrap(); 948 #[cfg(not(feature = "tdx"))] 949 let vcpu = vcpu.lock().unwrap(); 950 // vcpu.run() returns false on a triple-fault so trigger a reset 951 match vcpu.run() { 952 Ok(run) => match run { 953 #[cfg(all(target_arch = "x86_64", feature = "kvm"))] 954 VmExit::Debug => { 955 info!("VmExit::Debug"); 956 #[cfg(feature = "gdb")] 957 { 958 vcpu_pause_signalled.store(true, Ordering::SeqCst); 959 let raw_tid = get_raw_tid(vcpu_id as usize); 960 vm_debug_evt.write(raw_tid as u64).unwrap(); 961 } 962 } 963 #[cfg(target_arch = "x86_64")] 964 VmExit::IoapicEoi(vector) => { 965 if let Some(interrupt_controller) = 966 &interrupt_controller_clone 967 { 968 interrupt_controller 969 .lock() 970 .unwrap() 971 .end_of_interrupt(vector); 972 } 973 } 974 VmExit::Ignore => {} 975 VmExit::Hyperv => {} 976 VmExit::Reset => { 977 info!("VmExit::Reset"); 978 vcpu_run_interrupted.store(true, Ordering::SeqCst); 979 reset_evt.write(1).unwrap(); 980 break; 981 } 982 VmExit::Shutdown => { 983 info!("VmExit::Shutdown"); 984 vcpu_run_interrupted.store(true, Ordering::SeqCst); 985 exit_evt.write(1).unwrap(); 986 break; 987 } 988 #[cfg(feature = "tdx")] 989 VmExit::Tdx => { 990 if let Some(vcpu) = Arc::get_mut(&mut vcpu.vcpu) { 991 match vcpu.get_tdx_exit_details() { 992 Ok(details) => match details { 993 TdxExitDetails::GetQuote => warn!("TDG_VP_VMCALL_GET_QUOTE not supported"), 994 TdxExitDetails::SetupEventNotifyInterrupt => { 995 warn!("TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT not supported") 996 } 997 }, 998 Err(e) => error!("Unexpected TDX VMCALL: {}", e), 999 } 1000 vcpu.set_tdx_status(TdxExitStatus::InvalidOperand); 1001 } else { 1002 // We should never reach this code as 1003 // this means the design from the code 1004 // is wrong. 1005 unreachable!("Couldn't get a mutable reference from Arc<dyn Vcpu> as there are multiple instances"); 1006 } 1007 } 1008 _ => { 1009 error!( 1010 "VCPU generated error: {:?}", 1011 Error::UnexpectedVmExit 1012 ); 1013 break; 1014 } 1015 }, 1016 1017 Err(e) => { 1018 error!("VCPU generated error: {:?}", Error::VcpuRun(e.into())); 1019 break; 1020 } 1021 } 1022 1023 // We've been told to terminate 1024 if vcpu_kill_signalled.load(Ordering::SeqCst) 1025 || vcpu_kill.load(Ordering::SeqCst) 1026 { 1027 vcpu_run_interrupted.store(true, Ordering::SeqCst); 1028 break; 1029 } 1030 } 1031 }) 1032 .or_else(|_| { 1033 panic_vcpu_run_interrupted.store(true, Ordering::SeqCst); 1034 error!("vCPU thread panicked"); 1035 panic_exit_evt.write(1) 1036 }) 1037 .ok(); 1038 }) 1039 .map_err(Error::VcpuSpawn)?, 1040 ); 1041 1042 // On hot plug calls into this function entry_point is None. It is for 1043 // those hotplug CPU additions that we need to set the inserting flag. 
        self.vcpu_states[usize::from(vcpu_id)].handle = handle;
        self.vcpu_states[usize::from(vcpu_id)].inserting = inserting;

        Ok(())
    }

    /// Start up as many vCPU threads as needed to reach `desired_vcpus`
    fn activate_vcpus(
        &mut self,
        desired_vcpus: u8,
        inserting: bool,
        paused: Option<bool>,
    ) -> Result<()> {
        if desired_vcpus > self.config.max_vcpus {
            return Err(Error::DesiredVCpuCountExceedsMax);
        }

        let vcpu_thread_barrier = Arc::new(Barrier::new(
            (desired_vcpus - self.present_vcpus() + 1) as usize,
        ));

        if let Some(paused) = paused {
            self.vcpus_pause_signalled.store(paused, Ordering::SeqCst);
        }

        info!(
            "Starting vCPUs: desired = {}, allocated = {}, present = {}, paused = {}",
            desired_vcpus,
            self.vcpus.len(),
            self.present_vcpus(),
            self.vcpus_pause_signalled.load(Ordering::SeqCst)
        );

        // This reuses any inactive vCPUs as well as any that were newly created
        for vcpu_id in self.present_vcpus()..desired_vcpus {
            let vcpu = Arc::clone(&self.vcpus[vcpu_id as usize]);
            self.start_vcpu(vcpu, vcpu_id, vcpu_thread_barrier.clone(), inserting)?;
        }

        // Unblock all CPU threads.
        vcpu_thread_barrier.wait();
        Ok(())
    }

    fn mark_vcpus_for_removal(&mut self, desired_vcpus: u8) {
        // Mark vCPUs for removal, actual removal happens on ejection
        for cpu_id in desired_vcpus..self.present_vcpus() {
            self.vcpu_states[usize::from(cpu_id)].removing = true;
        }
    }

    fn remove_vcpu(&mut self, cpu_id: u8) -> Result<()> {
        info!("Removing vCPU: cpu_id = {}", cpu_id);
        let mut state = &mut self.vcpu_states[usize::from(cpu_id)];
        state.kill.store(true, Ordering::SeqCst);
        state.signal_thread();
        state.join_thread()?;
        state.handle = None;

        // Once the thread has exited, clear the "kill" so that it can be reused
        state.kill.store(false, Ordering::SeqCst);

        Ok(())
    }

    pub fn create_boot_vcpus(&mut self, entry_point: Option<EntryPoint>) -> Result<()> {
        self.create_vcpus(self.boot_vcpus(), entry_point)
    }

    // Starts all the vCPUs that the VM is booting with. Blocks until all vCPUs are running.
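    // If `paused` is true, the pause flag is set before the vCPU threads are
    // released from the barrier, so each thread parks before entering the guest.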
1114 pub fn start_boot_vcpus(&mut self, paused: bool) -> Result<()> { 1115 self.activate_vcpus(self.boot_vcpus(), false, Some(paused)) 1116 } 1117 1118 pub fn start_restored_vcpus(&mut self) -> Result<()> { 1119 self.activate_vcpus(self.vcpus.len() as u8, false, Some(true)) 1120 .map_err(|e| { 1121 Error::StartRestoreVcpu(anyhow!("Failed to start restored vCPUs: {:#?}", e)) 1122 })?; 1123 1124 Ok(()) 1125 } 1126 1127 pub fn resize(&mut self, desired_vcpus: u8) -> Result<bool> { 1128 if desired_vcpus.cmp(&self.present_vcpus()) == cmp::Ordering::Equal { 1129 return Ok(false); 1130 } 1131 1132 if !self.dynamic { 1133 return Ok(false); 1134 } 1135 1136 match desired_vcpus.cmp(&self.present_vcpus()) { 1137 cmp::Ordering::Greater => { 1138 self.create_vcpus(desired_vcpus, None)?; 1139 self.activate_vcpus(desired_vcpus, true, None)?; 1140 Ok(true) 1141 } 1142 cmp::Ordering::Less => { 1143 self.mark_vcpus_for_removal(desired_vcpus); 1144 Ok(true) 1145 } 1146 _ => Ok(false), 1147 } 1148 } 1149 1150 pub fn shutdown(&mut self) -> Result<()> { 1151 // Tell the vCPUs to stop themselves next time they go through the loop 1152 self.vcpus_kill_signalled.store(true, Ordering::SeqCst); 1153 1154 // Toggle the vCPUs pause boolean 1155 self.vcpus_pause_signalled.store(false, Ordering::SeqCst); 1156 1157 // Unpark all the VCPU threads. 1158 for state in self.vcpu_states.iter() { 1159 state.unpark_thread(); 1160 } 1161 1162 // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads 1163 // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set 1164 // above. 1165 for state in self.vcpu_states.iter() { 1166 state.signal_thread(); 1167 } 1168 1169 // Wait for all the threads to finish. This removes the state from the vector. 1170 for mut state in self.vcpu_states.drain(..) { 1171 state.join_thread()?; 1172 } 1173 1174 Ok(()) 1175 } 1176 1177 #[cfg(feature = "tdx")] 1178 pub fn initialize_tdx(&self, hob_address: u64) -> Result<()> { 1179 for vcpu in &self.vcpus { 1180 vcpu.lock() 1181 .unwrap() 1182 .vcpu 1183 .tdx_init(hob_address) 1184 .map_err(Error::InitializeTdx)?; 1185 } 1186 Ok(()) 1187 } 1188 1189 pub fn boot_vcpus(&self) -> u8 { 1190 self.config.boot_vcpus 1191 } 1192 1193 pub fn max_vcpus(&self) -> u8 { 1194 self.config.max_vcpus 1195 } 1196 1197 #[cfg(target_arch = "x86_64")] 1198 pub fn common_cpuid(&self) -> Vec<CpuIdEntry> { 1199 self.cpuid.clone() 1200 } 1201 1202 fn present_vcpus(&self) -> u8 { 1203 self.vcpu_states 1204 .iter() 1205 .fold(0, |acc, state| acc + state.active() as u8) 1206 } 1207 1208 #[cfg(target_arch = "aarch64")] 1209 pub fn get_mpidrs(&self) -> Vec<u64> { 1210 self.vcpus 1211 .iter() 1212 .map(|cpu| cpu.lock().unwrap().get_mpidr()) 1213 .collect() 1214 } 1215 1216 #[cfg(target_arch = "aarch64")] 1217 pub fn get_saved_states(&self) -> Vec<CpuState> { 1218 self.vcpus 1219 .iter() 1220 .map(|cpu| cpu.lock().unwrap().get_saved_state().unwrap()) 1221 .collect() 1222 } 1223 1224 #[cfg(target_arch = "aarch64")] 1225 pub fn get_vcpu_topology(&self) -> Option<(u8, u8, u8)> { 1226 self.config 1227 .topology 1228 .clone() 1229 .map(|t| (t.threads_per_core, t.cores_per_die, t.packages)) 1230 } 1231 1232 pub fn create_madt(&self) -> Sdt { 1233 use crate::acpi; 1234 // This is also checked in the commandline parsing. 
1235 assert!(self.config.boot_vcpus <= self.config.max_vcpus); 1236 1237 let mut madt = Sdt::new(*b"APIC", 44, 5, *b"CLOUDH", *b"CHMADT ", 1); 1238 #[cfg(target_arch = "x86_64")] 1239 { 1240 madt.write(36, arch::layout::APIC_START); 1241 1242 for cpu in 0..self.config.max_vcpus { 1243 let lapic = LocalApic { 1244 r#type: acpi::ACPI_APIC_PROCESSOR, 1245 length: 8, 1246 processor_id: cpu, 1247 apic_id: cpu, 1248 flags: if cpu < self.config.boot_vcpus { 1249 1 << MADT_CPU_ENABLE_FLAG 1250 } else { 1251 0 1252 } | 1 << MADT_CPU_ONLINE_CAPABLE_FLAG, 1253 }; 1254 madt.append(lapic); 1255 } 1256 1257 madt.append(Ioapic { 1258 r#type: acpi::ACPI_APIC_IO, 1259 length: 12, 1260 ioapic_id: 0, 1261 apic_address: arch::layout::IOAPIC_START.0 as u32, 1262 gsi_base: 0, 1263 ..Default::default() 1264 }); 1265 1266 madt.append(InterruptSourceOverride { 1267 r#type: acpi::ACPI_APIC_XRUPT_OVERRIDE, 1268 length: 10, 1269 bus: 0, 1270 source: 4, 1271 gsi: 4, 1272 flags: 0, 1273 }); 1274 } 1275 1276 #[cfg(target_arch = "aarch64")] 1277 { 1278 use vm_memory::Address; 1279 /* Notes: 1280 * Ignore Local Interrupt Controller Address at byte offset 36 of MADT table. 1281 */ 1282 1283 // See section 5.2.12.14 GIC CPU Interface (GICC) Structure in ACPI spec. 1284 for cpu in 0..self.config.boot_vcpus { 1285 let vcpu = &self.vcpus[cpu as usize]; 1286 let mpidr = vcpu.lock().unwrap().get_mpidr(); 1287 /* ARMv8 MPIDR format: 1288 Bits [63:40] Must be zero 1289 Bits [39:32] Aff3 : Match Aff3 of target processor MPIDR 1290 Bits [31:24] Must be zero 1291 Bits [23:16] Aff2 : Match Aff2 of target processor MPIDR 1292 Bits [15:8] Aff1 : Match Aff1 of target processor MPIDR 1293 Bits [7:0] Aff0 : Match Aff0 of target processor MPIDR 1294 */ 1295 let mpidr_mask = 0xff_00ff_ffff; 1296 let gicc = GicC { 1297 r#type: acpi::ACPI_APIC_GENERIC_CPU_INTERFACE, 1298 length: 80, 1299 reserved0: 0, 1300 cpu_interface_number: cpu as u32, 1301 uid: cpu as u32, 1302 flags: 1, 1303 parking_version: 0, 1304 performance_interrupt: 0, 1305 parked_address: 0, 1306 base_address: 0, 1307 gicv_base_address: 0, 1308 gich_base_address: 0, 1309 vgic_interrupt: 0, 1310 gicr_base_address: 0, 1311 mpidr: mpidr & mpidr_mask, 1312 proc_power_effi_class: 0, 1313 reserved1: 0, 1314 spe_overflow_interrupt: 0, 1315 }; 1316 1317 madt.append(gicc); 1318 } 1319 1320 // GIC Distributor structure. See section 5.2.12.15 in ACPI spec. 1321 let gicd = GicD { 1322 r#type: acpi::ACPI_APIC_GENERIC_DISTRIBUTOR, 1323 length: 24, 1324 reserved0: 0, 1325 gic_id: 0, 1326 base_address: arch::layout::MAPPED_IO_START.raw_value() - 0x0001_0000, 1327 global_irq_base: 0, 1328 version: 3, 1329 reserved1: [0; 3], 1330 }; 1331 madt.append(gicd); 1332 1333 // See 5.2.12.17 GIC Redistributor (GICR) Structure in ACPI spec. 1334 let gicr_size: u32 = 0x0001_0000 * 2 * (self.config.boot_vcpus as u32); 1335 let gicr_base: u64 = 1336 arch::layout::MAPPED_IO_START.raw_value() - 0x0001_0000 - gicr_size as u64; 1337 let gicr = GicR { 1338 r#type: acpi::ACPI_APIC_GENERIC_REDISTRIBUTOR, 1339 length: 16, 1340 reserved: 0, 1341 base_address: gicr_base, 1342 range_length: gicr_size, 1343 }; 1344 madt.append(gicr); 1345 1346 // See 5.2.12.18 GIC Interrupt Translation Service (ITS) Structure in ACPI spec. 
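            // The ITS frame is placed two 64 KiB pages below the redistributor
            // base (`gicr_base`) computed above.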
1347 let gicits = GicIts { 1348 r#type: acpi::ACPI_APIC_GENERIC_TRANSLATOR, 1349 length: 20, 1350 reserved0: 0, 1351 translation_id: 0, 1352 base_address: gicr_base - 2 * 0x0001_0000, 1353 reserved1: 0, 1354 }; 1355 madt.append(gicits); 1356 1357 madt.update_checksum(); 1358 } 1359 1360 madt 1361 } 1362 1363 #[cfg(target_arch = "aarch64")] 1364 pub fn create_pptt(&self) -> Sdt { 1365 let pptt_start = 0; 1366 let mut cpus = 0; 1367 let mut uid = 0; 1368 // If topology is not specified, the default setting is: 1369 // 1 package, multiple cores, 1 thread per core 1370 // This is also the behavior when PPTT is missing. 1371 let (threads_per_core, cores_per_package, packages) = 1372 self.get_vcpu_topology().unwrap_or((1, self.max_vcpus(), 1)); 1373 1374 let mut pptt = Sdt::new(*b"PPTT", 36, 2, *b"CLOUDH", *b"CHPPTT ", 1); 1375 1376 for cluster_idx in 0..packages { 1377 if cpus < self.config.boot_vcpus as usize { 1378 let cluster_offset = pptt.len() - pptt_start; 1379 let cluster_hierarchy_node = ProcessorHierarchyNode { 1380 r#type: 0, 1381 length: 20, 1382 reserved: 0, 1383 flags: 0x2, 1384 parent: 0, 1385 acpi_processor_id: cluster_idx as u32, 1386 num_private_resources: 0, 1387 }; 1388 pptt.append(cluster_hierarchy_node); 1389 1390 for core_idx in 0..cores_per_package { 1391 let core_offset = pptt.len() - pptt_start; 1392 1393 if threads_per_core > 1 { 1394 let core_hierarchy_node = ProcessorHierarchyNode { 1395 r#type: 0, 1396 length: 20, 1397 reserved: 0, 1398 flags: 0x2, 1399 parent: cluster_offset as u32, 1400 acpi_processor_id: core_idx as u32, 1401 num_private_resources: 0, 1402 }; 1403 pptt.append(core_hierarchy_node); 1404 1405 for _thread_idx in 0..threads_per_core { 1406 let thread_hierarchy_node = ProcessorHierarchyNode { 1407 r#type: 0, 1408 length: 20, 1409 reserved: 0, 1410 flags: 0xE, 1411 parent: core_offset as u32, 1412 acpi_processor_id: uid as u32, 1413 num_private_resources: 0, 1414 }; 1415 pptt.append(thread_hierarchy_node); 1416 uid += 1; 1417 } 1418 } else { 1419 let thread_hierarchy_node = ProcessorHierarchyNode { 1420 r#type: 0, 1421 length: 20, 1422 reserved: 0, 1423 flags: 0xA, 1424 parent: cluster_offset as u32, 1425 acpi_processor_id: uid as u32, 1426 num_private_resources: 0, 1427 }; 1428 pptt.append(thread_hierarchy_node); 1429 uid += 1; 1430 } 1431 } 1432 cpus += (cores_per_package * threads_per_core) as usize; 1433 } 1434 } 1435 1436 pptt.update_checksum(); 1437 pptt 1438 } 1439 1440 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 1441 fn get_regs(&self, cpu_id: u8) -> Result<StandardRegisters> { 1442 self.vcpus[usize::from(cpu_id)] 1443 .lock() 1444 .unwrap() 1445 .vcpu 1446 .get_regs() 1447 .map_err(Error::CpuDebug) 1448 } 1449 1450 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 1451 fn set_regs(&self, cpu_id: u8, regs: &StandardRegisters) -> Result<()> { 1452 self.vcpus[usize::from(cpu_id)] 1453 .lock() 1454 .unwrap() 1455 .vcpu 1456 .set_regs(regs) 1457 .map_err(Error::CpuDebug) 1458 } 1459 1460 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 1461 fn get_sregs(&self, cpu_id: u8) -> Result<SpecialRegisters> { 1462 self.vcpus[usize::from(cpu_id)] 1463 .lock() 1464 .unwrap() 1465 .vcpu 1466 .get_sregs() 1467 .map_err(Error::CpuDebug) 1468 } 1469 1470 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 1471 fn set_sregs(&self, cpu_id: u8, sregs: &SpecialRegisters) -> Result<()> { 1472 self.vcpus[usize::from(cpu_id)] 1473 .lock() 1474 .unwrap() 1475 .vcpu 1476 .set_sregs(sregs) 1477 .map_err(Error::CpuDebug) 1478 } 1479 1480 
#[cfg(all(target_arch = "x86_64", feature = "gdb"))] 1481 fn translate_gva(&self, cpu_id: u8, gva: u64) -> Result<u64> { 1482 let (gpa, _) = self.vcpus[usize::from(cpu_id)] 1483 .lock() 1484 .unwrap() 1485 .vcpu 1486 .translate_gva(gva, /* flags: unused */ 0) 1487 .map_err(Error::TranslateVirtualAddress)?; 1488 Ok(gpa) 1489 } 1490 } 1491 1492 struct Cpu { 1493 cpu_id: u8, 1494 proximity_domain: u32, 1495 dynamic: bool, 1496 } 1497 1498 #[cfg(target_arch = "x86_64")] 1499 const MADT_CPU_ENABLE_FLAG: usize = 0; 1500 1501 #[cfg(target_arch = "x86_64")] 1502 const MADT_CPU_ONLINE_CAPABLE_FLAG: usize = 1; 1503 1504 impl Cpu { 1505 #[cfg(target_arch = "x86_64")] 1506 fn generate_mat(&self) -> Vec<u8> { 1507 let lapic = LocalApic { 1508 r#type: 0, 1509 length: 8, 1510 processor_id: self.cpu_id, 1511 apic_id: self.cpu_id, 1512 flags: 1 << MADT_CPU_ENABLE_FLAG, 1513 }; 1514 1515 let mut mat_data: Vec<u8> = Vec::new(); 1516 mat_data.resize(std::mem::size_of_val(&lapic), 0); 1517 unsafe { *(mat_data.as_mut_ptr() as *mut LocalApic) = lapic }; 1518 1519 mat_data 1520 } 1521 } 1522 1523 impl Aml for Cpu { 1524 fn append_aml_bytes(&self, bytes: &mut Vec<u8>) { 1525 #[cfg(target_arch = "x86_64")] 1526 let mat_data: Vec<u8> = self.generate_mat(); 1527 #[allow(clippy::if_same_then_else)] 1528 if self.dynamic { 1529 aml::Device::new( 1530 format!("C{:03}", self.cpu_id).as_str().into(), 1531 vec![ 1532 &aml::Name::new("_HID".into(), &"ACPI0007"), 1533 &aml::Name::new("_UID".into(), &self.cpu_id), 1534 // Currently, AArch64 cannot support following fields. 1535 /* 1536 _STA return value: 1537 Bit [0] – Set if the device is present. 1538 Bit [1] – Set if the device is enabled and decoding its resources. 1539 Bit [2] – Set if the device should be shown in the UI. 1540 Bit [3] – Set if the device is functioning properly (cleared if device failed its diagnostics). 1541 Bit [4] – Set if the battery is present. 1542 Bits [31:5] – Reserved (must be cleared). 
1543 */ 1544 #[cfg(target_arch = "x86_64")] 1545 &aml::Method::new( 1546 "_STA".into(), 1547 0, 1548 false, 1549 // Call into CSTA method which will interrogate device 1550 vec![&aml::Return::new(&aml::MethodCall::new( 1551 "CSTA".into(), 1552 vec![&self.cpu_id], 1553 ))], 1554 ), 1555 &aml::Method::new( 1556 "_PXM".into(), 1557 0, 1558 false, 1559 vec![&aml::Return::new(&self.proximity_domain)], 1560 ), 1561 // The Linux kernel expects every CPU device to have a _MAT entry 1562 // containing the LAPIC for this processor with the enabled bit set 1563 // even it if is disabled in the MADT (non-boot CPU) 1564 #[cfg(target_arch = "x86_64")] 1565 &aml::Name::new("_MAT".into(), &aml::Buffer::new(mat_data)), 1566 // Trigger CPU ejection 1567 #[cfg(target_arch = "x86_64")] 1568 &aml::Method::new( 1569 "_EJ0".into(), 1570 1, 1571 false, 1572 // Call into CEJ0 method which will actually eject device 1573 vec![&aml::MethodCall::new("CEJ0".into(), vec![&self.cpu_id])], 1574 ), 1575 ], 1576 ) 1577 .append_aml_bytes(bytes); 1578 } else { 1579 aml::Device::new( 1580 format!("C{:03}", self.cpu_id).as_str().into(), 1581 vec![ 1582 &aml::Name::new("_HID".into(), &"ACPI0007"), 1583 &aml::Name::new("_UID".into(), &self.cpu_id), 1584 #[cfg(target_arch = "x86_64")] 1585 &aml::Method::new( 1586 "_STA".into(), 1587 0, 1588 false, 1589 // Mark CPU present see CSTA implementation 1590 vec![&aml::Return::new(&0xfu8)], 1591 ), 1592 &aml::Method::new( 1593 "_PXM".into(), 1594 0, 1595 false, 1596 vec![&aml::Return::new(&self.proximity_domain)], 1597 ), 1598 // The Linux kernel expects every CPU device to have a _MAT entry 1599 // containing the LAPIC for this processor with the enabled bit set 1600 // even it if is disabled in the MADT (non-boot CPU) 1601 #[cfg(target_arch = "x86_64")] 1602 &aml::Name::new("_MAT".into(), &aml::Buffer::new(mat_data)), 1603 ], 1604 ) 1605 .append_aml_bytes(bytes); 1606 } 1607 } 1608 } 1609 1610 struct CpuNotify { 1611 cpu_id: u8, 1612 } 1613 1614 impl Aml for CpuNotify { 1615 fn append_aml_bytes(&self, bytes: &mut Vec<u8>) { 1616 let object = aml::Path::new(&format!("C{:03}", self.cpu_id)); 1617 aml::If::new( 1618 &aml::Equal::new(&aml::Arg(0), &self.cpu_id), 1619 vec![&aml::Notify::new(&object, &aml::Arg(1))], 1620 ) 1621 .append_aml_bytes(bytes) 1622 } 1623 } 1624 1625 struct CpuMethods { 1626 max_vcpus: u8, 1627 dynamic: bool, 1628 } 1629 1630 impl Aml for CpuMethods { 1631 fn append_aml_bytes(&self, bytes: &mut Vec<u8>) { 1632 if self.dynamic { 1633 // CPU status method 1634 aml::Method::new( 1635 "CSTA".into(), 1636 1, 1637 true, 1638 vec![ 1639 // Take lock defined above 1640 &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff), 1641 // Write CPU number (in first argument) to I/O port via field 1642 &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Arg(0)), 1643 &aml::Store::new(&aml::Local(0), &aml::ZERO), 1644 // Check if CPEN bit is set, if so make the local variable 0xf (see _STA for details of meaning) 1645 &aml::If::new( 1646 &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CPEN"), &aml::ONE), 1647 vec![&aml::Store::new(&aml::Local(0), &0xfu8)], 1648 ), 1649 // Release lock 1650 &aml::Release::new("\\_SB_.PRES.CPLK".into()), 1651 // Return 0 or 0xf 1652 &aml::Return::new(&aml::Local(0)), 1653 ], 1654 ) 1655 .append_aml_bytes(bytes); 1656 1657 let mut cpu_notifies = Vec::new(); 1658 for cpu_id in 0..self.max_vcpus { 1659 cpu_notifies.push(CpuNotify { cpu_id }); 1660 } 1661 1662 let mut cpu_notifies_refs: Vec<&dyn aml::Aml> = Vec::new(); 1663 for cpu_id in 
0..self.max_vcpus { 1664 cpu_notifies_refs.push(&cpu_notifies[usize::from(cpu_id)]); 1665 } 1666 1667 aml::Method::new("CTFY".into(), 2, true, cpu_notifies_refs).append_aml_bytes(bytes); 1668 1669 aml::Method::new( 1670 "CEJ0".into(), 1671 1, 1672 true, 1673 vec![ 1674 &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff), 1675 // Write CPU number (in first argument) to I/O port via field 1676 &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Arg(0)), 1677 // Set CEJ0 bit 1678 &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CEJ0"), &aml::ONE), 1679 &aml::Release::new("\\_SB_.PRES.CPLK".into()), 1680 ], 1681 ) 1682 .append_aml_bytes(bytes); 1683 1684 aml::Method::new( 1685 "CSCN".into(), 1686 0, 1687 true, 1688 vec![ 1689 // Take lock defined above 1690 &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff), 1691 &aml::Store::new(&aml::Local(0), &aml::ZERO), 1692 &aml::While::new( 1693 &aml::LessThan::new(&aml::Local(0), &self.max_vcpus), 1694 vec![ 1695 // Write CPU number (in first argument) to I/O port via field 1696 &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Local(0)), 1697 // Check if CINS bit is set 1698 &aml::If::new( 1699 &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CINS"), &aml::ONE), 1700 // Notify device if it is 1701 vec![ 1702 &aml::MethodCall::new( 1703 "CTFY".into(), 1704 vec![&aml::Local(0), &aml::ONE], 1705 ), 1706 // Reset CINS bit 1707 &aml::Store::new( 1708 &aml::Path::new("\\_SB_.PRES.CINS"), 1709 &aml::ONE, 1710 ), 1711 ], 1712 ), 1713 // Check if CRMV bit is set 1714 &aml::If::new( 1715 &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CRMV"), &aml::ONE), 1716 // Notify device if it is (with the eject constant 0x3) 1717 vec![ 1718 &aml::MethodCall::new( 1719 "CTFY".into(), 1720 vec![&aml::Local(0), &3u8], 1721 ), 1722 // Reset CRMV bit 1723 &aml::Store::new( 1724 &aml::Path::new("\\_SB_.PRES.CRMV"), 1725 &aml::ONE, 1726 ), 1727 ], 1728 ), 1729 &aml::Add::new(&aml::Local(0), &aml::Local(0), &aml::ONE), 1730 ], 1731 ), 1732 // Release lock 1733 &aml::Release::new("\\_SB_.PRES.CPLK".into()), 1734 ], 1735 ) 1736 .append_aml_bytes(bytes) 1737 } else { 1738 aml::Method::new("CSCN".into(), 0, true, vec![]).append_aml_bytes(bytes) 1739 } 1740 } 1741 } 1742 1743 impl Aml for CpuManager { 1744 fn append_aml_bytes(&self, bytes: &mut Vec<u8>) { 1745 #[cfg(target_arch = "x86_64")] 1746 if let Some(acpi_address) = self.acpi_address { 1747 // CPU hotplug controller 1748 aml::Device::new( 1749 "_SB_.PRES".into(), 1750 vec![ 1751 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")), 1752 &aml::Name::new("_UID".into(), &"CPU Hotplug Controller"), 1753 // Mutex to protect concurrent access as we write to choose CPU and then read back status 1754 &aml::Mutex::new("CPLK".into(), 0), 1755 &aml::Name::new( 1756 "_CRS".into(), 1757 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( 1758 aml::AddressSpaceCachable::NotCacheable, 1759 true, 1760 acpi_address.0 as u64, 1761 acpi_address.0 + CPU_MANAGER_ACPI_SIZE as u64 - 1, 1762 )]), 1763 ), 1764 // OpRegion and Fields map MMIO range into individual field values 1765 &aml::OpRegion::new( 1766 "PRST".into(), 1767 aml::OpRegionSpace::SystemMemory, 1768 acpi_address.0 as usize, 1769 CPU_MANAGER_ACPI_SIZE, 1770 ), 1771 &aml::Field::new( 1772 "PRST".into(), 1773 aml::FieldAccessType::Byte, 1774 aml::FieldUpdateRule::WriteAsZeroes, 1775 vec![ 1776 aml::FieldEntry::Reserved(32), 1777 aml::FieldEntry::Named(*b"CPEN", 1), 1778 aml::FieldEntry::Named(*b"CINS", 1), 1779 aml::FieldEntry::Named(*b"CRMV", 1), 1780 
aml::FieldEntry::Named(*b"CEJ0", 1), 1781 aml::FieldEntry::Reserved(4), 1782 aml::FieldEntry::Named(*b"CCMD", 8), 1783 ], 1784 ), 1785 &aml::Field::new( 1786 "PRST".into(), 1787 aml::FieldAccessType::DWord, 1788 aml::FieldUpdateRule::Preserve, 1789 vec![ 1790 aml::FieldEntry::Named(*b"CSEL", 32), 1791 aml::FieldEntry::Reserved(32), 1792 aml::FieldEntry::Named(*b"CDAT", 32), 1793 ], 1794 ), 1795 ], 1796 ) 1797 .append_aml_bytes(bytes); 1798 } 1799 1800 // CPU devices 1801 let hid = aml::Name::new("_HID".into(), &"ACPI0010"); 1802 let uid = aml::Name::new("_CID".into(), &aml::EisaName::new("PNP0A05")); 1803 // Bundle methods together under a common object 1804 let methods = CpuMethods { 1805 max_vcpus: self.config.max_vcpus, 1806 dynamic: self.dynamic, 1807 }; 1808 let mut cpu_data_inner: Vec<&dyn aml::Aml> = vec![&hid, &uid, &methods]; 1809 1810 let mut cpu_devices = Vec::new(); 1811 for cpu_id in 0..self.config.max_vcpus { 1812 let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0); 1813 let cpu_device = Cpu { 1814 cpu_id, 1815 proximity_domain, 1816 dynamic: self.dynamic, 1817 }; 1818 1819 cpu_devices.push(cpu_device); 1820 } 1821 1822 for cpu_device in cpu_devices.iter() { 1823 cpu_data_inner.push(cpu_device); 1824 } 1825 1826 aml::Device::new("_SB_.CPUS".into(), cpu_data_inner).append_aml_bytes(bytes) 1827 } 1828 } 1829 1830 impl Pausable for CpuManager { 1831 fn pause(&mut self) -> std::result::Result<(), MigratableError> { 1832 // Tell the vCPUs to pause themselves next time they exit 1833 self.vcpus_pause_signalled.store(true, Ordering::SeqCst); 1834 1835 // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads 1836 // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set 1837 // above. 1838 for state in self.vcpu_states.iter() { 1839 state.signal_thread(); 1840 } 1841 1842 for vcpu in self.vcpus.iter() { 1843 let mut vcpu = vcpu.lock().unwrap(); 1844 vcpu.pause()?; 1845 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1846 if !self.config.kvm_hyperv { 1847 vcpu.vcpu.notify_guest_clock_paused().map_err(|e| { 1848 MigratableError::Pause(anyhow!( 1849 "Could not notify guest it has been paused {:?}", 1850 e 1851 )) 1852 })?; 1853 } 1854 } 1855 1856 Ok(()) 1857 } 1858 1859 fn resume(&mut self) -> std::result::Result<(), MigratableError> { 1860 for vcpu in self.vcpus.iter() { 1861 vcpu.lock().unwrap().resume()?; 1862 } 1863 1864 // Toggle the vCPUs pause boolean 1865 self.vcpus_pause_signalled.store(false, Ordering::SeqCst); 1866 1867 // Unpark all the VCPU threads. 1868 // Once unparked, the next thing they will do is checking for the pause 1869 // boolean. Since it'll be set to false, they will exit their pause loop 1870 // and go back to vmx root. 1871 for state in self.vcpu_states.iter() { 1872 state.unpark_thread(); 1873 } 1874 Ok(()) 1875 } 1876 } 1877 1878 impl Snapshottable for CpuManager { 1879 fn id(&self) -> String { 1880 CPU_MANAGER_SNAPSHOT_ID.to_string() 1881 } 1882 1883 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 1884 let mut cpu_manager_snapshot = Snapshot::new(CPU_MANAGER_SNAPSHOT_ID); 1885 1886 // The CpuManager snapshot is a collection of all vCPUs snapshots. 
1887 for vcpu in &self.vcpus { 1888 let cpu_snapshot = vcpu.lock().unwrap().snapshot()?; 1889 cpu_manager_snapshot.add_snapshot(cpu_snapshot); 1890 } 1891 1892 Ok(cpu_manager_snapshot) 1893 } 1894 1895 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 1896 for (cpu_id, snapshot) in snapshot.snapshots.iter() { 1897 info!("Restoring VCPU {}", cpu_id); 1898 self.create_vcpu(cpu_id.parse::<u8>().unwrap(), None, Some(*snapshot.clone())) 1899 .map_err(|e| MigratableError::Restore(anyhow!("Could not create vCPU {:?}", e)))?; 1900 } 1901 1902 Ok(()) 1903 } 1904 } 1905 1906 impl Transportable for CpuManager {} 1907 impl Migratable for CpuManager {} 1908 1909 #[cfg(feature = "gdb")] 1910 impl Debuggable for CpuManager { 1911 #[cfg(feature = "kvm")] 1912 fn set_guest_debug( 1913 &self, 1914 cpu_id: usize, 1915 addrs: &[GuestAddress], 1916 singlestep: bool, 1917 ) -> std::result::Result<(), DebuggableError> { 1918 self.vcpus[cpu_id] 1919 .lock() 1920 .unwrap() 1921 .vcpu 1922 .set_guest_debug(addrs, singlestep) 1923 .map_err(DebuggableError::SetDebug) 1924 } 1925 1926 fn debug_pause(&mut self) -> std::result::Result<(), DebuggableError> { 1927 Ok(()) 1928 } 1929 1930 fn debug_resume(&mut self) -> std::result::Result<(), DebuggableError> { 1931 Ok(()) 1932 } 1933 1934 #[cfg(target_arch = "x86_64")] 1935 fn read_regs(&self, cpu_id: usize) -> std::result::Result<X86_64CoreRegs, DebuggableError> { 1936 // General registers: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, r8-r15 1937 let gregs = self 1938 .get_regs(cpu_id as u8) 1939 .map_err(DebuggableError::ReadRegs)?; 1940 let regs = [ 1941 gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rbp, gregs.rsp, 1942 gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14, gregs.r15, 1943 ]; 1944 1945 // GDB exposes 32-bit eflags instead of 64-bit rflags. 1946 // https://github.com/bminor/binutils-gdb/blob/master/gdb/features/i386/64bit-core.xml 1947 let eflags = gregs.rflags as u32; 1948 let rip = gregs.rip; 1949 1950 // Segment registers: CS, SS, DS, ES, FS, GS 1951 let sregs = self 1952 .get_sregs(cpu_id as u8) 1953 .map_err(DebuggableError::ReadRegs)?; 1954 let segments = X86SegmentRegs { 1955 cs: sregs.cs.selector as u32, 1956 ss: sregs.ss.selector as u32, 1957 ds: sregs.ds.selector as u32, 1958 es: sregs.es.selector as u32, 1959 fs: sregs.fs.selector as u32, 1960 gs: sregs.gs.selector as u32, 1961 }; 1962 1963 // TODO: Add other registers 1964 1965 Ok(X86_64CoreRegs { 1966 regs, 1967 eflags, 1968 rip, 1969 segments, 1970 ..Default::default() 1971 }) 1972 } 1973 1974 #[cfg(target_arch = "x86_64")] 1975 fn write_regs( 1976 &self, 1977 cpu_id: usize, 1978 regs: &X86_64CoreRegs, 1979 ) -> std::result::Result<(), DebuggableError> { 1980 let orig_gregs = self 1981 .get_regs(cpu_id as u8) 1982 .map_err(DebuggableError::ReadRegs)?; 1983 let gregs = StandardRegisters { 1984 rax: regs.regs[0], 1985 rbx: regs.regs[1], 1986 rcx: regs.regs[2], 1987 rdx: regs.regs[3], 1988 rsi: regs.regs[4], 1989 rdi: regs.regs[5], 1990 rbp: regs.regs[6], 1991 rsp: regs.regs[7], 1992 r8: regs.regs[8], 1993 r9: regs.regs[9], 1994 r10: regs.regs[10], 1995 r11: regs.regs[11], 1996 r12: regs.regs[12], 1997 r13: regs.regs[13], 1998 r14: regs.regs[14], 1999 r15: regs.regs[15], 2000 rip: regs.rip, 2001 // Update the lower 32-bit of rflags. 
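            // Keep the upper 32 bits of the current RFLAGS and merge in the
            // 32-bit EFLAGS value supplied by GDB.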
            rflags: (orig_gregs.rflags & !(u32::MAX as u64)) | (regs.eflags as u64),
        };

        self.set_regs(cpu_id as u8, &gregs)
            .map_err(DebuggableError::WriteRegs)?;

        // Segment registers: CS, SS, DS, ES, FS, GS
        // Since GDB only provides the selectors, we call get_sregs() first and only update the
        // selector fields.
        let mut sregs = self
            .get_sregs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        sregs.cs.selector = regs.segments.cs as u16;
        sregs.ss.selector = regs.segments.ss as u16;
        sregs.ds.selector = regs.segments.ds as u16;
        sregs.es.selector = regs.segments.es as u16;
        sregs.fs.selector = regs.segments.fs as u16;
        sregs.gs.selector = regs.segments.gs as u16;

        self.set_sregs(cpu_id as u8, &sregs)
            .map_err(DebuggableError::WriteRegs)?;

        // TODO: Add other registers

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn read_mem(
        &self,
        cpu_id: usize,
        vaddr: GuestAddress,
        len: usize,
    ) -> std::result::Result<Vec<u8>, DebuggableError> {
        let mut buf = vec![0; len];
        let mut total_read = 0_u64;

        while total_read < len as u64 {
            let gaddr = vaddr.0 + total_read;
            let paddr = match self.translate_gva(cpu_id as u8, gaddr) {
                Ok(paddr) => paddr,
                Err(_) if gaddr == u64::MIN => gaddr, // Silently return GVA as GPA if GVA == 0.
                Err(e) => return Err(DebuggableError::TranslateGva(e)),
            };
            let psize = arch::PAGE_SIZE as u64;
            let read_len = std::cmp::min(len as u64 - total_read, psize - (paddr & (psize - 1)));
            self.vm_memory
                .memory()
                .read(
                    &mut buf[total_read as usize..total_read as usize + read_len as usize],
                    GuestAddress(paddr),
                )
                .map_err(DebuggableError::ReadMem)?;
            total_read += read_len;
        }
        Ok(buf)
    }

    #[cfg(target_arch = "x86_64")]
    fn write_mem(
        &self,
        cpu_id: usize,
        vaddr: &GuestAddress,
        data: &[u8],
    ) -> std::result::Result<(), DebuggableError> {
        let mut total_written = 0_u64;

        while total_written < data.len() as u64 {
            let gaddr = vaddr.0 + total_written;
            let paddr = match self.translate_gva(cpu_id as u8, gaddr) {
                Ok(paddr) => paddr,
                Err(_) if gaddr == u64::MIN => gaddr, // Silently return GVA as GPA if GVA == 0.
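                // As in read_mem(), any other translation failure is reported back to the
                // debugger.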
                Err(e) => return Err(DebuggableError::TranslateGva(e)),
            };
            let psize = arch::PAGE_SIZE as u64;
            let write_len = std::cmp::min(
                data.len() as u64 - total_written,
                psize - (paddr & (psize - 1)),
            );
            self.vm_memory
                .memory()
                .write(
                    &data[total_written as usize..total_written as usize + write_len as usize],
                    GuestAddress(paddr),
                )
                .map_err(DebuggableError::WriteMem)?;
            total_written += write_len;
        }
        Ok(())
    }

    fn active_vcpus(&self) -> usize {
        self.present_vcpus() as usize
    }
}

#[cfg(feature = "guest_debug")]
impl Elf64Writable for CpuManager {}

#[cfg(feature = "guest_debug")]
impl CpuElf64Writable for CpuManager {
    fn cpu_write_elf64_note(
        &mut self,
        dump_state: &DumpState,
    ) -> std::result::Result<(), GuestDebuggableError> {
        let mut coredump_file = dump_state.file.as_ref().unwrap();
        for vcpu in &self.vcpus {
            let note_size = self.get_note_size(NoteDescType::Elf, 1);
            let mut pos: usize = 0;
            let mut buf = vec![0; note_size as usize];
            let descsz = size_of::<X86_64ElfPrStatus>();
            let vcpu_id = vcpu.lock().unwrap().id;

            let note = Elf64_Nhdr {
                n_namesz: COREDUMP_NAME_SIZE,
                n_descsz: descsz as u32,
                n_type: NT_PRSTATUS,
            };

            let bytes: &[u8] = note.as_slice();
            buf.splice(0.., bytes.to_vec());
            pos += round_up!(size_of::<Elf64_Nhdr>(), 4);
            buf.resize(pos + 4, 0);
            buf.splice(pos.., "CORE".to_string().into_bytes());

            pos += round_up!(COREDUMP_NAME_SIZE as usize, 4);
            buf.resize(pos + 32 + 4, 0);
            let pid = vcpu_id as u64;
            let bytes: &[u8] = pid.as_slice();
            buf.splice(pos + 32.., bytes.to_vec()); /* pr_pid */

            pos += descsz - size_of::<X86_64UserRegs>() - size_of::<u64>();

            let orig_rax: u64 = 0;
            let gregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_regs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?;

            let regs1 = [
                gregs.r15, gregs.r14, gregs.r13, gregs.r12, gregs.rbp, gregs.rbx, gregs.r11,
                gregs.r10,
            ];
            let regs2 = [
                gregs.r9, gregs.r8, gregs.rax, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, orig_rax,
            ];

            let sregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_sregs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?;

            debug!(
                "rip 0x{:x} rsp 0x{:x} gs 0x{:x} cs 0x{:x} ss 0x{:x} ds 0x{:x}",
                gregs.rip,
                gregs.rsp,
                sregs.gs.base,
                sregs.cs.selector,
                sregs.ss.selector,
                sregs.ds.selector,
            );

            let regs = X86_64UserRegs {
                regs1,
                regs2,
                rip: gregs.rip,
                cs: sregs.cs.selector as u64,
                eflags: gregs.rflags,
                rsp: gregs.rsp,
                ss: sregs.ss.selector as u64,
                fs_base: sregs.fs.base as u64,
                gs_base: sregs.gs.base as u64,
                ds: sregs.ds.selector as u64,
                es: sregs.es.selector as u64,
                fs: sregs.fs.selector as u64,
                gs: sregs.gs.selector as u64,
            };

            // let bytes: &[u8] = unsafe { any_as_u8_slice(&regs) };
            let bytes: &[u8] = regs.as_slice();
            buf.resize(note_size as usize, 0);
            buf.splice(pos.., bytes.to_vec());
            buf.resize(note_size as usize, 0);

            coredump_file
                .write(&buf)
                .map_err(GuestDebuggableError::CoredumpFile)?;
        }

        Ok(())
    }

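    // The VMM-specific note uses the "QEMU" owner name and a DumpCpusState payload,
    // which mirrors the per-CPU state note layout used in QEMU guest memory dumps
    // (general purpose registers, segment and descriptor tables, control registers and
    // KERNEL_GS_BASE), so tooling that understands those notes can presumably parse
    // this one as well.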
    fn cpu_write_vmm_note(
        &mut self,
        dump_state: &DumpState,
    ) -> std::result::Result<(), GuestDebuggableError> {
        let mut coredump_file = dump_state.file.as_ref().unwrap();
        for vcpu in &self.vcpus {
            let note_size = self.get_note_size(NoteDescType::Vmm, 1);
            let mut pos: usize = 0;
            let mut buf = vec![0; note_size as usize];
            let descsz = size_of::<DumpCpusState>();
            let vcpu_id = vcpu.lock().unwrap().id;

            let note = Elf64_Nhdr {
                n_namesz: COREDUMP_NAME_SIZE,
                n_descsz: descsz as u32,
                n_type: 0,
            };

            let bytes: &[u8] = note.as_slice();
            buf.splice(0.., bytes.to_vec());
            pos += round_up!(size_of::<Elf64_Nhdr>(), 4);

            buf.resize(pos + 4, 0);
            buf.splice(pos.., "QEMU".to_string().into_bytes());

            pos += round_up!(COREDUMP_NAME_SIZE as usize, 4);

            let gregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_regs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?;

            let regs1 = [
                gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rsp,
                gregs.rbp,
            ];

            let regs2 = [
                gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14,
                gregs.r15,
            ];

            let sregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_sregs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?;

            let mut msrs = vec![MsrEntry {
                index: msr_index::MSR_KERNEL_GS_BASE,
                ..Default::default()
            }];

            self.vcpus[vcpu_id as usize]
                .lock()
                .unwrap()
                .vcpu
                .get_msrs(&mut msrs)
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get msr failed")))?;
            let kernel_gs_base = msrs[0].data;

            let cs = CpuSegment::new(sregs.cs);
            let ds = CpuSegment::new(sregs.ds);
            let es = CpuSegment::new(sregs.es);
            let fs = CpuSegment::new(sregs.fs);
            let gs = CpuSegment::new(sregs.gs);
            let ss = CpuSegment::new(sregs.ss);
            let ldt = CpuSegment::new(sregs.ldt);
            let tr = CpuSegment::new(sregs.tr);
            let gdt = CpuSegment::new_from_table(sregs.gdt);
            let idt = CpuSegment::new_from_table(sregs.idt);
            let cr = [sregs.cr0, sregs.cr8, sregs.cr2, sregs.cr3, sregs.cr4];
            let regs = DumpCpusState {
                version: 1,
                size: size_of::<DumpCpusState>() as u32,
                regs1,
                regs2,
                rip: gregs.rip,
                rflags: gregs.rflags,
                cs,
                ds,
                es,
                fs,
                gs,
                ss,
                ldt,
                tr,
                gdt,
                idt,
                cr,
                kernel_gs_base,
            };

            let bytes: &[u8] = regs.as_slice();
            buf.resize(note_size as usize, 0);
            buf.splice(pos.., bytes.to_vec());
            buf.resize(note_size as usize, 0);

            coredump_file
                .write(&buf)
                .map_err(GuestDebuggableError::CoredumpFile)?;
        }

        Ok(())
    }
}

#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
#[cfg(test)]
mod tests {
    use arch::x86_64::interrupts::*;
    use arch::x86_64::regs::*;
    use hypervisor::arch::x86::{FpuState, LapicState, StandardRegisters};

    #[test]
    fn test_setlint() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        assert!(hv.check_required_extensions().is_ok());
        // Calling get_lapic will fail if there is no irqchip created beforehand.
        assert!(vm.create_irq_chip().is_ok());
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let klapic_before: LapicState = vcpu.get_lapic().unwrap();

        // Compute the value that is expected to represent LVT0 and LVT1.
        let lint0 = klapic_before.get_klapic_reg(APIC_LVT0);
        let lint1 = klapic_before.get_klapic_reg(APIC_LVT1);
        let lint0_mode_expected = set_apic_delivery_mode(lint0, APIC_MODE_EXTINT);
        let lint1_mode_expected = set_apic_delivery_mode(lint1, APIC_MODE_NMI);

        set_lint(&vcpu).unwrap();

        // Compute the value that represents LVT0 and LVT1 after set_lint.
        let klapic_actual: LapicState = vcpu.get_lapic().unwrap();
        let lint0_mode_actual = klapic_actual.get_klapic_reg(APIC_LVT0);
        let lint1_mode_actual = klapic_actual.get_klapic_reg(APIC_LVT1);
        assert_eq!(lint0_mode_expected, lint0_mode_actual);
        assert_eq!(lint1_mode_expected, lint1_mode_actual);
    }

    #[test]
    fn test_setup_fpu() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();
        setup_fpu(&vcpu).unwrap();

        let expected_fpu: FpuState = FpuState {
            fcw: 0x37f,
            mxcsr: 0x1f80,
            ..Default::default()
        };
        let actual_fpu: FpuState = vcpu.get_fpu().unwrap();
        // TODO: auto-generate kvm related structures with PartialEq on.
        assert_eq!(expected_fpu.fcw, actual_fpu.fcw);
        // Setting the mxcsr register from FpuState inside setup_fpu does not influence anything.
        // See 'kvm_arch_vcpu_ioctl_set_fpu' from arch/x86/kvm/x86.c.
        // The mxcsr stays 0, so the assert below would fail; it is left commented out until we
        // decide whether it should be removed entirely.
        // assert!(expected_fpu.mxcsr == actual_fpu.mxcsr);
    }

    #[test]
    fn test_setup_msrs() {
        use hypervisor::arch::x86::{msr_index, MsrEntry};

        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();
        setup_msrs(&vcpu).unwrap();

        // This test checks against the last MSR entry configured (the tenth one).
        // See create_msr_entries for details.
        let mut msrs = vec![MsrEntry {
            index: msr_index::MSR_IA32_MISC_ENABLE,
            ..Default::default()
        }];

        // get_msrs returns the number of MSRs it succeeded in reading. We only want to read one
        // in this test case.
        let read_msrs = vcpu.get_msrs(&mut msrs).unwrap();
        assert_eq!(read_msrs, 1);

        // Official entries that were set up when we did setup_msrs. We need to assert that the
        // tenth one (i.e. the one with index msr_index::MSR_IA32_MISC_ENABLE) has the data we
        // expect.
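        // boot_msr_entries() is expected to return the same list that setup_msrs configured,
        // so index 9 (the tenth entry) should be the MSR_IA32_MISC_ENABLE entry read above.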
        let entry_vec = vcpu.boot_msr_entries();
        assert_eq!(entry_vec.as_slice()[9], msrs.as_slice()[0]);
    }

    #[test]
    fn test_setup_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();

        let expected_regs: StandardRegisters = StandardRegisters {
            rflags: 0x0000000000000002u64,
            rbx: arch::layout::PVH_INFO_START.0,
            rip: 1,
            ..Default::default()
        };

        setup_regs(&vcpu, expected_regs.rip).unwrap();

        let actual_regs: StandardRegisters = vcpu.get_regs().unwrap();
        assert_eq!(actual_regs, expected_regs);
    }
}

#[cfg(target_arch = "aarch64")]
#[cfg(test)]
mod tests {
    use arch::layout;
    use hypervisor::kvm::aarch64::{is_system_register, MPIDR_EL1};
    use hypervisor::kvm::kvm_bindings::{
        kvm_one_reg, kvm_regs, kvm_vcpu_init, user_pt_regs, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG,
        KVM_REG_ARM_CORE, KVM_REG_SIZE_U64,
    };
    use hypervisor::{arm64_core_reg_id, offset__of};
    use std::mem;

    #[test]
    fn test_setup_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();

        let res = vcpu.setup_regs(0, 0x0, layout::FDT_START.0);
        // Must fail when vcpu is not initialized yet.
        assert!(res.is_err());

        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();
        vcpu.vcpu_init(&kvi).unwrap();

        assert!(vcpu.setup_regs(0, 0x0, layout::FDT_START.0).is_ok());
    }

    #[test]
    fn test_read_mpidr() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        // Must fail when vcpu is not initialized yet.
        assert!(vcpu.read_mpidr().is_err());

        vcpu.vcpu_init(&kvi).unwrap();
        assert_eq!(vcpu.read_mpidr().unwrap(), 0x80000000);
    }

    #[test]
    fn test_is_system_register() {
        let offset = offset__of!(user_pt_regs, pc);
        let regid = arm64_core_reg_id!(KVM_REG_SIZE_U64, offset);
        assert!(!is_system_register(regid));
        let regid = KVM_REG_ARM64 as u64 | KVM_REG_SIZE_U64 as u64 | KVM_REG_ARM64_SYSREG as u64;
        assert!(is_system_register(regid));
    }

    #[test]
    fn test_save_restore_core_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        // Must fail when vcpu is not initialized yet.
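        // KVM reports ENOEXEC ("Exec format error", os error 8) when core registers are
        // accessed before the vCPU has been initialized with vcpu_init().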
        let res = vcpu.get_regs();
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.unwrap_err()),
            "Failed to get core register: Exec format error (os error 8)"
        );

        let mut state = kvm_regs::default();
        let res = vcpu.set_regs(&state);
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.unwrap_err()),
            "Failed to set core register: Exec format error (os error 8)"
        );

        vcpu.vcpu_init(&kvi).unwrap();
        let res = vcpu.get_regs();
        assert!(res.is_ok());
        state = res.unwrap();
        assert_eq!(state.regs.pstate, 0x3C5);

        assert!(vcpu.set_regs(&state).is_ok());
        let off = offset__of!(user_pt_regs, pstate);
        let pstate = vcpu
            .get_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .expect("Failed to call kvm get one reg");
        assert_eq!(state.regs.pstate, pstate);
    }

    #[test]
    fn test_save_restore_system_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        // Must fail when vcpu is not initialized yet.
        let mut state: Vec<kvm_one_reg> = Vec::new();
        let res = vcpu.get_sys_regs();
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.as_ref().unwrap_err()),
            "Failed to retrieve list of registers: Exec format error (os error 8)"
        );

        state.push(kvm_one_reg {
            id: MPIDR_EL1,
            addr: 0x00,
        });
        let res = vcpu.set_sys_regs(&state);
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.unwrap_err()),
            "Failed to set system register: Exec format error (os error 8)"
        );

        vcpu.vcpu_init(&kvi).unwrap();
        let res = vcpu.get_sys_regs();
        assert!(res.is_ok());
        state = res.unwrap();

        let initial_mpidr: u64 = vcpu.read_mpidr().expect("Failed to read mpidr");
        assert!(state.contains(&kvm_one_reg {
            id: MPIDR_EL1,
            addr: initial_mpidr
        }));

        assert!(vcpu.set_sys_regs(&state).is_ok());
        let mpidr: u64 = vcpu.read_mpidr().expect("Failed to read mpidr");
        assert_eq!(initial_mpidr, mpidr);
    }

    #[test]
    fn test_get_set_mpstate() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        let res = vcpu.get_mp_state();
        assert!(res.is_ok());
        assert!(vcpu.set_mp_state(res.unwrap()).is_ok());
    }
}