// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//

use crate::config::CpusConfig;
#[cfg(feature = "guest_debug")]
use crate::coredump::{
    CpuElf64Writable, CpuSegment, CpuState as DumpCpusState, DumpState, Elf64Writable,
    GuestDebuggableError, NoteDescType, X86_64ElfPrStatus, X86_64UserRegs, COREDUMP_NAME_SIZE,
    NT_PRSTATUS,
};
use crate::device_manager::DeviceManager;
#[cfg(feature = "gdb")]
use crate::gdb::{get_raw_tid, Debuggable, DebuggableError};
use crate::memory_manager::MemoryManager;
use crate::seccomp_filters::{get_seccomp_filter, Thread};
#[cfg(target_arch = "x86_64")]
use crate::vm::physical_bits;
use crate::GuestMemoryMmap;
use crate::CPU_MANAGER_SNAPSHOT_ID;
use acpi_tables::{aml, aml::Aml, sdt::Sdt};
use anyhow::anyhow;
use arch::EntryPoint;
use arch::NumaNodes;
use devices::interrupt_controller::InterruptController;
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
use gdbstub_arch::x86::reg::{X86SegmentRegs, X86_64CoreRegs};
#[cfg(feature = "guest_debug")]
use hypervisor::arch::x86::msr_index;
#[cfg(target_arch = "x86_64")]
use hypervisor::arch::x86::CpuIdEntry;
#[cfg(feature = "guest_debug")]
use hypervisor::arch::x86::MsrEntry;
#[cfg(all(target_arch = "x86_64", feature = "gdb"))]
use hypervisor::arch::x86::{SpecialRegisters, StandardRegisters};
#[cfg(target_arch = "aarch64")]
use hypervisor::kvm::kvm_bindings;
#[cfg(feature = "tdx")]
use hypervisor::kvm::{TdxExitDetails, TdxExitStatus};
use hypervisor::{CpuState, HypervisorCpuError, VmExit, VmOps};
use libc::{c_void, siginfo_t};
#[cfg(feature = "guest_debug")]
use linux_loader::elf::Elf64_Nhdr;
use seccompiler::{apply_filter, SeccompAction};
use std::collections::BTreeMap;
#[cfg(feature = "guest_debug")]
use std::io::Write;
#[cfg(feature = "guest_debug")]
use std::mem::size_of;
use std::os::unix::thread::JoinHandleExt;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use std::{cmp, io, result, thread};
use thiserror::Error;
use vm_device::BusDevice;
#[cfg(feature = "guest_debug")]
use vm_memory::ByteValued;
#[cfg(feature = "gdb")]
use vm_memory::{Bytes, GuestAddressSpace};
use vm_memory::{GuestAddress, GuestMemoryAtomic};
use vm_migration::{
    Migratable, MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable,
    Transportable,
};
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::signal::{register_signal_handler, SIGRTMIN};

pub const CPU_MANAGER_ACPI_SIZE: usize = 0xc;

#[derive(Debug, Error)]
pub enum Error {
    #[error("Error creating vCPU: {0}")]
    VcpuCreate(#[source] anyhow::Error),

    #[error("Error running vCPU: {0}")]
    VcpuRun(#[source] anyhow::Error),

    #[error("Error spawning vCPU thread: {0}")]
    VcpuSpawn(#[source] io::Error),

    #[error("Error generating common CPUID: {0}")]
    CommonCpuId(#[source] arch::Error),

    #[error("Error configuring vCPU: {0}")]
    VcpuConfiguration(#[source] arch::Error),

    #[cfg(target_arch = "aarch64")]
#[error("Error fetching preferred target: {0}")] 98 VcpuArmPreferredTarget(#[source] hypervisor::HypervisorVmError), 99 100 #[cfg(target_arch = "aarch64")] 101 #[error("Error initialising vCPU: {0}")] 102 VcpuArmInit(#[source] hypervisor::HypervisorCpuError), 103 104 #[error("Failed to join on vCPU threads: {0:?}")] 105 ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>), 106 107 #[error("Error adding CpuManager to MMIO bus: {0}")] 108 BusError(#[source] vm_device::BusError), 109 110 #[error("Requested vCPUs exceed maximum")] 111 DesiredVCpuCountExceedsMax, 112 113 #[error("Cannot create seccomp filter: {0}")] 114 CreateSeccompFilter(#[source] seccompiler::Error), 115 116 #[error("Cannot apply seccomp filter: {0}")] 117 ApplySeccompFilter(#[source] seccompiler::Error), 118 119 #[error("Error starting vCPU after restore: {0}")] 120 StartRestoreVcpu(#[source] anyhow::Error), 121 122 #[error("Unexpected VmExit")] 123 UnexpectedVmExit, 124 125 #[error("Failed to allocate MMIO address for CpuManager")] 126 AllocateMmmioAddress, 127 128 #[cfg(feature = "tdx")] 129 #[error("Error initializing TDX: {0}")] 130 InitializeTdx(#[source] hypervisor::HypervisorCpuError), 131 132 #[cfg(target_arch = "aarch64")] 133 #[error("Error initializing PMU: {0}")] 134 InitPmu(#[source] hypervisor::HypervisorCpuError), 135 136 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 137 #[error("Error during CPU debug: {0}")] 138 CpuDebug(#[source] hypervisor::HypervisorCpuError), 139 140 #[cfg(all(target_arch = "x86_64", feature = "gdb"))] 141 #[error("Error translating virtual address: {0}")] 142 TranslateVirtualAddress(#[source] hypervisor::HypervisorCpuError), 143 144 #[cfg(all(feature = "amx", target_arch = "x86_64"))] 145 #[error("Error setting up AMX: {0}")] 146 AmxEnable(#[source] anyhow::Error), 147 } 148 pub type Result<T> = result::Result<T, Error>; 149 150 #[cfg(target_arch = "x86_64")] 151 #[allow(dead_code)] 152 #[repr(packed)] 153 struct LocalApic { 154 pub r#type: u8, 155 pub length: u8, 156 pub processor_id: u8, 157 pub apic_id: u8, 158 pub flags: u32, 159 } 160 161 #[allow(dead_code)] 162 #[repr(packed)] 163 #[derive(Default)] 164 struct Ioapic { 165 pub r#type: u8, 166 pub length: u8, 167 pub ioapic_id: u8, 168 _reserved: u8, 169 pub apic_address: u32, 170 pub gsi_base: u32, 171 } 172 173 #[cfg(target_arch = "aarch64")] 174 #[allow(dead_code)] 175 #[repr(packed)] 176 struct GicC { 177 pub r#type: u8, 178 pub length: u8, 179 pub reserved0: u16, 180 pub cpu_interface_number: u32, 181 pub uid: u32, 182 pub flags: u32, 183 pub parking_version: u32, 184 pub performance_interrupt: u32, 185 pub parked_address: u64, 186 pub base_address: u64, 187 pub gicv_base_address: u64, 188 pub gich_base_address: u64, 189 pub vgic_interrupt: u32, 190 pub gicr_base_address: u64, 191 pub mpidr: u64, 192 pub proc_power_effi_class: u8, 193 pub reserved1: u8, 194 pub spe_overflow_interrupt: u16, 195 } 196 197 #[cfg(target_arch = "aarch64")] 198 #[allow(dead_code)] 199 #[repr(packed)] 200 struct GicD { 201 pub r#type: u8, 202 pub length: u8, 203 pub reserved0: u16, 204 pub gic_id: u32, 205 pub base_address: u64, 206 pub global_irq_base: u32, 207 pub version: u8, 208 pub reserved1: [u8; 3], 209 } 210 211 #[cfg(target_arch = "aarch64")] 212 #[allow(dead_code)] 213 #[repr(packed)] 214 struct GicR { 215 pub r#type: u8, 216 pub length: u8, 217 pub reserved: u16, 218 pub base_address: u64, 219 pub range_length: u32, 220 } 221 222 #[cfg(target_arch = "aarch64")] 223 #[allow(dead_code)] 224 #[repr(packed)] 225 struct 
    pub r#type: u8,
    pub length: u8,
    pub reserved0: u16,
    pub translation_id: u32,
    pub base_address: u64,
    pub reserved1: u32,
}

#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
#[repr(packed)]
struct ProcessorHierarchyNode {
    pub r#type: u8,
    pub length: u8,
    pub reserved: u16,
    pub flags: u32,
    pub parent: u32,
    pub acpi_processor_id: u32,
    pub num_private_resources: u32,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct InterruptSourceOverride {
    pub r#type: u8,
    pub length: u8,
    pub bus: u8,
    pub source: u8,
    pub gsi: u32,
    pub flags: u16,
}

#[cfg(feature = "guest_debug")]
macro_rules! round_up {
    ($n:expr,$d:expr) => {
        (($n / ($d + 1)) + 1) * $d
    };
}

/// A wrapper around creating and using a kvm-based VCPU.
pub struct Vcpu {
    // The hypervisor abstracted CPU.
    vcpu: Arc<dyn hypervisor::Vcpu>,
    id: u8,
    #[cfg(target_arch = "aarch64")]
    mpidr: u64,
    saved_state: Option<CpuState>,
}

impl Vcpu {
    /// Constructs a new VCPU for `vm`.
    ///
    /// # Arguments
    ///
    /// * `id` - Represents the CPU number between [0, max vcpus).
    /// * `vm` - The virtual machine this vcpu will get attached to.
    /// * `vm_ops` - Optional object for exit handling.
    pub fn new(
        id: u8,
        vm: &Arc<dyn hypervisor::Vm>,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> Result<Self> {
        let vcpu = vm
            .create_vcpu(id, vm_ops)
            .map_err(|e| Error::VcpuCreate(e.into()))?;
        // Initially the cpuid per vCPU is the one supported by this VM.
        Ok(Vcpu {
            vcpu,
            id,
            #[cfg(target_arch = "aarch64")]
            mpidr: 0,
            saved_state: None,
        })
    }

    /// Configures a vcpu and should be called once per vcpu when created.
    ///
    /// # Arguments
    ///
    /// * `kernel_entry_point` - Kernel entry point address in guest memory and boot protocol used.
    /// * `vm_memory` - Guest memory.
    /// * `cpuid` - (x86_64) CpuId, wrapper over the `kvm_cpuid2` structure.
    pub fn configure(
        &mut self,
        #[cfg(target_arch = "aarch64")] vm: &Arc<dyn hypervisor::Vm>,
        kernel_entry_point: Option<EntryPoint>,
        #[cfg(target_arch = "x86_64")] vm_memory: &GuestMemoryAtomic<GuestMemoryMmap>,
        #[cfg(target_arch = "x86_64")] cpuid: Vec<CpuIdEntry>,
        #[cfg(target_arch = "x86_64")] kvm_hyperv: bool,
    ) -> Result<()> {
        #[cfg(target_arch = "aarch64")]
        {
            self.init(vm)?;
            self.mpidr = arch::configure_vcpu(&self.vcpu, self.id, kernel_entry_point)
                .map_err(Error::VcpuConfiguration)?;
        }
        info!("Configuring vCPU: cpu_id = {}", self.id);
        #[cfg(target_arch = "x86_64")]
        arch::configure_vcpu(
            &self.vcpu,
            self.id,
            kernel_entry_point,
            vm_memory,
            cpuid,
            kvm_hyperv,
        )
        .map_err(Error::VcpuConfiguration)?;

        Ok(())
    }

    /// Gets the MPIDR register value.
    #[cfg(target_arch = "aarch64")]
    pub fn get_mpidr(&self) -> u64 {
        self.mpidr
    }

    /// Gets the saved vCPU state.
    #[cfg(target_arch = "aarch64")]
    pub fn get_saved_state(&self) -> Option<CpuState> {
        self.saved_state.clone()
    }

    /// Initializes an aarch64 specific vcpu for booting Linux.
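    ///
    /// Illustrative usage only (not compiled), assuming a `vm: Arc<dyn hypervisor::Vm>`
    /// is already available:
    ///
    /// ```ignore
    /// let mut vcpu = Vcpu::new(0, &vm, None)?;
    /// // On aarch64 a vCPU must be initialized before it is configured or run.
    /// vcpu.init(&vm)?;
    /// ```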
    #[cfg(target_arch = "aarch64")]
    pub fn init(&self, vm: &Arc<dyn hypervisor::Vm>) -> Result<()> {
        let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default();

        // This reads back the kernel's preferred target type.
        vm.get_preferred_target(&mut kvi)
            .map_err(Error::VcpuArmPreferredTarget)?;
        // We already checked that the capability is supported.
        kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2;
        kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
        // Non-boot cpus are powered off initially.
        if self.id > 0 {
            kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
        }
        self.vcpu.vcpu_init(&kvi).map_err(Error::VcpuArmInit)
    }

    /// Runs the VCPU until it exits, returning the reason.
    ///
    /// Note that the state of the VCPU and associated VM must be setup first for this to do
    /// anything useful.
    pub fn run(&self) -> std::result::Result<VmExit, HypervisorCpuError> {
        self.vcpu.run()
    }
}

const VCPU_SNAPSHOT_ID: &str = "vcpu";
impl Pausable for Vcpu {}
impl Snapshottable for Vcpu {
    fn id(&self) -> String {
        VCPU_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let saved_state = self
            .vcpu
            .state()
            .map_err(|e| MigratableError::Pause(anyhow!("Could not get vCPU state {:?}", e)))?;

        let mut vcpu_snapshot = Snapshot::new(&format!("{:03}", self.id));
        vcpu_snapshot.add_data_section(SnapshotDataSection::new_from_state(
            VCPU_SNAPSHOT_ID,
            &saved_state,
        )?);

        self.saved_state = Some(saved_state);

        Ok(vcpu_snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        let saved_state: CpuState = snapshot.to_state(VCPU_SNAPSHOT_ID)?;

        self.vcpu
            .set_state(&saved_state)
            .map_err(|e| MigratableError::Pause(anyhow!("Could not set the vCPU state {:?}", e)))?;

        self.saved_state = Some(saved_state);

        Ok(())
    }
}

pub struct CpuManager {
    config: CpusConfig,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    interrupt_controller: Option<Arc<Mutex<dyn InterruptController>>>,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    vm_memory: GuestMemoryAtomic<GuestMemoryMmap>,
    #[cfg(target_arch = "x86_64")]
    cpuid: Vec<CpuIdEntry>,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    vm: Arc<dyn hypervisor::Vm>,
    vcpus_kill_signalled: Arc<AtomicBool>,
    vcpus_pause_signalled: Arc<AtomicBool>,
    exit_evt: EventFd,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    reset_evt: EventFd,
    #[cfg(feature = "gdb")]
    vm_debug_evt: EventFd,
    vcpu_states: Vec<VcpuState>,
    selected_cpu: u8,
    vcpus: Vec<Arc<Mutex<Vcpu>>>,
    seccomp_action: SeccompAction,
    vm_ops: Arc<dyn VmOps>,
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    acpi_address: Option<GuestAddress>,
    proximity_domain_per_cpu: BTreeMap<u8, u32>,
    affinity: BTreeMap<u8, Vec<u8>>,
    dynamic: bool,
}

const CPU_ENABLE_FLAG: usize = 0;
const CPU_INSERTING_FLAG: usize = 1;
const CPU_REMOVING_FLAG: usize = 2;
const CPU_EJECT_FLAG: usize = 3;

const CPU_STATUS_OFFSET: u64 = 4;
const CPU_SELECTION_OFFSET: u64 = 0;

impl BusDevice for CpuManager {
    fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
        // The Linux kernel, quite reasonably, doesn't zero the memory it gives us.
        data.fill(0);

        match offset {
            CPU_SELECTION_OFFSET => {
                data[0] = self.selected_cpu;
            }
            CPU_STATUS_OFFSET => {
                if self.selected_cpu < self.max_vcpus() {
                    let state = &self.vcpu_states[usize::from(self.selected_cpu)];
                    if state.active() {
                        data[0] |= 1 << CPU_ENABLE_FLAG;
                    }
                    if state.inserting {
                        data[0] |= 1 << CPU_INSERTING_FLAG;
                    }
                    if state.removing {
                        data[0] |= 1 << CPU_REMOVING_FLAG;
                    }
                } else {
                    warn!("Out of range vCPU id: {}", self.selected_cpu);
                }
            }
            _ => {
                warn!(
                    "Unexpected offset for accessing CPU manager device: {:#}",
                    offset
                );
            }
        }
    }

    fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            CPU_SELECTION_OFFSET => {
                self.selected_cpu = data[0];
            }
            CPU_STATUS_OFFSET => {
                if self.selected_cpu < self.max_vcpus() {
                    let state = &mut self.vcpu_states[usize::from(self.selected_cpu)];
                    // The ACPI code writes back a 1 to acknowledge the insertion
                    if (data[0] & (1 << CPU_INSERTING_FLAG) == 1 << CPU_INSERTING_FLAG)
                        && state.inserting
                    {
                        state.inserting = false;
                    }
                    // Ditto for removal
                    if (data[0] & (1 << CPU_REMOVING_FLAG) == 1 << CPU_REMOVING_FLAG)
                        && state.removing
                    {
                        state.removing = false;
                    }
                    // Trigger removal of vCPU
                    if data[0] & (1 << CPU_EJECT_FLAG) == 1 << CPU_EJECT_FLAG {
                        if let Err(e) = self.remove_vcpu(self.selected_cpu) {
                            error!("Error removing vCPU: {:?}", e);
                        }
                    }
                } else {
                    warn!("Out of range vCPU id: {}", self.selected_cpu);
                }
            }
            _ => {
                warn!(
                    "Unexpected offset for accessing CPU manager device: {:#}",
                    offset
                );
            }
        }
        None
    }
}

#[derive(Default)]
struct VcpuState {
    inserting: bool,
    removing: bool,
    handle: Option<thread::JoinHandle<()>>,
    kill: Arc<AtomicBool>,
    vcpu_run_interrupted: Arc<AtomicBool>,
}

impl VcpuState {
    fn active(&self) -> bool {
        self.handle.is_some()
    }

    fn signal_thread(&self) {
        if let Some(handle) = self.handle.as_ref() {
            loop {
                unsafe {
                    libc::pthread_kill(handle.as_pthread_t() as _, SIGRTMIN());
                }
                if self.vcpu_run_interrupted.load(Ordering::SeqCst) {
                    break;
                } else {
                    // This is more effective than thread::yield_now() at
                    // avoiding a priority inversion with the vCPU thread
                    thread::sleep(std::time::Duration::from_millis(1));
                }
            }
        }
    }

    fn join_thread(&mut self) -> Result<()> {
        if let Some(handle) = self.handle.take() {
            handle.join().map_err(Error::ThreadCleanup)?
        }

        Ok(())
    }

    fn unpark_thread(&self) {
        if let Some(handle) = self.handle.as_ref() {
            handle.thread().unpark()
        }
    }
}

impl CpuManager {
    #[allow(unused_variables)]
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        config: &CpusConfig,
        device_manager: &Arc<Mutex<DeviceManager>>,
        memory_manager: &Arc<Mutex<MemoryManager>>,
        vm: Arc<dyn hypervisor::Vm>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        #[cfg(feature = "gdb")] vm_debug_evt: EventFd,
        hypervisor: Arc<dyn hypervisor::Hypervisor>,
        seccomp_action: SeccompAction,
        vm_ops: Arc<dyn VmOps>,
        #[cfg(feature = "tdx")] tdx_enabled: bool,
        numa_nodes: &NumaNodes,
    ) -> Result<Arc<Mutex<CpuManager>>> {
        let guest_memory = memory_manager.lock().unwrap().guest_memory();
        let mut vcpu_states = Vec::with_capacity(usize::from(config.max_vcpus));
        vcpu_states.resize_with(usize::from(config.max_vcpus), VcpuState::default);

        #[cfg(target_arch = "x86_64")]
        let sgx_epc_sections = memory_manager
            .lock()
            .unwrap()
            .sgx_epc_region()
            .as_ref()
            .map(|sgx_epc_region| sgx_epc_region.epc_sections().values().cloned().collect());
        #[cfg(target_arch = "x86_64")]
        let cpuid = {
            let phys_bits = physical_bits(config.max_phys_bits);
            arch::generate_common_cpuid(
                hypervisor,
                config
                    .topology
                    .clone()
                    .map(|t| (t.threads_per_core, t.cores_per_die, t.dies_per_package)),
                sgx_epc_sections,
                phys_bits,
                config.kvm_hyperv,
                #[cfg(feature = "tdx")]
                tdx_enabled,
            )
            .map_err(Error::CommonCpuId)?
        };
        #[cfg(all(feature = "amx", target_arch = "x86_64"))]
        if config.features.amx {
            const ARCH_GET_XCOMP_GUEST_PERM: usize = 0x1024;
            const ARCH_REQ_XCOMP_GUEST_PERM: usize = 0x1025;
            const XFEATURE_XTILEDATA: usize = 18;
            const XFEATURE_XTILEDATA_MASK: usize = 1 << XFEATURE_XTILEDATA;

            // This is safe as the syscall is only modifying kernel internal
            // data structures that the kernel is itself expected to safeguard.
            let amx_tile = unsafe {
                libc::syscall(
                    libc::SYS_arch_prctl,
                    ARCH_REQ_XCOMP_GUEST_PERM,
                    XFEATURE_XTILEDATA,
                )
            };

            if amx_tile != 0 {
                return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported")));
            } else {
                // This is safe as the mask being modified (not marked mutable as it is
                // modified in unsafe only which is permitted) isn't in use elsewhere.
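                // The arch_prctl(ARCH_GET_XCOMP_GUEST_PERM) call below reads the
                // permitted guest xfeature mask back into `mask` so we can verify
                // that XTILEDATA was actually granted.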
                let mask: usize = 0;
                let result = unsafe {
                    libc::syscall(libc::SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &mask)
                };
                if result != 0 || (mask & XFEATURE_XTILEDATA_MASK) != XFEATURE_XTILEDATA_MASK {
                    return Err(Error::AmxEnable(anyhow!("Guest AMX usage not supported")));
                }
            }
        }

        let device_manager = device_manager.lock().unwrap();

        let proximity_domain_per_cpu: BTreeMap<u8, u32> = {
            let mut cpu_list = Vec::new();
            for (proximity_domain, numa_node) in numa_nodes.iter() {
                for cpu in numa_node.cpus.iter() {
                    cpu_list.push((*cpu, *proximity_domain))
                }
            }
            cpu_list
        }
        .into_iter()
        .collect();

        let affinity = if let Some(cpu_affinity) = config.affinity.as_ref() {
            cpu_affinity
                .iter()
                .map(|a| (a.vcpu, a.host_cpus.clone()))
                .collect()
        } else {
            BTreeMap::new()
        };

        #[cfg(feature = "tdx")]
        let dynamic = !tdx_enabled;
        #[cfg(not(feature = "tdx"))]
        let dynamic = true;

        let acpi_address = if dynamic {
            Some(
                device_manager
                    .allocator()
                    .lock()
                    .unwrap()
                    .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                    .ok_or(Error::AllocateMmmioAddress)?,
            )
        } else {
            None
        };

        let cpu_manager = Arc::new(Mutex::new(CpuManager {
            config: config.clone(),
            interrupt_controller: device_manager.interrupt_controller().clone(),
            vm_memory: guest_memory,
            #[cfg(target_arch = "x86_64")]
            cpuid,
            vm,
            vcpus_kill_signalled: Arc::new(AtomicBool::new(false)),
            vcpus_pause_signalled: Arc::new(AtomicBool::new(false)),
            vcpu_states,
            exit_evt,
            reset_evt,
            #[cfg(feature = "gdb")]
            vm_debug_evt,
            selected_cpu: 0,
            vcpus: Vec::with_capacity(usize::from(config.max_vcpus)),
            seccomp_action,
            vm_ops,
            acpi_address,
            proximity_domain_per_cpu,
            affinity,
            dynamic,
        }));

        if let Some(acpi_address) = acpi_address {
            device_manager
                .mmio_bus()
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(Error::BusError)?;
        }

        Ok(cpu_manager)
    }

    fn create_vcpu(
        &mut self,
        cpu_id: u8,
        entry_point: Option<EntryPoint>,
        snapshot: Option<Snapshot>,
    ) -> Result<()> {
        info!("Creating vCPU: cpu_id = {}", cpu_id);

        let mut vcpu = Vcpu::new(cpu_id, &self.vm, Some(self.vm_ops.clone()))?;

        if let Some(snapshot) = snapshot {
            // AArch64 vCPUs should be initialized after being created.
            #[cfg(target_arch = "aarch64")]
            vcpu.init(&self.vm)?;

            vcpu.restore(snapshot).expect("Failed to restore vCPU");
        } else {
            #[cfg(target_arch = "x86_64")]
            vcpu.configure(
                entry_point,
                &self.vm_memory,
                self.cpuid.clone(),
                self.config.kvm_hyperv,
            )
            .expect("Failed to configure vCPU");

            #[cfg(target_arch = "aarch64")]
            vcpu.configure(&self.vm, entry_point)
                .expect("Failed to configure vCPU");
        }

        // Adding vCPU to the CpuManager's vCPU list.
        let vcpu = Arc::new(Mutex::new(vcpu));
        self.vcpus.push(vcpu);

        Ok(())
    }

    /// Only create new vCPUs if there aren't any inactive ones to reuse
    fn create_vcpus(&mut self, desired_vcpus: u8, entry_point: Option<EntryPoint>) -> Result<()> {
        info!(
            "Request to create new vCPUs: desired = {}, max = {}, allocated = {}, present = {}",
            desired_vcpus,
            self.config.max_vcpus,
            self.vcpus.len(),
            self.present_vcpus()
        );

        if desired_vcpus > self.config.max_vcpus {
            return Err(Error::DesiredVCpuCountExceedsMax);
        }

        // Only create vCPUs in excess of all the allocated vCPUs.
        for cpu_id in self.vcpus.len() as u8..desired_vcpus {
            self.create_vcpu(cpu_id, entry_point, None)?;
        }

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    pub fn init_pmu(&self, irq: u32) -> Result<bool> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };

        for cpu in self.vcpus.iter() {
            let tmp = irq;
            let cpu_attr_irq = kvm_bindings::kvm_device_attr {
                group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
                attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
                addr: &tmp as *const u32 as u64,
                flags: 0,
            };

            // Check if PMU attr is available, if not, log the information.
            if cpu.lock().unwrap().vcpu.has_vcpu_attr(&cpu_attr).is_ok() {
                // Set irq for PMU
                cpu.lock()
                    .unwrap()
                    .vcpu
                    .set_vcpu_attr(&cpu_attr_irq)
                    .map_err(Error::InitPmu)?;

                // Init PMU
                cpu.lock()
                    .unwrap()
                    .vcpu
                    .set_vcpu_attr(&cpu_attr)
                    .map_err(Error::InitPmu)?;
            } else {
                debug!(
                    "PMU attribute is not supported in vCPU{}, skip PMU init!",
                    cpu.lock().unwrap().id
                );
                return Ok(false);
            }
        }

        Ok(true)
    }

    fn start_vcpu(
        &mut self,
        vcpu: Arc<Mutex<Vcpu>>,
        vcpu_id: u8,
        vcpu_thread_barrier: Arc<Barrier>,
        inserting: bool,
    ) -> Result<()> {
        let reset_evt = self.reset_evt.try_clone().unwrap();
        let exit_evt = self.exit_evt.try_clone().unwrap();
        #[cfg(feature = "gdb")]
        let vm_debug_evt = self.vm_debug_evt.try_clone().unwrap();
        let panic_exit_evt = self.exit_evt.try_clone().unwrap();
        let vcpu_kill_signalled = self.vcpus_kill_signalled.clone();
        let vcpu_pause_signalled = self.vcpus_pause_signalled.clone();

        let vcpu_kill = self.vcpu_states[usize::from(vcpu_id)].kill.clone();
        let vcpu_run_interrupted = self.vcpu_states[usize::from(vcpu_id)]
            .vcpu_run_interrupted
            .clone();
        let panic_vcpu_run_interrupted = vcpu_run_interrupted.clone();

        // Prepare the CPU set the current vCPU is expected to run onto.
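        // The affinity (if any) is expressed as a libc::cpu_set_t so it can be
        // handed to sched_setaffinity() from the newly spawned vCPU thread below.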
        let cpuset = self.affinity.get(&vcpu_id).map(|host_cpus| {
            let mut cpuset: libc::cpu_set_t = unsafe { std::mem::zeroed() };
            unsafe { libc::CPU_ZERO(&mut cpuset) };
            for host_cpu in host_cpus {
                unsafe { libc::CPU_SET(*host_cpu as usize, &mut cpuset) };
            }
            cpuset
        });

        // Retrieve seccomp filter for vcpu thread
        let vcpu_seccomp_filter = get_seccomp_filter(&self.seccomp_action, Thread::Vcpu)
            .map_err(Error::CreateSeccompFilter)?;

        #[cfg(target_arch = "x86_64")]
        let interrupt_controller_clone = self.interrupt_controller.as_ref().cloned();

        info!("Starting vCPU: cpu_id = {}", vcpu_id);

        let handle = Some(
            thread::Builder::new()
                .name(format!("vcpu{}", vcpu_id))
                .spawn(move || {
                    // Schedule the thread to run on the expected CPU set
                    if let Some(cpuset) = cpuset.as_ref() {
                        let ret = unsafe {
                            libc::sched_setaffinity(
                                0,
                                std::mem::size_of::<libc::cpu_set_t>(),
                                cpuset as *const libc::cpu_set_t,
                            )
                        };

                        if ret != 0 {
                            error!(
                                "Failed scheduling the vCPU {} on the expected CPU set: {}",
                                vcpu_id,
                                io::Error::last_os_error()
                            );
                            return;
                        }
                    }

                    // Apply seccomp filter for vcpu thread.
                    if !vcpu_seccomp_filter.is_empty() {
                        if let Err(e) =
                            apply_filter(&vcpu_seccomp_filter).map_err(Error::ApplySeccompFilter)
                        {
                            error!("Error applying seccomp filter: {:?}", e);
                            return;
                        }
                    }
                    extern "C" fn handle_signal(_: i32, _: *mut siginfo_t, _: *mut c_void) {}
                    // This uses an async signal safe handler to kill the vcpu handles.
                    register_signal_handler(SIGRTMIN(), handle_signal)
                        .expect("Failed to register vcpu signal handler");
                    // Block until all CPUs are ready.
                    vcpu_thread_barrier.wait();

                    std::panic::catch_unwind(move || {
                        loop {
                            // If we are being told to pause, we park the thread
                            // until the pause boolean is toggled.
                            // The resume operation is responsible for toggling
                            // the boolean and unpark the thread.
                            // We enter a loop because park() could spuriously
                            // return. We will then park() again unless the
                            // pause boolean has been toggled.

                            // Need to use Ordering::SeqCst as we have multiple
                            // loads and stores to different atomics and we need
                            // to see them in a consistent order in all threads

                            if vcpu_pause_signalled.load(Ordering::SeqCst) {
                                // As a pause can be caused by PIO & MMIO exits then we need to ensure they are
                                // completed by returning to KVM_RUN. From the kernel docs:
                                //
                                // For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
                                // KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
                                // operations are complete (and guest state is consistent) only after userspace
                                // has re-entered the kernel with KVM_RUN. The kernel side will first finish
                                // incomplete operations and then check for pending signals.
                                // The pending state of the operation is not preserved in state which is
                                // visible to userspace, thus userspace should ensure that the operation is
                                // completed before performing a live migration. Userspace can re-enter the
                                // guest with an unmasked signal pending or with the immediate_exit field set
                                // to complete pending operations without allowing any further instructions
                                // to be executed.
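                                //
                                // Concretely: set immediate_exit, call run() once so the kernel can
                                // finish any pending PIO/MMIO emulation (it comes back as
                                // VmExit::Ignore), then clear immediate_exit before parking below.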
                                #[cfg(feature = "kvm")]
                                {
                                    vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(true);
                                    if !matches!(vcpu.lock().unwrap().run(), Ok(VmExit::Ignore)) {
                                        error!("Unexpected VM exit on \"immediate_exit\" run");
                                        break;
                                    }
                                    vcpu.lock().as_ref().unwrap().vcpu.set_immediate_exit(false);
                                }

                                vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                while vcpu_pause_signalled.load(Ordering::SeqCst) {
                                    thread::park();
                                }
                                vcpu_run_interrupted.store(false, Ordering::SeqCst);
                            }

                            // We've been told to terminate
                            if vcpu_kill_signalled.load(Ordering::SeqCst)
                                || vcpu_kill.load(Ordering::SeqCst)
                            {
                                vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                break;
                            }

                            #[cfg(feature = "tdx")]
                            let mut vcpu = vcpu.lock().unwrap();
                            #[cfg(not(feature = "tdx"))]
                            let vcpu = vcpu.lock().unwrap();
                            // vcpu.run() returns false on a triple-fault so trigger a reset
                            match vcpu.run() {
                                Ok(run) => match run {
                                    #[cfg(all(target_arch = "x86_64", feature = "kvm"))]
                                    VmExit::Debug => {
                                        info!("VmExit::Debug");
                                        #[cfg(feature = "gdb")]
                                        {
                                            vcpu_pause_signalled.store(true, Ordering::SeqCst);
                                            let raw_tid = get_raw_tid(vcpu_id as usize);
                                            vm_debug_evt.write(raw_tid as u64).unwrap();
                                        }
                                    }
                                    #[cfg(target_arch = "x86_64")]
                                    VmExit::IoapicEoi(vector) => {
                                        if let Some(interrupt_controller) =
                                            &interrupt_controller_clone
                                        {
                                            interrupt_controller
                                                .lock()
                                                .unwrap()
                                                .end_of_interrupt(vector);
                                        }
                                    }
                                    VmExit::Ignore => {}
                                    VmExit::Hyperv => {}
                                    VmExit::Reset => {
                                        info!("VmExit::Reset");
                                        vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                        reset_evt.write(1).unwrap();
                                        break;
                                    }
                                    VmExit::Shutdown => {
                                        info!("VmExit::Shutdown");
                                        vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                        exit_evt.write(1).unwrap();
                                        break;
                                    }
                                    #[cfg(feature = "tdx")]
                                    VmExit::Tdx => {
                                        if let Some(vcpu) = Arc::get_mut(&mut vcpu.vcpu) {
                                            match vcpu.get_tdx_exit_details() {
                                                Ok(details) => match details {
                                                    TdxExitDetails::GetQuote => warn!("TDG_VP_VMCALL_GET_QUOTE not supported"),
                                                    TdxExitDetails::SetupEventNotifyInterrupt => {
                                                        warn!("TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT not supported")
                                                    }
                                                },
                                                Err(e) => error!("Unexpected TDX VMCALL: {}", e),
                                            }
                                            vcpu.set_tdx_status(TdxExitStatus::InvalidOperand);
                                        } else {
                                            // We should never reach this code as
                                            // this means the design from the code
                                            // is wrong.
                                            unreachable!("Couldn't get a mutable reference from Arc<dyn Vcpu> as there are multiple instances");
                                        }
                                    }
                                    _ => {
                                        error!(
                                            "VCPU generated error: {:?}",
                                            Error::UnexpectedVmExit
                                        );
                                        break;
                                    }
                                },

                                Err(e) => {
                                    error!("VCPU generated error: {:?}", Error::VcpuRun(e.into()));
                                    break;
                                }
                            }

                            // We've been told to terminate
                            if vcpu_kill_signalled.load(Ordering::SeqCst)
                                || vcpu_kill.load(Ordering::SeqCst)
                            {
                                vcpu_run_interrupted.store(true, Ordering::SeqCst);
                                break;
                            }
                        }
                    })
                    .or_else(|_| {
                        panic_vcpu_run_interrupted.store(true, Ordering::SeqCst);
                        error!("vCPU thread panicked");
                        panic_exit_evt.write(1)
                    })
                    .ok();
                })
                .map_err(Error::VcpuSpawn)?,
        );

        // On hot plug calls into this function, entry_point is None. It is for
        // those hotplug CPU additions that we need to set the inserting flag.
        self.vcpu_states[usize::from(vcpu_id)].handle = handle;
        self.vcpu_states[usize::from(vcpu_id)].inserting = inserting;

        Ok(())
    }

    /// Start up as many vCPU threads as needed to reach `desired_vcpus`
    fn activate_vcpus(&mut self, desired_vcpus: u8, inserting: bool) -> Result<()> {
        if desired_vcpus > self.config.max_vcpus {
            return Err(Error::DesiredVCpuCountExceedsMax);
        }

        let vcpu_thread_barrier = Arc::new(Barrier::new(
            (desired_vcpus - self.present_vcpus() + 1) as usize,
        ));

        info!(
            "Starting vCPUs: desired = {}, allocated = {}, present = {}",
            desired_vcpus,
            self.vcpus.len(),
            self.present_vcpus()
        );

        // This reuses any inactive vCPUs as well as any that were newly created
        for vcpu_id in self.present_vcpus()..desired_vcpus {
            let vcpu = Arc::clone(&self.vcpus[vcpu_id as usize]);
            self.start_vcpu(vcpu, vcpu_id, vcpu_thread_barrier.clone(), inserting)?;
        }

        // Unblock all CPU threads.
        vcpu_thread_barrier.wait();
        Ok(())
    }

    fn mark_vcpus_for_removal(&mut self, desired_vcpus: u8) {
        // Mark vCPUs for removal, actual removal happens on ejection
        for cpu_id in desired_vcpus..self.present_vcpus() {
            self.vcpu_states[usize::from(cpu_id)].removing = true;
        }
    }

    fn remove_vcpu(&mut self, cpu_id: u8) -> Result<()> {
        info!("Removing vCPU: cpu_id = {}", cpu_id);
        let mut state = &mut self.vcpu_states[usize::from(cpu_id)];
        state.kill.store(true, Ordering::SeqCst);
        state.signal_thread();
        state.join_thread()?;
        state.handle = None;

        // Once the thread has exited, clear the "kill" so that it can be reused
        state.kill.store(false, Ordering::SeqCst);

        Ok(())
    }

    pub fn create_boot_vcpus(&mut self, entry_point: Option<EntryPoint>) -> Result<()> {
        self.create_vcpus(self.boot_vcpus(), entry_point)
    }

    // Starts all the vCPUs that the VM is booting with. Blocks until all vCPUs are running.
    pub fn start_boot_vcpus(&mut self) -> Result<()> {
        self.activate_vcpus(self.boot_vcpus(), false)
    }

    pub fn start_restored_vcpus(&mut self) -> Result<()> {
        let vcpu_numbers = self.vcpus.len() as u8;
        let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_numbers + 1) as usize));
        // Restore the vCPUs in "paused" state.
        self.vcpus_pause_signalled.store(true, Ordering::SeqCst);

        for vcpu_id in 0..vcpu_numbers {
            let vcpu = Arc::clone(&self.vcpus[vcpu_id as usize]);

            self.start_vcpu(vcpu, vcpu_id, vcpu_thread_barrier.clone(), false)
                .map_err(|e| {
                    Error::StartRestoreVcpu(anyhow!("Failed to start restored vCPUs: {:#?}", e))
                })?;
        }
        // Unblock all restored CPU threads.
        vcpu_thread_barrier.wait();
        Ok(())
    }

    pub fn resize(&mut self, desired_vcpus: u8) -> Result<bool> {
        if desired_vcpus.cmp(&self.present_vcpus()) == cmp::Ordering::Equal {
            return Ok(false);
        }

        if !self.dynamic {
            return Ok(false);
        }

        match desired_vcpus.cmp(&self.present_vcpus()) {
            cmp::Ordering::Greater => {
                self.create_vcpus(desired_vcpus, None)?;
                self.activate_vcpus(desired_vcpus, true)?;
                Ok(true)
            }
            cmp::Ordering::Less => {
                self.mark_vcpus_for_removal(desired_vcpus);
                Ok(true)
            }
            _ => Ok(false),
        }
    }

    pub fn shutdown(&mut self) -> Result<()> {
        // Tell the vCPUs to stop themselves next time they go through the loop
        self.vcpus_kill_signalled.store(true, Ordering::SeqCst);

        // Toggle the vCPUs pause boolean
        self.vcpus_pause_signalled.store(false, Ordering::SeqCst);

        // Unpark all the VCPU threads.
        for state in self.vcpu_states.iter() {
            state.unpark_thread();
        }

        // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads
        // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set
        // above.
        for state in self.vcpu_states.iter() {
            state.signal_thread();
        }

        // Wait for all the threads to finish. This removes the state from the vector.
        for mut state in self.vcpu_states.drain(..) {
            state.join_thread()?;
        }

        Ok(())
    }

    #[cfg(feature = "tdx")]
    pub fn initialize_tdx(&self, hob_address: u64) -> Result<()> {
        for vcpu in &self.vcpus {
            vcpu.lock()
                .unwrap()
                .vcpu
                .tdx_init(hob_address)
                .map_err(Error::InitializeTdx)?;
        }
        Ok(())
    }

    pub fn boot_vcpus(&self) -> u8 {
        self.config.boot_vcpus
    }

    pub fn max_vcpus(&self) -> u8 {
        self.config.max_vcpus
    }

    #[cfg(target_arch = "x86_64")]
    pub fn common_cpuid(&self) -> Vec<CpuIdEntry> {
        self.cpuid.clone()
    }

    fn present_vcpus(&self) -> u8 {
        self.vcpu_states
            .iter()
            .fold(0, |acc, state| acc + state.active() as u8)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn get_mpidrs(&self) -> Vec<u64> {
        self.vcpus
            .iter()
            .map(|cpu| cpu.lock().unwrap().get_mpidr())
            .collect()
    }

    #[cfg(target_arch = "aarch64")]
    pub fn get_saved_states(&self) -> Vec<CpuState> {
        self.vcpus
            .iter()
            .map(|cpu| cpu.lock().unwrap().get_saved_state().unwrap())
            .collect()
    }

    #[cfg(target_arch = "aarch64")]
    pub fn get_vcpu_topology(&self) -> Option<(u8, u8, u8)> {
        self.config
            .topology
            .clone()
            .map(|t| (t.threads_per_core, t.cores_per_die, t.packages))
    }

    pub fn create_madt(&self) -> Sdt {
        use crate::acpi;
        // This is also checked in the commandline parsing.
        assert!(self.config.boot_vcpus <= self.config.max_vcpus);

        let mut madt = Sdt::new(*b"APIC", 44, 5, *b"CLOUDH", *b"CHMADT  ", 1);
        #[cfg(target_arch = "x86_64")]
        {
            madt.write(36, arch::layout::APIC_START);

            for cpu in 0..self.config.max_vcpus {
                let lapic = LocalApic {
                    r#type: acpi::ACPI_APIC_PROCESSOR,
                    length: 8,
                    processor_id: cpu,
                    apic_id: cpu,
                    flags: if cpu < self.config.boot_vcpus {
                        1 << MADT_CPU_ENABLE_FLAG
                    } else {
                        0
                    } | 1 << MADT_CPU_ONLINE_CAPABLE_FLAG,
                };
                madt.append(lapic);
            }

            madt.append(Ioapic {
                r#type: acpi::ACPI_APIC_IO,
                length: 12,
                ioapic_id: 0,
                apic_address: arch::layout::IOAPIC_START.0 as u32,
                gsi_base: 0,
                ..Default::default()
            });

            madt.append(InterruptSourceOverride {
                r#type: acpi::ACPI_APIC_XRUPT_OVERRIDE,
                length: 10,
                bus: 0,
                source: 4,
                gsi: 4,
                flags: 0,
            });
        }

        #[cfg(target_arch = "aarch64")]
        {
            use vm_memory::Address;
            /* Notes:
             * Ignore Local Interrupt Controller Address at byte offset 36 of MADT table.
             */

            // See section 5.2.12.14 GIC CPU Interface (GICC) Structure in ACPI spec.
            for cpu in 0..self.config.boot_vcpus {
                let vcpu = &self.vcpus[cpu as usize];
                let mpidr = vcpu.lock().unwrap().get_mpidr();
                /* ARMv8 MPIDR format:
                   Bits [63:40] Must be zero
                   Bits [39:32] Aff3 : Match Aff3 of target processor MPIDR
                   Bits [31:24] Must be zero
                   Bits [23:16] Aff2 : Match Aff2 of target processor MPIDR
                   Bits [15:8]  Aff1 : Match Aff1 of target processor MPIDR
                   Bits [7:0]   Aff0 : Match Aff0 of target processor MPIDR
                */
                let mpidr_mask = 0xff_00ff_ffff;
                let gicc = GicC {
                    r#type: acpi::ACPI_APIC_GENERIC_CPU_INTERFACE,
                    length: 80,
                    reserved0: 0,
                    cpu_interface_number: cpu as u32,
                    uid: cpu as u32,
                    flags: 1,
                    parking_version: 0,
                    performance_interrupt: 0,
                    parked_address: 0,
                    base_address: 0,
                    gicv_base_address: 0,
                    gich_base_address: 0,
                    vgic_interrupt: 0,
                    gicr_base_address: 0,
                    mpidr: mpidr & mpidr_mask,
                    proc_power_effi_class: 0,
                    reserved1: 0,
                    spe_overflow_interrupt: 0,
                };

                madt.append(gicc);
            }

            // GIC Distributor structure. See section 5.2.12.15 in ACPI spec.
            let gicd = GicD {
                r#type: acpi::ACPI_APIC_GENERIC_DISTRIBUTOR,
                length: 24,
                reserved0: 0,
                gic_id: 0,
                base_address: arch::layout::MAPPED_IO_START.raw_value() - 0x0001_0000,
                global_irq_base: 0,
                version: 3,
                reserved1: [0; 3],
            };
            madt.append(gicd);

            // See 5.2.12.17 GIC Redistributor (GICR) Structure in ACPI spec.
            let gicr_size: u32 = 0x0001_0000 * 2 * (self.config.boot_vcpus as u32);
            let gicr_base: u64 =
                arch::layout::MAPPED_IO_START.raw_value() - 0x0001_0000 - gicr_size as u64;
            let gicr = GicR {
                r#type: acpi::ACPI_APIC_GENERIC_REDISTRIBUTOR,
                length: 16,
                reserved: 0,
                base_address: gicr_base,
                range_length: gicr_size,
            };
            madt.append(gicr);

            // See 5.2.12.18 GIC Interrupt Translation Service (ITS) Structure in ACPI spec.
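            // The ITS frame is placed two 64 KiB pages below the redistributor
            // region computed above.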
            let gicits = GicIts {
                r#type: acpi::ACPI_APIC_GENERIC_TRANSLATOR,
                length: 20,
                reserved0: 0,
                translation_id: 0,
                base_address: gicr_base - 2 * 0x0001_0000,
                reserved1: 0,
            };
            madt.append(gicits);

            madt.update_checksum();
        }

        madt
    }

    #[cfg(target_arch = "aarch64")]
    pub fn create_pptt(&self) -> Sdt {
        let pptt_start = 0;
        let mut cpus = 0;
        let mut uid = 0;
        // If topology is not specified, the default setting is:
        // 1 package, multiple cores, 1 thread per core
        // This is also the behavior when PPTT is missing.
        let (threads_per_core, cores_per_package, packages) =
            self.get_vcpu_topology().unwrap_or((1, self.max_vcpus(), 1));

        let mut pptt = Sdt::new(*b"PPTT", 36, 2, *b"CLOUDH", *b"CHPPTT  ", 1);

        for cluster_idx in 0..packages {
            if cpus < self.config.boot_vcpus as usize {
                let cluster_offset = pptt.len() - pptt_start;
                let cluster_hierarchy_node = ProcessorHierarchyNode {
                    r#type: 0,
                    length: 20,
                    reserved: 0,
                    flags: 0x2,
                    parent: 0,
                    acpi_processor_id: cluster_idx as u32,
                    num_private_resources: 0,
                };
                pptt.append(cluster_hierarchy_node);

                for core_idx in 0..cores_per_package {
                    let core_offset = pptt.len() - pptt_start;

                    if threads_per_core > 1 {
                        let core_hierarchy_node = ProcessorHierarchyNode {
                            r#type: 0,
                            length: 20,
                            reserved: 0,
                            flags: 0x2,
                            parent: cluster_offset as u32,
                            acpi_processor_id: core_idx as u32,
                            num_private_resources: 0,
                        };
                        pptt.append(core_hierarchy_node);

                        for _thread_idx in 0..threads_per_core {
                            let thread_hierarchy_node = ProcessorHierarchyNode {
                                r#type: 0,
                                length: 20,
                                reserved: 0,
                                flags: 0xE,
                                parent: core_offset as u32,
                                acpi_processor_id: uid as u32,
                                num_private_resources: 0,
                            };
                            pptt.append(thread_hierarchy_node);
                            uid += 1;
                        }
                    } else {
                        let thread_hierarchy_node = ProcessorHierarchyNode {
                            r#type: 0,
                            length: 20,
                            reserved: 0,
                            flags: 0xA,
                            parent: cluster_offset as u32,
                            acpi_processor_id: uid as u32,
                            num_private_resources: 0,
                        };
                        pptt.append(thread_hierarchy_node);
                        uid += 1;
                    }
                }
                cpus += (cores_per_package * threads_per_core) as usize;
            }
        }

        pptt.update_checksum();
        pptt
    }

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    fn get_regs(&self, cpu_id: u8) -> Result<StandardRegisters> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_regs()
            .map_err(Error::CpuDebug)
    }

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    fn set_regs(&self, cpu_id: u8, regs: &StandardRegisters) -> Result<()> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .set_regs(regs)
            .map_err(Error::CpuDebug)
    }

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    fn get_sregs(&self, cpu_id: u8) -> Result<SpecialRegisters> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .get_sregs()
            .map_err(Error::CpuDebug)
    }

    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    fn set_sregs(&self, cpu_id: u8, sregs: &SpecialRegisters) -> Result<()> {
        self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .set_sregs(sregs)
            .map_err(Error::CpuDebug)
    }

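    /// Asks the hypervisor to walk the guest page tables and translate the
    /// guest virtual address `gva` into a guest physical address for `cpu_id`.
    ///
    /// Illustrative usage only (not compiled):
    /// ```ignore
    /// let gpa = cpu_manager.translate_gva(0, gva)?;
    /// ```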
    #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
    fn translate_gva(&self, cpu_id: u8, gva: u64) -> Result<u64> {
        let (gpa, _) = self.vcpus[usize::from(cpu_id)]
            .lock()
            .unwrap()
            .vcpu
            .translate_gva(gva, /* flags: unused */ 0)
            .map_err(Error::TranslateVirtualAddress)?;
        Ok(gpa)
    }

    pub fn vcpus_paused(&self) -> bool {
        self.vcpus_pause_signalled.load(Ordering::SeqCst)
    }
}

struct Cpu {
    cpu_id: u8,
    proximity_domain: u32,
    dynamic: bool,
}

#[cfg(target_arch = "x86_64")]
const MADT_CPU_ENABLE_FLAG: usize = 0;

#[cfg(target_arch = "x86_64")]
const MADT_CPU_ONLINE_CAPABLE_FLAG: usize = 1;

impl Cpu {
    #[cfg(target_arch = "x86_64")]
    fn generate_mat(&self) -> Vec<u8> {
        let lapic = LocalApic {
            r#type: 0,
            length: 8,
            processor_id: self.cpu_id,
            apic_id: self.cpu_id,
            flags: 1 << MADT_CPU_ENABLE_FLAG,
        };

        let mut mat_data: Vec<u8> = Vec::new();
        mat_data.resize(std::mem::size_of_val(&lapic), 0);
        unsafe { *(mat_data.as_mut_ptr() as *mut LocalApic) = lapic };

        mat_data
    }
}

impl Aml for Cpu {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        #[cfg(target_arch = "x86_64")]
        let mat_data: Vec<u8> = self.generate_mat();
        #[allow(clippy::if_same_then_else)]
        if self.dynamic {
            aml::Device::new(
                format!("C{:03}", self.cpu_id).as_str().into(),
                vec![
                    &aml::Name::new("_HID".into(), &"ACPI0007"),
                    &aml::Name::new("_UID".into(), &self.cpu_id),
                    // Currently, AArch64 cannot support the following fields.
                    /*
                    _STA return value:
                    Bit [0] – Set if the device is present.
                    Bit [1] – Set if the device is enabled and decoding its resources.
                    Bit [2] – Set if the device should be shown in the UI.
                    Bit [3] – Set if the device is functioning properly (cleared if device failed its diagnostics).
                    Bit [4] – Set if the battery is present.
                    Bits [31:5] – Reserved (must be cleared).
                    */
                    #[cfg(target_arch = "x86_64")]
                    &aml::Method::new(
                        "_STA".into(),
                        0,
                        false,
                        // Call into CSTA method which will interrogate device
                        vec![&aml::Return::new(&aml::MethodCall::new(
                            "CSTA".into(),
                            vec![&self.cpu_id],
                        ))],
                    ),
                    &aml::Method::new(
                        "_PXM".into(),
                        0,
                        false,
                        vec![&aml::Return::new(&self.proximity_domain)],
                    ),
                    // The Linux kernel expects every CPU device to have a _MAT entry
                    // containing the LAPIC for this processor with the enabled bit set
                    // even if it is disabled in the MADT (non-boot CPU)
                    #[cfg(target_arch = "x86_64")]
                    &aml::Name::new("_MAT".into(), &aml::Buffer::new(mat_data)),
                    // Trigger CPU ejection
                    #[cfg(target_arch = "x86_64")]
                    &aml::Method::new(
                        "_EJ0".into(),
                        1,
                        false,
                        // Call into CEJ0 method which will actually eject device
                        vec![&aml::MethodCall::new("CEJ0".into(), vec![&self.cpu_id])],
                    ),
                ],
            )
            .append_aml_bytes(bytes);
        } else {
            aml::Device::new(
                format!("C{:03}", self.cpu_id).as_str().into(),
                vec![
                    &aml::Name::new("_HID".into(), &"ACPI0007"),
                    &aml::Name::new("_UID".into(), &self.cpu_id),
                    #[cfg(target_arch = "x86_64")]
                    &aml::Method::new(
                        "_STA".into(),
                        0,
                        false,
                        // Mark CPU present see CSTA implementation
                        vec![&aml::Return::new(&0xfu8)],
                    ),
                    &aml::Method::new(
                        "_PXM".into(),
                        0,
                        false,
                        vec![&aml::Return::new(&self.proximity_domain)],
                    ),
                    // The Linux kernel expects every CPU device to have a _MAT entry
                    // containing the LAPIC for this processor with the enabled bit set
                    // even if it is disabled in the MADT (non-boot CPU)
                    #[cfg(target_arch = "x86_64")]
                    &aml::Name::new("_MAT".into(), &aml::Buffer::new(mat_data)),
                ],
            )
            .append_aml_bytes(bytes);
        }
    }
}

struct CpuNotify {
    cpu_id: u8,
}

impl Aml for CpuNotify {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        let object = aml::Path::new(&format!("C{:03}", self.cpu_id));
        aml::If::new(
            &aml::Equal::new(&aml::Arg(0), &self.cpu_id),
            vec![&aml::Notify::new(&object, &aml::Arg(1))],
        )
        .append_aml_bytes(bytes)
    }
}

struct CpuMethods {
    max_vcpus: u8,
    dynamic: bool,
}

impl Aml for CpuMethods {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        if self.dynamic {
            // CPU status method
            aml::Method::new(
                "CSTA".into(),
                1,
                true,
                vec![
                    // Take lock defined above
                    &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff),
                    // Write CPU number (in first argument) to I/O port via field
                    &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Arg(0)),
                    &aml::Store::new(&aml::Local(0), &aml::ZERO),
                    // Check if CPEN bit is set, if so make the local variable 0xf (see _STA for details of meaning)
                    &aml::If::new(
                        &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CPEN"), &aml::ONE),
                        vec![&aml::Store::new(&aml::Local(0), &0xfu8)],
                    ),
                    // Release lock
                    &aml::Release::new("\\_SB_.PRES.CPLK".into()),
                    // Return 0 or 0xf
                    &aml::Return::new(&aml::Local(0)),
                ],
            )
            .append_aml_bytes(bytes);

            let mut cpu_notifies = Vec::new();
            for cpu_id in 0..self.max_vcpus {
                cpu_notifies.push(CpuNotify { cpu_id });
            }

            let mut cpu_notifies_refs: Vec<&dyn aml::Aml> = Vec::new();
            for cpu_id in 0..self.max_vcpus {
                cpu_notifies_refs.push(&cpu_notifies[usize::from(cpu_id)]);
            }

            aml::Method::new("CTFY".into(), 2, true, cpu_notifies_refs).append_aml_bytes(bytes);

            aml::Method::new(
                "CEJ0".into(),
                1,
                true,
                vec![
                    &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff),
                    // Write CPU number (in first argument) to I/O port via field
                    &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Arg(0)),
                    // Set CEJ0 bit
                    &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CEJ0"), &aml::ONE),
                    &aml::Release::new("\\_SB_.PRES.CPLK".into()),
                ],
            )
            .append_aml_bytes(bytes);

            aml::Method::new(
                "CSCN".into(),
                0,
                true,
                vec![
                    // Take lock defined above
                    &aml::Acquire::new("\\_SB_.PRES.CPLK".into(), 0xffff),
                    &aml::Store::new(&aml::Local(0), &aml::ZERO),
                    &aml::While::new(
                        &aml::LessThan::new(&aml::Local(0), &self.max_vcpus),
                        vec![
                            // Write CPU number (in first argument) to I/O port via field
                            &aml::Store::new(&aml::Path::new("\\_SB_.PRES.CSEL"), &aml::Local(0)),
                            // Check if CINS bit is set
                            &aml::If::new(
                                &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CINS"), &aml::ONE),
                                // Notify device if it is
                                vec![
                                    &aml::MethodCall::new(
                                        "CTFY".into(),
                                        vec![&aml::Local(0), &aml::ONE],
                                    ),
                                    // Reset CINS bit
                                    &aml::Store::new(
                                        &aml::Path::new("\\_SB_.PRES.CINS"),
                                        &aml::ONE,
                                    ),
                                ],
                            ),
                            // Check if CRMV bit is set
                            &aml::If::new(
                                &aml::Equal::new(&aml::Path::new("\\_SB_.PRES.CRMV"), &aml::ONE),
                                // Notify device if it is (with the eject constant 0x3)
                                vec![
                                    &aml::MethodCall::new(
                                        "CTFY".into(),
                                        vec![&aml::Local(0), &3u8],
                                    ),
                                    // Reset CRMV bit
                                    &aml::Store::new(
                                        &aml::Path::new("\\_SB_.PRES.CRMV"),
                                        &aml::ONE,
                                    ),
                                ],
                            ),
                            &aml::Add::new(&aml::Local(0), &aml::Local(0), &aml::ONE),
                        ],
                    ),
                    // Release lock
                    &aml::Release::new("\\_SB_.PRES.CPLK".into()),
                ],
            )
            .append_aml_bytes(bytes)
        } else {
            aml::Method::new("CSCN".into(), 0, true, vec![]).append_aml_bytes(bytes)
        }
    }
}

impl Aml for CpuManager {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        #[cfg(target_arch = "x86_64")]
        if let Some(acpi_address) = self.acpi_address {
            // CPU hotplug controller
            aml::Device::new(
                "_SB_.PRES".into(),
                vec![
                    &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                    &aml::Name::new("_UID".into(), &"CPU Hotplug Controller"),
                    // Mutex to protect concurrent access as we write to choose CPU and then read back status
                    &aml::Mutex::new("CPLK".into(), 0),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                            aml::AddressSpaceCachable::NotCacheable,
                            true,
                            acpi_address.0 as u64,
                            acpi_address.0 + CPU_MANAGER_ACPI_SIZE as u64 - 1,
                        )]),
                    ),
                    // OpRegion and Fields map MMIO range into individual field values
                    &aml::OpRegion::new(
                        "PRST".into(),
                        aml::OpRegionSpace::SystemMemory,
                        acpi_address.0 as usize,
                        CPU_MANAGER_ACPI_SIZE,
                    ),
                    &aml::Field::new(
                        "PRST".into(),
                        aml::FieldAccessType::Byte,
                        aml::FieldUpdateRule::WriteAsZeroes,
                        vec![
                            aml::FieldEntry::Reserved(32),
                            aml::FieldEntry::Named(*b"CPEN", 1),
                            aml::FieldEntry::Named(*b"CINS", 1),
                            aml::FieldEntry::Named(*b"CRMV", 1),
aml::FieldEntry::Named(*b"CEJ0", 1), 1807 aml::FieldEntry::Reserved(4), 1808 aml::FieldEntry::Named(*b"CCMD", 8), 1809 ], 1810 ), 1811 &aml::Field::new( 1812 "PRST".into(), 1813 aml::FieldAccessType::DWord, 1814 aml::FieldUpdateRule::Preserve, 1815 vec![ 1816 aml::FieldEntry::Named(*b"CSEL", 32), 1817 aml::FieldEntry::Reserved(32), 1818 aml::FieldEntry::Named(*b"CDAT", 32), 1819 ], 1820 ), 1821 ], 1822 ) 1823 .append_aml_bytes(bytes); 1824 } 1825 1826 // CPU devices 1827 let hid = aml::Name::new("_HID".into(), &"ACPI0010"); 1828 let uid = aml::Name::new("_CID".into(), &aml::EisaName::new("PNP0A05")); 1829 // Bundle methods together under a common object 1830 let methods = CpuMethods { 1831 max_vcpus: self.config.max_vcpus, 1832 dynamic: self.dynamic, 1833 }; 1834 let mut cpu_data_inner: Vec<&dyn aml::Aml> = vec![&hid, &uid, &methods]; 1835 1836 let mut cpu_devices = Vec::new(); 1837 for cpu_id in 0..self.config.max_vcpus { 1838 let proximity_domain = *self.proximity_domain_per_cpu.get(&cpu_id).unwrap_or(&0); 1839 let cpu_device = Cpu { 1840 cpu_id, 1841 proximity_domain, 1842 dynamic: self.dynamic, 1843 }; 1844 1845 cpu_devices.push(cpu_device); 1846 } 1847 1848 for cpu_device in cpu_devices.iter() { 1849 cpu_data_inner.push(cpu_device); 1850 } 1851 1852 aml::Device::new("_SB_.CPUS".into(), cpu_data_inner).append_aml_bytes(bytes) 1853 } 1854 } 1855 1856 impl Pausable for CpuManager { 1857 fn pause(&mut self) -> std::result::Result<(), MigratableError> { 1858 // Tell the vCPUs to pause themselves next time they exit 1859 self.vcpus_pause_signalled.store(true, Ordering::SeqCst); 1860 1861 // Signal to the spawned threads (vCPUs and console signal handler). For the vCPU threads 1862 // this will interrupt the KVM_RUN ioctl() allowing the loop to check the boolean set 1863 // above. 1864 for state in self.vcpu_states.iter() { 1865 state.signal_thread(); 1866 } 1867 1868 for vcpu in self.vcpus.iter() { 1869 let mut vcpu = vcpu.lock().unwrap(); 1870 vcpu.pause()?; 1871 #[cfg(all(feature = "kvm", target_arch = "x86_64"))] 1872 if !self.config.kvm_hyperv { 1873 vcpu.vcpu.notify_guest_clock_paused().map_err(|e| { 1874 MigratableError::Pause(anyhow!( 1875 "Could not notify guest it has been paused {:?}", 1876 e 1877 )) 1878 })?; 1879 } 1880 } 1881 1882 Ok(()) 1883 } 1884 1885 fn resume(&mut self) -> std::result::Result<(), MigratableError> { 1886 for vcpu in self.vcpus.iter() { 1887 vcpu.lock().unwrap().resume()?; 1888 } 1889 1890 // Toggle the vCPUs pause boolean 1891 self.vcpus_pause_signalled.store(false, Ordering::SeqCst); 1892 1893 // Unpark all the VCPU threads. 1894 // Once unparked, the next thing they will do is checking for the pause 1895 // boolean. Since it'll be set to false, they will exit their pause loop 1896 // and go back to vmx root. 1897 for state in self.vcpu_states.iter() { 1898 state.unpark_thread(); 1899 } 1900 Ok(()) 1901 } 1902 } 1903 1904 impl Snapshottable for CpuManager { 1905 fn id(&self) -> String { 1906 CPU_MANAGER_SNAPSHOT_ID.to_string() 1907 } 1908 1909 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 1910 let mut cpu_manager_snapshot = Snapshot::new(CPU_MANAGER_SNAPSHOT_ID); 1911 1912 // The CpuManager snapshot is a collection of all vCPUs snapshots. 
        for vcpu in &self.vcpus {
            let cpu_snapshot = vcpu.lock().unwrap().snapshot()?;
            cpu_manager_snapshot.add_snapshot(cpu_snapshot);
        }

        Ok(cpu_manager_snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        for (cpu_id, snapshot) in snapshot.snapshots.iter() {
            info!("Restoring VCPU {}", cpu_id);
            self.create_vcpu(cpu_id.parse::<u8>().unwrap(), None, Some(*snapshot.clone()))
                .map_err(|e| MigratableError::Restore(anyhow!("Could not create vCPU {:?}", e)))?;
        }

        Ok(())
    }
}

impl Transportable for CpuManager {}
impl Migratable for CpuManager {}

#[cfg(feature = "gdb")]
impl Debuggable for CpuManager {
    #[cfg(feature = "kvm")]
    fn set_guest_debug(
        &self,
        cpu_id: usize,
        addrs: &[GuestAddress],
        singlestep: bool,
    ) -> std::result::Result<(), DebuggableError> {
        self.vcpus[cpu_id]
            .lock()
            .unwrap()
            .vcpu
            .set_guest_debug(addrs, singlestep)
            .map_err(DebuggableError::SetDebug)
    }

    fn debug_pause(&mut self) -> std::result::Result<(), DebuggableError> {
        Ok(())
    }

    fn debug_resume(&mut self) -> std::result::Result<(), DebuggableError> {
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn read_regs(&self, cpu_id: usize) -> std::result::Result<X86_64CoreRegs, DebuggableError> {
        // General registers: RAX, RBX, RCX, RDX, RSI, RDI, RBP, RSP, R8-R15
        let gregs = self
            .get_regs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        let regs = [
            gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rbp, gregs.rsp,
            gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14, gregs.r15,
        ];

        // GDB exposes 32-bit eflags instead of 64-bit rflags.
        // https://github.com/bminor/binutils-gdb/blob/master/gdb/features/i386/64bit-core.xml
        let eflags = gregs.rflags as u32;
        let rip = gregs.rip;

        // Segment registers: CS, SS, DS, ES, FS, GS
        let sregs = self
            .get_sregs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        let segments = X86SegmentRegs {
            cs: sregs.cs.selector as u32,
            ss: sregs.ss.selector as u32,
            ds: sregs.ds.selector as u32,
            es: sregs.es.selector as u32,
            fs: sregs.fs.selector as u32,
            gs: sregs.gs.selector as u32,
        };

        // TODO: Add other registers

        Ok(X86_64CoreRegs {
            regs,
            eflags,
            rip,
            segments,
            ..Default::default()
        })
    }

    #[cfg(target_arch = "x86_64")]
    fn write_regs(
        &self,
        cpu_id: usize,
        regs: &X86_64CoreRegs,
    ) -> std::result::Result<(), DebuggableError> {
        let orig_gregs = self
            .get_regs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        let gregs = StandardRegisters {
            rax: regs.regs[0],
            rbx: regs.regs[1],
            rcx: regs.regs[2],
            rdx: regs.regs[3],
            rsi: regs.regs[4],
            rdi: regs.regs[5],
            rbp: regs.regs[6],
            rsp: regs.regs[7],
            r8: regs.regs[8],
            r9: regs.regs[9],
            r10: regs.regs[10],
            r11: regs.regs[11],
            r12: regs.regs[12],
            r13: regs.regs[13],
            r14: regs.regs[14],
            r15: regs.regs[15],
            rip: regs.rip,
            // Update the lower 32 bits of rflags.
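            // Only the low half comes from GDB: `!(u32::MAX as u64)` is 0xffff_ffff_0000_0000,
            // so the expression below keeps the upper 32 bits of the original rflags and ORs in
            // the 32-bit eflags value supplied by the debugger.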
            rflags: (orig_gregs.rflags & !(u32::MAX as u64)) | (regs.eflags as u64),
        };

        self.set_regs(cpu_id as u8, &gregs)
            .map_err(DebuggableError::WriteRegs)?;

        // Segment registers: CS, SS, DS, ES, FS, GS
        // Since GDB only cares about the selectors, we call get_sregs() first and patch just the
        // selector fields before writing the result back.
        let mut sregs = self
            .get_sregs(cpu_id as u8)
            .map_err(DebuggableError::ReadRegs)?;
        sregs.cs.selector = regs.segments.cs as u16;
        sregs.ss.selector = regs.segments.ss as u16;
        sregs.ds.selector = regs.segments.ds as u16;
        sregs.es.selector = regs.segments.es as u16;
        sregs.fs.selector = regs.segments.fs as u16;
        sregs.gs.selector = regs.segments.gs as u16;

        self.set_sregs(cpu_id as u8, &sregs)
            .map_err(DebuggableError::WriteRegs)?;

        // TODO: Add other registers

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn read_mem(
        &self,
        cpu_id: usize,
        vaddr: GuestAddress,
        len: usize,
    ) -> std::result::Result<Vec<u8>, DebuggableError> {
        let mut buf = vec![0; len];
        let mut total_read = 0_u64;

        while total_read < len as u64 {
            let gaddr = vaddr.0 + total_read;
            let paddr = match self.translate_gva(cpu_id as u8, gaddr) {
                Ok(paddr) => paddr,
                Err(_) if gaddr == u64::MIN => gaddr, // Silently return GVA as GPA if GVA == 0.
                Err(e) => return Err(DebuggableError::TranslateGva(e)),
            };
            let psize = arch::PAGE_SIZE as u64;
            let read_len = std::cmp::min(len as u64 - total_read, psize - (paddr & (psize - 1)));
            self.vm_memory
                .memory()
                .read(
                    &mut buf[total_read as usize..total_read as usize + read_len as usize],
                    GuestAddress(paddr),
                )
                .map_err(DebuggableError::ReadMem)?;
            total_read += read_len;
        }
        Ok(buf)
    }

    #[cfg(target_arch = "x86_64")]
    fn write_mem(
        &self,
        cpu_id: usize,
        vaddr: &GuestAddress,
        data: &[u8],
    ) -> std::result::Result<(), DebuggableError> {
        let mut total_written = 0_u64;

        while total_written < data.len() as u64 {
            let gaddr = vaddr.0 + total_written;
            let paddr = match self.translate_gva(cpu_id as u8, gaddr) {
                Ok(paddr) => paddr,
                Err(_) if gaddr == u64::MIN => gaddr, // Silently return GVA as GPA if GVA == 0.
                Err(e) => return Err(DebuggableError::TranslateGva(e)),
            };
            let psize = arch::PAGE_SIZE as u64;
            let write_len = std::cmp::min(
                data.len() as u64 - total_written,
                psize - (paddr & (psize - 1)),
            );
            self.vm_memory
                .memory()
                .write(
                    &data[total_written as usize..total_written as usize + write_len as usize],
                    GuestAddress(paddr),
                )
                .map_err(DebuggableError::WriteMem)?;
            total_written += write_len;
        }
        Ok(())
    }

    fn active_vcpus(&self) -> usize {
        self.present_vcpus() as usize
    }
}

#[cfg(feature = "guest_debug")]
impl Elf64Writable for CpuManager {}

#[cfg(feature = "guest_debug")]
impl CpuElf64Writable for CpuManager {
    fn cpu_write_elf64_note(
        &mut self,
        dump_state: &DumpState,
    ) -> std::result::Result<(), GuestDebuggableError> {
        let mut coredump_file = dump_state.file.as_ref().unwrap();
        for vcpu in &self.vcpus {
            let note_size = self.get_note_size(NoteDescType::Elf, 1);
            let mut pos: usize = 0;
            let mut buf = vec![0; note_size as usize];
            let descsz = size_of::<X86_64ElfPrStatus>();
            let vcpu_id = vcpu.lock().unwrap().id;

            let note = Elf64_Nhdr {
                n_namesz: COREDUMP_NAME_SIZE,
                n_descsz: descsz as u32,
                n_type: NT_PRSTATUS,
            };

            let bytes: &[u8] = note.as_slice();
            buf.splice(0.., bytes.to_vec());
            pos += round_up!(size_of::<Elf64_Nhdr>(), 4);
            buf.resize(pos + 4, 0);
            buf.splice(pos.., "CORE".to_string().into_bytes());

            pos += round_up!(COREDUMP_NAME_SIZE as usize, 4);
            buf.resize(pos + 32 + 4, 0);
            let pid = vcpu_id as u64;
            let bytes: &[u8] = pid.as_slice();
            buf.splice(pos + 32.., bytes.to_vec()); /* pr_pid */

            pos += descsz - size_of::<X86_64UserRegs>() - size_of::<u64>();

            let orig_rax: u64 = 0;
            let gregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_regs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?;

            let regs1 = [
                gregs.r15, gregs.r14, gregs.r13, gregs.r12, gregs.rbp, gregs.rbx, gregs.r11,
                gregs.r10,
            ];
            let regs2 = [
                gregs.r9, gregs.r8, gregs.rax, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, orig_rax,
            ];

            let sregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_sregs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?;

            debug!(
                "rip 0x{:x} rsp 0x{:x} gs 0x{:x} cs 0x{:x} ss 0x{:x} ds 0x{:x}",
                gregs.rip,
                gregs.rsp,
                sregs.gs.base,
                sregs.cs.selector,
                sregs.ss.selector,
                sregs.ds.selector,
            );

            let regs = X86_64UserRegs {
                regs1,
                regs2,
                rip: gregs.rip,
                cs: sregs.cs.selector as u64,
                eflags: gregs.rflags,
                rsp: gregs.rsp,
                ss: sregs.ss.selector as u64,
                fs_base: sregs.fs.base as u64,
                gs_base: sregs.gs.base as u64,
                ds: sregs.ds.selector as u64,
                es: sregs.es.selector as u64,
                fs: sregs.fs.selector as u64,
                gs: sregs.gs.selector as u64,
            };

            // let bytes: &[u8] = unsafe { any_as_u8_slice(&regs) };
            let bytes: &[u8] = regs.as_slice();
            buf.resize(note_size as usize, 0);
            buf.splice(pos.., bytes.to_vec());
            buf.resize(note_size as usize, 0);

            coredump_file
                .write(&buf)
                .map_err(GuestDebuggableError::CoredumpFile)?;
        }

        Ok(())
    }

    fn cpu_write_vmm_note(
        &mut self,
        dump_state: &DumpState,
    ) -> std::result::Result<(), GuestDebuggableError> {
        let mut coredump_file = dump_state.file.as_ref().unwrap();
        for vcpu in &self.vcpus {
            let note_size = self.get_note_size(NoteDescType::Vmm, 1);
            let mut pos: usize = 0;
            let mut buf = vec![0; note_size as usize];
            let descsz = size_of::<DumpCpusState>();
            let vcpu_id = vcpu.lock().unwrap().id;

            let note = Elf64_Nhdr {
                n_namesz: COREDUMP_NAME_SIZE,
                n_descsz: descsz as u32,
                n_type: 0,
            };

            let bytes: &[u8] = note.as_slice();
            buf.splice(0.., bytes.to_vec());
            pos += round_up!(size_of::<Elf64_Nhdr>(), 4);

            buf.resize(pos + 4, 0);
            buf.splice(pos.., "QEMU".to_string().into_bytes());

            pos += round_up!(COREDUMP_NAME_SIZE as usize, 4);

            let gregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_regs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get regs failed")))?;

            let regs1 = [
                gregs.rax, gregs.rbx, gregs.rcx, gregs.rdx, gregs.rsi, gregs.rdi, gregs.rsp,
                gregs.rbp,
            ];

            let regs2 = [
                gregs.r8, gregs.r9, gregs.r10, gregs.r11, gregs.r12, gregs.r13, gregs.r14,
                gregs.r15,
            ];

            let sregs = self.vcpus[usize::from(vcpu_id)]
                .lock()
                .unwrap()
                .vcpu
                .get_sregs()
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get sregs failed")))?;

            let mut msrs = vec![MsrEntry {
                index: msr_index::MSR_KERNEL_GS_BASE,
                ..Default::default()
            }];

            self.vcpus[vcpu_id as usize]
                .lock()
                .unwrap()
                .vcpu
                .get_msrs(&mut msrs)
                .map_err(|_e| GuestDebuggableError::Coredump(anyhow!("get msr failed")))?;
            let kernel_gs_base = msrs[0].data;

            let cs = CpuSegment::new(sregs.cs);
            let ds = CpuSegment::new(sregs.ds);
            let es = CpuSegment::new(sregs.es);
            let fs = CpuSegment::new(sregs.fs);
            let gs = CpuSegment::new(sregs.gs);
            let ss = CpuSegment::new(sregs.ss);
            let ldt = CpuSegment::new(sregs.ldt);
            let tr = CpuSegment::new(sregs.tr);
            let gdt = CpuSegment::new_from_table(sregs.gdt);
            let idt = CpuSegment::new_from_table(sregs.idt);
            let cr = [sregs.cr0, sregs.cr8, sregs.cr2, sregs.cr3, sregs.cr4];
            let regs = DumpCpusState {
                version: 1,
                size: size_of::<DumpCpusState>() as u32,
                regs1,
                regs2,
                rip: gregs.rip,
                rflags: gregs.rflags,
                cs,
                ds,
                es,
                fs,
                gs,
                ss,
                ldt,
                tr,
                gdt,
                idt,
                cr,
                kernel_gs_base,
            };

            let bytes: &[u8] = regs.as_slice();
            buf.resize(note_size as usize, 0);
            buf.splice(pos.., bytes.to_vec());
            buf.resize(note_size as usize, 0);

            coredump_file
                .write(&buf)
                .map_err(GuestDebuggableError::CoredumpFile)?;
        }

        Ok(())
    }
}

#[cfg(all(feature = "kvm", target_arch = "x86_64"))]
#[cfg(test)]
mod tests {
    use arch::x86_64::interrupts::*;
    use arch::x86_64::regs::*;
    use hypervisor::arch::x86::{FpuState, LapicState, StandardRegisters};

    #[test]
    fn test_setlint() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        assert!(hv.check_required_extensions().is_ok());
        // Calling get_lapic will fail if there is no irqchip beforehand.
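        // set_lint() is expected to program LINT0 for ExtINT delivery and LINT1 for NMI
        // delivery; the assertions below compare the LVT registers before and after the call.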
        assert!(vm.create_irq_chip().is_ok());
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let klapic_before: LapicState = vcpu.get_lapic().unwrap();

        // Compute the value that is expected to represent LVT0 and LVT1.
        let lint0 = klapic_before.get_klapic_reg(APIC_LVT0);
        let lint1 = klapic_before.get_klapic_reg(APIC_LVT1);
        let lint0_mode_expected = set_apic_delivery_mode(lint0, APIC_MODE_EXTINT);
        let lint1_mode_expected = set_apic_delivery_mode(lint1, APIC_MODE_NMI);

        set_lint(&vcpu).unwrap();

        // Compute the value that represents LVT0 and LVT1 after set_lint.
        let klapic_actual: LapicState = vcpu.get_lapic().unwrap();
        let lint0_mode_actual = klapic_actual.get_klapic_reg(APIC_LVT0);
        let lint1_mode_actual = klapic_actual.get_klapic_reg(APIC_LVT1);
        assert_eq!(lint0_mode_expected, lint0_mode_actual);
        assert_eq!(lint1_mode_expected, lint1_mode_actual);
    }

    #[test]
    fn test_setup_fpu() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();
        setup_fpu(&vcpu).unwrap();

        let expected_fpu: FpuState = FpuState {
            fcw: 0x37f,
            mxcsr: 0x1f80,
            ..Default::default()
        };
        let actual_fpu: FpuState = vcpu.get_fpu().unwrap();
        // TODO: auto-generate kvm related structures with PartialEq on.
        assert_eq!(expected_fpu.fcw, actual_fpu.fcw);
        // Setting the mxcsr register from FpuState inside setup_fpu does not influence anything.
        // See 'kvm_arch_vcpu_ioctl_set_fpu' from arch/x86/kvm/x86.c.
        // The mxcsr will stay 0 and the assert below fails. Decide whether or not we should
        // remove it altogether.
        // assert!(expected_fpu.mxcsr == actual_fpu.mxcsr);
    }

    #[test]
    fn test_setup_msrs() {
        use hypervisor::arch::x86::{msr_index, MsrEntry};

        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();
        setup_msrs(&vcpu).unwrap();

        // This test will check against the last MSR entry configured (the tenth one).
        // See create_msr_entries for details.
        let mut msrs = vec![MsrEntry {
            index: msr_index::MSR_IA32_MISC_ENABLE,
            ..Default::default()
        }];

        // get_msrs returns the number of MSRs that it succeeded in reading. We only want to read
        // one in this test case.
        let read_msrs = vcpu.get_msrs(&mut msrs).unwrap();
        assert_eq!(read_msrs, 1);

        // Official entries that were set up when we ran setup_msrs. We need to assert that the
        // tenth one (i.e. the one with index msr_index::MSR_IA32_MISC_ENABLE) has the data we
        // expect.
        let entry_vec = vcpu.boot_msr_entries();
        assert_eq!(entry_vec.as_slice()[9], msrs.as_slice()[0]);
    }

    #[test]
    fn test_setup_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();

        let expected_regs: StandardRegisters = StandardRegisters {
            rflags: 0x0000000000000002u64,
            rbx: arch::layout::PVH_INFO_START.0,
            rip: 1,
            ..Default::default()
        };

        setup_regs(&vcpu, expected_regs.rip).unwrap();

        let actual_regs: StandardRegisters = vcpu.get_regs().unwrap();
        assert_eq!(actual_regs, expected_regs);
    }
}

#[cfg(target_arch = "aarch64")]
#[cfg(test)]
mod tests {
    use arch::layout;
    use hypervisor::kvm::aarch64::{is_system_register, MPIDR_EL1};
    use hypervisor::kvm::kvm_bindings::{
        kvm_one_reg, kvm_regs, kvm_vcpu_init, user_pt_regs, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG,
        KVM_REG_ARM_CORE, KVM_REG_SIZE_U64,
    };
    use hypervisor::{arm64_core_reg_id, offset__of};
    use std::mem;

    #[test]
    fn test_setup_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();

        let res = vcpu.setup_regs(0, 0x0, layout::FDT_START.0);
        // Must fail when vcpu is not initialized yet.
        assert!(res.is_err());

        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();
        vcpu.vcpu_init(&kvi).unwrap();

        assert!(vcpu.setup_regs(0, 0x0, layout::FDT_START.0).is_ok());
    }

    #[test]
    fn test_read_mpidr() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        // Must fail when vcpu is not initialized yet.
        assert!(vcpu.read_mpidr().is_err());

        vcpu.vcpu_init(&kvi).unwrap();
        assert_eq!(vcpu.read_mpidr().unwrap(), 0x80000000);
    }

    #[test]
    fn test_is_system_register() {
        let offset = offset__of!(user_pt_regs, pc);
        let regid = arm64_core_reg_id!(KVM_REG_SIZE_U64, offset);
        assert!(!is_system_register(regid));
        let regid = KVM_REG_ARM64 as u64 | KVM_REG_SIZE_U64 as u64 | KVM_REG_ARM64_SYSREG as u64;
        assert!(is_system_register(regid));
    }

    #[test]
    fn test_save_restore_core_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        // Must fail when vcpu is not initialized yet.
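        // As the error strings below show, KVM reports ENOEXEC ("Exec format error", os error 8)
        // for vCPU ioctls issued before the vCPU has been initialized with KVM_ARM_VCPU_INIT.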
        let res = vcpu.get_regs();
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.unwrap_err()),
            "Failed to get core register: Exec format error (os error 8)"
        );

        let mut state = kvm_regs::default();
        let res = vcpu.set_regs(&state);
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.unwrap_err()),
            "Failed to set core register: Exec format error (os error 8)"
        );

        vcpu.vcpu_init(&kvi).unwrap();
        let res = vcpu.get_regs();
        assert!(res.is_ok());
        state = res.unwrap();
        assert_eq!(state.regs.pstate, 0x3C5);

        assert!(vcpu.set_regs(&state).is_ok());
        let off = offset__of!(user_pt_regs, pstate);
        let pstate = vcpu
            .get_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
            .expect("Failed to call kvm get one reg");
        assert_eq!(state.regs.pstate, pstate);
    }

    #[test]
    fn test_save_restore_system_regs() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        // Must fail when vcpu is not initialized yet.
        let mut state: Vec<kvm_one_reg> = Vec::new();
        let res = vcpu.get_sys_regs();
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.as_ref().unwrap_err()),
            "Failed to retrieve list of registers: Exec format error (os error 8)"
        );

        state.push(kvm_one_reg {
            id: MPIDR_EL1,
            addr: 0x00,
        });
        let res = vcpu.set_sys_regs(&state);
        assert!(res.is_err());
        assert_eq!(
            format!("{}", res.unwrap_err()),
            "Failed to set system register: Exec format error (os error 8)"
        );

        vcpu.vcpu_init(&kvi).unwrap();
        let res = vcpu.get_sys_regs();
        assert!(res.is_ok());
        state = res.unwrap();

        let initial_mpidr: u64 = vcpu.read_mpidr().expect("Failed to read mpidr");
        assert!(state.contains(&kvm_one_reg {
            id: MPIDR_EL1,
            addr: initial_mpidr
        }));

        assert!(vcpu.set_sys_regs(&state).is_ok());
        let mpidr: u64 = vcpu.read_mpidr().expect("Failed to read mpidr");
        assert_eq!(initial_mpidr, mpidr);
    }

    #[test]
    fn test_get_set_mpstate() {
        let hv = hypervisor::new().unwrap();
        let vm = hv.create_vm().unwrap();
        let vcpu = vm.create_vcpu(0, None).unwrap();
        let mut kvi: kvm_vcpu_init = kvm_vcpu_init::default();
        vm.get_preferred_target(&mut kvi).unwrap();

        let res = vcpu.get_mp_state();
        assert!(res.is_ok());
        assert!(vcpu.set_mp_state(res.unwrap()).is_ok());
    }
}