// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use std::sync::Arc;
pub mod interrupts;
pub mod layout;
mod mpspec;
mod mptable;
pub mod regs;
use std::collections::BTreeMap;
use std::mem;

use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX};
use hypervisor::{CpuVendor, HypervisorCpuError, HypervisorError};
use linux_loader::loader::bootparam::{boot_params, setup_header};
use linux_loader::loader::elf::start_info::{
    hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
};
use thiserror::Error;
use vm_memory::{
    Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
    GuestMemoryRegion, GuestUsize,
};

use crate::{GuestMemoryMmap, InitramfsConfig, RegionType};
mod smbios;
use std::arch::x86_64;
#[cfg(feature = "tdx")]
pub mod tdx;

// CPUID feature bits (bit positions within the named register of leaf 0x1,
// as patched into the guest CPUID in generate_common_cpuid()).
#[cfg(feature = "kvm")]
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // TSC deadline timer ecx bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor-present ecx bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR edx bit.
// Checked in configure_vcpu() before exposing the TSC frequency leaf.
const INVARIANT_TSC_EDX_BIT: u8 = 8; // Invariant TSC bit on 0x8000_0007 EDX

// Leaf 0x7 (subleaf 0) EDX bit positions, cleared in generate_common_cpuid()
// when AMX is not enabled in CpuidConfig.
const AMX_BF16: u8 = 22; // AMX tile computation on bfloat16 numbers
const AMX_TILE: u8 = 24; // AMX tile load/store instructions
const AMX_INT8: u8 = 25; // AMX tile computation on 8-bit integers

// KVM feature bits (leaf 0x4000_0001 EAX). These paravirt features are
// cleared for TDX guests in generate_common_cpuid().
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;

// NOTE(review): presumably mirrors Linux's _NSIG (total number of signals);
// not referenced in this view — confirm against its users.
pub const _NSIG: i32 = 65;

#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
    /// This field is used for bzImage to fill the zero page
    pub setup_header: Option<setup_header>,
}

// E820 memory map entry types (written into the guest memory map).
const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;

/// A single SGX Enclave Page Cache section: a guest-physical range
/// backing enclave pages.
#[derive(Clone)]
pub struct SgxEpcSection {
    start: GuestAddress,
    size: GuestUsize,
}

impl SgxEpcSection {
    /// Creates a section covering `size` bytes starting at `start`.
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    /// Guest-physical start address of the section.
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    /// Size of the section in bytes.
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}

/// The overall SGX EPC region: a guest-physical range holding the
/// individual EPC sections, keyed by a caller-chosen identifier.
#[derive(Clone)]
pub struct SgxEpcRegion {
    start: GuestAddress,
    size: GuestUsize,
    // BTreeMap keeps sections ordered by their id.
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    /// Creates an empty region covering `size` bytes starting at `start`.
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    /// Guest-physical start address of the region.
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    /// Size of the region in bytes.
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    /// The sections contained in this region, keyed by id.
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    /// Adds (or replaces) a section under the given id.
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}

/// Inputs controlling how the common guest CPUID is generated in
/// [`generate_common_cpuid`].
pub struct CpuidConfig {
    /// SGX EPC sections to expose to the guest, if any.
    pub sgx_epc_sections: Option<Vec<SgxEpcSection>>,
    /// Physical address width written into leaf 0x8000_0008 EAX[7:0].
    pub phys_bits: u8,
    /// Whether KVM's Hyper-V emulation leaves (0x4000_0000..=0x4000_000a)
    /// should replace the KVM identification leaves.
    pub kvm_hyperv: bool,
    /// Whether the guest is a TDX guest (enables TDX-specific masking).
    #[cfg(feature = "tdx")]
    pub tdx: bool,
    /// Whether AMX is enabled; when false the AMX bits of leaf 0x7 are cleared.
    pub amx: bool,
}

#[derive(Debug, Error)]
pub enum Error {
    /// Error writing MP table to memory.
    #[error("Error writing MP table to memory: {0}")]
    MpTableSetup(mptable::Error),

    /// Error configuring the general purpose registers
    #[error("Error configuring the general purpose registers: {0}")]
    RegsConfiguration(regs::Error),

    /// Error configuring the special registers
    #[error("Error configuring the special registers: {0}")]
    SregsConfiguration(regs::Error),

    /// Error configuring the floating point related registers
    #[error("Error configuring the floating point related registers: {0}")]
    FpuConfiguration(regs::Error),

    /// Error configuring the MSR registers
    #[error("Error configuring the MSR registers: {0}")]
    MsrsConfiguration(regs::Error),

    /// Failed to set supported CPUs.
    #[error("Failed to set supported CPUs: {0}")]
    SetSupportedCpusFailed(anyhow::Error),

    /// Cannot set the local interruption due to bad configuration.
    #[error("Cannot set the local interruption due to bad configuration: {0}")]
    LocalIntConfiguration(anyhow::Error),

    /// Error setting up SMBIOS table
    #[error("Error setting up SMBIOS table: {0}")]
    SmbiosSetup(smbios::Error),

    /// Could not find any SGX EPC section
    #[error("Could not find any SGX EPC section")]
    NoSgxEpcSection,

    /// Missing SGX CPU feature
    #[error("Missing SGX CPU feature")]
    MissingSgxFeature,

    /// Missing SGX_LC CPU feature
    #[error("Missing SGX_LC CPU feature")]
    MissingSgxLaunchControlFeature,

    /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
    #[error("Error getting supported CPUID through the hypervisor API: {0}")]
    CpuidGetSupported(HypervisorError),

    /// Error populating CPUID with KVM HyperV emulation details
    #[error("Error populating CPUID with KVM HyperV emulation details: {0}")]
    CpuidKvmHyperV(vmm_sys_util::fam::Error),

    /// Error populating CPUID with CPU identification
    #[error("Error populating CPUID with CPU identification: {0}")]
    CpuidIdentification(vmm_sys_util::fam::Error),

    /// Error checking CPUID compatibility
    #[error("Error checking CPUID compatibility")]
    CpuidCheckCompatibility,

    /// Error writing EBDA address
    #[error("Error writing EBDA address: {0}")]
    EbdaSetup(vm_memory::GuestMemoryError),

    /// Error getting CPU TSC frequency
    #[error("Error getting CPU TSC frequency: {0}")]
    GetTscFrequency(HypervisorCpuError),

    /// Error retrieving TDX capabilities through the hypervisor (kvm/mshv) API
    #[cfg(feature = "tdx")]
    #[error("Error retrieving TDX capabilities through the hypervisor API: {0}")]
    TdxCapabilities(HypervisorError),

    /// Failed to configure E820 map for bzImage
    #[error("Failed to configure E820 map for bzImage")]
    E820Configuration,
}

impl From<Error> for super::Error {
    fn from(e: Error) -> super::Error {
        super::Error::PlatformSpecific(e)
    }
}

/// Computes the x2APIC id for `cpu_id` given an optional
/// (threads-per-core, cores-per-die, dies-per-socket) topology.
///
/// With a topology, the id is assembled from thread/core/die/socket
/// sub-fields, each packed into the minimal number of bits needed for
/// its range; without one, the cpu_id is used directly.
///
/// NOTE(review): `t.0 * t.1` and `t.0 * t.1 * t.2` are `u8` multiplications
/// performed before widening — large topologies could overflow. Confirm the
/// callers constrain these values.
pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u8, u8, u8)>) -> u32 {
    if let Some(t) = topology {
        // Width of each sub-field = bits needed to represent (count - 1).
        let thread_mask_width = u8::BITS - (t.0 - 1).leading_zeros();
        let core_mask_width = u8::BITS - (t.1 - 1).leading_zeros();
        let die_mask_width = u8::BITS - (t.2 - 1).leading_zeros();

        let thread_id = cpu_id % (t.0 as u32);
        let core_id = cpu_id / (t.0 as u32) % (t.1 as u32);
        let die_id = cpu_id / ((t.0 * t.1) as u32) % (t.2 as u32);
        let socket_id = cpu_id / ((t.0 * t.1 * t.2) as u32);

        return thread_id
            | (core_id << thread_mask_width)
            | (die_id << (thread_mask_width + core_mask_width))
            | (socket_id << (thread_mask_width + core_mask_width + die_mask_width));
    }

    cpu_id
}

/// One of the four CPUID output registers.
#[derive(Copy, Clone, Debug)]
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}

/// A set of single bits to OR into the registers/flags of the CPUID
/// entry matching (function, index). Applied by [`CpuidPatch::patch_cpuid`].
pub struct CpuidPatch {
    pub function: u32,
    pub index: u32,
    pub flags_bit: Option<u8>,
    pub eax_bit: Option<u8>,
    pub ebx_bit: Option<u8>,
    pub ecx_bit: Option<u8>,
    pub edx_bit: Option<u8>,
}

impl CpuidPatch {
    /// Returns the value of `reg` in the first entry matching `function`
    /// (and `index`, when given), or `None` if no entry matches.
    pub fn get_cpuid_reg(
        cpuid: &[CpuIdEntry],
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
    ) -> Option<u32> {
        for entry in cpuid.iter() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                return match reg {
                    CpuidReg::EAX => Some(entry.eax),
                    CpuidReg::EBX => Some(entry.ebx),
                    CpuidReg::ECX => Some(entry.ecx),
                    CpuidReg::EDX => Some(entry.edx),
                };
            }
        }

        None
    }

    /// Sets `reg` to `value` in every entry matching `function` (and
    /// `index`, when given). If no entry matches and an explicit `index`
    /// was supplied, a new entry is appended with CPUID_FLAG_VALID_INDEX.
    pub fn set_cpuid_reg(
        cpuid: &mut Vec<CpuIdEntry>,
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
        value: u32,
    ) {
        let mut entry_found = false;
        for entry in cpuid.iter_mut() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                entry_found = true;
                match reg {
                    CpuidReg::EAX => {
                        entry.eax = value;
                    }
                    CpuidReg::EBX => {
                        entry.ebx = value;
                    }
                    CpuidReg::ECX => {
                        entry.ecx = value;
                    }
                    CpuidReg::EDX => {
                        entry.edx = value;
                    }
                }
            }
        }

        if entry_found {
            return;
        }

        // Entry not found, so let's add it.
        if let Some(index) = index {
            let mut entry = CpuIdEntry {
                function,
                index,
                flags: CPUID_FLAG_VALID_INDEX,
                ..Default::default()
            };
            match reg {
                CpuidReg::EAX => {
                    entry.eax = value;
                }
                CpuidReg::EBX => {
                    entry.ebx = value;
                }
                CpuidReg::ECX => {
                    entry.ecx = value;
                }
                CpuidReg::EDX => {
                    entry.edx = value;
                }
            }

            cpuid.push(entry);
        }
    }

    /// ORs each patch's bits into every entry matching the patch's
    /// (function, index). Entries not matching any patch are untouched;
    /// patches matching no entry are silently ignored.
    pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: Vec<CpuidPatch>) {
        for entry in cpuid {
            for patch in patches.iter() {
                if entry.function == patch.function && entry.index == patch.index {
                    if let Some(flags_bit) = patch.flags_bit {
                        entry.flags |= 1 << flags_bit;
                    }
                    if let Some(eax_bit) = patch.eax_bit {
                        entry.eax |= 1 << eax_bit;
                    }
                    if let Some(ebx_bit) = patch.ebx_bit {
                        entry.ebx |= 1 << ebx_bit;
                    }
                    if let Some(ecx_bit) = patch.ecx_bit {
                        entry.ecx |= 1 << ecx_bit;
                    }
                    if let Some(edx_bit) = patch.edx_bit {
                        entry.edx |= 1 << edx_bit;
                    }
                }
            }
        }
    }

    /// Returns true when `feature_bit` of `reg` is set in the first entry
    /// matching (function, index); false when clear or no entry matches.
    pub fn is_feature_enabled(
        cpuid: &[CpuIdEntry],
        function: u32,
        index: u32,
        reg: CpuidReg,
        feature_bit: usize,
    ) -> bool {
        let mask = 1 << feature_bit;

        for entry in cpuid {
            if entry.function == function && entry.index == index {
                let reg_val = match reg {
                    CpuidReg::EAX => entry.eax,
                    CpuidReg::EBX => entry.ebx,
                    CpuidReg::ECX => entry.ecx,
                    CpuidReg::EDX => entry.edx,
                };

                return (reg_val & mask) == mask;
            }
        }

        false
    }
}

/// How a source/destination feature register pair must relate for live
/// migration compatibility (see check_cpuid_compatibility()).
#[derive(Debug)]
enum CpuidCompatibleCheck {
    BitwiseSubset, // bitwise subset
    Equal,         // equal in value
    NumNotGreater, // smaller or equal as a number
}

/// Identifies one CPUID feature register and the compatibility rule
/// that applies to it.
pub struct CpuidFeatureEntry {
    function: u32,
    index: u32,
    feature_reg: CpuidReg,
    compatible_check: CpuidCompatibleCheck,
}

impl CpuidFeatureEntry {
    /// The fixed list of feature registers checked for migration
    /// compatibility, with the rule applied to each.
    fn checked_feature_entry_list() -> Vec<CpuidFeatureEntry> {
        vec![
            // The following list includes all hardware features bits from
            // the CPUID Wiki Page: https://en.wikipedia.org/wiki/CPUID
            // Leaf 0x1, ECX/EDX, feature bits
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7, EAX/EBX/ECX/EDX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7 subleaf 0x1, EAX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 1,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x8000_0001, ECX/EDX, CPUID features bits
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
            // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            // Leaf 0x4000_0001, EAX/EBX/ECX/EDX, KVM CPUID features
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
        ]
    }

    /// Extracts, for each feature entry, the value of the selected register
    /// from the first matching CPUID entry (0 when no entry matches).
    fn get_features_from_cpuid(
        cpuid: &[CpuIdEntry],
        feature_entry_list: &[CpuidFeatureEntry],
    ) -> Vec<u32> {
        let mut features = vec![0; feature_entry_list.len()];
        for (i, feature_entry) in feature_entry_list.iter().enumerate() {
            for cpuid_entry in cpuid {
                if cpuid_entry.function == feature_entry.function
                    && cpuid_entry.index == feature_entry.index
                {
                    match feature_entry.feature_reg {
                        CpuidReg::EAX => {
                            features[i] = cpuid_entry.eax;
                        }
                        CpuidReg::EBX => {
                            features[i] = cpuid_entry.ebx;
                        }
                        CpuidReg::ECX => {
                            features[i] = cpuid_entry.ecx;
                        }
                        CpuidReg::EDX => {
                            features[i] = cpuid_entry.edx;
                        }
                    }

                    break;
                }
            }
        }

        features
    }

    // The function returns `Error` (a.k.a. "incompatible"), when the CPUID features from `src_vm_cpuid`
    // is not a subset of those of the `dest_vm_cpuid`.
    pub fn check_cpuid_compatibility(
        src_vm_cpuid: &[CpuIdEntry],
        dest_vm_cpuid: &[CpuIdEntry],
    ) -> Result<(), Error> {
        let feature_entry_list = &Self::checked_feature_entry_list();
        let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list);
        let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list);

        // Loop on feature bit and check if the 'source vm' feature is a subset
        // of those of the 'destination vm' feature
        let mut compatible = true;
        for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features
            .iter()
            .zip(dest_vm_features.iter())
            .enumerate()
        {
            let entry = &feature_entry_list[i];
            let entry_compatible = match entry.compatible_check {
                CpuidCompatibleCheck::BitwiseSubset => {
                    // Compatible iff no bit is set in src that is clear in dest.
                    let different_feature_bits = src_vm_feature ^ dest_vm_feature;
                    let src_vm_feature_bits_only = different_feature_bits & src_vm_feature;
                    src_vm_feature_bits_only == 0
                }
                CpuidCompatibleCheck::Equal => src_vm_feature == dest_vm_feature,
                CpuidCompatibleCheck::NumNotGreater => src_vm_feature <= dest_vm_feature,
            };
            if !entry_compatible {
                // All incompatibilities are reported before returning the error.
                error!(
                    "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \
                    compatible_check='{:?}', source VM feature='{:#04x}', destination VM feature'{:#04x}'.",
                    entry.function, entry.index, entry.feature_reg,
                    entry.compatible_check, src_vm_feature, dest_vm_feature
                );

                compatible = false;
            }
        }

        if compatible {
            info!("No CPU incompatibility detected.");
            Ok(())
        } else {
            Err(Error::CpuidCheckCompatibility)
        }
    }
}

/// Builds the CPUID table shared by all vCPUs, starting from the
/// hypervisor's supported CPUID and applying feature patches, SGX/TDX
/// adjustments, host cache/identification copies and (optionally) the
/// KVM Hyper-V emulation leaves. Per-vCPU fixups happen later in
/// [`configure_vcpu`].
pub fn generate_common_cpuid(
    hypervisor: &Arc<dyn hypervisor::Hypervisor>,
    config: &CpuidConfig,
) -> super::Result<Vec<CpuIdEntry>> {
    // Log the hypervisor signature when we are ourselves running virtualized.
    // SAFETY: cpuid called with valid leaves
    if unsafe { x86_64::__cpuid(1) }.ecx & 1 << HYPERVISOR_ECX_BIT == 1 << HYPERVISOR_ECX_BIT {
        // SAFETY: cpuid called with valid leaves
        let hypervisor_cpuid = unsafe { x86_64::__cpuid(0x4000_0000) };

        // The 12-byte signature is spread over EBX/ECX/EDX, little-endian.
        let mut identifier: [u8; 12] = [0; 12];
        identifier[0..4].copy_from_slice(&hypervisor_cpuid.ebx.to_le_bytes()[..]);
        identifier[4..8].copy_from_slice(&hypervisor_cpuid.ecx.to_le_bytes()[..]);
        identifier[8..12].copy_from_slice(&hypervisor_cpuid.edx.to_le_bytes()[..]);

        info!(
            "Running under nested virtualisation. Hypervisor string: {}",
            String::from_utf8_lossy(&identifier)
        );
    }

    // NOTE(review): "for with" in the message below reads like a typo, but a
    // doc-only pass must not touch runtime strings.
    info!(
        "Generating guest CPUID for with physical address size: {}",
        config.phys_bits
    );
    #[allow(unused_mut)]
    let mut cpuid_patches = vec![
        // Patch hypervisor bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(HYPERVISOR_ECX_BIT),
            edx_bit: None,
        },
        // Enable MTRR feature
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: None,
            edx_bit: Some(MTRR_EDX_BIT),
        },
    ];

    #[cfg(feature = "kvm")]
    if matches!(
        hypervisor.hypervisor_type(),
        hypervisor::HypervisorType::Kvm
    ) {
        // Patch tsc deadline timer bit
        cpuid_patches.push(CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
            edx_bit: None,
        });
    }

    // Supported CPUID
    let mut cpuid = hypervisor
        .get_supported_cpuid()
        .map_err(Error::CpuidGetSupported)?;

    CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);

    if let Some(sgx_epc_sections) = &config.sgx_epc_sections {
        update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
    }

    #[cfg(feature = "tdx")]
    let tdx_capabilities = if config.tdx {
        let caps = hypervisor
            .tdx_capabilities()
            .map_err(Error::TdxCapabilities)?;
        info!("TDX capabilities {:#?}", caps);
        Some(caps)
    } else {
        None
    };

    // Update some existing CPUID
    for entry in cpuid.as_mut_slice().iter_mut() {
        match entry.function {
            // Clear AMX related bits if the AMX feature is not enabled
            0x7 => {
                if !config.amx && entry.index == 0 {
                    entry.edx &= !(1 << AMX_BF16 | 1 << AMX_TILE | 1 << AMX_INT8)
                }
            }
            // Mask XSAVE state components (XCR0 in subleaf 0, XSS in
            // subleaf 1) by the TDX fixed-0/fixed-1 XFAM bits.
            0xd =>
            {
                #[cfg(feature = "tdx")]
                if let Some(caps) = &tdx_capabilities {
                    let xcr0_mask: u64 = 0x82ff;
                    let xss_mask: u64 = !xcr0_mask;
                    if entry.index == 0 {
                        entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32);
                        entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32;
                    } else if entry.index == 1 {
                        entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32);
                        entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32;
                    }
                }
            }
            // Copy host L1 cache details if not populated by KVM
            0x8000_0005 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0005 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0005) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Copy host L2 cache details if not populated by KVM
            0x8000_0006 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Set CPU physical bits
            0x8000_0008 => {
                entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff);
            }
            0x4000_0001 => {
                // These features are not supported by TDX
                #[cfg(feature = "tdx")]
                if config.tdx {
                    entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT
                        | 1 << KVM_FEATURE_STEAL_TIME_BIT)
                }
            }
            _ => {}
        }
    }

    // Copy CPU identification string
    for i in 0x8000_0002..=0x8000_0004 {
        cpuid.retain(|c| c.function != i);
        // SAFETY: call cpuid with valid leaves
        let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
        cpuid.push(CpuIdEntry {
            function: i,
            eax: leaf.eax,
            ebx: leaf.ebx,
            ecx: leaf.ecx,
            edx: leaf.edx,
            ..Default::default()
        });
    }

    if config.kvm_hyperv {
        // Remove conflicting entries
        cpuid.retain(|c| c.function != 0x4000_0000);
        cpuid.retain(|c| c.function != 0x4000_0001);
        // See "Hypervisor Top Level Functional Specification" for details
        // Compliance with "Hv#1" requires leaves up to 0x4000_000a
        cpuid.push(CpuIdEntry {
            function: 0x40000000,
            eax: 0x4000000a, // Maximum cpuid leaf
            ebx: 0x756e694c, // "Linu"
            ecx: 0x564b2078, // "x KV"
            edx: 0x7648204d, // "M Hv"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x40000001,
            eax: 0x31237648, // "Hv#1"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x40000002,
            eax: 0x3839,  // "Build number"
            ebx: 0xa0000, // "Version"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0003,
            eax: 1 << 1 // AccessPartitionReferenceCounter
               | 1 << 2 // AccessSynicRegs
               | 1 << 3 // AccessSyntheticTimerRegs
               | 1 << 9, // AccessPartitionReferenceTsc
            edx: 1 << 3, // CPU dynamic partitioning
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0004,
            eax: 1 << 5, // Recommend relaxed timing
            ..Default::default()
        });
        for i in 0x4000_0005..=0x4000_000a {
            cpuid.push(CpuIdEntry {
                function: i,
                ..Default::default()
            });
        }
    }

    Ok(cpuid)
}

/// Applies per-vCPU CPUID fixups (x2APIC id, topology, TSC frequency
/// leaf), pushes the CPUID to the vCPU, and programs its MSRs, registers
/// and LAPIC LINT entries. `boot_setup` additionally initializes the
/// boot register state for the given entry point.
pub fn configure_vcpu(
    vcpu: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>,
    cpuid: Vec<CpuIdEntry>,
    kvm_hyperv: bool,
    cpu_vendor: CpuVendor,
    topology: Option<(u8, u8, u8)>,
) -> super::Result<()> {
    let x2apic_id = get_x2apic_id(id as u32, topology);

    // Per vCPU CPUID changes; common are handled via generate_common_cpuid()
    let mut cpuid = cpuid;
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, x2apic_id);
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, x2apic_id);
    if matches!(cpu_vendor, CpuVendor::AMD) {
        CpuidPatch::set_cpuid_reg(&mut cpuid, 0x8000_001e, Some(0), CpuidReg::EAX, x2apic_id);
    }

    // Set ApicId in cpuid for each vcpu - found in cpuid ebx when eax = 1
    let mut apic_id_patched = false;
    for entry in &mut cpuid {
        if entry.function == 1 {
            entry.ebx &= 0xffffff;
            entry.ebx |= x2apic_id << 24;
            apic_id_patched = true;
            break;
        }
    }
    // Leaf 0x1 must exist in the supported CPUID; its absence is a bug.
    assert!(apic_id_patched);

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, cpu_vendor, id);
    }

    // The TSC frequency CPUID leaf should not be included when running with HyperV emulation
    if !kvm_hyperv {
        if let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? {
            // Need to check that the TSC doesn't vary with dynamic frequency
            // SAFETY: cpuid called with valid leaves
            if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx
                & (1u32 << INVARIANT_TSC_EDX_BIT)
                > 0
            {
                // Advertise leaf 0x4000_0010 by raising the max KVM leaf.
                CpuidPatch::set_cpuid_reg(
                    &mut cpuid,
                    0x4000_0000,
                    None,
                    CpuidReg::EAX,
                    0x4000_0010,
                );
                cpuid.retain(|c| c.function != 0x4000_0010);
                cpuid.push(CpuIdEntry {
                    function: 0x4000_0010,
                    eax: tsc_khz,
                    ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's
                                   * APIC_BUS_CYCLE_NS */
                    ..Default::default()
                });
            };
        }
    }

    vcpu.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        vcpu.enable_hyperv_synic().unwrap();
    }

    regs::setup_msrs(vcpu).map_err(Error::MsrsConfiguration)?;
    if let Some((kernel_entry_point, guest_memory)) = boot_setup {
        regs::setup_regs(vcpu, kernel_entry_point).map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&guest_memory.memory(), vcpu).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(vcpu).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}

/// Returns a Vec of the valid memory addresses.
///
/// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64 all addresses are valid from the start of the kernel except a
/// carve out at the end of 32bit address space.
912 pub fn arch_memory_regions() -> Vec<(GuestAddress, usize, RegionType)> { 913 vec![ 914 // 0 GiB ~ 3GiB: memory before the gap 915 ( 916 GuestAddress(0), 917 layout::MEM_32BIT_RESERVED_START.raw_value() as usize, 918 RegionType::Ram, 919 ), 920 // 4 GiB ~ inf: memory after the gap 921 (layout::RAM_64BIT_START, usize::MAX, RegionType::Ram), 922 // 3 GiB ~ 3712 MiB: 32-bit device memory hole 923 ( 924 layout::MEM_32BIT_RESERVED_START, 925 layout::MEM_32BIT_DEVICES_SIZE as usize, 926 RegionType::SubRegion, 927 ), 928 // 3712 MiB ~ 3968 MiB: 32-bit reserved memory hole 929 ( 930 layout::MEM_32BIT_RESERVED_START.unchecked_add(layout::MEM_32BIT_DEVICES_SIZE), 931 (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize, 932 RegionType::Reserved, 933 ), 934 ] 935 } 936 937 /// Configures the system and should be called once per vm before starting vcpu threads. 938 /// 939 /// # Arguments 940 /// 941 /// * `guest_mem` - The memory to be used by the guest. 942 /// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded. 943 /// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator. 944 /// * `num_cpus` - Number of virtual CPUs the guest will have. 
945 #[allow(clippy::too_many_arguments)] 946 pub fn configure_system( 947 guest_mem: &GuestMemoryMmap, 948 cmdline_addr: GuestAddress, 949 cmdline_size: usize, 950 initramfs: &Option<InitramfsConfig>, 951 _num_cpus: u8, 952 setup_header: Option<setup_header>, 953 rsdp_addr: Option<GuestAddress>, 954 sgx_epc_region: Option<SgxEpcRegion>, 955 serial_number: Option<&str>, 956 uuid: Option<&str>, 957 oem_strings: Option<&[&str]>, 958 topology: Option<(u8, u8, u8)>, 959 ) -> super::Result<()> { 960 // Write EBDA address to location where ACPICA expects to find it 961 guest_mem 962 .write_obj((layout::EBDA_START.0 >> 4) as u16, layout::EBDA_POINTER) 963 .map_err(Error::EbdaSetup)?; 964 965 let size = smbios::setup_smbios(guest_mem, serial_number, uuid, oem_strings) 966 .map_err(Error::SmbiosSetup)?; 967 968 // Place the MP table after the SMIOS table aligned to 16 bytes 969 let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size); 970 let offset = GuestAddress((offset.0 + 16) & !0xf); 971 mptable::setup_mptable(offset, guest_mem, _num_cpus, topology).map_err(Error::MpTableSetup)?; 972 973 // Check that the RAM is not smaller than the RSDP start address 974 if let Some(rsdp_addr) = rsdp_addr { 975 if rsdp_addr.0 > guest_mem.last_addr().0 { 976 return Err(super::Error::RsdpPastRamEnd); 977 } 978 } 979 980 match setup_header { 981 Some(hdr) => configure_32bit_entry( 982 guest_mem, 983 cmdline_addr, 984 cmdline_size, 985 initramfs, 986 hdr, 987 rsdp_addr, 988 sgx_epc_region, 989 ), 990 None => configure_pvh( 991 guest_mem, 992 cmdline_addr, 993 initramfs, 994 rsdp_addr, 995 sgx_epc_region, 996 ), 997 } 998 } 999 1000 type RamRange = (u64, u64); 1001 1002 /// Returns usable physical memory ranges for the guest 1003 /// These should be used to create e820_RAM memory maps 1004 pub fn generate_ram_ranges(guest_mem: &GuestMemoryMmap) -> super::Result<Vec<RamRange>> { 1005 // Merge continuous memory regions into one region. 
    // Note: memory regions from "GuestMemory" are sorted and non-zero sized.
    //
    // Coalesce the sorted guest memory regions into contiguous (start, end)
    // RAM ranges: adjacent regions are merged, and a new range is opened
    // whenever a hole is found between two regions.
    let ram_regions = {
        let mut ram_regions = Vec::new();
        let mut current_start = guest_mem
            .iter()
            .next()
            .map(GuestMemoryRegion::start_addr)
            .expect("GuestMemory must have one memory region at least")
            .raw_value();
        // Start with an empty range: on the first loop iteration the first
        // region's start equals `current_end`, so it simply extends it.
        let mut current_end = current_start;

        for (start, size) in guest_mem
            .iter()
            .map(|m| (m.start_addr().raw_value(), m.len()))
        {
            if current_end == start {
                // This zone is continuous with the previous one.
                current_end += size;
            } else {
                // Found a hole: close the current range and open a new one.
                ram_regions.push((current_start, current_end));

                current_start = start;
                current_end = start + size;
            }
        }

        // Close the last pending range.
        ram_regions.push((current_start, current_end));

        ram_regions
    };

    // Create the memory map entry for memory region before the gap
    let mut ram_ranges = vec![];

    // Generate the first usable physical memory range before the gap. The e820 map
    // should only report memory above 1MiB.
    let first_ram_range = {
        let (first_region_start, first_region_end) =
            ram_regions.first().ok_or(super::Error::MemmapTableSetup)?;
        let high_ram_start = layout::HIGH_RAM_START.raw_value();
        let mem_32bit_reserved_start = layout::MEM_32BIT_RESERVED_START.raw_value();

        // The first coalesced range must cover HIGH_RAM_START and must end at
        // or below the start of the 32-bit reserved hole; any other shape
        // means the guest memory layout does not match this code's
        // expectations, so fail loudly rather than emit a bogus map.
        if !((first_region_start <= &high_ram_start)
            && (first_region_end > &high_ram_start)
            && (first_region_end <= &mem_32bit_reserved_start))
        {
            error!(
                "Unexpected first memory region layout: (start: 0x{:08x}, end: 0x{:08x}).
                high_ram_start: 0x{:08x}, mem_32bit_reserved_start: 0x{:08x}",
                first_region_start, first_region_end, high_ram_start, mem_32bit_reserved_start
            );

            return Err(super::Error::MemmapTableSetup);
        }

        info!(
            "first usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
            high_ram_start, first_region_end
        );

        // Only report the portion above 1MiB (HIGH_RAM_START).
        (high_ram_start, *first_region_end)
    };
    ram_ranges.push(first_ram_range);

    // Generate additional usable physical memory range after the gap if any.
    for ram_region in ram_regions.iter().skip(1) {
        info!(
            "found usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
            ram_region.0, ram_region.1
        );

        ram_ranges.push(*ram_region);
    }

    Ok(ram_ranges)
}

/// Configure the guest for a direct (PVH) boot: build the `hvm_start_info`
/// structure, the optional initramfs module entry and the PVH memory map,
/// and write them all to guest memory at their layout-defined addresses.
///
/// Returns an error if any structure does not fit in guest RAM or a write
/// to guest memory fails.
fn configure_pvh(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    // Magic value the guest checks to recognize a PVH start_info block.
    const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;

    let mut start_info = hvm_start_info {
        magic: XEN_HVM_START_MAGIC_VALUE,
        version: 1, // pvh has version 1
        nr_modules: 0,
        cmdline_paddr: cmdline_addr.raw_value(),
        memmap_paddr: layout::MEMMAP_START.raw_value(),
        ..Default::default()
    };

    if let Some(rsdp_addr) = rsdp_addr {
        start_info.rsdp_paddr = rsdp_addr.0;
    }

    if let Some(initramfs_config) = initramfs {
        // The initramfs has been written to guest memory already, here we just need to
        // create the module structure that describes it.
        let ramdisk_mod = hvm_modlist_entry {
            paddr: initramfs_config.address.raw_value(),
            size: initramfs_config.size as u64,
            ..Default::default()
        };

        start_info.nr_modules += 1;
        start_info.modlist_paddr = layout::MODLIST_START.raw_value();

        // Write the modlist struct to guest memory.
        guest_mem
            .write_obj(ramdisk_mod, layout::MODLIST_START)
            .map_err(super::Error::ModlistSetup)?;
    }

    // Vector to hold the memory maps which needs to be written to guest memory
    // at MEMMAP_START after all of the mappings are recorded.
    let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

    // Create the memory map entries.
    add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);

    // Get usable physical memory ranges
    let ram_ranges = generate_ram_ranges(guest_mem)?;

    // Create e820 memory map entries
    for ram_range in ram_ranges {
        info!(
            "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}",
            ram_range.0, ram_range.1
        );
        add_memmap_entry(
            &mut memmap,
            ram_range.0,
            ram_range.1 - ram_range.0,
            E820_RAM,
        );
    }

    // Reserve the PCI MMCONFIG (ECAM) window so the guest does not treat it
    // as usable RAM.
    add_memmap_entry(
        &mut memmap,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    );

    // SGX EPC memory, when present, is reported as reserved as well.
    if let Some(sgx_epc_region) = sgx_epc_region {
        add_memmap_entry(
            &mut memmap,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        );
    }

    start_info.memmap_entries = memmap.len() as u32;

    // Copy the vector with the memmap table to the MEMMAP_START address
    // which is already saved in the memmap_paddr field of hvm_start_info struct.
    let mut memmap_start_addr = layout::MEMMAP_START;

    // Verify the whole table fits in guest RAM before writing any entry.
    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, write it to guest memory.
    for memmap_entry in memmap {
        guest_mem
            .write_obj(memmap_entry, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        // Safe w.r.t. overflow: the checked_offset above already validated
        // the full table span.
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at PVH_START_INFO
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}

/// Configure the guest for a 32-bit (bzImage-style) boot entry: fill the
/// `boot_params` "zero page" from the provided setup header, build the e820
/// map, and write the page to guest memory at ZERO_PAGE_START.
fn configure_32bit_entry(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    cmdline_size: usize,
    initramfs: &Option<InitramfsConfig>,
    setup_hdr: setup_header,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    // Loader type reported to the kernel when none was set ("undefined" loader).
    const KERNEL_LOADER_OTHER: u8 = 0xff;

    // Use the provided setup header
    let mut params = boot_params {
        hdr: setup_hdr,
        ..Default::default()
    };

    // Common bootparams settings
    if params.hdr.type_of_loader == 0 {
        params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
    }
    params.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32;
    params.hdr.cmdline_size = cmdline_size as u32;

    if let Some(initramfs_config) = initramfs {
        params.hdr.ramdisk_image = initramfs_config.address.raw_value() as u32;
        params.hdr.ramdisk_size = initramfs_config.size as u32;
    }

    // RAM below the EBDA is always usable.
    add_e820_entry(&mut params, 0, layout::EBDA_START.raw_value(), E820_RAM)?;

    let mem_end = guest_mem.last_addr();
    if mem_end < layout::MEM_32BIT_RESERVED_START {
        // All RAM fits below the 32-bit reserved hole: a single usable range.
        // `last_addr` is inclusive, hence the `+ 1` on the size.
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            mem_end.unchecked_offset_from(layout::HIGH_RAM_START) + 1,
            E820_RAM,
        )?;
    } else {
        // RAM extends past the hole: report the range below the hole, plus a
        // second range above 4GiB when memory goes beyond RAM_64BIT_START.
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            layout::MEM_32BIT_RESERVED_START.unchecked_offset_from(layout::HIGH_RAM_START),
            E820_RAM,
        )?;
        if mem_end > layout::RAM_64BIT_START {
            add_e820_entry(
                &mut params,
                layout::RAM_64BIT_START.raw_value(),
                mem_end.unchecked_offset_from(layout::RAM_64BIT_START) + 1,
                E820_RAM,
            )?;
        }
    }

    // Reserve the PCI MMCONFIG (ECAM) window.
    add_e820_entry(
        &mut params,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    )?;

    // SGX EPC memory, when present, is reported as reserved.
    if let Some(sgx_epc_region) = sgx_epc_region {
        add_e820_entry(
            &mut params,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        )?;
    }

    if let Some(rsdp_addr) = rsdp_addr {
        params.acpi_rsdp_addr = rsdp_addr.0;
    }

    // Ensure the zero page fits in guest RAM, then write it out.
    let zero_page_addr = layout::ZERO_PAGE_START;
    guest_mem
        .checked_offset(zero_page_addr, mem::size_of::<boot_params>())
        .ok_or(super::Error::ZeroPagePastRamEnd)?;
    guest_mem
        .write_obj(params, zero_page_addr)
        .map_err(super::Error::ZeroPageSetup)?;

    Ok(())
}

/// Add an e820 region to the e820 map.
/// Returns Ok(()) if successful, or an error if there is no space left in the map.
1291 fn add_e820_entry( 1292 params: &mut boot_params, 1293 addr: u64, 1294 size: u64, 1295 mem_type: u32, 1296 ) -> Result<(), Error> { 1297 if params.e820_entries >= params.e820_table.len() as u8 { 1298 return Err(Error::E820Configuration); 1299 } 1300 1301 params.e820_table[params.e820_entries as usize].addr = addr; 1302 params.e820_table[params.e820_entries as usize].size = size; 1303 params.e820_table[params.e820_entries as usize].type_ = mem_type; 1304 params.e820_entries += 1; 1305 1306 Ok(()) 1307 } 1308 1309 fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) { 1310 // Add the table entry to the vector 1311 memmap.push(hvm_memmap_table_entry { 1312 addr, 1313 size, 1314 type_: mem_type, 1315 reserved: 0, 1316 }); 1317 } 1318 1319 /// Returns the memory address where the initramfs could be loaded. 1320 pub fn initramfs_load_addr( 1321 guest_mem: &GuestMemoryMmap, 1322 initramfs_size: usize, 1323 ) -> super::Result<u64> { 1324 let first_region = guest_mem 1325 .find_region(GuestAddress::new(0)) 1326 .ok_or(super::Error::InitramfsAddress)?; 1327 // It's safe to cast to usize because the size of a region can't be greater than usize. 1328 let lowmem_size = first_region.len() as usize; 1329 1330 if lowmem_size < initramfs_size { 1331 return Err(super::Error::InitramfsAddress); 1332 } 1333 1334 let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64; 1335 Ok(aligned_addr) 1336 } 1337 1338 pub fn get_host_cpu_phys_bits(hypervisor: &Arc<dyn hypervisor::Hypervisor>) -> u8 { 1339 // SAFETY: call cpuid with valid leaves 1340 unsafe { 1341 let leaf = x86_64::__cpuid(0x8000_0000); 1342 1343 // Detect and handle AMD SME (Secure Memory Encryption) properly. 1344 // Some physical address bits may become reserved when the feature is enabled. 
1345 // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1 1346 let reduced = if leaf.eax >= 0x8000_001f 1347 && matches!(hypervisor.get_cpu_vendor(), CpuVendor::AMD) 1348 && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0 1349 { 1350 (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f 1351 } else { 1352 0 1353 }; 1354 1355 if leaf.eax >= 0x8000_0008 { 1356 let leaf = x86_64::__cpuid(0x8000_0008); 1357 ((leaf.eax & 0xff) - reduced) as u8 1358 } else { 1359 36 1360 } 1361 } 1362 } 1363 1364 fn update_cpuid_topology( 1365 cpuid: &mut Vec<CpuIdEntry>, 1366 threads_per_core: u8, 1367 cores_per_die: u8, 1368 dies_per_package: u8, 1369 cpu_vendor: CpuVendor, 1370 id: u8, 1371 ) { 1372 let x2apic_id = get_x2apic_id( 1373 id as u32, 1374 Some((threads_per_core, cores_per_die, dies_per_package)), 1375 ); 1376 1377 let thread_width = 8 - (threads_per_core - 1).leading_zeros(); 1378 let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width; 1379 let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width; 1380 1381 let mut cpu_ebx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX).unwrap_or(0); 1382 cpu_ebx |= ((dies_per_package as u32) * (cores_per_die as u32) * (threads_per_core as u32)) 1383 & 0xff << 16; 1384 CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX, cpu_ebx); 1385 1386 let mut cpu_edx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX).unwrap_or(0); 1387 cpu_edx |= 1 << 28; 1388 CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX, cpu_edx); 1389 1390 // CPU Topology leaf 0xb 1391 CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width); 1392 CpuidPatch::set_cpuid_reg( 1393 cpuid, 1394 0xb, 1395 Some(0), 1396 CpuidReg::EBX, 1397 u32::from(threads_per_core), 1398 ); 1399 CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8); 1400 1401 CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width); 1402 CpuidPatch::set_cpuid_reg( 1403 cpuid, 1404 0xb, 
1405 Some(1), 1406 CpuidReg::EBX, 1407 u32::from(dies_per_package * cores_per_die * threads_per_core), 1408 ); 1409 CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8); 1410 1411 // CPU Topology leaf 0x1f 1412 CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width); 1413 CpuidPatch::set_cpuid_reg( 1414 cpuid, 1415 0x1f, 1416 Some(0), 1417 CpuidReg::EBX, 1418 u32::from(threads_per_core), 1419 ); 1420 CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8); 1421 1422 CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width); 1423 CpuidPatch::set_cpuid_reg( 1424 cpuid, 1425 0x1f, 1426 Some(1), 1427 CpuidReg::EBX, 1428 u32::from(cores_per_die * threads_per_core), 1429 ); 1430 CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8); 1431 1432 CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width); 1433 CpuidPatch::set_cpuid_reg( 1434 cpuid, 1435 0x1f, 1436 Some(2), 1437 CpuidReg::EBX, 1438 u32::from(dies_per_package * cores_per_die * threads_per_core), 1439 ); 1440 CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8); 1441 1442 if matches!(cpu_vendor, CpuVendor::AMD) { 1443 CpuidPatch::set_cpuid_reg( 1444 cpuid, 1445 0x8000_001e, 1446 Some(0), 1447 CpuidReg::EBX, 1448 ((threads_per_core as u32 - 1) << 8) | (x2apic_id & 0xff), 1449 ); 1450 CpuidPatch::set_cpuid_reg( 1451 cpuid, 1452 0x8000_001e, 1453 Some(0), 1454 CpuidReg::ECX, 1455 ((dies_per_package as u32 - 1) << 8) | (thread_width + die_width) & 0xff, 1456 ); 1457 CpuidPatch::set_cpuid_reg(cpuid, 0x8000_001e, Some(0), CpuidReg::EDX, 0); 1458 if cores_per_die * threads_per_core > 1 { 1459 let ecx = 1460 CpuidPatch::get_cpuid_reg(cpuid, 0x8000_0001, Some(0), CpuidReg::ECX).unwrap_or(0); 1461 CpuidPatch::set_cpuid_reg( 1462 cpuid, 1463 0x8000_0001, 1464 Some(0), 1465 CpuidReg::ECX, 1466 ecx | (1u32 << 1) | (1u32 << 22), 1467 ); 1468 CpuidPatch::set_cpuid_reg( 1469 cpuid, 1470 0x0000_0001, 1471 
Some(0), 1472 CpuidReg::EBX, 1473 (x2apic_id << 24) | (8 << 8) | (((cores_per_die * threads_per_core) as u32) << 16), 1474 ); 1475 let cpuid_patches = vec![ 1476 // Patch tsc deadline timer bit 1477 CpuidPatch { 1478 function: 1, 1479 index: 0, 1480 flags_bit: None, 1481 eax_bit: None, 1482 ebx_bit: None, 1483 ecx_bit: None, 1484 edx_bit: Some(28), 1485 }, 1486 ]; 1487 CpuidPatch::patch_cpuid(cpuid, cpuid_patches); 1488 CpuidPatch::set_cpuid_reg( 1489 cpuid, 1490 0x8000_0008, 1491 Some(0), 1492 CpuidReg::ECX, 1493 ((thread_width + core_width + die_width) << 12) 1494 | ((cores_per_die * threads_per_core) - 1) as u32, 1495 ); 1496 } else { 1497 CpuidPatch::set_cpuid_reg(cpuid, 0x8000_0008, Some(0), CpuidReg::ECX, 0u32); 1498 } 1499 } 1500 } 1501 1502 // The goal is to update the CPUID sub-leaves to reflect the number of EPC 1503 // sections exposed to the guest. 1504 fn update_cpuid_sgx( 1505 cpuid: &mut Vec<CpuIdEntry>, 1506 epc_sections: &[SgxEpcSection], 1507 ) -> Result<(), Error> { 1508 // Something's wrong if there's no EPC section. 1509 if epc_sections.is_empty() { 1510 return Err(Error::NoSgxEpcSection); 1511 } 1512 // We can't go further if the hypervisor does not support SGX feature. 1513 if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) { 1514 return Err(Error::MissingSgxFeature); 1515 } 1516 // We can't go further if the hypervisor does not support SGX_LC feature. 1517 if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) { 1518 return Err(Error::MissingSgxLaunchControlFeature); 1519 } 1520 1521 // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC 1522 // properties such as confidentiality and integrity. 
1523 // SAFETY: call cpuid with valid leaves 1524 let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) }; 1525 1526 for (i, epc_section) in epc_sections.iter().enumerate() { 1527 let subleaf_idx = i + 2; 1528 let start = epc_section.start().raw_value(); 1529 let size = epc_section.size(); 1530 let eax = (start & 0xffff_f000) as u32 | 0x1; 1531 let ebx = (start >> 32) as u32; 1532 let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf); 1533 let edx = (size >> 32) as u32; 1534 // CPU Topology leaf 0x12 1535 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax); 1536 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx); 1537 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx); 1538 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx); 1539 } 1540 1541 // Add one NULL entry to terminate the dynamic list 1542 let subleaf_idx = epc_sections.len() + 2; 1543 // CPU Topology leaf 0x12 1544 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0); 1545 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0); 1546 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0); 1547 CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0); 1548 1549 Ok(()) 1550 } 1551 1552 #[cfg(test)] 1553 mod tests { 1554 use linux_loader::loader::bootparam::boot_e820_entry; 1555 1556 use super::*; 1557 1558 #[test] 1559 fn regions_base_addr() { 1560 let regions = arch_memory_regions(); 1561 assert_eq!(4, regions.len()); 1562 assert_eq!(GuestAddress(0), regions[0].0); 1563 assert_eq!(GuestAddress(1 << 32), regions[1].0); 1564 } 1565 1566 #[test] 1567 fn test_system_configuration() { 1568 let no_vcpus = 4; 1569 let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); 1570 let config_err = configure_system( 1571 &gm, 1572 
GuestAddress(0), 1573 0, 1574 &None, 1575 1, 1576 None, 1577 Some(layout::RSDP_POINTER), 1578 None, 1579 None, 1580 None, 1581 None, 1582 None, 1583 ); 1584 config_err.unwrap_err(); 1585 1586 // Now assigning some memory that falls before the 32bit memory hole. 1587 let arch_mem_regions = arch_memory_regions(); 1588 let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions 1589 .iter() 1590 .filter(|r| r.2 == RegionType::Ram && r.1 != usize::MAX) 1591 .map(|r| (r.0, r.1)) 1592 .collect(); 1593 let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap(); 1594 1595 configure_system( 1596 &gm, 1597 GuestAddress(0), 1598 0, 1599 &None, 1600 no_vcpus, 1601 None, 1602 None, 1603 None, 1604 None, 1605 None, 1606 None, 1607 None, 1608 ) 1609 .unwrap(); 1610 1611 // Now assigning some memory that falls after the 32bit memory hole. 1612 let arch_mem_regions = arch_memory_regions(); 1613 let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions 1614 .iter() 1615 .filter(|r| r.2 == RegionType::Ram) 1616 .map(|r| { 1617 if r.1 == usize::MAX { 1618 (r.0, 128 << 20) 1619 } else { 1620 (r.0, r.1) 1621 } 1622 }) 1623 .collect(); 1624 let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap(); 1625 configure_system( 1626 &gm, 1627 GuestAddress(0), 1628 0, 1629 &None, 1630 no_vcpus, 1631 None, 1632 None, 1633 None, 1634 None, 1635 None, 1636 None, 1637 None, 1638 ) 1639 .unwrap(); 1640 1641 configure_system( 1642 &gm, 1643 GuestAddress(0), 1644 0, 1645 &None, 1646 no_vcpus, 1647 None, 1648 None, 1649 None, 1650 None, 1651 None, 1652 None, 1653 None, 1654 ) 1655 .unwrap(); 1656 } 1657 1658 #[test] 1659 fn test_add_e820_entry() { 1660 let e820_table = [(boot_e820_entry { 1661 addr: 0x1, 1662 size: 4, 1663 type_: 1, 1664 }); 128]; 1665 1666 let expected_params = boot_params { 1667 e820_table, 1668 e820_entries: 1, 1669 ..Default::default() 1670 }; 1671 1672 let mut params: boot_params = Default::default(); 1673 add_e820_entry( 1674 &mut params, 1675 e820_table[0].addr, 1676 
e820_table[0].size, 1677 e820_table[0].type_, 1678 ) 1679 .unwrap(); 1680 assert_eq!( 1681 format!("{:?}", params.e820_table[0]), 1682 format!("{:?}", expected_params.e820_table[0]) 1683 ); 1684 assert_eq!(params.e820_entries, expected_params.e820_entries); 1685 1686 // Exercise the scenario where the field storing the length of the e820 entry table is 1687 // is bigger than the allocated memory. 1688 params.e820_entries = params.e820_table.len() as u8 + 1; 1689 add_e820_entry( 1690 &mut params, 1691 e820_table[0].addr, 1692 e820_table[0].size, 1693 e820_table[0].type_, 1694 ) 1695 .unwrap_err(); 1696 } 1697 1698 #[test] 1699 fn test_add_memmap_entry() { 1700 let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new(); 1701 1702 let expected_memmap = vec![ 1703 hvm_memmap_table_entry { 1704 addr: 0x0, 1705 size: 0x1000, 1706 type_: E820_RAM, 1707 ..Default::default() 1708 }, 1709 hvm_memmap_table_entry { 1710 addr: 0x10000, 1711 size: 0xa000, 1712 type_: E820_RESERVED, 1713 ..Default::default() 1714 }, 1715 ]; 1716 1717 add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM); 1718 add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED); 1719 1720 assert_eq!(format!("{memmap:?}"), format!("{expected_memmap:?}")); 1721 } 1722 1723 #[test] 1724 fn test_get_x2apic_id() { 1725 let x2apic_id = get_x2apic_id(0, Some((2, 3, 1))); 1726 assert_eq!(x2apic_id, 0); 1727 1728 let x2apic_id = get_x2apic_id(1, Some((2, 3, 1))); 1729 assert_eq!(x2apic_id, 1); 1730 1731 let x2apic_id = get_x2apic_id(2, Some((2, 3, 1))); 1732 assert_eq!(x2apic_id, 2); 1733 1734 let x2apic_id = get_x2apic_id(6, Some((2, 3, 1))); 1735 assert_eq!(x2apic_id, 8); 1736 1737 let x2apic_id = get_x2apic_id(7, Some((2, 3, 1))); 1738 assert_eq!(x2apic_id, 9); 1739 1740 let x2apic_id = get_x2apic_id(8, Some((2, 3, 1))); 1741 assert_eq!(x2apic_id, 10); 1742 } 1743 } 1744