// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use std::sync::Arc;

pub mod interrupts;
pub mod layout;
mod mpspec;
mod mptable;
pub mod regs;

use std::collections::BTreeMap;
use std::mem;

use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX};
use hypervisor::{CpuVendor, HypervisorCpuError, HypervisorError};
use linux_loader::loader::bootparam::{boot_params, setup_header};
use linux_loader::loader::elf::start_info::{
    hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
};
use thiserror::Error;
use vm_memory::{
    Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
    GuestMemoryRegion, GuestUsize,
};

use crate::{GuestMemoryMmap, InitramfsConfig, RegionType};
mod smbios;
use std::arch::x86_64;
#[cfg(feature = "tdx")]
pub mod tdx;

// CPUID feature bits
#[cfg(feature = "kvm")]
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // TSC deadline timer ecx bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR (Memory Type Range Registers) edx bit.
const INVARIANT_TSC_EDX_BIT: u8 = 8; // Invariant TSC bit on 0x8000_0007 EDX
const AMX_BF16: u8 = 22; // AMX tile computation on bfloat16 numbers
const AMX_TILE: u8 = 24; // AMX tile load/store instructions
const AMX_INT8: u8 = 25; // AMX tile computation on 8-bit integers

// KVM feature bits
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;

pub const _NSIG: i32 = 65;

#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
    /// This field is used for bzImage to fill the zero page
    pub setup_header: Option<setup_header>,
}

const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;

#[derive(Clone)]
pub struct SgxEpcSection {
    start: GuestAddress,
    size: GuestUsize,
}

impl SgxEpcSection {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}

#[derive(Clone)]
pub struct SgxEpcRegion {
    start: GuestAddress,
    size: GuestUsize,
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}

pub struct CpuidConfig {
    pub sgx_epc_sections: Option<Vec<SgxEpcSection>>,
    pub phys_bits: u8,
    pub kvm_hyperv: bool,
    #[cfg(feature = "tdx")]
    pub tdx: bool,
    pub amx: bool,
}

#[derive(Debug, Error)]
pub enum Error {
    /// Error writing MP table to memory.
    #[error("Error writing MP table to memory: {0}")]
    MpTableSetup(#[source] mptable::Error),

    /// Error configuring the general purpose registers
    #[error("Error configuring the general purpose registers: {0}")]
    RegsConfiguration(#[source] regs::Error),

    /// Error configuring the special registers
    #[error("Error configuring the special registers: {0}")]
    SregsConfiguration(#[source] regs::Error),

    /// Error configuring the floating point related registers
    #[error("Error configuring the floating point related registers: {0}")]
    FpuConfiguration(#[source] regs::Error),

    /// Error configuring the MSR registers
    #[error("Error configuring the MSR registers: {0}")]
    MsrsConfiguration(#[source] regs::Error),

    /// Failed to set supported CPUs.
    #[error("Failed to set supported CPUs: {0}")]
    SetSupportedCpusFailed(#[source] anyhow::Error),

    /// Cannot set the local interruption due to bad configuration.
    #[error("Cannot set the local interruption due to bad configuration: {0}")]
    LocalIntConfiguration(#[source] anyhow::Error),

    /// Error setting up SMBIOS table
    #[error("Error setting up SMBIOS table: {0}")]
    SmbiosSetup(#[source] smbios::Error),

    /// Could not find any SGX EPC section
    #[error("Could not find any SGX EPC section")]
    NoSgxEpcSection,

    /// Missing SGX CPU feature
    #[error("Missing SGX CPU feature")]
    MissingSgxFeature,

    /// Missing SGX_LC CPU feature
    #[error("Missing SGX_LC CPU feature")]
    MissingSgxLaunchControlFeature,

    /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
    #[error("Error getting supported CPUID through the hypervisor API: {0}")]
    CpuidGetSupported(#[source] HypervisorError),

    /// Error populating CPUID with KVM HyperV emulation details
    #[error("Error populating CPUID with KVM HyperV emulation details: {0}")]
    CpuidKvmHyperV(#[source] vmm_sys_util::fam::Error),

    /// Error populating CPUID with CPU identification
    #[error("Error populating CPUID with CPU identification: {0}")]
    CpuidIdentification(#[source] vmm_sys_util::fam::Error),

    /// Error checking CPUID compatibility
    #[error("Error checking CPUID compatibility")]
    CpuidCheckCompatibility,

    /// Error writing EBDA address
    #[error("Error writing EBDA address: {0}")]
    EbdaSetup(#[source] vm_memory::GuestMemoryError),

    /// Error getting CPU TSC frequency
    #[error("Error getting CPU TSC frequency: {0}")]
    GetTscFrequency(#[source] HypervisorCpuError),

    /// Error retrieving TDX capabilities through the hypervisor (kvm/mshv) API
    #[cfg(feature = "tdx")]
    #[error("Error retrieving TDX capabilities through the hypervisor API: {0}")]
    TdxCapabilities(#[source] HypervisorError),

    /// Failed to configure E820 map for bzImage
    #[error("Failed to configure E820 map for bzImage")]
    E820Configuration,
}

pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u8, u8, u8)>) -> u32 {
    if let Some(t) = topology {
        let thread_mask_width = u8::BITS - (t.0 - 1).leading_zeros();
        let core_mask_width = u8::BITS - (t.1 - 1).leading_zeros();
        let die_mask_width = u8::BITS - (t.2 - 1).leading_zeros();

        let thread_id = cpu_id % (t.0 as u32);
        let core_id = cpu_id / (t.0 as u32) % (t.1 as u32);
        let die_id = cpu_id / ((t.0 * t.1) as u32) % (t.2 as u32);
        let socket_id = cpu_id / ((t.0 * t.1 * t.2) as u32);

        return thread_id
            | (core_id << thread_mask_width)
            | (die_id << (thread_mask_width + core_mask_width))
            | (socket_id << (thread_mask_width + core_mask_width + die_mask_width));
    }

    cpu_id
}
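
// A worked example of the packing above, assuming a topology of 2 threads per
// core, 3 cores per die and 1 die per package: the thread field is 1 bit wide
// and the core field 2 bits, so cpu_id 6 (thread 0 of core 0 on socket 1)
// packs to 1 << (1 + 2) = 8. `test_get_x2apic_id` at the bottom of this file
// checks exactly this case.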

#[derive(Copy, Clone, Debug)]
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}

pub struct CpuidPatch {
    pub function: u32,
    pub index: u32,
    pub flags_bit: Option<u8>,
    pub eax_bit: Option<u8>,
    pub ebx_bit: Option<u8>,
    pub ecx_bit: Option<u8>,
    pub edx_bit: Option<u8>,
}

impl CpuidPatch {
    pub fn get_cpuid_reg(
        cpuid: &[CpuIdEntry],
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
    ) -> Option<u32> {
        for entry in cpuid.iter() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                return match reg {
                    CpuidReg::EAX => Some(entry.eax),
                    CpuidReg::EBX => Some(entry.ebx),
                    CpuidReg::ECX => Some(entry.ecx),
                    CpuidReg::EDX => Some(entry.edx),
                };
            }
        }

        None
    }

    pub fn set_cpuid_reg(
        cpuid: &mut Vec<CpuIdEntry>,
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
        value: u32,
    ) {
        let mut entry_found = false;
        for entry in cpuid.iter_mut() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                entry_found = true;
                match reg {
                    CpuidReg::EAX => {
                        entry.eax = value;
                    }
                    CpuidReg::EBX => {
                        entry.ebx = value;
                    }
                    CpuidReg::ECX => {
                        entry.ecx = value;
                    }
                    CpuidReg::EDX => {
                        entry.edx = value;
                    }
                }
            }
        }

        if entry_found {
            return;
        }

        // Entry not found, so let's add it.
        if let Some(index) = index {
            let mut entry = CpuIdEntry {
                function,
                index,
                flags: CPUID_FLAG_VALID_INDEX,
                ..Default::default()
            };
            match reg {
                CpuidReg::EAX => {
                    entry.eax = value;
                }
                CpuidReg::EBX => {
                    entry.ebx = value;
                }
                CpuidReg::ECX => {
                    entry.ecx = value;
                }
                CpuidReg::EDX => {
                    entry.edx = value;
                }
            }

            cpuid.push(entry);
        }
    }

    pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: Vec<CpuidPatch>) {
        for entry in cpuid {
            for patch in patches.iter() {
                if entry.function == patch.function && entry.index == patch.index {
                    if let Some(flags_bit) = patch.flags_bit {
                        entry.flags |= 1 << flags_bit;
                    }
                    if let Some(eax_bit) = patch.eax_bit {
                        entry.eax |= 1 << eax_bit;
                    }
                    if let Some(ebx_bit) = patch.ebx_bit {
                        entry.ebx |= 1 << ebx_bit;
                    }
                    if let Some(ecx_bit) = patch.ecx_bit {
                        entry.ecx |= 1 << ecx_bit;
                    }
                    if let Some(edx_bit) = patch.edx_bit {
                        entry.edx |= 1 << edx_bit;
                    }
                }
            }
        }
    }

    pub fn is_feature_enabled(
        cpuid: &[CpuIdEntry],
        function: u32,
        index: u32,
        reg: CpuidReg,
        feature_bit: usize,
    ) -> bool {
        let mask = 1 << feature_bit;

        for entry in cpuid {
            if entry.function == function && entry.index == index {
                let reg_val = match reg {
                    CpuidReg::EAX => entry.eax,
                    CpuidReg::EBX => entry.ebx,
                    CpuidReg::ECX => entry.ecx,
                    CpuidReg::EDX => entry.edx,
                };

                return (reg_val & mask) == mask;
            }
        }

        false
    }
}
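
// A small, self-contained illustration of the helpers above: setting a
// register on a (function, index) pair that is not present appends a new
// entry carrying CPUID_FLAG_VALID_INDEX. Kept as a test so the sketch stays
// checked; the leaf number used here is arbitrary.
#[cfg(test)]
mod cpuid_patch_example {
    use super::*;

    #[test]
    fn set_cpuid_reg_appends_missing_entry() {
        let mut cpuid: Vec<CpuIdEntry> = Vec::new();
        CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, Some(0), CpuidReg::EDX, 42);
        assert_eq!(cpuid.len(), 1);
        assert_eq!(cpuid[0].edx, 42);
        assert_eq!(cpuid[0].flags, CPUID_FLAG_VALID_INDEX);
    }
}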

#[derive(Debug)]
enum CpuidCompatibleCheck {
    BitwiseSubset, // bitwise subset
    Equal,         // equal in value
    NumNotGreater, // smaller or equal as a number
}

pub struct CpuidFeatureEntry {
    function: u32,
    index: u32,
    feature_reg: CpuidReg,
    compatible_check: CpuidCompatibleCheck,
}

impl CpuidFeatureEntry {
    fn checked_feature_entry_list() -> Vec<CpuidFeatureEntry> {
        vec![
            // The following list includes all hardware feature bits from
            // the CPUID Wiki Page: https://en.wikipedia.org/wiki/CPUID
            // Leaf 0x1, ECX/EDX, feature bits
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7, EAX/EBX/ECX/EDX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7 subleaf 0x1, EAX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 1,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x8000_0001, ECX/EDX, CPUID feature bits
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
            // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            // Leaf 0x4000_0001, EAX/EBX/ECX/EDX, KVM CPUID features
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
        ]
    }

    fn get_features_from_cpuid(
        cpuid: &[CpuIdEntry],
        feature_entry_list: &[CpuidFeatureEntry],
    ) -> Vec<u32> {
        let mut features = vec![0; feature_entry_list.len()];
        for (i, feature_entry) in feature_entry_list.iter().enumerate() {
            for cpuid_entry in cpuid {
                if cpuid_entry.function == feature_entry.function
                    && cpuid_entry.index == feature_entry.index
                {
                    match feature_entry.feature_reg {
                        CpuidReg::EAX => {
                            features[i] = cpuid_entry.eax;
                        }
                        CpuidReg::EBX => {
                            features[i] = cpuid_entry.ebx;
                        }
                        CpuidReg::ECX => {
                            features[i] = cpuid_entry.ecx;
                        }
                        CpuidReg::EDX => {
                            features[i] = cpuid_entry.edx;
                        }
                    }

                    break;
                }
            }
        }

        features
    }

    // The function returns `Error` (a.k.a. "incompatible") when the CPUID features
    // from `src_vm_cpuid` are not a subset of those of the `dest_vm_cpuid`.
    pub fn check_cpuid_compatibility(
        src_vm_cpuid: &[CpuIdEntry],
        dest_vm_cpuid: &[CpuIdEntry],
    ) -> Result<(), Error> {
        let feature_entry_list = &Self::checked_feature_entry_list();
        let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list);
        let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list);

        // Loop over the feature bits and check whether the 'source vm' features
        // are a subset of the 'destination vm' features.
        let mut compatible = true;
        for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features
            .iter()
            .zip(dest_vm_features.iter())
            .enumerate()
        {
            let entry = &feature_entry_list[i];
            let entry_compatible = match entry.compatible_check {
                CpuidCompatibleCheck::BitwiseSubset => {
                    let different_feature_bits = src_vm_feature ^ dest_vm_feature;
                    let src_vm_feature_bits_only = different_feature_bits & src_vm_feature;
                    src_vm_feature_bits_only == 0
                }
                CpuidCompatibleCheck::Equal => src_vm_feature == dest_vm_feature,
                CpuidCompatibleCheck::NumNotGreater => src_vm_feature <= dest_vm_feature,
            };
            if !entry_compatible {
                error!(
                    "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \
                    compatible_check='{:?}', source VM feature='{:#04x}', destination VM feature='{:#04x}'.",
                    entry.function, entry.index, entry.feature_reg,
                    entry.compatible_check, src_vm_feature, dest_vm_feature
                );

                compatible = false;
            }
        }

        if compatible {
            info!("No CPU incompatibility detected.");
            Ok(())
        } else {
            Err(Error::CpuidCheckCompatibility)
        }
    }
}
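
// A worked example of the BitwiseSubset rule above: a source feature word of
// 0b1010 checked against a destination word of 0b1110 gives
// different_feature_bits = 0b0100 and src_vm_feature_bits_only = 0b0000, so
// the source is a subset and the entry is compatible. With the operands
// swapped, bit 2 remains set on the source side and the check fails.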

pub fn generate_common_cpuid(
    hypervisor: &Arc<dyn hypervisor::Hypervisor>,
    config: &CpuidConfig,
) -> super::Result<Vec<CpuIdEntry>> {
    // SAFETY: cpuid called with valid leaves
    if unsafe { x86_64::__cpuid(1) }.ecx & (1 << HYPERVISOR_ECX_BIT) == 1 << HYPERVISOR_ECX_BIT {
        // SAFETY: cpuid called with valid leaves
        let hypervisor_cpuid = unsafe { x86_64::__cpuid(0x4000_0000) };

        let mut identifier: [u8; 12] = [0; 12];
        identifier[0..4].copy_from_slice(&hypervisor_cpuid.ebx.to_le_bytes()[..]);
        identifier[4..8].copy_from_slice(&hypervisor_cpuid.ecx.to_le_bytes()[..]);
        identifier[8..12].copy_from_slice(&hypervisor_cpuid.edx.to_le_bytes()[..]);

        info!(
            "Running under nested virtualisation. Hypervisor string: {}",
            String::from_utf8_lossy(&identifier)
        );
    }

    info!(
        "Generating guest CPUID with physical address size: {}",
        config.phys_bits
    );
    #[allow(unused_mut)]
    let mut cpuid_patches = vec![
        // Patch hypervisor bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(HYPERVISOR_ECX_BIT),
            edx_bit: None,
        },
        // Enable MTRR feature
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: None,
            edx_bit: Some(MTRR_EDX_BIT),
        },
    ];

    #[cfg(feature = "kvm")]
    if matches!(
        hypervisor.hypervisor_type(),
        hypervisor::HypervisorType::Kvm
    ) {
        // Patch tsc deadline timer bit
        cpuid_patches.push(CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
            edx_bit: None,
        });
    }

    // Supported CPUID
    let mut cpuid = hypervisor
        .get_supported_cpuid()
        .map_err(Error::CpuidGetSupported)?;

    CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);

    if let Some(sgx_epc_sections) = &config.sgx_epc_sections {
        update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
    }

    #[cfg(feature = "tdx")]
    let tdx_capabilities = if config.tdx {
        let caps = hypervisor
            .tdx_capabilities()
            .map_err(Error::TdxCapabilities)?;
        info!("TDX capabilities {:#?}", caps);
        Some(caps)
    } else {
        None
    };

    // Update some existing CPUID
    for entry in cpuid.as_mut_slice().iter_mut() {
        match entry.function {
            // Clear AMX related bits if the AMX feature is not enabled
            0x7 => {
                if !config.amx && entry.index == 0 {
                    entry.edx &= !((1 << AMX_BF16) | (1 << AMX_TILE) | (1 << AMX_INT8))
                }
            }
            0xd =>
            {
                #[cfg(feature = "tdx")]
                if let Some(caps) = &tdx_capabilities {
                    let xcr0_mask: u64 = 0x82ff;
                    let xss_mask: u64 = !xcr0_mask;
                    if entry.index == 0 {
                        entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32);
                        entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32;
                    } else if entry.index == 1 {
                        entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32);
                        entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32;
                    }
                }
            }
            // Copy host L1 cache details if not populated by KVM
            0x8000_0005 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0005 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0005) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Copy host L2 cache details if not populated by KVM
            0x8000_0006 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Set CPU physical bits
            0x8000_0008 => {
                entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff);
            }
            0x4000_0001 => {
                // These features are not supported by TDX
                #[cfg(feature = "tdx")]
                if config.tdx {
                    entry.eax &= !((1 << KVM_FEATURE_CLOCKSOURCE_BIT)
                        | (1 << KVM_FEATURE_CLOCKSOURCE2_BIT)
                        | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)
                        | (1 << KVM_FEATURE_ASYNC_PF_BIT)
                        | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT)
                        | (1 << KVM_FEATURE_STEAL_TIME_BIT))
                }
            }
            _ => {}
        }
    }

    // Copy CPU identification string
    for i in 0x8000_0002..=0x8000_0004 {
        cpuid.retain(|c| c.function != i);
        // SAFETY: call cpuid with valid leaves
        let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
        cpuid.push(CpuIdEntry {
            function: i,
            eax: leaf.eax,
            ebx: leaf.ebx,
            ecx: leaf.ecx,
            edx: leaf.edx,
            ..Default::default()
        });
    }

    if config.kvm_hyperv {
        // Remove conflicting entries
        cpuid.retain(|c| c.function != 0x4000_0000);
        cpuid.retain(|c| c.function != 0x4000_0001);
        // See "Hypervisor Top Level Functional Specification" for details
        // Compliance with "Hv#1" requires leaves up to 0x4000_000a
        cpuid.push(CpuIdEntry {
            function: 0x4000_0000,
            eax: 0x4000_000a, // Maximum cpuid leaf
            ebx: 0x756e694c,  // "Linu"
            ecx: 0x564b2078,  // "x KV"
            edx: 0x7648204d,  // "M Hv"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0001,
            eax: 0x31237648, // "Hv#1"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0002,
            eax: 0x3839,  // "Build number"
            ebx: 0xa0000, // "Version"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0003,
            eax: (1 << 1) // AccessPartitionReferenceCounter
                | (1 << 2) // AccessSynicRegs
                | (1 << 3) // AccessSyntheticTimerRegs
                | (1 << 9), // AccessPartitionReferenceTsc
            edx: 1 << 3, // CPU dynamic partitioning
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0004,
            eax: 1 << 5, // Recommend relaxed timing
            ..Default::default()
        });
        for i in 0x4000_0005..=0x4000_000a {
            cpuid.push(CpuIdEntry {
                function: i,
                ..Default::default()
            });
        }
    }

    Ok(cpuid)
}

pub fn configure_vcpu(
    vcpu: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>,
    cpuid: Vec<CpuIdEntry>,
    kvm_hyperv: bool,
    cpu_vendor: CpuVendor,
    topology: Option<(u8, u8, u8)>,
) -> super::Result<()> {
    let x2apic_id = get_x2apic_id(id as u32, topology);

    // Per vCPU CPUID changes; common are handled via generate_common_cpuid()
    let mut cpuid = cpuid;
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, x2apic_id);
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, x2apic_id);
    if matches!(cpu_vendor, CpuVendor::AMD) {
        CpuidPatch::set_cpuid_reg(&mut cpuid, 0x8000_001e, Some(0), CpuidReg::EAX, x2apic_id);
    }

    // Set ApicId in cpuid for each vcpu - found in cpuid ebx when eax = 1
    let mut apic_id_patched = false;
    for entry in &mut cpuid {
        if entry.function == 1 {
            entry.ebx &= 0xffffff;
            entry.ebx |= x2apic_id << 24;
            apic_id_patched = true;
            break;
        }
    }
    assert!(apic_id_patched);

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, cpu_vendor, id);
    }

    // The TSC frequency CPUID leaf should not be included when running with HyperV emulation
    if !kvm_hyperv {
        if let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? {
            // Need to check that the TSC doesn't vary with dynamic frequency
            // SAFETY: cpuid called with valid leaves
            if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx
                & (1u32 << INVARIANT_TSC_EDX_BIT)
                > 0
            {
                CpuidPatch::set_cpuid_reg(
                    &mut cpuid,
                    0x4000_0000,
                    None,
                    CpuidReg::EAX,
                    0x4000_0010,
                );
                cpuid.retain(|c| c.function != 0x4000_0010);
                cpuid.push(CpuIdEntry {
                    function: 0x4000_0010,
                    eax: tsc_khz,
                    ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's
                                   * APIC_BUS_CYCLE_NS */
                    ..Default::default()
                });
            };
        }
    }

    for c in &cpuid {
        debug!("{}", c);
    }

    vcpu.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        vcpu.enable_hyperv_synic().unwrap();
    }

    regs::setup_msrs(vcpu).map_err(Error::MsrsConfiguration)?;
    if let Some((kernel_entry_point, guest_memory)) = boot_setup {
        regs::setup_regs(vcpu, kernel_entry_point).map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&guest_memory.memory(), vcpu).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(vcpu).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}

/// Returns a Vec of the valid memory addresses.
///
/// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64 all addresses are valid from the start of the kernel except a
/// carve-out at the end of the 32-bit address space.
pub fn arch_memory_regions() -> Vec<(GuestAddress, usize, RegionType)> {
    vec![
        // 0 GiB ~ 3 GiB: memory before the gap
        (
            GuestAddress(0),
            layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
            RegionType::Ram,
        ),
        // 4 GiB ~ inf: memory after the gap
        (layout::RAM_64BIT_START, usize::MAX, RegionType::Ram),
        // 3 GiB ~ 3712 MiB: 32-bit device memory hole
        (
            layout::MEM_32BIT_RESERVED_START,
            layout::MEM_32BIT_DEVICES_SIZE as usize,
            RegionType::SubRegion,
        ),
        // 3712 MiB ~ 3968 MiB: 32-bit reserved memory hole
        (
            layout::MEM_32BIT_RESERVED_START.unchecked_add(layout::MEM_32BIT_DEVICES_SIZE),
            (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
            RegionType::Reserved,
        ),
    ]
}

/// Configures the system and should be called once per vm before starting vcpu threads.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
/// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator.
/// * `num_cpus` - Number of virtual CPUs the guest will have.
#[allow(clippy::too_many_arguments)]
pub fn configure_system(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    cmdline_size: usize,
    initramfs: &Option<InitramfsConfig>,
    _num_cpus: u8,
    setup_header: Option<setup_header>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
    serial_number: Option<&str>,
    uuid: Option<&str>,
    oem_strings: Option<&[&str]>,
    topology: Option<(u8, u8, u8)>,
) -> super::Result<()> {
    // Write EBDA address to location where ACPICA expects to find it
    guest_mem
        .write_obj((layout::EBDA_START.0 >> 4) as u16, layout::EBDA_POINTER)
        .map_err(Error::EbdaSetup)?;

    let size = smbios::setup_smbios(guest_mem, serial_number, uuid, oem_strings)
        .map_err(Error::SmbiosSetup)?;

    // Place the MP table after the SMBIOS table aligned to 16 bytes
    let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size);
    let offset = GuestAddress((offset.0 + 16) & !0xf);
    mptable::setup_mptable(offset, guest_mem, _num_cpus, topology).map_err(Error::MpTableSetup)?;

    // Check that the RAM is not smaller than the RSDP start address
    if let Some(rsdp_addr) = rsdp_addr {
        if rsdp_addr.0 > guest_mem.last_addr().0 {
            return Err(super::Error::RsdpPastRamEnd);
        }
    }

    match setup_header {
        Some(hdr) => configure_32bit_entry(
            guest_mem,
            cmdline_addr,
            cmdline_size,
            initramfs,
            hdr,
            rsdp_addr,
            sgx_epc_region,
        ),
        None => configure_pvh(
            guest_mem,
            cmdline_addr,
            initramfs,
            rsdp_addr,
            sgx_epc_region,
        ),
    }
}

type RamRange = (u64, u64);

/// Returns usable physical memory ranges for the guest.
/// These should be used to create E820_RAM memory map entries.
pub fn generate_ram_ranges(guest_mem: &GuestMemoryMmap) -> super::Result<Vec<RamRange>> {
    // Merge contiguous memory regions into one region.
    // Note: memory regions from "GuestMemory" are sorted and non-zero sized.
    let ram_regions = {
        let mut ram_regions = Vec::new();
        let mut current_start = guest_mem
            .iter()
            .next()
            .map(GuestMemoryRegion::start_addr)
            .expect("GuestMemory must have one memory region at least")
            .raw_value();
        let mut current_end = current_start;

        for (start, size) in guest_mem
            .iter()
            .map(|m| (m.start_addr().raw_value(), m.len()))
        {
            if current_end == start {
                // This zone is contiguous with the previous one.
                current_end += size;
            } else {
                ram_regions.push((current_start, current_end));

                current_start = start;
                current_end = start + size;
            }
        }

        ram_regions.push((current_start, current_end));

        ram_regions
    };

    // Create the memory map entry for memory region before the gap
    let mut ram_ranges = vec![];

    // Generate the first usable physical memory range before the gap. The e820 map
    // should only report memory above 1 MiB.
    let first_ram_range = {
        let (first_region_start, first_region_end) =
            ram_regions.first().ok_or(super::Error::MemmapTableSetup)?;
        let high_ram_start = layout::HIGH_RAM_START.raw_value();
        let mem_32bit_reserved_start = layout::MEM_32BIT_RESERVED_START.raw_value();

        if !((first_region_start <= &high_ram_start)
            && (first_region_end > &high_ram_start)
            && (first_region_end <= &mem_32bit_reserved_start))
        {
            error!(
                "Unexpected first memory region layout: (start: 0x{:08x}, end: 0x{:08x}). \
                high_ram_start: 0x{:08x}, mem_32bit_reserved_start: 0x{:08x}",
                first_region_start, first_region_end, high_ram_start, mem_32bit_reserved_start
            );

            return Err(super::Error::MemmapTableSetup);
        }

        info!(
            "first usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
            high_ram_start, first_region_end
        );

        (high_ram_start, *first_region_end)
    };
    ram_ranges.push(first_ram_range);

    // Generate additional usable physical memory ranges after the gap, if any.
    for ram_region in ram_regions.iter().skip(1) {
        info!(
            "found usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
            ram_region.0, ram_region.1
        );

        ram_ranges.push(*ram_region);
    }

    Ok(ram_ranges)
}
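
// A sketch of the merging above, assuming two adjacent guest regions
// (0x0, 0x4000_0000) and (0x4000_0000, 0x4000_0000) plus one starting at
// 0x1_0000_0000: the first two collapse into a single (0x0, 0x8000_0000)
// range, the third starts a new one, and the resulting e820 map carries two
// RAM entries, the first truncated to begin at HIGH_RAM_START (1 MiB).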

fn configure_pvh(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;

    let mut start_info = hvm_start_info {
        magic: XEN_HVM_START_MAGIC_VALUE,
        version: 1, // pvh has version 1
        nr_modules: 0,
        cmdline_paddr: cmdline_addr.raw_value(),
        memmap_paddr: layout::MEMMAP_START.raw_value(),
        ..Default::default()
    };

    if let Some(rsdp_addr) = rsdp_addr {
        start_info.rsdp_paddr = rsdp_addr.0;
    }

    if let Some(initramfs_config) = initramfs {
        // The initramfs has been written to guest memory already, here we just need to
        // create the module structure that describes it.
        let ramdisk_mod = hvm_modlist_entry {
            paddr: initramfs_config.address.raw_value(),
            size: initramfs_config.size as u64,
            ..Default::default()
        };

        start_info.nr_modules += 1;
        start_info.modlist_paddr = layout::MODLIST_START.raw_value();

        // Write the modlist struct to guest memory.
        guest_mem
            .write_obj(ramdisk_mod, layout::MODLIST_START)
            .map_err(super::Error::ModlistSetup)?;
    }

    // Vector to hold the memory map entries which need to be written to guest
    // memory at MEMMAP_START after all of the mappings are recorded.
    let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

    // Create the memory map entries.
    add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);

    // Get usable physical memory ranges
    let ram_ranges = generate_ram_ranges(guest_mem)?;

    // Create e820 memory map entries
    for ram_range in ram_ranges {
        info!(
            "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}",
            ram_range.0, ram_range.1
        );
        add_memmap_entry(
            &mut memmap,
            ram_range.0,
            ram_range.1 - ram_range.0,
            E820_RAM,
        );
    }

    add_memmap_entry(
        &mut memmap,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    );

    if let Some(sgx_epc_region) = sgx_epc_region {
        add_memmap_entry(
            &mut memmap,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        );
    }

    start_info.memmap_entries = memmap.len() as u32;

    // Copy the vector with the memmap table to the MEMMAP_START address
    // which is already saved in the memmap_paddr field of hvm_start_info struct.
    let mut memmap_start_addr = layout::MEMMAP_START;

    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, write it to guest memory.
    for memmap_entry in memmap {
        guest_mem
            .write_obj(memmap_entry, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at the PVH_INFO_START
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}

fn configure_32bit_entry(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    cmdline_size: usize,
    initramfs: &Option<InitramfsConfig>,
    setup_hdr: setup_header,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    const KERNEL_LOADER_OTHER: u8 = 0xff;

    // Use the provided setup header
    let mut params = boot_params {
        hdr: setup_hdr,
        ..Default::default()
    };

    // Common bootparams settings
    if params.hdr.type_of_loader == 0 {
        params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
    }
    params.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32;
    params.hdr.cmdline_size = cmdline_size as u32;

    if let Some(initramfs_config) = initramfs {
        params.hdr.ramdisk_image = initramfs_config.address.raw_value() as u32;
        params.hdr.ramdisk_size = initramfs_config.size as u32;
    }

    add_e820_entry(&mut params, 0, layout::EBDA_START.raw_value(), E820_RAM)?;

    let mem_end = guest_mem.last_addr();
    if mem_end < layout::MEM_32BIT_RESERVED_START {
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            mem_end.unchecked_offset_from(layout::HIGH_RAM_START) + 1,
            E820_RAM,
        )?;
    } else {
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            layout::MEM_32BIT_RESERVED_START.unchecked_offset_from(layout::HIGH_RAM_START),
            E820_RAM,
        )?;
        if mem_end > layout::RAM_64BIT_START {
            add_e820_entry(
                &mut params,
                layout::RAM_64BIT_START.raw_value(),
                mem_end.unchecked_offset_from(layout::RAM_64BIT_START) + 1,
                E820_RAM,
            )?;
        }
    }

    add_e820_entry(
        &mut params,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    )?;

    if let Some(sgx_epc_region) = sgx_epc_region {
        add_e820_entry(
            &mut params,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        )?;
    }

    if let Some(rsdp_addr) = rsdp_addr {
        params.acpi_rsdp_addr = rsdp_addr.0;
    }

    let zero_page_addr = layout::ZERO_PAGE_START;
    guest_mem
        .checked_offset(zero_page_addr, mem::size_of::<boot_params>())
        .ok_or(super::Error::ZeroPagePastRamEnd)?;
    guest_mem
        .write_obj(params, zero_page_addr)
        .map_err(super::Error::ZeroPageSetup)?;

    Ok(())
}

/// Add an e820 region to the e820 map.
/// Returns Ok(()) if successful, or an error if there is no space left in the map.
fn add_e820_entry(
    params: &mut boot_params,
    addr: u64,
    size: u64,
    mem_type: u32,
) -> Result<(), Error> {
    if params.e820_entries >= params.e820_table.len() as u8 {
        return Err(Error::E820Configuration);
    }

    params.e820_table[params.e820_entries as usize].addr = addr;
    params.e820_table[params.e820_entries as usize].size = size;
    params.e820_table[params.e820_entries as usize].type_ = mem_type;
    params.e820_entries += 1;

    Ok(())
}

fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
    // Add the table entry to the vector
    memmap.push(hvm_memmap_table_entry {
        addr,
        size,
        type_: mem_type,
        reserved: 0,
    });
}

/// Returns the memory address where the initramfs could be loaded.
pub fn initramfs_load_addr(
    guest_mem: &GuestMemoryMmap,
    initramfs_size: usize,
) -> super::Result<u64> {
    let first_region = guest_mem
        .find_region(GuestAddress::new(0))
        .ok_or(super::Error::InitramfsAddress)?;
    // It's safe to cast to usize because the size of a region can't be greater than usize.
    let lowmem_size = first_region.len() as usize;

    if lowmem_size < initramfs_size {
        return Err(super::Error::InitramfsAddress);
    }

    let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
    Ok(aligned_addr)
}
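
// Example of the placement above, assuming a 256 MiB low-memory region, a
// 2 MiB initramfs and 4 KiB pages: 0x1000_0000 - 0x20_0000 = 0xFE0_0000,
// which is already page-aligned, so the initramfs lands as high as possible
// in low memory.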

pub fn get_host_cpu_phys_bits(hypervisor: &Arc<dyn hypervisor::Hypervisor>) -> u8 {
    // SAFETY: call cpuid with valid leaves
    unsafe {
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
        let reduced = if leaf.eax >= 0x8000_001f
            && matches!(hypervisor.get_cpu_vendor(), CpuVendor::AMD)
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if leaf.eax >= 0x8000_0008 {
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            36
        }
    }
}

fn update_cpuid_topology(
    cpuid: &mut Vec<CpuIdEntry>,
    threads_per_core: u8,
    cores_per_die: u8,
    dies_per_package: u8,
    cpu_vendor: CpuVendor,
    id: u8,
) {
    let x2apic_id = get_x2apic_id(
        id as u32,
        Some((threads_per_core, cores_per_die, dies_per_package)),
    );

    let thread_width = 8 - (threads_per_core - 1).leading_zeros();
    let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
    let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;

    let mut cpu_ebx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX).unwrap_or(0);
    cpu_ebx |= ((dies_per_package as u32) * (cores_per_die as u32) * (threads_per_core as u32))
        & (0xff << 16);
    CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX, cpu_ebx);

    let mut cpu_edx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX).unwrap_or(0);
    cpu_edx |= 1 << 28;
    CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX, cpu_edx);

    // CPU Topology leaf 0xb
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(1),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EDX, x2apic_id);

    // CPU Topology leaf 0x1f
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(1),
        CpuidReg::EBX,
        u32::from(cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(2),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);

    if matches!(cpu_vendor, CpuVendor::AMD) {
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::EBX,
            ((threads_per_core as u32 - 1) << 8) | (x2apic_id & 0xff),
        );
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::ECX,
            ((dies_per_package as u32 - 1) << 8) | (thread_width + die_width) & 0xff,
        );
        CpuidPatch::set_cpuid_reg(cpuid, 0x8000_001e, Some(0), CpuidReg::EDX, 0);
        if cores_per_die * threads_per_core > 1 {
            let ecx =
                CpuidPatch::get_cpuid_reg(cpuid, 0x8000_0001, Some(0), CpuidReg::ECX).unwrap_or(0);
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0001,
                Some(0),
                CpuidReg::ECX,
                ecx | (1u32 << 1) | (1u32 << 22),
            );
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x0000_0001,
                Some(0),
                CpuidReg::EBX,
                (x2apic_id << 24) | (8 << 8) | (((cores_per_die * threads_per_core) as u32) << 16),
            );
            let cpuid_patches = vec![
                // Patch HTT (Hyper-Threading) bit in leaf 0x1 EDX
                CpuidPatch {
                    function: 1,
                    index: 0,
                    flags_bit: None,
                    eax_bit: None,
                    ebx_bit: None,
                    ecx_bit: None,
                    edx_bit: Some(28),
                },
            ];
            CpuidPatch::patch_cpuid(cpuid, cpuid_patches);
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0008,
                Some(0),
                CpuidReg::ECX,
                ((thread_width + core_width + die_width) << 12)
                    | ((cores_per_die * threads_per_core) - 1) as u32,
            );
        } else {
            CpuidPatch::set_cpuid_reg(cpuid, 0x8000_0008, Some(0), CpuidReg::ECX, 0u32);
        }
    }
}
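
// Worked example of the widths above, assuming 2 threads per core, 4 cores
// per die and 1 die per package: thread_width = 1, core_width = 3 and
// die_width = 3, so leaf 0xb reports a 1-bit APIC ID shift at the SMT level
// (subleaf 0) and a 3-bit shift at the core level (subleaf 1).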

// The goal is to update the CPUID sub-leaves to reflect the number of EPC
// sections exposed to the guest.
fn update_cpuid_sgx(
    cpuid: &mut Vec<CpuIdEntry>,
    epc_sections: &[SgxEpcSection],
) -> Result<(), Error> {
    // Something's wrong if there's no EPC section.
    if epc_sections.is_empty() {
        return Err(Error::NoSgxEpcSection);
    }
    // We can't go further if the hypervisor does not support the SGX feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
        return Err(Error::MissingSgxFeature);
    }
    // We can't go further if the hypervisor does not support the SGX_LC feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
        return Err(Error::MissingSgxLaunchControlFeature);
    }

    // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
    // properties such as confidentiality and integrity.
    // SAFETY: call cpuid with valid leaves
    let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };

    for (i, epc_section) in epc_sections.iter().enumerate() {
        let subleaf_idx = i + 2;
        let start = epc_section.start().raw_value();
        let size = epc_section.size();
        let eax = (start & 0xffff_f000) as u32 | 0x1;
        let ebx = (start >> 32) as u32;
        let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
        let edx = (size >> 32) as u32;
        // SGX EPC enumeration leaf 0x12
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
    }

    // Add one NULL entry to terminate the dynamic list
    let subleaf_idx = epc_sections.len() + 2;
    // SGX EPC enumeration leaf 0x12
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);

    Ok(())
}
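
// Sketch of the encoding above for a hypothetical EPC section at 0x8000_0000
// with a size of 0x40_0000: subleaf 2 gets eax = 0x8000_0001 (the low address
// bits combined with sub-leaf type 1), ebx = 0, ecx = 0x40_0000 plus the
// host's low property bits, and edx = 0.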

#[cfg(test)]
mod tests {
    use linux_loader::loader::bootparam::boot_e820_entry;

    use super::*;

    #[test]
    fn regions_base_addr() {
        let regions = arch_memory_regions();
        assert_eq!(4, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(1 << 32), regions[1].0);
    }

    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let config_err = configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            1,
            None,
            Some(layout::RSDP_POINTER),
            None,
            None,
            None,
            None,
            None,
        );
        config_err.unwrap_err();

        // Now assigning some memory that falls before the 32bit memory hole.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram && r.1 != usize::MAX)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();

        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // Now assigning some memory that falls after the 32bit memory hole.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| {
                if r.1 == usize::MAX {
                    (r.0, 128 << 20)
                } else {
                    (r.0, r.1)
                }
            })
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
    }
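
    // An additional check sketching how `CpuidPatch::is_feature_enabled` reads
    // a single bit out of a CPUID entry; the leaf and bit positions used here
    // are arbitrary.
    #[test]
    fn test_is_feature_enabled() {
        let cpuid = vec![CpuIdEntry {
            function: 7,
            index: 0,
            ebx: 1 << 2,
            ..Default::default()
        }];
        assert!(CpuidPatch::is_feature_enabled(&cpuid, 7, 0, CpuidReg::EBX, 2));
        assert!(!CpuidPatch::is_feature_enabled(&cpuid, 7, 0, CpuidReg::EDX, 2));
    }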

    #[test]
    fn test_add_e820_entry() {
        let e820_table = [(boot_e820_entry {
            addr: 0x1,
            size: 4,
            type_: 1,
        }); 128];

        let expected_params = boot_params {
            e820_table,
            e820_entries: 1,
            ..Default::default()
        };

        let mut params: boot_params = Default::default();
        add_e820_entry(
            &mut params,
            e820_table[0].addr,
            e820_table[0].size,
            e820_table[0].type_,
        )
        .unwrap();
        assert_eq!(
            format!("{:?}", params.e820_table[0]),
            format!("{:?}", expected_params.e820_table[0])
        );
        assert_eq!(params.e820_entries, expected_params.e820_entries);

        // Exercise the scenario where the field storing the length of the e820 entry table
        // is bigger than the allocated memory.
        params.e820_entries = params.e820_table.len() as u8 + 1;
        add_e820_entry(
            &mut params,
            e820_table[0].addr,
            e820_table[0].size,
            e820_table[0].type_,
        )
        .unwrap_err();
    }

    #[test]
    fn test_add_memmap_entry() {
        let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

        let expected_memmap = vec![
            hvm_memmap_table_entry {
                addr: 0x0,
                size: 0x1000,
                type_: E820_RAM,
                ..Default::default()
            },
            hvm_memmap_table_entry {
                addr: 0x10000,
                size: 0xa000,
                type_: E820_RESERVED,
                ..Default::default()
            },
        ];

        add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM);
        add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED);

        assert_eq!(format!("{memmap:?}"), format!("{expected_memmap:?}"));
    }

    #[test]
    fn test_get_x2apic_id() {
        let x2apic_id = get_x2apic_id(0, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 0);

        let x2apic_id = get_x2apic_id(1, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 1);

        let x2apic_id = get_x2apic_id(2, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 2);

        let x2apic_id = get_x2apic_id(6, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 8);

        let x2apic_id = get_x2apic_id(7, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 9);

        let x2apic_id = get_x2apic_id(8, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 10);
    }
}