// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use std::sync::Arc;
pub mod interrupts;
pub mod layout;
mod mpspec;
mod mptable;
pub mod regs;
use crate::GuestMemoryMmap;
use crate::InitramfsConfig;
use crate::RegionType;
use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX};
use hypervisor::{CpuVendor, HypervisorCpuError, HypervisorError};
use linux_loader::loader::bootparam::boot_params;
use linux_loader::loader::elf::start_info::{
    hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
};
use std::collections::BTreeMap;
use std::mem;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
    GuestMemoryRegion, GuestUsize,
};
mod smbios;
use std::arch::x86_64;
#[cfg(feature = "tdx")]
pub mod tdx;

// CPUID feature bits
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // tsc deadline timer ecx bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR (memory type range registers) edx bit.
39 const INVARIANT_TSC_EDX_BIT: u8 = 8; // Invariant TSC bit on 0x8000_0007 EDX 40 const AMX_BF16: u8 = 22; // AMX tile computation on bfloat16 numbers 41 const AMX_TILE: u8 = 24; // AMX tile load/store instructions 42 const AMX_INT8: u8 = 25; // AMX tile computation on 8-bit integers 43 44 // KVM feature bits 45 #[cfg(feature = "tdx")] 46 const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0; 47 #[cfg(feature = "tdx")] 48 const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3; 49 #[cfg(feature = "tdx")] 50 const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24; 51 #[cfg(feature = "tdx")] 52 const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4; 53 #[cfg(feature = "tdx")] 54 const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10; 55 #[cfg(feature = "tdx")] 56 const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5; 57 58 pub const _NSIG: i32 = 65; 59 60 #[derive(Debug, Copy, Clone)] 61 /// Specifies the entry point address where the guest must start 62 /// executing code, as well as which of the supported boot protocols 63 /// is to be used to configure the guest initial state. 
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
}

// E820 memory map entry types exposed to the guest in the PVH memory map.
const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;

/// A single SGX Enclave Page Cache (EPC) section: a guest-physical address
/// range backing enclave memory.
#[derive(Clone)]
pub struct SgxEpcSection {
    // Guest-physical start address of the section.
    start: GuestAddress,
    // Size of the section in bytes.
    size: GuestUsize,
}

impl SgxEpcSection {
    /// Creates a new EPC section covering `size` bytes starting at `start`.
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    /// Guest-physical start address of the section.
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    /// Size of the section in bytes.
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}

/// The overall SGX EPC region reserved in the guest address space, composed
/// of one or more named [`SgxEpcSection`]s.
#[derive(Clone)]
pub struct SgxEpcRegion {
    // Guest-physical start address of the whole region.
    start: GuestAddress,
    // Total size of the region in bytes.
    size: GuestUsize,
    // Sections keyed by identifier, ordered by key.
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    /// Creates an empty EPC region covering `size` bytes starting at `start`.
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    /// Guest-physical start address of the whole region.
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    /// Total size of the region in bytes.
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    /// The sections composing this region, keyed by identifier.
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    /// Adds (or replaces) a section under the given identifier.
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `DataInit`) where:
// * the type that is implementing the trait is foreign or
// * all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
124 #[derive(Copy, Clone, Default)] 125 struct StartInfoWrapper(hvm_start_info); 126 127 #[derive(Copy, Clone, Default)] 128 struct MemmapTableEntryWrapper(hvm_memmap_table_entry); 129 130 #[derive(Copy, Clone, Default)] 131 struct ModlistEntryWrapper(hvm_modlist_entry); 132 133 // SAFETY: data structure only contain a series of integers 134 unsafe impl ByteValued for StartInfoWrapper {} 135 // SAFETY: data structure only contain a series of integers 136 unsafe impl ByteValued for MemmapTableEntryWrapper {} 137 // SAFETY: data structure only contain a series of integers 138 unsafe impl ByteValued for ModlistEntryWrapper {} 139 140 // This is a workaround to the Rust enforcement specifying that any implementation of a foreign 141 // trait (in this case `DataInit`) where: 142 // * the type that is implementing the trait is foreign or 143 // * all of the parameters being passed to the trait (if there are any) are also foreign 144 // is prohibited. 145 #[derive(Copy, Clone, Default)] 146 struct BootParamsWrapper(boot_params); 147 148 // SAFETY: BootParamsWrap is a wrapper over `boot_params` (a series of ints). 149 unsafe impl ByteValued for BootParamsWrapper {} 150 151 pub struct CpuidConfig { 152 pub sgx_epc_sections: Option<Vec<SgxEpcSection>>, 153 pub phys_bits: u8, 154 pub kvm_hyperv: bool, 155 #[cfg(feature = "tdx")] 156 pub tdx: bool, 157 pub amx: bool, 158 } 159 160 #[derive(Debug)] 161 pub enum Error { 162 /// Error writing MP table to memory. 163 MpTableSetup(mptable::Error), 164 165 /// Error configuring the general purpose registers 166 RegsConfiguration(regs::Error), 167 168 /// Error configuring the special registers 169 SregsConfiguration(regs::Error), 170 171 /// Error configuring the floating point related registers 172 FpuConfiguration(regs::Error), 173 174 /// Error configuring the MSR registers 175 MsrsConfiguration(regs::Error), 176 177 /// Failed to set supported CPUs. 
178 SetSupportedCpusFailed(anyhow::Error), 179 180 /// Cannot set the local interruption due to bad configuration. 181 LocalIntConfiguration(anyhow::Error), 182 183 /// Error setting up SMBIOS table 184 SmbiosSetup(smbios::Error), 185 186 /// Could not find any SGX EPC section 187 NoSgxEpcSection, 188 189 /// Missing SGX CPU feature 190 MissingSgxFeature, 191 192 /// Missing SGX_LC CPU feature 193 MissingSgxLaunchControlFeature, 194 195 /// Error getting supported CPUID through the hypervisor (kvm/mshv) API 196 CpuidGetSupported(HypervisorError), 197 198 /// Error populating CPUID with KVM HyperV emulation details 199 CpuidKvmHyperV(vmm_sys_util::fam::Error), 200 201 /// Error populating CPUID with CPU identification 202 CpuidIdentification(vmm_sys_util::fam::Error), 203 204 /// Error checking CPUID compatibility 205 CpuidCheckCompatibility, 206 207 // Error writing EBDA address 208 EbdaSetup(vm_memory::GuestMemoryError), 209 210 // Error getting CPU TSC frequency 211 GetTscFrequency(HypervisorCpuError), 212 213 /// Error retrieving TDX capabilities through the hypervisor (kvm/mshv) API 214 #[cfg(feature = "tdx")] 215 TdxCapabilities(HypervisorError), 216 } 217 218 impl From<Error> for super::Error { 219 fn from(e: Error) -> super::Error { 220 super::Error::PlatformSpecific(e) 221 } 222 } 223 224 #[derive(Copy, Clone, Debug)] 225 pub enum CpuidReg { 226 EAX, 227 EBX, 228 ECX, 229 EDX, 230 } 231 232 pub struct CpuidPatch { 233 pub function: u32, 234 pub index: u32, 235 pub flags_bit: Option<u8>, 236 pub eax_bit: Option<u8>, 237 pub ebx_bit: Option<u8>, 238 pub ecx_bit: Option<u8>, 239 pub edx_bit: Option<u8>, 240 } 241 242 impl CpuidPatch { 243 pub fn set_cpuid_reg( 244 cpuid: &mut Vec<CpuIdEntry>, 245 function: u32, 246 index: Option<u32>, 247 reg: CpuidReg, 248 value: u32, 249 ) { 250 let mut entry_found = false; 251 for entry in cpuid.iter_mut() { 252 if entry.function == function && (index.is_none() || index.unwrap() == entry.index) { 253 entry_found = true; 254 
match reg { 255 CpuidReg::EAX => { 256 entry.eax = value; 257 } 258 CpuidReg::EBX => { 259 entry.ebx = value; 260 } 261 CpuidReg::ECX => { 262 entry.ecx = value; 263 } 264 CpuidReg::EDX => { 265 entry.edx = value; 266 } 267 } 268 } 269 } 270 271 if entry_found { 272 return; 273 } 274 275 // Entry not found, so let's add it. 276 if let Some(index) = index { 277 let mut entry = CpuIdEntry { 278 function, 279 index, 280 flags: CPUID_FLAG_VALID_INDEX, 281 ..Default::default() 282 }; 283 match reg { 284 CpuidReg::EAX => { 285 entry.eax = value; 286 } 287 CpuidReg::EBX => { 288 entry.ebx = value; 289 } 290 CpuidReg::ECX => { 291 entry.ecx = value; 292 } 293 CpuidReg::EDX => { 294 entry.edx = value; 295 } 296 } 297 298 cpuid.push(entry); 299 } 300 } 301 302 pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: Vec<CpuidPatch>) { 303 for entry in cpuid { 304 for patch in patches.iter() { 305 if entry.function == patch.function && entry.index == patch.index { 306 if let Some(flags_bit) = patch.flags_bit { 307 entry.flags |= 1 << flags_bit; 308 } 309 if let Some(eax_bit) = patch.eax_bit { 310 entry.eax |= 1 << eax_bit; 311 } 312 if let Some(ebx_bit) = patch.ebx_bit { 313 entry.ebx |= 1 << ebx_bit; 314 } 315 if let Some(ecx_bit) = patch.ecx_bit { 316 entry.ecx |= 1 << ecx_bit; 317 } 318 if let Some(edx_bit) = patch.edx_bit { 319 entry.edx |= 1 << edx_bit; 320 } 321 } 322 } 323 } 324 } 325 326 pub fn is_feature_enabled( 327 cpuid: &[CpuIdEntry], 328 function: u32, 329 index: u32, 330 reg: CpuidReg, 331 feature_bit: usize, 332 ) -> bool { 333 let mask = 1 << feature_bit; 334 335 for entry in cpuid { 336 if entry.function == function && entry.index == index { 337 let reg_val = match reg { 338 CpuidReg::EAX => entry.eax, 339 CpuidReg::EBX => entry.ebx, 340 CpuidReg::ECX => entry.ecx, 341 CpuidReg::EDX => entry.edx, 342 }; 343 344 return (reg_val & mask) == mask; 345 } 346 } 347 348 false 349 } 350 } 351 352 #[derive(Debug)] 353 enum CpuidCompatibleCheck { 354 BitwiseSubset, // 
bitwise subset 355 Equal, // equal in value 356 NumNotGreater, // smaller or equal as a number 357 } 358 359 pub struct CpuidFeatureEntry { 360 function: u32, 361 index: u32, 362 feature_reg: CpuidReg, 363 compatible_check: CpuidCompatibleCheck, 364 } 365 366 impl CpuidFeatureEntry { 367 fn checked_feature_entry_list() -> Vec<CpuidFeatureEntry> { 368 vec![ 369 // The following list includes all hardware features bits from 370 // the CPUID Wiki Page: https://en.wikipedia.org/wiki/CPUID 371 // Leaf 0x1, ECX/EDX, feature bits 372 CpuidFeatureEntry { 373 function: 1, 374 index: 0, 375 feature_reg: CpuidReg::ECX, 376 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 377 }, 378 CpuidFeatureEntry { 379 function: 1, 380 index: 0, 381 feature_reg: CpuidReg::EDX, 382 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 383 }, 384 // Leaf 0x7, EAX/EBX/ECX/EDX, extended features 385 CpuidFeatureEntry { 386 function: 7, 387 index: 0, 388 feature_reg: CpuidReg::EAX, 389 compatible_check: CpuidCompatibleCheck::NumNotGreater, 390 }, 391 CpuidFeatureEntry { 392 function: 7, 393 index: 0, 394 feature_reg: CpuidReg::EBX, 395 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 396 }, 397 CpuidFeatureEntry { 398 function: 7, 399 index: 0, 400 feature_reg: CpuidReg::ECX, 401 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 402 }, 403 CpuidFeatureEntry { 404 function: 7, 405 index: 0, 406 feature_reg: CpuidReg::EDX, 407 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 408 }, 409 // Leaf 0x7 subleaf 0x1, EAX, extended features 410 CpuidFeatureEntry { 411 function: 7, 412 index: 1, 413 feature_reg: CpuidReg::EAX, 414 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 415 }, 416 // Leaf 0x8000_0001, ECX/EDX, CPUID features bits 417 CpuidFeatureEntry { 418 function: 0x8000_0001, 419 index: 0, 420 feature_reg: CpuidReg::ECX, 421 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 422 }, 423 CpuidFeatureEntry { 424 function: 0x8000_0001, 425 index: 0, 426 
feature_reg: CpuidReg::EDX, 427 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 428 }, 429 // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html 430 // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE 431 CpuidFeatureEntry { 432 function: 0x4000_0000, 433 index: 0, 434 feature_reg: CpuidReg::EAX, 435 compatible_check: CpuidCompatibleCheck::NumNotGreater, 436 }, 437 CpuidFeatureEntry { 438 function: 0x4000_0000, 439 index: 0, 440 feature_reg: CpuidReg::EBX, 441 compatible_check: CpuidCompatibleCheck::Equal, 442 }, 443 CpuidFeatureEntry { 444 function: 0x4000_0000, 445 index: 0, 446 feature_reg: CpuidReg::ECX, 447 compatible_check: CpuidCompatibleCheck::Equal, 448 }, 449 CpuidFeatureEntry { 450 function: 0x4000_0000, 451 index: 0, 452 feature_reg: CpuidReg::EDX, 453 compatible_check: CpuidCompatibleCheck::Equal, 454 }, 455 // Leaf 0x4000_0001, EAX/EBX/ECX/EDX, KVM CPUID features 456 CpuidFeatureEntry { 457 function: 0x4000_0001, 458 index: 0, 459 feature_reg: CpuidReg::EAX, 460 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 461 }, 462 CpuidFeatureEntry { 463 function: 0x4000_0001, 464 index: 0, 465 feature_reg: CpuidReg::EBX, 466 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 467 }, 468 CpuidFeatureEntry { 469 function: 0x4000_0001, 470 index: 0, 471 feature_reg: CpuidReg::ECX, 472 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 473 }, 474 CpuidFeatureEntry { 475 function: 0x4000_0001, 476 index: 0, 477 feature_reg: CpuidReg::EDX, 478 compatible_check: CpuidCompatibleCheck::BitwiseSubset, 479 }, 480 ] 481 } 482 483 fn get_features_from_cpuid( 484 cpuid: &[CpuIdEntry], 485 feature_entry_list: &[CpuidFeatureEntry], 486 ) -> Vec<u32> { 487 let mut features = vec![0; feature_entry_list.len()]; 488 for (i, feature_entry) in feature_entry_list.iter().enumerate() { 489 for cpuid_entry in cpuid { 490 if cpuid_entry.function == feature_entry.function 491 && cpuid_entry.index == feature_entry.index 492 { 493 match 
feature_entry.feature_reg { 494 CpuidReg::EAX => { 495 features[i] = cpuid_entry.eax; 496 } 497 CpuidReg::EBX => { 498 features[i] = cpuid_entry.ebx; 499 } 500 CpuidReg::ECX => { 501 features[i] = cpuid_entry.ecx; 502 } 503 CpuidReg::EDX => { 504 features[i] = cpuid_entry.edx; 505 } 506 } 507 508 break; 509 } 510 } 511 } 512 513 features 514 } 515 516 // The function returns `Error` (a.k.a. "incompatible"), when the CPUID features from `src_vm_cpuid` 517 // is not a subset of those of the `dest_vm_cpuid`. 518 pub fn check_cpuid_compatibility( 519 src_vm_cpuid: &[CpuIdEntry], 520 dest_vm_cpuid: &[CpuIdEntry], 521 ) -> Result<(), Error> { 522 let feature_entry_list = &Self::checked_feature_entry_list(); 523 let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list); 524 let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list); 525 526 // Loop on feature bit and check if the 'source vm' feature is a subset 527 // of those of the 'destination vm' feature 528 let mut compatible = true; 529 for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features 530 .iter() 531 .zip(dest_vm_features.iter()) 532 .enumerate() 533 { 534 let entry = &feature_entry_list[i]; 535 let entry_compatible = match entry.compatible_check { 536 CpuidCompatibleCheck::BitwiseSubset => { 537 let different_feature_bits = src_vm_feature ^ dest_vm_feature; 538 let src_vm_feature_bits_only = different_feature_bits & src_vm_feature; 539 src_vm_feature_bits_only == 0 540 } 541 CpuidCompatibleCheck::Equal => src_vm_feature == dest_vm_feature, 542 CpuidCompatibleCheck::NumNotGreater => src_vm_feature <= dest_vm_feature, 543 }; 544 if !entry_compatible { 545 error!( 546 "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \ 547 compatilbe_check='{:?}', source VM feature='{:#04x}', destination VM feature'{:#04x}'.", 548 entry.function, entry.index, entry.feature_reg, 549 entry.compatible_check, src_vm_feature, 
dest_vm_feature 550 ); 551 552 compatible = false; 553 } 554 } 555 556 if compatible { 557 info!("No CPU incompatibility detected."); 558 Ok(()) 559 } else { 560 Err(Error::CpuidCheckCompatibility) 561 } 562 } 563 } 564 565 pub fn generate_common_cpuid( 566 hypervisor: &Arc<dyn hypervisor::Hypervisor>, 567 config: &CpuidConfig, 568 ) -> super::Result<Vec<CpuIdEntry>> { 569 // SAFETY: cpuid called with valid leaves 570 if unsafe { x86_64::__cpuid(1) }.ecx & 1 << HYPERVISOR_ECX_BIT == 1 << HYPERVISOR_ECX_BIT { 571 // SAFETY: cpuid called with valid leaves 572 let hypervisor_cpuid = unsafe { x86_64::__cpuid(0x4000_0000) }; 573 574 let mut identifier: [u8; 12] = [0; 12]; 575 identifier[0..4].copy_from_slice(&hypervisor_cpuid.ebx.to_le_bytes()[..]); 576 identifier[4..8].copy_from_slice(&hypervisor_cpuid.ecx.to_le_bytes()[..]); 577 identifier[8..12].copy_from_slice(&hypervisor_cpuid.edx.to_le_bytes()[..]); 578 579 info!( 580 "Running under nested virtualisation. Hypervisor string: {}", 581 String::from_utf8_lossy(&identifier) 582 ); 583 } 584 585 info!( 586 "Generating guest CPUID for with physical address size: {}", 587 config.phys_bits 588 ); 589 let cpuid_patches = vec![ 590 // Patch tsc deadline timer bit 591 CpuidPatch { 592 function: 1, 593 index: 0, 594 flags_bit: None, 595 eax_bit: None, 596 ebx_bit: None, 597 ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT), 598 edx_bit: None, 599 }, 600 // Patch hypervisor bit 601 CpuidPatch { 602 function: 1, 603 index: 0, 604 flags_bit: None, 605 eax_bit: None, 606 ebx_bit: None, 607 ecx_bit: Some(HYPERVISOR_ECX_BIT), 608 edx_bit: None, 609 }, 610 // Enable MTRR feature 611 CpuidPatch { 612 function: 1, 613 index: 0, 614 flags_bit: None, 615 eax_bit: None, 616 ebx_bit: None, 617 ecx_bit: None, 618 edx_bit: Some(MTRR_EDX_BIT), 619 }, 620 ]; 621 622 // Supported CPUID 623 let mut cpuid = hypervisor 624 .get_supported_cpuid() 625 .map_err(Error::CpuidGetSupported)?; 626 627 CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches); 628 629 if 
let Some(sgx_epc_sections) = &config.sgx_epc_sections { 630 update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?; 631 } 632 633 #[cfg(feature = "tdx")] 634 let tdx_capabilities = if config.tdx { 635 let caps = hypervisor 636 .tdx_capabilities() 637 .map_err(Error::TdxCapabilities)?; 638 info!("TDX capabilities {:#?}", caps); 639 Some(caps) 640 } else { 641 None 642 }; 643 644 // Update some existing CPUID 645 for entry in cpuid.as_mut_slice().iter_mut() { 646 match entry.function { 647 // Clear AMX related bits if the AMX feature is not enabled 648 0x7 => { 649 if !config.amx && entry.index == 0 { 650 entry.edx &= !(1 << AMX_BF16 | 1 << AMX_TILE | 1 << AMX_INT8) 651 } 652 } 653 0xd => 654 { 655 #[cfg(feature = "tdx")] 656 if let Some(caps) = &tdx_capabilities { 657 let xcr0_mask: u64 = 0x82ff; 658 let xss_mask: u64 = !xcr0_mask; 659 if entry.index == 0 { 660 entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32); 661 entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32); 662 entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32; 663 entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32; 664 } else if entry.index == 1 { 665 entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32); 666 entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32); 667 entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32; 668 entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32; 669 } 670 } 671 } 672 // Copy host L2 cache details if not populated by KVM 673 0x8000_0006 => { 674 if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 { 675 // SAFETY: cpuid called with valid leaves 676 if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 { 677 // SAFETY: cpuid called with valid leaves 678 let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) }; 679 entry.eax = leaf.eax; 680 entry.ebx = leaf.ebx; 681 entry.ecx = leaf.ecx; 682 entry.edx = leaf.edx; 683 } 684 } 685 } 686 // Set CPU physical bits 687 0x8000_0008 => { 688 
entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff); 689 } 690 0x4000_0001 => { 691 // These features are not supported by TDX 692 #[cfg(feature = "tdx")] 693 if config.tdx { 694 entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT 695 | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT 696 | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 697 | 1 << KVM_FEATURE_ASYNC_PF_BIT 698 | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT 699 | 1 << KVM_FEATURE_STEAL_TIME_BIT) 700 } 701 } 702 _ => {} 703 } 704 } 705 706 // Copy CPU identification string 707 for i in 0x8000_0002..=0x8000_0004 { 708 cpuid.retain(|c| c.function != i); 709 // SAFETY: call cpuid with valid leaves 710 let leaf = unsafe { std::arch::x86_64::__cpuid(i) }; 711 cpuid.push(CpuIdEntry { 712 function: i, 713 eax: leaf.eax, 714 ebx: leaf.ebx, 715 ecx: leaf.ecx, 716 edx: leaf.edx, 717 ..Default::default() 718 }); 719 } 720 721 if config.kvm_hyperv { 722 // Remove conflicting entries 723 cpuid.retain(|c| c.function != 0x4000_0000); 724 cpuid.retain(|c| c.function != 0x4000_0001); 725 // See "Hypervisor Top Level Functional Specification" for details 726 // Compliance with "Hv#1" requires leaves up to 0x4000_000a 727 cpuid.push(CpuIdEntry { 728 function: 0x40000000, 729 eax: 0x4000000a, // Maximum cpuid leaf 730 ebx: 0x756e694c, // "Linu" 731 ecx: 0x564b2078, // "x KV" 732 edx: 0x7648204d, // "M Hv" 733 ..Default::default() 734 }); 735 cpuid.push(CpuIdEntry { 736 function: 0x40000001, 737 eax: 0x31237648, // "Hv#1" 738 ..Default::default() 739 }); 740 cpuid.push(CpuIdEntry { 741 function: 0x40000002, 742 eax: 0x3839, // "Build number" 743 ebx: 0xa0000, // "Version" 744 ..Default::default() 745 }); 746 cpuid.push(CpuIdEntry { 747 function: 0x4000_0003, 748 eax: 1 << 1 // AccessPartitionReferenceCounter 749 | 1 << 2 // AccessSynicRegs 750 | 1 << 3 // AccessSyntheticTimerRegs 751 | 1 << 9, // AccessPartitionReferenceTsc 752 edx: 1 << 3, // CPU dynamic partitioning 753 ..Default::default() 754 }); 755 cpuid.push(CpuIdEntry { 756 
function: 0x4000_0004, 757 eax: 1 << 5, // Recommend relaxed timing 758 ..Default::default() 759 }); 760 for i in 0x4000_0005..=0x4000_000a { 761 cpuid.push(CpuIdEntry { 762 function: i, 763 ..Default::default() 764 }); 765 } 766 } 767 768 Ok(cpuid) 769 } 770 771 pub fn configure_vcpu( 772 vcpu: &Arc<dyn hypervisor::Vcpu>, 773 id: u8, 774 boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>, 775 cpuid: Vec<CpuIdEntry>, 776 kvm_hyperv: bool, 777 cpu_vendor: CpuVendor, 778 topology: Option<(u8, u8, u8)>, 779 ) -> super::Result<()> { 780 // Per vCPU CPUID changes; common are handled via generate_common_cpuid() 781 let mut cpuid = cpuid; 782 CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(id)); 783 CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, u32::from(id)); 784 if matches!(cpu_vendor, CpuVendor::AMD) { 785 CpuidPatch::set_cpuid_reg( 786 &mut cpuid, 787 0x8000_001e, 788 Some(0), 789 CpuidReg::EAX, 790 u32::from(id), 791 ); 792 } 793 794 if let Some(t) = topology { 795 update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, cpu_vendor, id); 796 } 797 798 // Set ApicId in cpuid for each vcpu 799 // SAFETY: get host cpuid when eax=1 800 let mut cpu_ebx = unsafe { core::arch::x86_64::__cpuid(1) }.ebx; 801 cpu_ebx &= 0xffffff; 802 cpu_ebx |= (id as u32) << 24; 803 CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1, None, CpuidReg::EBX, cpu_ebx); 804 805 // The TSC frequency CPUID leaf should not be included when running with HyperV emulation 806 if !kvm_hyperv { 807 if let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? 
{ 808 // Need to check that the TSC doesn't vary with dynamic frequency 809 // SAFETY: cpuid called with valid leaves 810 if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx 811 & (1u32 << INVARIANT_TSC_EDX_BIT) 812 > 0 813 { 814 CpuidPatch::set_cpuid_reg( 815 &mut cpuid, 816 0x4000_0000, 817 None, 818 CpuidReg::EAX, 819 0x4000_0010, 820 ); 821 cpuid.retain(|c| c.function != 0x4000_0010); 822 cpuid.push(CpuIdEntry { 823 function: 0x4000_0010, 824 eax: tsc_khz, 825 ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's 826 * APIC_BUS_CYCLE_NS */ 827 ..Default::default() 828 }); 829 }; 830 } 831 } 832 833 vcpu.set_cpuid2(&cpuid) 834 .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?; 835 836 if kvm_hyperv { 837 vcpu.enable_hyperv_synic().unwrap(); 838 } 839 840 regs::setup_msrs(vcpu).map_err(Error::MsrsConfiguration)?; 841 if let Some((kernel_entry_point, guest_memory)) = boot_setup { 842 regs::setup_regs(vcpu, kernel_entry_point.entry_addr.raw_value()) 843 .map_err(Error::RegsConfiguration)?; 844 regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?; 845 regs::setup_sregs(&guest_memory.memory(), vcpu).map_err(Error::SregsConfiguration)?; 846 } 847 interrupts::set_lint(vcpu).map_err(|e| Error::LocalIntConfiguration(e.into()))?; 848 Ok(()) 849 } 850 851 /// Returns a Vec of the valid memory addresses. 852 /// These should be used to configure the GuestMemory structure for the platform. 853 /// For x86_64 all addresses are valid from the start of the kernel except a 854 /// carve out at the end of 32bit address space. 
pub fn arch_memory_regions() -> Vec<(GuestAddress, usize, RegionType)> {
    vec![
        // 0 GiB ~ 3GiB: memory before the gap
        (
            GuestAddress(0),
            layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
            RegionType::Ram,
        ),
        // 4 GiB ~ inf: memory after the gap
        // NOTE(review): usize::MAX acts as an unbounded "rest of RAM" size
        // here — confirm callers clamp it to the configured memory size.
        (layout::RAM_64BIT_START, usize::MAX, RegionType::Ram),
        // 3 GiB ~ 3712 MiB: 32-bit device memory hole
        (
            layout::MEM_32BIT_RESERVED_START,
            layout::MEM_32BIT_DEVICES_SIZE as usize,
            RegionType::SubRegion,
        ),
        // 3712 MiB ~ 3968 MiB: 32-bit reserved memory hole
        (
            layout::MEM_32BIT_RESERVED_START.unchecked_add(layout::MEM_32BIT_DEVICES_SIZE),
            (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
            RegionType::Reserved,
        ),
    ]
}

/// Configures the system and should be called once per vm before starting vcpu threads.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
/// * `initramfs` - Initramfs location and size, if one was loaded.
/// * `num_cpus` - Number of virtual CPUs the guest will have.
/// * `rsdp_addr` - Address in `guest_mem` of the ACPI RSDP table, if any.
/// * `sgx_epc_region` - SGX EPC region to expose in the guest memory map, if any.
/// * `serial_number` - System serial number for the SMBIOS table, if any.
/// * `uuid` - System UUID for the SMBIOS table, if any.
/// * `oem_strings` - OEM strings for the SMBIOS table, if any.
888 #[allow(clippy::too_many_arguments)] 889 pub fn configure_system( 890 guest_mem: &GuestMemoryMmap, 891 cmdline_addr: GuestAddress, 892 initramfs: &Option<InitramfsConfig>, 893 _num_cpus: u8, 894 rsdp_addr: Option<GuestAddress>, 895 sgx_epc_region: Option<SgxEpcRegion>, 896 serial_number: Option<&str>, 897 uuid: Option<&str>, 898 oem_strings: Option<&[&str]>, 899 ) -> super::Result<()> { 900 // Write EBDA address to location where ACPICA expects to find it 901 guest_mem 902 .write_obj((layout::EBDA_START.0 >> 4) as u16, layout::EBDA_POINTER) 903 .map_err(Error::EbdaSetup)?; 904 905 let size = smbios::setup_smbios(guest_mem, serial_number, uuid, oem_strings) 906 .map_err(Error::SmbiosSetup)?; 907 908 // Place the MP table after the SMIOS table aligned to 16 bytes 909 let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size); 910 let offset = GuestAddress((offset.0 + 16) & !0xf); 911 mptable::setup_mptable(offset, guest_mem, _num_cpus).map_err(Error::MpTableSetup)?; 912 913 // Check that the RAM is not smaller than the RSDP start address 914 if let Some(rsdp_addr) = rsdp_addr { 915 if rsdp_addr.0 > guest_mem.last_addr().0 { 916 return Err(super::Error::RsdpPastRamEnd); 917 } 918 } 919 920 configure_pvh( 921 guest_mem, 922 cmdline_addr, 923 initramfs, 924 rsdp_addr, 925 sgx_epc_region, 926 ) 927 } 928 929 fn configure_pvh( 930 guest_mem: &GuestMemoryMmap, 931 cmdline_addr: GuestAddress, 932 initramfs: &Option<InitramfsConfig>, 933 rsdp_addr: Option<GuestAddress>, 934 sgx_epc_region: Option<SgxEpcRegion>, 935 ) -> super::Result<()> { 936 const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578; 937 938 let mut start_info: StartInfoWrapper = StartInfoWrapper(hvm_start_info::default()); 939 940 start_info.0.magic = XEN_HVM_START_MAGIC_VALUE; 941 start_info.0.version = 1; // pvh has version 1 942 start_info.0.nr_modules = 0; 943 start_info.0.cmdline_paddr = cmdline_addr.raw_value(); 944 start_info.0.memmap_paddr = layout::MEMMAP_START.raw_value(); 945 946 if let 
Some(rsdp_addr) = rsdp_addr { 947 start_info.0.rsdp_paddr = rsdp_addr.0; 948 } 949 950 if let Some(initramfs_config) = initramfs { 951 // The initramfs has been written to guest memory already, here we just need to 952 // create the module structure that describes it. 953 let ramdisk_mod: ModlistEntryWrapper = ModlistEntryWrapper(hvm_modlist_entry { 954 paddr: initramfs_config.address.raw_value(), 955 size: initramfs_config.size as u64, 956 ..Default::default() 957 }); 958 959 start_info.0.nr_modules += 1; 960 start_info.0.modlist_paddr = layout::MODLIST_START.raw_value(); 961 962 // Write the modlist struct to guest memory. 963 guest_mem 964 .write_obj(ramdisk_mod, layout::MODLIST_START) 965 .map_err(super::Error::ModlistSetup)?; 966 } 967 968 // Vector to hold the memory maps which needs to be written to guest memory 969 // at MEMMAP_START after all of the mappings are recorded. 970 let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new(); 971 972 // Create the memory map entries. 973 add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM); 974 975 // Merge continuous memory regions into one region. 976 // Note: memory regions from "GuestMemory" are sorted and non-zero sized. 977 let ram_regions = { 978 let mut ram_regions = Vec::new(); 979 let mut current_start = guest_mem 980 .iter() 981 .next() 982 .map(GuestMemoryRegion::start_addr) 983 .expect("GuestMemory must have one memory region at least") 984 .raw_value(); 985 let mut current_end = current_start; 986 987 for (start, size) in guest_mem 988 .iter() 989 .map(|m| (m.start_addr().raw_value(), m.len())) 990 { 991 if current_end == start { 992 // This zone is continuous with the previous one. 
993 current_end += size; 994 } else { 995 ram_regions.push((current_start, current_end)); 996 997 current_start = start; 998 current_end = start + size; 999 } 1000 } 1001 1002 ram_regions.push((current_start, current_end)); 1003 1004 ram_regions 1005 }; 1006 1007 if ram_regions.len() > 2 { 1008 error!( 1009 "There should be up to two non-continuous regions, devidided by the 1010 gap at the end of 32bit address space (e.g. between 3G and 4G)." 1011 ); 1012 return Err(super::Error::MemmapTableSetup); 1013 } 1014 1015 // Create the memory map entry for memory region before the gap 1016 { 1017 let (first_region_start, first_region_end) = 1018 ram_regions.first().ok_or(super::Error::MemmapTableSetup)?; 1019 let high_ram_start = layout::HIGH_RAM_START.raw_value(); 1020 let mem_32bit_reserved_start = layout::MEM_32BIT_RESERVED_START.raw_value(); 1021 1022 if !((first_region_start <= &high_ram_start) 1023 && (first_region_end > &high_ram_start) 1024 && (first_region_end <= &mem_32bit_reserved_start)) 1025 { 1026 error!( 1027 "Unexpected first memory region layout: (start: 0x{:08x}, end: 0x{:08x}). 
1028 high_ram_start: 0x{:08x}, mem_32bit_reserved_start: 0x{:08x}", 1029 first_region_start, first_region_end, high_ram_start, mem_32bit_reserved_start 1030 ); 1031 1032 return Err(super::Error::MemmapTableSetup); 1033 } 1034 1035 info!( 1036 "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}", 1037 high_ram_start, first_region_end 1038 ); 1039 1040 add_memmap_entry( 1041 &mut memmap, 1042 high_ram_start, 1043 first_region_end - high_ram_start, 1044 E820_RAM, 1045 ); 1046 } 1047 1048 // Create the memory map entry for memory region after the gap if any 1049 if let Some((second_region_start, second_region_end)) = ram_regions.get(1) { 1050 let ram_64bit_start = layout::RAM_64BIT_START.raw_value(); 1051 1052 if second_region_start != &ram_64bit_start { 1053 error!( 1054 "Unexpected second memory region layout: start: 0x{:08x}, ram_64bit_start: 0x{:08x}", 1055 second_region_start, ram_64bit_start 1056 ); 1057 1058 return Err(super::Error::MemmapTableSetup); 1059 } 1060 1061 info!( 1062 "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}", 1063 ram_64bit_start, second_region_end 1064 ); 1065 add_memmap_entry( 1066 &mut memmap, 1067 ram_64bit_start, 1068 second_region_end - ram_64bit_start, 1069 E820_RAM, 1070 ); 1071 } 1072 1073 add_memmap_entry( 1074 &mut memmap, 1075 layout::PCI_MMCONFIG_START.0, 1076 layout::PCI_MMCONFIG_SIZE, 1077 E820_RESERVED, 1078 ); 1079 1080 if let Some(sgx_epc_region) = sgx_epc_region { 1081 add_memmap_entry( 1082 &mut memmap, 1083 sgx_epc_region.start().raw_value(), 1084 sgx_epc_region.size(), 1085 E820_RESERVED, 1086 ); 1087 } 1088 1089 start_info.0.memmap_entries = memmap.len() as u32; 1090 1091 // Copy the vector with the memmap table to the MEMMAP_START address 1092 // which is already saved in the memmap_paddr field of hvm_start_info struct. 
    // Guest physical address where the memmap table will be written. The
    // surrounding code states start_info's memmap_paddr field already holds
    // this address.
    let mut memmap_start_addr = layout::MEMMAP_START;

    // Verify the whole table (entries * entry size) fits inside guest RAM
    // before writing any of it.
    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.0.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, create a MemmapTableEntryWrapper
    // and write it to guest memory.
    for memmap_entry in memmap {
        let map_entry_wrapper: MemmapTableEntryWrapper = MemmapTableEntryWrapper(memmap_entry);

        guest_mem
            .write_obj(map_entry_wrapper, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        // unchecked_add: the checked_offset above already proved the full
        // table lies within the guest address space.
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at PVH_START_INFO
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    // Same bounds check for the start_info struct itself.
    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}

/// Appends one entry to the PVH memory-map table being built.
///
/// `mem_type` is an E820-style type code (e.g. E820_RAM, E820_RESERVED);
/// the `reserved` field is always zeroed.
fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
    // Add the table entry to the vector
    memmap.push(hvm_memmap_table_entry {
        addr,
        size,
        type_: mem_type,
        reserved: 0,
    });
}

/// Returns the memory address where the initramfs could be loaded.
pub fn initramfs_load_addr(
    guest_mem: &GuestMemoryMmap,
    initramfs_size: usize,
) -> super::Result<u64> {
    // Only the region starting at GPA 0 is considered; the initramfs is
    // placed as high as possible inside it.
    let first_region = guest_mem
        .find_region(GuestAddress::new(0))
        .ok_or(super::Error::InitramfsAddress)?;
    // It's safe to cast to usize because the size of a region can't be greater than usize.
    let lowmem_size = first_region.len() as usize;

    // The initramfs must fit entirely inside the low-memory region.
    if lowmem_size < initramfs_size {
        return Err(super::Error::InitramfsAddress);
    }

    // Align the load address down to a page boundary.
    let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
    Ok(aligned_addr)
}

/// Queries host CPUID for the number of physical address bits, compensating
/// for address bits consumed by AMD SME when that feature is enabled.
pub fn get_host_cpu_phys_bits(hypervisor: &Arc<dyn hypervisor::Hypervisor>) -> u8 {
    // SAFETY: call cpuid with valid leaves
    unsafe {
        // Leaf 0x8000_0000 EAX reports the highest supported extended leaf.
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
        let reduced = if leaf.eax >= 0x8000_001f
            && matches!(hypervisor.get_cpu_vendor(), CpuVendor::AMD)
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            // Leaf 0x8000_001f EBX bits [11:6]: physical address bit reduction.
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if leaf.eax >= 0x8000_0008 {
            // Leaf 0x8000_0008 EAX bits [7:0]: physical address bits.
            // NOTE(review): assumes `reduced` never exceeds the reported
            // width; a larger value would underflow — confirm host guarantees.
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            // Leaf 0x8000_0008 unavailable: fall back to 36 address bits.
            36
        }
    }
}

/// Patches the topology CPUID leaves (0xb, 0x1f and, for AMD guests,
/// 0x8000_001e / 0x8000_0008 / leaf 1) to describe the requested guest
/// topology to the guest OS.
/// NOTE(review): the width math assumes each per-level count is >= 1;
/// a zero value would underflow `x - 1` — confirm callers validate this.
fn update_cpuid_topology(
    cpuid: &mut Vec<CpuIdEntry>,
    threads_per_core: u8,
    cores_per_die: u8,
    dies_per_package: u8,
    cpu_vendor: CpuVendor,
    id: u8,
) {
    // Number of APIC-ID bits needed to enumerate each topology level
    // (ceil(log2(count)), accumulated level over level).
    let thread_width = 8 - (threads_per_core - 1).leading_zeros();
    let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
    let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;

    // CPU Topology leaf 0xb
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0),
CpuidReg::EAX, thread_width);
    // Sub-leaf 0: SMT level — EBX holds the logical processor count at this
    // level, ECX[15:8] the level type (1 = SMT per the Intel SDM).
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);

    // Sub-leaf 1: Core level (type 2) — covers the whole package.
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(1),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);

    // CPU Topology leaf 0x1f
    // Same scheme as 0xb but with an extra Die level (type 5).
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(1),
        CpuidReg::EBX,
        u32::from(cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(2),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);

    // AMD guests additionally get leaf 0x8000_001e (extended topology) and
    // related legacy leaves patched.
    if matches!(cpu_vendor, CpuVendor::AMD) {
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::EBX,
            ((threads_per_core as u32 - 1) << 8) | (id as u32 & 0xff),
        );
        // NOTE(review): `|` binds looser than `&` in Rust, so this is
        // `(dies-1)<<8 | ((thread_width+die_width) & 0xff)` — confirm the
        // precedence matches the intent.
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::ECX,
            ((dies_per_package as u32 - 1) << 8) | (thread_width + die_width) & 0xff,
        );
        CpuidPatch::set_cpuid_reg(cpuid, 0x8000_001e, Some(0),
CpuidReg::EDX, 0);
        if cores_per_die * threads_per_core > 1 {
            // Multi-core AMD guest: set bits 1 and 22 in leaf 0x8000_0001 ECX.
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0001,
                Some(0),
                CpuidReg::ECX,
                (1u32 << 1) | (1u32 << 22),
            );
            // Leaf 1 EBX: initial APIC id (bits 31:24), logical processor
            // count (bits 23:16), CLFLUSH line size field = 8 (bits 15:8).
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x0000_0001,
                Some(0),
                CpuidReg::EBX,
                ((id as u32) << 24)
                    | (8 << 8)
                    | (((cores_per_die * threads_per_core) as u32) << 16),
            );
            let cpuid_patches = vec![
                // Patch tsc deadline timer bit
                CpuidPatch {
                    function: 1,
                    index: 0,
                    flags_bit: None,
                    eax_bit: None,
                    ebx_bit: None,
                    ecx_bit: None,
                    edx_bit: Some(28),
                },
            ];
            CpuidPatch::patch_cpuid(cpuid, cpuid_patches);
            // Leaf 0x8000_0008 ECX: APIC-ID size (bits 15:12) and number of
            // threads per package minus one (low bits).
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0008,
                Some(0),
                CpuidReg::ECX,
                ((thread_width + core_width + die_width) << 12)
                    | ((cores_per_die * threads_per_core) - 1) as u32,
            );
        } else {
            // Single-threaded guest: clear leaf 0x8000_0008 ECX entirely.
            CpuidPatch::set_cpuid_reg(cpuid, 0x8000_0008, Some(0), CpuidReg::ECX, 0u32);
        }
    }
}

// The goal is to update the CPUID sub-leaves to reflect the number of EPC
// sections exposed to the guest.
fn update_cpuid_sgx(
    cpuid: &mut Vec<CpuIdEntry>,
    epc_sections: &Vec<SgxEpcSection>,
) -> Result<(), Error> {
    // Something's wrong if there's no EPC section.
    if epc_sections.is_empty() {
        return Err(Error::NoSgxEpcSection);
    }
    // We can't go further if the hypervisor does not support SGX feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
        return Err(Error::MissingSgxFeature);
    }
    // We can't go further if the hypervisor does not support SGX_LC feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
        return Err(Error::MissingSgxLaunchControlFeature);
    }

    // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
    // properties such as confidentiality and integrity.
    // SAFETY: call cpuid with valid leaves
    let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };

    for (i, epc_section) in epc_sections.iter().enumerate() {
        // EPC sections are enumerated starting at sub-leaf 2 of leaf 0x12
        // (sub-leaves 0 and 1 describe other SGX capabilities).
        let subleaf_idx = i + 2;
        let start = epc_section.start().raw_value();
        let size = epc_section.size();
        // EAX/EBX: low/high bits of the section base address; the low
        // nibble 0x1 marks the sub-leaf as a valid EPC section.
        let eax = (start & 0xffff_f000) as u32 | 0x1;
        let ebx = (start >> 32) as u32;
        // ECX/EDX: low/high bits of the section size; the low nibble is
        // copied from the host's reported EPC properties.
        let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
        let edx = (size >> 32) as u32;
        // CPU Topology leaf 0x12
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
    }

    // Add one NULL entry to terminate the dynamic list
    let subleaf_idx = epc_sections.len() + 2;
    // CPU Topology leaf 0x12
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    // Sanity-check the static memory map: four regions, the first starting
    // at GPA 0 and the second right above the 4 GiB boundary.
    #[test]
    fn regions_base_addr() {
        let regions = arch_memory_regions();
        assert_eq!(4, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(1 << 32), regions[1].0);
    }

    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;
        // A 64 KiB guest is expected to make configure_system fail.
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let config_err = configure_system(
            &gm,
            GuestAddress(0),
            &None,
            1,
            Some(layout::RSDP_POINTER),
            None,
            None,
            None,
            None,
        );
        assert!(config_err.is_err());

        // Now assigning some memory that falls before the 32bit memory hole.
        // The unbounded (usize::MAX) region is filtered out here.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram && r.1 != usize::MAX)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();

        configure_system(
            &gm,
            GuestAddress(0),
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // Now assigning some memory that falls after the 32bit memory hole.
        // The unbounded region is capped to 128 MiB for the test.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| {
                if r.1 == usize::MAX {
                    (r.0, 128 << 20)
                } else {
                    (r.0, r.1)
                }
            })
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(
            &gm,
            GuestAddress(0),
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // NOTE(review): this call is byte-identical to the previous one —
        // possibly meant to exercise a different configuration; confirm.
        configure_system(
            &gm,
            GuestAddress(0),
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
    }

    // add_memmap_entry must append entries verbatim, in order, with the
    // reserved field left zeroed (Default).
    #[test]
    fn test_add_memmap_entry() {
        let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

        let expected_memmap = vec![
            hvm_memmap_table_entry {
                addr: 0x0,
                size: 0x1000,
                type_: E820_RAM,
                ..Default::default()
            },
            hvm_memmap_table_entry {
                addr: 0x10000,
                size: 0xa000,
                type_: E820_RESERVED,
                ..Default::default()
            },
        ];

        add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM);
        add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED);

        // Compared via Debug formatting since the generated struct may not
        // implement PartialEq.
        assert_eq!(format!("{memmap:?}"), format!("{expected_memmap:?}"));
    }
}