// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use std::sync::Arc;
pub mod interrupts;
pub mod layout;
mod mpspec;
mod mptable;
pub mod regs;
use crate::GuestMemoryMmap;
use crate::InitramfsConfig;
use crate::RegionType;
use hypervisor::{CpuId, CpuIdEntry, HypervisorError, CPUID_FLAG_VALID_INDEX};
use linux_loader::loader::bootparam::boot_params;
use linux_loader::loader::elf::start_info::{
    hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
};
use std::collections::BTreeMap;
use std::mem;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
    GuestMemoryRegion, GuestUsize,
};
mod smbios;
use std::arch::x86_64;
#[cfg(feature = "tdx")]
pub mod tdx;

// CPUID feature bits
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // TSC deadline timer ECX bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ECX bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR EDX bit.

// KVM feature bits
const KVM_FEATURE_ASYNC_PF_INT_BIT: u8 = 14;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;
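// Illustrative note: each of the constants above is a bit *position*, not a
// mask, so checking or setting a feature boils down to plain shifting and
// masking, e.g. a guest sees the hypervisor bit when
// `ecx & (1 << HYPERVISOR_ECX_BIT) != 0`.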
#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
}

const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;

#[derive(Clone)]
pub struct SgxEpcSection {
    start: GuestAddress,
    size: GuestUsize,
}

impl SgxEpcSection {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}

#[derive(Clone)]
pub struct SgxEpcRegion {
    start: GuestAddress,
    size: GuestUsize,
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// * the type that is implementing the trait is foreign or
// * all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct StartInfoWrapper(hvm_start_info);

// It is safe to initialize StartInfoWrapper, which is a wrapper over `hvm_start_info` (a series of ints).
unsafe impl ByteValued for StartInfoWrapper {}

#[derive(Copy, Clone, Default)]
struct MemmapTableEntryWrapper(hvm_memmap_table_entry);

unsafe impl ByteValued for MemmapTableEntryWrapper {}

#[derive(Copy, Clone, Default)]
struct ModlistEntryWrapper(hvm_modlist_entry);

unsafe impl ByteValued for ModlistEntryWrapper {}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// * the type that is implementing the trait is foreign or
// * all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct BootParamsWrapper(boot_params);

// It is safe to initialize BootParamsWrapper, which is a wrapper over `boot_params` (a series of ints).
unsafe impl ByteValued for BootParamsWrapper {}
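// Illustrative sketch of why the newtypes above are needed: a direct
// `unsafe impl ByteValued for hvm_start_info {}` would be rejected by the
// orphan rule (both the trait and the type live in foreign crates), whereas
// the local wrapper types are accepted and still let us
// `guest_mem.write_obj()` the structs byte-for-byte.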
#[derive(Debug)]
pub enum Error {
    /// Error writing MP table to memory.
    MpTableSetup(mptable::Error),

    /// Error configuring the general purpose registers
    RegsConfiguration(regs::Error),

    /// Error configuring the special registers
    SregsConfiguration(regs::Error),

    /// Error configuring the floating point related registers
    FpuConfiguration(regs::Error),

    /// Error configuring the MSR registers
    MsrsConfiguration(regs::Error),

    /// Failed to set supported CPUs.
    SetSupportedCpusFailed(anyhow::Error),

    /// Cannot set the local interruption due to bad configuration.
    LocalIntConfiguration(anyhow::Error),

    /// Error setting up SMBIOS table
    SmbiosSetup(smbios::Error),

    /// Could not find any SGX EPC section
    NoSgxEpcSection,

    /// Missing SGX CPU feature
    MissingSgxFeature,

    /// Missing SGX_LC CPU feature
    MissingSgxLaunchControlFeature,

    /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
    CpuidGetSupported(HypervisorError),

    /// Error populating CPUID with KVM HyperV emulation details
    CpuidKvmHyperV(vmm_sys_util::fam::Error),

    /// Error populating CPUID with CPU identification
    CpuidIdentification(vmm_sys_util::fam::Error),

    /// Error checking CPUID compatibility
    CpuidCheckCompatibility,
}

impl From<Error> for super::Error {
    fn from(e: Error) -> super::Error {
        super::Error::X86_64Setup(e)
    }
}

#[allow(dead_code, clippy::upper_case_acronyms)]
#[derive(Copy, Clone, Debug)]
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}

pub struct CpuidPatch {
    pub function: u32,
    pub index: u32,
    pub flags_bit: Option<u8>,
    pub eax_bit: Option<u8>,
    pub ebx_bit: Option<u8>,
    pub ecx_bit: Option<u8>,
    pub edx_bit: Option<u8>,
}
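// Illustrative example (this exact patch appears in the list built by
// `generate_common_cpuid` below): requesting bit 24 of ECX on leaf 0x1 is
// expressed as
//
//     CpuidPatch {
//         function: 1,
//         index: 0,
//         flags_bit: None,
//         eax_bit: None,
//         ebx_bit: None,
//         ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
//         edx_bit: None,
//     }
//
// with every register the patch does not touch left as `None`.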
impl CpuidPatch {
    pub fn set_cpuid_reg(
        cpuid: &mut CpuId,
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
        value: u32,
    ) {
        let entries = cpuid.as_mut_slice();

        let mut entry_found = false;
        for entry in entries.iter_mut() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                entry_found = true;
                match reg {
                    CpuidReg::EAX => {
                        entry.eax = value;
                    }
                    CpuidReg::EBX => {
                        entry.ebx = value;
                    }
                    CpuidReg::ECX => {
                        entry.ecx = value;
                    }
                    CpuidReg::EDX => {
                        entry.edx = value;
                    }
                }
            }
        }

        if entry_found {
            return;
        }

        // Entry not found, so let's add it.
        if let Some(index) = index {
            let mut entry = CpuIdEntry {
                function,
                index,
                flags: CPUID_FLAG_VALID_INDEX,
                ..Default::default()
            };
            match reg {
                CpuidReg::EAX => {
                    entry.eax = value;
                }
                CpuidReg::EBX => {
                    entry.ebx = value;
                }
                CpuidReg::ECX => {
                    entry.ecx = value;
                }
                CpuidReg::EDX => {
                    entry.edx = value;
                }
            }

            if let Err(e) = cpuid.push(entry) {
                error!("Failed adding new CPUID entry: {:?}", e);
            }
        }
    }

    pub fn patch_cpuid(cpuid: &mut CpuId, patches: Vec<CpuidPatch>) {
        let entries = cpuid.as_mut_slice();

        for entry in entries.iter_mut() {
            for patch in patches.iter() {
                if entry.function == patch.function && entry.index == patch.index {
                    if let Some(flags_bit) = patch.flags_bit {
                        entry.flags |= 1 << flags_bit;
                    }
                    if let Some(eax_bit) = patch.eax_bit {
                        entry.eax |= 1 << eax_bit;
                    }
                    if let Some(ebx_bit) = patch.ebx_bit {
                        entry.ebx |= 1 << ebx_bit;
                    }
                    if let Some(ecx_bit) = patch.ecx_bit {
                        entry.ecx |= 1 << ecx_bit;
                    }
                    if let Some(edx_bit) = patch.edx_bit {
                        entry.edx |= 1 << edx_bit;
                    }
                }
            }
        }
    }

    pub fn is_feature_enabled(
        cpuid: &CpuId,
        function: u32,
        index: u32,
        reg: CpuidReg,
        feature_bit: usize,
    ) -> bool {
        let entries = cpuid.as_slice();
        let mask = 1 << feature_bit;

        for entry in entries.iter() {
            if entry.function == function && entry.index == index {
                let reg_val = match reg {
                    CpuidReg::EAX => entry.eax,
                    CpuidReg::EBX => entry.ebx,
                    CpuidReg::ECX => entry.ecx,
                    CpuidReg::EDX => entry.edx,
                };

                return (reg_val & mask) == mask;
            }
        }

        false
    }
}

#[derive(Debug)]
enum CpuidCompatibleCheck {
    BitwiseSubset, // bitwise subset
    Equal,         // equal in value
    NumNotGreater, // smaller or equal as a number
}

pub struct CpuidFeatureEntry {
    function: u32,
    index: u32,
    feature_reg: CpuidReg,
    compatible_check: CpuidCompatibleCheck,
}
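// Worked example of the three check modes: with src=0b0110 and dest=0b1110,
// `BitwiseSubset` passes (every src bit is also set in dest), `Equal` fails,
// and `NumNotGreater` passes (6 <= 14). Leaves carrying feature *bitmaps* use
// BitwiseSubset, while leaves carrying plain *values* (e.g. a maximum subleaf
// number) use NumNotGreater or Equal.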
impl CpuidFeatureEntry {
    fn checked_feature_entry_list() -> Vec<CpuidFeatureEntry> {
        vec![
            // The following list includes all hardware feature bits from
            // the CPUID Wiki Page: https://en.wikipedia.org/wiki/CPUID
            // Leaf 0x1, ECX/EDX, feature bits
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7, EAX/EBX/ECX/EDX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7 subleaf 0x1, EAX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 1,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x8000_0001, ECX/EDX, CPUID feature bits
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
            // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            // Leaf 0x4000_0001, EAX/EBX/ECX/EDX, KVM CPUID features
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
        ]
    }

    fn get_features_from_cpuid(
        cpuid: &CpuId,
        feature_entry_list: &[CpuidFeatureEntry],
    ) -> Vec<u32> {
        let mut features = vec![0; feature_entry_list.len()];
        for (i, feature_entry) in feature_entry_list.iter().enumerate() {
            for cpuid_entry in cpuid.as_slice().iter() {
                if cpuid_entry.function == feature_entry.function
                    && cpuid_entry.index == feature_entry.index
                {
                    match feature_entry.feature_reg {
                        CpuidReg::EAX => {
                            features[i] = cpuid_entry.eax;
                        }
                        CpuidReg::EBX => {
                            features[i] = cpuid_entry.ebx;
                        }
                        CpuidReg::ECX => {
                            features[i] = cpuid_entry.ecx;
                        }
                        CpuidReg::EDX => {
                            features[i] = cpuid_entry.edx;
                        }
                    }

                    break;
                }
            }
        }

        features
    }
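    // The `BitwiseSubset` arm below relies on the identity
    // `(src ^ dest) & src == src & !dest`: XOR keeps only the differing bits,
    // and masking with `src` keeps the ones present in the source but absent
    // from the destination, i.e. the features that would be lost by migrating.
    // A zero result means the source features are a subset.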
    // The function returns an `Error` (a.k.a. "incompatible") when the CPUID features
    // from `src_vm_cpuid` are not a subset of those of the `dest_vm_cpuid`.
    pub fn check_cpuid_compatibility(
        src_vm_cpuid: &CpuId,
        dest_vm_cpuid: &CpuId,
    ) -> Result<(), Error> {
        let feature_entry_list = &Self::checked_feature_entry_list();
        let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list);
        let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list);

        // Loop over the feature bits and check whether the 'source vm' features
        // are a subset of those of the 'destination vm'.
        let mut compatible = true;
        for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features
            .iter()
            .zip(dest_vm_features.iter())
            .enumerate()
        {
            let entry = &feature_entry_list[i];
            let entry_compatible = match entry.compatible_check {
                CpuidCompatibleCheck::BitwiseSubset => {
                    let different_feature_bits = src_vm_feature ^ dest_vm_feature;
                    let src_vm_feature_bits_only = different_feature_bits & src_vm_feature;
                    src_vm_feature_bits_only == 0
                }
                CpuidCompatibleCheck::Equal => src_vm_feature == dest_vm_feature,
                CpuidCompatibleCheck::NumNotGreater => src_vm_feature <= dest_vm_feature,
            };
            if !entry_compatible {
                error!(
                    "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \
                    compatible_check='{:?}', source VM feature='{:#04x}', destination VM feature='{:#04x}'.",
                    entry.function, entry.index, entry.feature_reg,
                    entry.compatible_check, src_vm_feature, dest_vm_feature
                );

                compatible = false;
            }
        }

        if compatible {
            info!("No CPU incompatibility detected.");
            Ok(())
        } else {
            Err(Error::CpuidCheckCompatibility)
        }
    }
}
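// Illustrative migration-time usage (hypothetical call site; the real caller
// lives outside this module): the source VM's CPUID is carried in the
// snapshot, and before resuming vCPUs the destination host would run
// `CpuidFeatureEntry::check_cpuid_compatibility(&snapshot_cpuid, &host_cpuid)`
// and abort the restore on `Err`.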
pub fn generate_common_cpuid(
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    topology: Option<(u8, u8, u8)>,
    sgx_epc_sections: Option<Vec<SgxEpcSection>>,
    phys_bits: u8,
    kvm_hyperv: bool,
    #[cfg(feature = "tdx")] tdx_enabled: bool,
) -> super::Result<CpuId> {
    let cpuid_patches = vec![
        // Patch TSC deadline timer bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
            edx_bit: None,
        },
        // Patch hypervisor bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(HYPERVISOR_ECX_BIT),
            edx_bit: None,
        },
        // Enable MTRR feature
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: None,
            edx_bit: Some(MTRR_EDX_BIT),
        },
    ];

    // Supported CPUID
    let mut cpuid = hypervisor.get_cpuid().map_err(Error::CpuidGetSupported)?;

    CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2);
    }

    if let Some(sgx_epc_sections) = sgx_epc_sections {
        update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
    }

    // Update some existing CPUID
    for entry in cpuid.as_mut_slice().iter_mut() {
        match entry.function {
            // Set CPU physical bits
            0x8000_0008 => {
                entry.eax = (entry.eax & 0xffff_ff00) | (phys_bits as u32 & 0xff);
            }
            // Disable KVM_FEATURE_ASYNC_PF_INT
            // This is required until we find out why the asynchronous page
            // fault is generating unexpected behavior when using the interrupt
            // mechanism.
            // TODO: Re-enable KVM_FEATURE_ASYNC_PF_INT (#2277)
            0x4000_0001 => {
                entry.eax &= !(1 << KVM_FEATURE_ASYNC_PF_INT_BIT);

                // These features are not supported by TDX
                #[cfg(feature = "tdx")]
                if tdx_enabled {
                    entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT
                        | 1 << KVM_FEATURE_STEAL_TIME_BIT)
                }
            }
            _ => {}
        }
    }

    // Copy CPU identification string
    for i in 0x8000_0002..=0x8000_0004 {
        cpuid.retain(|c| c.function != i);
        let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
        cpuid
            .push(CpuIdEntry {
                function: i,
                eax: leaf.eax,
                ebx: leaf.ebx,
                ecx: leaf.ecx,
                edx: leaf.edx,
                ..Default::default()
            })
            .map_err(Error::CpuidIdentification)?;
    }

    if kvm_hyperv {
        // Remove conflicting entries
        cpuid.retain(|c| c.function != 0x4000_0000);
        cpuid.retain(|c| c.function != 0x4000_0001);
        // See "Hypervisor Top Level Functional Specification" for details
        // Compliance with "Hv#1" requires leaves up to 0x4000_000a
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0000,
                eax: 0x4000_000a, // Maximum cpuid leaf
                ebx: 0x756e_694c, // "Linu"
                ecx: 0x564b_2078, // "x KV"
                edx: 0x7648_204d, // "M Hv"
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0001,
                eax: 0x3123_7648, // "Hv#1"
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0002,
                eax: 0x3839,  // Build number
                ebx: 0xa0000, // Version
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0003,
                eax: 1 << 1 // AccessPartitionReferenceCounter
                    | 1 << 2 // AccessSynicRegs
                    | 1 << 3 // AccessSyntheticTimerRegs
                    | 1 << 9, // AccessPartitionReferenceTsc
                edx: 1 << 3, // CPU dynamic partitioning
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0004,
                eax: 1 << 5, // Recommend relaxed timing
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        for i in 0x4000_0005..=0x4000_000a {
            cpuid
                .push(CpuIdEntry {
                    function: i,
                    ..Default::default()
                })
                .map_err(Error::CpuidKvmHyperV)?;
        }
    }

    Ok(cpuid)
}
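/// Configures a single vCPU: applies the per-vCPU CPUID tweaks (the vCPU id is
/// written as the APIC ID into leaves 0xb and 0x1f), hands the CPUID to the
/// hypervisor, sets up MSRs and, when a kernel entry point is provided, the
/// general purpose, FPU and special registers, before finally wiring up the
/// local APIC LINT pins.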
pub fn configure_vcpu(
    fd: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    kernel_entry_point: Option<EntryPoint>,
    vm_memory: &GuestMemoryAtomic<GuestMemoryMmap>,
    cpuid: CpuId,
    kvm_hyperv: bool,
) -> super::Result<()> {
    // Per vCPU CPUID changes; common are handled via generate_common_cpuid()
    let mut cpuid = cpuid;
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(id));
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, u32::from(id));

    fd.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        fd.enable_hyperv_synic().unwrap();
    }

    regs::setup_msrs(fd).map_err(Error::MsrsConfiguration)?;
    if let Some(kernel_entry_point) = kernel_entry_point {
        // Safe to unwrap because this method is called after the VM is configured
        regs::setup_regs(fd, kernel_entry_point.entry_addr.raw_value())
            .map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(fd).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&vm_memory.memory(), fd).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(fd).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}

/// Returns a Vec of the valid memory addresses.
/// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64 all addresses are valid from the start of the kernel except a
/// carve-out at the end of the 32-bit address space.
pub fn arch_memory_regions(size: GuestUsize) -> Vec<(GuestAddress, usize, RegionType)> {
    let reserved_memory_gap_start = layout::MEM_32BIT_RESERVED_START
        .checked_add(layout::MEM_32BIT_DEVICES_SIZE)
        .expect("32-bit reserved region is too large");

    let requested_memory_size = GuestAddress(size as u64);
    let mut regions = Vec::new();

    // case1: guest memory fits before the gap
    if size as u64 <= layout::MEM_32BIT_RESERVED_START.raw_value() {
        regions.push((GuestAddress(0), size as usize, RegionType::Ram));
    // case2: guest memory extends beyond the gap
    } else {
        // push memory before the gap
        regions.push((
            GuestAddress(0),
            layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
            RegionType::Ram,
        ));
        regions.push((
            layout::RAM_64BIT_START,
            requested_memory_size.unchecked_offset_from(layout::MEM_32BIT_RESERVED_START) as usize,
            RegionType::Ram,
        ));
    }

    // Add the 32-bit device memory hole as a sub region.
    regions.push((
        layout::MEM_32BIT_RESERVED_START,
        layout::MEM_32BIT_DEVICES_SIZE as usize,
        RegionType::SubRegion,
    ));

    // Add the 32-bit reserved memory hole as a sub region.
    regions.push((
        reserved_memory_gap_start,
        (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
        RegionType::Reserved,
    ));

    regions
}
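// Worked example (with the layout constants used by this crate; the tests
// below pin the hole start at 3328 MiB = 3.25 GiB): a 5 GiB guest yields RAM
// at [0, 3.25 GiB), RAM for the remaining 1.75 GiB re-based at
// RAM_64BIT_START, i.e. [4 GiB, 5.75 GiB), plus the 32-bit device sub-region
// and reserved region filling the [3.25 GiB, 4 GiB) hole.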
/// Configures the system and should be called once per vm before starting vcpu threads.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
/// * `initramfs` - Location and size of an optional initramfs.
/// * `num_cpus` - Number of virtual CPUs the guest will have.
/// * `rsdp_addr` - Address in `guest_mem` of the RSDP table, if any.
/// * `sgx_epc_region` - SGX EPC region to advertise to the guest, if any.
#[allow(clippy::too_many_arguments)]
pub fn configure_system(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    num_cpus: u8,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    let size = smbios::setup_smbios(guest_mem).map_err(Error::SmbiosSetup)?;

    // Place the MP table after the SMBIOS table, aligned to 16 bytes.
    let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size);
    let offset = GuestAddress((offset.0 + 16) & !0xf);
    mptable::setup_mptable(offset, guest_mem, num_cpus).map_err(Error::MpTableSetup)?;

    // Check that the RAM is not smaller than the RSDP start address
    if let Some(rsdp_addr) = rsdp_addr {
        if rsdp_addr.0 > guest_mem.last_addr().0 {
            return Err(super::Error::RsdpPastRamEnd);
        }
    }

    configure_pvh(
        guest_mem,
        cmdline_addr,
        initramfs,
        rsdp_addr,
        sgx_epc_region,
    )
}
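// PVH boot protocol: the guest starts at the kernel's PVH entry point with
// %rbx pointing at the `hvm_start_info` struct written below, which carries
// the command line, memory map, optional initramfs module and RSDP addresses
// (see the note near the end of `configure_pvh`).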
fn configure_pvh(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336e_c578;

    let mut start_info: StartInfoWrapper = StartInfoWrapper(hvm_start_info::default());

    start_info.0.magic = XEN_HVM_START_MAGIC_VALUE;
    start_info.0.version = 1; // pvh has version 1
    start_info.0.nr_modules = 0;
    start_info.0.cmdline_paddr = cmdline_addr.raw_value() as u64;
    start_info.0.memmap_paddr = layout::MEMMAP_START.raw_value();

    if let Some(rsdp_addr) = rsdp_addr {
        start_info.0.rsdp_paddr = rsdp_addr.0;
    }

    if let Some(initramfs_config) = initramfs {
        // The initramfs has been written to guest memory already, here we just need to
        // create the module structure that describes it.
        let ramdisk_mod: ModlistEntryWrapper = ModlistEntryWrapper(hvm_modlist_entry {
            paddr: initramfs_config.address.raw_value(),
            size: initramfs_config.size as u64,
            ..Default::default()
        });

        start_info.0.nr_modules += 1;
        start_info.0.modlist_paddr = layout::MODLIST_START.raw_value();

        // Write the modlist struct to guest memory.
        guest_mem
            .write_obj(ramdisk_mod, layout::MODLIST_START)
            .map_err(super::Error::ModlistSetup)?;
    }

    // Vector to hold the memory maps which need to be written to guest memory
    // at MEMMAP_START after all of the mappings are recorded.
    let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

    // Create the memory map entries.
    add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);

    let mem_end = guest_mem.last_addr();

    if mem_end < layout::MEM_32BIT_RESERVED_START {
        add_memmap_entry(
            &mut memmap,
            layout::HIGH_RAM_START.raw_value(),
            mem_end.unchecked_offset_from(layout::HIGH_RAM_START) + 1,
            E820_RAM,
        );
    } else {
        add_memmap_entry(
            &mut memmap,
            layout::HIGH_RAM_START.raw_value(),
            layout::MEM_32BIT_RESERVED_START.unchecked_offset_from(layout::HIGH_RAM_START),
            E820_RAM,
        );
        if mem_end > layout::RAM_64BIT_START {
            add_memmap_entry(
                &mut memmap,
                layout::RAM_64BIT_START.raw_value(),
                mem_end.unchecked_offset_from(layout::RAM_64BIT_START) + 1,
                E820_RAM,
            );
        }
    }

    add_memmap_entry(
        &mut memmap,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    );

    if let Some(sgx_epc_region) = sgx_epc_region {
        add_memmap_entry(
            &mut memmap,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size() as u64,
            E820_RESERVED,
        );
    }

    start_info.0.memmap_entries = memmap.len() as u32;

    // Copy the vector with the memmap table to the MEMMAP_START address
    // which is already saved in the memmap_paddr field of hvm_start_info struct.
    let mut memmap_start_addr = layout::MEMMAP_START;

    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.0.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, create a MemmapTableEntryWrapper
    // and write it to guest memory.
    for memmap_entry in memmap {
        let map_entry_wrapper: MemmapTableEntryWrapper = MemmapTableEntryWrapper(memmap_entry);

        guest_mem
            .write_obj(map_entry_wrapper, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at the PVH_INFO_START
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}

fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
    // Add the table entry to the vector
    memmap.push(hvm_memmap_table_entry {
        addr,
        size,
        type_: mem_type,
        reserved: 0,
    });
}
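// Illustrative resulting map for a small (< 3.25 GiB) guest without SGX:
// one E820_RAM entry covering [0, EBDA_START), one E820_RAM entry covering
// [HIGH_RAM_START, end of RAM], and one E820_RESERVED entry covering the PCI
// MMCONFIG window; larger guests get an extra E820_RAM entry above
// RAM_64BIT_START, as built by `configure_pvh` above.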
/// Returns the memory address where the initramfs could be loaded.
pub fn initramfs_load_addr(
    guest_mem: &GuestMemoryMmap,
    initramfs_size: usize,
) -> super::Result<u64> {
    let first_region = guest_mem
        .find_region(GuestAddress::new(0))
        .ok_or(super::Error::InitramfsAddress)?;
    // It's safe to cast to usize because the size of a region can't be greater than usize.
    let lowmem_size = first_region.len() as usize;

    if lowmem_size < initramfs_size {
        return Err(super::Error::InitramfsAddress);
    }

    let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
    Ok(aligned_addr)
}

pub fn get_host_cpu_phys_bits() -> u8 {
    unsafe {
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
        let reduced = if leaf.eax >= 0x8000_001f
            && leaf.ebx == 0x6874_7541 // Vendor ID: AuthenticAMD
            && leaf.ecx == 0x444d_4163
            && leaf.edx == 0x6974_6e65
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if leaf.eax >= 0x8000_0008 {
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            36
        }
    }
}

fn update_cpuid_topology(
    cpuid: &mut CpuId,
    threads_per_core: u8,
    cores_per_die: u8,
    dies_per_package: u8,
) {
    let thread_width = 8 - (threads_per_core - 1).leading_zeros();
    let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
    let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;

    // CPU Topology leaf 0xb
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(1),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);

    // CPU Topology leaf 0x1f
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(1),
        CpuidReg::EBX,
        u32::from(cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(2),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);
}
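// Worked example for the width computation above: threads_per_core=2,
// cores_per_die=4, dies_per_package=1 gives thread_width=1, core_width=3,
// die_width=3, i.e. the APIC ID is carved into bit [0] for the thread,
// bits [1..2] for the core, and a zero-width die field.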
// The goal is to update the CPUID sub-leaves to reflect the number of EPC
// sections exposed to the guest.
fn update_cpuid_sgx(cpuid: &mut CpuId, epc_sections: Vec<SgxEpcSection>) -> Result<(), Error> {
    // Something's wrong if there's no EPC section.
    if epc_sections.is_empty() {
        return Err(Error::NoSgxEpcSection);
    }
    // We can't go further if the hypervisor does not support the SGX feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
        return Err(Error::MissingSgxFeature);
    }
    // We can't go further if the hypervisor does not support the SGX_LC feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
        return Err(Error::MissingSgxLaunchControlFeature);
    }

    // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
    // properties such as confidentiality and integrity.
    let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };

    for (i, epc_section) in epc_sections.iter().enumerate() {
        let subleaf_idx = i + 2;
        let start = epc_section.start().raw_value();
        let size = epc_section.size() as u64;
        let eax = (start & 0xffff_f000) as u32 | 0x1;
        let ebx = (start >> 32) as u32;
        let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
        let edx = (size >> 32) as u32;
        // SGX EPC enumeration leaf 0x12
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
    }

    // Add one NULL entry to terminate the dynamic list
    let subleaf_idx = epc_sections.len() + 2;
    // SGX EPC enumeration leaf 0x12
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn regions_lt_4gb() {
        let regions = arch_memory_regions(1 << 29);
        assert_eq!(3, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(1usize << 29, regions[0].1);
    }

    #[test]
    fn regions_gt_4gb() {
        let regions = arch_memory_regions((1 << 32) + 0x8000);
        assert_eq!(4, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(1 << 32), regions[1].0);
    }
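    // A minimal sketch added for illustration (assumes `CpuId::new` from the
    // re-exported FAM-struct wrapper API is available here): round-trip a
    // synthetic leaf through set_cpuid_reg() and read the bit back with
    // is_feature_enabled().
    #[test]
    fn test_cpuid_patch_roundtrip() {
        let mut cpuid = CpuId::new(0).unwrap();
        // No entry for this leaf exists yet, so set_cpuid_reg() appends one.
        CpuidPatch::set_cpuid_reg(&mut cpuid, 0x4000_00ff, Some(0), CpuidReg::EAX, 1 << 7);
        assert!(CpuidPatch::is_feature_enabled(
            &cpuid,
            0x4000_00ff,
            0,
            CpuidReg::EAX,
            7
        ));
    }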
    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let config_err = configure_system(
            &gm,
            GuestAddress(0),
            &None,
            1,
            Some(layout::RSDP_POINTER),
            None,
        );
        assert!(config_err.is_err());

        // Now assigning some memory that falls before the 32bit memory hole.
        let mem_size = 128 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();

        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        // Now assigning some memory that is equal to the start of the 32bit memory hole.
        let mem_size = 3328 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        // Now assigning some memory that falls after the 32bit memory hole.
        let mem_size = 3330 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();
    }

    #[test]
    fn test_add_memmap_entry() {
        let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

        let expected_memmap = vec![
            hvm_memmap_table_entry {
                addr: 0x0,
                size: 0x1000,
                type_: E820_RAM,
                ..Default::default()
            },
            hvm_memmap_table_entry {
                addr: 0x10000,
                size: 0xa000,
                type_: E820_RESERVED,
                ..Default::default()
            },
        ];

        add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM);
        add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED);

        assert_eq!(format!("{:?}", memmap), format!("{:?}", expected_memmap));
    }
}