// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use std::sync::Arc;
pub mod interrupts;
pub mod layout;
mod mpspec;
mod mptable;
pub mod regs;
use crate::GuestMemoryMmap;
use crate::InitramfsConfig;
use crate::RegionType;
use hypervisor::{CpuId, CpuIdEntry, HypervisorError, CPUID_FLAG_VALID_INDEX};
use linux_loader::loader::bootparam::boot_params;
use linux_loader::loader::elf::start_info::{
    hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
};
use std::collections::BTreeMap;
use std::mem;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
    GuestMemoryRegion, GuestUsize,
};
mod smbios;
use std::arch::x86_64;
#[cfg(feature = "tdx")]
pub mod tdx;

// CPUID feature bits
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // TSC deadline timer ecx bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR edx bit.

// KVM feature bits
const KVM_FEATURE_ASYNC_PF_INT_BIT: u8 = 14;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;
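
// Illustrative note (an addition, not in the original source): these
// constants are bit positions, consumed as shift amounts when CPUID entries
// are patched, e.g. generate_common_cpuid() below clears a KVM feature with
//
//     entry.eax &= !(1 << KVM_FEATURE_ASYNC_PF_INT_BIT);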

#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
}

const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;

#[derive(Clone)]
pub struct SgxEpcSection {
    start: GuestAddress,
    size: GuestUsize,
}

impl SgxEpcSection {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}

#[derive(Clone)]
pub struct SgxEpcRegion {
    start: GuestAddress,
    size: GuestUsize,
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// * the type that is implementing the trait is foreign or
// * all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct StartInfoWrapper(hvm_start_info);

// It is safe to initialize StartInfoWrapper, which is a wrapper over `hvm_start_info` (a series of ints).
unsafe impl ByteValued for StartInfoWrapper {}

#[derive(Copy, Clone, Default)]
struct MemmapTableEntryWrapper(hvm_memmap_table_entry);

unsafe impl ByteValued for MemmapTableEntryWrapper {}

#[derive(Copy, Clone, Default)]
struct ModlistEntryWrapper(hvm_modlist_entry);

unsafe impl ByteValued for ModlistEntryWrapper {}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// * the type that is implementing the trait is foreign or
// * all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct BootParamsWrapper(boot_params);

// It is safe to initialize BootParamsWrapper, which is a wrapper over `boot_params` (a series of ints).
unsafe impl ByteValued for BootParamsWrapper {}
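
// Minimal sketch (an addition, not from the original source) of what the
// wrappers enable: once a local newtype implements `ByteValued`, the foreign
// guest ABI structure can be serialized into guest memory through
// vm-memory's `Bytes::write_obj()`:
//
//     let params = BootParamsWrapper(boot_params::default());
//     guest_mem.write_obj(params, load_addr)?; // `load_addr` is hypothetical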

#[derive(Debug)]
pub enum Error {
    /// Error writing MP table to memory.
    MpTableSetup(mptable::Error),

    /// Error configuring the general purpose registers
    RegsConfiguration(regs::Error),

    /// Error configuring the special registers
    SregsConfiguration(regs::Error),

    /// Error configuring the floating point related registers
    FpuConfiguration(regs::Error),

    /// Error configuring the MSR registers
    MsrsConfiguration(regs::Error),

    /// Failed to set supported CPUs.
    SetSupportedCpusFailed(anyhow::Error),

    /// Cannot set the local interruption due to bad configuration.
    LocalIntConfiguration(anyhow::Error),

    /// Error setting up SMBIOS table
    SmbiosSetup(smbios::Error),

    /// Could not find any SGX EPC section
    NoSgxEpcSection,

    /// Missing SGX CPU feature
    MissingSgxFeature,

    /// Missing SGX_LC CPU feature
    MissingSgxLaunchControlFeature,

    /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
    CpuidGetSupported(HypervisorError),

    /// Error populating CPUID with KVM HyperV emulation details
    CpuidKvmHyperV(vmm_sys_util::fam::Error),

    /// Error populating CPUID with CPU identification
    CpuidIdentification(vmm_sys_util::fam::Error),
}

impl From<Error> for super::Error {
    fn from(e: Error) -> super::Error {
        super::Error::X86_64Setup(e)
    }
}

#[allow(dead_code, clippy::upper_case_acronyms)]
#[derive(Copy, Clone)]
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}

pub struct CpuidPatch {
    pub function: u32,
    pub index: u32,
    pub flags_bit: Option<u8>,
    pub eax_bit: Option<u8>,
    pub ebx_bit: Option<u8>,
    pub ecx_bit: Option<u8>,
    pub edx_bit: Option<u8>,
}
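
// Illustrative usage (an addition for clarity): generate_common_cpuid() below
// builds patches of this shape, e.g. setting the hypervisor-present bit in
// the ECX register of leaf 0x1:
//
//     CpuidPatch {
//         function: 1,
//         index: 0,
//         flags_bit: None,
//         eax_bit: None,
//         ebx_bit: None,
//         ecx_bit: Some(HYPERVISOR_ECX_BIT),
//         edx_bit: None,
//     }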

impl CpuidPatch {
    pub fn set_cpuid_reg(
        cpuid: &mut CpuId,
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
        value: u32,
    ) {
        let entries = cpuid.as_mut_slice();

        let mut entry_found = false;
        for entry in entries.iter_mut() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                entry_found = true;
                match reg {
                    CpuidReg::EAX => entry.eax = value,
                    CpuidReg::EBX => entry.ebx = value,
                    CpuidReg::ECX => entry.ecx = value,
                    CpuidReg::EDX => entry.edx = value,
                }
            }
        }

        if entry_found {
            return;
        }

        // Entry not found, so let's add it.
        if let Some(index) = index {
            let mut entry = CpuIdEntry {
                function,
                index,
                flags: CPUID_FLAG_VALID_INDEX,
                ..Default::default()
            };
            match reg {
                CpuidReg::EAX => entry.eax = value,
                CpuidReg::EBX => entry.ebx = value,
                CpuidReg::ECX => entry.ecx = value,
                CpuidReg::EDX => entry.edx = value,
            }

            if let Err(e) = cpuid.push(entry) {
                error!("Failed adding new CPUID entry: {:?}", e);
            }
        }
    }

    pub fn patch_cpuid(cpuid: &mut CpuId, patches: Vec<CpuidPatch>) {
        let entries = cpuid.as_mut_slice();

        for entry in entries.iter_mut() {
            for patch in patches.iter() {
                if entry.function == patch.function && entry.index == patch.index {
                    if let Some(flags_bit) = patch.flags_bit {
                        entry.flags |= 1 << flags_bit;
                    }
                    if let Some(eax_bit) = patch.eax_bit {
                        entry.eax |= 1 << eax_bit;
                    }
                    if let Some(ebx_bit) = patch.ebx_bit {
                        entry.ebx |= 1 << ebx_bit;
                    }
                    if let Some(ecx_bit) = patch.ecx_bit {
                        entry.ecx |= 1 << ecx_bit;
                    }
                    if let Some(edx_bit) = patch.edx_bit {
                        entry.edx |= 1 << edx_bit;
                    }
                }
            }
        }
    }

    pub fn is_feature_enabled(
        cpuid: &CpuId,
        function: u32,
        index: u32,
        reg: CpuidReg,
        feature_bit: usize,
    ) -> bool {
        let entries = cpuid.as_slice();
        let mask = 1 << feature_bit;

        for entry in entries.iter() {
            if entry.function == function && entry.index == index {
                let reg_val = match reg {
                    CpuidReg::EAX => entry.eax,
                    CpuidReg::EBX => entry.ebx,
                    CpuidReg::ECX => entry.ecx,
                    CpuidReg::EDX => entry.edx,
                };

                return (reg_val & mask) == mask;
            }
        }

        false
    }
}

pub fn generate_common_cpuid(
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    topology: Option<(u8, u8, u8)>,
    sgx_epc_sections: Option<Vec<SgxEpcSection>>,
    phys_bits: u8,
    kvm_hyperv: bool,
    #[cfg(feature = "tdx")] tdx_enabled: bool,
) -> super::Result<CpuId> {
    let cpuid_patches = vec![
        // Patch TSC deadline timer bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
            edx_bit: None,
        },
        // Patch hypervisor bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(HYPERVISOR_ECX_BIT),
            edx_bit: None,
        },
        // Enable MTRR feature
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: None,
            edx_bit: Some(MTRR_EDX_BIT),
        },
    ];

    // Supported CPUID
    let mut cpuid = hypervisor.get_cpuid().map_err(Error::CpuidGetSupported)?;

    CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2);
    }

    if let Some(sgx_epc_sections) = sgx_epc_sections {
        update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
    }

    // Update some existing CPUID
    for entry in cpuid.as_mut_slice().iter_mut() {
        match entry.function {
            // Set CPU physical bits
            0x8000_0008 => {
                entry.eax = (entry.eax & 0xffff_ff00) | (phys_bits as u32 & 0xff);
            }
            // Disable KVM_FEATURE_ASYNC_PF_INT
            // This is required until we find out why the asynchronous page
            // fault is generating unexpected behavior when using the
            // interrupt mechanism.
            // TODO: Re-enable KVM_FEATURE_ASYNC_PF_INT (#2277)
            0x4000_0001 => {
                entry.eax &= !(1 << KVM_FEATURE_ASYNC_PF_INT_BIT);

                // These features are not supported by TDX
                #[cfg(feature = "tdx")]
                if tdx_enabled {
                    entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT
                        | 1 << KVM_FEATURE_STEAL_TIME_BIT)
                }
            }
            _ => {}
        }
    }

    // Copy the CPU identification string
    for i in 0x8000_0002..=0x8000_0004 {
        cpuid.retain(|c| c.function != i);
        let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
        cpuid
            .push(CpuIdEntry {
                function: i,
                eax: leaf.eax,
                ebx: leaf.ebx,
                ecx: leaf.ecx,
                edx: leaf.edx,
                ..Default::default()
            })
            .map_err(Error::CpuidIdentification)?;
    }

    if kvm_hyperv {
        // Remove conflicting entries
        cpuid.retain(|c| c.function != 0x4000_0000);
        cpuid.retain(|c| c.function != 0x4000_0001);
        // See "Hypervisor Top Level Functional Specification" for details.
        // Compliance with "Hv#1" requires leaves up to 0x4000_000a.
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0000,
                eax: 0x4000_000a, // Maximum cpuid leaf
                ebx: 0x756e_694c, // "Linu"
                ecx: 0x564b_2078, // "x KV"
                edx: 0x7648_204d, // "M Hv"
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0001,
                eax: 0x3123_7648, // "Hv#1"
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0002,
                eax: 0x3839,  // Build number
                ebx: 0xa0000, // Version
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0003,
                eax: 1 << 1 // AccessPartitionReferenceCounter
                    | 1 << 2 // AccessSynicRegs
                    | 1 << 3 // AccessSyntheticTimerRegs
                    | 1 << 9, // AccessPartitionReferenceTsc
                edx: 1 << 3, // CPU dynamic partitioning
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0004,
                eax: 1 << 5, // Recommend relaxed timing
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        for i in 0x4000_0005..=0x4000_000a {
            cpuid
                .push(CpuIdEntry {
                    function: i,
                    ..Default::default()
                })
                .map_err(Error::CpuidKvmHyperV)?;
        }
    }

    Ok(cpuid)
}
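
// Typical call order (an illustrative sketch; the real call sites live
// outside this module): the common CPUID table is generated once per VM,
// then cloned and specialized per vCPU by configure_vcpu() below. With the
// `tdx` feature enabled, generate_common_cpuid() takes an extra
// `tdx_enabled` argument.
//
//     let cpuid = generate_common_cpuid(hypervisor, None, None, phys_bits, false)?;
//     for (id, vcpu) in vcpus.iter().enumerate() {
//         configure_vcpu(vcpu, id as u8, Some(entry_point), &guest_memory, cpuid.clone(), false)?;
//     }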

pub fn configure_vcpu(
    fd: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    kernel_entry_point: Option<EntryPoint>,
    vm_memory: &GuestMemoryAtomic<GuestMemoryMmap>,
    cpuid: CpuId,
    kvm_hyperv: bool,
) -> super::Result<()> {
    // Per-vCPU CPUID changes; common ones are handled via generate_common_cpuid().
    let mut cpuid = cpuid;
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(id));
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, u32::from(id));

    fd.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        fd.enable_hyperv_synic().unwrap();
    }

    regs::setup_msrs(fd).map_err(Error::MsrsConfiguration)?;
    if let Some(kernel_entry_point) = kernel_entry_point {
        regs::setup_regs(fd, kernel_entry_point.entry_addr.raw_value())
            .map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(fd).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&vm_memory.memory(), fd).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(fd).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}

/// Returns a Vec of the valid memory addresses.
/// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64, all addresses are valid from the start of the kernel except a
/// carve-out at the end of the 32-bit address space.
pub fn arch_memory_regions(size: GuestUsize) -> Vec<(GuestAddress, usize, RegionType)> {
    let reserved_memory_gap_start = layout::MEM_32BIT_RESERVED_START
        .checked_add(layout::MEM_32BIT_DEVICES_SIZE)
        .expect("32-bit reserved region is too large");

    let requested_memory_size = GuestAddress(size as u64);
    let mut regions = Vec::new();

    // Case 1: guest memory fits before the gap.
    if size as u64 <= layout::MEM_32BIT_RESERVED_START.raw_value() {
        regions.push((GuestAddress(0), size as usize, RegionType::Ram));
    // Case 2: guest memory extends beyond the gap.
    } else {
        // Push memory before the gap.
        regions.push((
            GuestAddress(0),
            layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
            RegionType::Ram,
        ));
        regions.push((
            layout::RAM_64BIT_START,
            requested_memory_size.unchecked_offset_from(layout::MEM_32BIT_RESERVED_START) as usize,
            RegionType::Ram,
        ));
    }

    // Add the 32-bit device memory hole as a sub region.
    regions.push((
        layout::MEM_32BIT_RESERVED_START,
        layout::MEM_32BIT_DEVICES_SIZE as usize,
        RegionType::SubRegion,
    ));

    // Add the 32-bit reserved memory hole as a sub region.
    regions.push((
        reserved_memory_gap_start,
        (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
        RegionType::Reserved,
    ));

    regions
}
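
// Worked example (illustrative, using the layout constants exercised by the
// tests below: a 32-bit hole starting at 3328 MiB and 64-bit RAM starting at
// 4 GiB): a 4 GiB guest is split into RAM at [0, 3328 MiB) plus RAM at
// [4 GiB, 4 GiB + 768 MiB), with the device (SubRegion) and Reserved entries
// layered over the hole in between.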

/// Configures the system and should be called once per VM before starting vCPU threads.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
/// * `initramfs` - Optional description of where the initramfs was loaded.
/// * `num_cpus` - Number of virtual CPUs the guest will have.
/// * `rsdp_addr` - Optional address of the ACPI RSDP table.
/// * `sgx_epc_region` - Optional SGX EPC region exposed to the guest.
#[allow(clippy::too_many_arguments)]
pub fn configure_system(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    num_cpus: u8,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    let size = smbios::setup_smbios(guest_mem).map_err(Error::SmbiosSetup)?;

    // Place the MP table after the SMBIOS table, aligned to 16 bytes.
    let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size);
    let offset = GuestAddress((offset.0 + 16) & !0xf);
    mptable::setup_mptable(offset, guest_mem, num_cpus).map_err(Error::MpTableSetup)?;

    // Check that the RAM is not smaller than the RSDP start address.
    if let Some(rsdp_addr) = rsdp_addr {
        if rsdp_addr.0 > guest_mem.last_addr().0 {
            return Err(super::Error::RsdpPastRamEnd);
        }
    }

    configure_pvh(
        guest_mem,
        cmdline_addr,
        initramfs,
        rsdp_addr,
        sgx_epc_region,
    )
}

fn configure_pvh(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336e_c578;

    let mut start_info = StartInfoWrapper(hvm_start_info::default());

    start_info.0.magic = XEN_HVM_START_MAGIC_VALUE;
    start_info.0.version = 1; // PVH boot protocol version 1
    start_info.0.nr_modules = 0;
    start_info.0.cmdline_paddr = cmdline_addr.raw_value();
    start_info.0.memmap_paddr = layout::MEMMAP_START.raw_value();

    if let Some(rsdp_addr) = rsdp_addr {
        start_info.0.rsdp_paddr = rsdp_addr.0;
    }

    if let Some(initramfs_config) = initramfs {
        // The initramfs has been written to guest memory already; here we just need to
        // create the module structure that describes it.
        let ramdisk_mod = ModlistEntryWrapper(hvm_modlist_entry {
            paddr: initramfs_config.address.raw_value(),
            size: initramfs_config.size as u64,
            ..Default::default()
        });

        start_info.0.nr_modules += 1;
        start_info.0.modlist_paddr = layout::MODLIST_START.raw_value();

        // Write the modlist struct to guest memory.
        guest_mem
            .write_obj(ramdisk_mod, layout::MODLIST_START)
            .map_err(super::Error::ModlistSetup)?;
    }

    // Vector to hold the memory maps which need to be written to guest memory
    // at MEMMAP_START after all of the mappings are recorded.
    let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

    // Create the memory map entries.
    add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);

    let mem_end = guest_mem.last_addr();

    if mem_end < layout::MEM_32BIT_RESERVED_START {
        add_memmap_entry(
            &mut memmap,
            layout::HIGH_RAM_START.raw_value(),
            mem_end.unchecked_offset_from(layout::HIGH_RAM_START) + 1,
            E820_RAM,
        );
    } else {
        add_memmap_entry(
            &mut memmap,
            layout::HIGH_RAM_START.raw_value(),
            layout::MEM_32BIT_RESERVED_START.unchecked_offset_from(layout::HIGH_RAM_START),
            E820_RAM,
        );
        if mem_end > layout::RAM_64BIT_START {
            add_memmap_entry(
                &mut memmap,
                layout::RAM_64BIT_START.raw_value(),
                mem_end.unchecked_offset_from(layout::RAM_64BIT_START) + 1,
                E820_RAM,
            );
        }
    }

    add_memmap_entry(
        &mut memmap,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    );

    if let Some(sgx_epc_region) = sgx_epc_region {
        add_memmap_entry(
            &mut memmap,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size() as u64,
            E820_RESERVED,
        );
    }

    start_info.0.memmap_entries = memmap.len() as u32;

    // Copy the vector with the memmap table to the MEMMAP_START address,
    // which is already saved in the memmap_paddr field of the hvm_start_info struct.
    let mut memmap_start_addr = layout::MEMMAP_START;

    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.0.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, create a MemmapTableEntryWrapper
    // and write it to guest memory.
    for memmap_entry in memmap {
        let map_entry_wrapper = MemmapTableEntryWrapper(memmap_entry);

        guest_mem
            .write_obj(map_entry_wrapper, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at the PVH_INFO_START
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}

fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
    // Add the table entry to the vector.
    memmap.push(hvm_memmap_table_entry {
        addr,
        size,
        type_: mem_type,
        reserved: 0,
    });
}
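
// Illustrative result (an addition for clarity): for a guest smaller than
// the 32-bit hole, the map built by configure_pvh() holds three entries: low
// RAM up to EBDA_START, RAM from HIGH_RAM_START to the end of memory, and
// the reserved PCI MMCONFIG window. Larger guests get an additional RAM
// entry at RAM_64BIT_START, and SGX adds one reserved entry per EPC region.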

/// Returns the memory address where the initramfs could be loaded.
pub fn initramfs_load_addr(
    guest_mem: &GuestMemoryMmap,
    initramfs_size: usize,
) -> super::Result<u64> {
    let first_region = guest_mem
        .find_region(GuestAddress::new(0))
        .ok_or(super::Error::InitramfsAddress)?;
    // It's safe to cast to usize because the size of a region can't be greater than usize.
    let lowmem_size = first_region.len() as usize;

    if lowmem_size < initramfs_size {
        return Err(super::Error::InitramfsAddress);
    }

    let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
    Ok(aligned_addr)
}

pub fn get_host_cpu_phys_bits() -> u8 {
    unsafe {
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1.
        let reduced = if leaf.eax >= 0x8000_001f
            && leaf.ebx == 0x6874_7541 // Vendor ID: AuthenticAMD
            && leaf.ecx == 0x444d_4163
            && leaf.edx == 0x6974_6e65
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if leaf.eax >= 0x8000_0008 {
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            36
        }
    }
}

fn update_cpuid_topology(
    cpuid: &mut CpuId,
    threads_per_core: u8,
    cores_per_die: u8,
    dies_per_package: u8,
) {
    let thread_width = 8 - (threads_per_core - 1).leading_zeros();
    let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
    let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;

    // CPU topology leaf 0xb
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(1),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);

    // CPU topology leaf 0x1f
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(1),
        CpuidReg::EBX,
        u32::from(cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(2),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);
}
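
// Worked example (illustrative): threads_per_core = 2, cores_per_die = 4 and
// dies_per_package = 1 give thread_width = 1, core_width = 3 and
// die_width = 3, i.e. bit 0 of the x2APIC ID selects the thread, bits 1-2
// select the core, and no extra bits are needed for the die.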

// The goal is to update the CPUID sub-leaves to reflect the number of EPC
// sections exposed to the guest.
fn update_cpuid_sgx(cpuid: &mut CpuId, epc_sections: Vec<SgxEpcSection>) -> Result<(), Error> {
    // Something's wrong if there's no EPC section.
    if epc_sections.is_empty() {
        return Err(Error::NoSgxEpcSection);
    }
    // We can't go further if the hypervisor does not support the SGX feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
        return Err(Error::MissingSgxFeature);
    }
    // We can't go further if the hypervisor does not support the SGX_LC feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
        return Err(Error::MissingSgxLaunchControlFeature);
    }

    // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
    // properties such as confidentiality and integrity.
    let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };

    for (i, epc_section) in epc_sections.iter().enumerate() {
        let subleaf_idx = i + 2;
        let start = epc_section.start().raw_value();
        let size = epc_section.size() as u64;
        let eax = (start & 0xffff_f000) as u32 | 0x1;
        let ebx = (start >> 32) as u32;
        let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
        let edx = (size >> 32) as u32;
        // SGX EPC leaf 0x12
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
    }

    // Add one NULL entry to terminate the dynamic list.
    let subleaf_idx = epc_sections.len() + 2;
    // SGX EPC leaf 0x12
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);

    Ok(())
}
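
// Encoding example (illustrative): an EPC section starting at 0x2_0000_0000
// with a size of 64 MiB yields EAX = 0x1 (low address bits zero, type 1 for
// a valid EPC section), EBX = 0x2 (address bits 63:32), ECX = 0x400_0000
// OR-ed with the host's security property bits, and EDX = 0x0 (size bits
// 63:32).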

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn regions_lt_4gb() {
        let regions = arch_memory_regions(1 << 29);
        assert_eq!(3, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(1usize << 29, regions[0].1);
    }

    #[test]
    fn regions_gt_4gb() {
        let regions = arch_memory_regions((1 << 32) + 0x8000);
        assert_eq!(4, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(1 << 32), regions[1].0);
    }

    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let config_err = configure_system(
            &gm,
            GuestAddress(0),
            &None,
            1,
            Some(layout::RSDP_POINTER),
            None,
        );
        assert!(config_err.is_err());

        // Now assigning some memory that falls before the 32-bit memory hole.
        let mem_size = 128 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();

        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        // Now assigning some memory that is equal to the start of the 32-bit memory hole.
        let mem_size = 3328 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        // Now assigning some memory that falls after the 32-bit memory hole.
        let mem_size = 3330 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();
    }

    #[test]
    fn test_add_memmap_entry() {
        let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

        let expected_memmap = vec![
            hvm_memmap_table_entry {
                addr: 0x0,
                size: 0x1000,
                type_: E820_RAM,
                ..Default::default()
            },
            hvm_memmap_table_entry {
                addr: 0x10000,
                size: 0xa000,
                type_: E820_RESERVED,
                ..Default::default()
            },
        ];

        add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM);
        add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED);

        assert_eq!(format!("{:?}", memmap), format!("{:?}", expected_memmap));
    }
}