xref: /cloud-hypervisor/arch/src/x86_64/mod.rs (revision 9af2968a7dc47b89bf07ea9dc5e735084efcfa3a)
// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
use std::sync::Arc;
pub mod interrupts;
pub mod layout;
mod mpspec;
mod mptable;
pub mod regs;
use crate::GuestMemoryMmap;
use crate::InitramfsConfig;
use crate::RegionType;
use hypervisor::{CpuId, CpuIdEntry, HypervisorError, CPUID_FLAG_VALID_INDEX};
use linux_loader::loader::bootparam::boot_params;
use linux_loader::loader::elf::start_info::{
    hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
};
use std::collections::BTreeMap;
use std::mem;
use vm_memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
    GuestMemoryRegion, GuestUsize,
};
mod smbios;
use std::arch::x86_64;
#[cfg(feature = "tdx")]
pub mod tdx;

// CPUID feature bits
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // TSC deadline timer ecx bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR (Memory Type Range Registers) edx bit.

// KVM feature bits
const KVM_FEATURE_ASYNC_PF_INT_BIT: u8 = 14;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;

#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
}

const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;

#[derive(Clone)]
pub struct SgxEpcSection {
    start: GuestAddress,
    size: GuestUsize,
}

impl SgxEpcSection {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}

#[derive(Clone)]
pub struct SgxEpcRegion {
    start: GuestAddress,
    size: GuestUsize,
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// *    the type that is implementing the trait is foreign or
// *    all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct StartInfoWrapper(hvm_start_info);

// It is safe to implement ByteValued for StartInfoWrapper, which is a wrapper over
// `hvm_start_info` (a series of ints).
unsafe impl ByteValued for StartInfoWrapper {}

#[derive(Copy, Clone, Default)]
struct MemmapTableEntryWrapper(hvm_memmap_table_entry);

// It is safe to implement ByteValued for MemmapTableEntryWrapper, which wraps a
// plain struct of ints.
unsafe impl ByteValued for MemmapTableEntryWrapper {}

#[derive(Copy, Clone, Default)]
struct ModlistEntryWrapper(hvm_modlist_entry);

// It is safe to implement ByteValued for ModlistEntryWrapper, which wraps a
// plain struct of ints.
unsafe impl ByteValued for ModlistEntryWrapper {}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// *    the type that is implementing the trait is foreign or
// *    all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct BootParamsWrapper(boot_params);

// It is safe to implement ByteValued for BootParamsWrapper, which is a wrapper over
// `boot_params` (a series of ints).
unsafe impl ByteValued for BootParamsWrapper {}

#[derive(Debug)]
pub enum Error {
    /// Error writing MP table to memory.
    MpTableSetup(mptable::Error),

    /// Error configuring the general purpose registers
    RegsConfiguration(regs::Error),

    /// Error configuring the special registers
    SregsConfiguration(regs::Error),

    /// Error configuring the floating point related registers
    FpuConfiguration(regs::Error),

    /// Error configuring the MSR registers
    MsrsConfiguration(regs::Error),

    /// Failed to set supported CPUs.
    SetSupportedCpusFailed(anyhow::Error),

    /// Cannot set the local interrupt due to bad configuration.
    LocalIntConfiguration(anyhow::Error),

    /// Error setting up SMBIOS table
    SmbiosSetup(smbios::Error),

    /// Could not find any SGX EPC section
    NoSgxEpcSection,

    /// Missing SGX CPU feature
    MissingSgxFeature,

    /// Missing SGX_LC CPU feature
    MissingSgxLaunchControlFeature,

    /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
    CpuidGetSupported(HypervisorError),

    /// Error populating CPUID with KVM HyperV emulation details
    CpuidKvmHyperV(vmm_sys_util::fam::Error),

    /// Error populating CPUID with CPU identification
    CpuidIdentification(vmm_sys_util::fam::Error),
}

impl From<Error> for super::Error {
    fn from(e: Error) -> super::Error {
        super::Error::X86_64Setup(e)
    }
}

#[allow(dead_code, clippy::upper_case_acronyms)]
#[derive(Copy, Clone)]
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}

pub struct CpuidPatch {
    pub function: u32,
    pub index: u32,
    pub flags_bit: Option<u8>,
    pub eax_bit: Option<u8>,
    pub ebx_bit: Option<u8>,
    pub ecx_bit: Option<u8>,
    pub edx_bit: Option<u8>,
}

impl CpuidPatch {
    pub fn set_cpuid_reg(
        cpuid: &mut CpuId,
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
        value: u32,
    ) {
        let entries = cpuid.as_mut_slice();

        let mut entry_found = false;
        for entry in entries.iter_mut() {
            if entry.function == function && index.map_or(true, |idx| idx == entry.index) {
                entry_found = true;
                match reg {
                    CpuidReg::EAX => {
                        entry.eax = value;
                    }
                    CpuidReg::EBX => {
                        entry.ebx = value;
                    }
                    CpuidReg::ECX => {
                        entry.ecx = value;
                    }
                    CpuidReg::EDX => {
                        entry.edx = value;
                    }
                }
            }
        }

        if entry_found {
            return;
        }

        // Entry not found, so let's add it.
        if let Some(index) = index {
            let mut entry = CpuIdEntry {
                function,
                index,
                flags: CPUID_FLAG_VALID_INDEX,
                ..Default::default()
            };
            match reg {
                CpuidReg::EAX => {
                    entry.eax = value;
                }
                CpuidReg::EBX => {
                    entry.ebx = value;
                }
                CpuidReg::ECX => {
                    entry.ecx = value;
                }
                CpuidReg::EDX => {
                    entry.edx = value;
                }
            }

            if let Err(e) = cpuid.push(entry) {
                error!("Failed adding new CPUID entry: {:?}", e);
            }
        }
    }

    pub fn patch_cpuid(cpuid: &mut CpuId, patches: Vec<CpuidPatch>) {
        let entries = cpuid.as_mut_slice();

        for entry in entries.iter_mut() {
            for patch in patches.iter() {
                if entry.function == patch.function && entry.index == patch.index {
                    if let Some(flags_bit) = patch.flags_bit {
                        entry.flags |= 1 << flags_bit;
                    }
                    if let Some(eax_bit) = patch.eax_bit {
                        entry.eax |= 1 << eax_bit;
                    }
                    if let Some(ebx_bit) = patch.ebx_bit {
                        entry.ebx |= 1 << ebx_bit;
                    }
                    if let Some(ecx_bit) = patch.ecx_bit {
                        entry.ecx |= 1 << ecx_bit;
                    }
                    if let Some(edx_bit) = patch.edx_bit {
                        entry.edx |= 1 << edx_bit;
                    }
                }
            }
        }
    }

    pub fn is_feature_enabled(
        cpuid: &CpuId,
        function: u32,
        index: u32,
        reg: CpuidReg,
        feature_bit: usize,
    ) -> bool {
        let entries = cpuid.as_slice();
        let mask = 1 << feature_bit;

        for entry in entries.iter() {
            if entry.function == function && entry.index == index {
                let reg_val = match reg {
                    CpuidReg::EAX => entry.eax,
                    CpuidReg::EBX => entry.ebx,
                    CpuidReg::ECX => entry.ecx,
                    CpuidReg::EDX => entry.edx,
                };

                return (reg_val & mask) == mask;
            }
        }

        false
    }
}

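// Example (a sketch): enabling the MTRR bit (EDX bit 12) on leaf 0x1 through a
// patch, mirroring the patch list built in generate_common_cpuid() below.
//
//     CpuidPatch::patch_cpuid(
//         &mut cpuid,
//         vec![CpuidPatch {
//             function: 1,
//             index: 0,
//             flags_bit: None,
//             eax_bit: None,
//             ebx_bit: None,
//             ecx_bit: None,
//             edx_bit: Some(MTRR_EDX_BIT),
//         }],
//     );
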
pub fn generate_common_cpuid(
    hypervisor: Arc<dyn hypervisor::Hypervisor>,
    topology: Option<(u8, u8, u8)>,
    sgx_epc_sections: Option<Vec<SgxEpcSection>>,
    phys_bits: u8,
    kvm_hyperv: bool,
    #[cfg(feature = "tdx")] tdx_enabled: bool,
) -> super::Result<CpuId> {
    let cpuid_patches = vec![
        // Patch TSC deadline timer bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
            edx_bit: None,
        },
        // Patch hypervisor bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(HYPERVISOR_ECX_BIT),
            edx_bit: None,
        },
        // Enable MTRR feature
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: None,
            edx_bit: Some(MTRR_EDX_BIT),
        },
    ];

    // Supported CPUID
    let mut cpuid = hypervisor.get_cpuid().map_err(Error::CpuidGetSupported)?;

    CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2);
    }

    if let Some(sgx_epc_sections) = sgx_epc_sections {
        update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
    }

    // Update some existing CPUID leaves
    for entry in cpuid.as_mut_slice().iter_mut() {
        match entry.function {
            // Set CPU physical bits
            0x8000_0008 => {
                entry.eax = (entry.eax & 0xffff_ff00) | (phys_bits as u32 & 0xff);
            }
            // Disable KVM_FEATURE_ASYNC_PF_INT
            // This is required until we find out why the asynchronous page
            // fault is generating unexpected behavior when using the
            // interrupt mechanism.
            // TODO: Re-enable KVM_FEATURE_ASYNC_PF_INT (#2277)
            0x4000_0001 => {
                entry.eax &= !(1 << KVM_FEATURE_ASYNC_PF_INT_BIT);

                // These features are not supported by TDX
                #[cfg(feature = "tdx")]
                if tdx_enabled {
                    entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT
                        | 1 << KVM_FEATURE_STEAL_TIME_BIT)
                }
            }
            _ => {}
        }
    }

    // Copy CPU identification string
    for i in 0x8000_0002..=0x8000_0004 {
        cpuid.retain(|c| c.function != i);
        let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
        cpuid
            .push(CpuIdEntry {
                function: i,
                eax: leaf.eax,
                ebx: leaf.ebx,
                ecx: leaf.ecx,
                edx: leaf.edx,
                ..Default::default()
            })
            .map_err(Error::CpuidIdentification)?;
    }

    if kvm_hyperv {
        // Remove conflicting entries
        cpuid.retain(|c| c.function != 0x4000_0000);
        cpuid.retain(|c| c.function != 0x4000_0001);
        // See "Hypervisor Top Level Functional Specification" for details
        // Compliance with "Hv#1" requires leaves up to 0x4000_000a
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0000,
                eax: 0x4000_000a, // Maximum cpuid leaf
                ebx: 0x756e694c,  // "Linu"
                ecx: 0x564b2078,  // "x KV"
                edx: 0x7648204d,  // "M Hv"
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0001,
                eax: 0x31237648, // "Hv#1"
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0002,
                eax: 0x3839,  // Build number
                ebx: 0xa0000, // Version
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0003,
                eax: 1 << 1 // AccessPartitionReferenceCounter
                   | 1 << 2 // AccessSynicRegs
                   | 1 << 3 // AccessSyntheticTimerRegs
                   | 1 << 9, // AccessPartitionReferenceTsc
                edx: 1 << 3, // CPU dynamic partitioning
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        cpuid
            .push(CpuIdEntry {
                function: 0x4000_0004,
                eax: 1 << 5, // Recommend relaxed timing
                ..Default::default()
            })
            .map_err(Error::CpuidKvmHyperV)?;
        for i in 0x4000_0005..=0x4000_000a {
            cpuid
                .push(CpuIdEntry {
                    function: i,
                    ..Default::default()
                })
                .map_err(Error::CpuidKvmHyperV)?;
        }
    }

    Ok(cpuid)
}
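
// Illustrative call (a sketch; `hv` is a hypothetical hypervisor handle): two
// threads per core, four cores per die, one die per package, 46 physical
// address bits, no KVM Hyper-V emulation. With the `tdx` feature compiled in,
// a trailing `tdx_enabled` flag is also required.
//
//     let cpuid = generate_common_cpuid(hv, Some((2, 4, 1)), None, 46, false)?;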

pub fn configure_vcpu(
    fd: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    kernel_entry_point: Option<EntryPoint>,
    vm_memory: &GuestMemoryAtomic<GuestMemoryMmap>,
    cpuid: CpuId,
    kvm_hyperv: bool,
) -> super::Result<()> {
    // Per vCPU CPUID changes; common are handled via generate_common_cpuid()
    let mut cpuid = cpuid;
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(id));
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, u32::from(id));

    fd.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        fd.enable_hyperv_synic().unwrap();
    }

    regs::setup_msrs(fd).map_err(Error::MsrsConfiguration)?;
    if let Some(kernel_entry_point) = kernel_entry_point {
        // Safe to unwrap because this method is called after the VM is configured
        regs::setup_regs(fd, kernel_entry_point.entry_addr.raw_value())
            .map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(fd).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&vm_memory.memory(), fd).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(fd).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}

/// Returns a Vec of the valid memory addresses.
/// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64 all addresses are valid from the start of the kernel except a
/// carve-out at the end of the 32-bit address space.
pub fn arch_memory_regions(size: GuestUsize) -> Vec<(GuestAddress, usize, RegionType)> {
    let reserved_memory_gap_start = layout::MEM_32BIT_RESERVED_START
        .checked_add(layout::MEM_32BIT_DEVICES_SIZE)
        .expect("32-bit reserved region is too large");

    let requested_memory_size = GuestAddress(size as u64);
    let mut regions = Vec::new();

    // Case 1: guest memory fits before the gap
    if size as u64 <= layout::MEM_32BIT_RESERVED_START.raw_value() {
        regions.push((GuestAddress(0), size as usize, RegionType::Ram));
    // Case 2: guest memory extends beyond the gap
    } else {
        // Push memory before the gap
        regions.push((
            GuestAddress(0),
            layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
            RegionType::Ram,
        ));
        regions.push((
            layout::RAM_64BIT_START,
            requested_memory_size.unchecked_offset_from(layout::MEM_32BIT_RESERVED_START) as usize,
            RegionType::Ram,
        ));
    }

    // Add the 32-bit device memory hole as a sub region.
    regions.push((
        layout::MEM_32BIT_RESERVED_START,
        layout::MEM_32BIT_DEVICES_SIZE as usize,
        RegionType::SubRegion,
    ));

    // Add the 32-bit reserved memory hole as a sub region.
    regions.push((
        reserved_memory_gap_start,
        (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
        RegionType::Reserved,
    ));

    regions
}
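
// Worked example (a sketch): a guest smaller than the 32-bit reserved hole gets
// a single RAM region plus the two hole sub-regions:
//
//     let regions = arch_memory_regions(512 << 20);
//     // regions[0]: (GuestAddress(0), 512 MiB, RegionType::Ram)
//     // regions[1]: (MEM_32BIT_RESERVED_START, MEM_32BIT_DEVICES_SIZE, RegionType::SubRegion)
//     // regions[2]: (MEM_32BIT_RESERVED_START + MEM_32BIT_DEVICES_SIZE,
//     //              MEM_32BIT_RESERVED_SIZE - MEM_32BIT_DEVICES_SIZE, RegionType::Reserved)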

/// Configures the system and should be called once per vm before starting vcpu threads.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
/// * `initramfs` - Optional initramfs description (address and size in guest memory).
/// * `_num_cpus` - Number of virtual CPUs the guest will have.
/// * `rsdp_addr` - Optional address of the ACPI RSDP table.
/// * `sgx_epc_region` - Optional SGX EPC region to expose to the guest.
#[allow(clippy::too_many_arguments)]
pub fn configure_system(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    _num_cpus: u8,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    let size = smbios::setup_smbios(guest_mem).map_err(Error::SmbiosSetup)?;

    // Place the MP table after the SMBIOS table, aligned to 16 bytes
    let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size);
    let offset = GuestAddress((offset.0 + 16) & !0xf);
    mptable::setup_mptable(offset, guest_mem, _num_cpus).map_err(Error::MpTableSetup)?;

    // Check that the RAM is not smaller than the RSDP start address
    if let Some(rsdp_addr) = rsdp_addr {
        if rsdp_addr.0 > guest_mem.last_addr().0 {
            return Err(super::Error::RsdpPastRamEnd);
        }
    }

    configure_pvh(
        guest_mem,
        cmdline_addr,
        initramfs,
        rsdp_addr,
        sgx_epc_region,
    )
}

fn configure_pvh(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;

    let mut start_info: StartInfoWrapper = StartInfoWrapper(hvm_start_info::default());

    start_info.0.magic = XEN_HVM_START_MAGIC_VALUE;
    start_info.0.version = 1; // PVH boot protocol version 1
    start_info.0.nr_modules = 0;
    start_info.0.cmdline_paddr = cmdline_addr.raw_value();
    start_info.0.memmap_paddr = layout::MEMMAP_START.raw_value();

    if let Some(rsdp_addr) = rsdp_addr {
        start_info.0.rsdp_paddr = rsdp_addr.0;
    }

    if let Some(initramfs_config) = initramfs {
        // The initramfs has been written to guest memory already, here we just need to
        // create the module structure that describes it.
        let ramdisk_mod: ModlistEntryWrapper = ModlistEntryWrapper(hvm_modlist_entry {
            paddr: initramfs_config.address.raw_value(),
            size: initramfs_config.size as u64,
            ..Default::default()
        });

        start_info.0.nr_modules += 1;
        start_info.0.modlist_paddr = layout::MODLIST_START.raw_value();

        // Write the modlist struct to guest memory.
        guest_mem
            .write_obj(ramdisk_mod, layout::MODLIST_START)
            .map_err(super::Error::ModlistSetup)?;
    }

    // Vector to hold the memory map entries, which need to be written to guest
    // memory at MEMMAP_START after all of the mappings are recorded.
    let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

    // Create the memory map entries.
    add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);

    let mem_end = guest_mem.last_addr();

    if mem_end < layout::MEM_32BIT_RESERVED_START {
        add_memmap_entry(
            &mut memmap,
            layout::HIGH_RAM_START.raw_value(),
            mem_end.unchecked_offset_from(layout::HIGH_RAM_START) + 1,
            E820_RAM,
        );
    } else {
        add_memmap_entry(
            &mut memmap,
            layout::HIGH_RAM_START.raw_value(),
            layout::MEM_32BIT_RESERVED_START.unchecked_offset_from(layout::HIGH_RAM_START),
            E820_RAM,
        );
        if mem_end > layout::RAM_64BIT_START {
            add_memmap_entry(
                &mut memmap,
                layout::RAM_64BIT_START.raw_value(),
                mem_end.unchecked_offset_from(layout::RAM_64BIT_START) + 1,
                E820_RAM,
            );
        }
    }

    add_memmap_entry(
        &mut memmap,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    );

    if let Some(sgx_epc_region) = sgx_epc_region {
        add_memmap_entry(
            &mut memmap,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size() as u64,
            E820_RESERVED,
        );
    }

    start_info.0.memmap_entries = memmap.len() as u32;

    // Copy the vector with the memmap table to the MEMMAP_START address
    // which is already saved in the memmap_paddr field of hvm_start_info struct.
    let mut memmap_start_addr = layout::MEMMAP_START;

    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.0.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, create a MemmapTableEntryWrapper
    // and write it to guest memory.
    for memmap_entry in memmap {
        let map_entry_wrapper: MemmapTableEntryWrapper = MemmapTableEntryWrapper(memmap_entry);

        guest_mem
            .write_obj(map_entry_wrapper, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at the PVH_INFO_START
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}

fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
    // Add the table entry to the vector
    memmap.push(hvm_memmap_table_entry {
        addr,
        size,
        type_: mem_type,
        reserved: 0,
    });
}

/// Returns the memory address where the initramfs could be loaded.
pub fn initramfs_load_addr(
    guest_mem: &GuestMemoryMmap,
    initramfs_size: usize,
) -> super::Result<u64> {
    let first_region = guest_mem
        .find_region(GuestAddress::new(0))
        .ok_or(super::Error::InitramfsAddress)?;
    // It's safe to cast to usize because the size of a region can't be greater than usize.
    let lowmem_size = first_region.len() as usize;

    if lowmem_size < initramfs_size {
        return Err(super::Error::InitramfsAddress);
    }

    let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
    Ok(aligned_addr)
}

pub fn get_host_cpu_phys_bits() -> u8 {
    unsafe {
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
        let reduced = if leaf.eax >= 0x8000_001f
            && leaf.ebx == 0x6874_7541    // Vendor ID: AuthenticAMD
            && leaf.ecx == 0x444d_4163
            && leaf.edx == 0x6974_6e65
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

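        // For example, a host reporting 48 physical address bits with a 5-bit
        // SME reduction ends up advertising 48 - 5 = 43 usable bits below.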
        if leaf.eax >= 0x8000_0008 {
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            // Leaf 0x8000_0008 is unavailable: fall back to 36 physical
            // address bits as a conservative default.
            36
        }
    }
}

fn update_cpuid_topology(
    cpuid: &mut CpuId,
    threads_per_core: u8,
    cores_per_die: u8,
    dies_per_package: u8,
) {
    let thread_width = 8 - (threads_per_core - 1).leading_zeros();
    let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
    let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;
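
    // For example, threads_per_core = 2, cores_per_die = 4 and dies_per_package = 1
    // yield thread_width = 1, core_width = 3 and die_width = 3: the APIC ID
    // reserves 1 bit for the thread level and 2 more bits for the core level.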

    // CPU Topology leaf 0xb
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(1),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);

    // CPU Topology leaf 0x1f
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(1),
        CpuidReg::EBX,
        u32::from(cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(2),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);
}

// The goal is to update the CPUID sub-leaves to reflect the number of EPC
// sections exposed to the guest.
fn update_cpuid_sgx(cpuid: &mut CpuId, epc_sections: Vec<SgxEpcSection>) -> Result<(), Error> {
    // Something's wrong if there's no EPC section.
    if epc_sections.is_empty() {
        return Err(Error::NoSgxEpcSection);
    }
    // We can't go further if the hypervisor does not support the SGX feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
        return Err(Error::MissingSgxFeature);
    }
    // We can't go further if the hypervisor does not support the SGX_LC feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
        return Err(Error::MissingSgxLaunchControlFeature);
    }

    // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
    // properties such as confidentiality and integrity.
    let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };

    for (i, epc_section) in epc_sections.iter().enumerate() {
        let subleaf_idx = i + 2;
        let start = epc_section.start().raw_value();
        let size = epc_section.size() as u64;
        let eax = (start & 0xffff_f000) as u32 | 0x1;
        let ebx = (start >> 32) as u32;
        let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
        let edx = (size >> 32) as u32;
        // SGX EPC leaf 0x12
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
    }

    // Add one NULL entry to terminate the dynamic list
    let subleaf_idx = epc_sections.len() + 2;
    // SGX EPC leaf 0x12
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn regions_lt_4gb() {
        let regions = arch_memory_regions(1 << 29);
        assert_eq!(3, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(1usize << 29, regions[0].1);
    }

    #[test]
    fn regions_gt_4gb() {
        let regions = arch_memory_regions((1 << 32) + 0x8000);
        assert_eq!(4, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(1 << 32), regions[1].0);
    }

    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let config_err = configure_system(
            &gm,
            GuestAddress(0),
            &None,
            1,
            Some(layout::RSDP_POINTER),
            None,
        );
        assert!(config_err.is_err());

        // Now assigning some memory that falls before the 32bit memory hole.
        let mem_size = 128 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();

        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        // Now assigning some memory that is equal to the start of the 32bit memory hole.
        let mem_size = 3328 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        // Now assigning some memory that falls after the 32bit memory hole.
        let mem_size = 3330 << 20;
        let arch_mem_regions = arch_memory_regions(mem_size);
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();

        configure_system(&gm, GuestAddress(0), &None, no_vcpus, None, None).unwrap();
    }

    #[test]
    fn test_add_memmap_entry() {
        let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

        let expected_memmap = vec![
            hvm_memmap_table_entry {
                addr: 0x0,
                size: 0x1000,
                type_: E820_RAM,
                ..Default::default()
            },
            hvm_memmap_table_entry {
                addr: 0x10000,
                size: 0xa000,
                type_: E820_RESERVED,
                ..Default::default()
            },
        ];

        add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM);
        add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED);

        assert_eq!(format!("{:?}", memmap), format!("{:?}", expected_memmap));
    }
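
    #[test]
    fn test_sgx_epc_region() {
        // A minimal sketch exercising the SgxEpcRegion accessors; the id and
        // the addresses below are illustrative only.
        let mut region = SgxEpcRegion::new(GuestAddress(0x800_0000), 0x400_0000);
        region.insert(
            "sgx0".to_string(),
            SgxEpcSection::new(GuestAddress(0x800_0000), 0x400_0000),
        );
        assert_eq!(GuestAddress(0x800_0000), region.start());
        assert_eq!(0x400_0000, region.size());
        assert_eq!(1, region.epc_sections().len());
    }

    #[test]
    fn test_cpuid_patch_round_trip() {
        // A sketch of the set_cpuid_reg()/is_feature_enabled() round trip on
        // an initially empty CpuId. Assumes the fallible FamStructWrapper::new()
        // constructor re-exported through hypervisor::CpuId.
        let mut cpuid = CpuId::new(0).unwrap();
        CpuidPatch::set_cpuid_reg(&mut cpuid, 0x7, Some(0), CpuidReg::EBX, 1 << 2);
        assert!(CpuidPatch::is_feature_enabled(&cpuid, 0x7, 0, CpuidReg::EBX, 2));
        assert!(!CpuidPatch::is_feature_enabled(
            &cpuid,
            0x7,
            0,
            CpuidReg::ECX,
            30
        ));
    }

    #[test]
    fn test_initramfs_load_addr() {
        // A sketch of the initramfs placement logic; assumes a 4 KiB host page
        // size so the expected address is already page-aligned.
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x100_0000)]).unwrap();
        // An initramfs larger than the first region must be rejected.
        assert!(initramfs_load_addr(&gm, 0x200_0000).is_err());
        // Otherwise it is placed at the top of low memory, aligned down to the
        // page size.
        assert_eq!(0x80_0000, initramfs_load_addr(&gm, 0x80_0000).unwrap());
    }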
}