xref: /cloud-hypervisor/arch/src/x86_64/mod.rs (revision 274f1aa2e738d579ffff9d4cfd7ed7c45293af31)
1 // Copyright © 2020, Oracle and/or its affiliates.
2 //
3 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 // SPDX-License-Identifier: Apache-2.0
5 //
6 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE-BSD-3-Clause file.
9 use std::sync::Arc;
10 pub mod interrupts;
11 pub mod layout;
12 mod mpspec;
13 mod mptable;
14 pub mod regs;
15 use crate::GuestMemoryMmap;
16 use crate::InitramfsConfig;
17 use crate::RegionType;
18 use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX};
19 use hypervisor::{CpuVendor, HypervisorCpuError, HypervisorError};
20 use linux_loader::loader::bootparam::boot_params;
21 use linux_loader::loader::elf::start_info::{
22     hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
23 };
24 use std::collections::BTreeMap;
25 use std::mem;
26 use vm_memory::{
27     Address, ByteValued, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
28     GuestMemoryRegion, GuestUsize,
29 };
30 mod smbios;
31 use std::arch::x86_64;
32 #[cfg(feature = "tdx")]
33 pub mod tdx;
34 
// CPUID feature bits
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // tsc deadline timer ecx bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ecx bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR (Memory Type Range Registers) edx bit.
const INVARIANT_TSC_EDX_BIT: u8 = 8; // Invariant TSC bit on 0x8000_0007 EDX
const AMX_BF16: u8 = 22; // AMX tile computation on bfloat16 numbers
const AMX_TILE: u8 = 24; // AMX tile load/store instructions
const AMX_INT8: u8 = 25; // AMX tile computation on 8-bit integers

// KVM feature bits (leaf 0x4000_0001 EAX), masked out for TDX guests below.
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;

// Signal set size; presumably mirrors libc's `_NSIG` — NOTE(review): confirm against target libc.
pub const _NSIG: i32 = 65;
59 
#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
}

// E820 memory map entry type codes: usable RAM and reserved ranges.
const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;
71 
#[derive(Clone)]
/// A single SGX Enclave Page Cache (EPC) section in guest physical memory.
pub struct SgxEpcSection {
    // Guest physical address where the section starts.
    start: GuestAddress,
    // Section length in bytes.
    size: GuestUsize,
}

impl SgxEpcSection {
    /// Creates a new EPC section of `size` bytes starting at `start`.
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    /// Returns the guest physical start address of the section.
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    /// Returns the section size in bytes.
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}
89 
#[derive(Clone)]
/// The contiguous guest physical region holding all SGX EPC sections,
/// each section keyed by an identifier string.
pub struct SgxEpcRegion {
    // Guest physical address where the region starts.
    start: GuestAddress,
    // Total region length in bytes.
    size: GuestUsize,
    // Sections contained in the region, keyed by id (BTreeMap keeps a
    // stable, sorted iteration order).
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    /// Creates an empty EPC region of `size` bytes starting at `start`.
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    /// Returns the guest physical start address of the region.
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    /// Returns the region size in bytes.
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    /// Returns the sections contained in this region, keyed by id.
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    /// Adds (or replaces) the section stored under `id`.
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}
118 
// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// *    the type that is implementing the trait is foreign or
// *    all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct StartInfoWrapper(hvm_start_info);

#[derive(Copy, Clone, Default)]
struct MemmapTableEntryWrapper(hvm_memmap_table_entry);

#[derive(Copy, Clone, Default)]
struct ModlistEntryWrapper(hvm_modlist_entry);

// SAFETY: data structure only contain a series of integers
unsafe impl ByteValued for StartInfoWrapper {}
// SAFETY: data structure only contain a series of integers
unsafe impl ByteValued for MemmapTableEntryWrapper {}
// SAFETY: data structure only contain a series of integers
unsafe impl ByteValued for ModlistEntryWrapper {}

// This is a workaround to the Rust enforcement specifying that any implementation of a foreign
// trait (in this case `ByteValued`) where:
// *    the type that is implementing the trait is foreign or
// *    all of the parameters being passed to the trait (if there are any) are also foreign
// is prohibited.
#[derive(Copy, Clone, Default)]
struct BootParamsWrapper(boot_params);

// SAFETY: BootParamsWrapper is a wrapper over `boot_params` (a series of ints).
unsafe impl ByteValued for BootParamsWrapper {}
150 
/// Knobs controlling how the common guest CPUID is generated.
pub struct CpuidConfig {
    /// SGX EPC sections to advertise to the guest, if any.
    pub sgx_epc_sections: Option<Vec<SgxEpcSection>>,
    /// Number of guest physical address bits to expose (leaf 0x8000_0008).
    pub phys_bits: u8,
    /// Whether KVM's HyperV emulation leaves should be populated.
    pub kvm_hyperv: bool,
    /// Whether the guest runs as an Intel TDX trust domain.
    #[cfg(feature = "tdx")]
    pub tdx: bool,
    /// Whether the AMX feature bits should be left enabled.
    pub amx: bool,
}
159 
#[derive(Debug)]
/// Errors specific to x86_64 platform setup.
pub enum Error {
    /// Error writing MP table to memory.
    MpTableSetup(mptable::Error),

    /// Error configuring the general purpose registers
    RegsConfiguration(regs::Error),

    /// Error configuring the special registers
    SregsConfiguration(regs::Error),

    /// Error configuring the floating point related registers
    FpuConfiguration(regs::Error),

    /// Error configuring the MSR registers
    MsrsConfiguration(regs::Error),

    /// Failed to set supported CPUs.
    SetSupportedCpusFailed(anyhow::Error),

    /// Cannot set the local interruption due to bad configuration.
    LocalIntConfiguration(anyhow::Error),

    /// Error setting up SMBIOS table
    SmbiosSetup(smbios::Error),

    /// Could not find any SGX EPC section
    NoSgxEpcSection,

    /// Missing SGX CPU feature
    MissingSgxFeature,

    /// Missing SGX_LC CPU feature
    MissingSgxLaunchControlFeature,

    /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
    CpuidGetSupported(HypervisorError),

    /// Error populating CPUID with KVM HyperV emulation details
    CpuidKvmHyperV(vmm_sys_util::fam::Error),

    /// Error populating CPUID with CPU identification
    CpuidIdentification(vmm_sys_util::fam::Error),

    /// Error checking CPUID compatibility
    CpuidCheckCompatibility,

    /// Error writing EBDA address
    EbdaSetup(vm_memory::GuestMemoryError),

    /// Error getting CPU TSC frequency
    GetTscFrequency(HypervisorCpuError),

    /// Error retrieving TDX capabilities through the hypervisor (kvm/mshv) API
    #[cfg(feature = "tdx")]
    TdxCapabilities(HypervisorError),
}
217 
impl From<Error> for super::Error {
    /// Wraps an x86_64-specific error into the architecture-agnostic error type.
    fn from(e: Error) -> super::Error {
        super::Error::PlatformSpecific(e)
    }
}
223 
#[derive(Copy, Clone, Debug)]
/// Identifies one of the four output registers of the CPUID instruction.
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}
231 
/// Describes feature bits to OR into the CPUID entry matching
/// (`function`, `index`); each `Some(bit)` selects a single bit to set
/// in the corresponding register (or in the entry flags).
pub struct CpuidPatch {
    /// CPUID leaf (EAX input value) to patch.
    pub function: u32,
    /// CPUID subleaf (ECX input value) to patch.
    pub index: u32,
    /// Bit to set in the entry's flags field, if any.
    pub flags_bit: Option<u8>,
    /// Bit to set in EAX, if any.
    pub eax_bit: Option<u8>,
    /// Bit to set in EBX, if any.
    pub ebx_bit: Option<u8>,
    /// Bit to set in ECX, if any.
    pub ecx_bit: Option<u8>,
    /// Bit to set in EDX, if any.
    pub edx_bit: Option<u8>,
}
241 
242 impl CpuidPatch {
243     pub fn set_cpuid_reg(
244         cpuid: &mut Vec<CpuIdEntry>,
245         function: u32,
246         index: Option<u32>,
247         reg: CpuidReg,
248         value: u32,
249     ) {
250         let mut entry_found = false;
251         for entry in cpuid.iter_mut() {
252             if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
253                 entry_found = true;
254                 match reg {
255                     CpuidReg::EAX => {
256                         entry.eax = value;
257                     }
258                     CpuidReg::EBX => {
259                         entry.ebx = value;
260                     }
261                     CpuidReg::ECX => {
262                         entry.ecx = value;
263                     }
264                     CpuidReg::EDX => {
265                         entry.edx = value;
266                     }
267                 }
268             }
269         }
270 
271         if entry_found {
272             return;
273         }
274 
275         // Entry not found, so let's add it.
276         if let Some(index) = index {
277             let mut entry = CpuIdEntry {
278                 function,
279                 index,
280                 flags: CPUID_FLAG_VALID_INDEX,
281                 ..Default::default()
282             };
283             match reg {
284                 CpuidReg::EAX => {
285                     entry.eax = value;
286                 }
287                 CpuidReg::EBX => {
288                     entry.ebx = value;
289                 }
290                 CpuidReg::ECX => {
291                     entry.ecx = value;
292                 }
293                 CpuidReg::EDX => {
294                     entry.edx = value;
295                 }
296             }
297 
298             cpuid.push(entry);
299         }
300     }
301 
302     pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: Vec<CpuidPatch>) {
303         for entry in cpuid {
304             for patch in patches.iter() {
305                 if entry.function == patch.function && entry.index == patch.index {
306                     if let Some(flags_bit) = patch.flags_bit {
307                         entry.flags |= 1 << flags_bit;
308                     }
309                     if let Some(eax_bit) = patch.eax_bit {
310                         entry.eax |= 1 << eax_bit;
311                     }
312                     if let Some(ebx_bit) = patch.ebx_bit {
313                         entry.ebx |= 1 << ebx_bit;
314                     }
315                     if let Some(ecx_bit) = patch.ecx_bit {
316                         entry.ecx |= 1 << ecx_bit;
317                     }
318                     if let Some(edx_bit) = patch.edx_bit {
319                         entry.edx |= 1 << edx_bit;
320                     }
321                 }
322             }
323         }
324     }
325 
326     pub fn is_feature_enabled(
327         cpuid: &[CpuIdEntry],
328         function: u32,
329         index: u32,
330         reg: CpuidReg,
331         feature_bit: usize,
332     ) -> bool {
333         let mask = 1 << feature_bit;
334 
335         for entry in cpuid {
336             if entry.function == function && entry.index == index {
337                 let reg_val = match reg {
338                     CpuidReg::EAX => entry.eax,
339                     CpuidReg::EBX => entry.ebx,
340                     CpuidReg::ECX => entry.ecx,
341                     CpuidReg::EDX => entry.edx,
342                 };
343 
344                 return (reg_val & mask) == mask;
345             }
346         }
347 
348         false
349     }
350 }
351 
#[derive(Debug)]
/// Rule used to compare a source VM CPUID feature value against the
/// destination VM's value when checking migration compatibility.
enum CpuidCompatibleCheck {
    BitwiseSubset, // bitwise subset
    Equal,         // equal in value
    NumNotGreater, // smaller or equal as a number
}
358 
/// Selects one CPUID register (at a given leaf/subleaf) whose value is
/// treated as a feature set during compatibility checks.
pub struct CpuidFeatureEntry {
    // CPUID leaf (EAX input value).
    function: u32,
    // CPUID subleaf (ECX input value).
    index: u32,
    // Output register holding the feature bits.
    feature_reg: CpuidReg,
    // Comparison rule applied between source and destination values.
    compatible_check: CpuidCompatibleCheck,
}
365 
366 impl CpuidFeatureEntry {
367     fn checked_feature_entry_list() -> Vec<CpuidFeatureEntry> {
368         vec![
369             // The following list includes all hardware features bits from
370             // the CPUID Wiki Page: https://en.wikipedia.org/wiki/CPUID
371             // Leaf 0x1, ECX/EDX, feature bits
372             CpuidFeatureEntry {
373                 function: 1,
374                 index: 0,
375                 feature_reg: CpuidReg::ECX,
376                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
377             },
378             CpuidFeatureEntry {
379                 function: 1,
380                 index: 0,
381                 feature_reg: CpuidReg::EDX,
382                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
383             },
384             // Leaf 0x7, EAX/EBX/ECX/EDX, extended features
385             CpuidFeatureEntry {
386                 function: 7,
387                 index: 0,
388                 feature_reg: CpuidReg::EAX,
389                 compatible_check: CpuidCompatibleCheck::NumNotGreater,
390             },
391             CpuidFeatureEntry {
392                 function: 7,
393                 index: 0,
394                 feature_reg: CpuidReg::EBX,
395                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
396             },
397             CpuidFeatureEntry {
398                 function: 7,
399                 index: 0,
400                 feature_reg: CpuidReg::ECX,
401                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
402             },
403             CpuidFeatureEntry {
404                 function: 7,
405                 index: 0,
406                 feature_reg: CpuidReg::EDX,
407                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
408             },
409             // Leaf 0x7 subleaf 0x1, EAX, extended features
410             CpuidFeatureEntry {
411                 function: 7,
412                 index: 1,
413                 feature_reg: CpuidReg::EAX,
414                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
415             },
416             // Leaf 0x8000_0001, ECX/EDX, CPUID features bits
417             CpuidFeatureEntry {
418                 function: 0x8000_0001,
419                 index: 0,
420                 feature_reg: CpuidReg::ECX,
421                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
422             },
423             CpuidFeatureEntry {
424                 function: 0x8000_0001,
425                 index: 0,
426                 feature_reg: CpuidReg::EDX,
427                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
428             },
429             // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
430             // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE
431             CpuidFeatureEntry {
432                 function: 0x4000_0000,
433                 index: 0,
434                 feature_reg: CpuidReg::EAX,
435                 compatible_check: CpuidCompatibleCheck::NumNotGreater,
436             },
437             CpuidFeatureEntry {
438                 function: 0x4000_0000,
439                 index: 0,
440                 feature_reg: CpuidReg::EBX,
441                 compatible_check: CpuidCompatibleCheck::Equal,
442             },
443             CpuidFeatureEntry {
444                 function: 0x4000_0000,
445                 index: 0,
446                 feature_reg: CpuidReg::ECX,
447                 compatible_check: CpuidCompatibleCheck::Equal,
448             },
449             CpuidFeatureEntry {
450                 function: 0x4000_0000,
451                 index: 0,
452                 feature_reg: CpuidReg::EDX,
453                 compatible_check: CpuidCompatibleCheck::Equal,
454             },
455             // Leaf 0x4000_0001, EAX/EBX/ECX/EDX, KVM CPUID features
456             CpuidFeatureEntry {
457                 function: 0x4000_0001,
458                 index: 0,
459                 feature_reg: CpuidReg::EAX,
460                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
461             },
462             CpuidFeatureEntry {
463                 function: 0x4000_0001,
464                 index: 0,
465                 feature_reg: CpuidReg::EBX,
466                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
467             },
468             CpuidFeatureEntry {
469                 function: 0x4000_0001,
470                 index: 0,
471                 feature_reg: CpuidReg::ECX,
472                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
473             },
474             CpuidFeatureEntry {
475                 function: 0x4000_0001,
476                 index: 0,
477                 feature_reg: CpuidReg::EDX,
478                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
479             },
480         ]
481     }
482 
483     fn get_features_from_cpuid(
484         cpuid: &[CpuIdEntry],
485         feature_entry_list: &[CpuidFeatureEntry],
486     ) -> Vec<u32> {
487         let mut features = vec![0; feature_entry_list.len()];
488         for (i, feature_entry) in feature_entry_list.iter().enumerate() {
489             for cpuid_entry in cpuid {
490                 if cpuid_entry.function == feature_entry.function
491                     && cpuid_entry.index == feature_entry.index
492                 {
493                     match feature_entry.feature_reg {
494                         CpuidReg::EAX => {
495                             features[i] = cpuid_entry.eax;
496                         }
497                         CpuidReg::EBX => {
498                             features[i] = cpuid_entry.ebx;
499                         }
500                         CpuidReg::ECX => {
501                             features[i] = cpuid_entry.ecx;
502                         }
503                         CpuidReg::EDX => {
504                             features[i] = cpuid_entry.edx;
505                         }
506                     }
507 
508                     break;
509                 }
510             }
511         }
512 
513         features
514     }
515 
516     // The function returns `Error` (a.k.a. "incompatible"), when the CPUID features from `src_vm_cpuid`
517     // is not a subset of those of the `dest_vm_cpuid`.
518     pub fn check_cpuid_compatibility(
519         src_vm_cpuid: &[CpuIdEntry],
520         dest_vm_cpuid: &[CpuIdEntry],
521     ) -> Result<(), Error> {
522         let feature_entry_list = &Self::checked_feature_entry_list();
523         let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list);
524         let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list);
525 
526         // Loop on feature bit and check if the 'source vm' feature is a subset
527         // of those of the 'destination vm' feature
528         let mut compatible = true;
529         for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features
530             .iter()
531             .zip(dest_vm_features.iter())
532             .enumerate()
533         {
534             let entry = &feature_entry_list[i];
535             let entry_compatible = match entry.compatible_check {
536                 CpuidCompatibleCheck::BitwiseSubset => {
537                     let different_feature_bits = src_vm_feature ^ dest_vm_feature;
538                     let src_vm_feature_bits_only = different_feature_bits & src_vm_feature;
539                     src_vm_feature_bits_only == 0
540                 }
541                 CpuidCompatibleCheck::Equal => src_vm_feature == dest_vm_feature,
542                 CpuidCompatibleCheck::NumNotGreater => src_vm_feature <= dest_vm_feature,
543             };
544             if !entry_compatible {
545                 error!(
546                     "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \
547                     compatilbe_check='{:?}', source VM feature='{:#04x}', destination VM feature'{:#04x}'.",
548                     entry.function, entry.index, entry.feature_reg,
549                     entry.compatible_check, src_vm_feature, dest_vm_feature
550                     );
551 
552                 compatible = false;
553             }
554         }
555 
556         if compatible {
557             info!("No CPU incompatibility detected.");
558             Ok(())
559         } else {
560             Err(Error::CpuidCheckCompatibility)
561         }
562     }
563 }
564 
565 pub fn generate_common_cpuid(
566     hypervisor: &Arc<dyn hypervisor::Hypervisor>,
567     config: &CpuidConfig,
568 ) -> super::Result<Vec<CpuIdEntry>> {
569     // SAFETY: cpuid called with valid leaves
570     if unsafe { x86_64::__cpuid(1) }.ecx & 1 << HYPERVISOR_ECX_BIT == 1 << HYPERVISOR_ECX_BIT {
571         // SAFETY: cpuid called with valid leaves
572         let hypervisor_cpuid = unsafe { x86_64::__cpuid(0x4000_0000) };
573 
574         let mut identifier: [u8; 12] = [0; 12];
575         identifier[0..4].copy_from_slice(&hypervisor_cpuid.ebx.to_le_bytes()[..]);
576         identifier[4..8].copy_from_slice(&hypervisor_cpuid.ecx.to_le_bytes()[..]);
577         identifier[8..12].copy_from_slice(&hypervisor_cpuid.edx.to_le_bytes()[..]);
578 
579         info!(
580             "Running under nested virtualisation. Hypervisor string: {}",
581             String::from_utf8_lossy(&identifier)
582         );
583     }
584 
585     info!(
586         "Generating guest CPUID for with physical address size: {}",
587         config.phys_bits
588     );
589     let cpuid_patches = vec![
590         // Patch tsc deadline timer bit
591         CpuidPatch {
592             function: 1,
593             index: 0,
594             flags_bit: None,
595             eax_bit: None,
596             ebx_bit: None,
597             ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
598             edx_bit: None,
599         },
600         // Patch hypervisor bit
601         CpuidPatch {
602             function: 1,
603             index: 0,
604             flags_bit: None,
605             eax_bit: None,
606             ebx_bit: None,
607             ecx_bit: Some(HYPERVISOR_ECX_BIT),
608             edx_bit: None,
609         },
610         // Enable MTRR feature
611         CpuidPatch {
612             function: 1,
613             index: 0,
614             flags_bit: None,
615             eax_bit: None,
616             ebx_bit: None,
617             ecx_bit: None,
618             edx_bit: Some(MTRR_EDX_BIT),
619         },
620     ];
621 
622     // Supported CPUID
623     let mut cpuid = hypervisor
624         .get_supported_cpuid()
625         .map_err(Error::CpuidGetSupported)?;
626 
627     CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);
628 
629     if let Some(sgx_epc_sections) = &config.sgx_epc_sections {
630         update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
631     }
632 
633     #[cfg(feature = "tdx")]
634     let tdx_capabilities = if config.tdx {
635         let caps = hypervisor
636             .tdx_capabilities()
637             .map_err(Error::TdxCapabilities)?;
638         info!("TDX capabilities {:#?}", caps);
639         Some(caps)
640     } else {
641         None
642     };
643 
644     // Update some existing CPUID
645     for entry in cpuid.as_mut_slice().iter_mut() {
646         match entry.function {
647             // Clear AMX related bits if the AMX feature is not enabled
648             0x7 => {
649                 if !config.amx && entry.index == 0 {
650                     entry.edx &= !(1 << AMX_BF16 | 1 << AMX_TILE | 1 << AMX_INT8)
651                 }
652             }
653             0xd =>
654             {
655                 #[cfg(feature = "tdx")]
656                 if let Some(caps) = &tdx_capabilities {
657                     let xcr0_mask: u64 = 0x82ff;
658                     let xss_mask: u64 = !xcr0_mask;
659                     if entry.index == 0 {
660                         entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32);
661                         entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32);
662                         entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32;
663                         entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32;
664                     } else if entry.index == 1 {
665                         entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32);
666                         entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32);
667                         entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32;
668                         entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32;
669                     }
670                 }
671             }
672             // Copy host L2 cache details if not populated by KVM
673             0x8000_0006 => {
674                 if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
675                     // SAFETY: cpuid called with valid leaves
676                     if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 {
677                         // SAFETY: cpuid called with valid leaves
678                         let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) };
679                         entry.eax = leaf.eax;
680                         entry.ebx = leaf.ebx;
681                         entry.ecx = leaf.ecx;
682                         entry.edx = leaf.edx;
683                     }
684                 }
685             }
686             // Set CPU physical bits
687             0x8000_0008 => {
688                 entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff);
689             }
690             0x4000_0001 => {
691                 // These features are not supported by TDX
692                 #[cfg(feature = "tdx")]
693                 if config.tdx {
694                     entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT
695                         | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT
696                         | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT
697                         | 1 << KVM_FEATURE_ASYNC_PF_BIT
698                         | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT
699                         | 1 << KVM_FEATURE_STEAL_TIME_BIT)
700                 }
701             }
702             _ => {}
703         }
704     }
705 
706     // Copy CPU identification string
707     for i in 0x8000_0002..=0x8000_0004 {
708         cpuid.retain(|c| c.function != i);
709         // SAFETY: call cpuid with valid leaves
710         let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
711         cpuid.push(CpuIdEntry {
712             function: i,
713             eax: leaf.eax,
714             ebx: leaf.ebx,
715             ecx: leaf.ecx,
716             edx: leaf.edx,
717             ..Default::default()
718         });
719     }
720 
721     if config.kvm_hyperv {
722         // Remove conflicting entries
723         cpuid.retain(|c| c.function != 0x4000_0000);
724         cpuid.retain(|c| c.function != 0x4000_0001);
725         // See "Hypervisor Top Level Functional Specification" for details
726         // Compliance with "Hv#1" requires leaves up to 0x4000_000a
727         cpuid.push(CpuIdEntry {
728             function: 0x40000000,
729             eax: 0x4000000a, // Maximum cpuid leaf
730             ebx: 0x756e694c, // "Linu"
731             ecx: 0x564b2078, // "x KV"
732             edx: 0x7648204d, // "M Hv"
733             ..Default::default()
734         });
735         cpuid.push(CpuIdEntry {
736             function: 0x40000001,
737             eax: 0x31237648, // "Hv#1"
738             ..Default::default()
739         });
740         cpuid.push(CpuIdEntry {
741             function: 0x40000002,
742             eax: 0x3839,  // "Build number"
743             ebx: 0xa0000, // "Version"
744             ..Default::default()
745         });
746         cpuid.push(CpuIdEntry {
747             function: 0x4000_0003,
748             eax: 1 << 1 // AccessPartitionReferenceCounter
749                    | 1 << 2 // AccessSynicRegs
750                    | 1 << 3 // AccessSyntheticTimerRegs
751                    | 1 << 9, // AccessPartitionReferenceTsc
752             edx: 1 << 3, // CPU dynamic partitioning
753             ..Default::default()
754         });
755         cpuid.push(CpuIdEntry {
756             function: 0x4000_0004,
757             eax: 1 << 5, // Recommend relaxed timing
758             ..Default::default()
759         });
760         for i in 0x4000_0005..=0x4000_000a {
761             cpuid.push(CpuIdEntry {
762                 function: i,
763                 ..Default::default()
764             });
765         }
766     }
767 
768     Ok(cpuid)
769 }
770 
/// Configures a single vCPU: per-CPU CPUID tweaks, MSRs, local interrupts,
/// and (when `boot_setup` is provided) the boot register/FPU/segment state.
///
/// # Arguments
///
/// * `vcpu` - Hypervisor vCPU handle to configure.
/// * `id` - This vCPU's index, exposed as its APIC ID.
/// * `boot_setup` - Entry point and guest memory, when booting a kernel directly.
/// * `cpuid` - Common CPUID entries from `generate_common_cpuid()`.
/// * `kvm_hyperv` - Whether KVM HyperV emulation is enabled.
/// * `cpu_vendor` - Host CPU vendor (AMD requires an extra leaf patched).
/// * `topology` - Optional three-level topology tuple forwarded to
///   `update_cpuid_topology` — NOTE(review): field meaning is defined by that
///   helper (not visible here); confirm the (t.0, t.1, t.2) ordering there.
pub fn configure_vcpu(
    vcpu: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>,
    cpuid: Vec<CpuIdEntry>,
    kvm_hyperv: bool,
    cpu_vendor: CpuVendor,
    topology: Option<(u8, u8, u8)>,
) -> super::Result<()> {
    // Per vCPU CPUID changes; common are handled via generate_common_cpuid()
    let mut cpuid = cpuid;
    // Write the vCPU id into EDX of the extended topology leaves (0xb, 0x1f).
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, u32::from(id));
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, u32::from(id));
    if matches!(cpu_vendor, CpuVendor::AMD) {
        // AMD additionally reports the APIC ID in leaf 0x8000_001e EAX.
        CpuidPatch::set_cpuid_reg(
            &mut cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::EAX,
            u32::from(id),
        );
    }

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, cpu_vendor, id);
    }

    // Set ApicId in cpuid for each vcpu
    // SAFETY: get host cpuid when eax=1
    let mut cpu_ebx = unsafe { core::arch::x86_64::__cpuid(1) }.ebx;
    // Keep the host's low 24 bits, replace the initial APIC ID (bits 31:24).
    cpu_ebx &= 0xffffff;
    cpu_ebx |= (id as u32) << 24;
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1, None, CpuidReg::EBX, cpu_ebx);

    // The TSC frequency CPUID leaf should not be included when running with HyperV emulation
    if !kvm_hyperv {
        if let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? {
            // Need to check that the TSC doesn't vary with dynamic frequency
            // SAFETY: cpuid called with valid leaves
            if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx
                & (1u32 << INVARIANT_TSC_EDX_BIT)
                > 0
            {
                // Raise the maximum hypervisor leaf so the guest can see 0x4000_0010.
                CpuidPatch::set_cpuid_reg(
                    &mut cpuid,
                    0x4000_0000,
                    None,
                    CpuidReg::EAX,
                    0x4000_0010,
                );
                // Replace any pre-existing TSC frequency leaf with ours.
                cpuid.retain(|c| c.function != 0x4000_0010);
                cpuid.push(CpuIdEntry {
                    function: 0x4000_0010,
                    eax: tsc_khz,
                    ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's
                                   * APIC_BUS_CYCLE_NS */
                    ..Default::default()
                });
            };
        }
    }

    vcpu.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        vcpu.enable_hyperv_synic().unwrap();
    }

    // MSRs first, then boot state (registers, FPU, segments) if we're
    // booting a kernel directly, and finally the local APIC LINT setup.
    regs::setup_msrs(vcpu).map_err(Error::MsrsConfiguration)?;
    if let Some((kernel_entry_point, guest_memory)) = boot_setup {
        regs::setup_regs(vcpu, kernel_entry_point.entry_addr.raw_value())
            .map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&guest_memory.memory(), vcpu).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(vcpu).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}
850 
851 /// Returns a Vec of the valid memory addresses.
852 /// These should be used to configure the GuestMemory structure for the platform.
853 /// For x86_64 all addresses are valid from the start of the kernel except a
854 /// carve out at the end of 32bit address space.
855 pub fn arch_memory_regions() -> Vec<(GuestAddress, usize, RegionType)> {
856     vec![
857         // 0 GiB ~ 3GiB: memory before the gap
858         (
859             GuestAddress(0),
860             layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
861             RegionType::Ram,
862         ),
863         // 4 GiB ~ inf: memory after the gap
864         (layout::RAM_64BIT_START, usize::MAX, RegionType::Ram),
865         // 3 GiB ~ 3712 MiB: 32-bit device memory hole
866         (
867             layout::MEM_32BIT_RESERVED_START,
868             layout::MEM_32BIT_DEVICES_SIZE as usize,
869             RegionType::SubRegion,
870         ),
871         // 3712 MiB ~ 3968 MiB: 32-bit reserved memory hole
872         (
873             layout::MEM_32BIT_RESERVED_START.unchecked_add(layout::MEM_32BIT_DEVICES_SIZE),
874             (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
875             RegionType::Reserved,
876         ),
877     ]
878 }
879 
880 /// Configures the system and should be called once per vm before starting vcpu threads.
881 ///
882 /// # Arguments
883 ///
884 /// * `guest_mem` - The memory to be used by the guest.
885 /// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
886 /// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator.
887 /// * `num_cpus` - Number of virtual CPUs the guest will have.
888 #[allow(clippy::too_many_arguments)]
889 pub fn configure_system(
890     guest_mem: &GuestMemoryMmap,
891     cmdline_addr: GuestAddress,
892     initramfs: &Option<InitramfsConfig>,
893     _num_cpus: u8,
894     rsdp_addr: Option<GuestAddress>,
895     sgx_epc_region: Option<SgxEpcRegion>,
896     serial_number: Option<&str>,
897     uuid: Option<&str>,
898     oem_strings: Option<&[&str]>,
899 ) -> super::Result<()> {
900     // Write EBDA address to location where ACPICA expects to find it
901     guest_mem
902         .write_obj((layout::EBDA_START.0 >> 4) as u16, layout::EBDA_POINTER)
903         .map_err(Error::EbdaSetup)?;
904 
905     let size = smbios::setup_smbios(guest_mem, serial_number, uuid, oem_strings)
906         .map_err(Error::SmbiosSetup)?;
907 
908     // Place the MP table after the SMIOS table aligned to 16 bytes
909     let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size);
910     let offset = GuestAddress((offset.0 + 16) & !0xf);
911     mptable::setup_mptable(offset, guest_mem, _num_cpus).map_err(Error::MpTableSetup)?;
912 
913     // Check that the RAM is not smaller than the RSDP start address
914     if let Some(rsdp_addr) = rsdp_addr {
915         if rsdp_addr.0 > guest_mem.last_addr().0 {
916             return Err(super::Error::RsdpPastRamEnd);
917         }
918     }
919 
920     configure_pvh(
921         guest_mem,
922         cmdline_addr,
923         initramfs,
924         rsdp_addr,
925         sgx_epc_region,
926     )
927 }
928 
929 fn configure_pvh(
930     guest_mem: &GuestMemoryMmap,
931     cmdline_addr: GuestAddress,
932     initramfs: &Option<InitramfsConfig>,
933     rsdp_addr: Option<GuestAddress>,
934     sgx_epc_region: Option<SgxEpcRegion>,
935 ) -> super::Result<()> {
936     const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;
937 
938     let mut start_info: StartInfoWrapper = StartInfoWrapper(hvm_start_info::default());
939 
940     start_info.0.magic = XEN_HVM_START_MAGIC_VALUE;
941     start_info.0.version = 1; // pvh has version 1
942     start_info.0.nr_modules = 0;
943     start_info.0.cmdline_paddr = cmdline_addr.raw_value();
944     start_info.0.memmap_paddr = layout::MEMMAP_START.raw_value();
945 
946     if let Some(rsdp_addr) = rsdp_addr {
947         start_info.0.rsdp_paddr = rsdp_addr.0;
948     }
949 
950     if let Some(initramfs_config) = initramfs {
951         // The initramfs has been written to guest memory already, here we just need to
952         // create the module structure that describes it.
953         let ramdisk_mod: ModlistEntryWrapper = ModlistEntryWrapper(hvm_modlist_entry {
954             paddr: initramfs_config.address.raw_value(),
955             size: initramfs_config.size as u64,
956             ..Default::default()
957         });
958 
959         start_info.0.nr_modules += 1;
960         start_info.0.modlist_paddr = layout::MODLIST_START.raw_value();
961 
962         // Write the modlist struct to guest memory.
963         guest_mem
964             .write_obj(ramdisk_mod, layout::MODLIST_START)
965             .map_err(super::Error::ModlistSetup)?;
966     }
967 
968     // Vector to hold the memory maps which needs to be written to guest memory
969     // at MEMMAP_START after all of the mappings are recorded.
970     let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();
971 
972     // Create the memory map entries.
973     add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);
974 
975     // Merge continuous memory regions into one region.
976     // Note: memory regions from "GuestMemory" are sorted and non-zero sized.
977     let ram_regions = {
978         let mut ram_regions = Vec::new();
979         let mut current_start = guest_mem
980             .iter()
981             .next()
982             .map(GuestMemoryRegion::start_addr)
983             .expect("GuestMemory must have one memory region at least")
984             .raw_value();
985         let mut current_end = current_start;
986 
987         for (start, size) in guest_mem
988             .iter()
989             .map(|m| (m.start_addr().raw_value(), m.len()))
990         {
991             if current_end == start {
992                 // This zone is continuous with the previous one.
993                 current_end += size;
994             } else {
995                 ram_regions.push((current_start, current_end));
996 
997                 current_start = start;
998                 current_end = start + size;
999             }
1000         }
1001 
1002         ram_regions.push((current_start, current_end));
1003 
1004         ram_regions
1005     };
1006 
1007     if ram_regions.len() > 2 {
1008         error!(
1009             "There should be up to two non-continuous regions, devidided by the
1010             gap at the end of 32bit address space (e.g. between 3G and 4G)."
1011         );
1012         return Err(super::Error::MemmapTableSetup);
1013     }
1014 
1015     // Create the memory map entry for memory region before the gap
1016     {
1017         let (first_region_start, first_region_end) =
1018             ram_regions.first().ok_or(super::Error::MemmapTableSetup)?;
1019         let high_ram_start = layout::HIGH_RAM_START.raw_value();
1020         let mem_32bit_reserved_start = layout::MEM_32BIT_RESERVED_START.raw_value();
1021 
1022         if !((first_region_start <= &high_ram_start)
1023             && (first_region_end > &high_ram_start)
1024             && (first_region_end <= &mem_32bit_reserved_start))
1025         {
1026             error!(
1027                 "Unexpected first memory region layout: (start: 0x{:08x}, end: 0x{:08x}).
1028                 high_ram_start: 0x{:08x}, mem_32bit_reserved_start: 0x{:08x}",
1029                 first_region_start, first_region_end, high_ram_start, mem_32bit_reserved_start
1030             );
1031 
1032             return Err(super::Error::MemmapTableSetup);
1033         }
1034 
1035         info!(
1036             "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}",
1037             high_ram_start, first_region_end
1038         );
1039 
1040         add_memmap_entry(
1041             &mut memmap,
1042             high_ram_start,
1043             first_region_end - high_ram_start,
1044             E820_RAM,
1045         );
1046     }
1047 
1048     // Create the memory map entry for memory region after the gap if any
1049     if let Some((second_region_start, second_region_end)) = ram_regions.get(1) {
1050         let ram_64bit_start = layout::RAM_64BIT_START.raw_value();
1051 
1052         if second_region_start != &ram_64bit_start {
1053             error!(
1054                 "Unexpected second memory region layout: start: 0x{:08x}, ram_64bit_start: 0x{:08x}",
1055                 second_region_start, ram_64bit_start
1056             );
1057 
1058             return Err(super::Error::MemmapTableSetup);
1059         }
1060 
1061         info!(
1062             "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}",
1063             ram_64bit_start, second_region_end
1064         );
1065         add_memmap_entry(
1066             &mut memmap,
1067             ram_64bit_start,
1068             second_region_end - ram_64bit_start,
1069             E820_RAM,
1070         );
1071     }
1072 
1073     add_memmap_entry(
1074         &mut memmap,
1075         layout::PCI_MMCONFIG_START.0,
1076         layout::PCI_MMCONFIG_SIZE,
1077         E820_RESERVED,
1078     );
1079 
1080     if let Some(sgx_epc_region) = sgx_epc_region {
1081         add_memmap_entry(
1082             &mut memmap,
1083             sgx_epc_region.start().raw_value(),
1084             sgx_epc_region.size(),
1085             E820_RESERVED,
1086         );
1087     }
1088 
1089     start_info.0.memmap_entries = memmap.len() as u32;
1090 
1091     // Copy the vector with the memmap table to the MEMMAP_START address
1092     // which is already saved in the memmap_paddr field of hvm_start_info struct.
1093     let mut memmap_start_addr = layout::MEMMAP_START;
1094 
1095     guest_mem
1096         .checked_offset(
1097             memmap_start_addr,
1098             mem::size_of::<hvm_memmap_table_entry>() * start_info.0.memmap_entries as usize,
1099         )
1100         .ok_or(super::Error::MemmapTablePastRamEnd)?;
1101 
1102     // For every entry in the memmap vector, create a MemmapTableEntryWrapper
1103     // and write it to guest memory.
1104     for memmap_entry in memmap {
1105         let map_entry_wrapper: MemmapTableEntryWrapper = MemmapTableEntryWrapper(memmap_entry);
1106 
1107         guest_mem
1108             .write_obj(map_entry_wrapper, memmap_start_addr)
1109             .map_err(|_| super::Error::MemmapTableSetup)?;
1110         memmap_start_addr =
1111             memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
1112     }
1113 
1114     // The hvm_start_info struct itself must be stored at PVH_START_INFO
1115     // address, and %rbx will be initialized to contain PVH_INFO_START prior to
1116     // starting the guest, as required by the PVH ABI.
1117     let start_info_addr = layout::PVH_INFO_START;
1118 
1119     guest_mem
1120         .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
1121         .ok_or(super::Error::StartInfoPastRamEnd)?;
1122 
1123     // Write the start_info struct to guest memory.
1124     guest_mem
1125         .write_obj(start_info, start_info_addr)
1126         .map_err(|_| super::Error::StartInfoSetup)?;
1127 
1128     Ok(())
1129 }
1130 
1131 fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
1132     // Add the table entry to the vector
1133     memmap.push(hvm_memmap_table_entry {
1134         addr,
1135         size,
1136         type_: mem_type,
1137         reserved: 0,
1138     });
1139 }
1140 
1141 /// Returns the memory address where the initramfs could be loaded.
1142 pub fn initramfs_load_addr(
1143     guest_mem: &GuestMemoryMmap,
1144     initramfs_size: usize,
1145 ) -> super::Result<u64> {
1146     let first_region = guest_mem
1147         .find_region(GuestAddress::new(0))
1148         .ok_or(super::Error::InitramfsAddress)?;
1149     // It's safe to cast to usize because the size of a region can't be greater than usize.
1150     let lowmem_size = first_region.len() as usize;
1151 
1152     if lowmem_size < initramfs_size {
1153         return Err(super::Error::InitramfsAddress);
1154     }
1155 
1156     let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
1157     Ok(aligned_addr)
1158 }
1159 
/// Queries the host CPU via CPUID for the number of usable physical address
/// bits, subtracting any bits consumed by AMD SME when it is enabled.
pub fn get_host_cpu_phys_bits(hypervisor: &Arc<dyn hypervisor::Hypervisor>) -> u8 {
    // SAFETY: call cpuid with valid leaves
    unsafe {
        // Leaf 0x8000_0000 EAX reports the highest supported extended leaf.
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
        let reduced = if leaf.eax >= 0x8000_001f
            && matches!(hypervisor.get_cpu_vendor(), CpuVendor::AMD)
            // Leaf 0x8000_001f EAX bit 0: SME supported.
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            // Leaf 0x8000_001f EBX[11:6]: physical address bits lost to SME.
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if leaf.eax >= 0x8000_0008 {
            // Leaf 0x8000_0008 EAX[7:0]: physical address width in bits.
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            // Leaf unavailable: fall back to 36 bits (the historical PAE
            // limit used as a conservative default).
            36
        }
    }
}
1185 
/// Patches the CPUID topology leaves so the guest sees the requested
/// `threads_per_core` / `cores_per_die` / `dies_per_package` layout:
/// leaf 0xb (extended topology), leaf 0x1f (v2 extended topology) and,
/// on AMD hosts, leaves 0x8000_001e / 0x8000_0001 / 0x8000_0008.
///
/// `id` is this vcpu's identifier; it is only folded into the AMD-specific
/// leaves here (the generic APIC ID in leaf 0x1 EBX is patched by the caller).
fn update_cpuid_topology(
    cpuid: &mut Vec<CpuIdEntry>,
    threads_per_core: u8,
    cores_per_die: u8,
    dies_per_package: u8,
    cpu_vendor: CpuVendor,
    id: u8,
) {
    // Cumulative APIC-ID bit widths per topology level: ceil(log2(count))
    // computed via leading_zeros on (count - 1), then summed so each width is
    // the total right-shift needed to reach the next level's ID.
    let thread_width = 8 - (threads_per_core - 1).leading_zeros();
    let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
    let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;

    // CPU Topology leaf 0xb
    // Subleaf 0 (SMT level): EAX = APIC-ID shift, EBX = logical processors at
    // this level, ECX[15:8] = level type (1 = SMT).
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);

    // Subleaf 1 (core level, type 2): covers all logical CPUs in the package.
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(1),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);

    // CPU Topology leaf 0x1f — same layout as 0xb plus an explicit die level
    // (type 5) between core and package.
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(1),
        CpuidReg::EBX,
        u32::from(cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(2),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);

    if matches!(cpu_vendor, CpuVendor::AMD) {
        // Leaf 0x8000_001e EBX: threads-per-core minus one in [15:8], this
        // vcpu's id in the low byte.
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::EBX,
            ((threads_per_core as u32 - 1) << 8) | (id as u32 & 0xff),
        );
        // Leaf 0x8000_001e ECX: dies-per-package minus one in [10:8].
        // NOTE(review): the low byte is filled with a combined bit width here,
        // while the APM documents it as a node id — confirm intent.
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::ECX,
            ((dies_per_package as u32 - 1) << 8) | (thread_width + die_width) & 0xff,
        );
        CpuidPatch::set_cpuid_reg(cpuid, 0x8000_001e, Some(0), CpuidReg::EDX, 0);
        if cores_per_die * threads_per_core > 1 {
            // Multi-thread guest: advertise multi-core support in leaf
            // 0x8000_0001 ECX (bits 1 and 22).
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0001,
                Some(0),
                CpuidReg::ECX,
                (1u32 << 1) | (1u32 << 22),
            );
            // Leaf 0x1 EBX: initial APIC id in [31:24], logical processor
            // count in [23:16], CLFLUSH line size field (8 * 8 = 64 bytes)
            // in [15:8].
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x0000_0001,
                Some(0),
                CpuidReg::EBX,
                ((id as u32) << 24)
                    | (8 << 8)
                    | (((cores_per_die * threads_per_core) as u32) << 16),
            );
            let cpuid_patches = vec![
                // Set leaf 0x1 EDX bit 28 so the guest parses the
                // multi-thread fields above. NOTE(review): the original
                // comment called this the "tsc deadline timer" bit, but
                // EDX[28] is HTT; TSC-deadline is ECX[24] (see
                // TSC_DEADLINE_TIMER_ECX_BIT at the top of this file).
                CpuidPatch {
                    function: 1,
                    index: 0,
                    flags_bit: None,
                    eax_bit: None,
                    ebx_bit: None,
                    ecx_bit: None,
                    edx_bit: Some(28),
                },
            ];
            CpuidPatch::patch_cpuid(cpuid, cpuid_patches);
            // Leaf 0x8000_0008 ECX: APIC-ID size in [15:12], threads per
            // package minus one in the low bits.
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0008,
                Some(0),
                CpuidReg::ECX,
                ((thread_width + core_width + die_width) << 12)
                    | ((cores_per_die * threads_per_core) - 1) as u32,
            );
        } else {
            // Single-thread guest: zero the thread-count/APIC-ID-size fields.
            CpuidPatch::set_cpuid_reg(cpuid, 0x8000_0008, Some(0), CpuidReg::ECX, 0u32);
        }
    }
}
1309 
1310 // The goal is to update the CPUID sub-leaves to reflect the number of EPC
1311 // sections exposed to the guest.
1312 fn update_cpuid_sgx(
1313     cpuid: &mut Vec<CpuIdEntry>,
1314     epc_sections: &Vec<SgxEpcSection>,
1315 ) -> Result<(), Error> {
1316     // Something's wrong if there's no EPC section.
1317     if epc_sections.is_empty() {
1318         return Err(Error::NoSgxEpcSection);
1319     }
1320     // We can't go further if the hypervisor does not support SGX feature.
1321     if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
1322         return Err(Error::MissingSgxFeature);
1323     }
1324     // We can't go further if the hypervisor does not support SGX_LC feature.
1325     if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
1326         return Err(Error::MissingSgxLaunchControlFeature);
1327     }
1328 
1329     // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
1330     // properties such as confidentiality and integrity.
1331     // SAFETY: call cpuid with valid leaves
1332     let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };
1333 
1334     for (i, epc_section) in epc_sections.iter().enumerate() {
1335         let subleaf_idx = i + 2;
1336         let start = epc_section.start().raw_value();
1337         let size = epc_section.size();
1338         let eax = (start & 0xffff_f000) as u32 | 0x1;
1339         let ebx = (start >> 32) as u32;
1340         let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
1341         let edx = (size >> 32) as u32;
1342         // CPU Topology leaf 0x12
1343         CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
1344         CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
1345         CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
1346         CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
1347     }
1348 
1349     // Add one NULL entry to terminate the dynamic list
1350     let subleaf_idx = epc_sections.len() + 2;
1351     // CPU Topology leaf 0x12
1352     CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
1353     CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
1354     CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
1355     CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);
1356 
1357     Ok(())
1358 }
1359 
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn regions_base_addr() {
        // The arch map has exactly four regions; RAM starts at 0 and resumes
        // at the 4 GiB boundary after the 32-bit hole.
        let regions = arch_memory_regions();
        assert_eq!(4, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(1 << 32), regions[1].0);
    }

    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;

        // 64 KiB of RAM cannot hold the RSDP target address, so system
        // configuration must fail.
        let tiny_mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let result = configure_system(
            &tiny_mem,
            GuestAddress(0),
            &None,
            1,
            Some(layout::RSDP_POINTER),
            None,
            None,
            None,
            None,
        );
        assert!(result.is_err());

        // RAM that stops before the 32bit memory hole.
        let low_ranges: Vec<(GuestAddress, usize)> = arch_memory_regions()
            .into_iter()
            .filter(|r| r.2 == RegionType::Ram && r.1 != usize::MAX)
            .map(|r| (r.0, r.1))
            .collect();
        let low_mem = GuestMemoryMmap::from_ranges(&low_ranges).unwrap();

        configure_system(
            &low_mem,
            GuestAddress(0),
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // RAM both below and (128 MiB) above the 32bit memory hole.
        let full_ranges: Vec<(GuestAddress, usize)> = arch_memory_regions()
            .into_iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| {
                let size = if r.1 == usize::MAX { 128 << 20 } else { r.1 };
                (r.0, size)
            })
            .collect();
        let full_mem = GuestMemoryMmap::from_ranges(&full_ranges).unwrap();

        configure_system(
            &full_mem,
            GuestAddress(0),
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // Configuring the same guest memory a second time must also succeed.
        configure_system(
            &full_mem,
            GuestAddress(0),
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
    }

    #[test]
    fn test_add_memmap_entry() {
        let mut table: Vec<hvm_memmap_table_entry> = Vec::new();

        add_memmap_entry(&mut table, 0, 0x1000, E820_RAM);
        add_memmap_entry(&mut table, 0x10000, 0xa000, E820_RESERVED);

        let expected = [
            hvm_memmap_table_entry {
                addr: 0x0,
                size: 0x1000,
                type_: E820_RAM,
                ..Default::default()
            },
            hvm_memmap_table_entry {
                addr: 0x10000,
                size: 0xa000,
                type_: E820_RESERVED,
                ..Default::default()
            },
        ];

        // The entry structs don't derive PartialEq, so compare Debug output.
        assert_eq!(format!("{table:?}"), format!("{expected:?}"));
    }
}
1477