xref: /cloud-hypervisor/arch/src/x86_64/mod.rs (revision 8803e4a2e7f8e9596b72f81d3c916390e5b10fbd)
1 // Copyright © 2020, Oracle and/or its affiliates.
2 //
3 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 // SPDX-License-Identifier: Apache-2.0
5 //
6 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE-BSD-3-Clause file.
9 use std::sync::Arc;
10 pub mod interrupts;
11 pub mod layout;
12 mod mpspec;
13 mod mptable;
14 pub mod regs;
15 use crate::GuestMemoryMmap;
16 use crate::InitramfsConfig;
17 use crate::RegionType;
18 use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX};
19 use hypervisor::{CpuVendor, HypervisorCpuError, HypervisorError};
20 use linux_loader::loader::bootparam::{boot_params, setup_header};
21 use linux_loader::loader::elf::start_info::{
22     hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
23 };
24 use std::collections::BTreeMap;
25 use std::mem;
26 use thiserror::Error;
27 use vm_memory::{
28     Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
29     GuestMemoryRegion, GuestUsize,
30 };
31 mod smbios;
32 use std::arch::x86_64;
33 #[cfg(feature = "tdx")]
34 pub mod tdx;
35 
// CPUID feature bits
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // TSC deadline timer bit in leaf 0x1 ECX.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor-present bit in leaf 0x1 ECX.
const MTRR_EDX_BIT: u8 = 12; // MTRR bit in leaf 0x1 EDX (previous comment said "Hypervisor ecx bit" by mistake).
const INVARIANT_TSC_EDX_BIT: u8 = 8; // Invariant TSC bit on 0x8000_0007 EDX
const AMX_BF16: u8 = 22; // AMX tile computation on bfloat16 numbers (leaf 0x7 EDX)
const AMX_TILE: u8 = 24; // AMX tile load/store instructions (leaf 0x7 EDX)
const AMX_INT8: u8 = 25; // AMX tile computation on 8-bit integers (leaf 0x7 EDX)

// KVM feature bits (leaf 0x4000_0001 EAX); only used below to mask out
// paravirt features that TDX guests do not support.
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;

// Number of signals; not referenced in this file — presumably consumed
// elsewhere in the crate (TODO confirm caller).
pub const _NSIG: i32 = 65;
60 
#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
    /// This field is used for bzImage to fill the zero page
    /// (`None` presumably selects a non-bzImage boot path — confirm with callers).
    pub setup_header: Option<setup_header>,
}
71 
// E820 memory-map entry types advertised to the guest: usable RAM vs. reserved.
const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;
74 
#[derive(Clone)]
/// A single SGX Enclave Page Cache (EPC) section: a contiguous
/// guest-address range backing enclave memory.
pub struct SgxEpcSection {
    // Guest address at which the section starts.
    start: GuestAddress,
    // Length of the section.
    size: GuestUsize,
}
80 
81 impl SgxEpcSection {
82     pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
83         SgxEpcSection { start, size }
84     }
85     pub fn start(&self) -> GuestAddress {
86         self.start
87     }
88     pub fn size(&self) -> GuestUsize {
89         self.size
90     }
91 }
92 
#[derive(Clone)]
/// The guest address range reserved for SGX EPC, composed of one or more
/// `SgxEpcSection`s keyed by section id.
pub struct SgxEpcRegion {
    // Guest address at which the region starts.
    start: GuestAddress,
    // Total length of the region.
    size: GuestUsize,
    // Sections composing the region, keyed by section id.
    epc_sections: BTreeMap<String, SgxEpcSection>,
}
99 
100 impl SgxEpcRegion {
101     pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
102         SgxEpcRegion {
103             start,
104             size,
105             epc_sections: BTreeMap::new(),
106         }
107     }
108     pub fn start(&self) -> GuestAddress {
109         self.start
110     }
111     pub fn size(&self) -> GuestUsize {
112         self.size
113     }
114     pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
115         &self.epc_sections
116     }
117     pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
118         self.epc_sections.insert(id, epc_section);
119     }
120 }
121 
/// Knobs controlling how the common guest CPUID is generated.
pub struct CpuidConfig {
    /// SGX EPC sections to advertise to the guest, if any.
    pub sgx_epc_sections: Option<Vec<SgxEpcSection>>,
    /// Physical address width reported in leaf 0x8000_0008 EAX[7:0].
    pub phys_bits: u8,
    /// Expose the KVM HyperV emulation leaves (0x4000_0000..=0x4000_000a).
    pub kvm_hyperv: bool,
    #[cfg(feature = "tdx")]
    /// Whether this is a TDX guest (masks KVM paravirt features TDX lacks).
    pub tdx: bool,
    /// Keep the AMX bits in leaf 0x7 EDX (they are cleared otherwise).
    pub amx: bool,
}
130 
#[derive(Debug, Error)]
pub enum Error {
    /// Error writing MP table to memory.
    #[error("Error writing MP table to memory: {0}")]
    MpTableSetup(mptable::Error),

    /// Error configuring the general purpose registers
    #[error("Error configuring the general purpose registers: {0}")]
    RegsConfiguration(regs::Error),

    /// Error configuring the special registers
    #[error("Error configuring the special registers: {0}")]
    SregsConfiguration(regs::Error),

    /// Error configuring the floating point related registers
    #[error("Error configuring the floating point related registers: {0}")]
    FpuConfiguration(regs::Error),

    /// Error configuring the MSR registers
    #[error("Error configuring the MSR registers: {0}")]
    MsrsConfiguration(regs::Error),

    /// Failed to set supported CPUs.
    #[error("Failed to set supported CPUs: {0}")]
    SetSupportedCpusFailed(anyhow::Error),

    /// Cannot set the local interruption due to bad configuration.
    #[error("Cannot set the local interruption due to bad configuration: {0}")]
    LocalIntConfiguration(anyhow::Error),

    /// Error setting up SMBIOS table
    #[error("Error setting up SMBIOS table: {0}")]
    SmbiosSetup(smbios::Error),

    /// Could not find any SGX EPC section
    #[error("Could not find any SGX EPC section")]
    NoSgxEpcSection,

    /// Missing SGX CPU feature
    #[error("Missing SGX CPU feature")]
    MissingSgxFeature,

    /// Missing SGX_LC CPU feature
    #[error("Missing SGX_LC CPU feature")]
    MissingSgxLaunchControlFeature,

    /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
    #[error("Error getting supported CPUID through the hypervisor API: {0}")]
    CpuidGetSupported(HypervisorError),

    /// Error populating CPUID with KVM HyperV emulation details
    #[error("Error populating CPUID with KVM HyperV emulation details: {0}")]
    CpuidKvmHyperV(vmm_sys_util::fam::Error),

    /// Error populating CPUID with CPU identification
    #[error("Error populating CPUID with CPU identification: {0}")]
    CpuidIdentification(vmm_sys_util::fam::Error),

    /// Error checking CPUID compatibility
    #[error("Error checking CPUID compatibility")]
    CpuidCheckCompatibility,

    /// Error writing EBDA address
    #[error("Error writing EBDA address: {0}")]
    EbdaSetup(vm_memory::GuestMemoryError),

    /// Error getting CPU TSC frequency
    #[error("Error getting CPU TSC frequency: {0}")]
    GetTscFrequency(HypervisorCpuError),

    /// Error retrieving TDX capabilities through the hypervisor (kvm/mshv) API
    #[cfg(feature = "tdx")]
    #[error("Error retrieving TDX capabilities through the hypervisor API: {0}")]
    TdxCapabilities(HypervisorError),

    /// Failed to configure E820 map for bzImage
    #[error("Failed to configure E820 map for bzImage")]
    E820Configuration,
}
210 
impl From<Error> for super::Error {
    /// Wraps an x86_64-specific [`Error`] into the architecture-neutral
    /// crate-level error type.
    fn from(e: Error) -> super::Error {
        super::Error::PlatformSpecific(e)
    }
}
216 
/// Computes the x2APIC ID for vCPU `cpu_id`.
///
/// Without a topology the x2APIC ID is simply the vCPU id. With a
/// `(threads_per_core, cores_per_die, dies_per_socket)` topology, the ID is
/// assembled from the thread/core/die/socket coordinates of the vCPU, each
/// packed into a bit field just wide enough for its maximum index.
pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u8, u8, u8)>) -> u32 {
    if let Some((threads_per_core, cores_per_die, dies_per_socket)) = topology {
        // Field widths: bits needed to hold the highest index at each level
        // (e.g. 3 threads -> indices 0..=2 -> 2 bits).
        let thread_mask_width = u8::BITS - (threads_per_core - 1).leading_zeros();
        let core_mask_width = u8::BITS - (cores_per_die - 1).leading_zeros();
        let die_mask_width = u8::BITS - (dies_per_socket - 1).leading_zeros();

        // Promote to u32 before multiplying: the products previously used u8
        // arithmetic, which overflows for realistic topologies (e.g. 16
        // threads x 16 cores = 256 wraps to 0 and then divides by zero).
        let threads = threads_per_core as u32;
        let cores = cores_per_die as u32;
        let dies = dies_per_socket as u32;

        let thread_id = cpu_id % threads;
        let core_id = cpu_id / threads % cores;
        let die_id = cpu_id / (threads * cores) % dies;
        let socket_id = cpu_id / (threads * cores * dies);

        return thread_id
            | (core_id << thread_mask_width)
            | (die_id << (thread_mask_width + core_mask_width))
            | (socket_id << (thread_mask_width + core_mask_width + die_mask_width));
    }

    cpu_id
}
236 
#[derive(Copy, Clone, Debug)]
/// Selects one of the four general-purpose registers in which a CPUID
/// leaf returns its results.
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}
244 
/// Describes bits to turn on in one CPUID leaf/subleaf; applied by
/// [`CpuidPatch::patch_cpuid`], which ORs each requested bit into the
/// corresponding register of every matching entry.
pub struct CpuidPatch {
    /// CPUID leaf (EAX input) the patch applies to.
    pub function: u32,
    /// CPUID subleaf (ECX input) the patch applies to.
    pub index: u32,
    /// Bit to set in the entry's flags field, if any.
    pub flags_bit: Option<u8>,
    /// Bit to set in EAX, if any.
    pub eax_bit: Option<u8>,
    /// Bit to set in EBX, if any.
    pub ebx_bit: Option<u8>,
    /// Bit to set in ECX, if any.
    pub ecx_bit: Option<u8>,
    /// Bit to set in EDX, if any.
    pub edx_bit: Option<u8>,
}
254 
255 impl CpuidPatch {
256     pub fn get_cpuid_reg(
257         cpuid: &[CpuIdEntry],
258         function: u32,
259         index: Option<u32>,
260         reg: CpuidReg,
261     ) -> Option<u32> {
262         for entry in cpuid.iter() {
263             if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
264                 return match reg {
265                     CpuidReg::EAX => Some(entry.eax),
266                     CpuidReg::EBX => Some(entry.ebx),
267                     CpuidReg::ECX => Some(entry.ecx),
268                     CpuidReg::EDX => Some(entry.edx),
269                 };
270             }
271         }
272 
273         None
274     }
275 
276     pub fn set_cpuid_reg(
277         cpuid: &mut Vec<CpuIdEntry>,
278         function: u32,
279         index: Option<u32>,
280         reg: CpuidReg,
281         value: u32,
282     ) {
283         let mut entry_found = false;
284         for entry in cpuid.iter_mut() {
285             if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
286                 entry_found = true;
287                 match reg {
288                     CpuidReg::EAX => {
289                         entry.eax = value;
290                     }
291                     CpuidReg::EBX => {
292                         entry.ebx = value;
293                     }
294                     CpuidReg::ECX => {
295                         entry.ecx = value;
296                     }
297                     CpuidReg::EDX => {
298                         entry.edx = value;
299                     }
300                 }
301             }
302         }
303 
304         if entry_found {
305             return;
306         }
307 
308         // Entry not found, so let's add it.
309         if let Some(index) = index {
310             let mut entry = CpuIdEntry {
311                 function,
312                 index,
313                 flags: CPUID_FLAG_VALID_INDEX,
314                 ..Default::default()
315             };
316             match reg {
317                 CpuidReg::EAX => {
318                     entry.eax = value;
319                 }
320                 CpuidReg::EBX => {
321                     entry.ebx = value;
322                 }
323                 CpuidReg::ECX => {
324                     entry.ecx = value;
325                 }
326                 CpuidReg::EDX => {
327                     entry.edx = value;
328                 }
329             }
330 
331             cpuid.push(entry);
332         }
333     }
334 
335     pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: Vec<CpuidPatch>) {
336         for entry in cpuid {
337             for patch in patches.iter() {
338                 if entry.function == patch.function && entry.index == patch.index {
339                     if let Some(flags_bit) = patch.flags_bit {
340                         entry.flags |= 1 << flags_bit;
341                     }
342                     if let Some(eax_bit) = patch.eax_bit {
343                         entry.eax |= 1 << eax_bit;
344                     }
345                     if let Some(ebx_bit) = patch.ebx_bit {
346                         entry.ebx |= 1 << ebx_bit;
347                     }
348                     if let Some(ecx_bit) = patch.ecx_bit {
349                         entry.ecx |= 1 << ecx_bit;
350                     }
351                     if let Some(edx_bit) = patch.edx_bit {
352                         entry.edx |= 1 << edx_bit;
353                     }
354                 }
355             }
356         }
357     }
358 
359     pub fn is_feature_enabled(
360         cpuid: &[CpuIdEntry],
361         function: u32,
362         index: u32,
363         reg: CpuidReg,
364         feature_bit: usize,
365     ) -> bool {
366         let mask = 1 << feature_bit;
367 
368         for entry in cpuid {
369             if entry.function == function && entry.index == index {
370                 let reg_val = match reg {
371                     CpuidReg::EAX => entry.eax,
372                     CpuidReg::EBX => entry.ebx,
373                     CpuidReg::ECX => entry.ecx,
374                     CpuidReg::EDX => entry.edx,
375                 };
376 
377                 return (reg_val & mask) == mask;
378             }
379         }
380 
381         false
382     }
383 }
384 
#[derive(Debug)]
/// How a source VM's CPUID feature value must relate to the destination
/// VM's value to be considered migration-compatible.
enum CpuidCompatibleCheck {
    BitwiseSubset, // bitwise subset
    Equal,         // equal in value
    NumNotGreater, // smaller or equal as a number
}
391 
/// Identifies one CPUID register (leaf/subleaf/register) compared between
/// two VMs, together with the rule used for the comparison.
pub struct CpuidFeatureEntry {
    // CPUID leaf (EAX input).
    function: u32,
    // CPUID subleaf (ECX input).
    index: u32,
    // Which output register holds the feature bits.
    feature_reg: CpuidReg,
    // Rule relating source and destination values.
    compatible_check: CpuidCompatibleCheck,
}
398 
399 impl CpuidFeatureEntry {
400     fn checked_feature_entry_list() -> Vec<CpuidFeatureEntry> {
401         vec![
402             // The following list includes all hardware features bits from
403             // the CPUID Wiki Page: https://en.wikipedia.org/wiki/CPUID
404             // Leaf 0x1, ECX/EDX, feature bits
405             CpuidFeatureEntry {
406                 function: 1,
407                 index: 0,
408                 feature_reg: CpuidReg::ECX,
409                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
410             },
411             CpuidFeatureEntry {
412                 function: 1,
413                 index: 0,
414                 feature_reg: CpuidReg::EDX,
415                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
416             },
417             // Leaf 0x7, EAX/EBX/ECX/EDX, extended features
418             CpuidFeatureEntry {
419                 function: 7,
420                 index: 0,
421                 feature_reg: CpuidReg::EAX,
422                 compatible_check: CpuidCompatibleCheck::NumNotGreater,
423             },
424             CpuidFeatureEntry {
425                 function: 7,
426                 index: 0,
427                 feature_reg: CpuidReg::EBX,
428                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
429             },
430             CpuidFeatureEntry {
431                 function: 7,
432                 index: 0,
433                 feature_reg: CpuidReg::ECX,
434                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
435             },
436             CpuidFeatureEntry {
437                 function: 7,
438                 index: 0,
439                 feature_reg: CpuidReg::EDX,
440                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
441             },
442             // Leaf 0x7 subleaf 0x1, EAX, extended features
443             CpuidFeatureEntry {
444                 function: 7,
445                 index: 1,
446                 feature_reg: CpuidReg::EAX,
447                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
448             },
449             // Leaf 0x8000_0001, ECX/EDX, CPUID features bits
450             CpuidFeatureEntry {
451                 function: 0x8000_0001,
452                 index: 0,
453                 feature_reg: CpuidReg::ECX,
454                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
455             },
456             CpuidFeatureEntry {
457                 function: 0x8000_0001,
458                 index: 0,
459                 feature_reg: CpuidReg::EDX,
460                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
461             },
462             // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
463             // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE
464             CpuidFeatureEntry {
465                 function: 0x4000_0000,
466                 index: 0,
467                 feature_reg: CpuidReg::EAX,
468                 compatible_check: CpuidCompatibleCheck::NumNotGreater,
469             },
470             CpuidFeatureEntry {
471                 function: 0x4000_0000,
472                 index: 0,
473                 feature_reg: CpuidReg::EBX,
474                 compatible_check: CpuidCompatibleCheck::Equal,
475             },
476             CpuidFeatureEntry {
477                 function: 0x4000_0000,
478                 index: 0,
479                 feature_reg: CpuidReg::ECX,
480                 compatible_check: CpuidCompatibleCheck::Equal,
481             },
482             CpuidFeatureEntry {
483                 function: 0x4000_0000,
484                 index: 0,
485                 feature_reg: CpuidReg::EDX,
486                 compatible_check: CpuidCompatibleCheck::Equal,
487             },
488             // Leaf 0x4000_0001, EAX/EBX/ECX/EDX, KVM CPUID features
489             CpuidFeatureEntry {
490                 function: 0x4000_0001,
491                 index: 0,
492                 feature_reg: CpuidReg::EAX,
493                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
494             },
495             CpuidFeatureEntry {
496                 function: 0x4000_0001,
497                 index: 0,
498                 feature_reg: CpuidReg::EBX,
499                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
500             },
501             CpuidFeatureEntry {
502                 function: 0x4000_0001,
503                 index: 0,
504                 feature_reg: CpuidReg::ECX,
505                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
506             },
507             CpuidFeatureEntry {
508                 function: 0x4000_0001,
509                 index: 0,
510                 feature_reg: CpuidReg::EDX,
511                 compatible_check: CpuidCompatibleCheck::BitwiseSubset,
512             },
513         ]
514     }
515 
516     fn get_features_from_cpuid(
517         cpuid: &[CpuIdEntry],
518         feature_entry_list: &[CpuidFeatureEntry],
519     ) -> Vec<u32> {
520         let mut features = vec![0; feature_entry_list.len()];
521         for (i, feature_entry) in feature_entry_list.iter().enumerate() {
522             for cpuid_entry in cpuid {
523                 if cpuid_entry.function == feature_entry.function
524                     && cpuid_entry.index == feature_entry.index
525                 {
526                     match feature_entry.feature_reg {
527                         CpuidReg::EAX => {
528                             features[i] = cpuid_entry.eax;
529                         }
530                         CpuidReg::EBX => {
531                             features[i] = cpuid_entry.ebx;
532                         }
533                         CpuidReg::ECX => {
534                             features[i] = cpuid_entry.ecx;
535                         }
536                         CpuidReg::EDX => {
537                             features[i] = cpuid_entry.edx;
538                         }
539                     }
540 
541                     break;
542                 }
543             }
544         }
545 
546         features
547     }
548 
549     // The function returns `Error` (a.k.a. "incompatible"), when the CPUID features from `src_vm_cpuid`
550     // is not a subset of those of the `dest_vm_cpuid`.
551     pub fn check_cpuid_compatibility(
552         src_vm_cpuid: &[CpuIdEntry],
553         dest_vm_cpuid: &[CpuIdEntry],
554     ) -> Result<(), Error> {
555         let feature_entry_list = &Self::checked_feature_entry_list();
556         let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list);
557         let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list);
558 
559         // Loop on feature bit and check if the 'source vm' feature is a subset
560         // of those of the 'destination vm' feature
561         let mut compatible = true;
562         for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features
563             .iter()
564             .zip(dest_vm_features.iter())
565             .enumerate()
566         {
567             let entry = &feature_entry_list[i];
568             let entry_compatible = match entry.compatible_check {
569                 CpuidCompatibleCheck::BitwiseSubset => {
570                     let different_feature_bits = src_vm_feature ^ dest_vm_feature;
571                     let src_vm_feature_bits_only = different_feature_bits & src_vm_feature;
572                     src_vm_feature_bits_only == 0
573                 }
574                 CpuidCompatibleCheck::Equal => src_vm_feature == dest_vm_feature,
575                 CpuidCompatibleCheck::NumNotGreater => src_vm_feature <= dest_vm_feature,
576             };
577             if !entry_compatible {
578                 error!(
579                     "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \
580                     compatible_check='{:?}', source VM feature='{:#04x}', destination VM feature'{:#04x}'.",
581                     entry.function, entry.index, entry.feature_reg,
582                     entry.compatible_check, src_vm_feature, dest_vm_feature
583                     );
584 
585                 compatible = false;
586             }
587         }
588 
589         if compatible {
590             info!("No CPU incompatibility detected.");
591             Ok(())
592         } else {
593             Err(Error::CpuidCheckCompatibility)
594         }
595     }
596 }
597 
/// Builds the CPUID entries shared by every vCPU of the guest.
///
/// Starts from the hypervisor-reported supported CPUID, then applies the
/// static patches (TSC deadline timer, hypervisor bit, MTRR), SGX/TDX/AMX
/// adjustments, host cache and identification leaves, the configured
/// physical address width and, optionally, the KVM HyperV leaves.
pub fn generate_common_cpuid(
    hypervisor: &Arc<dyn hypervisor::Hypervisor>,
    config: &CpuidConfig,
) -> super::Result<Vec<CpuIdEntry>> {
    // Detect (and log) nested virtualisation: leaf 0x1 ECX bit 31 is set
    // when we ourselves run under a hypervisor.
    // SAFETY: cpuid called with valid leaves
    if unsafe { x86_64::__cpuid(1) }.ecx & 1 << HYPERVISOR_ECX_BIT == 1 << HYPERVISOR_ECX_BIT {
        // SAFETY: cpuid called with valid leaves
        let hypervisor_cpuid = unsafe { x86_64::__cpuid(0x4000_0000) };

        // Leaf 0x4000_0000 EBX/ECX/EDX carry the 12-byte hypervisor
        // identification string.
        let mut identifier: [u8; 12] = [0; 12];
        identifier[0..4].copy_from_slice(&hypervisor_cpuid.ebx.to_le_bytes()[..]);
        identifier[4..8].copy_from_slice(&hypervisor_cpuid.ecx.to_le_bytes()[..]);
        identifier[8..12].copy_from_slice(&hypervisor_cpuid.edx.to_le_bytes()[..]);

        info!(
            "Running under nested virtualisation. Hypervisor string: {}",
            String::from_utf8_lossy(&identifier)
        );
    }

    info!(
        "Generating guest CPUID for with physical address size: {}",
        config.phys_bits
    );
    // Static bit patches OR-ed into leaf 0x1 of every matching entry.
    let cpuid_patches = vec![
        // Patch tsc deadline timer bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
            edx_bit: None,
        },
        // Patch hypervisor bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(HYPERVISOR_ECX_BIT),
            edx_bit: None,
        },
        // Enable MTRR feature
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: None,
            edx_bit: Some(MTRR_EDX_BIT),
        },
    ];

    // Supported CPUID
    let mut cpuid = hypervisor
        .get_supported_cpuid()
        .map_err(Error::CpuidGetSupported)?;

    CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);

    // Advertise the configured SGX EPC sections, if any.
    if let Some(sgx_epc_sections) = &config.sgx_epc_sections {
        update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
    }

    // TDX fixed-bit capabilities, used below to constrain leaf 0xd.
    #[cfg(feature = "tdx")]
    let tdx_capabilities = if config.tdx {
        let caps = hypervisor
            .tdx_capabilities()
            .map_err(Error::TdxCapabilities)?;
        info!("TDX capabilities {:#?}", caps);
        Some(caps)
    } else {
        None
    };

    // Update some existing CPUID
    for entry in cpuid.as_mut_slice().iter_mut() {
        match entry.function {
            // Clear AMX related bits if the AMX feature is not enabled
            0x7 => {
                if !config.amx && entry.index == 0 {
                    entry.edx &= !(1 << AMX_BF16 | 1 << AMX_TILE | 1 << AMX_INT8)
                }
            }
            // Leaf 0xd (XSAVE state): force the XCR0/XSS-related bits to
            // respect the TDX fixed-0/fixed-1 XFAM masks.
            0xd =>
            {
                #[cfg(feature = "tdx")]
                if let Some(caps) = &tdx_capabilities {
                    // 0x82ff selects the XCR0-managed state bits; the
                    // complement covers the XSS-managed ones.
                    let xcr0_mask: u64 = 0x82ff;
                    let xss_mask: u64 = !xcr0_mask;
                    if entry.index == 0 {
                        entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32);
                        entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32;
                    } else if entry.index == 1 {
                        entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32);
                        entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32;
                    }
                }
            }
            // Copy host L1 cache details if not populated by KVM
            0x8000_0005 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // Only read the host leaf when the host actually
                    // implements it (max extended leaf >= 0x8000_0005).
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0005 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0005) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Copy host L2 cache details if not populated by KVM
            0x8000_0006 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Set CPU physical bits
            0x8000_0008 => {
                // EAX[7:0] is the physical address width presented to the guest.
                entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff);
            }
            0x4000_0001 => {
                // These features are not supported by TDX
                #[cfg(feature = "tdx")]
                if config.tdx {
                    entry.eax &= !(1 << KVM_FEATURE_CLOCKSOURCE_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE2_BIT
                        | 1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_BIT
                        | 1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT
                        | 1 << KVM_FEATURE_STEAL_TIME_BIT)
                }
            }
            _ => {}
        }
    }

    // Copy CPU identification string
    // (leaves 0x8000_0002..=0x8000_0004 hold the processor brand string;
    // any hypervisor-provided values are replaced by the host's).
    for i in 0x8000_0002..=0x8000_0004 {
        cpuid.retain(|c| c.function != i);
        // SAFETY: call cpuid with valid leaves
        let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
        cpuid.push(CpuIdEntry {
            function: i,
            eax: leaf.eax,
            ebx: leaf.ebx,
            ecx: leaf.ecx,
            edx: leaf.edx,
            ..Default::default()
        });
    }

    if config.kvm_hyperv {
        // Remove conflicting entries
        cpuid.retain(|c| c.function != 0x4000_0000);
        cpuid.retain(|c| c.function != 0x4000_0001);
        // See "Hypervisor Top Level Functional Specification" for details
        // Compliance with "Hv#1" requires leaves up to 0x4000_000a
        cpuid.push(CpuIdEntry {
            function: 0x40000000,
            eax: 0x4000000a, // Maximum cpuid leaf
            ebx: 0x756e694c, // "Linu"
            ecx: 0x564b2078, // "x KV"
            edx: 0x7648204d, // "M Hv"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x40000001,
            eax: 0x31237648, // "Hv#1"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x40000002,
            eax: 0x3839,  // "Build number"
            ebx: 0xa0000, // "Version"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0003,
            eax: 1 << 1 // AccessPartitionReferenceCounter
                   | 1 << 2 // AccessSynicRegs
                   | 1 << 3 // AccessSyntheticTimerRegs
                   | 1 << 9, // AccessPartitionReferenceTsc
            edx: 1 << 3, // CPU dynamic partitioning
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0004,
            eax: 1 << 5, // Recommend relaxed timing
            ..Default::default()
        });
        // Remaining leaves must exist (all-zero) for Hv#1 compliance.
        for i in 0x4000_0005..=0x4000_000a {
            cpuid.push(CpuIdEntry {
                function: i,
                ..Default::default()
            });
        }
    }

    Ok(cpuid)
}
817 
/// Configure CPUID, MSRs, boot registers and LAPIC for a single vCPU.
///
/// # Arguments
///
/// * `vcpu` - Handle of the vCPU being configured.
/// * `id` - Sequential index of this vCPU as seen by the VMM.
/// * `boot_setup` - Kernel entry point and guest memory; when present the
///   general purpose/FPU/segment registers are also initialized for boot.
/// * `cpuid` - CPUID entries common to all vCPUs; patched per-vCPU below.
/// * `kvm_hyperv` - Whether KVM's Hyper-V emulation is enabled.
/// * `cpu_vendor` - Host CPU vendor, used for vendor-specific CPUID leaves.
/// * `topology` - Optional (threads_per_core, cores_per_die, dies_per_package).
///
/// # Errors
///
/// Returns an error if any hypervisor call (TSC query, CPUID/MSR/register
/// setup, LAPIC configuration) fails.
pub fn configure_vcpu(
    vcpu: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>,
    cpuid: Vec<CpuIdEntry>,
    kvm_hyperv: bool,
    cpu_vendor: CpuVendor,
    topology: Option<(u8, u8, u8)>,
) -> super::Result<()> {
    let x2apic_id = get_x2apic_id(id as u32, topology);

    // Per vCPU CPUID changes; common are handled via generate_common_cpuid()
    let mut cpuid = cpuid;
    // Topology leaves 0xb/0x1f report this vCPU's x2APIC ID in EDX.
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, x2apic_id);
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, x2apic_id);
    if matches!(cpu_vendor, CpuVendor::AMD) {
        // AMD extended topology leaf carries the APIC ID in EAX.
        CpuidPatch::set_cpuid_reg(&mut cpuid, 0x8000_001e, Some(0), CpuidReg::EAX, x2apic_id);
    }

    // Set ApicId in cpuid for each vcpu - found in cpuid ebx when eax = 1
    let mut apic_id_patched = false;
    for entry in &mut cpuid {
        if entry.function == 1 {
            // EBX[31:24] holds the initial APIC ID; keep the lower 24 bits.
            entry.ebx &= 0xffffff;
            entry.ebx |= x2apic_id << 24;
            apic_id_patched = true;
            break;
        }
    }
    // Leaf 0x1 must be present in the common CPUID set; its absence is a bug.
    assert!(apic_id_patched);

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, cpu_vendor, id);
    }

    // The TSC frequency CPUID leaf should not be included when running with HyperV emulation
    if !kvm_hyperv {
        if let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? {
            // Need to check that the TSC doesn't vary with dynamic frequency
            // SAFETY: cpuid called with valid leaves
            if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx
                & (1u32 << INVARIANT_TSC_EDX_BIT)
                > 0
            {
                // Raise the hypervisor leaf limit so the guest can see leaf
                // 0x4000_0010, then (re)insert that leaf with the TSC rate.
                CpuidPatch::set_cpuid_reg(
                    &mut cpuid,
                    0x4000_0000,
                    None,
                    CpuidReg::EAX,
                    0x4000_0010,
                );
                cpuid.retain(|c| c.function != 0x4000_0010);
                cpuid.push(CpuIdEntry {
                    function: 0x4000_0010,
                    eax: tsc_khz,
                    ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's
                                   * APIC_BUS_CYCLE_NS */
                    ..Default::default()
                });
            };
        }
    }

    vcpu.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        vcpu.enable_hyperv_synic().unwrap();
    }

    regs::setup_msrs(vcpu).map_err(Error::MsrsConfiguration)?;
    if let Some((kernel_entry_point, guest_memory)) = boot_setup {
        regs::setup_regs(vcpu, kernel_entry_point).map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&guest_memory.memory(), vcpu).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(vcpu).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}
897 
898 /// Returns a Vec of the valid memory addresses.
899 /// These should be used to configure the GuestMemory structure for the platform.
900 /// For x86_64 all addresses are valid from the start of the kernel except a
901 /// carve out at the end of 32bit address space.
902 pub fn arch_memory_regions() -> Vec<(GuestAddress, usize, RegionType)> {
903     vec![
904         // 0 GiB ~ 3GiB: memory before the gap
905         (
906             GuestAddress(0),
907             layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
908             RegionType::Ram,
909         ),
910         // 4 GiB ~ inf: memory after the gap
911         (layout::RAM_64BIT_START, usize::MAX, RegionType::Ram),
912         // 3 GiB ~ 3712 MiB: 32-bit device memory hole
913         (
914             layout::MEM_32BIT_RESERVED_START,
915             layout::MEM_32BIT_DEVICES_SIZE as usize,
916             RegionType::SubRegion,
917         ),
918         // 3712 MiB ~ 3968 MiB: 32-bit reserved memory hole
919         (
920             layout::MEM_32BIT_RESERVED_START.unchecked_add(layout::MEM_32BIT_DEVICES_SIZE),
921             (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
922             RegionType::Reserved,
923         ),
924     ]
925 }
926 
927 /// Configures the system and should be called once per vm before starting vcpu threads.
928 ///
929 /// # Arguments
930 ///
931 /// * `guest_mem` - The memory to be used by the guest.
932 /// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
933 /// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator.
934 /// * `num_cpus` - Number of virtual CPUs the guest will have.
935 #[allow(clippy::too_many_arguments)]
936 pub fn configure_system(
937     guest_mem: &GuestMemoryMmap,
938     cmdline_addr: GuestAddress,
939     cmdline_size: usize,
940     initramfs: &Option<InitramfsConfig>,
941     _num_cpus: u8,
942     setup_header: Option<setup_header>,
943     rsdp_addr: Option<GuestAddress>,
944     sgx_epc_region: Option<SgxEpcRegion>,
945     serial_number: Option<&str>,
946     uuid: Option<&str>,
947     oem_strings: Option<&[&str]>,
948     topology: Option<(u8, u8, u8)>,
949 ) -> super::Result<()> {
950     // Write EBDA address to location where ACPICA expects to find it
951     guest_mem
952         .write_obj((layout::EBDA_START.0 >> 4) as u16, layout::EBDA_POINTER)
953         .map_err(Error::EbdaSetup)?;
954 
955     let size = smbios::setup_smbios(guest_mem, serial_number, uuid, oem_strings)
956         .map_err(Error::SmbiosSetup)?;
957 
958     // Place the MP table after the SMIOS table aligned to 16 bytes
959     let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size);
960     let offset = GuestAddress((offset.0 + 16) & !0xf);
961     mptable::setup_mptable(offset, guest_mem, _num_cpus, topology).map_err(Error::MpTableSetup)?;
962 
963     // Check that the RAM is not smaller than the RSDP start address
964     if let Some(rsdp_addr) = rsdp_addr {
965         if rsdp_addr.0 > guest_mem.last_addr().0 {
966             return Err(super::Error::RsdpPastRamEnd);
967         }
968     }
969 
970     match setup_header {
971         Some(hdr) => configure_32bit_entry(
972             guest_mem,
973             cmdline_addr,
974             cmdline_size,
975             initramfs,
976             hdr,
977             rsdp_addr,
978             sgx_epc_region,
979         ),
980         None => configure_pvh(
981             guest_mem,
982             cmdline_addr,
983             initramfs,
984             rsdp_addr,
985             sgx_epc_region,
986         ),
987     }
988 }
989 
990 type RamRange = (u64, u64);
991 
992 /// Returns usable physical memory ranges for the guest
993 /// These should be used to create e820_RAM memory maps
994 pub fn generate_ram_ranges(guest_mem: &GuestMemoryMmap) -> super::Result<Vec<RamRange>> {
995     // Merge continuous memory regions into one region.
996     // Note: memory regions from "GuestMemory" are sorted and non-zero sized.
997     let ram_regions = {
998         let mut ram_regions = Vec::new();
999         let mut current_start = guest_mem
1000             .iter()
1001             .next()
1002             .map(GuestMemoryRegion::start_addr)
1003             .expect("GuestMemory must have one memory region at least")
1004             .raw_value();
1005         let mut current_end = current_start;
1006 
1007         for (start, size) in guest_mem
1008             .iter()
1009             .map(|m| (m.start_addr().raw_value(), m.len()))
1010         {
1011             if current_end == start {
1012                 // This zone is continuous with the previous one.
1013                 current_end += size;
1014             } else {
1015                 ram_regions.push((current_start, current_end));
1016 
1017                 current_start = start;
1018                 current_end = start + size;
1019             }
1020         }
1021 
1022         ram_regions.push((current_start, current_end));
1023 
1024         ram_regions
1025     };
1026 
1027     // Create the memory map entry for memory region before the gap
1028     let mut ram_ranges = vec![];
1029 
1030     // Generate the first usable physical memory range before the gap. The e820 map
1031     // should only report memory above 1MiB.
1032     let first_ram_range = {
1033         let (first_region_start, first_region_end) =
1034             ram_regions.first().ok_or(super::Error::MemmapTableSetup)?;
1035         let high_ram_start = layout::HIGH_RAM_START.raw_value();
1036         let mem_32bit_reserved_start = layout::MEM_32BIT_RESERVED_START.raw_value();
1037 
1038         if !((first_region_start <= &high_ram_start)
1039             && (first_region_end > &high_ram_start)
1040             && (first_region_end <= &mem_32bit_reserved_start))
1041         {
1042             error!(
1043                 "Unexpected first memory region layout: (start: 0x{:08x}, end: 0x{:08x}).
1044                 high_ram_start: 0x{:08x}, mem_32bit_reserved_start: 0x{:08x}",
1045                 first_region_start, first_region_end, high_ram_start, mem_32bit_reserved_start
1046             );
1047 
1048             return Err(super::Error::MemmapTableSetup);
1049         }
1050 
1051         info!(
1052             "first usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
1053             high_ram_start, first_region_end
1054         );
1055 
1056         (high_ram_start, *first_region_end)
1057     };
1058     ram_ranges.push(first_ram_range);
1059 
1060     // Generate additional usable physical memory range after the gap if any.
1061     for ram_region in ram_regions.iter().skip(1) {
1062         info!(
1063             "found usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
1064             ram_region.0, ram_region.1
1065         );
1066 
1067         ram_ranges.push(*ram_region);
1068     }
1069 
1070     Ok(ram_ranges)
1071 }
1072 
/// Boot via the PVH entry point: build the `hvm_start_info` structure, its
/// module list and memory map, and write them to their fixed guest addresses.
fn configure_pvh(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    // Magic value the guest checks in hvm_start_info.magic.
    const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;

    let mut start_info = hvm_start_info {
        magic: XEN_HVM_START_MAGIC_VALUE,
        version: 1, // pvh has version 1
        nr_modules: 0,
        cmdline_paddr: cmdline_addr.raw_value(),
        memmap_paddr: layout::MEMMAP_START.raw_value(),
        ..Default::default()
    };

    if let Some(rsdp_addr) = rsdp_addr {
        start_info.rsdp_paddr = rsdp_addr.0;
    }

    if let Some(initramfs_config) = initramfs {
        // The initramfs has been written to guest memory already, here we just need to
        // create the module structure that describes it.
        let ramdisk_mod = hvm_modlist_entry {
            paddr: initramfs_config.address.raw_value(),
            size: initramfs_config.size as u64,
            ..Default::default()
        };

        start_info.nr_modules += 1;
        start_info.modlist_paddr = layout::MODLIST_START.raw_value();

        // Write the modlist struct to guest memory.
        guest_mem
            .write_obj(ramdisk_mod, layout::MODLIST_START)
            .map_err(super::Error::ModlistSetup)?;
    }

    // Vector to hold the memory maps which needs to be written to guest memory
    // at MEMMAP_START after all of the mappings are recorded.
    let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

    // Create the memory map entries.
    // RAM below the EBDA is always usable.
    add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);

    // Get usable physical memory ranges
    let ram_ranges = generate_ram_ranges(guest_mem)?;

    // Create e820 memory map entries
    for ram_range in ram_ranges {
        info!(
            "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}",
            ram_range.0, ram_range.1
        );
        add_memmap_entry(
            &mut memmap,
            ram_range.0,
            ram_range.1 - ram_range.0,
            E820_RAM,
        );
    }

    // The PCI MMCONFIG window is reported as reserved.
    add_memmap_entry(
        &mut memmap,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    );

    // The SGX EPC region, when present, is reported as reserved too.
    if let Some(sgx_epc_region) = sgx_epc_region {
        add_memmap_entry(
            &mut memmap,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        );
    }

    start_info.memmap_entries = memmap.len() as u32;

    // Copy the vector with the memmap table to the MEMMAP_START address
    // which is already saved in the memmap_paddr field of hvm_start_info struct.
    let mut memmap_start_addr = layout::MEMMAP_START;

    // Verify the whole table fits in guest memory before writing any entry.
    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, write it to guest memory.
    for memmap_entry in memmap {
        guest_mem
            .write_obj(memmap_entry, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at PVH_START_INFO
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}
1191 
/// Boot via the Linux 32-bit boot protocol: populate a `boot_params` "zero
/// page" from the kernel-provided setup header and write it to guest memory
/// at `ZERO_PAGE_START`.
fn configure_32bit_entry(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    cmdline_size: usize,
    initramfs: &Option<InitramfsConfig>,
    setup_hdr: setup_header,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    // Loader type reported when the header does not already carry one.
    const KERNEL_LOADER_OTHER: u8 = 0xff;

    // Use the provided setup header
    let mut params = boot_params {
        hdr: setup_hdr,
        ..Default::default()
    };

    // Common bootparams settings
    if params.hdr.type_of_loader == 0 {
        params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
    }
    params.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32;
    params.hdr.cmdline_size = cmdline_size as u32;

    if let Some(initramfs_config) = initramfs {
        // The initramfs is already in guest memory; just point the header at it.
        params.hdr.ramdisk_image = initramfs_config.address.raw_value() as u32;
        params.hdr.ramdisk_size = initramfs_config.size as u32;
    }

    // RAM below the EBDA.
    add_e820_entry(&mut params, 0, layout::EBDA_START.raw_value(), E820_RAM)?;

    let mem_end = guest_mem.last_addr();
    if mem_end < layout::MEM_32BIT_RESERVED_START {
        // All RAM fits below the 32-bit reserved hole: one entry covers it.
        // last_addr() is inclusive, hence the +1 to obtain a size.
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            mem_end.unchecked_offset_from(layout::HIGH_RAM_START) + 1,
            E820_RAM,
        )?;
    } else {
        // RAM from 1MiB up to the start of the 32-bit hole...
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            layout::MEM_32BIT_RESERVED_START.unchecked_offset_from(layout::HIGH_RAM_START),
            E820_RAM,
        )?;
        // ...plus, if present, RAM above 4GiB.
        if mem_end > layout::RAM_64BIT_START {
            add_e820_entry(
                &mut params,
                layout::RAM_64BIT_START.raw_value(),
                mem_end.unchecked_offset_from(layout::RAM_64BIT_START) + 1,
                E820_RAM,
            )?;
        }
    }

    // The PCI MMCONFIG window is reported as reserved.
    add_e820_entry(
        &mut params,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    )?;

    // The SGX EPC region, when present, is reported as reserved too.
    if let Some(sgx_epc_region) = sgx_epc_region {
        add_e820_entry(
            &mut params,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        )?;
    }

    if let Some(rsdp_addr) = rsdp_addr {
        params.acpi_rsdp_addr = rsdp_addr.0;
    }

    // Ensure the zero page fits in guest memory before writing it.
    let zero_page_addr = layout::ZERO_PAGE_START;
    guest_mem
        .checked_offset(zero_page_addr, mem::size_of::<boot_params>())
        .ok_or(super::Error::ZeroPagePastRamEnd)?;
    guest_mem
        .write_obj(params, zero_page_addr)
        .map_err(super::Error::ZeroPageSetup)?;

    Ok(())
}
1278 
1279 /// Add an e820 region to the e820 map.
1280 /// Returns Ok(()) if successful, or an error if there is no space left in the map.
1281 fn add_e820_entry(
1282     params: &mut boot_params,
1283     addr: u64,
1284     size: u64,
1285     mem_type: u32,
1286 ) -> Result<(), Error> {
1287     if params.e820_entries >= params.e820_table.len() as u8 {
1288         return Err(Error::E820Configuration);
1289     }
1290 
1291     params.e820_table[params.e820_entries as usize].addr = addr;
1292     params.e820_table[params.e820_entries as usize].size = size;
1293     params.e820_table[params.e820_entries as usize].type_ = mem_type;
1294     params.e820_entries += 1;
1295 
1296     Ok(())
1297 }
1298 
1299 fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
1300     // Add the table entry to the vector
1301     memmap.push(hvm_memmap_table_entry {
1302         addr,
1303         size,
1304         type_: mem_type,
1305         reserved: 0,
1306     });
1307 }
1308 
1309 /// Returns the memory address where the initramfs could be loaded.
1310 pub fn initramfs_load_addr(
1311     guest_mem: &GuestMemoryMmap,
1312     initramfs_size: usize,
1313 ) -> super::Result<u64> {
1314     let first_region = guest_mem
1315         .find_region(GuestAddress::new(0))
1316         .ok_or(super::Error::InitramfsAddress)?;
1317     // It's safe to cast to usize because the size of a region can't be greater than usize.
1318     let lowmem_size = first_region.len() as usize;
1319 
1320     if lowmem_size < initramfs_size {
1321         return Err(super::Error::InitramfsAddress);
1322     }
1323 
1324     let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
1325     Ok(aligned_addr)
1326 }
1327 
/// Returns the number of usable physical address bits on the host CPU.
///
/// Reads CPUID leaf 0x8000_0008 when available (falling back to 36 bits
/// otherwise), and subtracts the address bits consumed by AMD SME when
/// memory encryption support is advertised.
pub fn get_host_cpu_phys_bits(hypervisor: &Arc<dyn hypervisor::Hypervisor>) -> u8 {
    // SAFETY: call cpuid with valid leaves
    unsafe {
        // Highest supported extended CPUID leaf.
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
        let reduced = if leaf.eax >= 0x8000_001f
            && matches!(hypervisor.get_cpu_vendor(), CpuVendor::AMD)
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            // Fn8000_001F EBX[11:6]: physical address bit reduction.
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if leaf.eax >= 0x8000_0008 {
            // Fn8000_0008 EAX[7:0]: physical address bits.
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            // Leaf not available: assume the legacy 36-bit limit.
            36
        }
    }
}
1353 
1354 fn update_cpuid_topology(
1355     cpuid: &mut Vec<CpuIdEntry>,
1356     threads_per_core: u8,
1357     cores_per_die: u8,
1358     dies_per_package: u8,
1359     cpu_vendor: CpuVendor,
1360     id: u8,
1361 ) {
1362     let x2apic_id = get_x2apic_id(
1363         id as u32,
1364         Some((threads_per_core, cores_per_die, dies_per_package)),
1365     );
1366 
1367     let thread_width = 8 - (threads_per_core - 1).leading_zeros();
1368     let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
1369     let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;
1370 
1371     let mut cpu_ebx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX).unwrap_or(0);
1372     cpu_ebx |= ((dies_per_package as u32) * (cores_per_die as u32) * (threads_per_core as u32))
1373         & 0xff << 16;
1374     CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX, cpu_ebx);
1375 
1376     let mut cpu_edx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX).unwrap_or(0);
1377     cpu_edx |= 1 << 28;
1378     CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX, cpu_edx);
1379 
1380     // CPU Topology leaf 0xb
1381     CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width);
1382     CpuidPatch::set_cpuid_reg(
1383         cpuid,
1384         0xb,
1385         Some(0),
1386         CpuidReg::EBX,
1387         u32::from(threads_per_core),
1388     );
1389     CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);
1390 
1391     CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
1392     CpuidPatch::set_cpuid_reg(
1393         cpuid,
1394         0xb,
1395         Some(1),
1396         CpuidReg::EBX,
1397         u32::from(dies_per_package * cores_per_die * threads_per_core),
1398     );
1399     CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);
1400 
1401     // CPU Topology leaf 0x1f
1402     CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
1403     CpuidPatch::set_cpuid_reg(
1404         cpuid,
1405         0x1f,
1406         Some(0),
1407         CpuidReg::EBX,
1408         u32::from(threads_per_core),
1409     );
1410     CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);
1411 
1412     CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
1413     CpuidPatch::set_cpuid_reg(
1414         cpuid,
1415         0x1f,
1416         Some(1),
1417         CpuidReg::EBX,
1418         u32::from(cores_per_die * threads_per_core),
1419     );
1420     CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);
1421 
1422     CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
1423     CpuidPatch::set_cpuid_reg(
1424         cpuid,
1425         0x1f,
1426         Some(2),
1427         CpuidReg::EBX,
1428         u32::from(dies_per_package * cores_per_die * threads_per_core),
1429     );
1430     CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);
1431 
1432     if matches!(cpu_vendor, CpuVendor::AMD) {
1433         CpuidPatch::set_cpuid_reg(
1434             cpuid,
1435             0x8000_001e,
1436             Some(0),
1437             CpuidReg::EBX,
1438             ((threads_per_core as u32 - 1) << 8) | (x2apic_id & 0xff),
1439         );
1440         CpuidPatch::set_cpuid_reg(
1441             cpuid,
1442             0x8000_001e,
1443             Some(0),
1444             CpuidReg::ECX,
1445             ((dies_per_package as u32 - 1) << 8) | (thread_width + die_width) & 0xff,
1446         );
1447         CpuidPatch::set_cpuid_reg(cpuid, 0x8000_001e, Some(0), CpuidReg::EDX, 0);
1448         if cores_per_die * threads_per_core > 1 {
1449             let ecx =
1450                 CpuidPatch::get_cpuid_reg(cpuid, 0x8000_0001, Some(0), CpuidReg::ECX).unwrap_or(0);
1451             CpuidPatch::set_cpuid_reg(
1452                 cpuid,
1453                 0x8000_0001,
1454                 Some(0),
1455                 CpuidReg::ECX,
1456                 ecx | (1u32 << 1) | (1u32 << 22),
1457             );
1458             CpuidPatch::set_cpuid_reg(
1459                 cpuid,
1460                 0x0000_0001,
1461                 Some(0),
1462                 CpuidReg::EBX,
1463                 (x2apic_id << 24) | (8 << 8) | (((cores_per_die * threads_per_core) as u32) << 16),
1464             );
1465             let cpuid_patches = vec![
1466                 // Patch tsc deadline timer bit
1467                 CpuidPatch {
1468                     function: 1,
1469                     index: 0,
1470                     flags_bit: None,
1471                     eax_bit: None,
1472                     ebx_bit: None,
1473                     ecx_bit: None,
1474                     edx_bit: Some(28),
1475                 },
1476             ];
1477             CpuidPatch::patch_cpuid(cpuid, cpuid_patches);
1478             CpuidPatch::set_cpuid_reg(
1479                 cpuid,
1480                 0x8000_0008,
1481                 Some(0),
1482                 CpuidReg::ECX,
1483                 ((thread_width + core_width + die_width) << 12)
1484                     | ((cores_per_die * threads_per_core) - 1) as u32,
1485             );
1486         } else {
1487             CpuidPatch::set_cpuid_reg(cpuid, 0x8000_0008, Some(0), CpuidReg::ECX, 0u32);
1488         }
1489     }
1490 }
1491 
/// Update the CPUID sub-leaves of leaf 0x12 to reflect the number of EPC
/// sections exposed to the guest.
///
/// # Errors
///
/// Fails when `epc_sections` is empty or when the hypervisor CPUID does not
/// advertise the SGX or SGX_LC features.
fn update_cpuid_sgx(
    cpuid: &mut Vec<CpuIdEntry>,
    epc_sections: &[SgxEpcSection],
) -> Result<(), Error> {
    // Something's wrong if there's no EPC section.
    if epc_sections.is_empty() {
        return Err(Error::NoSgxEpcSection);
    }
    // We can't go further if the hypervisor does not support SGX feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
        return Err(Error::MissingSgxFeature);
    }
    // We can't go further if the hypervisor does not support SGX_LC feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
        return Err(Error::MissingSgxLaunchControlFeature);
    }

    // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
    // properties such as confidentiality and integrity.
    // SAFETY: call cpuid with valid leaves
    let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };

    for (i, epc_section) in epc_sections.iter().enumerate() {
        // EPC sections are reported starting at sub-leaf 2.
        let subleaf_idx = i + 2;
        let start = epc_section.start().raw_value();
        let size = epc_section.size();
        // Low bits of base/size go in EAX/ECX (4KiB granularity), high bits
        // in EBX/EDX. EAX bit 0 marks the sub-leaf as valid; the low ECX
        // nibble carries the host's EPC section properties.
        let eax = (start & 0xffff_f000) as u32 | 0x1;
        let ebx = (start >> 32) as u32;
        let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
        let edx = (size >> 32) as u32;
        // CPU Topology leaf 0x12
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
    }

    // Add one NULL entry to terminate the dynamic list
    let subleaf_idx = epc_sections.len() + 2;
    // CPU Topology leaf 0x12
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);

    Ok(())
}
1541 
1542 #[cfg(test)]
1543 mod tests {
1544     use super::*;
1545     use linux_loader::loader::bootparam::boot_e820_entry;
1546 
1547     #[test]
1548     fn regions_base_addr() {
1549         let regions = arch_memory_regions();
1550         assert_eq!(4, regions.len());
1551         assert_eq!(GuestAddress(0), regions[0].0);
1552         assert_eq!(GuestAddress(1 << 32), regions[1].0);
1553     }
1554 
    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;
        // Tiny guest memory (64 KiB) with an RSDP address requested:
        // presumably RSDP_POINTER lies past the end of RAM, so this must fail.
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let config_err = configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            1,
            None,
            Some(layout::RSDP_POINTER),
            None,
            None,
            None,
            None,
            None,
        );
        assert!(config_err.is_err());

        // Now assigning some memory that falls before the 32bit memory hole.
        // The open-ended post-4GiB region (usize::MAX) is filtered out here.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram && r.1 != usize::MAX)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();

        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // Now assigning some memory that falls after the 32bit memory hole.
        // The open-ended post-4GiB region is capped to 128 MiB for the test.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| {
                if r.1 == usize::MAX {
                    (r.0, 128 << 20)
                } else {
                    (r.0, r.1)
                }
            })
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // NOTE(review): this call is identical to the previous one —
        // presumably it checks that configuration can be applied twice over
        // the same memory; confirm the intent before removing.
        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
    }
1646 
    #[test]
    fn test_add_e820_entry() {
        // A full table of identical entries; only the first one is used below.
        let e820_table = [(boot_e820_entry {
            addr: 0x1,
            size: 4,
            type_: 1,
        }); 128];

        // Expected state after a single successful insertion.
        let expected_params = boot_params {
            e820_table,
            e820_entries: 1,
            ..Default::default()
        };

        let mut params: boot_params = Default::default();
        add_e820_entry(
            &mut params,
            e820_table[0].addr,
            e820_table[0].size,
            e820_table[0].type_,
        )
        .unwrap();
        // Compare through Debug formatting rather than direct equality.
        assert_eq!(
            format!("{:?}", params.e820_table[0]),
            format!("{:?}", expected_params.e820_table[0])
        );
        assert_eq!(params.e820_entries, expected_params.e820_entries);

        // Exercise the scenario where the field storing the number of e820
        // entries claims more slots than the table provides: the next
        // insertion must be rejected.
        params.e820_entries = params.e820_table.len() as u8 + 1;
        assert!(add_e820_entry(
            &mut params,
            e820_table[0].addr,
            e820_table[0].size,
            e820_table[0].type_
        )
        .is_err());
    }
1686 
1687     #[test]
1688     fn test_add_memmap_entry() {
1689         let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();
1690 
1691         let expected_memmap = vec![
1692             hvm_memmap_table_entry {
1693                 addr: 0x0,
1694                 size: 0x1000,
1695                 type_: E820_RAM,
1696                 ..Default::default()
1697             },
1698             hvm_memmap_table_entry {
1699                 addr: 0x10000,
1700                 size: 0xa000,
1701                 type_: E820_RESERVED,
1702                 ..Default::default()
1703             },
1704         ];
1705 
1706         add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM);
1707         add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED);
1708 
1709         assert_eq!(format!("{memmap:?}"), format!("{expected_memmap:?}"));
1710     }
1711 
1712     #[test]
1713     fn test_get_x2apic_id() {
1714         let x2apic_id = get_x2apic_id(0, Some((2, 3, 1)));
1715         assert_eq!(x2apic_id, 0);
1716 
1717         let x2apic_id = get_x2apic_id(1, Some((2, 3, 1)));
1718         assert_eq!(x2apic_id, 1);
1719 
1720         let x2apic_id = get_x2apic_id(2, Some((2, 3, 1)));
1721         assert_eq!(x2apic_id, 2);
1722 
1723         let x2apic_id = get_x2apic_id(6, Some((2, 3, 1)));
1724         assert_eq!(x2apic_id, 8);
1725 
1726         let x2apic_id = get_x2apic_id(7, Some((2, 3, 1)));
1727         assert_eq!(x2apic_id, 9);
1728 
1729         let x2apic_id = get_x2apic_id(8, Some((2, 3, 1)));
1730         assert_eq!(x2apic_id, 10);
1731     }
1732 }
1733