// Copyright © 2020, Oracle and/or its affiliates.
//
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
pub mod interrupts;
pub mod layout;
mod mpspec;
mod mptable;
pub mod regs;
mod smbios;
#[cfg(feature = "tdx")]
pub mod tdx;

use std::arch::x86_64;
use std::collections::BTreeMap;
use std::mem;
use std::sync::Arc;

use hypervisor::arch::x86::{CpuIdEntry, CPUID_FLAG_VALID_INDEX};
use hypervisor::{CpuVendor, HypervisorCpuError, HypervisorError};
use linux_loader::loader::bootparam::{boot_params, setup_header};
use linux_loader::loader::elf::start_info::{
    hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info,
};
use thiserror::Error;
use vm_memory::{
    Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryAtomic,
    GuestMemoryRegion, GuestUsize,
};

use crate::{GuestMemoryMmap, InitramfsConfig, RegionType};

// CPUID feature bits
#[cfg(feature = "kvm")]
const TSC_DEADLINE_TIMER_ECX_BIT: u8 = 24; // TSC deadline timer ECX bit.
const HYPERVISOR_ECX_BIT: u8 = 31; // Hypervisor ECX bit.
const MTRR_EDX_BIT: u8 = 12; // MTRR EDX bit.
const INVARIANT_TSC_EDX_BIT: u8 = 8; // Invariant TSC bit on 0x8000_0007 EDX
const AMX_BF16: u8 = 22; // AMX tile computation on bfloat16 numbers
const AMX_TILE: u8 = 24; // AMX tile load/store instructions
const AMX_INT8: u8 = 25; // AMX tile computation on 8-bit integers

// KVM feature bits
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_BIT: u8 = 0;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE2_BIT: u8 = 3;
#[cfg(feature = "tdx")]
const KVM_FEATURE_CLOCKSOURCE_STABLE_BIT: u8 = 24;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_BIT: u8 = 4;
#[cfg(feature = "tdx")]
const KVM_FEATURE_ASYNC_PF_VMEXIT_BIT: u8 = 10;
#[cfg(feature = "tdx")]
const KVM_FEATURE_STEAL_TIME_BIT: u8 = 5;

pub const _NSIG: i32 = 65;

#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which of the supported boot protocols
/// is to be used to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
    /// This field is used for bzImage to fill the zero page
    pub setup_header: Option<setup_header>,
}

const E820_RAM: u32 = 1;
const E820_RESERVED: u32 = 2;

#[derive(Clone)]
pub struct SgxEpcSection {
    start: GuestAddress,
    size: GuestUsize,
}

impl SgxEpcSection {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcSection { start, size }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
}

#[derive(Clone)]
pub struct SgxEpcRegion {
    start: GuestAddress,
    size: GuestUsize,
    epc_sections: BTreeMap<String, SgxEpcSection>,
}

impl SgxEpcRegion {
    pub fn new(start: GuestAddress, size: GuestUsize) -> Self {
        SgxEpcRegion {
            start,
            size,
            epc_sections: BTreeMap::new(),
        }
    }
    pub fn start(&self) -> GuestAddress {
        self.start
    }
    pub fn size(&self) -> GuestUsize {
        self.size
    }
    pub fn epc_sections(&self) -> &BTreeMap<String, SgxEpcSection> {
        &self.epc_sections
    }
    pub fn insert(&mut self, id: String, epc_section: SgxEpcSection) {
        self.epc_sections.insert(id, epc_section);
    }
}
122
123 pub struct CpuidConfig {
124 pub sgx_epc_sections: Option<Vec<SgxEpcSection>>,
125 pub phys_bits: u8,
126 pub kvm_hyperv: bool,
127 #[cfg(feature = "tdx")]
128 pub tdx: bool,
129 pub amx: bool,
130 }
131
132 #[derive(Debug, Error)]
133 pub enum Error {
134 /// Error writing MP table to memory.
135 #[error("Error writing MP table to memory")]
136 MpTableSetup(#[source] mptable::Error),
137
138 /// Error configuring the general purpose registers
139 #[error("Error configuring the general purpose registers")]
140 RegsConfiguration(#[source] regs::Error),
141
142 /// Error configuring the special registers
143 #[error("Error configuring the special registers")]
144 SregsConfiguration(#[source] regs::Error),
145
146 /// Error configuring the floating point related registers
147 #[error("Error configuring the floating point related registers")]
148 FpuConfiguration(#[source] regs::Error),
149
150 /// Error configuring the MSR registers
151 #[error("Error configuring the MSR registers")]
152 MsrsConfiguration(#[source] regs::Error),
153
154 /// Failed to set supported CPUs.
155 #[error("Failed to set supported CPUs")]
156 SetSupportedCpusFailed(#[source] anyhow::Error),
157
158 /// Cannot set the local interruption due to bad configuration.
159 #[error("Cannot set the local interruption due to bad configuration")]
160 LocalIntConfiguration(#[source] anyhow::Error),
161
162 /// Error setting up SMBIOS table
163 #[error("Error setting up SMBIOS table")]
164 SmbiosSetup(#[source] smbios::Error),
165
166 /// Could not find any SGX EPC section
167 #[error("Could not find any SGX EPC section")]
168 NoSgxEpcSection,
169
170 /// Missing SGX CPU feature
171 #[error("Missing SGX CPU feature")]
172 MissingSgxFeature,
173
174 /// Missing SGX_LC CPU feature
175 #[error("Missing SGX_LC CPU feature")]
176 MissingSgxLaunchControlFeature,
177
178 /// Error getting supported CPUID through the hypervisor (kvm/mshv) API
179 #[error("Error getting supported CPUID through the hypervisor API")]
180 CpuidGetSupported(#[source] HypervisorError),
181
182 /// Error populating CPUID with KVM HyperV emulation details
183 #[error("Error populating CPUID with KVM HyperV emulation details")]
184 CpuidKvmHyperV(#[source] vmm_sys_util::fam::Error),
185
186 /// Error populating CPUID with CPU identification
187 #[error("Error populating CPUID with CPU identification")]
188 CpuidIdentification(#[source] vmm_sys_util::fam::Error),
189
190 /// Error checking CPUID compatibility
191 #[error("Error checking CPUID compatibility")]
192 CpuidCheckCompatibility,
193
194 // Error writing EBDA address
195 #[error("Error writing EBDA address")]
196 EbdaSetup(#[source] vm_memory::GuestMemoryError),
197
198 // Error getting CPU TSC frequency
199 #[error("Error getting CPU TSC frequency")]
200 GetTscFrequency(#[source] HypervisorCpuError),
201
202 /// Error retrieving TDX capabilities through the hypervisor (kvm/mshv) API
203 #[cfg(feature = "tdx")]
204 #[error("Error retrieving TDX capabilities through the hypervisor API")]
205 TdxCapabilities(#[source] HypervisorError),
206
207 /// Failed to configure E820 map for bzImage
208 #[error("Failed to configure E820 map for bzImage")]
209 E820Configuration,
210 }
211
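/// Computes the x2APIC ID for a vCPU from its index and the optional
/// `(threads_per_core, cores_per_die, dies_per_package)` topology, packing
/// the thread, core, die and socket IDs into contiguous bit fields. With no
/// topology, the vCPU index is used as-is.
///
/// A minimal illustration, mirroring the unit test at the bottom of this
/// file (marked `ignore` since this is a crate-internal sketch):
///
/// ```ignore
/// // 2 threads/core, 3 cores/die, 1 die/package: vCPU 6 overflows into the
/// // second socket as thread 0, core 0, so its x2APIC ID is 1 << 3 = 8.
/// assert_eq!(get_x2apic_id(6, Some((2, 3, 1))), 8);
/// ```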
pub fn get_x2apic_id(cpu_id: u32, topology: Option<(u8, u8, u8)>) -> u32 {
    if let Some(t) = topology {
        let thread_mask_width = u8::BITS - (t.0 - 1).leading_zeros();
        let core_mask_width = u8::BITS - (t.1 - 1).leading_zeros();
        let die_mask_width = u8::BITS - (t.2 - 1).leading_zeros();

        let thread_id = cpu_id % (t.0 as u32);
        let core_id = cpu_id / (t.0 as u32) % (t.1 as u32);
        let die_id = cpu_id / ((t.0 * t.1) as u32) % (t.2 as u32);
        let socket_id = cpu_id / ((t.0 * t.1 * t.2) as u32);

        return thread_id
            | (core_id << thread_mask_width)
            | (die_id << (thread_mask_width + core_mask_width))
            | (socket_id << (thread_mask_width + core_mask_width + die_mask_width));
    }

    cpu_id
}

#[derive(Copy, Clone, Debug)]
pub enum CpuidReg {
    EAX,
    EBX,
    ECX,
    EDX,
}

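/// A single CPUID tweak applied by [`CpuidPatch::patch_cpuid`]: for the
/// (`function`, `index`) leaf, each `Some(bit)` field requests that bit be
/// set in the corresponding register (or in the entry flags).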
pub struct CpuidPatch {
    pub function: u32,
    pub index: u32,
    pub flags_bit: Option<u8>,
    pub eax_bit: Option<u8>,
    pub ebx_bit: Option<u8>,
    pub ecx_bit: Option<u8>,
    pub edx_bit: Option<u8>,
}

impl CpuidPatch {
    pub fn get_cpuid_reg(
        cpuid: &[CpuIdEntry],
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
    ) -> Option<u32> {
        for entry in cpuid.iter() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                return match reg {
                    CpuidReg::EAX => Some(entry.eax),
                    CpuidReg::EBX => Some(entry.ebx),
                    CpuidReg::ECX => Some(entry.ecx),
                    CpuidReg::EDX => Some(entry.edx),
                };
            }
        }

        None
    }

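    /// Sets `reg` of the (`function`, `index`) CPUID leaf to `value`,
    /// updating every matching entry (any index matches when `index` is
    /// `None`). If nothing matches and an index was given, a new entry
    /// carrying `CPUID_FLAG_VALID_INDEX` is appended.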
    pub fn set_cpuid_reg(
        cpuid: &mut Vec<CpuIdEntry>,
        function: u32,
        index: Option<u32>,
        reg: CpuidReg,
        value: u32,
    ) {
        let mut entry_found = false;
        for entry in cpuid.iter_mut() {
            if entry.function == function && (index.is_none() || index.unwrap() == entry.index) {
                entry_found = true;
                match reg {
                    CpuidReg::EAX => {
                        entry.eax = value;
                    }
                    CpuidReg::EBX => {
                        entry.ebx = value;
                    }
                    CpuidReg::ECX => {
                        entry.ecx = value;
                    }
                    CpuidReg::EDX => {
                        entry.edx = value;
                    }
                }
            }
        }

        if entry_found {
            return;
        }

        // Entry not found, so let's add it.
        if let Some(index) = index {
            let mut entry = CpuIdEntry {
                function,
                index,
                flags: CPUID_FLAG_VALID_INDEX,
                ..Default::default()
            };
            match reg {
                CpuidReg::EAX => {
                    entry.eax = value;
                }
                CpuidReg::EBX => {
                    entry.ebx = value;
                }
                CpuidReg::ECX => {
                    entry.ecx = value;
                }
                CpuidReg::EDX => {
                    entry.edx = value;
                }
            }

            cpuid.push(entry);
        }
    }

    pub fn patch_cpuid(cpuid: &mut [CpuIdEntry], patches: Vec<CpuidPatch>) {
        for entry in cpuid {
            for patch in patches.iter() {
                if entry.function == patch.function && entry.index == patch.index {
                    if let Some(flags_bit) = patch.flags_bit {
                        entry.flags |= 1 << flags_bit;
                    }
                    if let Some(eax_bit) = patch.eax_bit {
                        entry.eax |= 1 << eax_bit;
                    }
                    if let Some(ebx_bit) = patch.ebx_bit {
                        entry.ebx |= 1 << ebx_bit;
                    }
                    if let Some(ecx_bit) = patch.ecx_bit {
                        entry.ecx |= 1 << ecx_bit;
                    }
                    if let Some(edx_bit) = patch.edx_bit {
                        entry.edx |= 1 << edx_bit;
                    }
                }
            }
        }
    }

    pub fn is_feature_enabled(
        cpuid: &[CpuIdEntry],
        function: u32,
        index: u32,
        reg: CpuidReg,
        feature_bit: usize,
    ) -> bool {
        let mask = 1 << feature_bit;

        for entry in cpuid {
            if entry.function == function && entry.index == index {
                let reg_val = match reg {
                    CpuidReg::EAX => entry.eax,
                    CpuidReg::EBX => entry.ebx,
                    CpuidReg::ECX => entry.ecx,
                    CpuidReg::EDX => entry.edx,
                };

                return (reg_val & mask) == mask;
            }
        }

        false
    }
}

#[derive(Debug)]
enum CpuidCompatibleCheck {
    BitwiseSubset, // bitwise subset
    Equal,         // equal in value
    NumNotGreater, // smaller or equal as a number
}

pub struct CpuidFeatureEntry {
    function: u32,
    index: u32,
    feature_reg: CpuidReg,
    compatible_check: CpuidCompatibleCheck,
}

impl CpuidFeatureEntry {
    fn checked_feature_entry_list() -> Vec<CpuidFeatureEntry> {
        vec![
            // The following list includes all hardware feature bits from
            // the CPUID Wiki Page: https://en.wikipedia.org/wiki/CPUID
            // Leaf 0x1, ECX/EDX, feature bits
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 1,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7, EAX/EBX/ECX/EDX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 7,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x7 subleaf 0x1, EAX, extended features
            CpuidFeatureEntry {
                function: 7,
                index: 1,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // Leaf 0x8000_0001, ECX/EDX, CPUID feature bits
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x8000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            // KVM CPUID bits: https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
            // Leaf 0x4000_0000, EAX/EBX/ECX/EDX, KVM CPUID SIGNATURE
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::NumNotGreater,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            CpuidFeatureEntry {
                function: 0x4000_0000,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::Equal,
            },
            // Leaf 0x4000_0001, EAX/EBX/ECX/EDX, KVM CPUID features
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EAX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EBX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::ECX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
            CpuidFeatureEntry {
                function: 0x4000_0001,
                index: 0,
                feature_reg: CpuidReg::EDX,
                compatible_check: CpuidCompatibleCheck::BitwiseSubset,
            },
        ]
    }

    fn get_features_from_cpuid(
        cpuid: &[CpuIdEntry],
        feature_entry_list: &[CpuidFeatureEntry],
    ) -> Vec<u32> {
        let mut features = vec![0; feature_entry_list.len()];
        for (i, feature_entry) in feature_entry_list.iter().enumerate() {
            for cpuid_entry in cpuid {
                if cpuid_entry.function == feature_entry.function
                    && cpuid_entry.index == feature_entry.index
                {
                    match feature_entry.feature_reg {
                        CpuidReg::EAX => {
                            features[i] = cpuid_entry.eax;
                        }
                        CpuidReg::EBX => {
                            features[i] = cpuid_entry.ebx;
                        }
                        CpuidReg::ECX => {
                            features[i] = cpuid_entry.ecx;
                        }
                        CpuidReg::EDX => {
                            features[i] = cpuid_entry.edx;
                        }
                    }

                    break;
                }
            }
        }

        features
    }

    // The function returns `Error` (a.k.a. "incompatible") when the CPUID
    // features from `src_vm_cpuid` are not a subset of those of `dest_vm_cpuid`.
    pub fn check_cpuid_compatibility(
        src_vm_cpuid: &[CpuIdEntry],
        dest_vm_cpuid: &[CpuIdEntry],
    ) -> Result<(), Error> {
        let feature_entry_list = &Self::checked_feature_entry_list();
        let src_vm_features = Self::get_features_from_cpuid(src_vm_cpuid, feature_entry_list);
        let dest_vm_features = Self::get_features_from_cpuid(dest_vm_cpuid, feature_entry_list);

        // Loop over the feature bits and check that each 'source VM' feature
        // is a subset of the corresponding 'destination VM' feature.
        let mut compatible = true;
        for (i, (src_vm_feature, dest_vm_feature)) in src_vm_features
            .iter()
            .zip(dest_vm_features.iter())
            .enumerate()
        {
            let entry = &feature_entry_list[i];
            let entry_compatible = match entry.compatible_check {
                CpuidCompatibleCheck::BitwiseSubset => {
                    let different_feature_bits = src_vm_feature ^ dest_vm_feature;
                    let src_vm_feature_bits_only = different_feature_bits & src_vm_feature;
                    src_vm_feature_bits_only == 0
                }
                CpuidCompatibleCheck::Equal => src_vm_feature == dest_vm_feature,
                CpuidCompatibleCheck::NumNotGreater => src_vm_feature <= dest_vm_feature,
            };
            if !entry_compatible {
                error!(
                    "Detected incompatible CPUID entry: leaf={:#02x} (subleaf={:#02x}), register='{:?}', \
                    compatible_check='{:?}', source VM feature='{:#04x}', destination VM feature='{:#04x}'.",
                    entry.function, entry.index, entry.feature_reg,
                    entry.compatible_check, src_vm_feature, dest_vm_feature
                );

                compatible = false;
            }
        }

        if compatible {
            info!("No CPU incompatibility detected.");
            Ok(())
        } else {
            Err(Error::CpuidCheckCompatibility)
        }
    }
}

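/// Generates the CPUID entries shared by all vCPUs: the hypervisor and MTRR
/// bits, SGX and TDX adjustments when enabled, host cache and CPU
/// identification leaves, the physical address width, and optionally the
/// KVM HyperV emulation leaves.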
pub fn generate_common_cpuid(
    hypervisor: &Arc<dyn hypervisor::Hypervisor>,
    config: &CpuidConfig,
) -> super::Result<Vec<CpuIdEntry>> {
    // SAFETY: cpuid called with valid leaves
    if unsafe { x86_64::__cpuid(1) }.ecx & (1 << HYPERVISOR_ECX_BIT) == 1 << HYPERVISOR_ECX_BIT {
        // SAFETY: cpuid called with valid leaves
        let hypervisor_cpuid = unsafe { x86_64::__cpuid(0x4000_0000) };

        let mut identifier: [u8; 12] = [0; 12];
        identifier[0..4].copy_from_slice(&hypervisor_cpuid.ebx.to_le_bytes()[..]);
        identifier[4..8].copy_from_slice(&hypervisor_cpuid.ecx.to_le_bytes()[..]);
        identifier[8..12].copy_from_slice(&hypervisor_cpuid.edx.to_le_bytes()[..]);

        info!(
            "Running under nested virtualisation. Hypervisor string: {}",
            String::from_utf8_lossy(&identifier)
        );
    }

    info!(
        "Generating guest CPUID with physical address size: {}",
        config.phys_bits
    );
    #[allow(unused_mut)]
    let mut cpuid_patches = vec![
        // Patch hypervisor bit
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(HYPERVISOR_ECX_BIT),
            edx_bit: None,
        },
        // Enable MTRR feature
        CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: None,
            edx_bit: Some(MTRR_EDX_BIT),
        },
    ];

    #[cfg(feature = "kvm")]
    if matches!(
        hypervisor.hypervisor_type(),
        hypervisor::HypervisorType::Kvm
    ) {
        // Patch tsc deadline timer bit
        cpuid_patches.push(CpuidPatch {
            function: 1,
            index: 0,
            flags_bit: None,
            eax_bit: None,
            ebx_bit: None,
            ecx_bit: Some(TSC_DEADLINE_TIMER_ECX_BIT),
            edx_bit: None,
        });
    }

    // Supported CPUID
    let mut cpuid = hypervisor
        .get_supported_cpuid()
        .map_err(Error::CpuidGetSupported)?;

    CpuidPatch::patch_cpuid(&mut cpuid, cpuid_patches);

    if let Some(sgx_epc_sections) = &config.sgx_epc_sections {
        update_cpuid_sgx(&mut cpuid, sgx_epc_sections)?;
    }

    #[cfg(feature = "tdx")]
    let tdx_capabilities = if config.tdx {
        let caps = hypervisor
            .tdx_capabilities()
            .map_err(Error::TdxCapabilities)?;
        info!("TDX capabilities {:#?}", caps);
        Some(caps)
    } else {
        None
    };

    // Update some existing CPUID
    for entry in cpuid.as_mut_slice().iter_mut() {
        match entry.function {
            // Clear AMX related bits if the AMX feature is not enabled
            0x7 => {
                if !config.amx && entry.index == 0 {
                    entry.edx &= !((1 << AMX_BF16) | (1 << AMX_TILE) | (1 << AMX_INT8))
                }
            }
            0xd =>
            {
                #[cfg(feature = "tdx")]
                if let Some(caps) = &tdx_capabilities {
                    let xcr0_mask: u64 = 0x82ff;
                    let xss_mask: u64 = !xcr0_mask;
                    if entry.index == 0 {
                        entry.eax &= (caps.xfam_fixed0 as u32) & (xcr0_mask as u32);
                        entry.eax |= (caps.xfam_fixed1 as u32) & (xcr0_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xcr0_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xcr0_mask) >> 32) as u32;
                    } else if entry.index == 1 {
                        entry.ecx &= (caps.xfam_fixed0 as u32) & (xss_mask as u32);
                        entry.ecx |= (caps.xfam_fixed1 as u32) & (xss_mask as u32);
                        entry.edx &= ((caps.xfam_fixed0 & xss_mask) >> 32) as u32;
                        entry.edx |= ((caps.xfam_fixed1 & xss_mask) >> 32) as u32;
                    }
                }
            }
            // Copy host L1 cache details if not populated by KVM
            0x8000_0005 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0005 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0005) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Copy host L2 cache details if not populated by KVM
            0x8000_0006 => {
                if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 {
                    // SAFETY: cpuid called with valid leaves
                    if unsafe { std::arch::x86_64::__cpuid(0x8000_0000).eax } >= 0x8000_0006 {
                        // SAFETY: cpuid called with valid leaves
                        let leaf = unsafe { std::arch::x86_64::__cpuid(0x8000_0006) };
                        entry.eax = leaf.eax;
                        entry.ebx = leaf.ebx;
                        entry.ecx = leaf.ecx;
                        entry.edx = leaf.edx;
                    }
                }
            }
            // Set CPU physical bits
            0x8000_0008 => {
                entry.eax = (entry.eax & 0xffff_ff00) | (config.phys_bits as u32 & 0xff);
            }
            0x4000_0001 => {
                // These features are not supported by TDX
                #[cfg(feature = "tdx")]
                if config.tdx {
                    entry.eax &= !((1 << KVM_FEATURE_CLOCKSOURCE_BIT)
                        | (1 << KVM_FEATURE_CLOCKSOURCE2_BIT)
                        | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)
                        | (1 << KVM_FEATURE_ASYNC_PF_BIT)
                        | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT_BIT)
                        | (1 << KVM_FEATURE_STEAL_TIME_BIT))
                }
            }
            _ => {}
        }
    }

    // Copy CPU identification string
    for i in 0x8000_0002..=0x8000_0004 {
        cpuid.retain(|c| c.function != i);
        // SAFETY: call cpuid with valid leaves
        let leaf = unsafe { std::arch::x86_64::__cpuid(i) };
        cpuid.push(CpuIdEntry {
            function: i,
            eax: leaf.eax,
            ebx: leaf.ebx,
            ecx: leaf.ecx,
            edx: leaf.edx,
            ..Default::default()
        });
    }

    if config.kvm_hyperv {
        // Remove conflicting entries
        cpuid.retain(|c| c.function != 0x4000_0000);
        cpuid.retain(|c| c.function != 0x4000_0001);
        // See "Hypervisor Top Level Functional Specification" for details
        // Compliance with "Hv#1" requires leaves up to 0x4000_000a
        cpuid.push(CpuIdEntry {
            function: 0x4000_0000,
            eax: 0x4000_000a, // Maximum cpuid leaf
            ebx: 0x756e_694c, // "Linu"
            ecx: 0x564b_2078, // "x KV"
            edx: 0x7648_204d, // "M Hv"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0001,
            eax: 0x3123_7648, // "Hv#1"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0002,
            eax: 0x3839,  // "Build number"
            ebx: 0xa0000, // "Version"
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0003,
            eax: (1 << 1) // AccessPartitionReferenceCounter
                | (1 << 2) // AccessSynicRegs
                | (1 << 3) // AccessSyntheticTimerRegs
                | (1 << 9), // AccessPartitionReferenceTsc
            edx: 1 << 3, // CPU dynamic partitioning
            ..Default::default()
        });
        cpuid.push(CpuIdEntry {
            function: 0x4000_0004,
            eax: 1 << 5, // Recommend relaxed timing
            ..Default::default()
        });
        for i in 0x4000_0005..=0x4000_000a {
            cpuid.push(CpuIdEntry {
                function: i,
                ..Default::default()
            });
        }
    }

    Ok(cpuid)
}

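/// Configures a single vCPU for boot: applies the per-vCPU CPUID changes
/// (x2APIC ID, topology, TSC frequency leaf), programs the MSRs, and, when
/// a boot setup is provided, the registers needed to start execution.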
pub fn configure_vcpu(
    vcpu: &Arc<dyn hypervisor::Vcpu>,
    id: u8,
    boot_setup: Option<(EntryPoint, &GuestMemoryAtomic<GuestMemoryMmap>)>,
    cpuid: Vec<CpuIdEntry>,
    kvm_hyperv: bool,
    cpu_vendor: CpuVendor,
    topology: Option<(u8, u8, u8)>,
) -> super::Result<()> {
    let x2apic_id = get_x2apic_id(id as u32, topology);

    // Per vCPU CPUID changes; common are handled via generate_common_cpuid()
    let mut cpuid = cpuid;
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0xb, None, CpuidReg::EDX, x2apic_id);
    CpuidPatch::set_cpuid_reg(&mut cpuid, 0x1f, None, CpuidReg::EDX, x2apic_id);
    if matches!(cpu_vendor, CpuVendor::AMD) {
        CpuidPatch::set_cpuid_reg(&mut cpuid, 0x8000_001e, Some(0), CpuidReg::EAX, x2apic_id);
    }

    // Set ApicId in cpuid for each vcpu - found in cpuid ebx when eax = 1
    let mut apic_id_patched = false;
    for entry in &mut cpuid {
        if entry.function == 1 {
            entry.ebx &= 0xffffff;
            entry.ebx |= x2apic_id << 24;
            apic_id_patched = true;
            break;
        }
    }
    assert!(apic_id_patched);

    if let Some(t) = topology {
        update_cpuid_topology(&mut cpuid, t.0, t.1, t.2, cpu_vendor, id);
    }

    // The TSC frequency CPUID leaf should not be included when running with HyperV emulation
    if !kvm_hyperv {
        if let Some(tsc_khz) = vcpu.tsc_khz().map_err(Error::GetTscFrequency)? {
            // Need to check that the TSC doesn't vary with dynamic frequency
            // SAFETY: cpuid called with valid leaves
            if unsafe { std::arch::x86_64::__cpuid(0x8000_0007) }.edx
                & (1u32 << INVARIANT_TSC_EDX_BIT)
                > 0
            {
                CpuidPatch::set_cpuid_reg(
                    &mut cpuid,
                    0x4000_0000,
                    None,
                    CpuidReg::EAX,
                    0x4000_0010,
                );
                cpuid.retain(|c| c.function != 0x4000_0010);
                cpuid.push(CpuIdEntry {
                    function: 0x4000_0010,
                    eax: tsc_khz,
                    ebx: 1000000, /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's
                                   * APIC_BUS_CYCLE_NS */
                    ..Default::default()
                });
            };
        }
    }

    for c in &cpuid {
        debug!("{}", c);
    }

    vcpu.set_cpuid2(&cpuid)
        .map_err(|e| Error::SetSupportedCpusFailed(e.into()))?;

    if kvm_hyperv {
        vcpu.enable_hyperv_synic().unwrap();
    }

    regs::setup_msrs(vcpu).map_err(Error::MsrsConfiguration)?;
    if let Some((kernel_entry_point, guest_memory)) = boot_setup {
        regs::setup_regs(vcpu, kernel_entry_point).map_err(Error::RegsConfiguration)?;
        regs::setup_fpu(vcpu).map_err(Error::FpuConfiguration)?;
        regs::setup_sregs(&guest_memory.memory(), vcpu).map_err(Error::SregsConfiguration)?;
    }
    interrupts::set_lint(vcpu).map_err(|e| Error::LocalIntConfiguration(e.into()))?;
    Ok(())
}

/// Returns a Vec of the valid memory addresses.
///
/// These should be used to configure the GuestMemory structure for the platform.
/// For x86_64 all addresses are valid from the start of the kernel except a
/// carve-out at the end of the 32-bit address space.
pub fn arch_memory_regions() -> Vec<(GuestAddress, usize, RegionType)> {
    vec![
        // 0 GiB ~ 3 GiB: memory before the gap
        (
            GuestAddress(0),
            layout::MEM_32BIT_RESERVED_START.raw_value() as usize,
            RegionType::Ram,
        ),
        // 4 GiB ~ inf: memory after the gap
        (layout::RAM_64BIT_START, usize::MAX, RegionType::Ram),
        // 3 GiB ~ 3712 MiB: 32-bit device memory hole
        (
            layout::MEM_32BIT_RESERVED_START,
            layout::MEM_32BIT_DEVICES_SIZE as usize,
            RegionType::SubRegion,
        ),
        // 3712 MiB ~ 3968 MiB: 32-bit reserved memory hole
        (
            layout::MEM_32BIT_RESERVED_START.unchecked_add(layout::MEM_32BIT_DEVICES_SIZE),
            (layout::MEM_32BIT_RESERVED_SIZE - layout::MEM_32BIT_DEVICES_SIZE) as usize,
            RegionType::Reserved,
        ),
    ]
}

/// Configures the system and should be called once per VM before starting vcpu threads.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded.
/// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator.
/// * `num_cpus` - Number of virtual CPUs the guest will have.
#[allow(clippy::too_many_arguments)]
pub fn configure_system(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    cmdline_size: usize,
    initramfs: &Option<InitramfsConfig>,
    _num_cpus: u8,
    setup_header: Option<setup_header>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
    serial_number: Option<&str>,
    uuid: Option<&str>,
    oem_strings: Option<&[&str]>,
    topology: Option<(u8, u8, u8)>,
) -> super::Result<()> {
    // Write EBDA address to location where ACPICA expects to find it
    guest_mem
        .write_obj((layout::EBDA_START.0 >> 4) as u16, layout::EBDA_POINTER)
        .map_err(Error::EbdaSetup)?;

    let size = smbios::setup_smbios(guest_mem, serial_number, uuid, oem_strings)
        .map_err(Error::SmbiosSetup)?;

    // Place the MP table after the SMBIOS table, aligned to 16 bytes
    let offset = GuestAddress(layout::SMBIOS_START).unchecked_add(size);
    let offset = GuestAddress((offset.0 + 16) & !0xf);
    mptable::setup_mptable(offset, guest_mem, _num_cpus, topology).map_err(Error::MpTableSetup)?;

    // Check that the RAM is not smaller than the RSDP start address
    if let Some(rsdp_addr) = rsdp_addr {
        if rsdp_addr.0 > guest_mem.last_addr().0 {
            return Err(super::Error::RsdpPastRamEnd);
        }
    }

    match setup_header {
        Some(hdr) => configure_32bit_entry(
            guest_mem,
            cmdline_addr,
            cmdline_size,
            initramfs,
            hdr,
            rsdp_addr,
            sgx_epc_region,
        ),
        None => configure_pvh(
            guest_mem,
            cmdline_addr,
            initramfs,
            rsdp_addr,
            sgx_epc_region,
        ),
    }
}

type RamRange = (u64, u64);

/// Returns the usable physical memory ranges for the guest.
/// These should be used to create the E820_RAM memory map entries.
pub fn generate_ram_ranges(guest_mem: &GuestMemoryMmap) -> super::Result<Vec<RamRange>> {
    // Merge continuous memory regions into one region.
    // Note: memory regions from "GuestMemory" are sorted and non-zero sized.
    let ram_regions = {
        let mut ram_regions = Vec::new();
        let mut current_start = guest_mem
            .iter()
            .next()
            .map(GuestMemoryRegion::start_addr)
            .expect("GuestMemory must have one memory region at least")
            .raw_value();
        let mut current_end = current_start;

        for (start, size) in guest_mem
            .iter()
            .map(|m| (m.start_addr().raw_value(), m.len()))
        {
            if current_end == start {
                // This zone is continuous with the previous one.
                current_end += size;
            } else {
                ram_regions.push((current_start, current_end));

                current_start = start;
                current_end = start + size;
            }
        }

        ram_regions.push((current_start, current_end));

        ram_regions
    };

    // Create the memory map entry for memory region before the gap
    let mut ram_ranges = vec![];

    // Generate the first usable physical memory range before the gap. The e820 map
    // should only report memory above 1MiB.
    let first_ram_range = {
        let (first_region_start, first_region_end) =
            ram_regions.first().ok_or(super::Error::MemmapTableSetup)?;
        let high_ram_start = layout::HIGH_RAM_START.raw_value();
        let mem_32bit_reserved_start = layout::MEM_32BIT_RESERVED_START.raw_value();

        if !((first_region_start <= &high_ram_start)
            && (first_region_end > &high_ram_start)
            && (first_region_end <= &mem_32bit_reserved_start))
        {
            error!(
                "Unexpected first memory region layout: (start: 0x{:08x}, end: 0x{:08x}).
                high_ram_start: 0x{:08x}, mem_32bit_reserved_start: 0x{:08x}",
                first_region_start, first_region_end, high_ram_start, mem_32bit_reserved_start
            );

            return Err(super::Error::MemmapTableSetup);
        }

        info!(
            "first usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
            high_ram_start, first_region_end
        );

        (high_ram_start, *first_region_end)
    };
    ram_ranges.push(first_ram_range);

    // Generate additional usable physical memory range after the gap if any.
    for ram_region in ram_regions.iter().skip(1) {
        info!(
            "found usable physical memory range, start: 0x{:08x}, end: 0x{:08x}",
            ram_region.0, ram_region.1
        );

        ram_ranges.push(*ram_region);
    }

    Ok(ram_ranges)
}

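/// Configures the guest for PVH direct boot: fills an `hvm_start_info`
/// struct, records the initramfs as a boot module, builds the memory map
/// table, and writes all of it to guest memory as required by the PVH ABI.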
fn configure_pvh(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    initramfs: &Option<InitramfsConfig>,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336ec578;

    let mut start_info = hvm_start_info {
        magic: XEN_HVM_START_MAGIC_VALUE,
        version: 1, // pvh has version 1
        nr_modules: 0,
        cmdline_paddr: cmdline_addr.raw_value(),
        memmap_paddr: layout::MEMMAP_START.raw_value(),
        ..Default::default()
    };

    if let Some(rsdp_addr) = rsdp_addr {
        start_info.rsdp_paddr = rsdp_addr.0;
    }

    if let Some(initramfs_config) = initramfs {
        // The initramfs has been written to guest memory already, here we just need to
        // create the module structure that describes it.
        let ramdisk_mod = hvm_modlist_entry {
            paddr: initramfs_config.address.raw_value(),
            size: initramfs_config.size as u64,
            ..Default::default()
        };

        start_info.nr_modules += 1;
        start_info.modlist_paddr = layout::MODLIST_START.raw_value();

        // Write the modlist struct to guest memory.
        guest_mem
            .write_obj(ramdisk_mod, layout::MODLIST_START)
            .map_err(super::Error::ModlistSetup)?;
    }

    // Vector to hold the memory maps which need to be written to guest memory
    // at MEMMAP_START after all of the mappings are recorded.
    let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

    // Create the memory map entries.
    add_memmap_entry(&mut memmap, 0, layout::EBDA_START.raw_value(), E820_RAM);

    // Get usable physical memory ranges
    let ram_ranges = generate_ram_ranges(guest_mem)?;

    // Create e820 memory map entries
    for ram_range in ram_ranges {
        info!(
            "create_memmap_entry, start: 0x{:08x}, end: 0x{:08x}",
            ram_range.0, ram_range.1
        );
        add_memmap_entry(
            &mut memmap,
            ram_range.0,
            ram_range.1 - ram_range.0,
            E820_RAM,
        );
    }

    add_memmap_entry(
        &mut memmap,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    );

    if let Some(sgx_epc_region) = sgx_epc_region {
        add_memmap_entry(
            &mut memmap,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        );
    }

    start_info.memmap_entries = memmap.len() as u32;

    // Copy the vector with the memmap table to the MEMMAP_START address
    // which is already saved in the memmap_paddr field of hvm_start_info struct.
    let mut memmap_start_addr = layout::MEMMAP_START;

    guest_mem
        .checked_offset(
            memmap_start_addr,
            mem::size_of::<hvm_memmap_table_entry>() * start_info.memmap_entries as usize,
        )
        .ok_or(super::Error::MemmapTablePastRamEnd)?;

    // For every entry in the memmap vector, write it to guest memory.
    for memmap_entry in memmap {
        guest_mem
            .write_obj(memmap_entry, memmap_start_addr)
            .map_err(|_| super::Error::MemmapTableSetup)?;
        memmap_start_addr =
            memmap_start_addr.unchecked_add(mem::size_of::<hvm_memmap_table_entry>() as u64);
    }

    // The hvm_start_info struct itself must be stored at PVH_START_INFO
    // address, and %rbx will be initialized to contain PVH_INFO_START prior to
    // starting the guest, as required by the PVH ABI.
    let start_info_addr = layout::PVH_INFO_START;

    guest_mem
        .checked_offset(start_info_addr, mem::size_of::<hvm_start_info>())
        .ok_or(super::Error::StartInfoPastRamEnd)?;

    // Write the start_info struct to guest memory.
    guest_mem
        .write_obj(start_info, start_info_addr)
        .map_err(|_| super::Error::StartInfoSetup)?;

    Ok(())
}

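/// Configures the guest for the Linux 32-bit boot protocol: fills the
/// `boot_params` "zero page" from the provided setup header, populates the
/// E820 table, and writes the zero page to guest memory.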
fn configure_32bit_entry(
    guest_mem: &GuestMemoryMmap,
    cmdline_addr: GuestAddress,
    cmdline_size: usize,
    initramfs: &Option<InitramfsConfig>,
    setup_hdr: setup_header,
    rsdp_addr: Option<GuestAddress>,
    sgx_epc_region: Option<SgxEpcRegion>,
) -> super::Result<()> {
    const KERNEL_LOADER_OTHER: u8 = 0xff;

    // Use the provided setup header
    let mut params = boot_params {
        hdr: setup_hdr,
        ..Default::default()
    };

    // Common bootparams settings
    if params.hdr.type_of_loader == 0 {
        params.hdr.type_of_loader = KERNEL_LOADER_OTHER;
    }
    params.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32;
    params.hdr.cmdline_size = cmdline_size as u32;

    if let Some(initramfs_config) = initramfs {
        params.hdr.ramdisk_image = initramfs_config.address.raw_value() as u32;
        params.hdr.ramdisk_size = initramfs_config.size as u32;
    }

    add_e820_entry(&mut params, 0, layout::EBDA_START.raw_value(), E820_RAM)?;

    let mem_end = guest_mem.last_addr();
    if mem_end < layout::MEM_32BIT_RESERVED_START {
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            mem_end.unchecked_offset_from(layout::HIGH_RAM_START) + 1,
            E820_RAM,
        )?;
    } else {
        add_e820_entry(
            &mut params,
            layout::HIGH_RAM_START.raw_value(),
            layout::MEM_32BIT_RESERVED_START.unchecked_offset_from(layout::HIGH_RAM_START),
            E820_RAM,
        )?;
        if mem_end > layout::RAM_64BIT_START {
            add_e820_entry(
                &mut params,
                layout::RAM_64BIT_START.raw_value(),
                mem_end.unchecked_offset_from(layout::RAM_64BIT_START) + 1,
                E820_RAM,
            )?;
        }
    }

    add_e820_entry(
        &mut params,
        layout::PCI_MMCONFIG_START.0,
        layout::PCI_MMCONFIG_SIZE,
        E820_RESERVED,
    )?;

    if let Some(sgx_epc_region) = sgx_epc_region {
        add_e820_entry(
            &mut params,
            sgx_epc_region.start().raw_value(),
            sgx_epc_region.size(),
            E820_RESERVED,
        )?;
    }

    if let Some(rsdp_addr) = rsdp_addr {
        params.acpi_rsdp_addr = rsdp_addr.0;
    }

    let zero_page_addr = layout::ZERO_PAGE_START;
    guest_mem
        .checked_offset(zero_page_addr, mem::size_of::<boot_params>())
        .ok_or(super::Error::ZeroPagePastRamEnd)?;
    guest_mem
        .write_obj(params, zero_page_addr)
        .map_err(super::Error::ZeroPageSetup)?;

    Ok(())
}

/// Add an e820 region to the e820 map.
/// Returns Ok(()) if successful, or an error if there is no space left in the map.
fn add_e820_entry(
    params: &mut boot_params,
    addr: u64,
    size: u64,
    mem_type: u32,
) -> Result<(), Error> {
    if params.e820_entries >= params.e820_table.len() as u8 {
        return Err(Error::E820Configuration);
    }

    params.e820_table[params.e820_entries as usize].addr = addr;
    params.e820_table[params.e820_entries as usize].size = size;
    params.e820_table[params.e820_entries as usize].type_ = mem_type;
    params.e820_entries += 1;

    Ok(())
}

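/// Appends an entry to the hvm memmap table. Unlike `add_e820_entry` this
/// cannot fail since the table is a growable vector; for example,
/// `add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM)` records the first
/// 4 KiB page as usable RAM (see the unit test below).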
fn add_memmap_entry(memmap: &mut Vec<hvm_memmap_table_entry>, addr: u64, size: u64, mem_type: u32) {
    // Add the table entry to the vector
    memmap.push(hvm_memmap_table_entry {
        addr,
        size,
        type_: mem_type,
        reserved: 0,
    });
}

/// Returns the memory address where the initramfs could be loaded.
pub fn initramfs_load_addr(
    guest_mem: &GuestMemoryMmap,
    initramfs_size: usize,
) -> super::Result<u64> {
    let first_region = guest_mem
        .find_region(GuestAddress::new(0))
        .ok_or(super::Error::InitramfsAddress)?;
    // It's safe to cast to usize because the size of a region can't be greater than usize.
    let lowmem_size = first_region.len() as usize;

    if lowmem_size < initramfs_size {
        return Err(super::Error::InitramfsAddress);
    }

    let aligned_addr: u64 = ((lowmem_size - initramfs_size) & !(crate::pagesize() - 1)) as u64;
    Ok(aligned_addr)
}

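/// Returns the host's usable physical address width, read from CPUID leaf
/// 0x8000_0008 (falling back to 36 bits on CPUs that lack the leaf) and
/// reduced by the bits AMD SME reserves when memory encryption is enabled.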
pub fn get_host_cpu_phys_bits(hypervisor: &Arc<dyn hypervisor::Hypervisor>) -> u8 {
    // SAFETY: call cpuid with valid leaves
    unsafe {
        let leaf = x86_64::__cpuid(0x8000_0000);

        // Detect and handle AMD SME (Secure Memory Encryption) properly.
        // Some physical address bits may become reserved when the feature is enabled.
        // See AMD64 Architecture Programmer's Manual Volume 2, Section 7.10.1
        let reduced = if leaf.eax >= 0x8000_001f
            && matches!(hypervisor.get_cpu_vendor(), CpuVendor::AMD)
            && x86_64::__cpuid(0x8000_001f).eax & 0x1 != 0
        {
            (x86_64::__cpuid(0x8000_001f).ebx >> 6) & 0x3f
        } else {
            0
        };

        if leaf.eax >= 0x8000_0008 {
            let leaf = x86_64::__cpuid(0x8000_0008);
            ((leaf.eax & 0xff) - reduced) as u8
        } else {
            36
        }
    }
}

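/// Rewrites the topology-related CPUID leaves (0x1, 0xb, 0x1f and, on AMD,
/// 0x8000_001e/0x8000_0008) so the guest sees the requested
/// threads/cores/dies layout. In leaves 0xb and 0x1f, bits 15:8 of ECX
/// encode the level type: 1 = SMT, 2 = core, 5 = die.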
fn update_cpuid_topology(
    cpuid: &mut Vec<CpuIdEntry>,
    threads_per_core: u8,
    cores_per_die: u8,
    dies_per_package: u8,
    cpu_vendor: CpuVendor,
    id: u8,
) {
    let x2apic_id = get_x2apic_id(
        id as u32,
        Some((threads_per_core, cores_per_die, dies_per_package)),
    );

    let thread_width = 8 - (threads_per_core - 1).leading_zeros();
    let core_width = (8 - (cores_per_die - 1).leading_zeros()) + thread_width;
    let die_width = (8 - (dies_per_package - 1).leading_zeros()) + core_width;

    let mut cpu_ebx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX).unwrap_or(0);
    cpu_ebx |= ((dies_per_package as u32) * (cores_per_die as u32) * (threads_per_core as u32))
        & (0xff << 16);
    CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EBX, cpu_ebx);

    let mut cpu_edx = CpuidPatch::get_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX).unwrap_or(0);
    cpu_edx |= 1 << 28;
    CpuidPatch::set_cpuid_reg(cpuid, 0x1, None, CpuidReg::EDX, cpu_edx);

    // CPU Topology leaf 0xb
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0xb,
        Some(1),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::ECX, 2 << 8);
    CpuidPatch::set_cpuid_reg(cpuid, 0xb, Some(1), CpuidReg::EDX, x2apic_id);

    // CPU Topology leaf 0x1f
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::EAX, thread_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(0),
        CpuidReg::EBX,
        u32::from(threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(0), CpuidReg::ECX, 1 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::EAX, core_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(1),
        CpuidReg::EBX,
        u32::from(cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(1), CpuidReg::ECX, 2 << 8);

    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::EAX, die_width);
    CpuidPatch::set_cpuid_reg(
        cpuid,
        0x1f,
        Some(2),
        CpuidReg::EBX,
        u32::from(dies_per_package * cores_per_die * threads_per_core),
    );
    CpuidPatch::set_cpuid_reg(cpuid, 0x1f, Some(2), CpuidReg::ECX, 5 << 8);

    if matches!(cpu_vendor, CpuVendor::AMD) {
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::EBX,
            ((threads_per_core as u32 - 1) << 8) | (x2apic_id & 0xff),
        );
        CpuidPatch::set_cpuid_reg(
            cpuid,
            0x8000_001e,
            Some(0),
            CpuidReg::ECX,
            ((dies_per_package as u32 - 1) << 8) | (thread_width + die_width) & 0xff,
        );
        CpuidPatch::set_cpuid_reg(cpuid, 0x8000_001e, Some(0), CpuidReg::EDX, 0);
        if cores_per_die * threads_per_core > 1 {
            let ecx =
                CpuidPatch::get_cpuid_reg(cpuid, 0x8000_0001, Some(0), CpuidReg::ECX).unwrap_or(0);
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0001,
                Some(0),
                CpuidReg::ECX,
                ecx | (1u32 << 1) | (1u32 << 22),
            );
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x0000_0001,
                Some(0),
                CpuidReg::EBX,
                (x2apic_id << 24) | (8 << 8) | (((cores_per_die * threads_per_core) as u32) << 16),
            );
            let cpuid_patches = vec![
                // Set the HTT flag (leaf 0x1, EDX bit 28) since more than one
                // logical processor is exposed
                CpuidPatch {
                    function: 1,
                    index: 0,
                    flags_bit: None,
                    eax_bit: None,
                    ebx_bit: None,
                    ecx_bit: None,
                    edx_bit: Some(28),
                },
            ];
            CpuidPatch::patch_cpuid(cpuid, cpuid_patches);
            CpuidPatch::set_cpuid_reg(
                cpuid,
                0x8000_0008,
                Some(0),
                CpuidReg::ECX,
                ((thread_width + core_width + die_width) << 12)
                    | ((cores_per_die * threads_per_core) - 1) as u32,
            );
        } else {
            CpuidPatch::set_cpuid_reg(cpuid, 0x8000_0008, Some(0), CpuidReg::ECX, 0u32);
        }
    }
}

// The goal is to update the CPUID sub-leaves to reflect the number of EPC
// sections exposed to the guest.
fn update_cpuid_sgx(
    cpuid: &mut Vec<CpuIdEntry>,
    epc_sections: &[SgxEpcSection],
) -> Result<(), Error> {
    // Something's wrong if there's no EPC section.
    if epc_sections.is_empty() {
        return Err(Error::NoSgxEpcSection);
    }
    // We can't go further if the hypervisor does not support the SGX feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::EBX, 2) {
        return Err(Error::MissingSgxFeature);
    }
    // We can't go further if the hypervisor does not support the SGX_LC feature.
    if !CpuidPatch::is_feature_enabled(cpuid, 0x7, 0, CpuidReg::ECX, 30) {
        return Err(Error::MissingSgxLaunchControlFeature);
    }

    // Get host CPUID for leaf 0x12, subleaf 0x2. This is to retrieve EPC
    // properties such as confidentiality and integrity.
    // SAFETY: call cpuid with valid leaves
    let leaf = unsafe { std::arch::x86_64::__cpuid_count(0x12, 0x2) };

    for (i, epc_section) in epc_sections.iter().enumerate() {
        let subleaf_idx = i + 2;
        let start = epc_section.start().raw_value();
        let size = epc_section.size();
        let eax = (start & 0xffff_f000) as u32 | 0x1;
        let ebx = (start >> 32) as u32;
        let ecx = (size & 0xffff_f000) as u32 | (leaf.ecx & 0xf);
        let edx = (size >> 32) as u32;
        // SGX EPC leaf 0x12
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, eax);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, ebx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, ecx);
        CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, edx);
    }

    // Add one NULL entry to terminate the dynamic list
    let subleaf_idx = epc_sections.len() + 2;
    // SGX EPC leaf 0x12
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EAX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EBX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::ECX, 0);
    CpuidPatch::set_cpuid_reg(cpuid, 0x12, Some(subleaf_idx as u32), CpuidReg::EDX, 0);

    Ok(())
}

#[cfg(test)]
mod tests {
    use linux_loader::loader::bootparam::boot_e820_entry;

    use super::*;

    #[test]
    fn regions_base_addr() {
        let regions = arch_memory_regions();
        assert_eq!(4, regions.len());
        assert_eq!(GuestAddress(0), regions[0].0);
        assert_eq!(GuestAddress(1 << 32), regions[1].0);
    }

    #[test]
    fn test_system_configuration() {
        let no_vcpus = 4;
        let gm = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
        let config_err = configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            1,
            None,
            Some(layout::RSDP_POINTER),
            None,
            None,
            None,
            None,
            None,
        );
        config_err.unwrap_err();

        // Now assigning some memory that falls before the 32bit memory hole.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram && r.1 != usize::MAX)
            .map(|r| (r.0, r.1))
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();

        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        // Now assigning some memory that falls after the 32bit memory hole.
        let arch_mem_regions = arch_memory_regions();
        let ram_regions: Vec<(GuestAddress, usize)> = arch_mem_regions
            .iter()
            .filter(|r| r.2 == RegionType::Ram)
            .map(|r| {
                if r.1 == usize::MAX {
                    (r.0, 128 << 20)
                } else {
                    (r.0, r.1)
                }
            })
            .collect();
        let gm = GuestMemoryMmap::from_ranges(&ram_regions).unwrap();
        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();

        configure_system(
            &gm,
            GuestAddress(0),
            0,
            &None,
            no_vcpus,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        )
        .unwrap();
    }

    #[test]
    fn test_add_e820_entry() {
        let e820_table = [(boot_e820_entry {
            addr: 0x1,
            size: 4,
            type_: 1,
        }); 128];

        let expected_params = boot_params {
            e820_table,
            e820_entries: 1,
            ..Default::default()
        };

        let mut params: boot_params = Default::default();
        add_e820_entry(
            &mut params,
            e820_table[0].addr,
            e820_table[0].size,
            e820_table[0].type_,
        )
        .unwrap();
        assert_eq!(
            format!("{:?}", params.e820_table[0]),
            format!("{:?}", expected_params.e820_table[0])
        );
        assert_eq!(params.e820_entries, expected_params.e820_entries);

        // Exercise the scenario where the field storing the length of the e820 entry
        // table is bigger than the allocated memory.
        params.e820_entries = params.e820_table.len() as u8 + 1;
        add_e820_entry(
            &mut params,
            e820_table[0].addr,
            e820_table[0].size,
            e820_table[0].type_,
        )
        .unwrap_err();
    }

    #[test]
    fn test_add_memmap_entry() {
        let mut memmap: Vec<hvm_memmap_table_entry> = Vec::new();

        let expected_memmap = vec![
            hvm_memmap_table_entry {
                addr: 0x0,
                size: 0x1000,
                type_: E820_RAM,
                ..Default::default()
            },
            hvm_memmap_table_entry {
                addr: 0x10000,
                size: 0xa000,
                type_: E820_RESERVED,
                ..Default::default()
            },
        ];

        add_memmap_entry(&mut memmap, 0, 0x1000, E820_RAM);
        add_memmap_entry(&mut memmap, 0x10000, 0xa000, E820_RESERVED);

        assert_eq!(format!("{memmap:?}"), format!("{expected_memmap:?}"));
    }

    #[test]
    fn test_get_x2apic_id() {
        let x2apic_id = get_x2apic_id(0, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 0);

        let x2apic_id = get_x2apic_id(1, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 1);

        let x2apic_id = get_x2apic_id(2, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 2);

        let x2apic_id = get_x2apic_id(6, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 8);

        let x2apic_id = get_x2apic_id(7, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 9);

        let x2apic_id = get_x2apic_id(8, Some((2, 3, 1)));
        assert_eq!(x2apic_id, 10);
    }
}