// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use std::sync::{Arc, Mutex};
use std::time::Instant;

use acpi_tables::rsdp::Rsdp;
#[cfg(target_arch = "aarch64")]
use acpi_tables::sdt::GenericAddress;
use acpi_tables::sdt::Sdt;
use acpi_tables::Aml;
#[cfg(target_arch = "aarch64")]
use arch::aarch64::DeviceInfoForFdt;
#[cfg(target_arch = "aarch64")]
use arch::DeviceType;
use arch::NumaNodes;
use bitflags::bitflags;
use pci::PciBdf;
use tracer::trace_scoped;
use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryRegion};
use zerocopy::{FromBytes, Immutable, IntoBytes};

use crate::cpu::CpuManager;
use crate::device_manager::DeviceManager;
use crate::memory_manager::MemoryManager;
use crate::pci_segment::PciSegment;
use crate::{GuestMemoryMmap, GuestRegionMmap};
/* Values for Type in APIC sub-headers */
#[cfg(target_arch = "x86_64")]
pub const ACPI_X2APIC_PROCESSOR: u8 = 9;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_IO: u8 = 1;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_XRUPT_OVERRIDE: u8 = 2;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_CPU_INTERFACE: u8 = 11;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_DISTRIBUTOR: u8 = 12;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_REDISTRIBUTOR: u8 = 14;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_TRANSLATOR: u8 = 15;

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct PciRangeEntry {
    pub base_address: u64,
    pub segment: u16,
    pub start: u8,
    pub end: u8,
    _reserved: u32,
}

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct MemoryAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    _reserved1: u16,
    pub base_addr_lo: u32,
    pub base_addr_hi: u32,
    pub length_lo: u32,
    pub length_hi: u32,
    _reserved2: u32,
    pub flags: u32,
    _reserved3: u64,
}

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct ProcessorLocalX2ApicAffinity {
    pub type_: u8,
    pub length: u8,
    _reserved1: u16,
    pub proximity_domain: u32,
    pub x2apic_id: u32,
    pub flags: u32,
    pub clock_domain: u32,
    _reserved2: u32,
}

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct ProcessorGiccAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    pub acpi_processor_uid: u32,
    pub flags: u32,
    pub clock_domain: u32,
}

bitflags! {
    pub struct MemAffinityFlags: u32 {
        const NOFLAGS = 0;
        const ENABLE = 0b1;
        const HOTPLUGGABLE = 0b10;
        const NON_VOLATILE = 0b100;
    }
}

impl MemoryAffinity {
    fn from_region(
        region: &Arc<GuestRegionMmap>,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
        Self::from_range(
            region.start_addr().raw_value(),
            region.len(),
            proximity_domain,
            flags,
        )
    }

    fn from_range(
        base_addr: u64,
        size: u64,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
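        // Split the 64-bit base address and size into the 32-bit lo/hi halves
        // the SRAT Memory Affinity structure carries. For example, a region at
        // 0x1_0000_0000 of size 0x8000_0000 yields base_addr_lo = 0x0,
        // base_addr_hi = 0x1, length_lo = 0x8000_0000 and length_hi = 0x0.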
        let base_addr_lo = (base_addr & 0xffff_ffff) as u32;
        let base_addr_hi = (base_addr >> 32) as u32;
        let length_lo = (size & 0xffff_ffff) as u32;
        let length_hi = (size >> 32) as u32;

        MemoryAffinity {
            type_: 1,
            length: 40,
            proximity_domain,
            base_addr_lo,
            base_addr_hi,
            length_lo,
            length_hi,
            flags: flags.bits(),
            ..Default::default()
        }
    }
}

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct ViotVirtioPciNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub pci_segment: u16,
    pub pci_bdf_number: u16,
    _reserved2: [u8; 8],
}

#[allow(dead_code)]
#[repr(C, packed)]
#[derive(Default, IntoBytes, Immutable, FromBytes)]
struct ViotPciRangeNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub endpoint_start: u32,
    pub pci_segment_start: u16,
    pub pci_segment_end: u16,
    pub pci_bdf_start: u16,
    pub pci_bdf_end: u16,
    pub output_node: u16,
    _reserved2: [u8; 6],
}

pub fn create_dsdt_table(
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
) -> Sdt {
    trace_scoped!("create_dsdt_table");
    // DSDT
    let mut dsdt = Sdt::new(*b"DSDT", 36, 6, *b"CLOUDH", *b"CHDSDT ", 1);

    let mut bytes = Vec::new();

    device_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    cpu_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    memory_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    dsdt.append_slice(&bytes);

    dsdt
}

fn create_facp_table(dsdt_offset: GuestAddress, device_manager: &Arc<Mutex<DeviceManager>>) -> Sdt {
    trace_scoped!("create_facp_table");

    // Revision 6 of the ACPI FADT table is 276 bytes long
    let mut facp = Sdt::new(*b"FACP", 276, 6, *b"CLOUDH", *b"CHFACP ", 1);

    {
        let device_manager = device_manager.lock().unwrap();
        if let Some(address) = device_manager.acpi_platform_addresses().reset_reg_address {
            // RESET_REG
            facp.write(116, address);
            // RESET_VALUE
            facp.write(128, 1u8);
        }

        if let Some(address) = device_manager
            .acpi_platform_addresses()
            .sleep_control_reg_address
        {
            // SLEEP_CONTROL_REG
            facp.write(244, address);
        }

        if let Some(address) = device_manager
            .acpi_platform_addresses()
            .sleep_status_reg_address
        {
            // SLEEP_STATUS_REG
            facp.write(256, address);
        }

        if let Some(address) = device_manager.acpi_platform_addresses().pm_timer_address {
            // X_PM_TMR_BLK
            facp.write(208, address);
        }
    }

    // aarch64 specific fields
    #[cfg(target_arch = "aarch64")]
    // ARM_BOOT_ARCH: enable PSCI with HVC enable-method
    facp.write(129, 3u16);

    // Architecture common fields
    // HW_REDUCED_ACPI, RESET_REG_SUP, TMR_VAL_EXT
    let fadt_flags: u32 = (1 << 20) | (1 << 10) | (1 << 8);
    facp.write(112, fadt_flags);
    // FADT minor version
    facp.write(131, 3u8);
    // X_DSDT
    facp.write(140, dsdt_offset.0);
    // Hypervisor Vendor Identity
    facp.write_bytes(268, b"CLOUDHYP");

    facp.update_checksum();

    facp
}

fn create_mcfg_table(pci_segments: &[PciSegment]) -> Sdt {
    let mut mcfg = Sdt::new(*b"MCFG", 36, 1, *b"CLOUDH", *b"CHMCFG ", 1);

    // MCFG reserved 8 bytes
    mcfg.append(0u64);

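    // One ECAM entry is emitted per PCI segment; the start/end bus numbers
    // are both 0 because each segment currently exposes a single PCI bus.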
    for segment in pci_segments {
        // 32-bit PCI enhanced configuration mechanism
        mcfg.append(PciRangeEntry {
            base_address: segment.mmio_config_address,
            segment: segment.id,
            start: 0,
            end: 0,
            ..Default::default()
        });
    }
    mcfg
}

fn create_tpm2_table() -> Sdt {
    let mut tpm = Sdt::new(*b"TPM2", 52, 3, *b"CLOUDH", *b"CHTPM2 ", 1);

    tpm.write(36, 0_u16); // Platform Class
    tpm.write(38, 0_u16); // Reserved Space
    tpm.write(40, 0xfed4_0040_u64); // Address of Control Area
    tpm.write(48, 7_u32); // Start Method: 7 = Command Response Buffer (CRB)

    tpm.update_checksum();
    tpm
}

fn create_srat_table(
    numa_nodes: &NumaNodes,
    #[cfg(target_arch = "x86_64")] topology: Option<(u8, u8, u8)>,
) -> Sdt {
    let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT ", 1);
    // SRAT reserved 12 bytes
    srat.append_slice(&[0u8; 12]);

    // Check that the MemoryAffinity structure is the size expected by
    // the ACPI specification.
    assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);

    for (node_id, node) in numa_nodes.iter() {
        let proximity_domain = *node_id;

        for region in &node.memory_regions {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for region in &node.hotplug_regions {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE | MemAffinityFlags::HOTPLUGGABLE,
            ))
        }

        #[cfg(target_arch = "x86_64")]
        for section in &node.sgx_epc_sections {
            srat.append(MemoryAffinity::from_range(
                section.start().raw_value(),
                section.size(),
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for cpu in &node.cpus {
            #[cfg(target_arch = "x86_64")]
            let x2apic_id = arch::x86_64::get_x2apic_id(*cpu as u32, topology);
            #[cfg(target_arch = "aarch64")]
            let x2apic_id = *cpu as u32;

            // Flags
            // - Enabled = 1 (bit 0)
            // - Reserved bits 1-31
            let flags = 1;

            #[cfg(target_arch = "x86_64")]
            srat.append(ProcessorLocalX2ApicAffinity {
                type_: 2,
                length: 24,
                proximity_domain,
                x2apic_id,
                flags,
                clock_domain: 0,
                ..Default::default()
            });
            #[cfg(target_arch = "aarch64")]
            srat.append(ProcessorGiccAffinity {
                type_: 3,
                length: 18,
                proximity_domain,
                acpi_processor_uid: x2apic_id,
                flags,
                clock_domain: 0,
            });
        }
    }
    srat
}

fn create_slit_table(numa_nodes: &NumaNodes) -> Sdt {
    let mut slit = Sdt::new(*b"SLIT", 36, 1, *b"CLOUDH", *b"CHSLIT ", 1);
    // Number of System Localities, stored on 8 bytes.
    slit.append(numa_nodes.len() as u64);

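    // A node's distance to itself is normalized to 10 per the ACPI SLIT
    // definition; node pairs without an explicitly configured distance fall
    // back to 20, i.e. twice the local distance.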
    let existing_nodes: Vec<u32> = numa_nodes.keys().cloned().collect();
    for (node_id, node) in numa_nodes.iter() {
        let distances = &node.distances;
        for i in existing_nodes.iter() {
            let dist: u8 = if *node_id == *i {
                10
            } else if let Some(distance) = distances.get(i) {
                *distance
            } else {
                20
            };

            slit.append(dist);
        }
    }
    slit
}

#[cfg(target_arch = "aarch64")]
fn create_gtdt_table() -> Sdt {
    const ARCH_TIMER_NS_EL2_IRQ: u32 = 10;
    const ARCH_TIMER_VIRT_IRQ: u32 = 11;
    const ARCH_TIMER_S_EL1_IRQ: u32 = 13;
    const ARCH_TIMER_NS_EL1_IRQ: u32 = 14;
    const ACPI_GTDT_INTERRUPT_MODE_LEVEL: u32 = 0;
    const ACPI_GTDT_CAP_ALWAYS_ON: u32 = 1 << 2;
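    // The architectural timers are wired as PPIs, which occupy GIC INTIDs
    // 16-31; hence the "+ 16" applied to each GSIV written below.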

    let irqflags: u32 = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
    // GTDT
    let mut gtdt = Sdt::new(*b"GTDT", 104, 2, *b"CLOUDH", *b"CHGTDT ", 1);
    // Secure EL1 Timer GSIV
    gtdt.write(48, ARCH_TIMER_S_EL1_IRQ + 16);
    // Secure EL1 Timer Flags
    gtdt.write(52, irqflags);
    // Non-Secure EL1 Timer GSIV
    gtdt.write(56, ARCH_TIMER_NS_EL1_IRQ + 16);
    // Non-Secure EL1 Timer Flags
    gtdt.write(60, irqflags | ACPI_GTDT_CAP_ALWAYS_ON);
    // Virtual EL1 Timer GSIV
    gtdt.write(64, ARCH_TIMER_VIRT_IRQ + 16);
    // Virtual EL1 Timer Flags
    gtdt.write(68, irqflags);
    // EL2 Timer GSIV
    gtdt.write(72, ARCH_TIMER_NS_EL2_IRQ + 16);
    // EL2 Timer Flags
    gtdt.write(76, irqflags);

    gtdt.update_checksum();

    gtdt
}

#[cfg(target_arch = "aarch64")]
fn create_spcr_table(base_address: u64, gsi: u32) -> Sdt {
    // SPCR
    let mut spcr = Sdt::new(*b"SPCR", 80, 2, *b"CLOUDH", *b"CHSPCR ", 1);
    // Interface Type: 3 = ARM PL011 UART
    spcr.write(36, 3u8);
    // Base Address in format ACPI Generic Address Structure
    spcr.write(40, GenericAddress::mmio_address::<u8>(base_address));
    // Interrupt Type: Bit[3] ARMH GIC interrupt
    spcr.write(52, (1 << 3) as u8);
    // Global System Interrupt used by the UART
    spcr.write(54, gsi.to_le());
    // Baud Rate: 3 = 9600
    spcr.write(58, 3u8);
    // Stop Bits: 1 Stop bit
    spcr.write(60, 1u8);
    // Flow Control: Bit[1] = RTS/CTS hardware flow control
    spcr.write(61, (1 << 1) as u8);
    // PCI Device ID: Not a PCI device
    spcr.write(64, 0xffff_u16);
    // PCI Vendor ID: Not a PCI device
    spcr.write(66, 0xffff_u16);

    spcr.update_checksum();

    spcr
}

#[cfg(target_arch = "aarch64")]
fn create_dbg2_table(base_address: u64) -> Sdt {
    let namespace = "_SB_.COM1";
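    // The namespace string is expected to match the UART device's path in the
    // DSDT (\_SB_.COM1) so the OS can associate the debug port with it.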
    let debug_device_info_offset = 44usize;
    let debug_device_info_len: u16 = 22 /* BaseAddressRegisterOffset */ +
        12 /* BaseAddressRegister */ +
        4 /* AddressSize */ +
        namespace.len() as u16 + 1 /* zero-terminated */;
    let tbl_len: u32 = debug_device_info_offset as u32 + debug_device_info_len as u32;
    let mut dbg2 = Sdt::new(*b"DBG2", tbl_len, 0, *b"CLOUDH", *b"CHDBG2 ", 1);

    /* OffsetDbgDeviceInfo */
    dbg2.write_u32(36, 44);
    /* NumberDbgDeviceInfo */
    dbg2.write_u32(40, 1);

    /* Debug Device Information structure */
    /* Offsets are calculated from the start of this structure. */
    let namespace_offset = 38u16;
    let base_address_register_offset = 22u16;
    let address_size_offset = 34u16;
    /* Revision */
    dbg2.write_u8(debug_device_info_offset, 0);
    /* Length */
    dbg2.write_u16(debug_device_info_offset + 1, debug_device_info_len);
    /* NumberofGenericAddressRegisters */
    dbg2.write_u8(debug_device_info_offset + 3, 1);
    /* NameSpaceStringLength */
    dbg2.write_u16(debug_device_info_offset + 4, namespace.len() as u16 + 1);
    /* NameSpaceStringOffset */
    dbg2.write_u16(debug_device_info_offset + 6, namespace_offset);
    /* OemDataLength */
    dbg2.write_u16(debug_device_info_offset + 8, 0);
    /* OemDataOffset */
    dbg2.write_u16(debug_device_info_offset + 10, 0);
    /* Port Type: 0x8000 = serial */
    dbg2.write_u16(debug_device_info_offset + 12, 0x8000);
    /* Port Subtype: 0x0003 = ARM PL011 UART */
    dbg2.write_u16(debug_device_info_offset + 14, 0x0003);
    /* Reserved */
    dbg2.write_u16(debug_device_info_offset + 16, 0);
    /* BaseAddressRegisterOffset */
    dbg2.write_u16(debug_device_info_offset + 18, base_address_register_offset);
    /* AddressSizeOffset */
    dbg2.write_u16(debug_device_info_offset + 20, address_size_offset);
    /* BaseAddressRegister */
    dbg2.write(
        debug_device_info_offset + base_address_register_offset as usize,
        GenericAddress::mmio_address::<u8>(base_address),
    );
    /* AddressSize */
    dbg2.write_u32(
        debug_device_info_offset + address_size_offset as usize,
        0x1000,
    );
    /* NamespaceString, zero-terminated ASCII */
    for (k, c) in namespace.chars().enumerate() {
        dbg2.write_u8(
            debug_device_info_offset + namespace_offset as usize + k,
            c as u8,
        );
    }
    dbg2.write_u8(
        debug_device_info_offset + namespace_offset as usize + namespace.len(),
        0,
    );

    dbg2.update_checksum();

    dbg2
}

#[cfg(target_arch = "aarch64")]
fn create_iort_table(pci_segments: &[PciSegment]) -> Sdt {
    const ACPI_IORT_NODE_ITS_GROUP: u8 = 0x00;
    const ACPI_IORT_NODE_PCI_ROOT_COMPLEX: u8 = 0x02;
    const ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET: usize = 72;
    const ACPI_IORT_NODE_ROOT_COMPLEX_SIZE: usize = 60;

    // The IORT table contains:
    // - Header (size = 48)
    // - 1 x ITS Group Node (size = 24)
    // - N x Root Complex Node (N = number of pci segments, size = 60 x N)
    let iort_table_size: u32 = (ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET
        + ACPI_IORT_NODE_ROOT_COMPLEX_SIZE * pci_segments.len())
        as u32;
    let mut iort = Sdt::new(*b"IORT", iort_table_size, 2, *b"CLOUDH", *b"CHIORT ", 1);
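    // IORT header fields: the number of nodes (1 ITS group + N root
    // complexes) lives at offset 36, and the offset of the node array (48)
    // at offset 40.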
    iort.write(36, ((1 + pci_segments.len()) as u32).to_le());
    iort.write(40, (48u32).to_le());

    // ITS group node
    iort.write(48, ACPI_IORT_NODE_ITS_GROUP);
    // Length of the ITS group node in bytes
    iort.write(49, (24u16).to_le());
    // ITS count
    iort.write(64, (1u32).to_le());

    // Root Complex Nodes
    for (i, segment) in pci_segments.iter().enumerate() {
        let node_offset: usize =
            ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET + i * ACPI_IORT_NODE_ROOT_COMPLEX_SIZE;
        iort.write(node_offset, ACPI_IORT_NODE_PCI_ROOT_COMPLEX);
        // Length of the root complex node in bytes
        iort.write(
            node_offset + 1,
            (ACPI_IORT_NODE_ROOT_COMPLEX_SIZE as u16).to_le(),
        );
        // Revision
        iort.write(node_offset + 3, (3u8).to_le());
        // Node ID
        iort.write(node_offset + 4, (segment.id as u32).to_le());
        // Mapping count
        iort.write(node_offset + 8, (1u32).to_le());
        // Offset from the start of the RC node to the start of its Array of ID mappings
        iort.write(node_offset + 12, (36u32).to_le());
        // Fully coherent device
        iort.write(node_offset + 16, (1u32).to_le());
        // CCA = CPM = DCAS = 1
        iort.write(node_offset + 24, 3u8);
        // PCI segment number
        iort.write(node_offset + 28, (segment.id as u32).to_le());
        // Memory address size limit
        iort.write(node_offset + 32, (64u8).to_le());

        // From offset 36 onward is the space for the ID mappings array.
        // Currently there is only one mapping.
        let mapping_offset: usize = node_offset + 36;
        // The lowest value in the input range
        iort.write(mapping_offset, (0u32).to_le());
        // The number of IDs in the range minus one:
        // This should cover all the devices of a segment:
        // 1 (bus) x 32 (devices) x 8 (functions) = 256
        // Note: Currently only 1 bus is supported in a segment.
        iort.write(mapping_offset + 4, (255_u32).to_le());
        // The lowest value in the output range
        iort.write(mapping_offset + 8, ((256 * segment.id) as u32).to_le());
        // id_mapping_array_output_reference should be
        // the ITS group node (the first node) if no SMMU
        iort.write(mapping_offset + 12, (48u32).to_le());
        // Flags
        iort.write(mapping_offset + 16, (0u32).to_le());
    }

    iort.update_checksum();

    iort
}

fn create_viot_table(iommu_bdf: &PciBdf, devices_bdf: &[PciBdf]) -> Sdt {
    // VIOT
    let mut viot = Sdt::new(*b"VIOT", 36, 0, *b"CLOUDH", *b"CHVIOT ", 0);
    // Node count
    viot.append((devices_bdf.len() + 1) as u16);
    // Node offset
    viot.append(48u16);
    // VIOT reserved 8 bytes
    viot.append_slice(&[0u8; 8]);
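    // The first node therefore starts at offset 48: the 36-byte SDT header
    // plus 2 bytes (node count), 2 bytes (node offset) and 8 reserved bytes.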

    // Virtio-iommu based on virtio-pci node
    viot.append(ViotVirtioPciNode {
        type_: 3,
        length: 16,
        pci_segment: iommu_bdf.segment(),
        pci_bdf_number: iommu_bdf.into(),
        ..Default::default()
    });

    for device_bdf in devices_bdf {
        viot.append(ViotPciRangeNode {
            type_: 1,
            length: 24,
            endpoint_start: device_bdf.into(),
            pci_segment_start: device_bdf.segment(),
            pci_segment_end: device_bdf.segment(),
            pci_bdf_start: device_bdf.into(),
            pci_bdf_end: device_bdf.into(),
            output_node: 48,
            ..Default::default()
        });
    }

    viot
}

pub fn create_acpi_tables(
    guest_mem: &GuestMemoryMmap,
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
    numa_nodes: &NumaNodes,
    tpm_enabled: bool,
) -> GuestAddress {
    trace_scoped!("create_acpi_tables");

    let start_time = Instant::now();
    let rsdp_offset = arch::layout::RSDP_POINTER;
    let mut tables: Vec<u64> = Vec::new();
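    // Tables are written back-to-back into guest memory, starting right after
    // the RSDP; `tables` records each table's guest-physical address so the
    // XSDT can reference all of them at the end.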

    // DSDT
    let dsdt = create_dsdt_table(device_manager, cpu_manager, memory_manager);
    let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap();
    guest_mem
        .write_slice(dsdt.as_slice(), dsdt_offset)
        .expect("Error writing DSDT table");

    // FACP aka FADT
    let facp = create_facp_table(dsdt_offset, device_manager);
    let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap();
    guest_mem
        .write_slice(facp.as_slice(), facp_offset)
        .expect("Error writing FACP table");
    tables.push(facp_offset.0);

    // MADT
    let madt = cpu_manager.lock().unwrap().create_madt();
    let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap();
    guest_mem
        .write_slice(madt.as_slice(), madt_offset)
        .expect("Error writing MADT table");
    tables.push(madt_offset.0);
    let mut prev_tbl_len = madt.len() as u64;
    let mut prev_tbl_off = madt_offset;
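    // From here on, each table is placed immediately after the previous one,
    // tracked via (prev_tbl_off, prev_tbl_len).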

    // PPTT
    #[cfg(target_arch = "aarch64")]
    {
        let pptt = cpu_manager.lock().unwrap().create_pptt();
        let pptt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(pptt.as_slice(), pptt_offset)
            .expect("Error writing PPTT table");
        tables.push(pptt_offset.0);
        prev_tbl_len = pptt.len() as u64;
        prev_tbl_off = pptt_offset;
    }

    // GTDT
    #[cfg(target_arch = "aarch64")]
    {
        let gtdt = create_gtdt_table();
        let gtdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(gtdt.as_slice(), gtdt_offset)
            .expect("Error writing GTDT table");
        tables.push(gtdt_offset.0);
        prev_tbl_len = gtdt.len() as u64;
        prev_tbl_off = gtdt_offset;
    }

    // MCFG
    let mcfg = create_mcfg_table(device_manager.lock().unwrap().pci_segments());
    let mcfg_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(mcfg.as_slice(), mcfg_offset)
        .expect("Error writing MCFG table");
    tables.push(mcfg_offset.0);
    prev_tbl_len = mcfg.len() as u64;
    prev_tbl_off = mcfg_offset;

    // SPCR and DBG2
    #[cfg(target_arch = "aarch64")]
    {
        let is_serial_on = device_manager
            .lock()
            .unwrap()
            .get_device_info()
            .clone()
            .contains_key(&(DeviceType::Serial, DeviceType::Serial.to_string()));
        let serial_device_addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value();
        let serial_device_irq = if is_serial_on {
            device_manager
                .lock()
                .unwrap()
                .get_device_info()
                .clone()
                .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                .unwrap()
                .irq()
        } else {
            // If the serial device is disabled, fall back to a dummy (invalid) IRQ.
            31
        };

        // SPCR
        let spcr = create_spcr_table(serial_device_addr, serial_device_irq);
        let spcr_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(spcr.as_slice(), spcr_offset)
            .expect("Error writing SPCR table");
        tables.push(spcr_offset.0);
        prev_tbl_len = spcr.len() as u64;
        prev_tbl_off = spcr_offset;

        // DBG2
        let dbg2 = create_dbg2_table(serial_device_addr);
        let dbg2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(dbg2.as_slice(), dbg2_offset)
            .expect("Error writing DBG2 table");
        tables.push(dbg2_offset.0);
        prev_tbl_len = dbg2.len() as u64;
        prev_tbl_off = dbg2_offset;
    }

    if tpm_enabled {
        // TPM2 Table
        let tpm2 = create_tpm2_table();
        let tpm2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(tpm2.as_slice(), tpm2_offset)
            .expect("Error writing TPM2 table");
        tables.push(tpm2_offset.0);

        prev_tbl_len = tpm2.len() as u64;
        prev_tbl_off = tpm2_offset;
    }
    // SRAT and SLIT
    // Only created if the NUMA nodes list is not empty.
    if !numa_nodes.is_empty() {
        #[cfg(target_arch = "x86_64")]
        let topology = cpu_manager.lock().unwrap().get_vcpu_topology();
        // SRAT
        let srat = create_srat_table(
            numa_nodes,
            #[cfg(target_arch = "x86_64")]
            topology,
        );
        let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(srat.as_slice(), srat_offset)
            .expect("Error writing SRAT table");
        tables.push(srat_offset.0);

        // SLIT
        let slit = create_slit_table(numa_nodes);
        let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap();
        guest_mem
            .write_slice(slit.as_slice(), slit_offset)
            .expect("Error writing SLIT table");
        tables.push(slit_offset.0);

        prev_tbl_len = slit.len() as u64;
        prev_tbl_off = slit_offset;
    };

    #[cfg(target_arch = "aarch64")]
    {
        let iort = create_iort_table(device_manager.lock().unwrap().pci_segments());
        let iort_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(iort.as_slice(), iort_offset)
            .expect("Error writing IORT table");
        tables.push(iort_offset.0);
        prev_tbl_len = iort.len() as u64;
        prev_tbl_off = iort_offset;
    }

    // VIOT
    if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
    {
        let viot = create_viot_table(iommu_bdf, devices_bdf);

        let viot_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(viot.as_slice(), viot_offset)
            .expect("Error writing VIOT table");
        tables.push(viot_offset.0);
        prev_tbl_len = viot.len() as u64;
        prev_tbl_off = viot_offset;
    }

    // XSDT
    let mut xsdt = Sdt::new(*b"XSDT", 36, 1, *b"CLOUDH", *b"CHXSDT ", 1);
    for table in tables {
        xsdt.append(table);
    }
    xsdt.update_checksum();
    let xsdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(xsdt.as_slice(), xsdt_offset)
        .expect("Error writing XSDT table");

    // RSDP
    let rsdp = Rsdp::new(*b"CLOUDH", xsdt_offset.0);
    guest_mem
        .write_slice(rsdp.as_bytes(), rsdp_offset)
        .expect("Error writing RSDP");

    info!(
        "Generated ACPI tables: took {}µs size = {}",
        Instant::now().duration_since(start_time).as_micros(),
        xsdt_offset.0 + xsdt.len() as u64 - rsdp_offset.0
    );
    rsdp_offset
}

#[cfg(feature = "tdx")]
pub fn create_acpi_tables_tdx(
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
    numa_nodes: &NumaNodes,
) -> Vec<Sdt> {
    // DSDT
    let mut tables = vec![create_dsdt_table(
        device_manager,
        cpu_manager,
        memory_manager,
    )];

    // FACP aka FADT
    tables.push(create_facp_table(GuestAddress(0), device_manager));

    // MADT
    tables.push(cpu_manager.lock().unwrap().create_madt());

    // MCFG
    tables.push(create_mcfg_table(
        device_manager.lock().unwrap().pci_segments(),
    ));

    // SRAT and SLIT
    // Only created if the NUMA nodes list is not empty.
    if !numa_nodes.is_empty() {
        #[cfg(target_arch = "x86_64")]
        let topology = cpu_manager.lock().unwrap().get_vcpu_topology();

        // SRAT
        tables.push(create_srat_table(
            numa_nodes,
            #[cfg(target_arch = "x86_64")]
            topology,
        ));

        // SLIT
        tables.push(create_slit_table(numa_nodes));
    };

    // VIOT
    if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
    {
        tables.push(create_viot_table(iommu_bdf, devices_bdf));
    }

    tables
}

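// A minimal sanity-check sketch: the packed structures above are serialized
// byte-for-byte into guest memory, so their sizes must match the lengths the
// ACPI/VIOT specifications assign to them. This mirrors the runtime
// assertion already done for MemoryAffinity in create_srat_table().
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn acpi_struct_sizes_match_spec() {
        assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);
        assert_eq!(std::mem::size_of::<ProcessorLocalX2ApicAffinity>(), 24);
        assert_eq!(std::mem::size_of::<ProcessorGiccAffinity>(), 18);
        assert_eq!(std::mem::size_of::<ViotVirtioPciNode>(), 16);
        assert_eq!(std::mem::size_of::<ViotPciRangeNode>(), 24);
        assert_eq!(std::mem::size_of::<PciRangeEntry>(), 16);
    }
}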