// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::cpu::CpuManager;
use crate::device_manager::DeviceManager;
use crate::memory_manager::MemoryManager;
use crate::pci_segment::PciSegment;
use crate::{GuestMemoryMmap, GuestRegionMmap};
#[cfg(target_arch = "aarch64")]
use acpi_tables::sdt::GenericAddress;
use acpi_tables::{rsdp::Rsdp, sdt::Sdt, Aml};
#[cfg(target_arch = "aarch64")]
use arch::aarch64::DeviceInfoForFdt;
#[cfg(target_arch = "aarch64")]
use arch::DeviceType;
use arch::NumaNodes;
use bitflags::bitflags;
use pci::PciBdf;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use tracer::trace_scoped;
use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryRegion};
use zerocopy::AsBytes;

/* Values for Type in APIC sub-headers */
#[cfg(target_arch = "x86_64")]
pub const ACPI_X2APIC_PROCESSOR: u8 = 9;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_IO: u8 = 1;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_XRUPT_OVERRIDE: u8 = 2;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_CPU_INTERFACE: u8 = 11;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_DISTRIBUTOR: u8 = 12;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_REDISTRIBUTOR: u8 = 14;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_TRANSLATOR: u8 = 15;

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct PciRangeEntry {
    pub base_address: u64,
    pub segment: u16,
    pub start: u8,
    pub end: u8,
    _reserved: u32,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct MemoryAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    _reserved1: u16,
    pub base_addr_lo: u32,
    pub base_addr_hi: u32,
    pub length_lo: u32,
    pub length_hi: u32,
    _reserved2: u32,
    pub flags: u32,
    _reserved3: u64,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ProcessorLocalX2ApicAffinity {
    pub type_: u8,
    pub length: u8,
    _reserved1: u16,
    pub proximity_domain: u32,
    pub x2apic_id: u32,
    pub flags: u32,
    pub clock_domain: u32,
    _reserved2: u32,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ProcessorGiccAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    pub acpi_processor_uid: u32,
    pub flags: u32,
    pub clock_domain: u32,
}

bitflags! {
    pub struct MemAffinityFlags: u32 {
        const NOFLAGS = 0;
        const ENABLE = 0b1;
        const HOTPLUGGABLE = 0b10;
        const NON_VOLATILE = 0b100;
    }
}

impl MemoryAffinity {
    fn from_region(
        region: &Arc<GuestRegionMmap>,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
        Self::from_range(
            region.start_addr().raw_value(),
            region.len(),
            proximity_domain,
            flags,
        )
    }

    fn from_range(
        base_addr: u64,
        size: u64,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
        let base_addr_lo = (base_addr & 0xffff_ffff) as u32;
        let base_addr_hi = (base_addr >> 32) as u32;
        let length_lo = (size & 0xffff_ffff) as u32;
        let length_hi = (size >> 32) as u32;

        MemoryAffinity {
            type_: 1,
            length: 40,
            proximity_domain,
            base_addr_lo,
            base_addr_hi,
            length_lo,
            length_hi,
            flags: flags.bits(),
            ..Default::default()
        }
    }
}
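// Illustrative sanity check (a minimal sketch, not part of the original
// table-building logic): `from_range` splits a 64-bit base address and length
// into the 32-bit lo/hi halves the ACPI SRAT Memory Affinity structure
// expects, and the packed structure itself is exactly 40 bytes. The input
// values are arbitrary.
#[cfg(test)]
mod memory_affinity_tests {
    use super::*;

    #[test]
    fn from_range_splits_base_and_length() {
        let affinity =
            MemoryAffinity::from_range(0x1_2345_6789, 0x2_0000_0000, 0, MemAffinityFlags::ENABLE);

        // Copy packed fields into locals to avoid taking unaligned references.
        let base_lo = affinity.base_addr_lo;
        let base_hi = affinity.base_addr_hi;
        let length_lo = affinity.length_lo;
        let length_hi = affinity.length_hi;
        let flags = affinity.flags;

        assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);
        assert_eq!(base_lo, 0x2345_6789);
        assert_eq!(base_hi, 0x1);
        assert_eq!(length_lo, 0);
        assert_eq!(length_hi, 0x2);
        assert_eq!(flags, MemAffinityFlags::ENABLE.bits());
    }
}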
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ViotVirtioPciNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub pci_segment: u16,
    pub pci_bdf_number: u16,
    _reserved2: [u8; 8],
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ViotPciRangeNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub endpoint_start: u32,
    pub pci_segment_start: u16,
    pub pci_segment_end: u16,
    pub pci_bdf_start: u16,
    pub pci_bdf_end: u16,
    pub output_node: u16,
    _reserved2: [u8; 6],
}

pub fn create_dsdt_table(
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
) -> Sdt {
    trace_scoped!("create_dsdt_table");
    // DSDT
    let mut dsdt = Sdt::new(*b"DSDT", 36, 6, *b"CLOUDH", *b"CHDSDT ", 1);

    let mut bytes = Vec::new();

    device_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    cpu_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    memory_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    dsdt.append_slice(&bytes);

    dsdt
}

fn create_facp_table(dsdt_offset: GuestAddress, device_manager: &Arc<Mutex<DeviceManager>>) -> Sdt {
    trace_scoped!("create_facp_table");

    // Revision 6 of the ACPI FADT table is 276 bytes long
    let mut facp = Sdt::new(*b"FACP", 276, 6, *b"CLOUDH", *b"CHFACP ", 1);

    {
        let device_manager = device_manager.lock().unwrap();
        if let Some(address) = device_manager.acpi_platform_addresses().reset_reg_address {
            // RESET_REG
            facp.write(116, address);
            // RESET_VALUE
            facp.write(128, 1u8);
        }

        if let Some(address) = device_manager
            .acpi_platform_addresses()
            .sleep_control_reg_address
        {
            // SLEEP_CONTROL_REG
            facp.write(244, address);
        }

        if let Some(address) = device_manager
            .acpi_platform_addresses()
            .sleep_status_reg_address
        {
            // SLEEP_STATUS_REG
            facp.write(256, address);
        }

        if let Some(address) = device_manager.acpi_platform_addresses().pm_timer_address {
            // X_PM_TMR_BLK
            facp.write(208, address);
        }
    }

    // aarch64 specific fields
    #[cfg(target_arch = "aarch64")]
    // ARM_BOOT_ARCH: enable PSCI with HVC enable-method
    facp.write(129, 3u16);

    // Architecture common fields
    // HW_REDUCED_ACPI (bit 20), RESET_REG_SUP (bit 10), TMR_VAL_EXT (bit 8)
    let fadt_flags: u32 = 1 << 20 | 1 << 10 | 1 << 8;
    facp.write(112, fadt_flags);
    // FADT minor version
    facp.write(131, 3u8);
    // X_DSDT
    facp.write(140, dsdt_offset.0);
    // Hypervisor Vendor Identity
    facp.write_bytes(268, b"CLOUDHYP");

    facp.update_checksum();

    facp
}

fn create_mcfg_table(pci_segments: &[PciSegment]) -> Sdt {
    let mut mcfg = Sdt::new(*b"MCFG", 36, 1, *b"CLOUDH", *b"CHMCFG ", 1);

    // MCFG reserved 8 bytes
    mcfg.append(0u64);

    for segment in pci_segments {
        // 32-bit PCI enhanced configuration mechanism
        mcfg.append(PciRangeEntry {
            base_address: segment.mmio_config_address,
            segment: segment.id,
            start: 0,
            end: 0,
            ..Default::default()
        });
    }
    mcfg
}

fn create_tpm2_table() -> Sdt {
    let mut tpm = Sdt::new(*b"TPM2", 52, 3, *b"CLOUDH", *b"CHTPM2 ", 1);

    tpm.write(36, 0_u16); // Platform Class
    tpm.write(38, 0_u16); // Reserved Space
    tpm.write(40, 0xfed4_0040_u64); // Address of Control Area
    tpm.write(48, 7_u32); // Start Method

    tpm.update_checksum();
    tpm
}
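// A small consistency check (a sketch, not from the original source): every
// SDT ends up with a checksum byte that makes all of its bytes sum to zero
// modulo 256, and the TPM2 table above is declared as exactly 52 bytes.
#[cfg(test)]
mod tpm2_table_tests {
    use super::*;

    #[test]
    fn tpm2_table_is_well_formed() {
        let tpm2 = create_tpm2_table();
        assert_eq!(tpm2.len(), 52);

        // The sum of all bytes, checksum included, must wrap around to zero.
        let sum = tpm2
            .as_slice()
            .iter()
            .fold(0u8, |acc, &b| acc.wrapping_add(b));
        assert_eq!(sum, 0);
    }
}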
fn create_srat_table(
    numa_nodes: &NumaNodes,
    #[cfg(target_arch = "x86_64")] topology: Option<(u8, u8, u8)>,
) -> Sdt {
    let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT ", 1);
    // SRAT reserved 12 bytes
    srat.append_slice(&[0u8; 12]);

    // Check the MemoryAffinity structure is the right size as expected by
    // the ACPI specification.
    assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);

    for (node_id, node) in numa_nodes.iter() {
        let proximity_domain = *node_id;

        for region in &node.memory_regions {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for region in &node.hotplug_regions {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE | MemAffinityFlags::HOTPLUGGABLE,
            ))
        }

        #[cfg(target_arch = "x86_64")]
        for section in &node.sgx_epc_sections {
            srat.append(MemoryAffinity::from_range(
                section.start().raw_value(),
                section.size(),
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for cpu in &node.cpus {
            #[cfg(target_arch = "x86_64")]
            let x2apic_id = arch::x86_64::get_x2apic_id(*cpu as u32, topology);
            #[cfg(target_arch = "aarch64")]
            let x2apic_id = *cpu as u32;

            // Flags
            // - Enabled = 1 (bit 0)
            // - Reserved bits 1-31
            let flags = 1;

            #[cfg(target_arch = "x86_64")]
            srat.append(ProcessorLocalX2ApicAffinity {
                type_: 2,
                length: 24,
                proximity_domain,
                x2apic_id,
                flags,
                clock_domain: 0,
                ..Default::default()
            });
            #[cfg(target_arch = "aarch64")]
            srat.append(ProcessorGiccAffinity {
                type_: 3,
                length: 18,
                proximity_domain,
                acpi_processor_uid: x2apic_id,
                flags,
                clock_domain: 0,
            });
        }
    }
    srat
}

fn create_slit_table(numa_nodes: &NumaNodes) -> Sdt {
    let mut slit = Sdt::new(*b"SLIT", 36, 1, *b"CLOUDH", *b"CHSLIT ", 1);
    // Number of System Localities, as an 8-byte value.
    slit.append(numa_nodes.len() as u64);

    let existing_nodes: Vec<u32> = numa_nodes.keys().cloned().collect();
    for (node_id, node) in numa_nodes.iter() {
        let distances = &node.distances;
        for i in existing_nodes.iter() {
            let dist: u8 = if *node_id == *i {
                // A node is at distance 10 from itself, per the ACPI spec.
                10
            } else if let Some(distance) = distances.get(i) {
                *distance
            } else {
                // Default distance when none is explicitly configured.
                20
            };

            slit.append(dist);
        }
    }
    slit
}
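// Degenerate-case example (a sketch, assuming `NumaNodes` is the map alias
// its iteration above implies): with no NUMA nodes the SLIT reduces to the
// 36-byte ACPI header plus the 8-byte locality count.
#[cfg(test)]
mod slit_table_tests {
    use super::*;

    #[test]
    fn empty_slit_is_header_plus_locality_count() {
        let slit = create_slit_table(&NumaNodes::new());
        assert_eq!(slit.len(), 36 + 8);
    }
}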
#[cfg(target_arch = "aarch64")]
fn create_gtdt_table() -> Sdt {
    const ARCH_TIMER_NS_EL2_IRQ: u32 = 10;
    const ARCH_TIMER_VIRT_IRQ: u32 = 11;
    const ARCH_TIMER_S_EL1_IRQ: u32 = 13;
    const ARCH_TIMER_NS_EL1_IRQ: u32 = 14;
    const ACPI_GTDT_INTERRUPT_MODE_LEVEL: u32 = 0;
    const ACPI_GTDT_CAP_ALWAYS_ON: u32 = 1 << 2;

    let irqflags: u32 = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
    // GTDT
    let mut gtdt = Sdt::new(*b"GTDT", 104, 2, *b"CLOUDH", *b"CHGTDT ", 1);
    // The timer interrupts are PPIs; adding 16 converts a PPI number into
    // the GSIV the GTDT expects.
    // Secure EL1 Timer GSIV
    gtdt.write(48, ARCH_TIMER_S_EL1_IRQ + 16);
    // Secure EL1 Timer Flags
    gtdt.write(52, irqflags);
    // Non-Secure EL1 Timer GSIV
    gtdt.write(56, ARCH_TIMER_NS_EL1_IRQ + 16);
    // Non-Secure EL1 Timer Flags
    gtdt.write(60, irqflags | ACPI_GTDT_CAP_ALWAYS_ON);
    // Virtual EL1 Timer GSIV
    gtdt.write(64, ARCH_TIMER_VIRT_IRQ + 16);
    // Virtual EL1 Timer Flags
    gtdt.write(68, irqflags);
    // EL2 Timer GSIV
    gtdt.write(72, ARCH_TIMER_NS_EL2_IRQ + 16);
    // EL2 Timer Flags
    gtdt.write(76, irqflags);

    gtdt.update_checksum();

    gtdt
}
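// Hedged length check for the table above: the GTDT is created with a fixed
// declared length of 104 bytes and all of its writes stay within that bound.
#[cfg(all(test, target_arch = "aarch64"))]
mod gtdt_table_tests {
    use super::*;

    #[test]
    fn gtdt_has_declared_length() {
        assert_eq!(create_gtdt_table().len(), 104);
    }
}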
#[cfg(target_arch = "aarch64")]
fn create_spcr_table(base_address: u64, gsi: u32) -> Sdt {
    // SPCR
    let mut spcr = Sdt::new(*b"SPCR", 80, 2, *b"CLOUDH", *b"CHSPCR ", 1);
    // Interface Type
    spcr.write(36, 3u8);
    // Base Address in format ACPI Generic Address Structure
    spcr.write(40, GenericAddress::mmio_address::<u8>(base_address));
    // Interrupt Type: Bit[3] ARMH GIC interrupt
    spcr.write(52, (1 << 3) as u8);
    // Global System Interrupt used by the UART
    spcr.write(54, gsi.to_le());
    // Baud Rate: 3 = 9600
    spcr.write(58, 3u8);
    // Stop Bits: 1 Stop bit
    spcr.write(60, 1u8);
    // Flow Control: Bit[1] = RTS/CTS hardware flow control
    spcr.write(61, (1 << 1) as u8);
    // PCI Device ID: Not a PCI device
    spcr.write(64, 0xffff_u16);
    // PCI Vendor ID: Not a PCI device
    spcr.write(66, 0xffff_u16);

    spcr.update_checksum();

    spcr
}

#[cfg(target_arch = "aarch64")]
fn create_dbg2_table(base_address: u64) -> Sdt {
    let namespace = "_SB_.COM1";
    let debug_device_info_offset = 44usize;
    let debug_device_info_len: u16 = 22 /* BaseAddressRegisterOffset */ +
        12 /* BaseAddressRegister */ +
        4 /* AddressSize */ +
        namespace.len() as u16 + 1 /* zero-terminated */;
    let tbl_len: u32 = debug_device_info_offset as u32 + debug_device_info_len as u32;
    let mut dbg2 = Sdt::new(*b"DBG2", tbl_len, 0, *b"CLOUDH", *b"CHDBG2 ", 1);

    /* OffsetDbgDeviceInfo */
    dbg2.write_u32(36, 44);
    /* NumberDbgDeviceInfo */
    dbg2.write_u32(40, 1);

    /* Debug Device Information structure */
    /* Offsets are calculated from the start of this structure. */
    let namespace_offset = 38u16;
    let base_address_register_offset = 22u16;
    let address_size_offset = 34u16;
    /* Revision */
    dbg2.write_u8(debug_device_info_offset, 0);
    /* Length */
    dbg2.write_u16(debug_device_info_offset + 1, debug_device_info_len);
    /* NumberofGenericAddressRegisters */
    dbg2.write_u8(debug_device_info_offset + 3, 1);
    /* NameSpaceStringLength */
    dbg2.write_u16(debug_device_info_offset + 4, namespace.len() as u16 + 1);
    /* NameSpaceStringOffset */
    dbg2.write_u16(debug_device_info_offset + 6, namespace_offset);
    /* OemDataLength */
    dbg2.write_u16(debug_device_info_offset + 8, 0);
    /* OemDataOffset */
    dbg2.write_u16(debug_device_info_offset + 10, 0);
    /* Port Type */
    dbg2.write_u16(debug_device_info_offset + 12, 0x8000);
    /* Port Subtype */
    dbg2.write_u16(debug_device_info_offset + 14, 0x0003);
    /* Reserved */
    dbg2.write_u16(debug_device_info_offset + 16, 0);
    /* BaseAddressRegisterOffset */
    dbg2.write_u16(debug_device_info_offset + 18, base_address_register_offset);
    /* AddressSizeOffset */
    dbg2.write_u16(debug_device_info_offset + 20, address_size_offset);
    /* BaseAddressRegister */
    dbg2.write(
        debug_device_info_offset + base_address_register_offset as usize,
        GenericAddress::mmio_address::<u8>(base_address),
    );
    /* AddressSize */
    dbg2.write_u32(
        debug_device_info_offset + address_size_offset as usize,
        0x1000,
    );
    /* NamespaceString, zero-terminated ASCII */
    for (k, c) in namespace.chars().enumerate() {
        dbg2.write_u8(
            debug_device_info_offset + namespace_offset as usize + k,
            c as u8,
        );
    }
    dbg2.write_u8(
        debug_device_info_offset + namespace_offset as usize + namespace.len(),
        0,
    );

    dbg2.update_checksum();

    dbg2
}
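// Length check mirroring the arithmetic in `create_dbg2_table`: a 44-byte
// offset to the device-information structure, plus 22 + 12 + 4 bytes of
// fixed fields, plus the zero-terminated "_SB_.COM1" namespace (10 bytes),
// i.e. 92 bytes in total. The base address below is an arbitrary example.
#[cfg(all(test, target_arch = "aarch64"))]
mod dbg2_table_tests {
    use super::*;

    #[test]
    fn dbg2_length_matches_its_arithmetic() {
        assert_eq!(create_dbg2_table(0x0900_0000).len(), 44 + 22 + 12 + 4 + 10);
    }
}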
#[cfg(target_arch = "aarch64")]
fn create_iort_table(pci_segments: &[PciSegment]) -> Sdt {
    const ACPI_IORT_NODE_ITS_GROUP: u8 = 0x00;
    const ACPI_IORT_NODE_PCI_ROOT_COMPLEX: u8 = 0x02;
    const ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET: usize = 72;
    const ACPI_IORT_NODE_ROOT_COMPLEX_SIZE: usize = 60;

    // The IORT table contains:
    // - Header (size = 48, including the IORT-specific fields)
    // - 1 x ITS Group Node (size = 24)
    // - N x Root Complex Node (N = number of PCI segments, size = 60 x N)
    let iort_table_size: u32 = (ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET
        + ACPI_IORT_NODE_ROOT_COMPLEX_SIZE * pci_segments.len())
        as u32;
    let mut iort = Sdt::new(*b"IORT", iort_table_size, 2, *b"CLOUDH", *b"CHIORT ", 1);
    // Number of IORT nodes: 1 ITS group node + N root complex nodes
    iort.write(36, ((1 + pci_segments.len()) as u32).to_le());
    // Offset to the array of IORT nodes
    iort.write(40, (48u32).to_le());

    // ITS group node
    iort.write(48, ACPI_IORT_NODE_ITS_GROUP);
    // Length of the ITS group node in bytes
    iort.write(49, (24u16).to_le());
    // ITS count
    iort.write(64, (1u32).to_le());

    // Root Complex Nodes
    for (i, segment) in pci_segments.iter().enumerate() {
        let node_offset: usize =
            ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET + i * ACPI_IORT_NODE_ROOT_COMPLEX_SIZE;
        iort.write(node_offset, ACPI_IORT_NODE_PCI_ROOT_COMPLEX);
        // Length of the root complex node in bytes
        iort.write(
            node_offset + 1,
            (ACPI_IORT_NODE_ROOT_COMPLEX_SIZE as u16).to_le(),
        );
        // Revision
        iort.write(node_offset + 3, (3u8).to_le());
        // Node ID
        iort.write(node_offset + 4, (segment.id as u32).to_le());
        // Mapping counts
        iort.write(node_offset + 8, (1u32).to_le());
        // Offset from the start of the RC node to the start of its Array of ID mappings
        iort.write(node_offset + 12, (36u32).to_le());
        // Fully coherent device
        iort.write(node_offset + 16, (1u32).to_le());
        // CCA = CPM = DCAS = 1
        iort.write(node_offset + 24, 3u8);
        // PCI segment number
        iort.write(node_offset + 28, (segment.id as u32).to_le());
        // Memory address size limit
        iort.write(node_offset + 32, (64u8).to_le());

        // From offset 36 onward is the space for the ID mappings array.
        // Currently we have only one mapping.
        let mapping_offset: usize = node_offset + 36;
        // The lowest value in the input range
        iort.write(mapping_offset, (0u32).to_le());
        // The number of IDs in the range minus one:
        // This should cover all the devices of a segment:
        // 1 (bus) x 32 (devices) x 8 (functions) = 256
        // Note: Currently only 1 bus is supported in a segment.
        iort.write(mapping_offset + 4, (255_u32).to_le());
        // The lowest value in the output range
        iort.write(mapping_offset + 8, ((256 * segment.id) as u32).to_le());
        // id_mapping_array_output_reference should be
        // the ITS group node (the first node) if no SMMU
        iort.write(mapping_offset + 12, (48u32).to_le());
        // Flags
        iort.write(mapping_offset + 16, (0u32).to_le());
    }

    iort.update_checksum();

    iort
}

fn create_viot_table(iommu_bdf: &PciBdf, devices_bdf: &[PciBdf]) -> Sdt {
    // VIOT
    let mut viot = Sdt::new(*b"VIOT", 36, 0, *b"CLOUDH", *b"CHVIOT ", 0);
    // Node count
    viot.append((devices_bdf.len() + 1) as u16);
    // Node offset
    viot.append(48u16);
    // VIOT reserved 8 bytes
    viot.append_slice(&[0u8; 8]);

    // Virtio-iommu based on virtio-pci node
    viot.append(ViotVirtioPciNode {
        type_: 3,
        length: 16,
        pci_segment: iommu_bdf.segment(),
        pci_bdf_number: iommu_bdf.into(),
        ..Default::default()
    });

    for device_bdf in devices_bdf {
        viot.append(ViotPciRangeNode {
            type_: 1,
            length: 24,
            endpoint_start: device_bdf.into(),
            pci_segment_start: device_bdf.segment(),
            pci_segment_end: device_bdf.segment(),
            pci_bdf_start: device_bdf.into(),
            pci_bdf_end: device_bdf.into(),
            output_node: 48,
            ..Default::default()
        });
    }

    viot
}
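// Guest memory layout of the tables written below: the RSDP sits at the
// fixed `arch::layout::RSDP_POINTER` address, the DSDT immediately follows
// it, and each subsequent table is placed right after the previous one
// (tracked via `prev_tbl_off`/`prev_tbl_len`). The XSDT is written last
// because it must reference every other table, and the RSDP in turn points
// at the XSDT.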
pub fn create_acpi_tables(
    guest_mem: &GuestMemoryMmap,
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
    numa_nodes: &NumaNodes,
    tpm_enabled: bool,
) -> GuestAddress {
    trace_scoped!("create_acpi_tables");

    let start_time = Instant::now();
    let rsdp_offset = arch::layout::RSDP_POINTER;
    let mut tables: Vec<u64> = Vec::new();

    // DSDT
    let dsdt = create_dsdt_table(device_manager, cpu_manager, memory_manager);
    let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap();
    guest_mem
        .write_slice(dsdt.as_slice(), dsdt_offset)
        .expect("Error writing DSDT table");

    // FACP aka FADT
    let facp = create_facp_table(dsdt_offset, device_manager);
    let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap();
    guest_mem
        .write_slice(facp.as_slice(), facp_offset)
        .expect("Error writing FACP table");
    tables.push(facp_offset.0);

    // MADT
    let madt = cpu_manager.lock().unwrap().create_madt();
    let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap();
    guest_mem
        .write_slice(madt.as_slice(), madt_offset)
        .expect("Error writing MADT table");
    tables.push(madt_offset.0);
    let mut prev_tbl_len = madt.len() as u64;
    let mut prev_tbl_off = madt_offset;

    // PPTT
    #[cfg(target_arch = "aarch64")]
    {
        let pptt = cpu_manager.lock().unwrap().create_pptt();
        let pptt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(pptt.as_slice(), pptt_offset)
            .expect("Error writing PPTT table");
        tables.push(pptt_offset.0);
        prev_tbl_len = pptt.len() as u64;
        prev_tbl_off = pptt_offset;
    }

    // GTDT
    #[cfg(target_arch = "aarch64")]
    {
        let gtdt = create_gtdt_table();
        let gtdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(gtdt.as_slice(), gtdt_offset)
            .expect("Error writing GTDT table");
        tables.push(gtdt_offset.0);
        prev_tbl_len = gtdt.len() as u64;
        prev_tbl_off = gtdt_offset;
    }

    // MCFG
    let mcfg = create_mcfg_table(device_manager.lock().unwrap().pci_segments());
    let mcfg_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(mcfg.as_slice(), mcfg_offset)
        .expect("Error writing MCFG table");
    tables.push(mcfg_offset.0);
    prev_tbl_len = mcfg.len() as u64;
    prev_tbl_off = mcfg_offset;

    // SPCR and DBG2
    #[cfg(target_arch = "aarch64")]
    {
        let is_serial_on = device_manager
            .lock()
            .unwrap()
            .get_device_info()
            .clone()
            .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
            .is_some();
        let serial_device_addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value();
        let serial_device_irq = if is_serial_on {
            device_manager
                .lock()
                .unwrap()
                .get_device_info()
                .clone()
                .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                .unwrap()
                .irq()
        } else {
            // If the serial device is disabled, fall back to a placeholder IRQ.
            31
        };

        // SPCR
        let spcr = create_spcr_table(serial_device_addr, serial_device_irq);
        let spcr_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(spcr.as_slice(), spcr_offset)
            .expect("Error writing SPCR table");
        tables.push(spcr_offset.0);
        prev_tbl_len = spcr.len() as u64;
        prev_tbl_off = spcr_offset;

        // DBG2
        let dbg2 = create_dbg2_table(serial_device_addr);
        let dbg2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(dbg2.as_slice(), dbg2_offset)
            .expect("Error writing DBG2 table");
        tables.push(dbg2_offset.0);
        prev_tbl_len = dbg2.len() as u64;
        prev_tbl_off = dbg2_offset;
    }

    if tpm_enabled {
        // TPM2 Table
        let tpm2 = create_tpm2_table();
        let tpm2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(tpm2.as_slice(), tpm2_offset)
            .expect("Error writing TPM2 table");
        tables.push(tpm2_offset.0);

        prev_tbl_len = tpm2.len() as u64;
        prev_tbl_off = tpm2_offset;
    }
    // SRAT and SLIT
    // Only created if the NUMA nodes list is not empty.
    if !numa_nodes.is_empty() {
        #[cfg(target_arch = "x86_64")]
        let topology = cpu_manager.lock().unwrap().get_vcpu_topology();
        // SRAT
        let srat = create_srat_table(
            numa_nodes,
            #[cfg(target_arch = "x86_64")]
            topology,
        );
        let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(srat.as_slice(), srat_offset)
            .expect("Error writing SRAT table");
        tables.push(srat_offset.0);

        // SLIT
        let slit = create_slit_table(numa_nodes);
        let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap();
        guest_mem
            .write_slice(slit.as_slice(), slit_offset)
            .expect("Error writing SLIT table");
        tables.push(slit_offset.0);

        prev_tbl_len = slit.len() as u64;
        prev_tbl_off = slit_offset;
    };

    #[cfg(target_arch = "aarch64")]
    {
        let iort = create_iort_table(device_manager.lock().unwrap().pci_segments());
        let iort_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(iort.as_slice(), iort_offset)
            .expect("Error writing IORT table");
        tables.push(iort_offset.0);
        prev_tbl_len = iort.len() as u64;
        prev_tbl_off = iort_offset;
    }

    // VIOT
    if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
    {
        let viot = create_viot_table(iommu_bdf, devices_bdf);

        let viot_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(viot.as_slice(), viot_offset)
            .expect("Error writing VIOT table");
        tables.push(viot_offset.0);
        prev_tbl_len = viot.len() as u64;
        prev_tbl_off = viot_offset;
    }

    // XSDT
    let mut xsdt = Sdt::new(*b"XSDT", 36, 1, *b"CLOUDH", *b"CHXSDT ", 1);
    for table in tables {
        xsdt.append(table);
    }
    xsdt.update_checksum();
    let xsdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(xsdt.as_slice(), xsdt_offset)
        .expect("Error writing XSDT table");

    // RSDP
    let rsdp = Rsdp::new(*b"CLOUDH", xsdt_offset.0);
    guest_mem
        .write_slice(rsdp.as_bytes(), rsdp_offset)
        .expect("Error writing RSDP");

    info!(
        "Generated ACPI tables: took {}µs size = {}",
        Instant::now().duration_since(start_time).as_micros(),
        xsdt_offset.0 + xsdt.len() as u64 - rsdp_offset.0
    );
    rsdp_offset
}
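// TDX boot path: the tables are returned to the caller rather than written
// into guest RAM at `RSDP_POINTER`. Note that `create_facp_table` is given
// `GuestAddress(0)`, leaving X_DSDT zeroed, presumably because the final
// DSDT placement is not known at this point.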
#[cfg(feature = "tdx")]
pub fn create_acpi_tables_tdx(
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
    numa_nodes: &NumaNodes,
) -> Vec<Sdt> {
    // DSDT
    let mut tables = vec![create_dsdt_table(
        device_manager,
        cpu_manager,
        memory_manager,
    )];

    // FACP aka FADT
    tables.push(create_facp_table(GuestAddress(0), device_manager));

    // MADT
    tables.push(cpu_manager.lock().unwrap().create_madt());

    // MCFG
    tables.push(create_mcfg_table(
        device_manager.lock().unwrap().pci_segments(),
    ));
    // SRAT and SLIT
    // Only created if the NUMA nodes list is not empty.
    if !numa_nodes.is_empty() {
        #[cfg(target_arch = "x86_64")]
        let topology = cpu_manager.lock().unwrap().get_vcpu_topology();

        // SRAT
        tables.push(create_srat_table(
            numa_nodes,
            #[cfg(target_arch = "x86_64")]
            topology,
        ));

        // SLIT
        tables.push(create_slit_table(numa_nodes));
    };

    // VIOT
    if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
    {
        tables.push(create_viot_table(iommu_bdf, devices_bdf));
    }

    tables
}