// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use std::sync::{Arc, Mutex};
use std::time::Instant;

#[cfg(target_arch = "aarch64")]
use acpi_tables::sdt::GenericAddress;
use acpi_tables::{rsdp::Rsdp, sdt::Sdt, Aml};
#[cfg(target_arch = "aarch64")]
use arch::aarch64::DeviceInfoForFdt;
#[cfg(target_arch = "aarch64")]
use arch::DeviceType;
use arch::NumaNodes;
use bitflags::bitflags;
use pci::PciBdf;
use tracer::trace_scoped;
use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryRegion};
use zerocopy::AsBytes;

use crate::cpu::CpuManager;
use crate::device_manager::DeviceManager;
use crate::memory_manager::MemoryManager;
use crate::pci_segment::PciSegment;
use crate::{GuestMemoryMmap, GuestRegionMmap};

/* Values for Type in APIC sub-headers */
#[cfg(target_arch = "x86_64")]
pub const ACPI_X2APIC_PROCESSOR: u8 = 9;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_IO: u8 = 1;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_XRUPT_OVERRIDE: u8 = 2;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_CPU_INTERFACE: u8 = 11;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_DISTRIBUTOR: u8 = 12;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_REDISTRIBUTOR: u8 = 14;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_TRANSLATOR: u8 = 15;

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct PciRangeEntry {
    pub base_address: u64,
    pub segment: u16,
    pub start: u8,
    pub end: u8,
    _reserved: u32,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct MemoryAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    _reserved1: u16,
    pub base_addr_lo: u32,
    pub base_addr_hi: u32,
    pub length_lo: u32,
    pub length_hi: u32,
    _reserved2: u32,
    pub flags: u32,
    _reserved3: u64,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ProcessorLocalX2ApicAffinity {
    pub type_: u8,
    pub length: u8,
    _reserved1: u16,
    pub proximity_domain: u32,
    pub x2apic_id: u32,
    pub flags: u32,
    pub clock_domain: u32,
    _reserved2: u32,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ProcessorGiccAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    pub acpi_processor_uid: u32,
    pub flags: u32,
    pub clock_domain: u32,
}

bitflags! {
    pub struct MemAffinityFlags: u32 {
        const NOFLAGS = 0;
        const ENABLE = 0b1;
        const HOTPLUGGABLE = 0b10;
        const NON_VOLATILE = 0b100;
    }
}

impl MemoryAffinity {
    fn from_region(
        region: &Arc<GuestRegionMmap>,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
        Self::from_range(
            region.start_addr().raw_value(),
            region.len(),
            proximity_domain,
            flags,
        )
    }

    fn from_range(
        base_addr: u64,
        size: u64,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
        let base_addr_lo = (base_addr & 0xffff_ffff) as u32;
        let base_addr_hi = (base_addr >> 32) as u32;
        let length_lo = (size & 0xffff_ffff) as u32;
        let length_hi = (size >> 32) as u32;

        MemoryAffinity {
            type_: 1,
            length: 40,
            proximity_domain,
            base_addr_lo,
            base_addr_hi,
            length_lo,
            length_hi,
            flags: flags.bits(),
            ..Default::default()
        }
    }
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ViotVirtioPciNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub pci_segment: u16,
    pub pci_bdf_number: u16,
    _reserved2: [u8; 8],
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default, AsBytes)]
struct ViotPciRangeNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub endpoint_start: u32,
    pub pci_segment_start: u16,
    pub pci_segment_end: u16,
    pub pci_bdf_start: u16,
    pub pci_bdf_end: u16,
    pub output_node: u16,
    _reserved2: [u8; 6],
}

pub fn create_dsdt_table(
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
) -> Sdt {
    trace_scoped!("create_dsdt_table");
    // DSDT
    let mut dsdt = Sdt::new(*b"DSDT", 36, 6, *b"CLOUDH", *b"CHDSDT  ", 1);

    let mut bytes = Vec::new();

    device_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    cpu_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    memory_manager.lock().unwrap().to_aml_bytes(&mut bytes);
    dsdt.append_slice(&bytes);

    dsdt
}
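
// The bare integer offsets passed to `facp.write()` below are byte offsets
// from the start of the FADT, per the ACPI 6.x FADT field layout (FLAGS at
// 112, RESET_REG at 116, X_DSDT at 140, SLEEP_CONTROL_REG at 244, and so on).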
fn create_facp_table(dsdt_offset: GuestAddress, device_manager: &Arc<Mutex<DeviceManager>>) -> Sdt {
    trace_scoped!("create_facp_table");

    // Revision 6 of the ACPI FADT table is 276 bytes long
    let mut facp = Sdt::new(*b"FACP", 276, 6, *b"CLOUDH", *b"CHFACP  ", 1);

    {
        let device_manager = device_manager.lock().unwrap();
        if let Some(address) = device_manager.acpi_platform_addresses().reset_reg_address {
            // RESET_REG
            facp.write(116, address);
            // RESET_VALUE
            facp.write(128, 1u8);
        }

        if let Some(address) = device_manager
            .acpi_platform_addresses()
            .sleep_control_reg_address
        {
            // SLEEP_CONTROL_REG
            facp.write(244, address);
        }

        if let Some(address) = device_manager
            .acpi_platform_addresses()
            .sleep_status_reg_address
        {
            // SLEEP_STATUS_REG
            facp.write(256, address);
        }

        if let Some(address) = device_manager.acpi_platform_addresses().pm_timer_address {
            // X_PM_TMR_BLK
            facp.write(208, address);
        }
    }

    // aarch64 specific fields
    #[cfg(target_arch = "aarch64")]
    // ARM_BOOT_ARCH: enable PSCI with HVC enable-method
    facp.write(129, 3u16);

    // Architecture common fields
    // HW_REDUCED_ACPI, RESET_REG_SUP, TMR_VAL_EXT
    let fadt_flags: u32 = 1 << 20 | 1 << 10 | 1 << 8;
    facp.write(112, fadt_flags);
    // FADT minor version
    facp.write(131, 3u8);
    // X_DSDT
    facp.write(140, dsdt_offset.0);
    // Hypervisor Vendor Identity
    facp.write_bytes(268, b"CLOUDHYP");

    facp.update_checksum();

    facp
}

fn create_mcfg_table(pci_segments: &[PciSegment]) -> Sdt {
    let mut mcfg = Sdt::new(*b"MCFG", 36, 1, *b"CLOUDH", *b"CHMCFG  ", 1);

    // MCFG reserved 8 bytes
    mcfg.append(0u64);

    for segment in pci_segments {
        // 32-bit PCI enhanced configuration mechanism
        mcfg.append(PciRangeEntry {
            base_address: segment.mmio_config_address,
            segment: segment.id,
            start: 0,
            end: 0,
            ..Default::default()
        });
    }
    mcfg
}

fn create_tpm2_table() -> Sdt {
    let mut tpm = Sdt::new(*b"TPM2", 52, 3, *b"CLOUDH", *b"CHTPM2  ", 1);

    tpm.write(36, 0_u16); // Platform Class
    tpm.write(38, 0_u16); // Reserved Space
    tpm.write(40, 0xfed4_0040_u64); // Address of Control Area
    tpm.write(48, 7_u32); // Start Method

    tpm.update_checksum();
    tpm
}

fn create_srat_table(
    numa_nodes: &NumaNodes,
    #[cfg(target_arch = "x86_64")] topology: Option<(u8, u8, u8)>,
) -> Sdt {
    let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT  ", 1);
    // SRAT reserved 12 bytes
    srat.append_slice(&[0u8; 12]);

    // Check the MemoryAffinity structure is the right size as expected by
    // the ACPI specification.
    assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);

    for (node_id, node) in numa_nodes.iter() {
        let proximity_domain = *node_id;

        for region in &node.memory_regions {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for region in &node.hotplug_regions {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE | MemAffinityFlags::HOTPLUGGABLE,
            ))
        }

        #[cfg(target_arch = "x86_64")]
        for section in &node.sgx_epc_sections {
            srat.append(MemoryAffinity::from_range(
                section.start().raw_value(),
                section.size(),
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for cpu in &node.cpus {
            #[cfg(target_arch = "x86_64")]
            let x2apic_id = arch::x86_64::get_x2apic_id(*cpu as u32, topology);
            #[cfg(target_arch = "aarch64")]
            let x2apic_id = *cpu as u32;

            // Flags
            // - Enabled = 1 (bit 0)
            // - Reserved bits 1-31
            let flags = 1;

            #[cfg(target_arch = "x86_64")]
            srat.append(ProcessorLocalX2ApicAffinity {
                type_: 2,
                length: 24,
                proximity_domain,
                x2apic_id,
                flags,
                clock_domain: 0,
                ..Default::default()
            });
            #[cfg(target_arch = "aarch64")]
            srat.append(ProcessorGiccAffinity {
                type_: 3,
                length: 18,
                proximity_domain,
                acpi_processor_uid: x2apic_id,
                flags,
                clock_domain: 0,
            });
        }
    }
    srat
}

fn create_slit_table(numa_nodes: &NumaNodes) -> Sdt {
    let mut slit = Sdt::new(*b"SLIT", 36, 1, *b"CLOUDH", *b"CHSLIT  ", 1);
    // Number of System Localities on 8 bytes.
    slit.append(numa_nodes.len() as u64);

    let existing_nodes: Vec<u32> = numa_nodes.keys().cloned().collect();
    for (node_id, node) in numa_nodes.iter() {
        let distances = &node.distances;
        for i in existing_nodes.iter() {
            let dist: u8 = if *node_id == *i {
                10
            } else if let Some(distance) = distances.get(i) {
                *distance
            } else {
                20
            };

            slit.append(dist);
        }
    }
    slit
}

#[cfg(target_arch = "aarch64")]
fn create_gtdt_table() -> Sdt {
    const ARCH_TIMER_NS_EL2_IRQ: u32 = 10;
    const ARCH_TIMER_VIRT_IRQ: u32 = 11;
    const ARCH_TIMER_S_EL1_IRQ: u32 = 13;
    const ARCH_TIMER_NS_EL1_IRQ: u32 = 14;
    const ACPI_GTDT_INTERRUPT_MODE_LEVEL: u32 = 0;
    const ACPI_GTDT_CAP_ALWAYS_ON: u32 = 1 << 2;

    let irqflags: u32 = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
    // GTDT
    let mut gtdt = Sdt::new(*b"GTDT", 104, 2, *b"CLOUDH", *b"CHGTDT  ", 1);
    // Secure EL1 Timer GSIV
    gtdt.write(48, ARCH_TIMER_S_EL1_IRQ + 16);
    // Secure EL1 Timer Flags
    gtdt.write(52, irqflags);
    // Non-Secure EL1 Timer GSIV
    gtdt.write(56, ARCH_TIMER_NS_EL1_IRQ + 16);
    // Non-Secure EL1 Timer Flags
    gtdt.write(60, irqflags | ACPI_GTDT_CAP_ALWAYS_ON);
    // Virtual EL1 Timer GSIV
    gtdt.write(64, ARCH_TIMER_VIRT_IRQ + 16);
    // Virtual EL1 Timer Flags
    gtdt.write(68, irqflags);
    // EL2 Timer GSIV
    gtdt.write(72, ARCH_TIMER_NS_EL2_IRQ + 16);
    // EL2 Timer Flags
    gtdt.write(76, irqflags);

    gtdt.update_checksum();

    gtdt
}

#[cfg(target_arch = "aarch64")]
fn create_spcr_table(base_address: u64, gsi: u32) -> Sdt {
    // SPCR
    let mut spcr = Sdt::new(*b"SPCR", 80, 2, *b"CLOUDH", *b"CHSPCR  ", 1);
    // Interface Type
    spcr.write(36, 3u8);
    // Base Address in format ACPI Generic Address Structure
    spcr.write(40, GenericAddress::mmio_address::<u8>(base_address));
    // Interrupt Type: Bit[3] ARMH GIC interrupt
    spcr.write(52, (1 << 3) as u8);
    // Global System Interrupt used by the UART
    spcr.write(54, gsi.to_le());
    // Baud Rate: 3 = 9600
    spcr.write(58, 3u8);
    // Stop Bits: 1 Stop bit
    spcr.write(60, 1u8);
    // Flow Control: Bit[1] = RTS/CTS hardware flow control
    spcr.write(61, (1 << 1) as u8);
    // PCI Device ID: Not a PCI device
    spcr.write(64, 0xffff_u16);
    // PCI Vendor ID: Not a PCI device
    spcr.write(66, 0xffff_u16);

    spcr.update_checksum();

    spcr
}

#[cfg(target_arch = "aarch64")]
fn create_dbg2_table(base_address: u64) -> Sdt {
    let namespace = "_SB_.COM1";
    let debug_device_info_offset = 44usize;
    let debug_device_info_len: u16 = 22 /* BaseAddressRegisterOffset */ +
        12 /* BaseAddressRegister */ +
        4 /* AddressSize */ +
        namespace.len() as u16 + 1 /* zero-terminated */;
    let tbl_len: u32 = debug_device_info_offset as u32 + debug_device_info_len as u32;
    let mut dbg2 = Sdt::new(*b"DBG2", tbl_len, 0, *b"CLOUDH", *b"CHDBG2  ", 1);

    /* OffsetDbgDeviceInfo */
    dbg2.write_u32(36, 44);
    /* NumberDbgDeviceInfo */
    dbg2.write_u32(40, 1);

    /* Debug Device Information structure */
    /* Offsets are calculated from the start of this structure. */
    let namespace_offset = 38u16;
    let base_address_register_offset = 22u16;
    let address_size_offset = 34u16;
    /* Revision */
    dbg2.write_u8(debug_device_info_offset, 0);
    /* Length */
    dbg2.write_u16(debug_device_info_offset + 1, debug_device_info_len);
    /* NumberofGenericAddressRegisters */
    dbg2.write_u8(debug_device_info_offset + 3, 1);
    /* NameSpaceStringLength */
    dbg2.write_u16(debug_device_info_offset + 4, namespace.len() as u16 + 1);
    /* NameSpaceStringOffset */
    dbg2.write_u16(debug_device_info_offset + 6, namespace_offset);
    /* OemDataLength */
    dbg2.write_u16(debug_device_info_offset + 8, 0);
    /* OemDataOffset */
    dbg2.write_u16(debug_device_info_offset + 10, 0);
    /* Port Type */
    dbg2.write_u16(debug_device_info_offset + 12, 0x8000);
    /* Port Subtype */
    dbg2.write_u16(debug_device_info_offset + 14, 0x0003);
    /* Reserved */
    dbg2.write_u16(debug_device_info_offset + 16, 0);
    /* BaseAddressRegisterOffset */
    dbg2.write_u16(debug_device_info_offset + 18, base_address_register_offset);
    /* AddressSizeOffset */
    dbg2.write_u16(debug_device_info_offset + 20, address_size_offset);
    /* BaseAddressRegister */
    dbg2.write(
        debug_device_info_offset + base_address_register_offset as usize,
        GenericAddress::mmio_address::<u8>(base_address),
    );
    /* AddressSize */
    dbg2.write_u32(
        debug_device_info_offset + address_size_offset as usize,
        0x1000,
    );
    /* NamespaceString, zero-terminated ASCII */
    for (k, c) in namespace.chars().enumerate() {
        dbg2.write_u8(
            debug_device_info_offset + namespace_offset as usize + k,
            c as u8,
        );
    }
    dbg2.write_u8(
        debug_device_info_offset + namespace_offset as usize + namespace.len(),
        0,
    );

    dbg2.update_checksum();

    dbg2
}

#[cfg(target_arch = "aarch64")]
fn create_iort_table(pci_segments: &[PciSegment]) -> Sdt {
    const ACPI_IORT_NODE_ITS_GROUP: u8 = 0x00;
    const ACPI_IORT_NODE_PCI_ROOT_COMPLEX: u8 = 0x02;
    const ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET: usize = 72;
    const ACPI_IORT_NODE_ROOT_COMPLEX_SIZE: usize = 60;

    // The IORT table contains:
    // - Header (size = 40)
    // - 1 x ITS Group Node (size = 24)
    // - N x Root Complex Node (N = number of pci segments, size = 60 x N)
    let iort_table_size: u32 = (ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET
        + ACPI_IORT_NODE_ROOT_COMPLEX_SIZE * pci_segments.len())
        as u32;
    let mut iort = Sdt::new(*b"IORT", iort_table_size, 2, *b"CLOUDH", *b"CHIORT  ", 1);
    // Number of IORT nodes: 1 ITS group node + N root complex nodes
    iort.write(36, ((1 + pci_segments.len()) as u32).to_le());
    // Offset to the array of IORT nodes
    iort.write(40, (48u32).to_le());

    // ITS group node
    iort.write(48, ACPI_IORT_NODE_ITS_GROUP);
    // Length of the ITS group node in bytes
    iort.write(49, (24u16).to_le());
    // ITS counts
    iort.write(64, (1u32).to_le());

    // Root Complex Nodes
    for (i, segment) in pci_segments.iter().enumerate() {
        let node_offset: usize =
            ACPI_IORT_NODE_ROOT_COMPLEX_OFFSET + i * ACPI_IORT_NODE_ROOT_COMPLEX_SIZE;
        iort.write(node_offset, ACPI_IORT_NODE_PCI_ROOT_COMPLEX);
        // Length of the root complex node in bytes
        iort.write(
            node_offset + 1,
            (ACPI_IORT_NODE_ROOT_COMPLEX_SIZE as u16).to_le(),
        );
        // Revision
        iort.write(node_offset + 3, (3u8).to_le());
        // Node ID
        iort.write(node_offset + 4, (segment.id as u32).to_le());
        // Mapping counts
        iort.write(node_offset + 8, (1u32).to_le());
        // Offset from the start of the RC node to the start of its Array of ID mappings
        iort.write(node_offset + 12, (36u32).to_le());
        // Fully coherent device
        iort.write(node_offset + 16, (1u32).to_le());
        // CCA = CPM = DCAS = 1
        iort.write(node_offset + 24, 3u8);
        // PCI segment number
        iort.write(node_offset + 28, (segment.id as u32).to_le());
        // Memory address size limit
        iort.write(node_offset + 32, (64u8).to_le());

        // From offset 36 onward is the space for ID mappings Array.
        // Now we have only one mapping.
        let mapping_offset: usize = node_offset + 36;
        // The lowest value in the input range
        iort.write(mapping_offset, (0u32).to_le());
        // The number of IDs in the range minus one:
        // This should cover all the devices of a segment:
        // 1 (bus) x 32 (devices) x 8 (functions) = 256
        // Note: Currently only 1 bus is supported in a segment.
        iort.write(mapping_offset + 4, (255_u32).to_le());
        // The lowest value in the output range
        iort.write(mapping_offset + 8, ((256 * segment.id) as u32).to_le());
        // id_mapping_array_output_reference should be
        // the ITS group node (the first node) if no SMMU
        iort.write(mapping_offset + 12, (48u32).to_le());
        // Flags
        iort.write(mapping_offset + 16, (0u32).to_le());
    }

    iort.update_checksum();

    iort
}

fn create_viot_table(iommu_bdf: &PciBdf, devices_bdf: &[PciBdf]) -> Sdt {
    // VIOT
    let mut viot = Sdt::new(*b"VIOT", 36, 0, *b"CLOUDH", *b"CHVIOT  ", 0);
    // Node count
    viot.append((devices_bdf.len() + 1) as u16);
    // Node offset
    viot.append(48u16);
    // VIOT reserved 8 bytes
    viot.append_slice(&[0u8; 8]);

    // Virtio-iommu based on virtio-pci node
    viot.append(ViotVirtioPciNode {
        type_: 3,
        length: 16,
        pci_segment: iommu_bdf.segment(),
        pci_bdf_number: iommu_bdf.into(),
        ..Default::default()
    });

    for device_bdf in devices_bdf {
        viot.append(ViotPciRangeNode {
            type_: 1,
            length: 24,
            endpoint_start: device_bdf.into(),
            pci_segment_start: device_bdf.segment(),
            pci_segment_end: device_bdf.segment(),
            pci_bdf_start: device_bdf.into(),
            pci_bdf_end: device_bdf.into(),
            output_node: 48,
            ..Default::default()
        });
    }

    viot
}
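
// Guest memory layout produced below: the RSDP is placed at
// arch::layout::RSDP_POINTER and the tables follow it back to back, starting
// with the DSDT. Every table except the DSDT (which is referenced from the
// FADT's X_DSDT field) is recorded in `tables` and linked from the XSDT,
// which the RSDP in turn points to.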
pub fn create_acpi_tables(
    guest_mem: &GuestMemoryMmap,
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
    numa_nodes: &NumaNodes,
    tpm_enabled: bool,
) -> GuestAddress {
    trace_scoped!("create_acpi_tables");

    let start_time = Instant::now();
    let rsdp_offset = arch::layout::RSDP_POINTER;
    let mut tables: Vec<u64> = Vec::new();

    // DSDT
    let dsdt = create_dsdt_table(device_manager, cpu_manager, memory_manager);
    let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap();
    guest_mem
        .write_slice(dsdt.as_slice(), dsdt_offset)
        .expect("Error writing DSDT table");

    // FACP aka FADT
    let facp = create_facp_table(dsdt_offset, device_manager);
    let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap();
    guest_mem
        .write_slice(facp.as_slice(), facp_offset)
        .expect("Error writing FACP table");
    tables.push(facp_offset.0);

    // MADT
    let madt = cpu_manager.lock().unwrap().create_madt();
    let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap();
    guest_mem
        .write_slice(madt.as_slice(), madt_offset)
        .expect("Error writing MADT table");
    tables.push(madt_offset.0);
    let mut prev_tbl_len = madt.len() as u64;
    let mut prev_tbl_off = madt_offset;

    // PPTT
    #[cfg(target_arch = "aarch64")]
    {
        let pptt = cpu_manager.lock().unwrap().create_pptt();
        let pptt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(pptt.as_slice(), pptt_offset)
            .expect("Error writing PPTT table");
        tables.push(pptt_offset.0);
        prev_tbl_len = pptt.len() as u64;
        prev_tbl_off = pptt_offset;
    }

    // GTDT
    #[cfg(target_arch = "aarch64")]
    {
        let gtdt = create_gtdt_table();
        let gtdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(gtdt.as_slice(), gtdt_offset)
            .expect("Error writing GTDT table");
        tables.push(gtdt_offset.0);
        prev_tbl_len = gtdt.len() as u64;
        prev_tbl_off = gtdt_offset;
    }

    // MCFG
    let mcfg = create_mcfg_table(device_manager.lock().unwrap().pci_segments());
    let mcfg_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(mcfg.as_slice(), mcfg_offset)
        .expect("Error writing MCFG table");
    tables.push(mcfg_offset.0);
    prev_tbl_len = mcfg.len() as u64;
    prev_tbl_off = mcfg_offset;

    // SPCR and DBG2
    #[cfg(target_arch = "aarch64")]
    {
        let is_serial_on = device_manager
            .lock()
            .unwrap()
            .get_device_info()
            .clone()
            .contains_key(&(DeviceType::Serial, DeviceType::Serial.to_string()));
        let serial_device_addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value();
        let serial_device_irq = if is_serial_on {
            device_manager
                .lock()
                .unwrap()
                .get_device_info()
                .clone()
                .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                .unwrap()
                .irq()
        } else {
            // If serial is turned off, add a fake device with invalid irq.
            31
        };

        // SPCR
        let spcr = create_spcr_table(serial_device_addr, serial_device_irq);
        let spcr_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(spcr.as_slice(), spcr_offset)
            .expect("Error writing SPCR table");
        tables.push(spcr_offset.0);
        prev_tbl_len = spcr.len() as u64;
        prev_tbl_off = spcr_offset;

        // DBG2
        let dbg2 = create_dbg2_table(serial_device_addr);
        let dbg2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(dbg2.as_slice(), dbg2_offset)
            .expect("Error writing DBG2 table");
        tables.push(dbg2_offset.0);
        prev_tbl_len = dbg2.len() as u64;
        prev_tbl_off = dbg2_offset;
    }

    if tpm_enabled {
        // TPM2 Table
        let tpm2 = create_tpm2_table();
        let tpm2_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(tpm2.as_slice(), tpm2_offset)
            .expect("Error writing TPM2 table");
        tables.push(tpm2_offset.0);

        prev_tbl_len = tpm2.len() as u64;
        prev_tbl_off = tpm2_offset;
    }
    // SRAT and SLIT
    // Only created if the NUMA nodes list is not empty.
    if !numa_nodes.is_empty() {
        #[cfg(target_arch = "x86_64")]
        let topology = cpu_manager.lock().unwrap().get_vcpu_topology();
        // SRAT
        let srat = create_srat_table(
            numa_nodes,
            #[cfg(target_arch = "x86_64")]
            topology,
        );
        let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(srat.as_slice(), srat_offset)
            .expect("Error writing SRAT table");
        tables.push(srat_offset.0);

        // SLIT
        let slit = create_slit_table(numa_nodes);
        let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap();
        guest_mem
            .write_slice(slit.as_slice(), slit_offset)
            .expect("Error writing SLIT table");
        tables.push(slit_offset.0);

        prev_tbl_len = slit.len() as u64;
        prev_tbl_off = slit_offset;
    };

    #[cfg(target_arch = "aarch64")]
    {
        let iort = create_iort_table(device_manager.lock().unwrap().pci_segments());
        let iort_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(iort.as_slice(), iort_offset)
            .expect("Error writing IORT table");
        tables.push(iort_offset.0);
        prev_tbl_len = iort.len() as u64;
        prev_tbl_off = iort_offset;
    }

    // VIOT
    if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
    {
        let viot = create_viot_table(iommu_bdf, devices_bdf);

        let viot_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(viot.as_slice(), viot_offset)
            .expect("Error writing VIOT table");
        tables.push(viot_offset.0);
        prev_tbl_len = viot.len() as u64;
        prev_tbl_off = viot_offset;
    }

    // XSDT
    let mut xsdt = Sdt::new(*b"XSDT", 36, 1, *b"CLOUDH", *b"CHXSDT  ", 1);
    for table in tables {
        xsdt.append(table);
    }
    xsdt.update_checksum();
    let xsdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(xsdt.as_slice(), xsdt_offset)
        .expect("Error writing XSDT table");

    // RSDP
    let rsdp = Rsdp::new(*b"CLOUDH", xsdt_offset.0);
    guest_mem
        .write_slice(rsdp.as_bytes(), rsdp_offset)
        .expect("Error writing RSDP");

    info!(
        "Generated ACPI tables: took {}µs size = {}",
        Instant::now().duration_since(start_time).as_micros(),
        xsdt_offset.0 + xsdt.len() as u64 - rsdp_offset.0
    );
    rsdp_offset
}

#[cfg(feature = "tdx")]
pub fn create_acpi_tables_tdx(
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
    numa_nodes: &NumaNodes,
) -> Vec<Sdt> {
    // DSDT
    let mut tables = vec![create_dsdt_table(
        device_manager,
        cpu_manager,
        memory_manager,
    )];

    // FACP aka FADT
    tables.push(create_facp_table(GuestAddress(0), device_manager));

    // MADT
    tables.push(cpu_manager.lock().unwrap().create_madt());

    // MCFG
    tables.push(create_mcfg_table(
        device_manager.lock().unwrap().pci_segments(),
    ));

    // SRAT and SLIT
    // Only created if the NUMA nodes list is not empty.
    if !numa_nodes.is_empty() {
        #[cfg(target_arch = "x86_64")]
        let topology = cpu_manager.lock().unwrap().get_vcpu_topology();

        // SRAT
        tables.push(create_srat_table(
            numa_nodes,
            #[cfg(target_arch = "x86_64")]
            topology,
        ));

        // SLIT
        tables.push(create_slit_table(numa_nodes));
    };

    // VIOT
    if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
    {
        tables.push(create_viot_table(iommu_bdf, devices_bdf));
    }

    tables
}
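
// A minimal size sanity check for the packed structures above: they are
// serialized byte-for-byte into the generated tables, so their in-memory
// sizes must match the `length` values assigned when they are appended
// (e.g. `length: 40` for MemoryAffinity).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_acpi_struct_sizes() {
        // MCFG configuration-space allocation entry: 16 bytes.
        assert_eq!(std::mem::size_of::<PciRangeEntry>(), 16);
        // SRAT Memory Affinity structure (type 1): 40 bytes.
        assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);
        // SRAT Processor Local x2APIC Affinity structure (type 2): 24 bytes.
        assert_eq!(std::mem::size_of::<ProcessorLocalX2ApicAffinity>(), 24);
        // SRAT GICC Affinity structure (type 3): 18 bytes.
        assert_eq!(std::mem::size_of::<ProcessorGiccAffinity>(), 18);
        // VIOT virtio-pci node (type 3): 16 bytes.
        assert_eq!(std::mem::size_of::<ViotVirtioPciNode>(), 16);
        // VIOT PCI range node (type 1): 24 bytes.
        assert_eq!(std::mem::size_of::<ViotPciRangeNode>(), 24);
    }
}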