// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::cpu::CpuManager;
use crate::device_manager::DeviceManager;
use crate::memory_manager::MemoryManager;
use crate::vm::NumaNodes;
use crate::{GuestMemoryMmap, GuestRegionMmap};
use acpi_tables::sdt::GenericAddress;
use acpi_tables::{aml::Aml, rsdp::Rsdp, sdt::Sdt};
#[cfg(target_arch = "aarch64")]
use arch::aarch64::DeviceInfoForFdt;
#[cfg(target_arch = "aarch64")]
use arch::DeviceType;

use bitflags::bitflags;
use std::sync::{Arc, Mutex};
use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryRegion};

/* Values for Type in APIC sub-headers */
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_PROCESSOR: u8 = 0;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_IO: u8 = 1;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_XRUPT_OVERRIDE: u8 = 2;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_CPU_INTERFACE: u8 = 11;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_DISTRIBUTOR: u8 = 12;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_REDISTRIBUTOR: u8 = 14;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_TRANSLATOR: u8 = 15;

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct PciRangeEntry {
    pub base_address: u64,
    pub segment: u16,
    pub start: u8,
    pub end: u8,
    _reserved: u32,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct MemoryAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    _reserved1: u16,
    pub base_addr_lo: u32,
    pub base_addr_hi: u32,
    pub length_lo: u32,
    pub length_hi: u32,
    _reserved2: u32,
    pub flags: u32,
    _reserved3: u64,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ProcessorLocalX2ApicAffinity {
    pub type_: u8,
    pub length: u8,
    _reserved1: u16,
    pub proximity_domain: u32,
    pub x2apic_id: u32,
    pub flags: u32,
    pub clock_domain: u32,
    _reserved2: u32,
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ProcessorGiccAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    pub acpi_processor_uid: u32,
    pub flags: u32,
    pub clock_domain: u32,
}

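// The #[repr(packed)] structures above are copied into the ACPI tables
// byte-for-byte, so their in-memory layout must match the wire format of
// the corresponding ACPI structure exactly. As a sanity check, their sizes
// line up with the ACPI 6.x structure definitions:
//
//     assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);
//     assert_eq!(std::mem::size_of::<ProcessorLocalX2ApicAffinity>(), 24);
//     assert_eq!(std::mem::size_of::<ProcessorGiccAffinity>(), 18);
//
// create_srat_table() below performs the MemoryAffinity check at runtime.
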
bitflags! {
    pub struct MemAffinityFlags: u32 {
        const NOFLAGS = 0;
        const ENABLE = 0b1;
        const HOTPLUGGABLE = 0b10;
        const NON_VOLATILE = 0b100;
    }
}

impl MemoryAffinity {
    fn from_region(
        region: &Arc<GuestRegionMmap>,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
        Self::from_range(
            region.start_addr().raw_value(),
            region.len(),
            proximity_domain,
            flags,
        )
    }

    fn from_range(
        base_addr: u64,
        size: u64,
        proximity_domain: u32,
        flags: MemAffinityFlags,
    ) -> Self {
        let base_addr_lo = (base_addr & 0xffff_ffff) as u32;
        let base_addr_hi = (base_addr >> 32) as u32;
        let length_lo = (size & 0xffff_ffff) as u32;
        let length_hi = (size >> 32) as u32;

        MemoryAffinity {
            type_: 1,
            length: 40,
            proximity_domain,
            base_addr_lo,
            base_addr_hi,
            length_lo,
            length_hi,
            flags: flags.bits(),
            ..Default::default()
        }
    }
}

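// For example, a 6 GiB range based at 4 GiB in proximity domain 0 splits
// across the 32-bit halves as follows (illustrative values):
//
//     let entry = MemoryAffinity::from_range(
//         0x1_0000_0000, // base: 4 GiB
//         0x1_8000_0000, // size: 6 GiB
//         0,
//         MemAffinityFlags::ENABLE,
//     );
//     // entry.base_addr_lo == 0x0000_0000, entry.base_addr_hi == 0x1
//     // entry.length_lo    == 0x8000_0000, entry.length_hi    == 0x1
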
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ViotVirtioPciNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub pci_segment: u16,
    pub pci_bdf_number: u16,
    _reserved2: [u8; 8],
}

#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ViotPciRangeNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub endpoint_start: u32,
    pub pci_segment_start: u16,
    pub pci_segment_end: u16,
    pub pci_bdf_start: u16,
    pub pci_bdf_end: u16,
    pub output_node: u16,
    _reserved2: [u8; 6],
}

pub fn create_dsdt_table(
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
) -> Sdt {
    // DSDT
    let mut dsdt = Sdt::new(*b"DSDT", 36, 6, *b"CLOUDH", *b"CHDSDT  ", 1);

    dsdt.append_slice(device_manager.lock().unwrap().to_aml_bytes().as_slice());
    dsdt.append_slice(cpu_manager.lock().unwrap().to_aml_bytes().as_slice());
    dsdt.append_slice(memory_manager.lock().unwrap().to_aml_bytes().as_slice());

    dsdt
}

fn create_facp_table(dsdt_offset: GuestAddress) -> Sdt {
    // Revision 6 of the ACPI FADT table is 276 bytes long
    let mut facp = Sdt::new(*b"FACP", 276, 6, *b"CLOUDH", *b"CHFACP  ", 1);

    // x86_64 specific fields
    #[cfg(target_arch = "x86_64")]
    {
        // PM_TMR_BLK I/O port
        facp.write(76, 0xb008u32);
        // RESET_REG
        facp.write(116, GenericAddress::io_port_address::<u8>(0x3c0));
        // RESET_VALUE
        facp.write(128, 1u8);
        // X_PM_TMR_BLK
        facp.write(208, GenericAddress::io_port_address::<u32>(0xb008));
        // SLEEP_CONTROL_REG
        facp.write(244, GenericAddress::io_port_address::<u8>(0x3c0));
        // SLEEP_STATUS_REG
        facp.write(256, GenericAddress::io_port_address::<u8>(0x3c0));
    }

    // aarch64 specific fields
    #[cfg(target_arch = "aarch64")]
    // ARM_BOOT_ARCH: enable PSCI with HVC enable-method
    facp.write(129, 3u16);

    // Architecture common fields
    // HW_REDUCED_ACPI, RESET_REG_SUP, TMR_VAL_EXT
    let fadt_flags: u32 = 1 << 20 | 1 << 10 | 1 << 8;
    facp.write(112, fadt_flags);
    // FADT minor version
    facp.write(131, 3u8);
    // X_DSDT
    facp.write(140, dsdt_offset.0);
    // Hypervisor Vendor Identity
    facp.write(268, *b"CLOUDHYP");

    facp.update_checksum();

    facp
}

fn create_mcfg_table() -> Sdt {
    let mut mcfg = Sdt::new(*b"MCFG", 36, 1, *b"CLOUDH", *b"CHMCFG  ", 1);

    // MCFG reserved 8 bytes
    mcfg.append(0u64);

    // 32-bit PCI enhanced configuration mechanism
    mcfg.append(PciRangeEntry {
        base_address: arch::layout::PCI_MMCONFIG_START.0,
        segment: 0,
        start: 0,
        // Each bus occupies 1 MiB of ECAM space, so the end bus number is
        // derived from the size of the MMCONFIG window.
        end: ((arch::layout::PCI_MMCONFIG_SIZE - 1) >> 20) as u8,
        ..Default::default()
    });
    mcfg
}

fn create_srat_table(numa_nodes: &NumaNodes) -> Sdt {
    let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT  ", 1);
    // SRAT reserved 12 bytes
    srat.append_slice(&[0u8; 12]);

    // Check the MemoryAffinity structure is the right size as expected by
    // the ACPI specification.
    assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);

    for (node_id, node) in numa_nodes.iter() {
        let proximity_domain = *node_id as u32;

        for region in node.memory_regions() {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for region in node.hotplug_regions() {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE | MemAffinityFlags::HOTPLUGGABLE,
            ))
        }

        #[cfg(target_arch = "x86_64")]
        for section in node.sgx_epc_sections() {
            srat.append(MemoryAffinity::from_range(
                section.start().raw_value(),
                section.size(),
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for cpu in node.cpus() {
            let x2apic_id = *cpu as u32;

            // Flags
            // - Enabled = 1 (bit 0)
            // - Reserved bits 1-31
            let flags = 1;

            #[cfg(target_arch = "x86_64")]
            srat.append(ProcessorLocalX2ApicAffinity {
                type_: 2,
                length: 24,
                proximity_domain,
                x2apic_id,
                flags,
                clock_domain: 0,
                ..Default::default()
            });
            #[cfg(target_arch = "aarch64")]
            srat.append(ProcessorGiccAffinity {
                type_: 3,
                length: 18,
                proximity_domain,
                acpi_processor_uid: x2apic_id,
                flags,
                clock_domain: 0,
            });
        }
    }
    srat
}

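// The SLIT encodes an N x N matrix of relative distances between NUMA
// nodes, appended row by row after the 8-byte locality count. A node's
// distance to itself is always 10, and 20 is used as the default when no
// explicit distance is configured. For two nodes with a configured
// distance of 15, the table body would contain (hypothetical values):
//
//     2u64        // number of system localities
//     10, 15,     // distances from node 0
//     15, 10,     // distances from node 1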
fn create_slit_table(numa_nodes: &NumaNodes) -> Sdt {
    let mut slit = Sdt::new(*b"SLIT", 36, 1, *b"CLOUDH", *b"CHSLIT  ", 1);
    // Number of System Localities on 8 bytes.
    slit.append(numa_nodes.len() as u64);

    let existing_nodes: Vec<u32> = numa_nodes.keys().cloned().collect();
    for (node_id, node) in numa_nodes.iter() {
        let distances = node.distances();
        for i in existing_nodes.iter() {
            let dist: u8 = if *node_id == *i {
                10
            } else if let Some(distance) = distances.get(i) {
                *distance as u8
            } else {
                20
            };

            slit.append(dist);
        }
    }
    slit
}

#[cfg(target_arch = "aarch64")]
fn create_gtdt_table() -> Sdt {
    // The architectural timer interrupts are PPIs; on the GIC, PPI n is
    // assigned GSIV n + 16, hence the "+ 16" on every GSIV written below.
    const ARCH_TIMER_NS_EL2_IRQ: u32 = 10;
    const ARCH_TIMER_VIRT_IRQ: u32 = 11;
    const ARCH_TIMER_S_EL1_IRQ: u32 = 13;
    const ARCH_TIMER_NS_EL1_IRQ: u32 = 14;
    const ACPI_GTDT_INTERRUPT_MODE_LEVEL: u32 = 0;
    const ACPI_GTDT_CAP_ALWAYS_ON: u32 = 1 << 2;

    let irqflags: u32 = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
    // GTDT
    let mut gtdt = Sdt::new(*b"GTDT", 104, 2, *b"CLOUDH", *b"CHGTDT  ", 1);
    // Secure EL1 Timer GSIV
    gtdt.write(48, ARCH_TIMER_S_EL1_IRQ + 16);
    // Secure EL1 Timer Flags
    gtdt.write(52, irqflags);
    // Non-Secure EL1 Timer GSIV
    gtdt.write(56, ARCH_TIMER_NS_EL1_IRQ + 16);
    // Non-Secure EL1 Timer Flags
    gtdt.write(60, irqflags | ACPI_GTDT_CAP_ALWAYS_ON);
    // Virtual EL1 Timer GSIV
    gtdt.write(64, ARCH_TIMER_VIRT_IRQ + 16);
    // Virtual EL1 Timer Flags
    gtdt.write(68, irqflags);
    // EL2 Timer GSIV
    gtdt.write(72, ARCH_TIMER_NS_EL2_IRQ + 16);
    // EL2 Timer Flags
    gtdt.write(76, irqflags);

    gtdt.update_checksum();

    gtdt
}

#[cfg(target_arch = "aarch64")]
fn create_spcr_table(base_address: u64, gsi: u32) -> Sdt {
    // SPCR
    let mut spcr = Sdt::new(*b"SPCR", 80, 2, *b"CLOUDH", *b"CHSPCR  ", 1);
    // Interface Type
    spcr.write(36, 3u8);
    // Base Address in format ACPI Generic Address Structure
    spcr.write(40, GenericAddress::mmio_address::<u8>(base_address));
    // Interrupt Type: Bit[3] ARMH GIC interrupt
    spcr.write(52, (1 << 3) as u8);
    // Global System Interrupt used by the UART
    spcr.write(54, gsi.to_le());
    // Baud Rate: 3 = 9600
    spcr.write(58, 3u8);
    // Stop Bits: 1 Stop bit
    spcr.write(60, 1u8);
    // Flow Control: Bit[1] = RTS/CTS hardware flow control
    spcr.write(61, (1 << 1) as u8);
    // PCI Device ID: Not a PCI device
    spcr.write(64, 0xffff_u16);
    // PCI Vendor ID: Not a PCI device
    spcr.write(66, 0xffff_u16);

    spcr.update_checksum();

    spcr
}

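// IORT layout produced below (offsets from the start of the table):
//
//     0    ACPI header (36 bytes) + node count/offset fields
//     48   ITS group node, 24 bytes
//     72   PCI root complex node, 52 bytes (ID mapping array at 72 + 32)
//     124  end of table
//
// The root complex's single ID mapping covers the whole RID range and
// outputs to the ITS group node at offset 48, since no SMMU is interposed.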
#[cfg(target_arch = "aarch64")]
fn create_iort_table() -> Sdt {
    const ACPI_IORT_NODE_ITS_GROUP: u8 = 0x00;
    const ACPI_IORT_NODE_PCI_ROOT_COMPLEX: u8 = 0x02;

    // IORT
    let mut iort = Sdt::new(*b"IORT", 124, 2, *b"CLOUDH", *b"CHIORT  ", 1);
    // Nodes: PCI Root Complex, ITS
    // Note: We currently do not support SMMU
    iort.write(36, (2u32).to_le());
    iort.write(40, (48u32).to_le());

    // ITS group node
    iort.write(48, ACPI_IORT_NODE_ITS_GROUP);
    // Length of the ITS group node in bytes
    iort.write(49, (24u16).to_le());
    // ITS counts
    iort.write(64, (1u32).to_le());

    // Root Complex Node
    iort.write(72, ACPI_IORT_NODE_PCI_ROOT_COMPLEX);
    // Length of the root complex node in bytes
    iort.write(73, (52u16).to_le());
    // Mapping counts
    iort.write(80, (1u32).to_le());
    // Offset from the start of the RC node to the start of its Array of ID mappings
    iort.write(84, (32u32).to_le());
    // Fully coherent device
    iort.write(88, (1u32).to_le());
    // CCA = CPM = DCAS = 1
    iort.write(95, 3u8);
    // Identity RID mapping covering the whole input RID range
    iort.write(108, (0xffff_u32).to_le());
    // id_mapping_array_output_reference should be
    // the ITS group node (the first node) if no SMMU
    iort.write(116, (48u32).to_le());

    iort.update_checksum();

    iort
}

fn create_viot_table(iommu_bdf: u32, devices_bdf: &[u32]) -> Sdt {
    // VIOT
    let mut viot = Sdt::new(*b"VIOT", 36, 0, *b"CLOUDH", *b"CHVIOT  ", 0);
    // Node count
    viot.append((devices_bdf.len() + 1) as u16);
    // Node offset
    viot.append(48u16);
    // VIOT reserved 8 bytes
    viot.append_slice(&[0u8; 8]);

    // Virtio-iommu based on virtio-pci node
    viot.append(ViotVirtioPciNode {
        type_: 3,
        length: 16,
        pci_segment: 0,
        pci_bdf_number: iommu_bdf as u16,
        ..Default::default()
    });

    for device_bdf in devices_bdf {
        viot.append(ViotPciRangeNode {
            type_: 1,
            length: 24,
            endpoint_start: *device_bdf,
            pci_segment_start: 0,
            pci_segment_end: 0,
            pci_bdf_start: *device_bdf as u16,
            pci_bdf_end: *device_bdf as u16,
            output_node: 48,
            ..Default::default()
        });
    }

    viot
}

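// create_acpi_tables() lays the tables out contiguously in guest memory,
// starting at the architecture's RSDP pointer. Roughly:
//
//     RSDP_POINTER : RSDP (points to the XSDT)
//     + Rsdp::len  : DSDT
//                    FACP (references the DSDT through X_DSDT)
//                    MADT, [GTDT], MCFG, [SPCR], [SRAT, SLIT], [IORT], [VIOT]
//                    XSDT (64-bit pointers to every table except the DSDT)
//
// Bracketed tables are only present on aarch64 or with the corresponding
// configuration (NUMA nodes, virtio-iommu attached devices).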
pub fn create_acpi_tables(
    guest_mem: &GuestMemoryMmap,
    device_manager: &Arc<Mutex<DeviceManager>>,
    cpu_manager: &Arc<Mutex<CpuManager>>,
    memory_manager: &Arc<Mutex<MemoryManager>>,
    numa_nodes: &NumaNodes,
) -> GuestAddress {
    let mut prev_tbl_len: u64;
    let mut prev_tbl_off: GuestAddress;
    let rsdp_offset = arch::layout::RSDP_POINTER;
    let mut tables: Vec<u64> = Vec::new();

    // DSDT
    let dsdt = create_dsdt_table(device_manager, cpu_manager, memory_manager);
    let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap();
    guest_mem
        .write_slice(dsdt.as_slice(), dsdt_offset)
        .expect("Error writing DSDT table");

    // FACP aka FADT
    let facp = create_facp_table(dsdt_offset);
    let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap();
    guest_mem
        .write_slice(facp.as_slice(), facp_offset)
        .expect("Error writing FACP table");
    tables.push(facp_offset.0);

    // MADT
    let madt = cpu_manager.lock().unwrap().create_madt();
    let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap();
    guest_mem
        .write_slice(madt.as_slice(), madt_offset)
        .expect("Error writing MADT table");
    tables.push(madt_offset.0);
    prev_tbl_len = madt.len() as u64;
    prev_tbl_off = madt_offset;

    // GTDT
    #[cfg(target_arch = "aarch64")]
    {
        let gtdt = create_gtdt_table();
        let gtdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(gtdt.as_slice(), gtdt_offset)
            .expect("Error writing GTDT table");
        tables.push(gtdt_offset.0);
        prev_tbl_len = gtdt.len() as u64;
        prev_tbl_off = gtdt_offset;
    }

    // MCFG
    let mcfg = create_mcfg_table();
    let mcfg_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(mcfg.as_slice(), mcfg_offset)
        .expect("Error writing MCFG table");
    tables.push(mcfg_offset.0);
    prev_tbl_len = mcfg.len() as u64;
    prev_tbl_off = mcfg_offset;

    // SPCR
    #[cfg(target_arch = "aarch64")]
    {
        let is_serial_on = device_manager
            .lock()
            .unwrap()
            .get_device_info()
            .clone()
            .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
            .is_some();
        let serial_device_addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
        let serial_device_irq = if is_serial_on {
            device_manager
                .lock()
                .unwrap()
                .get_device_info()
                .clone()
                .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                .unwrap()
                .irq()
        } else {
            // If serial is turned off, add a fake device with invalid irq.
            31
        };
        let spcr = create_spcr_table(serial_device_addr, serial_device_irq);
        let spcr_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(spcr.as_slice(), spcr_offset)
            .expect("Error writing SPCR table");
        tables.push(spcr_offset.0);
        prev_tbl_len = spcr.len() as u64;
        prev_tbl_off = spcr_offset;
    }

    // SRAT and SLIT
    // Only created if the NUMA nodes list is not empty.
    if !numa_nodes.is_empty() {
        // SRAT
        let srat = create_srat_table(numa_nodes);
        let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(srat.as_slice(), srat_offset)
            .expect("Error writing SRAT table");
        tables.push(srat_offset.0);

        // SLIT
        let slit = create_slit_table(numa_nodes);
        let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap();
        guest_mem
            .write_slice(slit.as_slice(), slit_offset)
            .expect("Error writing SLIT table");
        tables.push(slit_offset.0);

        prev_tbl_len = slit.len() as u64;
        prev_tbl_off = slit_offset;
    };

    // IORT
    #[cfg(target_arch = "aarch64")]
    {
        let iort = create_iort_table();
        let iort_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(iort.as_slice(), iort_offset)
            .expect("Error writing IORT table");
        tables.push(iort_offset.0);
        prev_tbl_len = iort.len() as u64;
        prev_tbl_off = iort_offset;
    }

    // VIOT
    if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
    {
        let viot = create_viot_table(*iommu_bdf, devices_bdf);

        let viot_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
        guest_mem
            .write_slice(viot.as_slice(), viot_offset)
            .expect("Error writing VIOT table");
        tables.push(viot_offset.0);
        prev_tbl_len = viot.len() as u64;
        prev_tbl_off = viot_offset;
    }

    // XSDT
    let mut xsdt = Sdt::new(*b"XSDT", 36, 1, *b"CLOUDH", *b"CHXSDT  ", 1);
    for table in tables {
        xsdt.append(table);
    }
    xsdt.update_checksum();
    let xsdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
    guest_mem
        .write_slice(xsdt.as_slice(), xsdt_offset)
        .expect("Error writing XSDT table");

    // RSDP
    let rsdp = Rsdp::new(*b"CLOUDH", xsdt_offset.0);
    guest_mem
        .write_slice(rsdp.as_slice(), rsdp_offset)
        .expect("Error writing RSDP");

    rsdp_offset
}

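#[cfg(test)]
mod tests {
    use super::*;

    // Minimal sanity check (not exhaustive) of the 64-bit address/length
    // split performed by MemoryAffinity::from_range() and of the flag
    // encoding. The range and proximity domain below are illustrative
    // values, not taken from a real VM configuration.
    #[test]
    fn test_memory_affinity_from_range() {
        // A 6 GiB range based at 4 GiB, in proximity domain 2.
        let entry = MemoryAffinity::from_range(
            0x1_0000_0000,
            0x1_8000_0000,
            2,
            MemAffinityFlags::ENABLE | MemAffinityFlags::HOTPLUGGABLE,
        );

        // Copy the packed fields into locals first: taking references into
        // a #[repr(packed)] struct (as assert_eq! would) is not allowed.
        let (type_, length) = (entry.type_, entry.length);
        let (base_lo, base_hi) = (entry.base_addr_lo, entry.base_addr_hi);
        let (len_lo, len_hi) = (entry.length_lo, entry.length_hi);
        let (domain, flags) = (entry.proximity_domain, entry.flags);

        // Type 1 is the SRAT Memory Affinity structure, 40 bytes long.
        assert_eq!(type_, 1);
        assert_eq!(length, 40);
        assert_eq!(base_lo, 0);
        assert_eq!(base_hi, 0x1);
        assert_eq!(len_lo, 0x8000_0000);
        assert_eq!(len_hi, 0x1);
        assert_eq!(domain, 2);
        // ENABLE (bit 0) | HOTPLUGGABLE (bit 1)
        assert_eq!(flags, 0b11);
    }
}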