xref: /cloud-hypervisor/vmm/src/acpi.rs (revision 9af2968a7dc47b89bf07ea9dc5e735084efcfa3a)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 use crate::cpu::CpuManager;
6 use crate::device_manager::DeviceManager;
7 use crate::memory_manager::MemoryManager;
8 use crate::vm::NumaNodes;
9 use crate::{GuestMemoryMmap, GuestRegionMmap};
10 use acpi_tables::sdt::GenericAddress;
11 use acpi_tables::{aml::Aml, rsdp::Rsdp, sdt::Sdt};
12 #[cfg(target_arch = "aarch64")]
13 use arch::aarch64::DeviceInfoForFdt;
14 #[cfg(target_arch = "aarch64")]
15 use arch::DeviceType;
16 
17 use bitflags::bitflags;
18 use std::sync::{Arc, Mutex};
19 use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryRegion};
20 
/* Values for Type in APIC sub-headers */
// These match the "Interrupt Controller Structure Types" used in the MADT:
// x86_64 uses the Local APIC / IO-APIC / interrupt-override entries, while
// aarch64 uses the GIC CPU interface / distributor / redistributor / ITS
// (translator) entries.
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_PROCESSOR: u8 = 0;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_IO: u8 = 1;
#[cfg(target_arch = "x86_64")]
pub const ACPI_APIC_XRUPT_OVERRIDE: u8 = 2;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_CPU_INTERFACE: u8 = 11;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_DISTRIBUTOR: u8 = 12;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_REDISTRIBUTOR: u8 = 14;
#[cfg(target_arch = "aarch64")]
pub const ACPI_APIC_GENERIC_TRANSLATOR: u8 = 15;
36 
/// MCFG "Configuration Space Base Address Allocation" entry: describes one
/// contiguous ECAM region (`base_address`) for PCI `segment`, covering bus
/// numbers `start` through `end`. Layout is fixed by the PCI Firmware spec,
/// hence `#[repr(packed)]` — do not reorder or resize fields.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct PciRangeEntry {
    pub base_address: u64,
    pub segment: u16,
    pub start: u8,
    pub end: u8,
    _reserved: u32,
}
47 
/// SRAT "Memory Affinity" structure (type 1, 40 bytes): binds a physical
/// memory range (split into lo/hi 32-bit halves) to a proximity domain.
/// Layout is fixed by the ACPI spec, hence `#[repr(packed)]`.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct MemoryAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    _reserved1: u16,
    pub base_addr_lo: u32,
    pub base_addr_hi: u32,
    pub length_lo: u32,
    pub length_hi: u32,
    _reserved2: u32,
    pub flags: u32,
    _reserved3: u64,
}
64 
/// SRAT "Processor Local x2APIC Affinity" structure (type 2, 24 bytes):
/// binds an x86 CPU (by x2APIC ID) to a proximity domain.
/// Layout is fixed by the ACPI spec, hence `#[repr(packed)]`.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ProcessorLocalX2ApicAffinity {
    pub type_: u8,
    pub length: u8,
    _reserved1: u16,
    pub proximity_domain: u32,
    pub x2apic_id: u32,
    pub flags: u32,
    pub clock_domain: u32,
    _reserved2: u32,
}
78 
/// SRAT "GICC Affinity" structure (type 3, 18 bytes): binds an Arm CPU
/// (by ACPI processor UID) to a proximity domain.
/// Layout is fixed by the ACPI spec, hence `#[repr(packed)]`.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ProcessorGiccAffinity {
    pub type_: u8,
    pub length: u8,
    pub proximity_domain: u32,
    pub acpi_processor_uid: u32,
    pub flags: u32,
    pub clock_domain: u32,
}
90 
bitflags! {
    /// Flags field of the SRAT Memory Affinity structure.
    pub struct MemAffinityFlags: u32 {
        /// No flags set.
        const NOFLAGS = 0;
        /// Range is enabled and usable by the OS.
        const ENABLE = 0b1;
        /// Range supports memory hot(un)plug.
        const HOTPLUGGABLE = 0b10;
        /// Range is non-volatile (e.g. persistent memory).
        const NON_VOLATILE = 0b100;
    }
}
99 
100 impl MemoryAffinity {
101     fn from_region(
102         region: &Arc<GuestRegionMmap>,
103         proximity_domain: u32,
104         flags: MemAffinityFlags,
105     ) -> Self {
106         Self::from_range(
107             region.start_addr().raw_value(),
108             region.len(),
109             proximity_domain,
110             flags,
111         )
112     }
113 
114     fn from_range(
115         base_addr: u64,
116         size: u64,
117         proximity_domain: u32,
118         flags: MemAffinityFlags,
119     ) -> Self {
120         let base_addr_lo = (base_addr & 0xffff_ffff) as u32;
121         let base_addr_hi = (base_addr >> 32) as u32;
122         let length_lo = (size & 0xffff_ffff) as u32;
123         let length_hi = (size >> 32) as u32;
124 
125         MemoryAffinity {
126             type_: 1,
127             length: 40,
128             proximity_domain,
129             base_addr_lo,
130             base_addr_hi,
131             length_lo,
132             length_hi,
133             flags: flags.bits(),
134             ..Default::default()
135         }
136     }
137 }
138 
/// VIOT "virtio-pci IOMMU" node (type 3, 16 bytes): identifies the
/// virtio-iommu device by PCI segment and BDF number.
/// Layout is fixed by the VIOT spec, hence `#[repr(packed)]`.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ViotVirtioPciNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub pci_segment: u16,
    pub pci_bdf_number: u16,
    _reserved2: [u8; 8],
}
150 
/// VIOT "PCI range" node (type 1, 24 bytes): maps a range of PCI endpoints
/// (by segment and BDF) to the IOMMU node located at `output_node` within
/// the table. Layout is fixed by the VIOT spec, hence `#[repr(packed)]`.
#[allow(dead_code)]
#[repr(packed)]
#[derive(Default)]
struct ViotPciRangeNode {
    pub type_: u8,
    _reserved: u8,
    pub length: u16,
    pub endpoint_start: u32,
    pub pci_segment_start: u16,
    pub pci_segment_end: u16,
    pub pci_bdf_start: u16,
    pub pci_bdf_end: u16,
    pub output_node: u16,
    _reserved2: [u8; 6],
}
166 
167 pub fn create_dsdt_table(
168     device_manager: &Arc<Mutex<DeviceManager>>,
169     cpu_manager: &Arc<Mutex<CpuManager>>,
170     memory_manager: &Arc<Mutex<MemoryManager>>,
171 ) -> Sdt {
172     // DSDT
173     let mut dsdt = Sdt::new(*b"DSDT", 36, 6, *b"CLOUDH", *b"CHDSDT  ", 1);
174 
175     dsdt.append_slice(device_manager.lock().unwrap().to_aml_bytes().as_slice());
176     dsdt.append_slice(cpu_manager.lock().unwrap().to_aml_bytes().as_slice());
177     dsdt.append_slice(memory_manager.lock().unwrap().to_aml_bytes().as_slice());
178 
179     dsdt
180 }
181 
/// Build the FACP (Fixed ACPI Description Table, a.k.a. FADT), revision 6.
///
/// `dsdt_offset` is the guest-physical address where the DSDT was written;
/// it is stored in the X_DSDT field so the guest can locate it. All `write`
/// offsets below are byte offsets into the FADT defined by the ACPI spec.
fn create_facp_table(dsdt_offset: GuestAddress) -> Sdt {
    // Revision 6 of the ACPI FADT table is 276 bytes long
    let mut facp = Sdt::new(*b"FACP", 276, 6, *b"CLOUDH", *b"CHFACP  ", 1);

    // x86_64 specific fields
    #[cfg(target_arch = "x86_64")]
    {
        // PM_TMR_BLK I/O port
        facp.write(76, 0xb008u32);
        // RESET_REG
        facp.write(116, GenericAddress::io_port_address::<u8>(0x3c0));
        // RESET_VALUE
        facp.write(128, 1u8);
        // X_PM_TMR_BLK
        facp.write(208, GenericAddress::io_port_address::<u32>(0xb008));
        // SLEEP_CONTROL_REG
        facp.write(244, GenericAddress::io_port_address::<u8>(0x3c0));
        // SLEEP_STATUS_REG
        facp.write(256, GenericAddress::io_port_address::<u8>(0x3c0));
    }

    // aarch64 specific fields
    #[cfg(target_arch = "aarch64")]
    // ARM_BOOT_ARCH: enable PSCI with HVC enable-method
    facp.write(129, 3u16);

    // Architecture common fields
    // HW_REDUCED_ACPI, RESET_REG_SUP, TMR_VAL_EXT
    let fadt_flags: u32 = 1 << 20 | 1 << 10 | 1 << 8;
    facp.write(112, fadt_flags);
    // FADT minor version
    facp.write(131, 3u8);
    // X_DSDT: 64-bit physical address of the DSDT
    facp.write(140, dsdt_offset.0);
    // Hypervisor Vendor Identity
    facp.write(268, b"CLOUDHYP");

    facp.update_checksum();

    facp
}
223 
224 fn create_mcfg_table() -> Sdt {
225     let mut mcfg = Sdt::new(*b"MCFG", 36, 1, *b"CLOUDH", *b"CHMCFG  ", 1);
226 
227     // MCFG reserved 8 bytes
228     mcfg.append(0u64);
229 
230     // 32-bit PCI enhanced configuration mechanism
231     mcfg.append(PciRangeEntry {
232         base_address: arch::layout::PCI_MMCONFIG_START.0,
233         segment: 0,
234         start: 0,
235         end: ((arch::layout::PCI_MMCONFIG_SIZE - 1) >> 20) as u8,
236         ..Default::default()
237     });
238     mcfg
239 }
240 
/// Build the SRAT (System Resource Affinity Table), describing which memory
/// ranges and which CPUs belong to which NUMA proximity domain.
///
/// For every NUMA node this emits: one Memory Affinity entry per guest
/// memory region and per hotplug region (and, on x86_64, per SGX EPC
/// section), plus one per-CPU affinity entry (x2APIC on x86_64, GICC on
/// aarch64).
fn create_srat_table(numa_nodes: &NumaNodes) -> Sdt {
    let mut srat = Sdt::new(*b"SRAT", 36, 3, *b"CLOUDH", *b"CHSRAT  ", 1);
    // SRAT reserved 12 bytes
    srat.append_slice(&[0u8; 12]);

    // Check the MemoryAffinity structure is the right size as expected by
    // the ACPI specification.
    assert_eq!(std::mem::size_of::<MemoryAffinity>(), 40);

    for (node_id, node) in numa_nodes.iter() {
        // The NUMA node id is used directly as the ACPI proximity domain.
        let proximity_domain = *node_id as u32;

        for region in node.memory_regions() {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        // Hotplug regions are additionally flagged HOTPLUGGABLE so the
        // guest knows they may grow/shrink at runtime.
        for region in node.hotplug_regions() {
            srat.append(MemoryAffinity::from_region(
                region,
                proximity_domain,
                MemAffinityFlags::ENABLE | MemAffinityFlags::HOTPLUGGABLE,
            ))
        }

        #[cfg(target_arch = "x86_64")]
        for section in node.sgx_epc_sections() {
            srat.append(MemoryAffinity::from_range(
                section.start().raw_value(),
                section.size(),
                proximity_domain,
                MemAffinityFlags::ENABLE,
            ))
        }

        for cpu in node.cpus() {
            // The vCPU id doubles as the x2APIC id (x86_64) or the ACPI
            // processor UID (aarch64).
            let x2apic_id = *cpu as u32;

            // Flags
            // - Enabled = 1 (bit 0)
            // - Reserved bits 1-31
            let flags = 1;

            #[cfg(target_arch = "x86_64")]
            srat.append(ProcessorLocalX2ApicAffinity {
                type_: 2,
                length: 24,
                proximity_domain,
                x2apic_id,
                flags,
                clock_domain: 0,
                ..Default::default()
            });
            #[cfg(target_arch = "aarch64")]
            srat.append(ProcessorGiccAffinity {
                type_: 3,
                length: 18,
                proximity_domain,
                acpi_processor_uid: x2apic_id,
                flags,
                clock_domain: 0,
            });
        }
    }
    srat
}
310 
311 fn create_slit_table(numa_nodes: &NumaNodes) -> Sdt {
312     let mut slit = Sdt::new(*b"SLIT", 36, 1, *b"CLOUDH", *b"CHSLIT  ", 1);
313     // Number of System Localities on 8 bytes.
314     slit.append(numa_nodes.len() as u64);
315 
316     let existing_nodes: Vec<u32> = numa_nodes.keys().cloned().collect();
317     for (node_id, node) in numa_nodes.iter() {
318         let distances = node.distances();
319         for i in existing_nodes.iter() {
320             let dist: u8 = if *node_id == *i {
321                 10
322             } else if let Some(distance) = distances.get(i) {
323                 *distance as u8
324             } else {
325                 20
326             };
327 
328             slit.append(dist);
329         }
330     }
331     slit
332 }
333 
#[cfg(target_arch = "aarch64")]
/// Build the GTDT (Generic Timer Description Table) advertising the four
/// architectural timer interrupts. PPI numbers are offset by 16 to convert
/// them into GSIVs.
fn create_gtdt_table() -> Sdt {
    const ARCH_TIMER_NS_EL2_IRQ: u32 = 10;
    const ARCH_TIMER_VIRT_IRQ: u32 = 11;
    const ARCH_TIMER_S_EL1_IRQ: u32 = 13;
    const ARCH_TIMER_NS_EL1_IRQ: u32 = 14;
    const ACPI_GTDT_INTERRUPT_MODE_LEVEL: u32 = 0;
    const ACPI_GTDT_CAP_ALWAYS_ON: u32 = 1 << 2;

    let irqflags: u32 = ACPI_GTDT_INTERRUPT_MODE_LEVEL;
    // GTDT
    let mut gtdt = Sdt::new(*b"GTDT", 104, 2, *b"CLOUDH", *b"CHGTDT  ", 1);
    // Secure EL1 Timer GSIV
    gtdt.write(48, (ARCH_TIMER_S_EL1_IRQ + 16) as u32);
    // Secure EL1 Timer Flags
    gtdt.write(52, irqflags);
    // Non-Secure EL1 Timer GSIV
    gtdt.write(56, (ARCH_TIMER_NS_EL1_IRQ + 16) as u32);
    // Non-Secure EL1 Timer Flags
    gtdt.write(60, (irqflags | ACPI_GTDT_CAP_ALWAYS_ON) as u32);
    // Virtual EL1 Timer GSIV
    gtdt.write(64, (ARCH_TIMER_VIRT_IRQ + 16) as u32);
    // Virtual EL1 Timer Flags
    gtdt.write(68, irqflags);
    // EL2 Timer GSIV
    gtdt.write(72, (ARCH_TIMER_NS_EL2_IRQ + 16) as u32);
    // EL2 Timer Flags
    gtdt.write(76, irqflags);

    gtdt.update_checksum();

    gtdt
}
367 
#[cfg(target_arch = "aarch64")]
/// Build the SPCR (Serial Port Console Redirection) table pointing the
/// guest at the MMIO UART located at `base_address`, signalling interrupt
/// `gsi` through the GIC.
fn create_spcr_table(base_address: u64, gsi: u32) -> Sdt {
    // SPCR
    let mut spcr = Sdt::new(*b"SPCR", 80, 2, *b"CLOUDH", *b"CHSPCR  ", 1);
    // Interface Type
    spcr.write(36, 3u8);
    // Base Address in format ACPI Generic Address Structure
    spcr.write(40, GenericAddress::mmio_address::<u8>(base_address));
    // Interrupt Type: Bit[3] ARMH GIC interrupt
    spcr.write(52, (1 << 3) as u8);
    // Global System Interrupt used by the UART
    spcr.write(54, (gsi as u32).to_le());
    // Baud Rate: 3 = 9600
    spcr.write(58, 3u8);
    // Stop Bits: 1 Stop bit
    spcr.write(60, 1u8);
    // Flow Control: Bit[1] = RTS/CTS hardware flow control
    spcr.write(61, (1 << 1) as u8);
    // PCI Device ID: Not a PCI device
    spcr.write(64, 0xffff_u16);
    // PCI Vendor ID: Not a PCI device
    spcr.write(66, 0xffff_u16);

    spcr.update_checksum();

    spcr
}
395 
#[cfg(target_arch = "aarch64")]
/// Build the IORT (I/O Remapping Table) with two nodes: an ITS group at
/// table offset 48 and a PCI root complex at offset 72 whose identity RID
/// mapping outputs into the ITS group. All `write` offsets below are
/// absolute byte offsets into the table.
fn create_iort_table() -> Sdt {
    const ACPI_IORT_NODE_ITS_GROUP: u8 = 0x00;
    const ACPI_IORT_NODE_PCI_ROOT_COMPLEX: u8 = 0x02;

    // IORT
    let mut iort = Sdt::new(*b"IORT", 124, 2, *b"CLOUDH", *b"CHIORT  ", 1);
    // Nodes: PCI Root Complex, ITS
    // Note: We currently do not support SMMU
    iort.write(36, (2u32).to_le());
    iort.write(40, (48u32).to_le());

    // ITS group node
    iort.write(48, ACPI_IORT_NODE_ITS_GROUP as u8);
    // Length of the ITS group node in bytes
    iort.write(49, (24u16).to_le());
    // ITS counts
    iort.write(64, (1u32).to_le());

    // Root Complex Node
    iort.write(72, ACPI_IORT_NODE_PCI_ROOT_COMPLEX as u8);
    // Length of the root complex node in bytes
    iort.write(73, (52u16).to_le());
    // Mapping counts
    iort.write(80, (1u32).to_le());
    // Offset from the start of the RC node to the start of its Array of ID mappings
    iort.write(84, (32u32).to_le());
    // Fully coherent device
    iort.write(88, (1u32).to_le());
    // CCA = CPM = DCAS = 1
    iort.write(95, 3u8);
    // Identity RID mapping covering the whole input RID range
    iort.write(108, (0xffff_u32).to_le());
    // id_mapping_array_output_reference should be
    // the ITS group node (the first node) if no SMMU
    iort.write(116, (48u32).to_le());

    iort.update_checksum();

    iort
}
437 
438 fn create_viot_table(iommu_bdf: u32, devices_bdf: &[u32]) -> Sdt {
439     // VIOT
440     let mut viot = Sdt::new(*b"VIOT", 36, 0, *b"CLOUDH", *b"CHVIOT  ", 0);
441     // Node count
442     viot.append((devices_bdf.len() + 1) as u16);
443     // Node offset
444     viot.append(48u16);
445     // VIOT reserved 8 bytes
446     viot.append_slice(&[0u8; 8]);
447 
448     // Virtio-iommu based on virtio-pci node
449     viot.append(ViotVirtioPciNode {
450         type_: 3,
451         length: 16,
452         pci_segment: 0,
453         pci_bdf_number: iommu_bdf as u16,
454         ..Default::default()
455     });
456 
457     for device_bdf in devices_bdf {
458         viot.append(ViotPciRangeNode {
459             type_: 1,
460             length: 24,
461             endpoint_start: *device_bdf,
462             pci_segment_start: 0,
463             pci_segment_end: 0,
464             pci_bdf_start: *device_bdf as u16,
465             pci_bdf_end: *device_bdf as u16,
466             output_node: 48,
467             ..Default::default()
468         });
469     }
470 
471     viot
472 }
473 
474 pub fn create_acpi_tables(
475     guest_mem: &GuestMemoryMmap,
476     device_manager: &Arc<Mutex<DeviceManager>>,
477     cpu_manager: &Arc<Mutex<CpuManager>>,
478     memory_manager: &Arc<Mutex<MemoryManager>>,
479     numa_nodes: &NumaNodes,
480 ) -> GuestAddress {
481     let mut prev_tbl_len: u64;
482     let mut prev_tbl_off: GuestAddress;
483     let rsdp_offset = arch::layout::RSDP_POINTER;
484     let mut tables: Vec<u64> = Vec::new();
485 
486     // DSDT
487     let dsdt = create_dsdt_table(device_manager, cpu_manager, memory_manager);
488     let dsdt_offset = rsdp_offset.checked_add(Rsdp::len() as u64).unwrap();
489     guest_mem
490         .write_slice(dsdt.as_slice(), dsdt_offset)
491         .expect("Error writing DSDT table");
492 
493     // FACP aka FADT
494     let facp = create_facp_table(dsdt_offset);
495     let facp_offset = dsdt_offset.checked_add(dsdt.len() as u64).unwrap();
496     guest_mem
497         .write_slice(facp.as_slice(), facp_offset)
498         .expect("Error writing FACP table");
499     tables.push(facp_offset.0);
500 
501     // MADT
502     let madt = cpu_manager.lock().unwrap().create_madt();
503     let madt_offset = facp_offset.checked_add(facp.len() as u64).unwrap();
504     guest_mem
505         .write_slice(madt.as_slice(), madt_offset)
506         .expect("Error writing MADT table");
507     tables.push(madt_offset.0);
508     prev_tbl_len = madt.len() as u64;
509     prev_tbl_off = madt_offset;
510 
511     // GTDT
512     #[cfg(target_arch = "aarch64")]
513     {
514         let gtdt = create_gtdt_table();
515         let gtdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
516         guest_mem
517             .write_slice(gtdt.as_slice(), gtdt_offset)
518             .expect("Error writing GTDT table");
519         tables.push(gtdt_offset.0);
520         prev_tbl_len = gtdt.len() as u64;
521         prev_tbl_off = gtdt_offset;
522     }
523 
524     // MCFG
525     let mcfg = create_mcfg_table();
526     let mcfg_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
527     guest_mem
528         .write_slice(mcfg.as_slice(), mcfg_offset)
529         .expect("Error writing MCFG table");
530     tables.push(mcfg_offset.0);
531     prev_tbl_len = mcfg.len() as u64;
532     prev_tbl_off = mcfg_offset;
533 
534     // SPCR
535     #[cfg(target_arch = "aarch64")]
536     {
537         let is_serial_on = device_manager
538             .lock()
539             .unwrap()
540             .get_device_info()
541             .clone()
542             .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
543             .is_some();
544         let serial_device_addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
545         let serial_device_irq = if is_serial_on {
546             device_manager
547                 .lock()
548                 .unwrap()
549                 .get_device_info()
550                 .clone()
551                 .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
552                 .unwrap()
553                 .irq()
554         } else {
555             // If serial is turned off, add a fake device with invalid irq.
556             31
557         };
558         let spcr = create_spcr_table(serial_device_addr, serial_device_irq);
559         let spcr_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
560         guest_mem
561             .write_slice(spcr.as_slice(), spcr_offset)
562             .expect("Error writing SPCR table");
563         tables.push(spcr_offset.0);
564         prev_tbl_len = spcr.len() as u64;
565         prev_tbl_off = spcr_offset;
566     }
567 
568     // SRAT and SLIT
569     // Only created if the NUMA nodes list is not empty.
570     if !numa_nodes.is_empty() {
571         // SRAT
572         let srat = create_srat_table(numa_nodes);
573         let srat_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
574         guest_mem
575             .write_slice(srat.as_slice(), srat_offset)
576             .expect("Error writing SRAT table");
577         tables.push(srat_offset.0);
578 
579         // SLIT
580         let slit = create_slit_table(numa_nodes);
581         let slit_offset = srat_offset.checked_add(srat.len() as u64).unwrap();
582         guest_mem
583             .write_slice(slit.as_slice(), slit_offset)
584             .expect("Error writing SRAT table");
585         tables.push(slit_offset.0);
586 
587         prev_tbl_len = slit.len() as u64;
588         prev_tbl_off = slit_offset;
589     };
590 
591     #[cfg(target_arch = "aarch64")]
592     {
593         let iort = create_iort_table();
594         let iort_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
595         guest_mem
596             .write_slice(iort.as_slice(), iort_offset)
597             .expect("Error writing IORT table");
598         tables.push(iort_offset.0);
599         prev_tbl_len = iort.len() as u64;
600         prev_tbl_off = iort_offset;
601     }
602 
603     // VIOT
604     if let Some((iommu_bdf, devices_bdf)) = device_manager.lock().unwrap().iommu_attached_devices()
605     {
606         let viot = create_viot_table(*iommu_bdf, devices_bdf);
607 
608         let viot_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
609         guest_mem
610             .write_slice(viot.as_slice(), viot_offset)
611             .expect("Error writing VIOT table");
612         tables.push(viot_offset.0);
613         prev_tbl_len = viot.len() as u64;
614         prev_tbl_off = viot_offset;
615     }
616 
617     // XSDT
618     let mut xsdt = Sdt::new(*b"XSDT", 36, 1, *b"CLOUDH", *b"CHXSDT  ", 1);
619     for table in tables {
620         xsdt.append(table);
621     }
622     xsdt.update_checksum();
623     let xsdt_offset = prev_tbl_off.checked_add(prev_tbl_len).unwrap();
624     guest_mem
625         .write_slice(xsdt.as_slice(), xsdt_offset)
626         .expect("Error writing XSDT table");
627 
628     // RSDP
629     let rsdp = Rsdp::new(*b"CLOUDH", xsdt_offset.0);
630     guest_mem
631         .write_slice(rsdp.as_slice(), rsdp_offset)
632         .expect("Error writing RSDP");
633 
634     rsdp_offset
635 }
636