xref: /cloud-hypervisor/arch/src/aarch64/fdt.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // Copyright 2020 Arm Limited (or its affiliates). All rights reserved.
2 // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the THIRD-PARTY file.
8 
9 use crate::{NumaNodes, PciSpaceInfo};
10 use byteorder::{BigEndian, ByteOrder};
11 use std::cmp;
12 use std::collections::HashMap;
13 use std::ffi::CStr;
14 use std::fmt::Debug;
15 use std::result;
16 use std::str;
17 
18 use super::super::DeviceType;
19 use super::super::GuestMemoryMmap;
20 use super::super::InitramfsConfig;
21 use super::gic::GicDevice;
22 use super::layout::{
23     IRQ_BASE, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_PCI_IO_SIZE, MEM_PCI_IO_START,
24     PCI_HIGH_BASE, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT,
25 };
26 use vm_fdt::{FdtWriter, FdtWriterResult};
27 use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryRegion};
28 
// Phandles. Each one uniquely identifies an FDT node so that other nodes can reference it.

/// Phandle of the interrupt controller node.
const GIC_PHANDLE: u32 = 1;
/// Phandle of the MSI controller node.
const MSI_PHANDLE: u32 = 2;
/// Phandle of the node holding the clock definition.
const CLOCK_PHANDLE: u32 = 3;
/// Phandle of the GPIO controller node.
const GPIO_PHANDLE: u32 = 4;
/// Phandle of the virtio-iommu node. A single virtio-iommu device is supported for now.
const VIRTIO_IOMMU_PHANDLE: u32 = 5;
/// Phandle of the first vCPU node.
///
/// NOTE: this must remain the last (highest) phandle constant: vCPU `i` uses
/// `FIRST_VCPU_PHANDLE + i`, so the range it occupies grows with the vCPU count.
const FIRST_VCPU_PHANDLE: u32 = 6;

// Cell counts advertised by the root node; see the documentation referenced where the
// root node is appended to the FDT.
const ADDRESS_CELLS: u32 = 2;
const SIZE_CELLS: u32 = 2;

// Interrupt specifier encoding, as used by kvm tool and described in
// https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt
// (look for "The 1st cell...").
/// First specifier cell for a shared peripheral interrupt (SPI).
const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
/// First specifier cell for a private peripheral interrupt (PPI).
const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;
/// Bit offset of the PPI CPU mask inside the third specifier cell.
const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8;
/// Bits occupied by the PPI CPU mask inside the third specifier cell.
const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << GIC_FDT_IRQ_PPI_CPU_SHIFT;

// Trigger types, from
// https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17
const IRQ_TYPE_EDGE_RISING: u32 = 1;
const IRQ_TYPE_LEVEL_HI: u32 = 4;

/// PPI interrupt number used for the PMU.
pub const AARCH64_PMU_IRQ: u32 = 7;

// Keys and buttons.
/// Key code reported for "System Power Down".
const KEY_POWER: u32 = 116;
66 
/// Information a device must expose so that a node describing it can be added to the
/// Flattened Device Tree.
pub trait DeviceInfoForFdt {
    /// Address at which the device is placed; used as the base of the node's `reg`
    /// property and as the unit address in the node name.
    fn addr(&self) -> u64;
    /// Interrupt number associated with the device.
    fn irq(&self) -> u32;
    /// Size of the memory range reserved for the device; used as the length in `reg`.
    fn length(&self) -> u64;
}
76 
77 /// Errors thrown while configuring the Flattened Device Tree for aarch64.
78 #[derive(Debug)]
79 pub enum Error {
80     /// Failure in writing FDT in memory.
81     WriteFdtToMemory(GuestMemoryError),
82 }
83 type Result<T> = result::Result<T, Error>;
84 
85 /// Creates the flattened device tree for this aarch64 VM.
86 #[allow(clippy::too_many_arguments)]
87 pub fn create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
88     guest_mem: &GuestMemoryMmap,
89     cmdline: &str,
90     vcpu_mpidr: Vec<u64>,
91     vcpu_topology: Option<(u8, u8, u8)>,
92     device_info: &HashMap<(DeviceType, String), T, S>,
93     gic_device: &dyn GicDevice,
94     initrd: &Option<InitramfsConfig>,
95     pci_space_info: &[PciSpaceInfo],
96     numa_nodes: &NumaNodes,
97     virtio_iommu_bdf: Option<u32>,
98     pmu_supported: bool,
99 ) -> FdtWriterResult<Vec<u8>> {
100     // Allocate stuff necessary for the holding the blob.
101     let mut fdt = FdtWriter::new().unwrap();
102 
103     // For an explanation why these nodes were introduced in the blob take a look at
104     // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845
105     // Look for "Required nodes and properties".
106 
107     // Header or the root node as per above mentioned documentation.
108     let root_node = fdt.begin_node("")?;
109     fdt.property_string("compatible", "linux,dummy-virt")?;
110     // For info on #address-cells and size-cells read "Note about cells and address representation"
111     // from the above mentioned txt file.
112     fdt.property_u32("#address-cells", ADDRESS_CELLS)?;
113     fdt.property_u32("#size-cells", SIZE_CELLS)?;
114     // This is not mandatory but we use it to point the root node to the node
115     // containing description of the interrupt controller for this VM.
116     fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;
117     create_cpu_nodes(&mut fdt, &vcpu_mpidr, vcpu_topology, numa_nodes)?;
118     create_memory_node(&mut fdt, guest_mem, numa_nodes)?;
119     create_chosen_node(&mut fdt, cmdline, initrd)?;
120     create_gic_node(&mut fdt, gic_device)?;
121     create_timer_node(&mut fdt)?;
122     if pmu_supported {
123         create_pmu_node(&mut fdt, vcpu_mpidr.len())?;
124     }
125     create_clock_node(&mut fdt)?;
126     create_psci_node(&mut fdt)?;
127     create_devices_node(&mut fdt, device_info)?;
128     create_pci_nodes(&mut fdt, pci_space_info, virtio_iommu_bdf)?;
129     if numa_nodes.len() > 1 {
130         create_distance_map_node(&mut fdt, numa_nodes)?;
131     }
132 
133     // End Header node.
134     fdt.end_node(root_node)?;
135 
136     let fdt_final = fdt.finish()?;
137 
138     Ok(fdt_final)
139 }
140 
141 pub fn write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()> {
142     // Write FDT to memory.
143     guest_mem
144         .write_slice(fdt_final.as_slice(), super::layout::FDT_START)
145         .map_err(Error::WriteFdtToMemory)?;
146     Ok(())
147 }
148 
149 // Following are the auxiliary function for creating the different nodes that we append to our FDT.
150 fn create_cpu_nodes(
151     fdt: &mut FdtWriter,
152     vcpu_mpidr: &[u64],
153     vcpu_topology: Option<(u8, u8, u8)>,
154     numa_nodes: &NumaNodes,
155 ) -> FdtWriterResult<()> {
156     // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml.
157     let cpus_node = fdt.begin_node("cpus")?;
158     fdt.property_u32("#address-cells", 0x1)?;
159     fdt.property_u32("#size-cells", 0x0)?;
160 
161     let num_cpus = vcpu_mpidr.len();
162 
163     for (cpu_id, mpidr) in vcpu_mpidr.iter().enumerate().take(num_cpus) {
164         let cpu_name = format!("cpu@{:x}", cpu_id);
165         let cpu_node = fdt.begin_node(&cpu_name)?;
166         fdt.property_string("device_type", "cpu")?;
167         fdt.property_string("compatible", "arm,arm-v8")?;
168         if num_cpus > 1 {
169             // This is required on armv8 64-bit. See aforementioned documentation.
170             fdt.property_string("enable-method", "psci")?;
171         }
172         // Set the field to first 24 bits of the MPIDR - Multiprocessor Affinity Register.
173         // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html.
174         fdt.property_u32("reg", (mpidr & 0x7FFFFF) as u32)?;
175         fdt.property_u32("phandle", cpu_id as u32 + FIRST_VCPU_PHANDLE)?;
176 
177         // Add `numa-node-id` property if there is any numa config.
178         if numa_nodes.len() > 1 {
179             for numa_node_idx in 0..numa_nodes.len() {
180                 let numa_node = numa_nodes.get(&(numa_node_idx as u32));
181                 if numa_node.unwrap().cpus.contains(&(cpu_id as u8)) {
182                     fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
183                 }
184             }
185         }
186 
187         fdt.end_node(cpu_node)?;
188     }
189 
190     if let Some(topology) = vcpu_topology {
191         let (threads_per_core, cores_per_package, packages) = topology;
192         let cpu_map_node = fdt.begin_node("cpu-map")?;
193 
194         // Create device tree nodes with regard of above mapping.
195         for cluster_idx in 0..packages {
196             let cluster_name = format!("cluster{:x}", cluster_idx);
197             let cluster_node = fdt.begin_node(&cluster_name)?;
198 
199             for core_idx in 0..cores_per_package {
200                 let core_name = format!("core{:x}", core_idx);
201                 let core_node = fdt.begin_node(&core_name)?;
202 
203                 for thread_idx in 0..threads_per_core {
204                     let thread_name = format!("thread{:x}", thread_idx);
205                     let thread_node = fdt.begin_node(&thread_name)?;
206                     let cpu_idx = threads_per_core * cores_per_package * cluster_idx
207                         + threads_per_core * core_idx
208                         + thread_idx;
209                     fdt.property_u32("cpu", cpu_idx as u32 + FIRST_VCPU_PHANDLE)?;
210                     fdt.end_node(thread_node)?;
211                 }
212 
213                 fdt.end_node(core_node)?;
214             }
215             fdt.end_node(cluster_node)?;
216         }
217         fdt.end_node(cpu_map_node)?;
218     } else {
219         debug!("Boot using device tree, CPU topology is not (correctly) specified");
220     }
221 
222     fdt.end_node(cpus_node)?;
223 
224     Ok(())
225 }
226 
227 fn create_memory_node(
228     fdt: &mut FdtWriter,
229     guest_mem: &GuestMemoryMmap,
230     numa_nodes: &NumaNodes,
231 ) -> FdtWriterResult<()> {
232     // See https://github.com/torvalds/linux/blob/58ae0b51506802713aa0e9956d1853ba4c722c98/Documentation/devicetree/bindings/numa.txt
233     // for NUMA setting in memory node.
234     if numa_nodes.len() > 1 {
235         for numa_node_idx in 0..numa_nodes.len() {
236             let numa_node = numa_nodes.get(&(numa_node_idx as u32));
237             let mut mem_reg_prop: Vec<u64> = Vec::new();
238             let mut node_memory_addr: u64 = 0;
239             // Each memory zone of numa will have its own memory node, but
240             // different numa nodes should not share same memory zones.
241             for memory_region in numa_node.unwrap().memory_regions.iter() {
242                 let memory_region_start_addr: u64 = memory_region.start_addr().raw_value();
243                 let memory_region_size: u64 = memory_region.size() as u64;
244                 // RAM at 0-4M is hidden to the guest for edk2
245                 if memory_region_start_addr == 0 {
246                     continue;
247                 }
248                 mem_reg_prop.push(memory_region_start_addr);
249                 mem_reg_prop.push(memory_region_size);
250                 // Set the node address the first non-zero regison address
251                 if node_memory_addr == 0 {
252                     node_memory_addr = memory_region_start_addr;
253                 }
254             }
255             let memory_node_name = format!("memory@{:x}", node_memory_addr);
256             let memory_node = fdt.begin_node(&memory_node_name)?;
257             fdt.property_string("device_type", "memory")?;
258             fdt.property_array_u64("reg", &mem_reg_prop)?;
259             fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
260             fdt.end_node(memory_node)?;
261         }
262     } else {
263         let last_addr = guest_mem.last_addr().raw_value();
264         if last_addr < super::layout::MEM_32BIT_RESERVED_START.raw_value() {
265             // Case 1: all RAM is under the hole
266             let mem_size = last_addr - super::layout::RAM_START.raw_value() + 1;
267             let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64];
268             let memory_node = fdt.begin_node("memory")?;
269             fdt.property_string("device_type", "memory")?;
270             fdt.property_array_u64("reg", &mem_reg_prop)?;
271             fdt.end_node(memory_node)?;
272         } else {
273             // Case 2: RAM is split by the hole
274             // Region 1: RAM before the hole
275             let mem_size = super::layout::MEM_32BIT_RESERVED_START.raw_value()
276                 - super::layout::RAM_START.raw_value();
277             let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64];
278             let memory_node_name = format!("memory@{:x}", super::layout::RAM_START.raw_value());
279             let memory_node = fdt.begin_node(&memory_node_name)?;
280             fdt.property_string("device_type", "memory")?;
281             fdt.property_array_u64("reg", &mem_reg_prop)?;
282             fdt.end_node(memory_node)?;
283 
284             // Region 2: RAM after the hole
285             let mem_size = last_addr - super::layout::RAM_64BIT_START.raw_value() + 1;
286             let mem_reg_prop = [
287                 super::layout::RAM_64BIT_START.raw_value() as u64,
288                 mem_size as u64,
289             ];
290             let memory_node_name =
291                 format!("memory@{:x}", super::layout::RAM_64BIT_START.raw_value());
292             let memory_node = fdt.begin_node(&memory_node_name)?;
293             fdt.property_string("device_type", "memory")?;
294             fdt.property_array_u64("reg", &mem_reg_prop)?;
295             fdt.end_node(memory_node)?;
296         }
297     }
298 
299     Ok(())
300 }
301 
302 fn create_chosen_node(
303     fdt: &mut FdtWriter,
304     cmdline: &str,
305     initrd: &Option<InitramfsConfig>,
306 ) -> FdtWriterResult<()> {
307     let chosen_node = fdt.begin_node("chosen")?;
308     fdt.property_string("bootargs", cmdline)?;
309 
310     if let Some(initrd_config) = initrd {
311         let initrd_start = initrd_config.address.raw_value() as u64;
312         let initrd_end = initrd_config.address.raw_value() + initrd_config.size as u64;
313         fdt.property_u64("linux,initrd-start", initrd_start)?;
314         fdt.property_u64("linux,initrd-end", initrd_end)?;
315     }
316 
317     fdt.end_node(chosen_node)?;
318 
319     Ok(())
320 }
321 
322 fn create_gic_node(fdt: &mut FdtWriter, gic_device: &dyn GicDevice) -> FdtWriterResult<()> {
323     let gic_reg_prop = gic_device.device_properties();
324 
325     let intc_node = fdt.begin_node("intc")?;
326 
327     fdt.property_string("compatible", gic_device.fdt_compatibility())?;
328     fdt.property_null("interrupt-controller")?;
329     // "interrupt-cells" field specifies the number of cells needed to encode an
330     // interrupt source. The type shall be a <u32> and the value shall be 3 if no PPI affinity description
331     // is required.
332     fdt.property_u32("#interrupt-cells", 3)?;
333     fdt.property_array_u64("reg", gic_reg_prop)?;
334     fdt.property_u32("phandle", GIC_PHANDLE)?;
335     fdt.property_u32("#address-cells", 2)?;
336     fdt.property_u32("#size-cells", 2)?;
337     fdt.property_null("ranges")?;
338 
339     let gic_intr_prop = [
340         GIC_FDT_IRQ_TYPE_PPI,
341         gic_device.fdt_maint_irq(),
342         IRQ_TYPE_LEVEL_HI,
343     ];
344     fdt.property_array_u32("interrupts", &gic_intr_prop)?;
345 
346     if gic_device.msi_compatible() {
347         let msic_node = fdt.begin_node("msic")?;
348         fdt.property_string("compatible", gic_device.msi_compatibility())?;
349         fdt.property_null("msi-controller")?;
350         fdt.property_u32("phandle", MSI_PHANDLE)?;
351         let msi_reg_prop = gic_device.msi_properties();
352         fdt.property_array_u64("reg", msi_reg_prop)?;
353         fdt.end_node(msic_node)?;
354     }
355 
356     fdt.end_node(intc_node)?;
357 
358     Ok(())
359 }
360 
361 fn create_clock_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
362     // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture
363     // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power
364     // consumption and reduced interface complexity.
365     // PCLK is the clock source and this node defines exactly the clock for the APB.
366     let clock_node = fdt.begin_node("apb-pclk")?;
367     fdt.property_string("compatible", "fixed-clock")?;
368     fdt.property_u32("#clock-cells", 0x0)?;
369     fdt.property_u32("clock-frequency", 24000000)?;
370     fdt.property_string("clock-output-names", "clk24mhz")?;
371     fdt.property_u32("phandle", CLOCK_PHANDLE)?;
372     fdt.end_node(clock_node)?;
373 
374     Ok(())
375 }
376 
377 fn create_timer_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
378     // See
379     // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/interrupt-controller/arch_timer.txt
380     // These are fixed interrupt numbers for the timer device.
381     let irqs = [13, 14, 11, 10];
382     let compatible = "arm,armv8-timer";
383 
384     let mut timer_reg_cells: Vec<u32> = Vec::new();
385     for &irq in irqs.iter() {
386         timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI);
387         timer_reg_cells.push(irq);
388         timer_reg_cells.push(IRQ_TYPE_LEVEL_HI);
389     }
390 
391     let timer_node = fdt.begin_node("timer")?;
392     fdt.property_string("compatible", compatible)?;
393     fdt.property_null("always-on")?;
394     fdt.property_array_u32("interrupts", &timer_reg_cells)?;
395     fdt.end_node(timer_node)?;
396 
397     Ok(())
398 }
399 
400 fn create_psci_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
401     let compatible = "arm,psci-0.2";
402     let psci_node = fdt.begin_node("psci")?;
403     fdt.property_string("compatible", compatible)?;
404     // Two methods available: hvc and smc.
405     // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC.
406     // So, since we are using kvm, we need to use hvc.
407     fdt.property_string("method", "hvc")?;
408     fdt.end_node(psci_node)?;
409 
410     Ok(())
411 }
412 
413 fn create_virtio_node<T: DeviceInfoForFdt + Clone + Debug>(
414     fdt: &mut FdtWriter,
415     dev_info: &T,
416 ) -> FdtWriterResult<()> {
417     let device_reg_prop = [dev_info.addr(), dev_info.length()];
418     let irq = [GIC_FDT_IRQ_TYPE_SPI, dev_info.irq(), IRQ_TYPE_EDGE_RISING];
419 
420     let virtio_node = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr()))?;
421     fdt.property_string("compatible", "virtio,mmio")?;
422     fdt.property_array_u64("reg", &device_reg_prop)?;
423     fdt.property_array_u32("interrupts", &irq)?;
424     fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;
425     fdt.end_node(virtio_node)?;
426 
427     Ok(())
428 }
429 
430 fn create_serial_node<T: DeviceInfoForFdt + Clone + Debug>(
431     fdt: &mut FdtWriter,
432     dev_info: &T,
433 ) -> FdtWriterResult<()> {
434     let compatible = b"arm,pl011\0arm,primecell\0";
435     let serial_reg_prop = [dev_info.addr(), dev_info.length()];
436     let irq = [
437         GIC_FDT_IRQ_TYPE_SPI,
438         dev_info.irq() - IRQ_BASE,
439         IRQ_TYPE_EDGE_RISING,
440     ];
441 
442     let serial_node = fdt.begin_node(&format!("pl011@{:x}", dev_info.addr()))?;
443     fdt.property("compatible", compatible)?;
444     fdt.property_array_u64("reg", &serial_reg_prop)?;
445     fdt.property_u32("clocks", CLOCK_PHANDLE)?;
446     fdt.property_string("clock-names", "apb_pclk")?;
447     fdt.property_array_u32("interrupts", &irq)?;
448     fdt.end_node(serial_node)?;
449 
450     Ok(())
451 }
452 
453 fn create_rtc_node<T: DeviceInfoForFdt + Clone + Debug>(
454     fdt: &mut FdtWriter,
455     dev_info: &T,
456 ) -> FdtWriterResult<()> {
457     let compatible = b"arm,pl031\0arm,primecell\0";
458     let rtc_reg_prop = [dev_info.addr(), dev_info.length()];
459     let irq = [
460         GIC_FDT_IRQ_TYPE_SPI,
461         dev_info.irq() - IRQ_BASE,
462         IRQ_TYPE_LEVEL_HI,
463     ];
464 
465     let rtc_node = fdt.begin_node(&format!("rtc@{:x}", dev_info.addr()))?;
466     fdt.property("compatible", compatible)?;
467     fdt.property_array_u64("reg", &rtc_reg_prop)?;
468     fdt.property_array_u32("interrupts", &irq)?;
469     fdt.property_u32("clocks", CLOCK_PHANDLE)?;
470     fdt.property_string("clock-names", "apb_pclk")?;
471     fdt.end_node(rtc_node)?;
472 
473     Ok(())
474 }
475 
476 fn create_gpio_node<T: DeviceInfoForFdt + Clone + Debug>(
477     fdt: &mut FdtWriter,
478     dev_info: &T,
479 ) -> FdtWriterResult<()> {
480     // PL061 GPIO controller node
481     let compatible = b"arm,pl061\0arm,primecell\0";
482     let gpio_reg_prop = [dev_info.addr(), dev_info.length()];
483     let irq = [
484         GIC_FDT_IRQ_TYPE_SPI,
485         dev_info.irq() - IRQ_BASE,
486         IRQ_TYPE_EDGE_RISING,
487     ];
488 
489     let gpio_node = fdt.begin_node(&format!("pl061@{:x}", dev_info.addr()))?;
490     fdt.property("compatible", compatible)?;
491     fdt.property_array_u64("reg", &gpio_reg_prop)?;
492     fdt.property_array_u32("interrupts", &irq)?;
493     fdt.property_null("gpio-controller")?;
494     fdt.property_u32("#gpio-cells", 2)?;
495     fdt.property_u32("clocks", CLOCK_PHANDLE)?;
496     fdt.property_string("clock-names", "apb_pclk")?;
497     fdt.property_u32("phandle", GPIO_PHANDLE)?;
498     fdt.end_node(gpio_node)?;
499 
500     // gpio-keys node
501     let gpio_keys_node = fdt.begin_node("gpio-keys")?;
502     fdt.property_string("compatible", "gpio-keys")?;
503     fdt.property_u32("#size-cells", 0)?;
504     fdt.property_u32("#address-cells", 1)?;
505     let gpio_keys_poweroff_node = fdt.begin_node("button@1")?;
506     fdt.property_string("label", "GPIO Key Poweroff")?;
507     fdt.property_u32("linux,code", KEY_POWER)?;
508     let gpios = [GPIO_PHANDLE, 3, 0];
509     fdt.property_array_u32("gpios", &gpios)?;
510     fdt.end_node(gpio_keys_poweroff_node)?;
511     fdt.end_node(gpio_keys_node)?;
512 
513     Ok(())
514 }
515 
516 fn create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
517     fdt: &mut FdtWriter,
518     dev_info: &HashMap<(DeviceType, String), T, S>,
519 ) -> FdtWriterResult<()> {
520     // Create one temp Vec to store all virtio devices
521     let mut ordered_virtio_device: Vec<&T> = Vec::new();
522 
523     for ((device_type, _device_id), info) in dev_info {
524         match device_type {
525             DeviceType::Gpio => create_gpio_node(fdt, info)?,
526             DeviceType::Rtc => create_rtc_node(fdt, info)?,
527             DeviceType::Serial => create_serial_node(fdt, info)?,
528             DeviceType::Virtio(_) => {
529                 ordered_virtio_device.push(info);
530             }
531         }
532     }
533 
534     // Sort out virtio devices by address from low to high and insert them into fdt table.
535     ordered_virtio_device.sort_by_key(|&a| a.addr());
536     // Current address allocation strategy in cloud-hypervisor is: the first created device
537     // will be allocated to higher address. Here we reverse the vector to make sure that
538     // the older created device will appear in front of the newer created device in FDT.
539     ordered_virtio_device.reverse();
540     for ordered_device_info in ordered_virtio_device.drain(..) {
541         create_virtio_node(fdt, ordered_device_info)?;
542     }
543 
544     Ok(())
545 }
546 
547 fn create_pmu_node(fdt: &mut FdtWriter, cpu_nums: usize) -> FdtWriterResult<()> {
548     let num_cpus = cpu_nums as u64 as u32;
549     let compatible = "arm,armv8-pmuv3";
550     let cpu_mask: u32 =
551         (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
552     let irq = [
553         GIC_FDT_IRQ_TYPE_PPI,
554         AARCH64_PMU_IRQ,
555         cpu_mask | IRQ_TYPE_LEVEL_HI,
556     ];
557 
558     let pmu_node = fdt.begin_node("pmu")?;
559     fdt.property_string("compatible", compatible)?;
560     fdt.property_array_u32("interrupts", &irq)?;
561     fdt.end_node(pmu_node)?;
562     Ok(())
563 }
564 
565 fn create_pci_nodes(
566     fdt: &mut FdtWriter,
567     pci_device_info: &[PciSpaceInfo],
568     virtio_iommu_bdf: Option<u32>,
569 ) -> FdtWriterResult<()> {
570     // Add node for PCIe controller.
571     // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
572     // and https://elinux.org/Device_Tree_Usage.
573     // In multiple PCI segments setup, each PCI segment needs a PCI node.
574     for pci_device_info_elem in pci_device_info.iter() {
575         // EDK2 requires the PCIe high space above 4G address.
576         // The actual space in CLH follows the RAM. If the RAM space is small, the PCIe high space
577         // could fall bellow 4G.
578         // Here we cut off PCI device space below 8G in FDT to workaround the EDK2 check.
579         // But the address written in ACPI is not impacted.
580         let (pci_device_base_64bit, pci_device_size_64bit) =
581             if pci_device_info_elem.pci_device_space_start < PCI_HIGH_BASE.raw_value() {
582                 (
583                     PCI_HIGH_BASE.raw_value(),
584                     pci_device_info_elem.pci_device_space_size
585                         - (PCI_HIGH_BASE.raw_value() - pci_device_info_elem.pci_device_space_start),
586                 )
587             } else {
588                 (
589                     pci_device_info_elem.pci_device_space_start,
590                     pci_device_info_elem.pci_device_space_size,
591                 )
592             };
593         // There is no specific requirement of the 32bit MMIO range, and
594         // therefore at least we can make these ranges 4K aligned.
595         let pci_device_size_32bit: u64 =
596             MEM_32BIT_DEVICES_SIZE / ((1 << 12) * pci_device_info.len() as u64) * (1 << 12);
597         let pci_device_base_32bit: u64 = MEM_32BIT_DEVICES_START.0
598             + pci_device_size_32bit * pci_device_info_elem.pci_segment_id as u64;
599 
600         let ranges = [
601             // io addresses. Since AArch64 will not use IO address,
602             // we can set the same IO address range for every segment.
603             0x1000000,
604             0_u32,
605             0_u32,
606             (MEM_PCI_IO_START.0 >> 32) as u32,
607             MEM_PCI_IO_START.0 as u32,
608             (MEM_PCI_IO_SIZE >> 32) as u32,
609             MEM_PCI_IO_SIZE as u32,
610             // mmio addresses
611             0x2000000,                            // (ss = 10: 32-bit memory space)
612             (pci_device_base_32bit >> 32) as u32, // PCI address
613             pci_device_base_32bit as u32,
614             (pci_device_base_32bit >> 32) as u32, // CPU address
615             pci_device_base_32bit as u32,
616             (pci_device_size_32bit >> 32) as u32, // size
617             pci_device_size_32bit as u32,
618             // device addresses
619             0x3000000,                            // (ss = 11: 64-bit memory space)
620             (pci_device_base_64bit >> 32) as u32, // PCI address
621             pci_device_base_64bit as u32,
622             (pci_device_base_64bit >> 32) as u32, // CPU address
623             pci_device_base_64bit as u32,
624             (pci_device_size_64bit >> 32) as u32, // size
625             pci_device_size_64bit as u32,
626         ];
627         let bus_range = [0, 0]; // Only bus 0
628         let reg = [
629             pci_device_info_elem.mmio_config_address,
630             PCI_MMIO_CONFIG_SIZE_PER_SEGMENT,
631         ];
632         // See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt
633         let msi_map = [
634             // rid-base: A single cell describing the first RID matched by the entry.
635             0x0,
636             // msi-controller: A single phandle to an MSI controller.
637             MSI_PHANDLE,
638             // msi-base: An msi-specifier describing the msi-specifier produced for the
639             // first RID matched by the entry.
640             (pci_device_info_elem.pci_segment_id as u32) << 8,
641             // length: A single cell describing how many consecutive RIDs are matched
642             // following the rid-base.
643             0x100,
644         ];
645 
646         let pci_node_name = format!("pci@{:x}", pci_device_info_elem.mmio_config_address);
647         let pci_node = fdt.begin_node(&pci_node_name)?;
648 
649         fdt.property_string("compatible", "pci-host-ecam-generic")?;
650         fdt.property_string("device_type", "pci")?;
651         fdt.property_array_u32("ranges", &ranges)?;
652         fdt.property_array_u32("bus-range", &bus_range)?;
653         fdt.property_u32(
654             "linux,pci-domain",
655             pci_device_info_elem.pci_segment_id as u32,
656         )?;
657         fdt.property_u32("#address-cells", 3)?;
658         fdt.property_u32("#size-cells", 2)?;
659         fdt.property_array_u64("reg", &reg)?;
660         fdt.property_u32("#interrupt-cells", 1)?;
661         fdt.property_null("interrupt-map")?;
662         fdt.property_null("interrupt-map-mask")?;
663         fdt.property_null("dma-coherent")?;
664         fdt.property_array_u32("msi-map", &msi_map)?;
665         fdt.property_u32("msi-parent", MSI_PHANDLE)?;
666 
667         if pci_device_info_elem.pci_segment_id == 0 {
668             if let Some(virtio_iommu_bdf) = virtio_iommu_bdf {
669                 // See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt
670                 // for 'iommu-map' attribute setting.
671                 let iommu_map = [
672                     0_u32,
673                     VIRTIO_IOMMU_PHANDLE,
674                     0_u32,
675                     virtio_iommu_bdf,
676                     virtio_iommu_bdf + 1,
677                     VIRTIO_IOMMU_PHANDLE,
678                     virtio_iommu_bdf + 1,
679                     0xffff - virtio_iommu_bdf,
680                 ];
681                 fdt.property_array_u32("iommu-map", &iommu_map)?;
682 
683                 // See kernel document Documentation/devicetree/bindings/virtio/iommu.txt
684                 // for virtio-iommu node settings.
685                 let virtio_iommu_node_name = format!("virtio_iommu@{:x}", virtio_iommu_bdf);
686                 let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?;
687                 fdt.property_u32("#iommu-cells", 1)?;
688                 fdt.property_string("compatible", "virtio,pci-iommu")?;
689 
690                 // 'reg' is a five-cell address encoded as
691                 // (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the
692                 // device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells
693                 // should be zero.
694                 let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32];
695                 fdt.property_array_u32("reg", &reg)?;
696                 fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?;
697 
698                 fdt.end_node(virtio_iommu_node)?;
699             }
700         }
701 
702         fdt.end_node(pci_node)?;
703     }
704 
705     Ok(())
706 }
707 
708 fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()> {
709     let distance_map_node = fdt.begin_node("distance-map")?;
710     fdt.property_string("compatible", "numa-distance-map-v1")?;
711     // Construct the distance matrix.
712     // 1. We use the word entry to describe a distance from a node to
713     // its destination, e.g. 0 -> 1 = 20 is described as <0 1 20>.
714     // 2. Each entry represents distance from first node to second node.
715     // The distances are equal in either direction.
716     // 3. The distance from a node to self (local distance) is represented
717     // with value 10 and all internode distance should be represented with
718     // a value greater than 10.
719     // 4. distance-matrix should have entries in lexicographical ascending
720     // order of nodes.
721     let mut distance_matrix = Vec::new();
722     for numa_node_idx in 0..numa_nodes.len() {
723         let numa_node = numa_nodes.get(&(numa_node_idx as u32));
724         for dest_numa_node in 0..numa_node.unwrap().distances.len() + 1 {
725             if numa_node_idx == dest_numa_node {
726                 distance_matrix.push(numa_node_idx as u32);
727                 distance_matrix.push(dest_numa_node as u32);
728                 distance_matrix.push(10_u32);
729                 continue;
730             }
731 
732             distance_matrix.push(numa_node_idx as u32);
733             distance_matrix.push(dest_numa_node as u32);
734             distance_matrix.push(
735                 *numa_node
736                     .unwrap()
737                     .distances
738                     .get(&(dest_numa_node as u32))
739                     .unwrap() as u32,
740             );
741         }
742     }
743     fdt.property_array_u32("distance-matrix", distance_matrix.as_ref())?;
744     fdt.end_node(distance_map_node)?;
745 
746     Ok(())
747 }
748 
749 // Parse the DTB binary and print for debugging
750 pub fn print_fdt(dtb: &[u8]) {
751     match fdt_parser::Fdt::new(dtb) {
752         Ok(fdt) => {
753             if let Some(root) = fdt.find_node("/") {
754                 debug!("Printing the FDT:");
755                 print_node(root, 0);
756             } else {
757                 debug!("Failed to find root node in FDT for debugging.");
758             }
759         }
760         Err(_) => debug!("Failed to parse FDT for debugging."),
761     }
762 }
763 
764 fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) {
765     debug!("{:indent$}{}/", "", node.name, indent = n_spaces);
766     for property in node.properties() {
767         let name = property.name;
768 
769         // If the property is 'compatible', its value requires special handling.
770         // The u8 array could contain multiple null-terminated strings.
771         // We copy the original array and simply replace all 'null' characters with spaces.
772         let value = if name == "compatible" {
773             let mut compatible = vec![0u8; 256];
774             let handled_value = property
775                 .value
776                 .iter()
777                 .map(|&c| if c == 0 { b' ' } else { c })
778                 .collect::<Vec<_>>();
779             let len = cmp::min(255, handled_value.len());
780             compatible[..len].copy_from_slice(&handled_value[..len]);
781             compatible[..(len + 1)].to_vec()
782         } else {
783             property.value.to_vec()
784         };
785         let value = &value;
786 
787         // Now the value can be either:
788         //   - A null-terminated C string, or
789         //   - Binary data
790         // We follow a very simple logic to present the value:
791         //   - At first, try to convert it to CStr and print,
792         //   - If failed, print it as u32 array.
793         let value_result = match CStr::from_bytes_with_nul(value) {
794             Ok(value_cstr) => match value_cstr.to_str() {
795                 Ok(value_str) => Some(value_str),
796                 Err(_e) => None,
797             },
798             Err(_e) => None,
799         };
800 
801         if let Some(value_str) = value_result {
802             debug!(
803                 "{:indent$}{} : {:#?}",
804                 "",
805                 name,
806                 value_str,
807                 indent = (n_spaces + 2)
808             );
809         } else {
810             let mut array = Vec::with_capacity(256);
811             array.resize(value.len() / 4, 0u32);
812             BigEndian::read_u32_into(value, &mut array);
813             debug!(
814                 "{:indent$}{} : {:X?}",
815                 "",
816                 name,
817                 array,
818                 indent = (n_spaces + 2)
819             );
820         };
821     }
822 
823     // Print children nodes if there is any
824     for child in node.children() {
825         print_node(child, n_spaces + 2);
826     }
827 }
828