xref: /cloud-hypervisor/arch/src/aarch64/fdt.rs (revision 6f8bd27cf7629733582d930519e98d19e90afb16)
1 // Copyright 2020 Arm Limited (or its affiliates). All rights reserved.
2 // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the THIRD-PARTY file.
8 
9 use crate::{NumaNodes, PciSpaceInfo};
10 use byteorder::{BigEndian, ByteOrder};
11 use hypervisor::arch::aarch64::gic::Vgic;
12 use std::cmp;
13 use std::collections::HashMap;
14 use std::ffi::CStr;
15 use std::fmt::Debug;
16 use std::result;
17 use std::str;
18 use std::sync::{Arc, Mutex};
19 
20 use super::super::DeviceType;
21 use super::super::GuestMemoryMmap;
22 use super::super::InitramfsConfig;
23 use super::layout::{
24     IRQ_BASE, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_PCI_IO_SIZE, MEM_PCI_IO_START,
25     PCI_HIGH_BASE, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT,
26 };
27 use vm_fdt::{FdtWriter, FdtWriterResult};
28 use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryRegion};
29 
// Phandle of the FDT node declaring the interrupt controller (`intc`).
// The root node's `interrupt-parent` property points at it.
const GIC_PHANDLE: u32 = 1;
// Phandle of the FDT node declaring the MSI controller (`msic`).
const MSI_PHANDLE: u32 = 2;
// Phandle of the FDT node containing the clock definition (`apb-pclk`).
const CLOCK_PHANDLE: u32 = 3;
// Phandle of the FDT node containing the GPIO controller (`pl061`).
const GPIO_PHANDLE: u32 = 4;
// Phandle of the virtio-iommu node. Only one virtio-iommu device is supported for now.
const VIRTIO_IOMMU_PHANDLE: u32 = 5;
// NOTE: Keep FIRST_VCPU_PHANDLE the last PHANDLE defined.
// Phandle of the FDT node describing the first vCPU. vCPU `n` gets the phandle
// `FIRST_VCPU_PHANDLE + n`, so the highest phandle in use depends on the number of vCPUs.
const FIRST_VCPU_PHANDLE: u32 = 6;

// Values written to the root node's `#address-cells` / `#size-cells` properties.
// Read the documentation specified when appending the root node to the FDT.
const ADDRESS_CELLS: u32 = 0x2;
const SIZE_CELLS: u32 = 0x2;

// First cell of a GIC interrupt specifier: the interrupt type (SPI or PPI).
// As per kvm tool and
// https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt
// Look for "The 1st cell..."
const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;

// Trigger-type flags used as the last cell of the interrupt specifiers built in this file.
// From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17
const IRQ_TYPE_EDGE_RISING: u32 = 1;
const IRQ_TYPE_LEVEL_HI: u32 = 4;

/// PMU PPI interrupt number, written into the `interrupts` property of the `pmu` node.
pub const AARCH64_PMU_IRQ: u32 = 7;

// Keys and Buttons
// System Power Down: the `linux,code` value reported by the gpio-keys power-off button.
const KEY_POWER: u32 = 116;
65 
/// Describes a device well enough for it to be advertised in the Flattened Device Tree.
///
/// The node builders in this file read the device's memory window (`addr`, `length`)
/// to fill the `reg` property and its interrupt line (`irq`) to fill `interrupts`.
pub trait DeviceInfoForFdt {
    /// Base address of the device's memory window.
    fn addr(&self) -> u64;
    /// Interrupt number assigned to the device.
    fn irq(&self) -> u32;
    /// Size, in bytes, of the memory window reserved for the device.
    fn length(&self) -> u64;
}
75 
76 /// Errors thrown while configuring the Flattened Device Tree for aarch64.
77 #[derive(Debug)]
78 pub enum Error {
79     /// Failure in writing FDT in memory.
80     WriteFdtToMemory(GuestMemoryError),
81 }
82 type Result<T> = result::Result<T, Error>;
83 
84 /// Creates the flattened device tree for this aarch64 VM.
85 #[allow(clippy::too_many_arguments)]
86 pub fn create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
87     guest_mem: &GuestMemoryMmap,
88     cmdline: &str,
89     vcpu_mpidr: Vec<u64>,
90     vcpu_topology: Option<(u8, u8, u8)>,
91     device_info: &HashMap<(DeviceType, String), T, S>,
92     gic_device: &Arc<Mutex<dyn Vgic>>,
93     initrd: &Option<InitramfsConfig>,
94     pci_space_info: &[PciSpaceInfo],
95     numa_nodes: &NumaNodes,
96     virtio_iommu_bdf: Option<u32>,
97     pmu_supported: bool,
98 ) -> FdtWriterResult<Vec<u8>> {
99     // Allocate stuff necessary for the holding the blob.
100     let mut fdt = FdtWriter::new().unwrap();
101 
102     // For an explanation why these nodes were introduced in the blob take a look at
103     // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845
104     // Look for "Required nodes and properties".
105 
106     // Header or the root node as per above mentioned documentation.
107     let root_node = fdt.begin_node("")?;
108     fdt.property_string("compatible", "linux,dummy-virt")?;
109     // For info on #address-cells and size-cells read "Note about cells and address representation"
110     // from the above mentioned txt file.
111     fdt.property_u32("#address-cells", ADDRESS_CELLS)?;
112     fdt.property_u32("#size-cells", SIZE_CELLS)?;
113     // This is not mandatory but we use it to point the root node to the node
114     // containing description of the interrupt controller for this VM.
115     fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;
116     create_cpu_nodes(&mut fdt, &vcpu_mpidr, vcpu_topology, numa_nodes)?;
117     create_memory_node(&mut fdt, guest_mem, numa_nodes)?;
118     create_chosen_node(&mut fdt, cmdline, initrd)?;
119     create_gic_node(&mut fdt, gic_device)?;
120     create_timer_node(&mut fdt)?;
121     if pmu_supported {
122         create_pmu_node(&mut fdt)?;
123     }
124     create_clock_node(&mut fdt)?;
125     create_psci_node(&mut fdt)?;
126     create_devices_node(&mut fdt, device_info)?;
127     create_pci_nodes(&mut fdt, pci_space_info, virtio_iommu_bdf)?;
128     if numa_nodes.len() > 1 {
129         create_distance_map_node(&mut fdt, numa_nodes)?;
130     }
131 
132     // End Header node.
133     fdt.end_node(root_node)?;
134 
135     let fdt_final = fdt.finish()?;
136 
137     Ok(fdt_final)
138 }
139 
140 pub fn write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()> {
141     // Write FDT to memory.
142     guest_mem
143         .write_slice(fdt_final.as_slice(), super::layout::FDT_START)
144         .map_err(Error::WriteFdtToMemory)?;
145     Ok(())
146 }
147 
// The following are the auxiliary functions for creating the different nodes that we append to our FDT.
149 fn create_cpu_nodes(
150     fdt: &mut FdtWriter,
151     vcpu_mpidr: &[u64],
152     vcpu_topology: Option<(u8, u8, u8)>,
153     numa_nodes: &NumaNodes,
154 ) -> FdtWriterResult<()> {
155     // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml.
156     let cpus_node = fdt.begin_node("cpus")?;
157     fdt.property_u32("#address-cells", 0x1)?;
158     fdt.property_u32("#size-cells", 0x0)?;
159 
160     let num_cpus = vcpu_mpidr.len();
161 
162     for (cpu_id, mpidr) in vcpu_mpidr.iter().enumerate().take(num_cpus) {
163         let cpu_name = format!("cpu@{:x}", cpu_id);
164         let cpu_node = fdt.begin_node(&cpu_name)?;
165         fdt.property_string("device_type", "cpu")?;
166         fdt.property_string("compatible", "arm,arm-v8")?;
167         if num_cpus > 1 {
168             // This is required on armv8 64-bit. See aforementioned documentation.
169             fdt.property_string("enable-method", "psci")?;
170         }
171         // Set the field to first 24 bits of the MPIDR - Multiprocessor Affinity Register.
172         // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html.
173         fdt.property_u32("reg", (mpidr & 0x7FFFFF) as u32)?;
174         fdt.property_u32("phandle", cpu_id as u32 + FIRST_VCPU_PHANDLE)?;
175 
176         // Add `numa-node-id` property if there is any numa config.
177         if numa_nodes.len() > 1 {
178             for numa_node_idx in 0..numa_nodes.len() {
179                 let numa_node = numa_nodes.get(&(numa_node_idx as u32));
180                 if numa_node.unwrap().cpus.contains(&(cpu_id as u8)) {
181                     fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
182                 }
183             }
184         }
185 
186         fdt.end_node(cpu_node)?;
187     }
188 
189     if let Some(topology) = vcpu_topology {
190         let (threads_per_core, cores_per_package, packages) = topology;
191         let cpu_map_node = fdt.begin_node("cpu-map")?;
192 
193         // Create device tree nodes with regard of above mapping.
194         for cluster_idx in 0..packages {
195             let cluster_name = format!("cluster{:x}", cluster_idx);
196             let cluster_node = fdt.begin_node(&cluster_name)?;
197 
198             for core_idx in 0..cores_per_package {
199                 let core_name = format!("core{:x}", core_idx);
200                 let core_node = fdt.begin_node(&core_name)?;
201 
202                 for thread_idx in 0..threads_per_core {
203                     let thread_name = format!("thread{:x}", thread_idx);
204                     let thread_node = fdt.begin_node(&thread_name)?;
205                     let cpu_idx = threads_per_core * cores_per_package * cluster_idx
206                         + threads_per_core * core_idx
207                         + thread_idx;
208                     fdt.property_u32("cpu", cpu_idx as u32 + FIRST_VCPU_PHANDLE)?;
209                     fdt.end_node(thread_node)?;
210                 }
211 
212                 fdt.end_node(core_node)?;
213             }
214             fdt.end_node(cluster_node)?;
215         }
216         fdt.end_node(cpu_map_node)?;
217     } else {
218         debug!("Boot using device tree, CPU topology is not (correctly) specified");
219     }
220 
221     fdt.end_node(cpus_node)?;
222 
223     Ok(())
224 }
225 
226 fn create_memory_node(
227     fdt: &mut FdtWriter,
228     guest_mem: &GuestMemoryMmap,
229     numa_nodes: &NumaNodes,
230 ) -> FdtWriterResult<()> {
231     // See https://github.com/torvalds/linux/blob/58ae0b51506802713aa0e9956d1853ba4c722c98/Documentation/devicetree/bindings/numa.txt
232     // for NUMA setting in memory node.
233     if numa_nodes.len() > 1 {
234         for numa_node_idx in 0..numa_nodes.len() {
235             let numa_node = numa_nodes.get(&(numa_node_idx as u32));
236             let mut mem_reg_prop: Vec<u64> = Vec::new();
237             let mut node_memory_addr: u64 = 0;
238             // Each memory zone of numa will have its own memory node, but
239             // different numa nodes should not share same memory zones.
240             for memory_region in numa_node.unwrap().memory_regions.iter() {
241                 let memory_region_start_addr: u64 = memory_region.start_addr().raw_value();
242                 let memory_region_size: u64 = memory_region.size() as u64;
243                 mem_reg_prop.push(memory_region_start_addr);
244                 mem_reg_prop.push(memory_region_size);
245                 // Set the node address the first non-zero regison address
246                 if node_memory_addr == 0 {
247                     node_memory_addr = memory_region_start_addr;
248                 }
249             }
250             let memory_node_name = format!("memory@{:x}", node_memory_addr);
251             let memory_node = fdt.begin_node(&memory_node_name)?;
252             fdt.property_string("device_type", "memory")?;
253             fdt.property_array_u64("reg", &mem_reg_prop)?;
254             fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
255             fdt.end_node(memory_node)?;
256         }
257     } else {
258         let last_addr = guest_mem.last_addr().raw_value();
259         if last_addr < super::layout::MEM_32BIT_RESERVED_START.raw_value() {
260             // Case 1: all RAM is under the hole
261             let mem_size = last_addr - super::layout::RAM_START.raw_value() + 1;
262             let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64];
263             let memory_node = fdt.begin_node("memory")?;
264             fdt.property_string("device_type", "memory")?;
265             fdt.property_array_u64("reg", &mem_reg_prop)?;
266             fdt.end_node(memory_node)?;
267         } else {
268             // Case 2: RAM is split by the hole
269             // Region 1: RAM before the hole
270             let mem_size = super::layout::MEM_32BIT_RESERVED_START.raw_value()
271                 - super::layout::RAM_START.raw_value();
272             let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64];
273             let memory_node_name = format!("memory@{:x}", super::layout::RAM_START.raw_value());
274             let memory_node = fdt.begin_node(&memory_node_name)?;
275             fdt.property_string("device_type", "memory")?;
276             fdt.property_array_u64("reg", &mem_reg_prop)?;
277             fdt.end_node(memory_node)?;
278 
279             // Region 2: RAM after the hole
280             let mem_size = last_addr - super::layout::RAM_64BIT_START.raw_value() + 1;
281             let mem_reg_prop = [
282                 super::layout::RAM_64BIT_START.raw_value() as u64,
283                 mem_size as u64,
284             ];
285             let memory_node_name =
286                 format!("memory@{:x}", super::layout::RAM_64BIT_START.raw_value());
287             let memory_node = fdt.begin_node(&memory_node_name)?;
288             fdt.property_string("device_type", "memory")?;
289             fdt.property_array_u64("reg", &mem_reg_prop)?;
290             fdt.end_node(memory_node)?;
291         }
292     }
293 
294     Ok(())
295 }
296 
297 fn create_chosen_node(
298     fdt: &mut FdtWriter,
299     cmdline: &str,
300     initrd: &Option<InitramfsConfig>,
301 ) -> FdtWriterResult<()> {
302     let chosen_node = fdt.begin_node("chosen")?;
303     fdt.property_string("bootargs", cmdline)?;
304 
305     if let Some(initrd_config) = initrd {
306         let initrd_start = initrd_config.address.raw_value() as u64;
307         let initrd_end = initrd_config.address.raw_value() + initrd_config.size as u64;
308         fdt.property_u64("linux,initrd-start", initrd_start)?;
309         fdt.property_u64("linux,initrd-end", initrd_end)?;
310     }
311 
312     fdt.end_node(chosen_node)?;
313 
314     Ok(())
315 }
316 
317 fn create_gic_node(fdt: &mut FdtWriter, gic_device: &Arc<Mutex<dyn Vgic>>) -> FdtWriterResult<()> {
318     let gic_reg_prop = gic_device.lock().unwrap().device_properties();
319 
320     let intc_node = fdt.begin_node("intc")?;
321 
322     fdt.property_string("compatible", gic_device.lock().unwrap().fdt_compatibility())?;
323     fdt.property_null("interrupt-controller")?;
324     // "interrupt-cells" field specifies the number of cells needed to encode an
325     // interrupt source. The type shall be a <u32> and the value shall be 3 if no PPI affinity description
326     // is required.
327     fdt.property_u32("#interrupt-cells", 3)?;
328     fdt.property_array_u64("reg", &gic_reg_prop)?;
329     fdt.property_u32("phandle", GIC_PHANDLE)?;
330     fdt.property_u32("#address-cells", 2)?;
331     fdt.property_u32("#size-cells", 2)?;
332     fdt.property_null("ranges")?;
333 
334     let gic_intr_prop = [
335         GIC_FDT_IRQ_TYPE_PPI,
336         gic_device.lock().unwrap().fdt_maint_irq(),
337         IRQ_TYPE_LEVEL_HI,
338     ];
339     fdt.property_array_u32("interrupts", &gic_intr_prop)?;
340 
341     if gic_device.lock().unwrap().msi_compatible() {
342         let msic_node = fdt.begin_node("msic")?;
343         fdt.property_string("compatible", gic_device.lock().unwrap().msi_compatibility())?;
344         fdt.property_null("msi-controller")?;
345         fdt.property_u32("phandle", MSI_PHANDLE)?;
346         let msi_reg_prop = gic_device.lock().unwrap().msi_properties();
347         fdt.property_array_u64("reg", &msi_reg_prop)?;
348         fdt.end_node(msic_node)?;
349     }
350 
351     fdt.end_node(intc_node)?;
352 
353     Ok(())
354 }
355 
356 fn create_clock_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
357     // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture
358     // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power
359     // consumption and reduced interface complexity.
360     // PCLK is the clock source and this node defines exactly the clock for the APB.
361     let clock_node = fdt.begin_node("apb-pclk")?;
362     fdt.property_string("compatible", "fixed-clock")?;
363     fdt.property_u32("#clock-cells", 0x0)?;
364     fdt.property_u32("clock-frequency", 24000000)?;
365     fdt.property_string("clock-output-names", "clk24mhz")?;
366     fdt.property_u32("phandle", CLOCK_PHANDLE)?;
367     fdt.end_node(clock_node)?;
368 
369     Ok(())
370 }
371 
372 fn create_timer_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
373     // See
374     // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/interrupt-controller/arch_timer.txt
375     // These are fixed interrupt numbers for the timer device.
376     let irqs = [13, 14, 11, 10];
377     let compatible = "arm,armv8-timer";
378 
379     let mut timer_reg_cells: Vec<u32> = Vec::new();
380     for &irq in irqs.iter() {
381         timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI);
382         timer_reg_cells.push(irq);
383         timer_reg_cells.push(IRQ_TYPE_LEVEL_HI);
384     }
385 
386     let timer_node = fdt.begin_node("timer")?;
387     fdt.property_string("compatible", compatible)?;
388     fdt.property_null("always-on")?;
389     fdt.property_array_u32("interrupts", &timer_reg_cells)?;
390     fdt.end_node(timer_node)?;
391 
392     Ok(())
393 }
394 
395 fn create_psci_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
396     let compatible = "arm,psci-0.2";
397     let psci_node = fdt.begin_node("psci")?;
398     fdt.property_string("compatible", compatible)?;
399     // Two methods available: hvc and smc.
400     // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC.
401     // So, since we are using kvm, we need to use hvc.
402     fdt.property_string("method", "hvc")?;
403     fdt.end_node(psci_node)?;
404 
405     Ok(())
406 }
407 
408 fn create_virtio_node<T: DeviceInfoForFdt + Clone + Debug>(
409     fdt: &mut FdtWriter,
410     dev_info: &T,
411 ) -> FdtWriterResult<()> {
412     let device_reg_prop = [dev_info.addr(), dev_info.length()];
413     let irq = [GIC_FDT_IRQ_TYPE_SPI, dev_info.irq(), IRQ_TYPE_EDGE_RISING];
414 
415     let virtio_node = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr()))?;
416     fdt.property_string("compatible", "virtio,mmio")?;
417     fdt.property_array_u64("reg", &device_reg_prop)?;
418     fdt.property_array_u32("interrupts", &irq)?;
419     fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;
420     fdt.end_node(virtio_node)?;
421 
422     Ok(())
423 }
424 
425 fn create_serial_node<T: DeviceInfoForFdt + Clone + Debug>(
426     fdt: &mut FdtWriter,
427     dev_info: &T,
428 ) -> FdtWriterResult<()> {
429     let compatible = b"arm,pl011\0arm,primecell\0";
430     let serial_reg_prop = [dev_info.addr(), dev_info.length()];
431     let irq = [
432         GIC_FDT_IRQ_TYPE_SPI,
433         dev_info.irq() - IRQ_BASE,
434         IRQ_TYPE_EDGE_RISING,
435     ];
436 
437     let serial_node = fdt.begin_node(&format!("pl011@{:x}", dev_info.addr()))?;
438     fdt.property("compatible", compatible)?;
439     fdt.property_array_u64("reg", &serial_reg_prop)?;
440     fdt.property_u32("clocks", CLOCK_PHANDLE)?;
441     fdt.property_string("clock-names", "apb_pclk")?;
442     fdt.property_array_u32("interrupts", &irq)?;
443     fdt.end_node(serial_node)?;
444 
445     Ok(())
446 }
447 
448 fn create_rtc_node<T: DeviceInfoForFdt + Clone + Debug>(
449     fdt: &mut FdtWriter,
450     dev_info: &T,
451 ) -> FdtWriterResult<()> {
452     let compatible = b"arm,pl031\0arm,primecell\0";
453     let rtc_reg_prop = [dev_info.addr(), dev_info.length()];
454     let irq = [
455         GIC_FDT_IRQ_TYPE_SPI,
456         dev_info.irq() - IRQ_BASE,
457         IRQ_TYPE_LEVEL_HI,
458     ];
459 
460     let rtc_node = fdt.begin_node(&format!("rtc@{:x}", dev_info.addr()))?;
461     fdt.property("compatible", compatible)?;
462     fdt.property_array_u64("reg", &rtc_reg_prop)?;
463     fdt.property_array_u32("interrupts", &irq)?;
464     fdt.property_u32("clocks", CLOCK_PHANDLE)?;
465     fdt.property_string("clock-names", "apb_pclk")?;
466     fdt.end_node(rtc_node)?;
467 
468     Ok(())
469 }
470 
471 fn create_gpio_node<T: DeviceInfoForFdt + Clone + Debug>(
472     fdt: &mut FdtWriter,
473     dev_info: &T,
474 ) -> FdtWriterResult<()> {
475     // PL061 GPIO controller node
476     let compatible = b"arm,pl061\0arm,primecell\0";
477     let gpio_reg_prop = [dev_info.addr(), dev_info.length()];
478     let irq = [
479         GIC_FDT_IRQ_TYPE_SPI,
480         dev_info.irq() - IRQ_BASE,
481         IRQ_TYPE_EDGE_RISING,
482     ];
483 
484     let gpio_node = fdt.begin_node(&format!("pl061@{:x}", dev_info.addr()))?;
485     fdt.property("compatible", compatible)?;
486     fdt.property_array_u64("reg", &gpio_reg_prop)?;
487     fdt.property_array_u32("interrupts", &irq)?;
488     fdt.property_null("gpio-controller")?;
489     fdt.property_u32("#gpio-cells", 2)?;
490     fdt.property_u32("clocks", CLOCK_PHANDLE)?;
491     fdt.property_string("clock-names", "apb_pclk")?;
492     fdt.property_u32("phandle", GPIO_PHANDLE)?;
493     fdt.end_node(gpio_node)?;
494 
495     // gpio-keys node
496     let gpio_keys_node = fdt.begin_node("gpio-keys")?;
497     fdt.property_string("compatible", "gpio-keys")?;
498     fdt.property_u32("#size-cells", 0)?;
499     fdt.property_u32("#address-cells", 1)?;
500     let gpio_keys_poweroff_node = fdt.begin_node("button@1")?;
501     fdt.property_string("label", "GPIO Key Poweroff")?;
502     fdt.property_u32("linux,code", KEY_POWER)?;
503     let gpios = [GPIO_PHANDLE, 3, 0];
504     fdt.property_array_u32("gpios", &gpios)?;
505     fdt.end_node(gpio_keys_poweroff_node)?;
506     fdt.end_node(gpio_keys_node)?;
507 
508     Ok(())
509 }
510 
511 fn create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
512     fdt: &mut FdtWriter,
513     dev_info: &HashMap<(DeviceType, String), T, S>,
514 ) -> FdtWriterResult<()> {
515     // Create one temp Vec to store all virtio devices
516     let mut ordered_virtio_device: Vec<&T> = Vec::new();
517 
518     for ((device_type, _device_id), info) in dev_info {
519         match device_type {
520             DeviceType::Gpio => create_gpio_node(fdt, info)?,
521             DeviceType::Rtc => create_rtc_node(fdt, info)?,
522             DeviceType::Serial => create_serial_node(fdt, info)?,
523             DeviceType::Virtio(_) => {
524                 ordered_virtio_device.push(info);
525             }
526         }
527     }
528 
529     // Sort out virtio devices by address from low to high and insert them into fdt table.
530     ordered_virtio_device.sort_by_key(|&a| a.addr());
531     // Current address allocation strategy in cloud-hypervisor is: the first created device
532     // will be allocated to higher address. Here we reverse the vector to make sure that
533     // the older created device will appear in front of the newer created device in FDT.
534     ordered_virtio_device.reverse();
535     for ordered_device_info in ordered_virtio_device.drain(..) {
536         create_virtio_node(fdt, ordered_device_info)?;
537     }
538 
539     Ok(())
540 }
541 
542 fn create_pmu_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
543     let compatible = "arm,armv8-pmuv3";
544     let irq = [GIC_FDT_IRQ_TYPE_PPI, AARCH64_PMU_IRQ, IRQ_TYPE_LEVEL_HI];
545 
546     let pmu_node = fdt.begin_node("pmu")?;
547     fdt.property_string("compatible", compatible)?;
548     fdt.property_array_u32("interrupts", &irq)?;
549     fdt.end_node(pmu_node)?;
550     Ok(())
551 }
552 
553 fn create_pci_nodes(
554     fdt: &mut FdtWriter,
555     pci_device_info: &[PciSpaceInfo],
556     virtio_iommu_bdf: Option<u32>,
557 ) -> FdtWriterResult<()> {
558     // Add node for PCIe controller.
559     // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
560     // and https://elinux.org/Device_Tree_Usage.
561     // In multiple PCI segments setup, each PCI segment needs a PCI node.
562     for pci_device_info_elem in pci_device_info.iter() {
563         // EDK2 requires the PCIe high space above 4G address.
564         // The actual space in CLH follows the RAM. If the RAM space is small, the PCIe high space
565         // could fall bellow 4G.
566         // Here we cut off PCI device space below 8G in FDT to workaround the EDK2 check.
567         // But the address written in ACPI is not impacted.
568         let (pci_device_base_64bit, pci_device_size_64bit) =
569             if pci_device_info_elem.pci_device_space_start < PCI_HIGH_BASE.raw_value() {
570                 (
571                     PCI_HIGH_BASE.raw_value(),
572                     pci_device_info_elem.pci_device_space_size
573                         - (PCI_HIGH_BASE.raw_value() - pci_device_info_elem.pci_device_space_start),
574                 )
575             } else {
576                 (
577                     pci_device_info_elem.pci_device_space_start,
578                     pci_device_info_elem.pci_device_space_size,
579                 )
580             };
581         // There is no specific requirement of the 32bit MMIO range, and
582         // therefore at least we can make these ranges 4K aligned.
583         let pci_device_size_32bit: u64 =
584             MEM_32BIT_DEVICES_SIZE / ((1 << 12) * pci_device_info.len() as u64) * (1 << 12);
585         let pci_device_base_32bit: u64 = MEM_32BIT_DEVICES_START.0
586             + pci_device_size_32bit * pci_device_info_elem.pci_segment_id as u64;
587 
588         let ranges = [
589             // io addresses. Since AArch64 will not use IO address,
590             // we can set the same IO address range for every segment.
591             0x1000000,
592             0_u32,
593             0_u32,
594             (MEM_PCI_IO_START.0 >> 32) as u32,
595             MEM_PCI_IO_START.0 as u32,
596             (MEM_PCI_IO_SIZE >> 32) as u32,
597             MEM_PCI_IO_SIZE as u32,
598             // mmio addresses
599             0x2000000,                            // (ss = 10: 32-bit memory space)
600             (pci_device_base_32bit >> 32) as u32, // PCI address
601             pci_device_base_32bit as u32,
602             (pci_device_base_32bit >> 32) as u32, // CPU address
603             pci_device_base_32bit as u32,
604             (pci_device_size_32bit >> 32) as u32, // size
605             pci_device_size_32bit as u32,
606             // device addresses
607             0x3000000,                            // (ss = 11: 64-bit memory space)
608             (pci_device_base_64bit >> 32) as u32, // PCI address
609             pci_device_base_64bit as u32,
610             (pci_device_base_64bit >> 32) as u32, // CPU address
611             pci_device_base_64bit as u32,
612             (pci_device_size_64bit >> 32) as u32, // size
613             pci_device_size_64bit as u32,
614         ];
615         let bus_range = [0, 0]; // Only bus 0
616         let reg = [
617             pci_device_info_elem.mmio_config_address,
618             PCI_MMIO_CONFIG_SIZE_PER_SEGMENT,
619         ];
620         // See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt
621         let msi_map = [
622             // rid-base: A single cell describing the first RID matched by the entry.
623             0x0,
624             // msi-controller: A single phandle to an MSI controller.
625             MSI_PHANDLE,
626             // msi-base: An msi-specifier describing the msi-specifier produced for the
627             // first RID matched by the entry.
628             (pci_device_info_elem.pci_segment_id as u32) << 8,
629             // length: A single cell describing how many consecutive RIDs are matched
630             // following the rid-base.
631             0x100,
632         ];
633 
634         let pci_node_name = format!("pci@{:x}", pci_device_info_elem.mmio_config_address);
635         let pci_node = fdt.begin_node(&pci_node_name)?;
636 
637         fdt.property_string("compatible", "pci-host-ecam-generic")?;
638         fdt.property_string("device_type", "pci")?;
639         fdt.property_array_u32("ranges", &ranges)?;
640         fdt.property_array_u32("bus-range", &bus_range)?;
641         fdt.property_u32(
642             "linux,pci-domain",
643             pci_device_info_elem.pci_segment_id as u32,
644         )?;
645         fdt.property_u32("#address-cells", 3)?;
646         fdt.property_u32("#size-cells", 2)?;
647         fdt.property_array_u64("reg", &reg)?;
648         fdt.property_u32("#interrupt-cells", 1)?;
649         fdt.property_null("interrupt-map")?;
650         fdt.property_null("interrupt-map-mask")?;
651         fdt.property_null("dma-coherent")?;
652         fdt.property_array_u32("msi-map", &msi_map)?;
653         fdt.property_u32("msi-parent", MSI_PHANDLE)?;
654 
655         if pci_device_info_elem.pci_segment_id == 0 {
656             if let Some(virtio_iommu_bdf) = virtio_iommu_bdf {
657                 // See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt
658                 // for 'iommu-map' attribute setting.
659                 let iommu_map = [
660                     0_u32,
661                     VIRTIO_IOMMU_PHANDLE,
662                     0_u32,
663                     virtio_iommu_bdf,
664                     virtio_iommu_bdf + 1,
665                     VIRTIO_IOMMU_PHANDLE,
666                     virtio_iommu_bdf + 1,
667                     0xffff - virtio_iommu_bdf,
668                 ];
669                 fdt.property_array_u32("iommu-map", &iommu_map)?;
670 
671                 // See kernel document Documentation/devicetree/bindings/virtio/iommu.txt
672                 // for virtio-iommu node settings.
673                 let virtio_iommu_node_name = format!("virtio_iommu@{:x}", virtio_iommu_bdf);
674                 let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?;
675                 fdt.property_u32("#iommu-cells", 1)?;
676                 fdt.property_string("compatible", "virtio,pci-iommu")?;
677 
678                 // 'reg' is a five-cell address encoded as
679                 // (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the
680                 // device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells
681                 // should be zero.
682                 let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32];
683                 fdt.property_array_u32("reg", &reg)?;
684                 fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?;
685 
686                 fdt.end_node(virtio_iommu_node)?;
687             }
688         }
689 
690         fdt.end_node(pci_node)?;
691     }
692 
693     Ok(())
694 }
695 
696 fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()> {
697     let distance_map_node = fdt.begin_node("distance-map")?;
698     fdt.property_string("compatible", "numa-distance-map-v1")?;
699     // Construct the distance matrix.
700     // 1. We use the word entry to describe a distance from a node to
701     // its destination, e.g. 0 -> 1 = 20 is described as <0 1 20>.
702     // 2. Each entry represents distance from first node to second node.
703     // The distances are equal in either direction.
704     // 3. The distance from a node to self (local distance) is represented
705     // with value 10 and all internode distance should be represented with
706     // a value greater than 10.
707     // 4. distance-matrix should have entries in lexicographical ascending
708     // order of nodes.
709     let mut distance_matrix = Vec::new();
710     for numa_node_idx in 0..numa_nodes.len() {
711         let numa_node = numa_nodes.get(&(numa_node_idx as u32));
712         for dest_numa_node in 0..numa_node.unwrap().distances.len() + 1 {
713             if numa_node_idx == dest_numa_node {
714                 distance_matrix.push(numa_node_idx as u32);
715                 distance_matrix.push(dest_numa_node as u32);
716                 distance_matrix.push(10_u32);
717                 continue;
718             }
719 
720             distance_matrix.push(numa_node_idx as u32);
721             distance_matrix.push(dest_numa_node as u32);
722             distance_matrix.push(
723                 *numa_node
724                     .unwrap()
725                     .distances
726                     .get(&(dest_numa_node as u32))
727                     .unwrap() as u32,
728             );
729         }
730     }
731     fdt.property_array_u32("distance-matrix", distance_matrix.as_ref())?;
732     fdt.end_node(distance_map_node)?;
733 
734     Ok(())
735 }
736 
737 // Parse the DTB binary and print for debugging
738 pub fn print_fdt(dtb: &[u8]) {
739     match fdt_parser::Fdt::new(dtb) {
740         Ok(fdt) => {
741             if let Some(root) = fdt.find_node("/") {
742                 debug!("Printing the FDT:");
743                 print_node(root, 0);
744             } else {
745                 debug!("Failed to find root node in FDT for debugging.");
746             }
747         }
748         Err(_) => debug!("Failed to parse FDT for debugging."),
749     }
750 }
751 
752 fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) {
753     debug!("{:indent$}{}/", "", node.name, indent = n_spaces);
754     for property in node.properties() {
755         let name = property.name;
756 
757         // If the property is 'compatible', its value requires special handling.
758         // The u8 array could contain multiple null-terminated strings.
759         // We copy the original array and simply replace all 'null' characters with spaces.
760         let value = if name == "compatible" {
761             let mut compatible = vec![0u8; 256];
762             let handled_value = property
763                 .value
764                 .iter()
765                 .map(|&c| if c == 0 { b' ' } else { c })
766                 .collect::<Vec<_>>();
767             let len = cmp::min(255, handled_value.len());
768             compatible[..len].copy_from_slice(&handled_value[..len]);
769             compatible[..(len + 1)].to_vec()
770         } else {
771             property.value.to_vec()
772         };
773         let value = &value;
774 
775         // Now the value can be either:
776         //   - A null-terminated C string, or
777         //   - Binary data
778         // We follow a very simple logic to present the value:
779         //   - At first, try to convert it to CStr and print,
780         //   - If failed, print it as u32 array.
781         let value_result = match CStr::from_bytes_with_nul(value) {
782             Ok(value_cstr) => match value_cstr.to_str() {
783                 Ok(value_str) => Some(value_str),
784                 Err(_e) => None,
785             },
786             Err(_e) => None,
787         };
788 
789         if let Some(value_str) = value_result {
790             debug!(
791                 "{:indent$}{} : {:#?}",
792                 "",
793                 name,
794                 value_str,
795                 indent = (n_spaces + 2)
796             );
797         } else {
798             let mut array = Vec::with_capacity(256);
799             array.resize(value.len() / 4, 0u32);
800             BigEndian::read_u32_into(value, &mut array);
801             debug!(
802                 "{:indent$}{} : {:X?}",
803                 "",
804                 name,
805                 array,
806                 indent = (n_spaces + 2)
807             );
808         };
809     }
810 
811     // Print children nodes if there is any
812     for child in node.children() {
813         print_node(child, n_spaces + 2);
814     }
815 }
816