1 // Copyright 2020 Arm Limited (or its affiliates). All rights reserved.
2 // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 // SPDX-License-Identifier: Apache-2.0
4 //
5 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the THIRD-PARTY file.
8
9 use std::collections::HashMap;
10 use std::ffi::CStr;
11 use std::fmt::Debug;
12 use std::path::Path;
13 use std::sync::{Arc, Mutex};
14 use std::{cmp, fs, result, str};
15
16 use byteorder::{BigEndian, ByteOrder};
17 use hypervisor::arch::aarch64::gic::Vgic;
18 use hypervisor::arch::aarch64::regs::{
19 AARCH64_ARCH_TIMER_HYP_IRQ, AARCH64_ARCH_TIMER_PHYS_NONSECURE_IRQ,
20 AARCH64_ARCH_TIMER_PHYS_SECURE_IRQ, AARCH64_ARCH_TIMER_VIRT_IRQ, AARCH64_PMU_IRQ,
21 };
22 use thiserror::Error;
23 use vm_fdt::{FdtWriter, FdtWriterResult};
24 use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryRegion};
25
26 use super::super::{DeviceType, GuestMemoryMmap, InitramfsConfig};
27 use super::layout::{
28 GIC_V2M_COMPATIBLE, IRQ_BASE, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_PCI_IO_SIZE,
29 MEM_PCI_IO_START, PCI_HIGH_BASE, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, SPI_BASE, SPI_NUM,
30 };
31 use crate::{NumaNodes, PciSpaceInfo};
32
// This is a value for uniquely identifying the FDT node declaring the interrupt controller.
const GIC_PHANDLE: u32 = 1;
// This is a value for uniquely identifying the FDT node declaring the MSI controller.
const MSI_PHANDLE: u32 = 2;
// This is a value for uniquely identifying the FDT node containing the clock definition.
const CLOCK_PHANDLE: u32 = 3;
// This is a value for uniquely identifying the FDT node containing the gpio controller.
const GPIO_PHANDLE: u32 = 4;
// This is a value for virtio-iommu. Now only one virtio-iommu device is supported.
const VIRTIO_IOMMU_PHANDLE: u32 = 5;
// NOTE: Keep FIRST_VCPU_PHANDLE the largest fixed PHANDLE value defined.
// This is a value for uniquely identifying the FDT node containing the first vCPU.
// vCPU `n` gets the phandle `FIRST_VCPU_PHANDLE + n`, so the last vCPU phandle depends on
// the number of vCPUs.
const FIRST_VCPU_PHANDLE: u32 = 8;

// This is a value used when deriving the phandle of the FDT nodes containing the L2 cache info.
// It is added to the vCPU index, the maximum vCPU count and FIRST_VCPU_PHANDLE.
const L2_CACHE_PHANDLE: u32 = 6;
// This is a value used when deriving the phandle of the FDT nodes containing the L3 cache info.
// It is added on top of the L2 cache phandle offset.
const L3_CACHE_PHANDLE: u32 = 7;
// Read the documentation specified when appending the root node to the FDT.
const ADDRESS_CELLS: u32 = 0x2;
const SIZE_CELLS: u32 = 0x2;

// As per kvm tool and
// https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt
// Look for "The 1st cell..."
// These values are written as the first cell of every `interrupts` property in this file.
const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;

// From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17
// These values are written as the last cell of every `interrupts` property in this file.
const IRQ_TYPE_EDGE_RISING: u32 = 1;
const IRQ_TYPE_LEVEL_HI: u32 = 4;

// Keys and Buttons
// System Power Down
// Written as the `linux,code` of the gpio-keys power-off button.
const KEY_POWER: u32 = 116;
69
/// Trait for devices to be added to the Flattened Device Tree.
///
/// The node builders in this file use `addr()` and `length()` to fill the device's `reg`
/// property and `irq()` to fill its `interrupts` property.
pub trait DeviceInfoForFdt {
    /// Returns the address where this device will be loaded.
    fn addr(&self) -> u64;
    /// Returns the associated interrupt for this device.
    fn irq(&self) -> u32;
    /// Returns the amount of memory that needs to be reserved for this device.
    fn length(&self) -> u64;
}
79
/// Errors thrown while configuring the Flattened Device Tree for aarch64.
#[derive(Debug, Error)]
pub enum Error {
    /// Failure in writing FDT in memory.
    ///
    /// Carries the underlying `GuestMemoryError` as its source.
    #[error("Failure in writing FDT in memory")]
    WriteFdtToMemory(#[source] GuestMemoryError),
}
/// Module-local result alias whose error type is [`Error`].
type Result<T> = result::Result<T, Error>;
88
/// Host CPU cache levels whose properties are looked up in sysfs.
///
/// Each variant's discriminant equals the `indexN` directory number used by the
/// `get_cache_*` helpers (for example `L2` is read from `.../cache/index2/`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CacheLevel {
    /// L1 data cache
    L1D = 0,
    /// L1 instruction cache
    L1I = 1,
    /// L2 cache
    L2 = 2,
    /// L3 cache
    L3 = 3,
}
99
100 /// NOTE: cache size file directory example,
101 /// "/sys/devices/system/cpu/cpu0/cache/index0/size".
get_cache_size(cache_level: CacheLevel) -> u32102 pub fn get_cache_size(cache_level: CacheLevel) -> u32 {
103 let mut file_directory: String = "/sys/devices/system/cpu/cpu0/cache".to_string();
104 match cache_level {
105 CacheLevel::L1D => file_directory += "/index0/size",
106 CacheLevel::L1I => file_directory += "/index1/size",
107 CacheLevel::L2 => file_directory += "/index2/size",
108 CacheLevel::L3 => file_directory += "/index3/size",
109 }
110
111 let file_path = Path::new(&file_directory);
112 if !file_path.exists() {
113 warn!("File: {} does not exist.", file_directory);
114 0
115 } else {
116 info!("File: {} exist.", file_directory);
117
118 let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted.");
119 // The content of the file is as simple as a size, like: "32K"
120 let src = src.trim();
121 let src_digits: u32 = src[0..src.len() - 1].parse().unwrap();
122 let src_unit = &src[src.len() - 1..];
123
124 src_digits
125 * match src_unit {
126 "K" => 1024,
127 "M" => 1024u32.pow(2),
128 "G" => 1024u32.pow(3),
129 _ => 1,
130 }
131 }
132 }
133
134 /// NOTE: coherency_line_size file directory example,
135 /// "/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size".
get_cache_coherency_line_size(cache_level: CacheLevel) -> u32136 pub fn get_cache_coherency_line_size(cache_level: CacheLevel) -> u32 {
137 let mut file_directory: String = "/sys/devices/system/cpu/cpu0/cache".to_string();
138 match cache_level {
139 CacheLevel::L1D => file_directory += "/index0/coherency_line_size",
140 CacheLevel::L1I => file_directory += "/index1/coherency_line_size",
141 CacheLevel::L2 => file_directory += "/index2/coherency_line_size",
142 CacheLevel::L3 => file_directory += "/index3/coherency_line_size",
143 }
144
145 let file_path = Path::new(&file_directory);
146 if !file_path.exists() {
147 warn!("File: {} does not exist.", file_directory);
148 0
149 } else {
150 info!("File: {} exist.", file_directory);
151
152 let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted.");
153 src.trim().parse::<u32>().unwrap()
154 }
155 }
156
157 /// NOTE: number_of_sets file directory example,
158 /// "/sys/devices/system/cpu/cpu0/cache/index0/number_of_sets".
get_cache_number_of_sets(cache_level: CacheLevel) -> u32159 pub fn get_cache_number_of_sets(cache_level: CacheLevel) -> u32 {
160 let mut file_directory: String = "/sys/devices/system/cpu/cpu0/cache".to_string();
161 match cache_level {
162 CacheLevel::L1D => file_directory += "/index0/number_of_sets",
163 CacheLevel::L1I => file_directory += "/index1/number_of_sets",
164 CacheLevel::L2 => file_directory += "/index2/number_of_sets",
165 CacheLevel::L3 => file_directory += "/index3/number_of_sets",
166 }
167
168 let file_path = Path::new(&file_directory);
169 if !file_path.exists() {
170 warn!("File: {} does not exist.", file_directory);
171 0
172 } else {
173 info!("File: {} exist.", file_directory);
174
175 let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted.");
176 src.trim().parse::<u32>().unwrap()
177 }
178 }
179
180 /// NOTE: shared_cpu_list file directory example,
181 /// "/sys/devices/system/cpu/cpu0/cache/index0/shared_cpu_list".
get_cache_shared(cache_level: CacheLevel) -> bool182 pub fn get_cache_shared(cache_level: CacheLevel) -> bool {
183 let mut file_directory: String = "/sys/devices/system/cpu/cpu0/cache".to_string();
184 let mut result = true;
185
186 match cache_level {
187 CacheLevel::L1D | CacheLevel::L1I => result = false,
188 CacheLevel::L2 => file_directory += "/index2/shared_cpu_list",
189 CacheLevel::L3 => file_directory += "/index3/shared_cpu_list",
190 }
191
192 if !result {
193 return false;
194 }
195
196 let file_path = Path::new(&file_directory);
197 if !file_path.exists() {
198 warn!("File: {} does not exist.", file_directory);
199 result = false;
200 } else {
201 info!("File: {} exist.", file_directory);
202
203 let src = fs::read_to_string(file_directory).expect("File not exists or file corrupted.");
204 let src = src.trim();
205 if src.is_empty() {
206 result = false;
207 } else {
208 result = src.contains('-') || src.contains(',');
209 }
210 }
211
212 result
213 }
214
215 /// Creates the flattened device tree for this aarch64 VM.
216 #[allow(clippy::too_many_arguments)]
create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( guest_mem: &GuestMemoryMmap, cmdline: &str, vcpu_mpidr: Vec<u64>, vcpu_topology: Option<(u8, u8, u8)>, device_info: &HashMap<(DeviceType, String), T, S>, gic_device: &Arc<Mutex<dyn Vgic>>, initrd: &Option<InitramfsConfig>, pci_space_info: &[PciSpaceInfo], numa_nodes: &NumaNodes, virtio_iommu_bdf: Option<u32>, pmu_supported: bool, ) -> FdtWriterResult<Vec<u8>>217 pub fn create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
218 guest_mem: &GuestMemoryMmap,
219 cmdline: &str,
220 vcpu_mpidr: Vec<u64>,
221 vcpu_topology: Option<(u8, u8, u8)>,
222 device_info: &HashMap<(DeviceType, String), T, S>,
223 gic_device: &Arc<Mutex<dyn Vgic>>,
224 initrd: &Option<InitramfsConfig>,
225 pci_space_info: &[PciSpaceInfo],
226 numa_nodes: &NumaNodes,
227 virtio_iommu_bdf: Option<u32>,
228 pmu_supported: bool,
229 ) -> FdtWriterResult<Vec<u8>> {
230 // Allocate stuff necessary for the holding the blob.
231 let mut fdt = FdtWriter::new().unwrap();
232
233 // For an explanation why these nodes were introduced in the blob take a look at
234 // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845
235 // Look for "Required nodes and properties".
236
237 // Header or the root node as per above mentioned documentation.
238 let root_node = fdt.begin_node("")?;
239 fdt.property_string("compatible", "linux,dummy-virt")?;
240 // For info on #address-cells and size-cells read "Note about cells and address representation"
241 // from the above mentioned txt file.
242 fdt.property_u32("#address-cells", ADDRESS_CELLS)?;
243 fdt.property_u32("#size-cells", SIZE_CELLS)?;
244 // This is not mandatory but we use it to point the root node to the node
245 // containing description of the interrupt controller for this VM.
246 fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;
247 create_cpu_nodes(&mut fdt, &vcpu_mpidr, vcpu_topology, numa_nodes)?;
248 create_memory_node(&mut fdt, guest_mem, numa_nodes)?;
249 create_chosen_node(&mut fdt, cmdline, initrd)?;
250 create_gic_node(&mut fdt, gic_device)?;
251 create_timer_node(&mut fdt)?;
252 if pmu_supported {
253 create_pmu_node(&mut fdt)?;
254 }
255 create_clock_node(&mut fdt)?;
256 create_psci_node(&mut fdt)?;
257 create_devices_node(&mut fdt, device_info)?;
258 create_pci_nodes(&mut fdt, pci_space_info, virtio_iommu_bdf)?;
259 if numa_nodes.len() > 1 {
260 create_distance_map_node(&mut fdt, numa_nodes)?;
261 }
262
263 // End Header node.
264 fdt.end_node(root_node)?;
265
266 let fdt_final = fdt.finish()?;
267
268 Ok(fdt_final)
269 }
270
write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()>271 pub fn write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()> {
272 // Write FDT to memory.
273 guest_mem
274 .write_slice(fdt_final.as_slice(), super::layout::FDT_START)
275 .map_err(Error::WriteFdtToMemory)?;
276 Ok(())
277 }
278
279 // Following are the auxiliary function for creating the different nodes that we append to our FDT.
create_cpu_nodes( fdt: &mut FdtWriter, vcpu_mpidr: &[u64], vcpu_topology: Option<(u8, u8, u8)>, numa_nodes: &NumaNodes, ) -> FdtWriterResult<()>280 fn create_cpu_nodes(
281 fdt: &mut FdtWriter,
282 vcpu_mpidr: &[u64],
283 vcpu_topology: Option<(u8, u8, u8)>,
284 numa_nodes: &NumaNodes,
285 ) -> FdtWriterResult<()> {
286 // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml.
287 let cpus_node = fdt.begin_node("cpus")?;
288 fdt.property_u32("#address-cells", 0x1)?;
289 fdt.property_u32("#size-cells", 0x0)?;
290
291 let num_cpus = vcpu_mpidr.len();
292 let (threads_per_core, cores_per_package, packages) = vcpu_topology.unwrap_or((1, 1, 1));
293 let max_cpus: u32 = (threads_per_core * cores_per_package * packages).into();
294
295 // Add cache info.
296 // L1 Data Cache Info.
297 let mut l1_d_cache_size: u32 = 0;
298 let mut l1_d_cache_line_size: u32 = 0;
299 let mut l1_d_cache_sets: u32 = 0;
300
301 // L1 Instruction Cache Info.
302 let mut l1_i_cache_size: u32 = 0;
303 let mut l1_i_cache_line_size: u32 = 0;
304 let mut l1_i_cache_sets: u32 = 0;
305
306 // L2 Cache Info.
307 let mut l2_cache_size: u32 = 0;
308 let mut l2_cache_line_size: u32 = 0;
309 let mut l2_cache_sets: u32 = 0;
310
311 // L3 Cache Info.
312 let mut l3_cache_size: u32 = 0;
313 let mut l3_cache_line_size: u32 = 0;
314 let mut l3_cache_sets: u32 = 0;
315
316 // Cache Shared Info.
317 let mut l2_cache_shared: bool = false;
318 let mut l3_cache_shared: bool = false;
319
320 let cache_path = Path::new("/sys/devices/system/cpu/cpu0/cache");
321 let cache_exist: bool = cache_path.exists();
322 if !cache_exist {
323 warn!("cache sysfs system does not exist.");
324 } else {
325 info!("cache sysfs system exists.");
326 // L1 Data Cache Info.
327 l1_d_cache_size = get_cache_size(CacheLevel::L1D);
328 l1_d_cache_line_size = get_cache_coherency_line_size(CacheLevel::L1D);
329 l1_d_cache_sets = get_cache_number_of_sets(CacheLevel::L1D);
330
331 // L1 Instruction Cache Info.
332 l1_i_cache_size = get_cache_size(CacheLevel::L1I);
333 l1_i_cache_line_size = get_cache_coherency_line_size(CacheLevel::L1I);
334 l1_i_cache_sets = get_cache_number_of_sets(CacheLevel::L1I);
335
336 // L2 Cache Info.
337 l2_cache_size = get_cache_size(CacheLevel::L2);
338 l2_cache_line_size = get_cache_coherency_line_size(CacheLevel::L2);
339 l2_cache_sets = get_cache_number_of_sets(CacheLevel::L2);
340
341 // L3 Cache Info.
342 l3_cache_size = get_cache_size(CacheLevel::L3);
343 l3_cache_line_size = get_cache_coherency_line_size(CacheLevel::L3);
344 l3_cache_sets = get_cache_number_of_sets(CacheLevel::L3);
345
346 // Cache Shared Info.
347 if l2_cache_size != 0 {
348 l2_cache_shared = get_cache_shared(CacheLevel::L2);
349 }
350 if l3_cache_size != 0 {
351 l3_cache_shared = get_cache_shared(CacheLevel::L3);
352 }
353 }
354
355 for (cpu_id, mpidr) in vcpu_mpidr.iter().enumerate().take(num_cpus) {
356 let cpu_name = format!("cpu@{cpu_id:x}");
357 let cpu_node = fdt.begin_node(&cpu_name)?;
358 fdt.property_string("device_type", "cpu")?;
359 fdt.property_string("compatible", "arm,arm-v8")?;
360 if num_cpus > 1 {
361 // This is required on armv8 64-bit. See aforementioned documentation.
362 fdt.property_string("enable-method", "psci")?;
363 }
364 // Set the field to first 24 bits of the MPIDR - Multiprocessor Affinity Register.
365 // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html.
366 fdt.property_u32("reg", (mpidr & 0x7FFFFF) as u32)?;
367 fdt.property_u32("phandle", cpu_id as u32 + FIRST_VCPU_PHANDLE)?;
368
369 // Add `numa-node-id` property if there is any numa config.
370 if numa_nodes.len() > 1 {
371 for numa_node_idx in 0..numa_nodes.len() {
372 let numa_node = numa_nodes.get(&(numa_node_idx as u32));
373 if numa_node.unwrap().cpus.contains(&(cpu_id as u8)) {
374 fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
375 }
376 }
377 }
378
379 if cache_exist && l1_d_cache_size != 0 && l1_i_cache_size != 0 {
380 // Add cache info.
381 fdt.property_u32("d-cache-size", l1_d_cache_size)?;
382 fdt.property_u32("d-cache-line-size", l1_d_cache_line_size)?;
383 fdt.property_u32("d-cache-sets", l1_d_cache_sets)?;
384
385 fdt.property_u32("i-cache-size", l1_i_cache_size)?;
386 fdt.property_u32("i-cache-line-size", l1_i_cache_line_size)?;
387 fdt.property_u32("i-cache-sets", l1_i_cache_sets)?;
388
389 if l2_cache_size != 0 && !l2_cache_shared {
390 fdt.property_u32(
391 "next-level-cache",
392 cpu_id as u32 + max_cpus + FIRST_VCPU_PHANDLE + L2_CACHE_PHANDLE,
393 )?;
394
395 let l2_cache_name = "l2-cache0";
396 let l2_cache_node = fdt.begin_node(l2_cache_name)?;
397 // PHANDLE is used to mark device node, and PHANDLE is unique. To avoid phandle
398 // conflicts with other device nodes, consider the previous CPU PHANDLE, so the
399 // CPU L2 cache PHANDLE must start from the largest CPU PHANDLE plus 1.
400 fdt.property_u32(
401 "phandle",
402 cpu_id as u32 + max_cpus + FIRST_VCPU_PHANDLE + L2_CACHE_PHANDLE,
403 )?;
404
405 fdt.property_string("compatible", "cache")?;
406 fdt.property_u32("cache-size", l2_cache_size)?;
407 fdt.property_u32("cache-line-size", l2_cache_line_size)?;
408 fdt.property_u32("cache-sets", l2_cache_sets)?;
409 fdt.property_u32("cache-level", 2)?;
410
411 if l3_cache_size != 0 && l3_cache_shared {
412 let package_id: u32 = cpu_id as u32 / cores_per_package as u32;
413 fdt.property_u32(
414 "next-level-cache",
415 package_id
416 + num_cpus as u32
417 + max_cpus
418 + FIRST_VCPU_PHANDLE
419 + L2_CACHE_PHANDLE
420 + L3_CACHE_PHANDLE,
421 )?;
422 }
423
424 fdt.end_node(l2_cache_node)?;
425 }
426 if l2_cache_size != 0 && l2_cache_shared {
427 warn!("L2 cache shared with other cpus");
428 }
429 }
430
431 fdt.end_node(cpu_node)?;
432 }
433
434 if cache_exist && l3_cache_size != 0 && !l2_cache_shared && l3_cache_shared {
435 let mut i: u32 = 0;
436 while i < packages.into() {
437 let l3_cache_name = "l3-cache0";
438 let l3_cache_node = fdt.begin_node(l3_cache_name)?;
439 // ARM L3 cache is generally shared within the package (socket), so the
440 // L3 cache node pointed to by the CPU in the package has the same L3
441 // cache PHANDLE. The L3 cache phandle must start from the largest L2
442 // cache PHANDLE plus 1 to avoid duplication.
443 fdt.property_u32(
444 "phandle",
445 i + num_cpus as u32
446 + max_cpus
447 + FIRST_VCPU_PHANDLE
448 + L2_CACHE_PHANDLE
449 + L3_CACHE_PHANDLE,
450 )?;
451
452 fdt.property_string("compatible", "cache")?;
453 fdt.property_null("cache-unified")?;
454 fdt.property_u32("cache-size", l3_cache_size)?;
455 fdt.property_u32("cache-line-size", l3_cache_line_size)?;
456 fdt.property_u32("cache-sets", l3_cache_sets)?;
457 fdt.property_u32("cache-level", 3)?;
458 fdt.end_node(l3_cache_node)?;
459
460 i += 1;
461 }
462 }
463
464 if let Some(topology) = vcpu_topology {
465 let (threads_per_core, cores_per_package, packages) = topology;
466 let cpu_map_node = fdt.begin_node("cpu-map")?;
467
468 // Create device tree nodes with regard of above mapping.
469 for package_idx in 0..packages {
470 let package_name = format!("socket{package_idx:x}");
471 let package_node = fdt.begin_node(&package_name)?;
472
473 // Cluster is the container of cores, and it is mandatory in the CPU topology.
474 // Add a default "cluster0" in each socket/package.
475 let cluster_node = fdt.begin_node("cluster0")?;
476
477 for core_idx in 0..cores_per_package {
478 let core_name = format!("core{core_idx:x}");
479 let core_node = fdt.begin_node(&core_name)?;
480
481 for thread_idx in 0..threads_per_core {
482 let thread_name = format!("thread{thread_idx:x}");
483 let thread_node = fdt.begin_node(&thread_name)?;
484 let cpu_idx = threads_per_core * cores_per_package * package_idx
485 + threads_per_core * core_idx
486 + thread_idx;
487 fdt.property_u32("cpu", cpu_idx as u32 + FIRST_VCPU_PHANDLE)?;
488 fdt.end_node(thread_node)?;
489 }
490
491 fdt.end_node(core_node)?;
492 }
493 fdt.end_node(cluster_node)?;
494 fdt.end_node(package_node)?;
495 }
496 fdt.end_node(cpu_map_node)?;
497 } else {
498 debug!("Boot using device tree, CPU topology is not (correctly) specified");
499 }
500
501 fdt.end_node(cpus_node)?;
502
503 Ok(())
504 }
505
create_memory_node( fdt: &mut FdtWriter, guest_mem: &GuestMemoryMmap, numa_nodes: &NumaNodes, ) -> FdtWriterResult<()>506 fn create_memory_node(
507 fdt: &mut FdtWriter,
508 guest_mem: &GuestMemoryMmap,
509 numa_nodes: &NumaNodes,
510 ) -> FdtWriterResult<()> {
511 // See https://github.com/torvalds/linux/blob/58ae0b51506802713aa0e9956d1853ba4c722c98/Documentation/devicetree/bindings/numa.txt
512 // for NUMA setting in memory node.
513 if numa_nodes.len() > 1 {
514 for numa_node_idx in 0..numa_nodes.len() {
515 let numa_node = numa_nodes.get(&(numa_node_idx as u32));
516 let mut mem_reg_prop: Vec<u64> = Vec::new();
517 let mut node_memory_addr: u64 = 0;
518 // Each memory zone of numa will have its own memory node, but
519 // different numa nodes should not share same memory zones.
520 for memory_region in numa_node.unwrap().memory_regions.iter() {
521 let memory_region_start_addr: u64 = memory_region.start_addr().raw_value();
522 let memory_region_size: u64 = memory_region.size() as u64;
523 mem_reg_prop.push(memory_region_start_addr);
524 mem_reg_prop.push(memory_region_size);
525 // Set the node address the first non-zero region address
526 if node_memory_addr == 0 {
527 node_memory_addr = memory_region_start_addr;
528 }
529 }
530 let memory_node_name = format!("memory@{node_memory_addr:x}");
531 let memory_node = fdt.begin_node(&memory_node_name)?;
532 fdt.property_string("device_type", "memory")?;
533 fdt.property_array_u64("reg", &mem_reg_prop)?;
534 fdt.property_u32("numa-node-id", numa_node_idx as u32)?;
535 fdt.end_node(memory_node)?;
536 }
537 } else {
538 // Note: memory regions from "GuestMemory" are sorted and non-zero sized.
539 let ram_regions = {
540 let mut ram_regions = Vec::new();
541 let mut current_start = guest_mem
542 .iter()
543 .next()
544 .map(GuestMemoryRegion::start_addr)
545 .expect("GuestMemory must have one memory region at least")
546 .raw_value();
547 let mut current_end = current_start;
548
549 for (start, size) in guest_mem
550 .iter()
551 .map(|m| (m.start_addr().raw_value(), m.len()))
552 {
553 if current_end == start {
554 // This zone is continuous with the previous one.
555 current_end += size;
556 } else {
557 ram_regions.push((current_start, current_end));
558
559 current_start = start;
560 current_end = start + size;
561 }
562 }
563
564 ram_regions.push((current_start, current_end));
565
566 ram_regions
567 };
568
569 if ram_regions.len() > 2 {
570 panic!(
571 "There should be up to two non-continuous regions, divided by the
572 gap at the end of 32bit address space."
573 );
574 }
575
576 // Create the memory node for memory region before the gap
577 {
578 let (first_region_start, first_region_end) = ram_regions
579 .first()
580 .expect("There should be at last one memory region");
581 let ram_start = super::layout::RAM_START.raw_value();
582 let mem_32bit_reserved_start = super::layout::MEM_32BIT_RESERVED_START.raw_value();
583
584 if !((first_region_start <= &ram_start)
585 && (first_region_end > &ram_start)
586 && (first_region_end <= &mem_32bit_reserved_start))
587 {
588 panic!(
589 "Unexpected first memory region layout: (start: 0x{first_region_start:08x}, end: 0x{first_region_end:08x}).
590 ram_start: 0x{ram_start:08x}, mem_32bit_reserved_start: 0x{mem_32bit_reserved_start:08x}"
591 );
592 }
593
594 let mem_size = first_region_end - ram_start;
595 let mem_reg_prop = [ram_start, mem_size];
596 let memory_node_name = format!("memory@{ram_start:x}");
597 let memory_node = fdt.begin_node(&memory_node_name)?;
598 fdt.property_string("device_type", "memory")?;
599 fdt.property_array_u64("reg", &mem_reg_prop)?;
600 fdt.end_node(memory_node)?;
601 }
602
603 // Create the memory map entry for memory region after the gap if any
604 if let Some((second_region_start, second_region_end)) = ram_regions.get(1) {
605 let ram_64bit_start = super::layout::RAM_64BIT_START.raw_value();
606
607 if second_region_start != &ram_64bit_start {
608 panic!(
609 "Unexpected second memory region layout: start: 0x{second_region_start:08x}, ram_64bit_start: 0x{ram_64bit_start:08x}"
610 );
611 }
612
613 let mem_size = second_region_end - ram_64bit_start;
614 let mem_reg_prop = [ram_64bit_start, mem_size];
615 let memory_node_name = format!("memory@{ram_64bit_start:x}");
616 let memory_node = fdt.begin_node(&memory_node_name)?;
617 fdt.property_string("device_type", "memory")?;
618 fdt.property_array_u64("reg", &mem_reg_prop)?;
619 fdt.end_node(memory_node)?;
620 }
621 }
622
623 Ok(())
624 }
625
create_chosen_node( fdt: &mut FdtWriter, cmdline: &str, initrd: &Option<InitramfsConfig>, ) -> FdtWriterResult<()>626 fn create_chosen_node(
627 fdt: &mut FdtWriter,
628 cmdline: &str,
629 initrd: &Option<InitramfsConfig>,
630 ) -> FdtWriterResult<()> {
631 let chosen_node = fdt.begin_node("chosen")?;
632 fdt.property_string("bootargs", cmdline)?;
633
634 if let Some(initrd_config) = initrd {
635 let initrd_start = initrd_config.address.raw_value();
636 let initrd_end = initrd_config.address.raw_value() + initrd_config.size as u64;
637 fdt.property_u64("linux,initrd-start", initrd_start)?;
638 fdt.property_u64("linux,initrd-end", initrd_end)?;
639 }
640
641 fdt.end_node(chosen_node)?;
642
643 Ok(())
644 }
645
create_gic_node(fdt: &mut FdtWriter, gic_device: &Arc<Mutex<dyn Vgic>>) -> FdtWriterResult<()>646 fn create_gic_node(fdt: &mut FdtWriter, gic_device: &Arc<Mutex<dyn Vgic>>) -> FdtWriterResult<()> {
647 let gic_reg_prop = gic_device.lock().unwrap().device_properties();
648
649 let intc_node = fdt.begin_node("intc")?;
650
651 fdt.property_string("compatible", gic_device.lock().unwrap().fdt_compatibility())?;
652 fdt.property_null("interrupt-controller")?;
653 // "interrupt-cells" field specifies the number of cells needed to encode an
654 // interrupt source. The type shall be a <u32> and the value shall be 3 if no PPI affinity description
655 // is required.
656 fdt.property_u32("#interrupt-cells", 3)?;
657 fdt.property_array_u64("reg", &gic_reg_prop)?;
658 fdt.property_u32("phandle", GIC_PHANDLE)?;
659 fdt.property_u32("#address-cells", 2)?;
660 fdt.property_u32("#size-cells", 2)?;
661 fdt.property_null("ranges")?;
662
663 let gic_intr_prop = [
664 GIC_FDT_IRQ_TYPE_PPI,
665 gic_device.lock().unwrap().fdt_maint_irq(),
666 IRQ_TYPE_LEVEL_HI,
667 ];
668 fdt.property_array_u32("interrupts", &gic_intr_prop)?;
669
670 if gic_device.lock().unwrap().msi_compatible() {
671 let msic_node = fdt.begin_node("msic")?;
672 let msi_compatibility = gic_device.lock().unwrap().msi_compatibility().to_string();
673
674 fdt.property_string("compatible", msi_compatibility.as_str())?;
675 fdt.property_null("msi-controller")?;
676 fdt.property_u32("phandle", MSI_PHANDLE)?;
677 let msi_reg_prop = gic_device.lock().unwrap().msi_properties();
678 fdt.property_array_u64("reg", &msi_reg_prop)?;
679
680 if msi_compatibility == GIC_V2M_COMPATIBLE {
681 fdt.property_u32("arm,msi-base-spi", SPI_BASE)?;
682 fdt.property_u32("arm,msi-num-spis", SPI_NUM)?;
683 }
684
685 fdt.end_node(msic_node)?;
686 }
687
688 fdt.end_node(intc_node)?;
689
690 Ok(())
691 }
692
create_clock_node(fdt: &mut FdtWriter) -> FdtWriterResult<()>693 fn create_clock_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
694 // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture
695 // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power
696 // consumption and reduced interface complexity.
697 // PCLK is the clock source and this node defines exactly the clock for the APB.
698 let clock_node = fdt.begin_node("apb-pclk")?;
699 fdt.property_string("compatible", "fixed-clock")?;
700 fdt.property_u32("#clock-cells", 0x0)?;
701 fdt.property_u32("clock-frequency", 24000000)?;
702 fdt.property_string("clock-output-names", "clk24mhz")?;
703 fdt.property_u32("phandle", CLOCK_PHANDLE)?;
704 fdt.end_node(clock_node)?;
705
706 Ok(())
707 }
708
create_timer_node(fdt: &mut FdtWriter) -> FdtWriterResult<()>709 fn create_timer_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
710 // See
711 // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/timer/arm%2Carch_timer.yaml
712 // These are fixed interrupt numbers for the timer device.
713 let irqs = [
714 AARCH64_ARCH_TIMER_PHYS_SECURE_IRQ,
715 AARCH64_ARCH_TIMER_PHYS_NONSECURE_IRQ,
716 AARCH64_ARCH_TIMER_VIRT_IRQ,
717 AARCH64_ARCH_TIMER_HYP_IRQ,
718 ];
719 let compatible = "arm,armv8-timer";
720
721 let mut timer_reg_cells: Vec<u32> = Vec::new();
722 for &irq in irqs.iter() {
723 timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI);
724 timer_reg_cells.push(irq);
725 timer_reg_cells.push(IRQ_TYPE_LEVEL_HI);
726 }
727
728 let timer_node = fdt.begin_node("timer")?;
729 fdt.property_string("compatible", compatible)?;
730 fdt.property_null("always-on")?;
731 fdt.property_array_u32("interrupts", &timer_reg_cells)?;
732 fdt.end_node(timer_node)?;
733
734 Ok(())
735 }
736
create_psci_node(fdt: &mut FdtWriter) -> FdtWriterResult<()>737 fn create_psci_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
738 let compatible = "arm,psci-0.2";
739 let psci_node = fdt.begin_node("psci")?;
740 fdt.property_string("compatible", compatible)?;
741 // Two methods available: hvc and smc.
742 // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC.
743 // So, since we are using kvm, we need to use hvc.
744 fdt.property_string("method", "hvc")?;
745 fdt.end_node(psci_node)?;
746
747 Ok(())
748 }
749
create_virtio_node<T: DeviceInfoForFdt + Clone + Debug>( fdt: &mut FdtWriter, dev_info: &T, ) -> FdtWriterResult<()>750 fn create_virtio_node<T: DeviceInfoForFdt + Clone + Debug>(
751 fdt: &mut FdtWriter,
752 dev_info: &T,
753 ) -> FdtWriterResult<()> {
754 let device_reg_prop = [dev_info.addr(), dev_info.length()];
755 let irq = [GIC_FDT_IRQ_TYPE_SPI, dev_info.irq(), IRQ_TYPE_EDGE_RISING];
756
757 let virtio_node = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr()))?;
758 fdt.property_string("compatible", "virtio,mmio")?;
759 fdt.property_array_u64("reg", &device_reg_prop)?;
760 fdt.property_array_u32("interrupts", &irq)?;
761 fdt.property_u32("interrupt-parent", GIC_PHANDLE)?;
762 fdt.end_node(virtio_node)?;
763
764 Ok(())
765 }
766
create_serial_node<T: DeviceInfoForFdt + Clone + Debug>( fdt: &mut FdtWriter, dev_info: &T, ) -> FdtWriterResult<()>767 fn create_serial_node<T: DeviceInfoForFdt + Clone + Debug>(
768 fdt: &mut FdtWriter,
769 dev_info: &T,
770 ) -> FdtWriterResult<()> {
771 let compatible = b"arm,pl011\0arm,primecell\0";
772 let serial_reg_prop = [dev_info.addr(), dev_info.length()];
773 let irq = [
774 GIC_FDT_IRQ_TYPE_SPI,
775 dev_info.irq() - IRQ_BASE,
776 IRQ_TYPE_EDGE_RISING,
777 ];
778
779 let serial_node = fdt.begin_node(&format!("pl011@{:x}", dev_info.addr()))?;
780 fdt.property("compatible", compatible)?;
781 fdt.property_array_u64("reg", &serial_reg_prop)?;
782 fdt.property_u32("clocks", CLOCK_PHANDLE)?;
783 fdt.property_string("clock-names", "apb_pclk")?;
784 fdt.property_array_u32("interrupts", &irq)?;
785 fdt.end_node(serial_node)?;
786
787 Ok(())
788 }
789
create_rtc_node<T: DeviceInfoForFdt + Clone + Debug>( fdt: &mut FdtWriter, dev_info: &T, ) -> FdtWriterResult<()>790 fn create_rtc_node<T: DeviceInfoForFdt + Clone + Debug>(
791 fdt: &mut FdtWriter,
792 dev_info: &T,
793 ) -> FdtWriterResult<()> {
794 let compatible = b"arm,pl031\0arm,primecell\0";
795 let rtc_reg_prop = [dev_info.addr(), dev_info.length()];
796 let irq = [
797 GIC_FDT_IRQ_TYPE_SPI,
798 dev_info.irq() - IRQ_BASE,
799 IRQ_TYPE_LEVEL_HI,
800 ];
801
802 let rtc_node = fdt.begin_node(&format!("rtc@{:x}", dev_info.addr()))?;
803 fdt.property("compatible", compatible)?;
804 fdt.property_array_u64("reg", &rtc_reg_prop)?;
805 fdt.property_array_u32("interrupts", &irq)?;
806 fdt.property_u32("clocks", CLOCK_PHANDLE)?;
807 fdt.property_string("clock-names", "apb_pclk")?;
808 fdt.end_node(rtc_node)?;
809
810 Ok(())
811 }
812
create_gpio_node<T: DeviceInfoForFdt + Clone + Debug>( fdt: &mut FdtWriter, dev_info: &T, ) -> FdtWriterResult<()>813 fn create_gpio_node<T: DeviceInfoForFdt + Clone + Debug>(
814 fdt: &mut FdtWriter,
815 dev_info: &T,
816 ) -> FdtWriterResult<()> {
817 // PL061 GPIO controller node
818 let compatible = b"arm,pl061\0arm,primecell\0";
819 let gpio_reg_prop = [dev_info.addr(), dev_info.length()];
820 let irq = [
821 GIC_FDT_IRQ_TYPE_SPI,
822 dev_info.irq() - IRQ_BASE,
823 IRQ_TYPE_EDGE_RISING,
824 ];
825
826 let gpio_node = fdt.begin_node(&format!("pl061@{:x}", dev_info.addr()))?;
827 fdt.property("compatible", compatible)?;
828 fdt.property_array_u64("reg", &gpio_reg_prop)?;
829 fdt.property_array_u32("interrupts", &irq)?;
830 fdt.property_null("gpio-controller")?;
831 fdt.property_u32("#gpio-cells", 2)?;
832 fdt.property_u32("clocks", CLOCK_PHANDLE)?;
833 fdt.property_string("clock-names", "apb_pclk")?;
834 fdt.property_u32("phandle", GPIO_PHANDLE)?;
835 fdt.end_node(gpio_node)?;
836
837 // gpio-keys node
838 let gpio_keys_node = fdt.begin_node("gpio-keys")?;
839 fdt.property_string("compatible", "gpio-keys")?;
840 fdt.property_u32("#size-cells", 0)?;
841 fdt.property_u32("#address-cells", 1)?;
842 let gpio_keys_poweroff_node = fdt.begin_node("button@1")?;
843 fdt.property_string("label", "GPIO Key Poweroff")?;
844 fdt.property_u32("linux,code", KEY_POWER)?;
845 let gpios = [GPIO_PHANDLE, 3, 0];
846 fdt.property_array_u32("gpios", &gpios)?;
847 fdt.end_node(gpio_keys_poweroff_node)?;
848 fdt.end_node(gpio_keys_node)?;
849
850 Ok(())
851 }
852
create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( fdt: &mut FdtWriter, dev_info: &HashMap<(DeviceType, String), T, S>, ) -> FdtWriterResult<()>853 fn create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>(
854 fdt: &mut FdtWriter,
855 dev_info: &HashMap<(DeviceType, String), T, S>,
856 ) -> FdtWriterResult<()> {
857 // Create one temp Vec to store all virtio devices
858 let mut ordered_virtio_device: Vec<&T> = Vec::new();
859
860 for ((device_type, _device_id), info) in dev_info {
861 match device_type {
862 DeviceType::Gpio => create_gpio_node(fdt, info)?,
863 DeviceType::Rtc => create_rtc_node(fdt, info)?,
864 DeviceType::Serial => create_serial_node(fdt, info)?,
865 DeviceType::Virtio(_) => {
866 ordered_virtio_device.push(info);
867 }
868 }
869 }
870
871 // Sort out virtio devices by address from low to high and insert them into fdt table.
872 ordered_virtio_device.sort_by_key(|&a| a.addr());
873 // Current address allocation strategy in cloud-hypervisor is: the first created device
874 // will be allocated to higher address. Here we reverse the vector to make sure that
875 // the older created device will appear in front of the newer created device in FDT.
876 ordered_virtio_device.reverse();
877 for ordered_device_info in ordered_virtio_device.drain(..) {
878 create_virtio_node(fdt, ordered_device_info)?;
879 }
880
881 Ok(())
882 }
883
create_pmu_node(fdt: &mut FdtWriter) -> FdtWriterResult<()>884 fn create_pmu_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> {
885 let compatible = "arm,armv8-pmuv3";
886 let irq = [GIC_FDT_IRQ_TYPE_PPI, AARCH64_PMU_IRQ, IRQ_TYPE_LEVEL_HI];
887
888 let pmu_node = fdt.begin_node("pmu")?;
889 fdt.property_string("compatible", compatible)?;
890 fdt.property_array_u32("interrupts", &irq)?;
891 fdt.end_node(pmu_node)?;
892 Ok(())
893 }
894
create_pci_nodes( fdt: &mut FdtWriter, pci_device_info: &[PciSpaceInfo], virtio_iommu_bdf: Option<u32>, ) -> FdtWriterResult<()>895 fn create_pci_nodes(
896 fdt: &mut FdtWriter,
897 pci_device_info: &[PciSpaceInfo],
898 virtio_iommu_bdf: Option<u32>,
899 ) -> FdtWriterResult<()> {
900 // Add node for PCIe controller.
901 // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
902 // and https://elinux.org/Device_Tree_Usage.
903 // In multiple PCI segments setup, each PCI segment needs a PCI node.
904 for pci_device_info_elem in pci_device_info.iter() {
905 // EDK2 requires the PCIe high space above 4G address.
906 // The actual space in CLH follows the RAM. If the RAM space is small, the PCIe high space
907 // could fall below 4G.
908 // Here we cut off PCI device space below 8G in FDT to workaround the EDK2 check.
909 // But the address written in ACPI is not impacted.
910 let (pci_device_base_64bit, pci_device_size_64bit) =
911 if pci_device_info_elem.pci_device_space_start < PCI_HIGH_BASE.raw_value() {
912 (
913 PCI_HIGH_BASE.raw_value(),
914 pci_device_info_elem.pci_device_space_size
915 - (PCI_HIGH_BASE.raw_value() - pci_device_info_elem.pci_device_space_start),
916 )
917 } else {
918 (
919 pci_device_info_elem.pci_device_space_start,
920 pci_device_info_elem.pci_device_space_size,
921 )
922 };
923 // There is no specific requirement of the 32bit MMIO range, and
924 // therefore at least we can make these ranges 4K aligned.
925 let pci_device_size_32bit: u64 =
926 MEM_32BIT_DEVICES_SIZE / ((1 << 12) * pci_device_info.len() as u64) * (1 << 12);
927 let pci_device_base_32bit: u64 = MEM_32BIT_DEVICES_START.0
928 + pci_device_size_32bit * pci_device_info_elem.pci_segment_id as u64;
929
930 let ranges = [
931 // io addresses. Since AArch64 will not use IO address,
932 // we can set the same IO address range for every segment.
933 0x1000000,
934 0_u32,
935 0_u32,
936 (MEM_PCI_IO_START.0 >> 32) as u32,
937 MEM_PCI_IO_START.0 as u32,
938 (MEM_PCI_IO_SIZE >> 32) as u32,
939 MEM_PCI_IO_SIZE as u32,
940 // mmio addresses
941 0x2000000, // (ss = 10: 32-bit memory space)
942 (pci_device_base_32bit >> 32) as u32, // PCI address
943 pci_device_base_32bit as u32,
944 (pci_device_base_32bit >> 32) as u32, // CPU address
945 pci_device_base_32bit as u32,
946 (pci_device_size_32bit >> 32) as u32, // size
947 pci_device_size_32bit as u32,
948 // device addresses
949 0x3000000, // (ss = 11: 64-bit memory space)
950 (pci_device_base_64bit >> 32) as u32, // PCI address
951 pci_device_base_64bit as u32,
952 (pci_device_base_64bit >> 32) as u32, // CPU address
953 pci_device_base_64bit as u32,
954 (pci_device_size_64bit >> 32) as u32, // size
955 pci_device_size_64bit as u32,
956 ];
957 let bus_range = [0, 0]; // Only bus 0
958 let reg = [
959 pci_device_info_elem.mmio_config_address,
960 PCI_MMIO_CONFIG_SIZE_PER_SEGMENT,
961 ];
962 // See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt
963 let msi_map = [
964 // rid-base: A single cell describing the first RID matched by the entry.
965 0x0,
966 // msi-controller: A single phandle to an MSI controller.
967 MSI_PHANDLE,
968 // msi-base: An msi-specifier describing the msi-specifier produced for the
969 // first RID matched by the entry.
970 (pci_device_info_elem.pci_segment_id as u32) << 8,
971 // length: A single cell describing how many consecutive RIDs are matched
972 // following the rid-base.
973 0x100,
974 ];
975
976 let pci_node_name = format!("pci@{:x}", pci_device_info_elem.mmio_config_address);
977 let pci_node = fdt.begin_node(&pci_node_name)?;
978
979 fdt.property_string("compatible", "pci-host-ecam-generic")?;
980 fdt.property_string("device_type", "pci")?;
981 fdt.property_array_u32("ranges", &ranges)?;
982 fdt.property_array_u32("bus-range", &bus_range)?;
983 fdt.property_u32(
984 "linux,pci-domain",
985 pci_device_info_elem.pci_segment_id as u32,
986 )?;
987 fdt.property_u32("#address-cells", 3)?;
988 fdt.property_u32("#size-cells", 2)?;
989 fdt.property_array_u64("reg", ®)?;
990 fdt.property_u32("#interrupt-cells", 1)?;
991 fdt.property_null("interrupt-map")?;
992 fdt.property_null("interrupt-map-mask")?;
993 fdt.property_null("dma-coherent")?;
994 fdt.property_array_u32("msi-map", &msi_map)?;
995 fdt.property_u32("msi-parent", MSI_PHANDLE)?;
996
997 if pci_device_info_elem.pci_segment_id == 0 {
998 if let Some(virtio_iommu_bdf) = virtio_iommu_bdf {
999 // See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt
1000 // for 'iommu-map' attribute setting.
1001 let iommu_map = [
1002 0_u32,
1003 VIRTIO_IOMMU_PHANDLE,
1004 0_u32,
1005 virtio_iommu_bdf,
1006 virtio_iommu_bdf + 1,
1007 VIRTIO_IOMMU_PHANDLE,
1008 virtio_iommu_bdf + 1,
1009 0xffff - virtio_iommu_bdf,
1010 ];
1011 fdt.property_array_u32("iommu-map", &iommu_map)?;
1012
1013 // See kernel document Documentation/devicetree/bindings/virtio/iommu.txt
1014 // for virtio-iommu node settings.
1015 let virtio_iommu_node_name = format!("virtio_iommu@{virtio_iommu_bdf:x}");
1016 let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?;
1017 fdt.property_u32("#iommu-cells", 1)?;
1018 fdt.property_string("compatible", "virtio,pci-iommu")?;
1019
1020 // 'reg' is a five-cell address encoded as
1021 // (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the
1022 // device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells
1023 // should be zero.
1024 let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32];
1025 fdt.property_array_u32("reg", ®)?;
1026 fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?;
1027
1028 fdt.end_node(virtio_iommu_node)?;
1029 }
1030 }
1031
1032 fdt.end_node(pci_node)?;
1033 }
1034
1035 Ok(())
1036 }
1037
create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()>1038 fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()> {
1039 let distance_map_node = fdt.begin_node("distance-map")?;
1040 fdt.property_string("compatible", "numa-distance-map-v1")?;
1041 // Construct the distance matrix.
1042 // 1. We use the word entry to describe a distance from a node to
1043 // its destination, e.g. 0 -> 1 = 20 is described as <0 1 20>.
1044 // 2. Each entry represents distance from first node to second node.
1045 // The distances are equal in either direction.
1046 // 3. The distance from a node to self (local distance) is represented
1047 // with value 10 and all internode distance should be represented with
1048 // a value greater than 10.
1049 // 4. distance-matrix should have entries in lexicographical ascending
1050 // order of nodes.
1051 let mut distance_matrix = Vec::new();
1052 for numa_node_idx in 0..numa_nodes.len() {
1053 let numa_node = numa_nodes.get(&(numa_node_idx as u32));
1054 for dest_numa_node in 0..numa_node.unwrap().distances.len() + 1 {
1055 if numa_node_idx == dest_numa_node {
1056 distance_matrix.push(numa_node_idx as u32);
1057 distance_matrix.push(dest_numa_node as u32);
1058 distance_matrix.push(10_u32);
1059 continue;
1060 }
1061
1062 distance_matrix.push(numa_node_idx as u32);
1063 distance_matrix.push(dest_numa_node as u32);
1064 distance_matrix.push(
1065 *numa_node
1066 .unwrap()
1067 .distances
1068 .get(&(dest_numa_node as u32))
1069 .unwrap() as u32,
1070 );
1071 }
1072 }
1073 fdt.property_array_u32("distance-matrix", distance_matrix.as_ref())?;
1074 fdt.end_node(distance_map_node)?;
1075
1076 Ok(())
1077 }
1078
1079 // Parse the DTB binary and print for debugging
print_fdt(dtb: &[u8])1080 pub fn print_fdt(dtb: &[u8]) {
1081 match fdt_parser::Fdt::new(dtb) {
1082 Ok(fdt) => {
1083 if let Some(root) = fdt.find_node("/") {
1084 debug!("Printing the FDT:");
1085 print_node(root, 0);
1086 } else {
1087 debug!("Failed to find root node in FDT for debugging.");
1088 }
1089 }
1090 Err(_) => debug!("Failed to parse FDT for debugging."),
1091 }
1092 }
1093
print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize)1094 fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) {
1095 debug!("{:indent$}{}/", "", node.name, indent = n_spaces);
1096 for property in node.properties() {
1097 let name = property.name;
1098
1099 // If the property is 'compatible', its value requires special handling.
1100 // The u8 array could contain multiple null-terminated strings.
1101 // We copy the original array and simply replace all 'null' characters with spaces.
1102 let value = if name == "compatible" {
1103 let mut compatible = vec![0u8; 256];
1104 let handled_value = property
1105 .value
1106 .iter()
1107 .map(|&c| if c == 0 { b' ' } else { c })
1108 .collect::<Vec<_>>();
1109 let len = cmp::min(255, handled_value.len());
1110 compatible[..len].copy_from_slice(&handled_value[..len]);
1111 compatible[..(len + 1)].to_vec()
1112 } else {
1113 property.value.to_vec()
1114 };
1115 let value = &value;
1116
1117 // Now the value can be either:
1118 // - A null-terminated C string, or
1119 // - Binary data
1120 // We follow a very simple logic to present the value:
1121 // - At first, try to convert it to CStr and print,
1122 // - If failed, print it as u32 array.
1123 let value_result = match CStr::from_bytes_with_nul(value) {
1124 Ok(value_cstr) => value_cstr.to_str().ok(),
1125 Err(_e) => None,
1126 };
1127
1128 if let Some(value_str) = value_result {
1129 debug!(
1130 "{:indent$}{} : {:#?}",
1131 "",
1132 name,
1133 value_str,
1134 indent = (n_spaces + 2)
1135 );
1136 } else {
1137 let mut array = Vec::with_capacity(256);
1138 array.resize(value.len() / 4, 0u32);
1139 BigEndian::read_u32_into(value, &mut array);
1140 debug!(
1141 "{:indent$}{} : {:X?}",
1142 "",
1143 name,
1144 array,
1145 indent = (n_spaces + 2)
1146 );
1147 };
1148 }
1149
1150 // Print children nodes if there is any
1151 for child in node.children() {
1152 print_node(child, n_spaces + 2);
1153 }
1154 }
1155