1 // Copyright 2020 Arm Limited (or its affiliates). All rights reserved. 2 // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 6 // Use of this source code is governed by a BSD-style license that can be 7 // found in the THIRD-PARTY file. 8 9 use crate::{NumaNodes, PciSpaceInfo}; 10 use byteorder::{BigEndian, ByteOrder}; 11 use std::cmp; 12 use std::collections::HashMap; 13 use std::ffi::CStr; 14 use std::fmt::Debug; 15 use std::result; 16 use std::str; 17 18 use super::super::DeviceType; 19 use super::super::GuestMemoryMmap; 20 use super::super::InitramfsConfig; 21 use super::gic::GicDevice; 22 use super::layout::{ 23 IRQ_BASE, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_PCI_IO_SIZE, MEM_PCI_IO_START, 24 PCI_HIGH_BASE, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, 25 }; 26 use vm_fdt::{FdtWriter, FdtWriterResult}; 27 use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryRegion}; 28 29 // This is a value for uniquely identifying the FDT node declaring the interrupt controller. 30 const GIC_PHANDLE: u32 = 1; 31 // This is a value for uniquely identifying the FDT node declaring the MSI controller. 32 const MSI_PHANDLE: u32 = 2; 33 // This is a value for uniquely identifying the FDT node containing the clock definition. 34 const CLOCK_PHANDLE: u32 = 3; 35 // This is a value for uniquely identifying the FDT node containing the gpio controller. 36 const GPIO_PHANDLE: u32 = 4; 37 // This is a value for virtio-iommu. Now only one virtio-iommu device is supported. 38 const VIRTIO_IOMMU_PHANDLE: u32 = 5; 39 // NOTE: Keep FIRST_VCPU_PHANDLE the last PHANDLE defined. 40 // This is a value for uniquely identifying the FDT node containing the first vCPU. 41 // The last number of vCPU phandle depends on the number of vCPUs. 42 const FIRST_VCPU_PHANDLE: u32 = 6; 43 44 // Read the documentation specified when appending the root node to the FDT. 45 const ADDRESS_CELLS: u32 = 0x2; 46 const SIZE_CELLS: u32 = 0x2; 47 48 // As per kvm tool and 49 // https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt 50 // Look for "The 1st cell..." 51 const GIC_FDT_IRQ_TYPE_SPI: u32 = 0; 52 const GIC_FDT_IRQ_TYPE_PPI: u32 = 1; 53 const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8; 54 const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << GIC_FDT_IRQ_PPI_CPU_SHIFT; 55 56 // From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17 57 const IRQ_TYPE_EDGE_RISING: u32 = 1; 58 const IRQ_TYPE_LEVEL_HI: u32 = 4; 59 60 // PMU PPI interrupt number 61 pub const AARCH64_PMU_IRQ: u32 = 7; 62 63 // Keys and Buttons 64 // System Power Down 65 const KEY_POWER: u32 = 116; 66 67 /// Trait for devices to be added to the Flattened Device Tree. 68 pub trait DeviceInfoForFdt { 69 /// Returns the address where this device will be loaded. 70 fn addr(&self) -> u64; 71 /// Returns the associated interrupt for this device. 72 fn irq(&self) -> u32; 73 /// Returns the amount of memory that needs to be reserved for this device. 74 fn length(&self) -> u64; 75 } 76 77 /// Errors thrown while configuring the Flattened Device Tree for aarch64. 78 #[derive(Debug)] 79 pub enum Error { 80 /// Failure in writing FDT in memory. 81 WriteFdtToMemory(GuestMemoryError), 82 } 83 type Result<T> = result::Result<T, Error>; 84 85 /// Creates the flattened device tree for this aarch64 VM. 86 #[allow(clippy::too_many_arguments)] 87 pub fn create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( 88 guest_mem: &GuestMemoryMmap, 89 cmdline: &str, 90 vcpu_mpidr: Vec<u64>, 91 vcpu_topology: Option<(u8, u8, u8)>, 92 device_info: &HashMap<(DeviceType, String), T, S>, 93 gic_device: &dyn GicDevice, 94 initrd: &Option<InitramfsConfig>, 95 pci_space_info: &[PciSpaceInfo], 96 numa_nodes: &NumaNodes, 97 virtio_iommu_bdf: Option<u32>, 98 pmu_supported: bool, 99 ) -> FdtWriterResult<Vec<u8>> { 100 // Allocate stuff necessary for the holding the blob. 101 let mut fdt = FdtWriter::new().unwrap(); 102 103 // For an explanation why these nodes were introduced in the blob take a look at 104 // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845 105 // Look for "Required nodes and properties". 106 107 // Header or the root node as per above mentioned documentation. 108 let root_node = fdt.begin_node("")?; 109 fdt.property_string("compatible", "linux,dummy-virt")?; 110 // For info on #address-cells and size-cells read "Note about cells and address representation" 111 // from the above mentioned txt file. 112 fdt.property_u32("#address-cells", ADDRESS_CELLS)?; 113 fdt.property_u32("#size-cells", SIZE_CELLS)?; 114 // This is not mandatory but we use it to point the root node to the node 115 // containing description of the interrupt controller for this VM. 116 fdt.property_u32("interrupt-parent", GIC_PHANDLE)?; 117 create_cpu_nodes(&mut fdt, &vcpu_mpidr, vcpu_topology, numa_nodes)?; 118 create_memory_node(&mut fdt, guest_mem, numa_nodes)?; 119 create_chosen_node(&mut fdt, cmdline, initrd)?; 120 create_gic_node(&mut fdt, gic_device)?; 121 create_timer_node(&mut fdt)?; 122 if pmu_supported { 123 create_pmu_node(&mut fdt, vcpu_mpidr.len())?; 124 } 125 create_clock_node(&mut fdt)?; 126 create_psci_node(&mut fdt)?; 127 create_devices_node(&mut fdt, device_info)?; 128 create_pci_nodes(&mut fdt, pci_space_info, virtio_iommu_bdf)?; 129 if numa_nodes.len() > 1 { 130 create_distance_map_node(&mut fdt, numa_nodes)?; 131 } 132 133 // End Header node. 134 fdt.end_node(root_node)?; 135 136 let fdt_final = fdt.finish()?; 137 138 Ok(fdt_final) 139 } 140 141 pub fn write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()> { 142 // Write FDT to memory. 143 guest_mem 144 .write_slice(fdt_final.as_slice(), super::layout::FDT_START) 145 .map_err(Error::WriteFdtToMemory)?; 146 Ok(()) 147 } 148 149 // Following are the auxiliary function for creating the different nodes that we append to our FDT. 150 fn create_cpu_nodes( 151 fdt: &mut FdtWriter, 152 vcpu_mpidr: &[u64], 153 vcpu_topology: Option<(u8, u8, u8)>, 154 numa_nodes: &NumaNodes, 155 ) -> FdtWriterResult<()> { 156 // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml. 157 let cpus_node = fdt.begin_node("cpus")?; 158 fdt.property_u32("#address-cells", 0x1)?; 159 fdt.property_u32("#size-cells", 0x0)?; 160 161 let num_cpus = vcpu_mpidr.len(); 162 163 for (cpu_id, mpidr) in vcpu_mpidr.iter().enumerate().take(num_cpus) { 164 let cpu_name = format!("cpu@{:x}", cpu_id); 165 let cpu_node = fdt.begin_node(&cpu_name)?; 166 fdt.property_string("device_type", "cpu")?; 167 fdt.property_string("compatible", "arm,arm-v8")?; 168 if num_cpus > 1 { 169 // This is required on armv8 64-bit. See aforementioned documentation. 170 fdt.property_string("enable-method", "psci")?; 171 } 172 // Set the field to first 24 bits of the MPIDR - Multiprocessor Affinity Register. 173 // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html. 174 fdt.property_u32("reg", (mpidr & 0x7FFFFF) as u32)?; 175 fdt.property_u32("phandle", cpu_id as u32 + FIRST_VCPU_PHANDLE)?; 176 177 // Add `numa-node-id` property if there is any numa config. 178 if numa_nodes.len() > 1 { 179 for numa_node_idx in 0..numa_nodes.len() { 180 let numa_node = numa_nodes.get(&(numa_node_idx as u32)); 181 if numa_node.unwrap().cpus.contains(&(cpu_id as u8)) { 182 fdt.property_u32("numa-node-id", numa_node_idx as u32)?; 183 } 184 } 185 } 186 187 fdt.end_node(cpu_node)?; 188 } 189 190 if let Some(topology) = vcpu_topology { 191 let (threads_per_core, cores_per_package, packages) = topology; 192 let cpu_map_node = fdt.begin_node("cpu-map")?; 193 194 // Create device tree nodes with regard of above mapping. 195 for cluster_idx in 0..packages { 196 let cluster_name = format!("cluster{:x}", cluster_idx); 197 let cluster_node = fdt.begin_node(&cluster_name)?; 198 199 for core_idx in 0..cores_per_package { 200 let core_name = format!("core{:x}", core_idx); 201 let core_node = fdt.begin_node(&core_name)?; 202 203 for thread_idx in 0..threads_per_core { 204 let thread_name = format!("thread{:x}", thread_idx); 205 let thread_node = fdt.begin_node(&thread_name)?; 206 let cpu_idx = threads_per_core * cores_per_package * cluster_idx 207 + threads_per_core * core_idx 208 + thread_idx; 209 fdt.property_u32("cpu", cpu_idx as u32 + FIRST_VCPU_PHANDLE)?; 210 fdt.end_node(thread_node)?; 211 } 212 213 fdt.end_node(core_node)?; 214 } 215 fdt.end_node(cluster_node)?; 216 } 217 fdt.end_node(cpu_map_node)?; 218 } else { 219 debug!("Boot using device tree, CPU topology is not (correctly) specified"); 220 } 221 222 fdt.end_node(cpus_node)?; 223 224 Ok(()) 225 } 226 227 fn create_memory_node( 228 fdt: &mut FdtWriter, 229 guest_mem: &GuestMemoryMmap, 230 numa_nodes: &NumaNodes, 231 ) -> FdtWriterResult<()> { 232 // See https://github.com/torvalds/linux/blob/58ae0b51506802713aa0e9956d1853ba4c722c98/Documentation/devicetree/bindings/numa.txt 233 // for NUMA setting in memory node. 234 if numa_nodes.len() > 1 { 235 for numa_node_idx in 0..numa_nodes.len() { 236 let numa_node = numa_nodes.get(&(numa_node_idx as u32)); 237 let mut mem_reg_prop: Vec<u64> = Vec::new(); 238 let mut node_memory_addr: u64 = 0; 239 // Each memory zone of numa will have its own memory node, but 240 // different numa nodes should not share same memory zones. 241 for memory_region in numa_node.unwrap().memory_regions.iter() { 242 let memory_region_start_addr: u64 = memory_region.start_addr().raw_value(); 243 let memory_region_size: u64 = memory_region.size() as u64; 244 // RAM at 0-4M is hidden to the guest for edk2 245 if memory_region_start_addr == 0 { 246 continue; 247 } 248 mem_reg_prop.push(memory_region_start_addr); 249 mem_reg_prop.push(memory_region_size); 250 // Set the node address the first non-zero regison address 251 if node_memory_addr == 0 { 252 node_memory_addr = memory_region_start_addr; 253 } 254 } 255 let memory_node_name = format!("memory@{:x}", node_memory_addr); 256 let memory_node = fdt.begin_node(&memory_node_name)?; 257 fdt.property_string("device_type", "memory")?; 258 fdt.property_array_u64("reg", &mem_reg_prop)?; 259 fdt.property_u32("numa-node-id", numa_node_idx as u32)?; 260 fdt.end_node(memory_node)?; 261 } 262 } else { 263 let last_addr = guest_mem.last_addr().raw_value(); 264 if last_addr < super::layout::MEM_32BIT_RESERVED_START.raw_value() { 265 // Case 1: all RAM is under the hole 266 let mem_size = last_addr - super::layout::RAM_START.raw_value() + 1; 267 let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64]; 268 let memory_node = fdt.begin_node("memory")?; 269 fdt.property_string("device_type", "memory")?; 270 fdt.property_array_u64("reg", &mem_reg_prop)?; 271 fdt.end_node(memory_node)?; 272 } else { 273 // Case 2: RAM is split by the hole 274 // Region 1: RAM before the hole 275 let mem_size = super::layout::MEM_32BIT_RESERVED_START.raw_value() 276 - super::layout::RAM_START.raw_value(); 277 let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64]; 278 let memory_node_name = format!("memory@{:x}", super::layout::RAM_START.raw_value()); 279 let memory_node = fdt.begin_node(&memory_node_name)?; 280 fdt.property_string("device_type", "memory")?; 281 fdt.property_array_u64("reg", &mem_reg_prop)?; 282 fdt.end_node(memory_node)?; 283 284 // Region 2: RAM after the hole 285 let mem_size = last_addr - super::layout::RAM_64BIT_START.raw_value() + 1; 286 let mem_reg_prop = [ 287 super::layout::RAM_64BIT_START.raw_value() as u64, 288 mem_size as u64, 289 ]; 290 let memory_node_name = 291 format!("memory@{:x}", super::layout::RAM_64BIT_START.raw_value()); 292 let memory_node = fdt.begin_node(&memory_node_name)?; 293 fdt.property_string("device_type", "memory")?; 294 fdt.property_array_u64("reg", &mem_reg_prop)?; 295 fdt.end_node(memory_node)?; 296 } 297 } 298 299 Ok(()) 300 } 301 302 fn create_chosen_node( 303 fdt: &mut FdtWriter, 304 cmdline: &str, 305 initrd: &Option<InitramfsConfig>, 306 ) -> FdtWriterResult<()> { 307 let chosen_node = fdt.begin_node("chosen")?; 308 fdt.property_string("bootargs", cmdline)?; 309 310 if let Some(initrd_config) = initrd { 311 let initrd_start = initrd_config.address.raw_value() as u64; 312 let initrd_end = initrd_config.address.raw_value() + initrd_config.size as u64; 313 fdt.property_u64("linux,initrd-start", initrd_start)?; 314 fdt.property_u64("linux,initrd-end", initrd_end)?; 315 } 316 317 fdt.end_node(chosen_node)?; 318 319 Ok(()) 320 } 321 322 fn create_gic_node(fdt: &mut FdtWriter, gic_device: &dyn GicDevice) -> FdtWriterResult<()> { 323 let gic_reg_prop = gic_device.device_properties(); 324 325 let intc_node = fdt.begin_node("intc")?; 326 327 fdt.property_string("compatible", gic_device.fdt_compatibility())?; 328 fdt.property_null("interrupt-controller")?; 329 // "interrupt-cells" field specifies the number of cells needed to encode an 330 // interrupt source. The type shall be a <u32> and the value shall be 3 if no PPI affinity description 331 // is required. 332 fdt.property_u32("#interrupt-cells", 3)?; 333 fdt.property_array_u64("reg", gic_reg_prop)?; 334 fdt.property_u32("phandle", GIC_PHANDLE)?; 335 fdt.property_u32("#address-cells", 2)?; 336 fdt.property_u32("#size-cells", 2)?; 337 fdt.property_null("ranges")?; 338 339 let gic_intr_prop = [ 340 GIC_FDT_IRQ_TYPE_PPI, 341 gic_device.fdt_maint_irq(), 342 IRQ_TYPE_LEVEL_HI, 343 ]; 344 fdt.property_array_u32("interrupts", &gic_intr_prop)?; 345 346 if gic_device.msi_compatible() { 347 let msic_node = fdt.begin_node("msic")?; 348 fdt.property_string("compatible", gic_device.msi_compatibility())?; 349 fdt.property_null("msi-controller")?; 350 fdt.property_u32("phandle", MSI_PHANDLE)?; 351 let msi_reg_prop = gic_device.msi_properties(); 352 fdt.property_array_u64("reg", msi_reg_prop)?; 353 fdt.end_node(msic_node)?; 354 } 355 356 fdt.end_node(intc_node)?; 357 358 Ok(()) 359 } 360 361 fn create_clock_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> { 362 // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture 363 // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power 364 // consumption and reduced interface complexity. 365 // PCLK is the clock source and this node defines exactly the clock for the APB. 366 let clock_node = fdt.begin_node("apb-pclk")?; 367 fdt.property_string("compatible", "fixed-clock")?; 368 fdt.property_u32("#clock-cells", 0x0)?; 369 fdt.property_u32("clock-frequency", 24000000)?; 370 fdt.property_string("clock-output-names", "clk24mhz")?; 371 fdt.property_u32("phandle", CLOCK_PHANDLE)?; 372 fdt.end_node(clock_node)?; 373 374 Ok(()) 375 } 376 377 fn create_timer_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> { 378 // See 379 // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/interrupt-controller/arch_timer.txt 380 // These are fixed interrupt numbers for the timer device. 381 let irqs = [13, 14, 11, 10]; 382 let compatible = "arm,armv8-timer"; 383 384 let mut timer_reg_cells: Vec<u32> = Vec::new(); 385 for &irq in irqs.iter() { 386 timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI); 387 timer_reg_cells.push(irq); 388 timer_reg_cells.push(IRQ_TYPE_LEVEL_HI); 389 } 390 391 let timer_node = fdt.begin_node("timer")?; 392 fdt.property_string("compatible", compatible)?; 393 fdt.property_null("always-on")?; 394 fdt.property_array_u32("interrupts", &timer_reg_cells)?; 395 fdt.end_node(timer_node)?; 396 397 Ok(()) 398 } 399 400 fn create_psci_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> { 401 let compatible = "arm,psci-0.2"; 402 let psci_node = fdt.begin_node("psci")?; 403 fdt.property_string("compatible", compatible)?; 404 // Two methods available: hvc and smc. 405 // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC. 406 // So, since we are using kvm, we need to use hvc. 407 fdt.property_string("method", "hvc")?; 408 fdt.end_node(psci_node)?; 409 410 Ok(()) 411 } 412 413 fn create_virtio_node<T: DeviceInfoForFdt + Clone + Debug>( 414 fdt: &mut FdtWriter, 415 dev_info: &T, 416 ) -> FdtWriterResult<()> { 417 let device_reg_prop = [dev_info.addr(), dev_info.length()]; 418 let irq = [GIC_FDT_IRQ_TYPE_SPI, dev_info.irq(), IRQ_TYPE_EDGE_RISING]; 419 420 let virtio_node = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr()))?; 421 fdt.property_string("compatible", "virtio,mmio")?; 422 fdt.property_array_u64("reg", &device_reg_prop)?; 423 fdt.property_array_u32("interrupts", &irq)?; 424 fdt.property_u32("interrupt-parent", GIC_PHANDLE)?; 425 fdt.end_node(virtio_node)?; 426 427 Ok(()) 428 } 429 430 fn create_serial_node<T: DeviceInfoForFdt + Clone + Debug>( 431 fdt: &mut FdtWriter, 432 dev_info: &T, 433 ) -> FdtWriterResult<()> { 434 let compatible = b"arm,pl011\0arm,primecell\0"; 435 let serial_reg_prop = [dev_info.addr(), dev_info.length()]; 436 let irq = [ 437 GIC_FDT_IRQ_TYPE_SPI, 438 dev_info.irq() - IRQ_BASE, 439 IRQ_TYPE_EDGE_RISING, 440 ]; 441 442 let serial_node = fdt.begin_node(&format!("pl011@{:x}", dev_info.addr()))?; 443 fdt.property("compatible", compatible)?; 444 fdt.property_array_u64("reg", &serial_reg_prop)?; 445 fdt.property_u32("clocks", CLOCK_PHANDLE)?; 446 fdt.property_string("clock-names", "apb_pclk")?; 447 fdt.property_array_u32("interrupts", &irq)?; 448 fdt.end_node(serial_node)?; 449 450 Ok(()) 451 } 452 453 fn create_rtc_node<T: DeviceInfoForFdt + Clone + Debug>( 454 fdt: &mut FdtWriter, 455 dev_info: &T, 456 ) -> FdtWriterResult<()> { 457 let compatible = b"arm,pl031\0arm,primecell\0"; 458 let rtc_reg_prop = [dev_info.addr(), dev_info.length()]; 459 let irq = [ 460 GIC_FDT_IRQ_TYPE_SPI, 461 dev_info.irq() - IRQ_BASE, 462 IRQ_TYPE_LEVEL_HI, 463 ]; 464 465 let rtc_node = fdt.begin_node(&format!("rtc@{:x}", dev_info.addr()))?; 466 fdt.property("compatible", compatible)?; 467 fdt.property_array_u64("reg", &rtc_reg_prop)?; 468 fdt.property_array_u32("interrupts", &irq)?; 469 fdt.property_u32("clocks", CLOCK_PHANDLE)?; 470 fdt.property_string("clock-names", "apb_pclk")?; 471 fdt.end_node(rtc_node)?; 472 473 Ok(()) 474 } 475 476 fn create_gpio_node<T: DeviceInfoForFdt + Clone + Debug>( 477 fdt: &mut FdtWriter, 478 dev_info: &T, 479 ) -> FdtWriterResult<()> { 480 // PL061 GPIO controller node 481 let compatible = b"arm,pl061\0arm,primecell\0"; 482 let gpio_reg_prop = [dev_info.addr(), dev_info.length()]; 483 let irq = [ 484 GIC_FDT_IRQ_TYPE_SPI, 485 dev_info.irq() - IRQ_BASE, 486 IRQ_TYPE_EDGE_RISING, 487 ]; 488 489 let gpio_node = fdt.begin_node(&format!("pl061@{:x}", dev_info.addr()))?; 490 fdt.property("compatible", compatible)?; 491 fdt.property_array_u64("reg", &gpio_reg_prop)?; 492 fdt.property_array_u32("interrupts", &irq)?; 493 fdt.property_null("gpio-controller")?; 494 fdt.property_u32("#gpio-cells", 2)?; 495 fdt.property_u32("clocks", CLOCK_PHANDLE)?; 496 fdt.property_string("clock-names", "apb_pclk")?; 497 fdt.property_u32("phandle", GPIO_PHANDLE)?; 498 fdt.end_node(gpio_node)?; 499 500 // gpio-keys node 501 let gpio_keys_node = fdt.begin_node("gpio-keys")?; 502 fdt.property_string("compatible", "gpio-keys")?; 503 fdt.property_u32("#size-cells", 0)?; 504 fdt.property_u32("#address-cells", 1)?; 505 let gpio_keys_poweroff_node = fdt.begin_node("button@1")?; 506 fdt.property_string("label", "GPIO Key Poweroff")?; 507 fdt.property_u32("linux,code", KEY_POWER)?; 508 let gpios = [GPIO_PHANDLE, 3, 0]; 509 fdt.property_array_u32("gpios", &gpios)?; 510 fdt.end_node(gpio_keys_poweroff_node)?; 511 fdt.end_node(gpio_keys_node)?; 512 513 Ok(()) 514 } 515 516 fn create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( 517 fdt: &mut FdtWriter, 518 dev_info: &HashMap<(DeviceType, String), T, S>, 519 ) -> FdtWriterResult<()> { 520 // Create one temp Vec to store all virtio devices 521 let mut ordered_virtio_device: Vec<&T> = Vec::new(); 522 523 for ((device_type, _device_id), info) in dev_info { 524 match device_type { 525 DeviceType::Gpio => create_gpio_node(fdt, info)?, 526 DeviceType::Rtc => create_rtc_node(fdt, info)?, 527 DeviceType::Serial => create_serial_node(fdt, info)?, 528 DeviceType::Virtio(_) => { 529 ordered_virtio_device.push(info); 530 } 531 } 532 } 533 534 // Sort out virtio devices by address from low to high and insert them into fdt table. 535 ordered_virtio_device.sort_by_key(|&a| a.addr()); 536 // Current address allocation strategy in cloud-hypervisor is: the first created device 537 // will be allocated to higher address. Here we reverse the vector to make sure that 538 // the older created device will appear in front of the newer created device in FDT. 539 ordered_virtio_device.reverse(); 540 for ordered_device_info in ordered_virtio_device.drain(..) { 541 create_virtio_node(fdt, ordered_device_info)?; 542 } 543 544 Ok(()) 545 } 546 547 fn create_pmu_node(fdt: &mut FdtWriter, cpu_nums: usize) -> FdtWriterResult<()> { 548 let num_cpus = cpu_nums as u64 as u32; 549 let compatible = "arm,armv8-pmuv3"; 550 let cpu_mask: u32 = 551 (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK; 552 let irq = [ 553 GIC_FDT_IRQ_TYPE_PPI, 554 AARCH64_PMU_IRQ, 555 cpu_mask | IRQ_TYPE_LEVEL_HI, 556 ]; 557 558 let pmu_node = fdt.begin_node("pmu")?; 559 fdt.property_string("compatible", compatible)?; 560 fdt.property_array_u32("interrupts", &irq)?; 561 fdt.end_node(pmu_node)?; 562 Ok(()) 563 } 564 565 fn create_pci_nodes( 566 fdt: &mut FdtWriter, 567 pci_device_info: &[PciSpaceInfo], 568 virtio_iommu_bdf: Option<u32>, 569 ) -> FdtWriterResult<()> { 570 // Add node for PCIe controller. 571 // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel 572 // and https://elinux.org/Device_Tree_Usage. 573 // In multiple PCI segments setup, each PCI segment needs a PCI node. 574 for pci_device_info_elem in pci_device_info.iter() { 575 // EDK2 requires the PCIe high space above 4G address. 576 // The actual space in CLH follows the RAM. If the RAM space is small, the PCIe high space 577 // could fall bellow 4G. 578 // Here we cut off PCI device space below 8G in FDT to workaround the EDK2 check. 579 // But the address written in ACPI is not impacted. 580 let (pci_device_base_64bit, pci_device_size_64bit) = 581 if pci_device_info_elem.pci_device_space_start < PCI_HIGH_BASE.raw_value() { 582 ( 583 PCI_HIGH_BASE.raw_value(), 584 pci_device_info_elem.pci_device_space_size 585 - (PCI_HIGH_BASE.raw_value() - pci_device_info_elem.pci_device_space_start), 586 ) 587 } else { 588 ( 589 pci_device_info_elem.pci_device_space_start, 590 pci_device_info_elem.pci_device_space_size, 591 ) 592 }; 593 // There is no specific requirement of the 32bit MMIO range, and 594 // therefore at least we can make these ranges 4K aligned. 595 let pci_device_size_32bit: u64 = 596 MEM_32BIT_DEVICES_SIZE / ((1 << 12) * pci_device_info.len() as u64) * (1 << 12); 597 let pci_device_base_32bit: u64 = MEM_32BIT_DEVICES_START.0 598 + pci_device_size_32bit * pci_device_info_elem.pci_segment_id as u64; 599 600 let ranges = [ 601 // io addresses. Since AArch64 will not use IO address, 602 // we can set the same IO address range for every segment. 603 0x1000000, 604 0_u32, 605 0_u32, 606 (MEM_PCI_IO_START.0 >> 32) as u32, 607 MEM_PCI_IO_START.0 as u32, 608 (MEM_PCI_IO_SIZE >> 32) as u32, 609 MEM_PCI_IO_SIZE as u32, 610 // mmio addresses 611 0x2000000, // (ss = 10: 32-bit memory space) 612 (pci_device_base_32bit >> 32) as u32, // PCI address 613 pci_device_base_32bit as u32, 614 (pci_device_base_32bit >> 32) as u32, // CPU address 615 pci_device_base_32bit as u32, 616 (pci_device_size_32bit >> 32) as u32, // size 617 pci_device_size_32bit as u32, 618 // device addresses 619 0x3000000, // (ss = 11: 64-bit memory space) 620 (pci_device_base_64bit >> 32) as u32, // PCI address 621 pci_device_base_64bit as u32, 622 (pci_device_base_64bit >> 32) as u32, // CPU address 623 pci_device_base_64bit as u32, 624 (pci_device_size_64bit >> 32) as u32, // size 625 pci_device_size_64bit as u32, 626 ]; 627 let bus_range = [0, 0]; // Only bus 0 628 let reg = [ 629 pci_device_info_elem.mmio_config_address, 630 PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, 631 ]; 632 // See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt 633 let msi_map = [ 634 // rid-base: A single cell describing the first RID matched by the entry. 635 0x0, 636 // msi-controller: A single phandle to an MSI controller. 637 MSI_PHANDLE, 638 // msi-base: An msi-specifier describing the msi-specifier produced for the 639 // first RID matched by the entry. 640 (pci_device_info_elem.pci_segment_id as u32) << 8, 641 // length: A single cell describing how many consecutive RIDs are matched 642 // following the rid-base. 643 0x100, 644 ]; 645 646 let pci_node_name = format!("pci@{:x}", pci_device_info_elem.mmio_config_address); 647 let pci_node = fdt.begin_node(&pci_node_name)?; 648 649 fdt.property_string("compatible", "pci-host-ecam-generic")?; 650 fdt.property_string("device_type", "pci")?; 651 fdt.property_array_u32("ranges", &ranges)?; 652 fdt.property_array_u32("bus-range", &bus_range)?; 653 fdt.property_u32( 654 "linux,pci-domain", 655 pci_device_info_elem.pci_segment_id as u32, 656 )?; 657 fdt.property_u32("#address-cells", 3)?; 658 fdt.property_u32("#size-cells", 2)?; 659 fdt.property_array_u64("reg", ®)?; 660 fdt.property_u32("#interrupt-cells", 1)?; 661 fdt.property_null("interrupt-map")?; 662 fdt.property_null("interrupt-map-mask")?; 663 fdt.property_null("dma-coherent")?; 664 fdt.property_array_u32("msi-map", &msi_map)?; 665 fdt.property_u32("msi-parent", MSI_PHANDLE)?; 666 667 if pci_device_info_elem.pci_segment_id == 0 { 668 if let Some(virtio_iommu_bdf) = virtio_iommu_bdf { 669 // See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt 670 // for 'iommu-map' attribute setting. 671 let iommu_map = [ 672 0_u32, 673 VIRTIO_IOMMU_PHANDLE, 674 0_u32, 675 virtio_iommu_bdf, 676 virtio_iommu_bdf + 1, 677 VIRTIO_IOMMU_PHANDLE, 678 virtio_iommu_bdf + 1, 679 0xffff - virtio_iommu_bdf, 680 ]; 681 fdt.property_array_u32("iommu-map", &iommu_map)?; 682 683 // See kernel document Documentation/devicetree/bindings/virtio/iommu.txt 684 // for virtio-iommu node settings. 685 let virtio_iommu_node_name = format!("virtio_iommu@{:x}", virtio_iommu_bdf); 686 let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?; 687 fdt.property_u32("#iommu-cells", 1)?; 688 fdt.property_string("compatible", "virtio,pci-iommu")?; 689 690 // 'reg' is a five-cell address encoded as 691 // (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the 692 // device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells 693 // should be zero. 694 let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32]; 695 fdt.property_array_u32("reg", ®)?; 696 fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?; 697 698 fdt.end_node(virtio_iommu_node)?; 699 } 700 } 701 702 fdt.end_node(pci_node)?; 703 } 704 705 Ok(()) 706 } 707 708 fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()> { 709 let distance_map_node = fdt.begin_node("distance-map")?; 710 fdt.property_string("compatible", "numa-distance-map-v1")?; 711 // Construct the distance matrix. 712 // 1. We use the word entry to describe a distance from a node to 713 // its destination, e.g. 0 -> 1 = 20 is described as <0 1 20>. 714 // 2. Each entry represents distance from first node to second node. 715 // The distances are equal in either direction. 716 // 3. The distance from a node to self (local distance) is represented 717 // with value 10 and all internode distance should be represented with 718 // a value greater than 10. 719 // 4. distance-matrix should have entries in lexicographical ascending 720 // order of nodes. 721 let mut distance_matrix = Vec::new(); 722 for numa_node_idx in 0..numa_nodes.len() { 723 let numa_node = numa_nodes.get(&(numa_node_idx as u32)); 724 for dest_numa_node in 0..numa_node.unwrap().distances.len() + 1 { 725 if numa_node_idx == dest_numa_node { 726 distance_matrix.push(numa_node_idx as u32); 727 distance_matrix.push(dest_numa_node as u32); 728 distance_matrix.push(10_u32); 729 continue; 730 } 731 732 distance_matrix.push(numa_node_idx as u32); 733 distance_matrix.push(dest_numa_node as u32); 734 distance_matrix.push( 735 *numa_node 736 .unwrap() 737 .distances 738 .get(&(dest_numa_node as u32)) 739 .unwrap() as u32, 740 ); 741 } 742 } 743 fdt.property_array_u32("distance-matrix", distance_matrix.as_ref())?; 744 fdt.end_node(distance_map_node)?; 745 746 Ok(()) 747 } 748 749 // Parse the DTB binary and print for debugging 750 pub fn print_fdt(dtb: &[u8]) { 751 match fdt_parser::Fdt::new(dtb) { 752 Ok(fdt) => { 753 if let Some(root) = fdt.find_node("/") { 754 debug!("Printing the FDT:"); 755 print_node(root, 0); 756 } else { 757 debug!("Failed to find root node in FDT for debugging."); 758 } 759 } 760 Err(_) => debug!("Failed to parse FDT for debugging."), 761 } 762 } 763 764 fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) { 765 debug!("{:indent$}{}/", "", node.name, indent = n_spaces); 766 for property in node.properties() { 767 let name = property.name; 768 769 // If the property is 'compatible', its value requires special handling. 770 // The u8 array could contain multiple null-terminated strings. 771 // We copy the original array and simply replace all 'null' characters with spaces. 772 let value = if name == "compatible" { 773 let mut compatible = vec![0u8; 256]; 774 let handled_value = property 775 .value 776 .iter() 777 .map(|&c| if c == 0 { b' ' } else { c }) 778 .collect::<Vec<_>>(); 779 let len = cmp::min(255, handled_value.len()); 780 compatible[..len].copy_from_slice(&handled_value[..len]); 781 compatible[..(len + 1)].to_vec() 782 } else { 783 property.value.to_vec() 784 }; 785 let value = &value; 786 787 // Now the value can be either: 788 // - A null-terminated C string, or 789 // - Binary data 790 // We follow a very simple logic to present the value: 791 // - At first, try to convert it to CStr and print, 792 // - If failed, print it as u32 array. 793 let value_result = match CStr::from_bytes_with_nul(value) { 794 Ok(value_cstr) => match value_cstr.to_str() { 795 Ok(value_str) => Some(value_str), 796 Err(_e) => None, 797 }, 798 Err(_e) => None, 799 }; 800 801 if let Some(value_str) = value_result { 802 debug!( 803 "{:indent$}{} : {:#?}", 804 "", 805 name, 806 value_str, 807 indent = (n_spaces + 2) 808 ); 809 } else { 810 let mut array = Vec::with_capacity(256); 811 array.resize(value.len() / 4, 0u32); 812 BigEndian::read_u32_into(value, &mut array); 813 debug!( 814 "{:indent$}{} : {:X?}", 815 "", 816 name, 817 array, 818 indent = (n_spaces + 2) 819 ); 820 }; 821 } 822 823 // Print children nodes if there is any 824 for child in node.children() { 825 print_node(child, n_spaces + 2); 826 } 827 } 828