1 // Copyright 2020 Arm Limited (or its affiliates). All rights reserved. 2 // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 6 // Use of this source code is governed by a BSD-style license that can be 7 // found in the THIRD-PARTY file. 8 9 use crate::{NumaNodes, PciSpaceInfo}; 10 use byteorder::{BigEndian, ByteOrder}; 11 use hypervisor::arch::aarch64::gic::Vgic; 12 use std::cmp; 13 use std::collections::HashMap; 14 use std::ffi::CStr; 15 use std::fmt::Debug; 16 use std::result; 17 use std::str; 18 use std::sync::{Arc, Mutex}; 19 20 use super::super::DeviceType; 21 use super::super::GuestMemoryMmap; 22 use super::super::InitramfsConfig; 23 use super::layout::{ 24 IRQ_BASE, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_PCI_IO_SIZE, MEM_PCI_IO_START, 25 PCI_HIGH_BASE, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, 26 }; 27 use vm_fdt::{FdtWriter, FdtWriterResult}; 28 use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryRegion}; 29 30 // This is a value for uniquely identifying the FDT node declaring the interrupt controller. 31 const GIC_PHANDLE: u32 = 1; 32 // This is a value for uniquely identifying the FDT node declaring the MSI controller. 33 const MSI_PHANDLE: u32 = 2; 34 // This is a value for uniquely identifying the FDT node containing the clock definition. 35 const CLOCK_PHANDLE: u32 = 3; 36 // This is a value for uniquely identifying the FDT node containing the gpio controller. 37 const GPIO_PHANDLE: u32 = 4; 38 // This is a value for virtio-iommu. Now only one virtio-iommu device is supported. 39 const VIRTIO_IOMMU_PHANDLE: u32 = 5; 40 // NOTE: Keep FIRST_VCPU_PHANDLE the last PHANDLE defined. 41 // This is a value for uniquely identifying the FDT node containing the first vCPU. 42 // The last number of vCPU phandle depends on the number of vCPUs. 43 const FIRST_VCPU_PHANDLE: u32 = 6; 44 45 // Read the documentation specified when appending the root node to the FDT. 46 const ADDRESS_CELLS: u32 = 0x2; 47 const SIZE_CELLS: u32 = 0x2; 48 49 // As per kvm tool and 50 // https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt 51 // Look for "The 1st cell..." 52 const GIC_FDT_IRQ_TYPE_SPI: u32 = 0; 53 const GIC_FDT_IRQ_TYPE_PPI: u32 = 1; 54 55 // From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17 56 const IRQ_TYPE_EDGE_RISING: u32 = 1; 57 const IRQ_TYPE_LEVEL_HI: u32 = 4; 58 59 // PMU PPI interrupt number 60 pub const AARCH64_PMU_IRQ: u32 = 7; 61 62 // Keys and Buttons 63 // System Power Down 64 const KEY_POWER: u32 = 116; 65 66 /// Trait for devices to be added to the Flattened Device Tree. 67 pub trait DeviceInfoForFdt { 68 /// Returns the address where this device will be loaded. 69 fn addr(&self) -> u64; 70 /// Returns the associated interrupt for this device. 71 fn irq(&self) -> u32; 72 /// Returns the amount of memory that needs to be reserved for this device. 73 fn length(&self) -> u64; 74 } 75 76 /// Errors thrown while configuring the Flattened Device Tree for aarch64. 77 #[derive(Debug)] 78 pub enum Error { 79 /// Failure in writing FDT in memory. 80 WriteFdtToMemory(GuestMemoryError), 81 } 82 type Result<T> = result::Result<T, Error>; 83 84 /// Creates the flattened device tree for this aarch64 VM. 85 #[allow(clippy::too_many_arguments)] 86 pub fn create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( 87 guest_mem: &GuestMemoryMmap, 88 cmdline: &str, 89 vcpu_mpidr: Vec<u64>, 90 vcpu_topology: Option<(u8, u8, u8)>, 91 device_info: &HashMap<(DeviceType, String), T, S>, 92 gic_device: &Arc<Mutex<dyn Vgic>>, 93 initrd: &Option<InitramfsConfig>, 94 pci_space_info: &[PciSpaceInfo], 95 numa_nodes: &NumaNodes, 96 virtio_iommu_bdf: Option<u32>, 97 pmu_supported: bool, 98 ) -> FdtWriterResult<Vec<u8>> { 99 // Allocate stuff necessary for the holding the blob. 100 let mut fdt = FdtWriter::new().unwrap(); 101 102 // For an explanation why these nodes were introduced in the blob take a look at 103 // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845 104 // Look for "Required nodes and properties". 105 106 // Header or the root node as per above mentioned documentation. 107 let root_node = fdt.begin_node("")?; 108 fdt.property_string("compatible", "linux,dummy-virt")?; 109 // For info on #address-cells and size-cells read "Note about cells and address representation" 110 // from the above mentioned txt file. 111 fdt.property_u32("#address-cells", ADDRESS_CELLS)?; 112 fdt.property_u32("#size-cells", SIZE_CELLS)?; 113 // This is not mandatory but we use it to point the root node to the node 114 // containing description of the interrupt controller for this VM. 115 fdt.property_u32("interrupt-parent", GIC_PHANDLE)?; 116 create_cpu_nodes(&mut fdt, &vcpu_mpidr, vcpu_topology, numa_nodes)?; 117 create_memory_node(&mut fdt, guest_mem, numa_nodes)?; 118 create_chosen_node(&mut fdt, cmdline, initrd)?; 119 create_gic_node(&mut fdt, gic_device)?; 120 create_timer_node(&mut fdt)?; 121 if pmu_supported { 122 create_pmu_node(&mut fdt)?; 123 } 124 create_clock_node(&mut fdt)?; 125 create_psci_node(&mut fdt)?; 126 create_devices_node(&mut fdt, device_info)?; 127 create_pci_nodes(&mut fdt, pci_space_info, virtio_iommu_bdf)?; 128 if numa_nodes.len() > 1 { 129 create_distance_map_node(&mut fdt, numa_nodes)?; 130 } 131 132 // End Header node. 133 fdt.end_node(root_node)?; 134 135 let fdt_final = fdt.finish()?; 136 137 Ok(fdt_final) 138 } 139 140 pub fn write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()> { 141 // Write FDT to memory. 142 guest_mem 143 .write_slice(fdt_final.as_slice(), super::layout::FDT_START) 144 .map_err(Error::WriteFdtToMemory)?; 145 Ok(()) 146 } 147 148 // Following are the auxiliary function for creating the different nodes that we append to our FDT. 149 fn create_cpu_nodes( 150 fdt: &mut FdtWriter, 151 vcpu_mpidr: &[u64], 152 vcpu_topology: Option<(u8, u8, u8)>, 153 numa_nodes: &NumaNodes, 154 ) -> FdtWriterResult<()> { 155 // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml. 156 let cpus_node = fdt.begin_node("cpus")?; 157 fdt.property_u32("#address-cells", 0x1)?; 158 fdt.property_u32("#size-cells", 0x0)?; 159 160 let num_cpus = vcpu_mpidr.len(); 161 162 for (cpu_id, mpidr) in vcpu_mpidr.iter().enumerate().take(num_cpus) { 163 let cpu_name = format!("cpu@{:x}", cpu_id); 164 let cpu_node = fdt.begin_node(&cpu_name)?; 165 fdt.property_string("device_type", "cpu")?; 166 fdt.property_string("compatible", "arm,arm-v8")?; 167 if num_cpus > 1 { 168 // This is required on armv8 64-bit. See aforementioned documentation. 169 fdt.property_string("enable-method", "psci")?; 170 } 171 // Set the field to first 24 bits of the MPIDR - Multiprocessor Affinity Register. 172 // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html. 173 fdt.property_u32("reg", (mpidr & 0x7FFFFF) as u32)?; 174 fdt.property_u32("phandle", cpu_id as u32 + FIRST_VCPU_PHANDLE)?; 175 176 // Add `numa-node-id` property if there is any numa config. 177 if numa_nodes.len() > 1 { 178 for numa_node_idx in 0..numa_nodes.len() { 179 let numa_node = numa_nodes.get(&(numa_node_idx as u32)); 180 if numa_node.unwrap().cpus.contains(&(cpu_id as u8)) { 181 fdt.property_u32("numa-node-id", numa_node_idx as u32)?; 182 } 183 } 184 } 185 186 fdt.end_node(cpu_node)?; 187 } 188 189 if let Some(topology) = vcpu_topology { 190 let (threads_per_core, cores_per_package, packages) = topology; 191 let cpu_map_node = fdt.begin_node("cpu-map")?; 192 193 // Create device tree nodes with regard of above mapping. 194 for cluster_idx in 0..packages { 195 let cluster_name = format!("cluster{:x}", cluster_idx); 196 let cluster_node = fdt.begin_node(&cluster_name)?; 197 198 for core_idx in 0..cores_per_package { 199 let core_name = format!("core{:x}", core_idx); 200 let core_node = fdt.begin_node(&core_name)?; 201 202 for thread_idx in 0..threads_per_core { 203 let thread_name = format!("thread{:x}", thread_idx); 204 let thread_node = fdt.begin_node(&thread_name)?; 205 let cpu_idx = threads_per_core * cores_per_package * cluster_idx 206 + threads_per_core * core_idx 207 + thread_idx; 208 fdt.property_u32("cpu", cpu_idx as u32 + FIRST_VCPU_PHANDLE)?; 209 fdt.end_node(thread_node)?; 210 } 211 212 fdt.end_node(core_node)?; 213 } 214 fdt.end_node(cluster_node)?; 215 } 216 fdt.end_node(cpu_map_node)?; 217 } else { 218 debug!("Boot using device tree, CPU topology is not (correctly) specified"); 219 } 220 221 fdt.end_node(cpus_node)?; 222 223 Ok(()) 224 } 225 226 fn create_memory_node( 227 fdt: &mut FdtWriter, 228 guest_mem: &GuestMemoryMmap, 229 numa_nodes: &NumaNodes, 230 ) -> FdtWriterResult<()> { 231 // See https://github.com/torvalds/linux/blob/58ae0b51506802713aa0e9956d1853ba4c722c98/Documentation/devicetree/bindings/numa.txt 232 // for NUMA setting in memory node. 233 if numa_nodes.len() > 1 { 234 for numa_node_idx in 0..numa_nodes.len() { 235 let numa_node = numa_nodes.get(&(numa_node_idx as u32)); 236 let mut mem_reg_prop: Vec<u64> = Vec::new(); 237 let mut node_memory_addr: u64 = 0; 238 // Each memory zone of numa will have its own memory node, but 239 // different numa nodes should not share same memory zones. 240 for memory_region in numa_node.unwrap().memory_regions.iter() { 241 let memory_region_start_addr: u64 = memory_region.start_addr().raw_value(); 242 let memory_region_size: u64 = memory_region.size() as u64; 243 mem_reg_prop.push(memory_region_start_addr); 244 mem_reg_prop.push(memory_region_size); 245 // Set the node address the first non-zero regison address 246 if node_memory_addr == 0 { 247 node_memory_addr = memory_region_start_addr; 248 } 249 } 250 let memory_node_name = format!("memory@{:x}", node_memory_addr); 251 let memory_node = fdt.begin_node(&memory_node_name)?; 252 fdt.property_string("device_type", "memory")?; 253 fdt.property_array_u64("reg", &mem_reg_prop)?; 254 fdt.property_u32("numa-node-id", numa_node_idx as u32)?; 255 fdt.end_node(memory_node)?; 256 } 257 } else { 258 let last_addr = guest_mem.last_addr().raw_value(); 259 if last_addr < super::layout::MEM_32BIT_RESERVED_START.raw_value() { 260 // Case 1: all RAM is under the hole 261 let mem_size = last_addr - super::layout::RAM_START.raw_value() + 1; 262 let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64]; 263 let memory_node = fdt.begin_node("memory")?; 264 fdt.property_string("device_type", "memory")?; 265 fdt.property_array_u64("reg", &mem_reg_prop)?; 266 fdt.end_node(memory_node)?; 267 } else { 268 // Case 2: RAM is split by the hole 269 // Region 1: RAM before the hole 270 let mem_size = super::layout::MEM_32BIT_RESERVED_START.raw_value() 271 - super::layout::RAM_START.raw_value(); 272 let mem_reg_prop = [super::layout::RAM_START.raw_value() as u64, mem_size as u64]; 273 let memory_node_name = format!("memory@{:x}", super::layout::RAM_START.raw_value()); 274 let memory_node = fdt.begin_node(&memory_node_name)?; 275 fdt.property_string("device_type", "memory")?; 276 fdt.property_array_u64("reg", &mem_reg_prop)?; 277 fdt.end_node(memory_node)?; 278 279 // Region 2: RAM after the hole 280 let mem_size = last_addr - super::layout::RAM_64BIT_START.raw_value() + 1; 281 let mem_reg_prop = [ 282 super::layout::RAM_64BIT_START.raw_value() as u64, 283 mem_size as u64, 284 ]; 285 let memory_node_name = 286 format!("memory@{:x}", super::layout::RAM_64BIT_START.raw_value()); 287 let memory_node = fdt.begin_node(&memory_node_name)?; 288 fdt.property_string("device_type", "memory")?; 289 fdt.property_array_u64("reg", &mem_reg_prop)?; 290 fdt.end_node(memory_node)?; 291 } 292 } 293 294 Ok(()) 295 } 296 297 fn create_chosen_node( 298 fdt: &mut FdtWriter, 299 cmdline: &str, 300 initrd: &Option<InitramfsConfig>, 301 ) -> FdtWriterResult<()> { 302 let chosen_node = fdt.begin_node("chosen")?; 303 fdt.property_string("bootargs", cmdline)?; 304 305 if let Some(initrd_config) = initrd { 306 let initrd_start = initrd_config.address.raw_value() as u64; 307 let initrd_end = initrd_config.address.raw_value() + initrd_config.size as u64; 308 fdt.property_u64("linux,initrd-start", initrd_start)?; 309 fdt.property_u64("linux,initrd-end", initrd_end)?; 310 } 311 312 fdt.end_node(chosen_node)?; 313 314 Ok(()) 315 } 316 317 fn create_gic_node(fdt: &mut FdtWriter, gic_device: &Arc<Mutex<dyn Vgic>>) -> FdtWriterResult<()> { 318 let gic_reg_prop = gic_device.lock().unwrap().device_properties(); 319 320 let intc_node = fdt.begin_node("intc")?; 321 322 fdt.property_string("compatible", gic_device.lock().unwrap().fdt_compatibility())?; 323 fdt.property_null("interrupt-controller")?; 324 // "interrupt-cells" field specifies the number of cells needed to encode an 325 // interrupt source. The type shall be a <u32> and the value shall be 3 if no PPI affinity description 326 // is required. 327 fdt.property_u32("#interrupt-cells", 3)?; 328 fdt.property_array_u64("reg", &gic_reg_prop)?; 329 fdt.property_u32("phandle", GIC_PHANDLE)?; 330 fdt.property_u32("#address-cells", 2)?; 331 fdt.property_u32("#size-cells", 2)?; 332 fdt.property_null("ranges")?; 333 334 let gic_intr_prop = [ 335 GIC_FDT_IRQ_TYPE_PPI, 336 gic_device.lock().unwrap().fdt_maint_irq(), 337 IRQ_TYPE_LEVEL_HI, 338 ]; 339 fdt.property_array_u32("interrupts", &gic_intr_prop)?; 340 341 if gic_device.lock().unwrap().msi_compatible() { 342 let msic_node = fdt.begin_node("msic")?; 343 fdt.property_string("compatible", gic_device.lock().unwrap().msi_compatibility())?; 344 fdt.property_null("msi-controller")?; 345 fdt.property_u32("phandle", MSI_PHANDLE)?; 346 let msi_reg_prop = gic_device.lock().unwrap().msi_properties(); 347 fdt.property_array_u64("reg", &msi_reg_prop)?; 348 fdt.end_node(msic_node)?; 349 } 350 351 fdt.end_node(intc_node)?; 352 353 Ok(()) 354 } 355 356 fn create_clock_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> { 357 // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture 358 // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power 359 // consumption and reduced interface complexity. 360 // PCLK is the clock source and this node defines exactly the clock for the APB. 361 let clock_node = fdt.begin_node("apb-pclk")?; 362 fdt.property_string("compatible", "fixed-clock")?; 363 fdt.property_u32("#clock-cells", 0x0)?; 364 fdt.property_u32("clock-frequency", 24000000)?; 365 fdt.property_string("clock-output-names", "clk24mhz")?; 366 fdt.property_u32("phandle", CLOCK_PHANDLE)?; 367 fdt.end_node(clock_node)?; 368 369 Ok(()) 370 } 371 372 fn create_timer_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> { 373 // See 374 // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/interrupt-controller/arch_timer.txt 375 // These are fixed interrupt numbers for the timer device. 376 let irqs = [13, 14, 11, 10]; 377 let compatible = "arm,armv8-timer"; 378 379 let mut timer_reg_cells: Vec<u32> = Vec::new(); 380 for &irq in irqs.iter() { 381 timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI); 382 timer_reg_cells.push(irq); 383 timer_reg_cells.push(IRQ_TYPE_LEVEL_HI); 384 } 385 386 let timer_node = fdt.begin_node("timer")?; 387 fdt.property_string("compatible", compatible)?; 388 fdt.property_null("always-on")?; 389 fdt.property_array_u32("interrupts", &timer_reg_cells)?; 390 fdt.end_node(timer_node)?; 391 392 Ok(()) 393 } 394 395 fn create_psci_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> { 396 let compatible = "arm,psci-0.2"; 397 let psci_node = fdt.begin_node("psci")?; 398 fdt.property_string("compatible", compatible)?; 399 // Two methods available: hvc and smc. 400 // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit instead of SMC. 401 // So, since we are using kvm, we need to use hvc. 402 fdt.property_string("method", "hvc")?; 403 fdt.end_node(psci_node)?; 404 405 Ok(()) 406 } 407 408 fn create_virtio_node<T: DeviceInfoForFdt + Clone + Debug>( 409 fdt: &mut FdtWriter, 410 dev_info: &T, 411 ) -> FdtWriterResult<()> { 412 let device_reg_prop = [dev_info.addr(), dev_info.length()]; 413 let irq = [GIC_FDT_IRQ_TYPE_SPI, dev_info.irq(), IRQ_TYPE_EDGE_RISING]; 414 415 let virtio_node = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr()))?; 416 fdt.property_string("compatible", "virtio,mmio")?; 417 fdt.property_array_u64("reg", &device_reg_prop)?; 418 fdt.property_array_u32("interrupts", &irq)?; 419 fdt.property_u32("interrupt-parent", GIC_PHANDLE)?; 420 fdt.end_node(virtio_node)?; 421 422 Ok(()) 423 } 424 425 fn create_serial_node<T: DeviceInfoForFdt + Clone + Debug>( 426 fdt: &mut FdtWriter, 427 dev_info: &T, 428 ) -> FdtWriterResult<()> { 429 let compatible = b"arm,pl011\0arm,primecell\0"; 430 let serial_reg_prop = [dev_info.addr(), dev_info.length()]; 431 let irq = [ 432 GIC_FDT_IRQ_TYPE_SPI, 433 dev_info.irq() - IRQ_BASE, 434 IRQ_TYPE_EDGE_RISING, 435 ]; 436 437 let serial_node = fdt.begin_node(&format!("pl011@{:x}", dev_info.addr()))?; 438 fdt.property("compatible", compatible)?; 439 fdt.property_array_u64("reg", &serial_reg_prop)?; 440 fdt.property_u32("clocks", CLOCK_PHANDLE)?; 441 fdt.property_string("clock-names", "apb_pclk")?; 442 fdt.property_array_u32("interrupts", &irq)?; 443 fdt.end_node(serial_node)?; 444 445 Ok(()) 446 } 447 448 fn create_rtc_node<T: DeviceInfoForFdt + Clone + Debug>( 449 fdt: &mut FdtWriter, 450 dev_info: &T, 451 ) -> FdtWriterResult<()> { 452 let compatible = b"arm,pl031\0arm,primecell\0"; 453 let rtc_reg_prop = [dev_info.addr(), dev_info.length()]; 454 let irq = [ 455 GIC_FDT_IRQ_TYPE_SPI, 456 dev_info.irq() - IRQ_BASE, 457 IRQ_TYPE_LEVEL_HI, 458 ]; 459 460 let rtc_node = fdt.begin_node(&format!("rtc@{:x}", dev_info.addr()))?; 461 fdt.property("compatible", compatible)?; 462 fdt.property_array_u64("reg", &rtc_reg_prop)?; 463 fdt.property_array_u32("interrupts", &irq)?; 464 fdt.property_u32("clocks", CLOCK_PHANDLE)?; 465 fdt.property_string("clock-names", "apb_pclk")?; 466 fdt.end_node(rtc_node)?; 467 468 Ok(()) 469 } 470 471 fn create_gpio_node<T: DeviceInfoForFdt + Clone + Debug>( 472 fdt: &mut FdtWriter, 473 dev_info: &T, 474 ) -> FdtWriterResult<()> { 475 // PL061 GPIO controller node 476 let compatible = b"arm,pl061\0arm,primecell\0"; 477 let gpio_reg_prop = [dev_info.addr(), dev_info.length()]; 478 let irq = [ 479 GIC_FDT_IRQ_TYPE_SPI, 480 dev_info.irq() - IRQ_BASE, 481 IRQ_TYPE_EDGE_RISING, 482 ]; 483 484 let gpio_node = fdt.begin_node(&format!("pl061@{:x}", dev_info.addr()))?; 485 fdt.property("compatible", compatible)?; 486 fdt.property_array_u64("reg", &gpio_reg_prop)?; 487 fdt.property_array_u32("interrupts", &irq)?; 488 fdt.property_null("gpio-controller")?; 489 fdt.property_u32("#gpio-cells", 2)?; 490 fdt.property_u32("clocks", CLOCK_PHANDLE)?; 491 fdt.property_string("clock-names", "apb_pclk")?; 492 fdt.property_u32("phandle", GPIO_PHANDLE)?; 493 fdt.end_node(gpio_node)?; 494 495 // gpio-keys node 496 let gpio_keys_node = fdt.begin_node("gpio-keys")?; 497 fdt.property_string("compatible", "gpio-keys")?; 498 fdt.property_u32("#size-cells", 0)?; 499 fdt.property_u32("#address-cells", 1)?; 500 let gpio_keys_poweroff_node = fdt.begin_node("button@1")?; 501 fdt.property_string("label", "GPIO Key Poweroff")?; 502 fdt.property_u32("linux,code", KEY_POWER)?; 503 let gpios = [GPIO_PHANDLE, 3, 0]; 504 fdt.property_array_u32("gpios", &gpios)?; 505 fdt.end_node(gpio_keys_poweroff_node)?; 506 fdt.end_node(gpio_keys_node)?; 507 508 Ok(()) 509 } 510 511 fn create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( 512 fdt: &mut FdtWriter, 513 dev_info: &HashMap<(DeviceType, String), T, S>, 514 ) -> FdtWriterResult<()> { 515 // Create one temp Vec to store all virtio devices 516 let mut ordered_virtio_device: Vec<&T> = Vec::new(); 517 518 for ((device_type, _device_id), info) in dev_info { 519 match device_type { 520 DeviceType::Gpio => create_gpio_node(fdt, info)?, 521 DeviceType::Rtc => create_rtc_node(fdt, info)?, 522 DeviceType::Serial => create_serial_node(fdt, info)?, 523 DeviceType::Virtio(_) => { 524 ordered_virtio_device.push(info); 525 } 526 } 527 } 528 529 // Sort out virtio devices by address from low to high and insert them into fdt table. 530 ordered_virtio_device.sort_by_key(|&a| a.addr()); 531 // Current address allocation strategy in cloud-hypervisor is: the first created device 532 // will be allocated to higher address. Here we reverse the vector to make sure that 533 // the older created device will appear in front of the newer created device in FDT. 534 ordered_virtio_device.reverse(); 535 for ordered_device_info in ordered_virtio_device.drain(..) { 536 create_virtio_node(fdt, ordered_device_info)?; 537 } 538 539 Ok(()) 540 } 541 542 fn create_pmu_node(fdt: &mut FdtWriter) -> FdtWriterResult<()> { 543 let compatible = "arm,armv8-pmuv3"; 544 let irq = [GIC_FDT_IRQ_TYPE_PPI, AARCH64_PMU_IRQ, IRQ_TYPE_LEVEL_HI]; 545 546 let pmu_node = fdt.begin_node("pmu")?; 547 fdt.property_string("compatible", compatible)?; 548 fdt.property_array_u32("interrupts", &irq)?; 549 fdt.end_node(pmu_node)?; 550 Ok(()) 551 } 552 553 fn create_pci_nodes( 554 fdt: &mut FdtWriter, 555 pci_device_info: &[PciSpaceInfo], 556 virtio_iommu_bdf: Option<u32>, 557 ) -> FdtWriterResult<()> { 558 // Add node for PCIe controller. 559 // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel 560 // and https://elinux.org/Device_Tree_Usage. 561 // In multiple PCI segments setup, each PCI segment needs a PCI node. 562 for pci_device_info_elem in pci_device_info.iter() { 563 // EDK2 requires the PCIe high space above 4G address. 564 // The actual space in CLH follows the RAM. If the RAM space is small, the PCIe high space 565 // could fall bellow 4G. 566 // Here we cut off PCI device space below 8G in FDT to workaround the EDK2 check. 567 // But the address written in ACPI is not impacted. 568 let (pci_device_base_64bit, pci_device_size_64bit) = 569 if pci_device_info_elem.pci_device_space_start < PCI_HIGH_BASE.raw_value() { 570 ( 571 PCI_HIGH_BASE.raw_value(), 572 pci_device_info_elem.pci_device_space_size 573 - (PCI_HIGH_BASE.raw_value() - pci_device_info_elem.pci_device_space_start), 574 ) 575 } else { 576 ( 577 pci_device_info_elem.pci_device_space_start, 578 pci_device_info_elem.pci_device_space_size, 579 ) 580 }; 581 // There is no specific requirement of the 32bit MMIO range, and 582 // therefore at least we can make these ranges 4K aligned. 583 let pci_device_size_32bit: u64 = 584 MEM_32BIT_DEVICES_SIZE / ((1 << 12) * pci_device_info.len() as u64) * (1 << 12); 585 let pci_device_base_32bit: u64 = MEM_32BIT_DEVICES_START.0 586 + pci_device_size_32bit * pci_device_info_elem.pci_segment_id as u64; 587 588 let ranges = [ 589 // io addresses. Since AArch64 will not use IO address, 590 // we can set the same IO address range for every segment. 591 0x1000000, 592 0_u32, 593 0_u32, 594 (MEM_PCI_IO_START.0 >> 32) as u32, 595 MEM_PCI_IO_START.0 as u32, 596 (MEM_PCI_IO_SIZE >> 32) as u32, 597 MEM_PCI_IO_SIZE as u32, 598 // mmio addresses 599 0x2000000, // (ss = 10: 32-bit memory space) 600 (pci_device_base_32bit >> 32) as u32, // PCI address 601 pci_device_base_32bit as u32, 602 (pci_device_base_32bit >> 32) as u32, // CPU address 603 pci_device_base_32bit as u32, 604 (pci_device_size_32bit >> 32) as u32, // size 605 pci_device_size_32bit as u32, 606 // device addresses 607 0x3000000, // (ss = 11: 64-bit memory space) 608 (pci_device_base_64bit >> 32) as u32, // PCI address 609 pci_device_base_64bit as u32, 610 (pci_device_base_64bit >> 32) as u32, // CPU address 611 pci_device_base_64bit as u32, 612 (pci_device_size_64bit >> 32) as u32, // size 613 pci_device_size_64bit as u32, 614 ]; 615 let bus_range = [0, 0]; // Only bus 0 616 let reg = [ 617 pci_device_info_elem.mmio_config_address, 618 PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, 619 ]; 620 // See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt 621 let msi_map = [ 622 // rid-base: A single cell describing the first RID matched by the entry. 623 0x0, 624 // msi-controller: A single phandle to an MSI controller. 625 MSI_PHANDLE, 626 // msi-base: An msi-specifier describing the msi-specifier produced for the 627 // first RID matched by the entry. 628 (pci_device_info_elem.pci_segment_id as u32) << 8, 629 // length: A single cell describing how many consecutive RIDs are matched 630 // following the rid-base. 631 0x100, 632 ]; 633 634 let pci_node_name = format!("pci@{:x}", pci_device_info_elem.mmio_config_address); 635 let pci_node = fdt.begin_node(&pci_node_name)?; 636 637 fdt.property_string("compatible", "pci-host-ecam-generic")?; 638 fdt.property_string("device_type", "pci")?; 639 fdt.property_array_u32("ranges", &ranges)?; 640 fdt.property_array_u32("bus-range", &bus_range)?; 641 fdt.property_u32( 642 "linux,pci-domain", 643 pci_device_info_elem.pci_segment_id as u32, 644 )?; 645 fdt.property_u32("#address-cells", 3)?; 646 fdt.property_u32("#size-cells", 2)?; 647 fdt.property_array_u64("reg", ®)?; 648 fdt.property_u32("#interrupt-cells", 1)?; 649 fdt.property_null("interrupt-map")?; 650 fdt.property_null("interrupt-map-mask")?; 651 fdt.property_null("dma-coherent")?; 652 fdt.property_array_u32("msi-map", &msi_map)?; 653 fdt.property_u32("msi-parent", MSI_PHANDLE)?; 654 655 if pci_device_info_elem.pci_segment_id == 0 { 656 if let Some(virtio_iommu_bdf) = virtio_iommu_bdf { 657 // See kernel document Documentation/devicetree/bindings/pci/pci-iommu.txt 658 // for 'iommu-map' attribute setting. 659 let iommu_map = [ 660 0_u32, 661 VIRTIO_IOMMU_PHANDLE, 662 0_u32, 663 virtio_iommu_bdf, 664 virtio_iommu_bdf + 1, 665 VIRTIO_IOMMU_PHANDLE, 666 virtio_iommu_bdf + 1, 667 0xffff - virtio_iommu_bdf, 668 ]; 669 fdt.property_array_u32("iommu-map", &iommu_map)?; 670 671 // See kernel document Documentation/devicetree/bindings/virtio/iommu.txt 672 // for virtio-iommu node settings. 673 let virtio_iommu_node_name = format!("virtio_iommu@{:x}", virtio_iommu_bdf); 674 let virtio_iommu_node = fdt.begin_node(&virtio_iommu_node_name)?; 675 fdt.property_u32("#iommu-cells", 1)?; 676 fdt.property_string("compatible", "virtio,pci-iommu")?; 677 678 // 'reg' is a five-cell address encoded as 679 // (phys.hi phys.mid phys.lo size.hi size.lo). phys.hi should contain the 680 // device's BDF as 0b00000000 bbbbbbbb dddddfff 00000000. The other cells 681 // should be zero. 682 let reg = [virtio_iommu_bdf << 8, 0_u32, 0_u32, 0_u32, 0_u32]; 683 fdt.property_array_u32("reg", ®)?; 684 fdt.property_u32("phandle", VIRTIO_IOMMU_PHANDLE)?; 685 686 fdt.end_node(virtio_iommu_node)?; 687 } 688 } 689 690 fdt.end_node(pci_node)?; 691 } 692 693 Ok(()) 694 } 695 696 fn create_distance_map_node(fdt: &mut FdtWriter, numa_nodes: &NumaNodes) -> FdtWriterResult<()> { 697 let distance_map_node = fdt.begin_node("distance-map")?; 698 fdt.property_string("compatible", "numa-distance-map-v1")?; 699 // Construct the distance matrix. 700 // 1. We use the word entry to describe a distance from a node to 701 // its destination, e.g. 0 -> 1 = 20 is described as <0 1 20>. 702 // 2. Each entry represents distance from first node to second node. 703 // The distances are equal in either direction. 704 // 3. The distance from a node to self (local distance) is represented 705 // with value 10 and all internode distance should be represented with 706 // a value greater than 10. 707 // 4. distance-matrix should have entries in lexicographical ascending 708 // order of nodes. 709 let mut distance_matrix = Vec::new(); 710 for numa_node_idx in 0..numa_nodes.len() { 711 let numa_node = numa_nodes.get(&(numa_node_idx as u32)); 712 for dest_numa_node in 0..numa_node.unwrap().distances.len() + 1 { 713 if numa_node_idx == dest_numa_node { 714 distance_matrix.push(numa_node_idx as u32); 715 distance_matrix.push(dest_numa_node as u32); 716 distance_matrix.push(10_u32); 717 continue; 718 } 719 720 distance_matrix.push(numa_node_idx as u32); 721 distance_matrix.push(dest_numa_node as u32); 722 distance_matrix.push( 723 *numa_node 724 .unwrap() 725 .distances 726 .get(&(dest_numa_node as u32)) 727 .unwrap() as u32, 728 ); 729 } 730 } 731 fdt.property_array_u32("distance-matrix", distance_matrix.as_ref())?; 732 fdt.end_node(distance_map_node)?; 733 734 Ok(()) 735 } 736 737 // Parse the DTB binary and print for debugging 738 pub fn print_fdt(dtb: &[u8]) { 739 match fdt_parser::Fdt::new(dtb) { 740 Ok(fdt) => { 741 if let Some(root) = fdt.find_node("/") { 742 debug!("Printing the FDT:"); 743 print_node(root, 0); 744 } else { 745 debug!("Failed to find root node in FDT for debugging."); 746 } 747 } 748 Err(_) => debug!("Failed to parse FDT for debugging."), 749 } 750 } 751 752 fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) { 753 debug!("{:indent$}{}/", "", node.name, indent = n_spaces); 754 for property in node.properties() { 755 let name = property.name; 756 757 // If the property is 'compatible', its value requires special handling. 758 // The u8 array could contain multiple null-terminated strings. 759 // We copy the original array and simply replace all 'null' characters with spaces. 760 let value = if name == "compatible" { 761 let mut compatible = vec![0u8; 256]; 762 let handled_value = property 763 .value 764 .iter() 765 .map(|&c| if c == 0 { b' ' } else { c }) 766 .collect::<Vec<_>>(); 767 let len = cmp::min(255, handled_value.len()); 768 compatible[..len].copy_from_slice(&handled_value[..len]); 769 compatible[..(len + 1)].to_vec() 770 } else { 771 property.value.to_vec() 772 }; 773 let value = &value; 774 775 // Now the value can be either: 776 // - A null-terminated C string, or 777 // - Binary data 778 // We follow a very simple logic to present the value: 779 // - At first, try to convert it to CStr and print, 780 // - If failed, print it as u32 array. 781 let value_result = match CStr::from_bytes_with_nul(value) { 782 Ok(value_cstr) => match value_cstr.to_str() { 783 Ok(value_str) => Some(value_str), 784 Err(_e) => None, 785 }, 786 Err(_e) => None, 787 }; 788 789 if let Some(value_str) = value_result { 790 debug!( 791 "{:indent$}{} : {:#?}", 792 "", 793 name, 794 value_str, 795 indent = (n_spaces + 2) 796 ); 797 } else { 798 let mut array = Vec::with_capacity(256); 799 array.resize(value.len() / 4, 0u32); 800 BigEndian::read_u32_into(value, &mut array); 801 debug!( 802 "{:indent$}{} : {:X?}", 803 "", 804 name, 805 array, 806 indent = (n_spaces + 2) 807 ); 808 }; 809 } 810 811 // Print children nodes if there is any 812 for child in node.children() { 813 print_node(child, n_spaces + 2); 814 } 815 } 816