1 // Copyright © 2024 Institute of Software, CAS. All rights reserved. 2 // Copyright 2020 Arm Limited (or its affiliates). All rights reserved. 3 // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 // SPDX-License-Identifier: Apache-2.0 5 // 6 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 7 // Use of this source code is governed by a BSD-style license that can be 8 // found in the THIRD-PARTY file. 9 10 use std::collections::HashMap; 11 use std::ffi::CStr; 12 use std::fmt::Debug; 13 use std::sync::{Arc, Mutex}; 14 use std::{cmp, result, str}; 15 16 use byteorder::{BigEndian, ByteOrder}; 17 use hypervisor::arch::riscv64::aia::Vaia; 18 use thiserror::Error; 19 use vm_fdt::{FdtWriter, FdtWriterResult}; 20 use vm_memory::{Address, Bytes, GuestMemory, GuestMemoryError, GuestMemoryRegion}; 21 22 use super::super::{DeviceType, GuestMemoryMmap, InitramfsConfig}; 23 use super::layout::{ 24 IRQ_BASE, MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_PCI_IO_SIZE, MEM_PCI_IO_START, 25 PCI_HIGH_BASE, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, 26 }; 27 use crate::PciSpaceInfo; 28 29 const AIA_APLIC_PHANDLE: u32 = 1; 30 const AIA_IMSIC_PHANDLE: u32 = 2; 31 const CPU_INTC_BASE_PHANDLE: u32 = 3; 32 const CPU_BASE_PHANDLE: u32 = 256 + CPU_INTC_BASE_PHANDLE; 33 // Read the documentation specified when appending the root node to the FDT. 34 const ADDRESS_CELLS: u32 = 0x2; 35 const SIZE_CELLS: u32 = 0x2; 36 37 // From https://elixir.bootlin.com/linux/v6.10/source/include/dt-bindings/interrupt-controller/irq.h#L14 38 const _IRQ_TYPE_EDGE_RISING: u32 = 1; 39 const IRQ_TYPE_LEVEL_HI: u32 = 4; 40 41 const S_MODE_EXT_IRQ: u32 = 9; 42 43 /// Trait for devices to be added to the Flattened Device Tree. 44 pub trait DeviceInfoForFdt { 45 /// Returns the address where this device will be loaded. 46 fn addr(&self) -> u64; 47 /// Returns the associated interrupt for this device. 48 fn irq(&self) -> u32; 49 /// Returns the amount of memory that needs to be reserved for this device. 50 fn length(&self) -> u64; 51 } 52 53 /// Errors thrown while configuring the Flattened Device Tree for riscv64. 54 #[derive(Debug, Error)] 55 pub enum Error { 56 /// Failure in writing FDT in memory. 57 #[error("Failure in writing FDT in memory: {0}")] 58 WriteFdtToMemory(#[source] GuestMemoryError), 59 } 60 type Result<T> = result::Result<T, Error>; 61 62 /// Creates the flattened device tree for this riscv64 VM. 63 #[allow(clippy::too_many_arguments)] 64 pub fn create_fdt<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( 65 guest_mem: &GuestMemoryMmap, 66 cmdline: &str, 67 num_vcpu: u32, 68 device_info: &HashMap<(DeviceType, String), T, S>, 69 aia_device: &Arc<Mutex<dyn Vaia>>, 70 initrd: &Option<InitramfsConfig>, 71 pci_space_info: &[PciSpaceInfo], 72 ) -> FdtWriterResult<Vec<u8>> { 73 // Allocate stuff necessary for the holding the blob. 74 let mut fdt = FdtWriter::new()?; 75 76 // For an explanation why these nodes were introduced in the blob take a look at 77 // https://github.com/devicetree-org/devicetree-specification/releases/tag/v0.4 78 // In chapter 3. 79 80 // Header or the root node as per above mentioned documentation. 81 let root_node = fdt.begin_node("")?; 82 fdt.property_string("compatible", "linux,dummy-virt")?; 83 // For info on #address-cells and size-cells resort to Table 3.1 Root Node 84 // Properties 85 fdt.property_u32("#address-cells", ADDRESS_CELLS)?; 86 fdt.property_u32("#size-cells", SIZE_CELLS)?; 87 create_cpu_nodes(&mut fdt, num_vcpu)?; 88 create_memory_node(&mut fdt, guest_mem)?; 89 create_chosen_node(&mut fdt, cmdline, initrd)?; 90 create_aia_node(&mut fdt, aia_device)?; 91 create_devices_node(&mut fdt, device_info)?; 92 create_pci_nodes(&mut fdt, pci_space_info)?; 93 94 // End Header node. 95 fdt.end_node(root_node)?; 96 97 let fdt_final = fdt.finish()?; 98 99 Ok(fdt_final) 100 } 101 102 pub fn write_fdt_to_memory(fdt_final: Vec<u8>, guest_mem: &GuestMemoryMmap) -> Result<()> { 103 // Write FDT to memory. 104 guest_mem 105 .write_slice(fdt_final.as_slice(), super::layout::FDT_START) 106 .map_err(Error::WriteFdtToMemory)?; 107 Ok(()) 108 } 109 110 // Following are the auxiliary function for creating the different nodes that we append to our FDT. 111 fn create_cpu_nodes(fdt: &mut FdtWriter, num_cpus: u32) -> FdtWriterResult<()> { 112 // See https://elixir.bootlin.com/linux/v6.10/source/Documentation/devicetree/bindings/riscv/cpus.yaml 113 let cpus = fdt.begin_node("cpus")?; 114 // As per documentation, on RISC-V 64-bit systems value should be set to 1. 115 fdt.property_u32("#address-cells", 0x01)?; 116 fdt.property_u32("#size-cells", 0x0)?; 117 // TODO: Retrieve CPU frequency from cpu timer regs 118 let timebase_frequency: u32 = 0x989680; 119 fdt.property_u32("timebase-frequency", timebase_frequency)?; 120 121 for cpu_index in 0..num_cpus { 122 let cpu = fdt.begin_node(&format!("cpu@{:x}", cpu_index))?; 123 fdt.property_string("device_type", "cpu")?; 124 fdt.property_string("compatible", "riscv")?; 125 fdt.property_string("mmu-type", "sv48")?; 126 fdt.property_string("riscv,isa", "rv64imafdc_smaia_ssaia")?; 127 fdt.property_string("status", "okay")?; 128 fdt.property_u32("reg", cpu_index)?; 129 fdt.property_u32("phandle", CPU_BASE_PHANDLE + cpu_index)?; 130 131 // interrupt controller node 132 let intc_node = fdt.begin_node("interrupt-controller")?; 133 fdt.property_string("compatible", "riscv,cpu-intc")?; 134 fdt.property_u32("#interrupt-cells", 1u32)?; 135 fdt.property_null("interrupt-controller")?; 136 fdt.property_u32("phandle", CPU_INTC_BASE_PHANDLE + cpu_index)?; 137 fdt.end_node(intc_node)?; 138 139 fdt.end_node(cpu)?; 140 } 141 142 fdt.end_node(cpus)?; 143 144 Ok(()) 145 } 146 147 fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &GuestMemoryMmap) -> FdtWriterResult<()> { 148 // Note: memory regions from "GuestMemory" are sorted and non-zero sized. 149 let ram_regions = { 150 let mut ram_regions = Vec::new(); 151 let mut current_start = guest_mem 152 .iter() 153 .next() 154 .map(GuestMemoryRegion::start_addr) 155 .expect("GuestMemory must have one memory region at least") 156 .raw_value(); 157 let mut current_end = current_start; 158 159 for (start, size) in guest_mem 160 .iter() 161 .map(|m| (m.start_addr().raw_value(), m.len())) 162 { 163 if current_end == start { 164 // This zone is continuous with the previous one. 165 current_end += size; 166 } else { 167 ram_regions.push((current_start, current_end)); 168 169 current_start = start; 170 current_end = start + size; 171 } 172 } 173 174 ram_regions.push((current_start, current_end)); 175 176 ram_regions 177 }; 178 179 let mut mem_reg_property = Vec::new(); 180 for region in ram_regions { 181 let mem_size = region.1 - region.0; 182 mem_reg_property.push(region.0); 183 mem_reg_property.push(mem_size); 184 } 185 186 let ram_start = super::layout::RAM_START.raw_value(); 187 let memory_node_name = format!("memory@{:x}", ram_start); 188 let memory_node = fdt.begin_node(&memory_node_name)?; 189 fdt.property_string("device_type", "memory")?; 190 fdt.property_array_u64("reg", &mem_reg_property)?; 191 fdt.end_node(memory_node)?; 192 193 Ok(()) 194 } 195 196 fn create_chosen_node( 197 fdt: &mut FdtWriter, 198 cmdline: &str, 199 initrd: &Option<InitramfsConfig>, 200 ) -> FdtWriterResult<()> { 201 let chosen_node = fdt.begin_node("chosen")?; 202 fdt.property_string("bootargs", cmdline)?; 203 204 if let Some(initrd_config) = initrd { 205 let initrd_start = initrd_config.address.raw_value(); 206 let initrd_end = initrd_config.address.raw_value() + initrd_config.size as u64; 207 fdt.property_u64("linux,initrd-start", initrd_start)?; 208 fdt.property_u64("linux,initrd-end", initrd_end)?; 209 } 210 211 fdt.end_node(chosen_node)?; 212 213 Ok(()) 214 } 215 216 fn create_aia_node(fdt: &mut FdtWriter, aia_device: &Arc<Mutex<dyn Vaia>>) -> FdtWriterResult<()> { 217 // IMSIC 218 if aia_device.lock().unwrap().msi_compatible() { 219 use super::layout::IMSIC_START; 220 let imsic_name = format!("imsics@{:x}", IMSIC_START.0); 221 let imsic_node = fdt.begin_node(&imsic_name)?; 222 223 fdt.property_string( 224 "compatible", 225 aia_device.lock().unwrap().imsic_compatibility(), 226 )?; 227 let imsic_reg_prop = aia_device.lock().unwrap().imsic_properties(); 228 fdt.property_array_u32("reg", &imsic_reg_prop)?; 229 fdt.property_u32("#interrupt-cells", 0u32)?; 230 fdt.property_null("interrupt-controller")?; 231 fdt.property_null("msi-controller")?; 232 // TODO complete num-ids 233 fdt.property_u32("riscv,num-ids", 2047u32)?; 234 fdt.property_u32("phandle", AIA_IMSIC_PHANDLE)?; 235 236 let mut irq_cells = Vec::new(); 237 let num_cpus = aia_device.lock().unwrap().vcpu_count(); 238 for i in 0..num_cpus { 239 irq_cells.push(CPU_INTC_BASE_PHANDLE + i); 240 irq_cells.push(S_MODE_EXT_IRQ); 241 } 242 fdt.property_array_u32("interrupts-extended", &irq_cells)?; 243 244 fdt.end_node(imsic_node)?; 245 } 246 247 // APLIC 248 use super::layout::APLIC_START; 249 let aplic_name = format!("aplic@{:x}", APLIC_START.0); 250 let aplic_node = fdt.begin_node(&aplic_name)?; 251 252 fdt.property_string( 253 "compatible", 254 aia_device.lock().unwrap().aplic_compatibility(), 255 )?; 256 let reg_cells = aia_device.lock().unwrap().aplic_properties(); 257 fdt.property_array_u32("reg", ®_cells)?; 258 fdt.property_u32("#interrupt-cells", 2u32)?; 259 fdt.property_null("interrupt-controller")?; 260 // TODO complete num-srcs 261 fdt.property_u32("riscv,num-sources", 96u32)?; 262 fdt.property_u32("phandle", AIA_APLIC_PHANDLE)?; 263 fdt.property_u32("msi-parent", AIA_IMSIC_PHANDLE)?; 264 265 fdt.end_node(aplic_node)?; 266 267 Ok(()) 268 } 269 270 fn create_serial_node<T: DeviceInfoForFdt + Clone + Debug>( 271 fdt: &mut FdtWriter, 272 dev_info: &T, 273 ) -> FdtWriterResult<()> { 274 let serial_reg_prop = [dev_info.addr(), dev_info.length()]; 275 let irq = [dev_info.irq() - IRQ_BASE, IRQ_TYPE_LEVEL_HI]; 276 277 let serial_node = fdt.begin_node(&format!("serial@{:x}", dev_info.addr()))?; 278 fdt.property_string("compatible", "ns16550a")?; 279 fdt.property_array_u64("reg", &serial_reg_prop)?; 280 fdt.property_u32("clock-frequency", 3686400)?; 281 fdt.property_u32("interrupt-parent", AIA_APLIC_PHANDLE)?; 282 fdt.property_array_u32("interrupts", &irq)?; 283 fdt.end_node(serial_node)?; 284 285 Ok(()) 286 } 287 288 fn create_devices_node<T: DeviceInfoForFdt + Clone + Debug, S: ::std::hash::BuildHasher>( 289 fdt: &mut FdtWriter, 290 dev_info: &HashMap<(DeviceType, String), T, S>, 291 ) -> FdtWriterResult<()> { 292 for ((device_type, _device_id), info) in dev_info { 293 match device_type { 294 DeviceType::Serial => create_serial_node(fdt, info)?, 295 DeviceType::Virtio(_) => unreachable!(), 296 } 297 } 298 299 Ok(()) 300 } 301 302 fn create_pci_nodes(fdt: &mut FdtWriter, pci_device_info: &[PciSpaceInfo]) -> FdtWriterResult<()> { 303 // Add node for PCIe controller. 304 // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel 305 // and https://elinux.org/Device_Tree_Usage. 306 // In multiple PCI segments setup, each PCI segment needs a PCI node. 307 for pci_device_info_elem in pci_device_info.iter() { 308 // EDK2 requires the PCIe high space above 4G address. 309 // The actual space in CLH follows the RAM. If the RAM space is small, the PCIe high space 310 // could fall below 4G. 311 // Here we cut off PCI device space below 8G in FDT to workaround the EDK2 check. 312 // But the address written in ACPI is not impacted. 313 let (pci_device_base_64bit, pci_device_size_64bit) = 314 if pci_device_info_elem.pci_device_space_start < PCI_HIGH_BASE.raw_value() { 315 ( 316 PCI_HIGH_BASE.raw_value(), 317 pci_device_info_elem.pci_device_space_size 318 - (PCI_HIGH_BASE.raw_value() - pci_device_info_elem.pci_device_space_start), 319 ) 320 } else { 321 ( 322 pci_device_info_elem.pci_device_space_start, 323 pci_device_info_elem.pci_device_space_size, 324 ) 325 }; 326 // There is no specific requirement of the 32bit MMIO range, and 327 // therefore at least we can make these ranges 4K aligned. 328 let pci_device_size_32bit: u64 = 329 MEM_32BIT_DEVICES_SIZE / ((1 << 12) * pci_device_info.len() as u64) * (1 << 12); 330 let pci_device_base_32bit: u64 = MEM_32BIT_DEVICES_START.0 331 + pci_device_size_32bit * pci_device_info_elem.pci_segment_id as u64; 332 333 let ranges = [ 334 // io addresses. Since AArch64 will not use IO address, 335 // we can set the same IO address range for every segment. 336 0x1000000, 337 0_u32, 338 0_u32, 339 (MEM_PCI_IO_START.0 >> 32) as u32, 340 MEM_PCI_IO_START.0 as u32, 341 (MEM_PCI_IO_SIZE >> 32) as u32, 342 MEM_PCI_IO_SIZE as u32, 343 // mmio addresses 344 0x2000000, // (ss = 10: 32-bit memory space) 345 (pci_device_base_32bit >> 32) as u32, // PCI address 346 pci_device_base_32bit as u32, 347 (pci_device_base_32bit >> 32) as u32, // CPU address 348 pci_device_base_32bit as u32, 349 (pci_device_size_32bit >> 32) as u32, // size 350 pci_device_size_32bit as u32, 351 // device addresses 352 0x3000000, // (ss = 11: 64-bit memory space) 353 (pci_device_base_64bit >> 32) as u32, // PCI address 354 pci_device_base_64bit as u32, 355 (pci_device_base_64bit >> 32) as u32, // CPU address 356 pci_device_base_64bit as u32, 357 (pci_device_size_64bit >> 32) as u32, // size 358 pci_device_size_64bit as u32, 359 ]; 360 let bus_range = [0, 0]; // Only bus 0 361 let reg = [ 362 pci_device_info_elem.mmio_config_address, 363 PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, 364 ]; 365 // See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt 366 let msi_map = [ 367 // rid-base: A single cell describing the first RID matched by the entry. 368 0x0, 369 // msi-controller: A single phandle to an MSI controller. 370 AIA_IMSIC_PHANDLE, 371 // msi-base: An msi-specifier describing the msi-specifier produced for the 372 // first RID matched by the entry. 373 (pci_device_info_elem.pci_segment_id as u32) << 8, 374 // length: A single cell describing how many consecutive RIDs are matched 375 // following the rid-base. 376 0x100, 377 ]; 378 379 let pci_node_name = format!("pci@{:x}", pci_device_info_elem.mmio_config_address); 380 let pci_node = fdt.begin_node(&pci_node_name)?; 381 382 fdt.property_string("compatible", "pci-host-ecam-generic")?; 383 fdt.property_string("device_type", "pci")?; 384 fdt.property_array_u32("ranges", &ranges)?; 385 fdt.property_array_u32("bus-range", &bus_range)?; 386 fdt.property_u32( 387 "linux,pci-domain", 388 pci_device_info_elem.pci_segment_id as u32, 389 )?; 390 fdt.property_u32("#address-cells", 3)?; 391 fdt.property_u32("#size-cells", 2)?; 392 fdt.property_array_u64("reg", ®)?; 393 fdt.property_u32("#interrupt-cells", 1)?; 394 fdt.property_null("interrupt-map")?; 395 fdt.property_null("interrupt-map-mask")?; 396 fdt.property_null("dma-coherent")?; 397 fdt.property_array_u32("msi-map", &msi_map)?; 398 fdt.property_u32("msi-parent", AIA_IMSIC_PHANDLE)?; 399 400 fdt.end_node(pci_node)?; 401 } 402 403 Ok(()) 404 } 405 406 // Parse the DTB binary and print for debugging 407 pub fn print_fdt(dtb: &[u8]) { 408 match fdt_parser::Fdt::new(dtb) { 409 Ok(fdt) => { 410 if let Some(root) = fdt.find_node("/") { 411 debug!("Printing the FDT:"); 412 print_node(root, 0); 413 } else { 414 debug!("Failed to find root node in FDT for debugging."); 415 } 416 } 417 Err(_) => debug!("Failed to parse FDT for debugging."), 418 } 419 } 420 421 fn print_node(node: fdt_parser::node::FdtNode<'_, '_>, n_spaces: usize) { 422 debug!("{:indent$}{}/", "", node.name, indent = n_spaces); 423 for property in node.properties() { 424 let name = property.name; 425 426 // If the property is 'compatible', its value requires special handling. 427 // The u8 array could contain multiple null-terminated strings. 428 // We copy the original array and simply replace all 'null' characters with spaces. 429 let value = if name == "compatible" { 430 let mut compatible = vec![0u8; 256]; 431 let handled_value = property 432 .value 433 .iter() 434 .map(|&c| if c == 0 { b' ' } else { c }) 435 .collect::<Vec<_>>(); 436 let len = cmp::min(255, handled_value.len()); 437 compatible[..len].copy_from_slice(&handled_value[..len]); 438 compatible[..(len + 1)].to_vec() 439 } else { 440 property.value.to_vec() 441 }; 442 let value = &value; 443 444 // Now the value can be either: 445 // - A null-terminated C string, or 446 // - Binary data 447 // We follow a very simple logic to present the value: 448 // - At first, try to convert it to CStr and print, 449 // - If failed, print it as u32 array. 450 let value_result = match CStr::from_bytes_with_nul(value) { 451 Ok(value_cstr) => match value_cstr.to_str() { 452 Ok(value_str) => Some(value_str), 453 Err(_e) => None, 454 }, 455 Err(_e) => None, 456 }; 457 458 if let Some(value_str) = value_result { 459 debug!( 460 "{:indent$}{} : {:#?}", 461 "", 462 name, 463 value_str, 464 indent = (n_spaces + 2) 465 ); 466 } else { 467 let mut array = Vec::with_capacity(256); 468 array.resize(value.len() / 4, 0u32); 469 BigEndian::read_u32_into(value, &mut array); 470 debug!( 471 "{:indent$}{} : {:X?}", 472 "", 473 name, 474 array, 475 indent = (n_spaces + 2) 476 ); 477 }; 478 } 479 480 // Print children nodes if there is any 481 for child in node.children() { 482 print_node(child, n_spaces + 2); 483 } 484 } 485