/*
 * Minimal AArch64 system boot code.
 *
 * Copyright Linaro Ltd 2019
 *
 * Loosely based on the newlib/libgloss setup stubs. Using semihosting
 * for serial output and exit functions.
 */

/*
 * Semihosting interface on ARM AArch64
 * See "Semihosting for AArch32 and AArch64 Release 2.0" by ARM
 * w0 - semihosting call number
 * x1 - semihosting parameter
 */
#define semihosting_call hlt 0xf000
#define SYS_WRITEC 0x03 /* character to debug channel */
#define SYS_WRITE0 0x04 /* string to debug channel */
#define SYS_GET_CMDLINE 0x15 /* get command line */
#define SYS_EXIT 0x18

	/*
	 * Vector tables must be at least 2KB (2^11) aligned; we use 4KB.
	 * Each entry is 128 bytes (2^7), provided by the .align inside
	 * the ventry macro; entries only branch to a common handler.
	 */
	.align 12

	.macro ventry label
	.align 7
	b \label
	.endm

vector_table:
	/* Current EL with SP0. */
	ventry curr_sp0_sync /* Synchronous */
	ventry curr_sp0_irq /* Irq/vIRQ */
	ventry curr_sp0_fiq /* Fiq/vFIQ */
	ventry curr_sp0_serror /* SError/VSError */

	/* Current EL with SPx. */
	ventry curr_spx_sync /* Synchronous */
	ventry curr_spx_irq /* IRQ/vIRQ */
	ventry curr_spx_fiq /* FIQ/vFIQ */
	ventry curr_spx_serror /* SError/VSError */

	/* Lower EL using AArch64. */
	ventry lower_a64_sync /* Synchronous */
	ventry lower_a64_irq /* IRQ/vIRQ */
	ventry lower_a64_fiq /* FIQ/vFIQ */
	ventry lower_a64_serror /* SError/VSError */

	/* Lower EL using AArch32. */
	ventry lower_a32_sync /* Synchronous */
	ventry lower_a32_irq /* IRQ/vIRQ */
	ventry lower_a32_fiq /* FIQ/vFIQ */
	ventry lower_a32_serror /* SError/VSError */

	.text
	.align 4

	/*
	 * Common vector handling for now: any exception is unexpected,
	 * so every vector funnels into one handler that prints a
	 * diagnostic via semihosting and exits with failure.
	 */
curr_sp0_sync:
curr_sp0_irq:
curr_sp0_fiq:
curr_sp0_serror:
curr_spx_sync:
curr_spx_irq:
curr_spx_fiq:
curr_spx_serror:
lower_a64_sync:
lower_a64_irq:
lower_a64_fiq:
lower_a64_serror:
lower_a32_sync:
lower_a32_irq:
lower_a32_fiq:
lower_a32_serror:
	adr x1, .unexp_excp
exit_msg:
	/* On entry: x1 = NUL-terminated message to print before exiting */
	mov x0, SYS_WRITE0
	semihosting_call
	mov x0, 1 /* EXIT_FAILURE */
	bl _exit
	/* never returns */

	.section .rodata
.unexp_excp:
	.string "Unexpected exception.\n"
.high_el_msg:
	.string "Started in lower EL than requested.\n"
.unexp_el0:
	.string "Started in invalid EL.\n"

	.align 8
	/*
	 * SYS_GET_CMDLINE parameter block: pointer to buffer and
	 * buffer size in bytes (matches the 128-byte 'cmdline'
	 * reservation in .data below).
	 */
.get_cmd:
	.quad cmdline
	.quad 128

	.text
	.align 4
	.global __start
__start:
	/*
	 * Initialise the stack for whatever EL we are in before
	 * anything else, we need it to be able to _exit cleanly.
	 * It's smaller than the stack we pass to the C code but we
	 * don't need much.
	 */
	adrp x0, system_stack_end
	add x0, x0, :lo12:system_stack_end
	mov sp, x0

	/*
	 * The test can set the semihosting command line to the target
	 * EL needed for the test. However if no semihosting args are set we will
	 * end up with -kernel/-append data (see semihosting_arg_fallback).
	 * Keep the normalised target in w11.
	 */
	mov x0, SYS_GET_CMDLINE
	adr x1, .get_cmd
	semihosting_call
	adrp x10, cmdline
	add x10, x10, :lo12:cmdline
	ldrb w11, [x10]		/* first byte of the command line, e.g. '1'..'3' */

	/* sanity check, normalise char to EL, clamp to 1 if outside range */
	subs w11, w11, #'0'
	b.lt el_default
	cmp w11, #3
	b.gt el_default
	b 1f

el_high:
	adr x1, .high_el_msg
	b exit_msg

el_default:
	mov w11, #1		/* default requested EL is EL1 */

1:
	/* Determine current Exception Level */
	mrs x0, CurrentEL
	lsr x0, x0, #2 /* CurrentEL[3:2] contains the current EL */

	/* Are we already in a lower EL than we want? */
	cmp w11, w0
	bgt el_high

	/* Branch based on current EL */
	cmp x0, #3
	b.eq setup_el3
	cmp x0, #2
	b.eq setup_el2
	cmp x0, #1
	b.eq at_testel /* Already at EL1, skip transition */

	/* Should not be at EL0 - error out */
	adr x1, .unexp_el0
	b exit_msg

setup_el3:
	/* Ensure we trap if we get anything wrong */
	adr x0, vector_table
	msr vbar_el3, x0

	/* Does the test want to be at EL3? */
	cmp w11, #3
	beq at_testel

	/* Configure EL3 for lower states (EL2 or EL1) */
	mrs x0, scr_el3
	orr x0, x0, #(1 << 10) /* RW = 1: EL2/EL1 execution state is AArch64 */
	orr x0, x0, #(1 << 0) /* NS = 1: Non-secure state */
	msr scr_el3, x0

	/*
	 * We need to check if EL2 is actually enabled via ID_AA64PFR0_EL1,
	 * otherwise we should just jump straight to EL1.
	 */
	mrs x0, id_aa64pfr0_el1
	ubfx x0, x0, #8, #4 /* Extract EL2 field (bits 11:8) */
	cbz x0, el2_not_present /* If field is 0 no EL2 */


	/* Prepare SPSR for exception return to EL2 */
	mov x0, #0x3c9 /* DAIF bits and EL2h mode (9) */
	msr spsr_el3, x0

	/* Set EL2 entry point */
	adr x0, setup_el2
	msr elr_el3, x0

	/* Return to EL2 */
	eret

el2_not_present:
	/* Initialize SCTLR_EL1 with reset value */
	msr sctlr_el1, xzr

	/* Set EL1 entry point */
	adr x0, at_testel
	msr elr_el3, x0

	/* Prepare SPSR for exception return to EL1h with interrupts masked */
	mov x0, #0x3c5 /* DAIF bits and EL1h mode (5) */
	msr spsr_el3, x0

	isb /* Synchronization barrier */
	eret /* Jump to EL1 */

setup_el2:
	/* Ensure we trap if we get anything wrong */
	adr x0, vector_table
	msr vbar_el2, x0

	/* Does the test want to be at EL2? */
	cmp w11, #2
	beq at_testel

	/* Configure EL2 to allow transition to EL1 */
	mrs x0, hcr_el2
	orr x0, x0, #(1 << 31) /* RW = 1: EL1 execution state is AArch64 */
	msr hcr_el2, x0

	/* Initialize SCTLR_EL1 with reset value */
	msr sctlr_el1, xzr

	/* Set EL1 entry point */
	adr x0, at_testel
	msr elr_el2, x0

	/* Prepare SPSR for exception return to EL1 */
	mov x0, #(0x5 << 0) /* EL1h (SPx), with interrupts disabled */
	msr spsr_el2, x0

	/* Return to EL1 */
	eret

	/*
	 * At the target EL for the test, usually EL1. Note we still
	 * set everything up as if we were at EL1: the vbar_el1/ttbr0_el1/
	 * tcr_el1/sctlr_el1 writes below target the EL1 registers even
	 * when the test asked to stay at EL2 or EL3.
	 */
at_testel:
	/* Installs a table of exception vectors to catch and handle all
	   exceptions by terminating the process with a diagnostic. */
	adr x0, vector_table
	msr vbar_el1, x0

	/* Page table setup (identity mapping). */
	adrp x0, ttb
	add x0, x0, :lo12:ttb
	msr ttbr0_el1, x0

	/*
	 * Setup a flat address mapping page-tables. Stage one simply
	 * maps RAM to the first Gb. The stage2 tables have two 2mb
	 * translation block entries covering a series of adjacent
	 * 4k pages.
	 */

	/* Stage 1 entry: indexed by IA[38:30] */
	adr x1, . /* phys address */
	bic x1, x1, #(1 << 30) - 1 /* 1GB alignment*/
	add x2, x0, x1, lsr #(30 - 3) /* offset in l1 page table */

	/* point to stage 2 table [47:12] */
	adrp x0, ttb_stage2
	orr x1, x0, #3 /* ptr to stage 2 */
	str x1, [x2]

	/* Stage 2 entries: indexed by IA[29:21] */
	ldr x5, =(((1 << 9) - 1) << 21)

	/* First block: .text/RO/execute enabled */
	adr x1, . /* phys address */
	bic x1, x1, #(1 << 21) - 1 /* 2mb block alignment */
	and x4, x1, x5 /* IA[29:21] */
	add x2, x0, x4, lsr #(21 - 3) /* offset in l2 page table */
	ldr x3, =0x401 /* attr(AF, block) */
	orr x1, x1, x3
	str x1, [x2] /* 1st 2mb (.text & rodata) */

	/* Second block: .data/RW/no execute */
	adrp x1, .data
	add x1, x1, :lo12:.data
	bic x1, x1, #(1 << 21) - 1 /* 2mb block alignment */
	and x4, x1, x5 /* IA[29:21] */
	add x2, x0, x4, lsr #(21 - 3) /* offset in l2 page table */
	ldr x3, =(3 << 53) | 0x401 /* attr(AF, NX, block) */
	orr x1, x1, x3
	str x1, [x2] /* 2nd 2mb (.data & .bss)*/

	/* Third block: at 'mte_page', set in kernel.ld */
	adrp x1, mte_page
	add x1, x1, :lo12:mte_page
	bic x1, x1, #(1 << 21) - 1
	and x4, x1, x5
	add x2, x0, x4, lsr #(21 - 3)
	/* attr(AF, NX, block, AttrIndx=Attr1) */
	ldr x3, =(3 << 53) | 0x401 | (1 << 2)
	orr x1, x1, x3
	str x1, [x2]

	/* Setup/enable the MMU. */

	/*
	 * TCR_EL1 - Translation Control Registers
	 *
	 * IPS[34:32] = 40-bit PA, 1TB
	 * TG0[15:14] = b00 => 4kb granule
	 * ORGN0[11:10] = Outer: Normal, WB Read-Alloc No Write-Alloc Cacheable
	 * IRGN0[9:8] = Inner: Normal, WB Read-Alloc No Write-Alloc Cacheable
	 * T0SZ[5:0] = 25 => input address range of 2^(64 - 25) bytes
	 *
	 * The size of T0SZ controls what the initial lookup level. It
	 * would be nice to start at level 2 but unfortunately for a
	 * flat-mapping on the virt machine we need to handle IA's
	 * with at least 1gb range to see RAM. So we start with a
	 * level 1 lookup.
	 */
	ldr x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
	msr tcr_el1, x0

	/*
	 * NOTE(review): only Attr0 (0xee, Normal WB) is programmed here,
	 * leaving Attr1 as 0; the mte_page block entry above selects
	 * AttrIndx=Attr1 — confirm that attribute value is the intended
	 * memory type for that page.
	 */
	mov x0, #0xee /* Inner/outer cacheable WB */
	msr mair_el1, x0
	isb

	/*
	 * SCTLR_EL1 - System Control Register
	 *
	 * WXN[19] = 0 = no effect, Write does not imply XN (execute never)
	 * I[12] = Instruction cachability control
	 * SA[3] = SP alignment check
	 * C[2] = Data cachability control
	 * M[0] = 1, enable stage 1 address translation for EL0/1
	 */
	mrs x0, sctlr_el1
	ldr x1, =0x100d /* bits I(12) SA(3) C(2) M(0) */
	bic x0, x0, #(1 << 1) /* clear bit A(1) */
	bic x0, x0, #(1 << 19) /* clear WXN */
	orr x0, x0, x1 /* set bits */

	dsb sy /* ensure page table writes are visible before enabling MMU */
	msr sctlr_el1, x0
	isb

	/*
	 * Enable FP/SVE registers. The standard C pre-amble will be
	 * saving these and A-profile compilers will use AdvSIMD
	 * registers unless we tell it not to.
	 *
	 * NOTE(review): there is no isb between this CPACR_EL1 write and
	 * the call into C code below — confirm a context synchronization
	 * event is not required before main uses FP/SIMD here.
	 */
	mrs x0, cpacr_el1
	orr x0, x0, #(3 << 20) /* FPEN: don't trap FP/AdvSIMD */
	orr x0, x0, #(3 << 16) /* ZEN: don't trap SVE */
	msr cpacr_el1, x0

	/*
	 * Setup some stack space before we enter the test code.
	 * Assume everything except the return value is garbage when we
	 * return, we won't need it.
	 */
	adrp x0, stack_end
	add x0, x0, :lo12:stack_end
	mov sp, x0
	bl main

	/* pass return value to sys exit */
_exit:
	/*
	 * Build the SYS_EXIT parameter block on the stack:
	 * { reason code, exit status } with x1 pointing at it.
	 */
	mov x1, x0
	ldr x0, =0x20026 /* ADP_Stopped_ApplicationExit */
	stp x0, x1, [sp, #-16]!
	mov x1, sp
	mov x0, SYS_EXIT
	semihosting_call
	/* never returns */

	/*
	 * Helper Functions
	 */

	/*
	 * Output a single character to serial port.
	 * In: w0 = character. SYS_WRITEC takes a pointer to the
	 * character, so spill x0/x1 to the stack and pass sp (the
	 * address of the saved x0, whose first byte is the char).
	 * Preserves x0/x1; clobbers flags only via the semihosting trap.
	 */
	.global __sys_outc
__sys_outc:
	stp x0, x1, [sp, #-16]!
	/* pass address of c on stack */
	mov x1, sp
	mov x0, SYS_WRITEC
	semihosting_call
	ldp x0, x1, [sp], #16
	ret

	.data

	.align 8
	/* Buffer filled by SYS_GET_CMDLINE (see .get_cmd above) */
cmdline:
	.space 128, 0

	/* Page tables must be 4kb (2^12) aligned */
	.align 12

	/* Translation table
	 * @4k granule: 9 bit lookup, 512 entries
	 */
ttb:
	.space 4096, 0

	.align 12
ttb_stage2:
	.space 4096, 0

	.align 12
	/* Small stack used before/without the C runtime (see __start) */
system_stack:
	.space 4096, 0
system_stack_end:

	/* Main stack handed to the C test code */
stack:
	.space 65536, 0
stack_end: