xref: /src/sys/arm64/arm64/machdep.c (revision 7d7295df9b13b98ac395b206667e7586c602862b)
1 /*-
2  * Copyright (c) 2014 Andrew Turner
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include "opt_acpi.h"
29 #include "opt_kstack_pages.h"
30 #include "opt_platform.h"
31 #include "opt_ddb.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/asan.h>
36 #include <sys/buf.h>
37 #include <sys/bus.h>
38 #include <sys/cons.h>
39 #include <sys/cpu.h>
40 #include <sys/csan.h>
41 #include <sys/efi.h>
42 #include <sys/efi_map.h>
43 #include <sys/exec.h>
44 #include <sys/imgact.h>
45 #include <sys/kdb.h>
46 #include <sys/kernel.h>
47 #include <sys/ktr.h>
48 #include <sys/limits.h>
49 #include <sys/linker.h>
50 #include <sys/msan.h>
51 #include <sys/msgbuf.h>
52 #include <sys/pcpu.h>
53 #include <sys/physmem.h>
54 #include <sys/proc.h>
55 #include <sys/ptrace.h>
56 #include <sys/reboot.h>
57 #include <sys/reg.h>
58 #include <sys/rwlock.h>
59 #include <sys/sched.h>
60 #include <sys/signalvar.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysent.h>
63 #include <sys/sysproto.h>
64 #include <sys/ucontext.h>
65 #include <sys/vdso.h>
66 #include <sys/vmmeter.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_object.h>
72 #include <vm/vm_page.h>
73 #include <vm/vm_phys.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_pager.h>
77 
78 #include <machine/armreg.h>
79 #include <machine/cpu.h>
80 #include <machine/cpu_feat.h>
81 #include <machine/debug_monitor.h>
82 #include <machine/hypervisor.h>
83 #include <machine/ifunc.h>
84 #include <machine/kdb.h>
85 #include <machine/machdep.h>
86 #include <machine/metadata.h>
87 #include <machine/md_var.h>
88 #include <machine/pcb.h>
89 #include <machine/undefined.h>
90 #include <machine/vmparam.h>
91 
92 #ifdef VFP
93 #include <machine/vfp.h>
94 #endif
95 
96 #ifdef DEV_ACPI
97 #include <contrib/dev/acpica/include/acpi.h>
98 #include <machine/acpica_machdep.h>
99 #endif
100 
101 #ifdef FDT
102 #include <dev/fdt/fdt_common.h>
103 #include <dev/ofw/openfirm.h>
104 #endif
105 
106 #include <dev/smbios/smbios.h>
107 
108 _Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
109 _Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
110     "pcb_fpusaved changed offset");
111 _Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
112     "pcb_fpustate changed offset");
113 
114 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
115 
116 /*
117  * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
118  * could relocate this, but will need to keep the same virtual address as
119  * it's reverenced by the EARLY_COUNTER macro.
120  */
121 struct pcpu pcpu0;
122 
123 #if defined(PERTHREAD_SSP)
124 /*
125  * The boot SSP canary. Will be replaced with a per-thread canary when
126  * scheduling has started.
127  */
128 uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
129 #endif
130 
131 static struct trapframe proc0_tf;
132 
133 int early_boot = 1;
134 int cold = 1;
135 static int boot_el;
136 
137 struct kva_md_info kmi;
138 
139 int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
140 int has_pan;
141 
142 #if defined(SOCDEV_PA)
143 /*
144  * This is the virtual address used to access SOCDEV_PA. As it's set before
145  * .bss is cleared we need to ensure it's preserved. To do this use
146  * __read_mostly as it's only ever set once but read in the putc functions.
147  */
148 uintptr_t socdev_va __read_mostly;
149 #endif
150 
151 /*
152  * Physical address of the EFI System Table. Stashed from the metadata hints
153  * passed into the kernel and used by the EFI code to call runtime services.
154  */
155 vm_paddr_t efi_systbl_phys;
156 static struct efi_map_header *efihdr;
157 
158 int (*apei_nmi)(void);
159 
#if defined(PERTHREAD_SSP_WARNING)
/*
 * SYSINIT hook: report that per-thread SSP was requested but the compiler
 * cannot provide it. It is registered twice below, at SI_SUB_COPYRIGHT and
 * again at SI_SUB_LAST.
 */
static void
print_ssp_warning(void *data __unused)
{

	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
169 
170 static cpu_feat_en
pan_check(const struct cpu_feat * feat __unused,u_int midr __unused)171 pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
172 {
173 	uint64_t id_aa64mfr1;
174 
175 	get_kernel_reg(ID_AA64MMFR1_EL1, &id_aa64mfr1);
176 	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE)
177 		return (FEAT_ALWAYS_DISABLE);
178 
179 	return (FEAT_DEFAULT_ENABLE);
180 }
181 
182 static bool
pan_enable(const struct cpu_feat * feat __unused,cpu_feat_errata errata_status __unused,u_int * errata_list __unused,u_int errata_count __unused)183 pan_enable(const struct cpu_feat *feat __unused,
184     cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
185     u_int errata_count __unused)
186 {
187 	has_pan = 1;
188 
189 	/*
190 	 * This sets the PAN bit, stopping the kernel from accessing
191 	 * memory when userspace can also access it unless the kernel
192 	 * uses the userspace load/store instructions.
193 	 */
194 	WRITE_SPECIALREG(sctlr_el1,
195 	    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
196 	__asm __volatile(
197 	    ".arch_extension pan	\n"
198 	    "msr pan, #1		\n"
199 	    ".arch_extension nopan	\n");
200 
201 	return (true);
202 }
203 
204 static void
pan_disabled(const struct cpu_feat * feat __unused)205 pan_disabled(const struct cpu_feat *feat __unused)
206 {
207 	if (PCPU_GET(cpuid) == 0)
208 		update_special_reg(ID_AA64MMFR1_EL1, ID_AA64MMFR1_PAN_MASK, 0);
209 }
210 
211 CPU_FEAT(feat_pan, "Privileged access never",
212     pan_check, NULL, pan_enable, pan_disabled,
213     CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
214 
215 static cpu_feat_en
mops_check(const struct cpu_feat * feat __unused,u_int midr __unused)216 mops_check(const struct cpu_feat *feat __unused, u_int midr __unused)
217 {
218 	uint64_t id_aa64isar2;
219 
220 	get_kernel_reg(ID_AA64ISAR2_EL1, &id_aa64isar2);
221 	if (ID_AA64ISAR2_MOPS_VAL(id_aa64isar2) == ID_AA64ISAR2_MOPS_NONE)
222 		return (FEAT_ALWAYS_DISABLE);
223 
224 	return (FEAT_DEFAULT_ENABLE);
225 }
226 
227 static bool
mops_enable(const struct cpu_feat * feat __unused,cpu_feat_errata errata_status __unused,u_int * errata_list __unused,u_int errata_count __unused)228 mops_enable(const struct cpu_feat *feat __unused,
229     cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
230     u_int errata_count __unused)
231 {
232 	WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) | SCTLR_MSCEn);
233 	isb();
234 
235 	return (true);
236 }
237 
238 static void
mops_disabled(const struct cpu_feat * feat __unused)239 mops_disabled(const struct cpu_feat *feat __unused)
240 {
241 	WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) & ~SCTLR_MSCEn);
242 	isb();
243 }
244 
245 CPU_FEAT(feat_mops, "MOPS",
246     mops_check, NULL, mops_enable, mops_disabled,
247     CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
248 
249 bool
has_hyp(void)250 has_hyp(void)
251 {
252 	return (boot_el == CURRENTEL_EL_EL2);
253 }
254 
255 bool
in_vhe(void)256 in_vhe(void)
257 {
258 	/* If we are currently in EL2 then must be in VHE */
259 	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
260 	    CURRENTEL_EL_EL2);
261 }
262 
263 static void
cpu_startup(void * dummy)264 cpu_startup(void *dummy)
265 {
266 	vm_paddr_t size;
267 	int i;
268 
269 	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
270 	    ptoa((uintmax_t)realmem) / 1024 / 1024);
271 
272 	if (bootverbose) {
273 		printf("Physical memory chunk(s):\n");
274 		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
275 			size = phys_avail[i + 1] - phys_avail[i];
276 			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
277 			    (uintmax_t)phys_avail[i],
278 			    (uintmax_t)phys_avail[i + 1] - 1,
279 			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
280 		}
281 	}
282 
283 	printf("avail memory = %ju (%ju MB)\n",
284 	    ptoa((uintmax_t)vm_free_count()),
285 	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
286 
287 	undef_init();
288 	install_cpu_errata();
289 
290 	vm_ksubmap_init(&kmi);
291 	bufinit();
292 	vm_pager_bufferinit();
293 }
294 
295 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
296 
297 static void
late_ifunc_resolve(void * dummy __unused)298 late_ifunc_resolve(void *dummy __unused)
299 {
300 	link_elf_late_ireloc();
301 }
302 /* Late enough for cpu_feat to have completed */
303 SYSINIT(late_ifunc_resolve, SI_SUB_CONFIGURE, SI_ORDER_ANY,
304     late_ifunc_resolve, NULL);
305 
/*
 * Idle wakeup hook. This implementation does nothing with the given CPU
 * and always returns 0.
 */
int
cpu_idle_wakeup(int cpu)
{
	return (0);
}
312 
/*
 * Idle the current CPU. Inside a spinlock section, wait for an interrupt
 * with "wfi" unless the scheduler reports runnable work. When busy is
 * zero the wait is bracketed by cpu_idleclock()/cpu_activeclock().
 */
void
cpu_idle(int busy)
{
	bool switch_clock;

	switch_clock = (busy == 0);

	spinlock_enter();
	if (switch_clock)
		cpu_idleclock();
	if (!sched_runnable()) {
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	}
	if (switch_clock)
		cpu_activeclock();
	spinlock_exit();
}
328 
/*
 * Halt the CPU: disable interrupts and execute "wfi" forever. This
 * function never returns.
 */
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
339 
/*
 * Flush the D-cache for non-DMA I/O so that the I-cache can be made
 * coherent later.
 *
 * Not implemented yet: both arguments are currently ignored.
 */
void
cpu_flush_dcache(void *ptr, size_t len)
{
	/* ARM64TODO TBD */
}
350 
351 /* Get current clock frequency for the given CPU ID. */
352 int
cpu_est_clockrate(int cpu_id,uint64_t * rate)353 cpu_est_clockrate(int cpu_id, uint64_t *rate)
354 {
355 	struct pcpu *pc;
356 
357 	pc = pcpu_find(cpu_id);
358 	if (pc == NULL || rate == NULL)
359 		return (EINVAL);
360 
361 	if (pc->pc_clock == 0)
362 		return (EOPNOTSUPP);
363 
364 	*rate = pc->pc_clock;
365 	return (0);
366 }
367 
368 void
cpu_pcpu_init(struct pcpu * pcpu,int cpuid,size_t size)369 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
370 {
371 
372 	pcpu->pc_acpi_id = 0xffffffff;
373 	pcpu->pc_mpidr = UINT64_MAX;
374 }
375 
376 void
spinlock_enter(void)377 spinlock_enter(void)
378 {
379 	struct thread *td;
380 	register_t daif;
381 
382 	td = curthread;
383 	if (td->td_md.md_spinlock_count == 0) {
384 		daif = intr_disable();
385 		td->td_md.md_spinlock_count = 1;
386 		td->td_md.md_saved_daif = daif;
387 		critical_enter();
388 	} else
389 		td->td_md.md_spinlock_count++;
390 }
391 
392 void
spinlock_exit(void)393 spinlock_exit(void)
394 {
395 	struct thread *td;
396 	register_t daif;
397 
398 	td = curthread;
399 	daif = td->td_md.md_saved_daif;
400 	td->td_md.md_spinlock_count--;
401 	if (td->td_md.md_spinlock_count == 0) {
402 		critical_exit();
403 		intr_restore(daif);
404 	}
405 }
406 
407 /*
408  * Construct a PCB from a trapframe. This is called from kdb_trap() where
409  * we want to start a backtrace from the function that caused us to enter
410  * the debugger. We have the context in the trapframe, but base the trace
411  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
412  * enough for a backtrace.
413  */
414 void
makectx(struct trapframe * tf,struct pcb * pcb)415 makectx(struct trapframe *tf, struct pcb *pcb)
416 {
417 	int i;
418 
419 	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
420 	for (i = 0; i < nitems(pcb->pcb_x); i++) {
421 		if (i == PCB_LR)
422 			pcb->pcb_x[i] = tf->tf_elr;
423 		else
424 			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
425 	}
426 
427 	pcb->pcb_sp = tf->tf_sp;
428 }
429 
430 static void
init_proc0(vm_offset_t kstack)431 init_proc0(vm_offset_t kstack)
432 {
433 	struct pcpu *pcpup;
434 
435 	pcpup = cpuid_to_pcpu[0];
436 	MPASS(pcpup != NULL);
437 
438 	proc_linkup0(&proc0, &thread0);
439 	thread0.td_kstack = kstack;
440 	thread0.td_kstack_pages = KSTACK_PAGES;
441 #if defined(PERTHREAD_SSP)
442 	thread0.td_md.md_canary = boot_canary;
443 #endif
444 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
445 	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
446 	thread0.td_pcb->pcb_flags = 0;
447 	thread0.td_pcb->pcb_fpflags = 0;
448 	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
449 	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
450 	thread0.td_frame = &proc0_tf;
451 	ptrauth_thread0(&thread0);
452 	pcpup->pc_curpcb = thread0.td_pcb;
453 
454 	/*
455 	 * Unmask SError exceptions. They are used to signal a RAS failure,
456 	 * or other hardware error.
457 	 */
458 	serror_enable();
459 }
460 
461 /*
462  * Get an address to be used to write to kernel data that may be mapped
463  * read-only, e.g. to patch kernel code.
464  */
465 bool
arm64_get_writable_addr(void * addr,void ** out)466 arm64_get_writable_addr(void *addr, void **out)
467 {
468 	vm_paddr_t pa;
469 
470 	/* Check if the page is writable */
471 	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
472 		*out = addr;
473 		return (true);
474 	}
475 
476 	/*
477 	 * Find the physical address of the given page.
478 	 */
479 	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
480 		return (false);
481 	}
482 
483 	/*
484 	 * If it is within the DMAP region and is writable use that.
485 	 */
486 	if (PHYS_IN_DMAP_RANGE(pa)) {
487 		addr = (void *)PHYS_TO_DMAP(pa);
488 		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
489 		    (vm_offset_t)addr))) {
490 			*out = addr;
491 			return (true);
492 		}
493 	}
494 
495 	return (false);
496 }
497 
498 /*
499  * Map the passed in VA in EFI space to a void * using the efi memory table to
500  * find the PA and return it in the DMAP, if it exists. We're used between the
501  * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
502  * tables We assume that either the entry you are mapping fits within its page,
503  * or if it spills to the next page, that's contiguous in PA and in the DMAP.
504  * All observed tables obey the first part of this precondition.
505  */
506 struct early_map_data
507 {
508 	vm_offset_t va;
509 	vm_offset_t pa;
510 };
511 
512 static void
efi_early_map_entry(struct efi_md * p,void * argp)513 efi_early_map_entry(struct efi_md *p, void *argp)
514 {
515 	struct early_map_data *emdp = argp;
516 	vm_offset_t s, e;
517 
518 	if (emdp->pa != 0)
519 		return;
520 	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
521 		return;
522 	s = p->md_virt;
523 	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
524 	if (emdp->va < s  || emdp->va >= e)
525 		return;
526 	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
527 }
528 
529 static void *
efi_early_map(vm_offset_t va)530 efi_early_map(vm_offset_t va)
531 {
532 	struct early_map_data emd = { .va = va };
533 
534 	efi_map_foreach_entry(efihdr, efi_early_map_entry, &emd);
535 	if (emd.pa == 0)
536 		return NULL;
537 	return (void *)PHYS_TO_DMAP(emd.pa);
538 }
539 
540 
541 /*
542  * When booted via kexec from Linux, the prior kernel will pass in reserved
543  * memory areas in an EFI config table. We need to find that table and walk
544  * through it excluding the memory ranges in it. btw, this is called too early
545  * for the printf to do anything (unless EARLY_PRINTF is defined) since msgbufp
546  * isn't initialized, let alone a console, but breakpoints in printf help
547  * diagnose rare failures.
548  */
549 static void
exclude_efi_memreserve(vm_paddr_t efi_systbl_phys)550 exclude_efi_memreserve(vm_paddr_t efi_systbl_phys)
551 {
552 	struct efi_systbl *systbl;
553 	efi_guid_t efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
554 
555 	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
556 	if (systbl == NULL) {
557 		printf("can't map systbl\n");
558 		return;
559 	}
560 	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
561 		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
562 		return;
563 	}
564 
565 	/*
566 	 * We don't yet have the pmap system booted enough to create a pmap for
567 	 * the efi firmware's preferred address space from the GetMemoryMap()
568 	 * table. The st_cfgtbl is a VA in this space, so we need to do the
569 	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
570 	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
571 	 * noted below.
572 	 */
573 	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
574 		return;
575 	for (int i = 0; i < systbl->st_entries; i++) {
576 		struct efi_cfgtbl *cfgtbl;
577 		struct linux_efi_memreserve *mr;
578 
579 		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
580 		if (cfgtbl == NULL)
581 			panic("Can't map the config table entry %d\n", i);
582 		if (memcmp(&cfgtbl->ct_guid, &efi_memreserve, sizeof(efi_guid_t)) != 0)
583 			continue;
584 
585 		/*
586 		 * cfgtbl points are either VA or PA, depending on the GUID of
587 		 * the table. memreserve GUID pointers are PA and not converted
588 		 * after a SetVirtualAddressMap(). The list's mr_next pointer
589 		 * is also a PA.
590 		 */
591 		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
592 			(vm_offset_t)cfgtbl->ct_data);
593 		while (true) {
594 			for (int j = 0; j < mr->mr_count; j++) {
595 				struct linux_efi_memreserve_entry *mre;
596 
597 				mre = &mr->mr_entry[j];
598 				physmem_exclude_region(mre->mre_base, mre->mre_size,
599 				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
600 			}
601 			if (mr->mr_next == 0)
602 				break;
603 			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
604 		};
605 	}
606 
607 }
608 
#ifdef FDT
/*
 * Locate the device tree blob and hand it to the Open Firmware layer.
 * The blob address is taken from the preload metadata; when FDT_DTB_STATIC
 * is defined the statically embedded blob is the fallback. With no blob
 * the function only reports an error (unless TSLOG is defined). Failure to
 * install or initialise the FDT backend panics. On success the FDT
 * bootargs are parsed.
 */
static void
try_load_dtb(void)
{
	vm_offset_t blob;

	blob = MD_FETCH(preload_kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (blob == 0)
		blob = (vm_offset_t)&fdt_static_dtb;
#endif

	if (blob == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (!OF_install(OFW_FDT, 0))
		panic("Cannot install FDT");

	if (OF_init((void *)blob) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
641 
642 static bool
bus_probe(void)643 bus_probe(void)
644 {
645 	bool has_acpi, has_fdt;
646 	char *order, *env;
647 
648 	has_acpi = has_fdt = false;
649 
650 #ifdef FDT
651 	has_fdt = (OF_peer(0) != 0);
652 #endif
653 #ifdef DEV_ACPI
654 	has_acpi = (AcpiOsGetRootPointer() != 0);
655 #endif
656 
657 	env = kern_getenv("kern.cfg.order");
658 	if (env != NULL) {
659 		order = env;
660 		while (order != NULL) {
661 			if (has_acpi &&
662 			    strncmp(order, "acpi", 4) == 0 &&
663 			    (order[4] == ',' || order[4] == '\0')) {
664 				arm64_bus_method = ARM64_BUS_ACPI;
665 				break;
666 			}
667 			if (has_fdt &&
668 			    strncmp(order, "fdt", 3) == 0 &&
669 			    (order[3] == ',' || order[3] == '\0')) {
670 				arm64_bus_method = ARM64_BUS_FDT;
671 				break;
672 			}
673 			order = strchr(order, ',');
674 			if (order != NULL)
675 				order++;	/* Skip comma */
676 		}
677 		freeenv(env);
678 
679 		/* If we set the bus method it is valid */
680 		if (arm64_bus_method != ARM64_BUS_NONE)
681 			return (true);
682 	}
683 	/* If no order or an invalid order was set use the default */
684 	if (arm64_bus_method == ARM64_BUS_NONE) {
685 		if (has_acpi)
686 			arm64_bus_method = ARM64_BUS_ACPI;
687 		else if (has_fdt)
688 			arm64_bus_method = ARM64_BUS_FDT;
689 	}
690 
691 	/*
692 	 * If no option was set the default is valid, otherwise we are
693 	 * setting one to get cninit() working, then calling panic to tell
694 	 * the user about the invalid bus setup.
695 	 */
696 	return (env == NULL);
697 }
698 
699 static void
cache_setup(void)700 cache_setup(void)
701 {
702 	int dczva_line_shift;
703 	uint32_t dczid_el0;
704 
705 	identify_cache(READ_SPECIALREG(ctr_el0));
706 
707 	dczid_el0 = READ_SPECIALREG(dczid_el0);
708 
709 	/* Check if dc zva is not prohibited */
710 	if (dczid_el0 & DCZID_DZP)
711 		dczva_line_size = 0;
712 	else {
713 		/* Same as with above calculations */
714 		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
715 		dczva_line_size = sizeof(int) << dczva_line_shift;
716 	}
717 }
718 
719 int
memory_mapping_mode(vm_paddr_t pa)720 memory_mapping_mode(vm_paddr_t pa)
721 {
722 	struct efi_md *map, *p;
723 	size_t efisz;
724 	int ndesc, i;
725 
726 	if (efihdr == NULL)
727 		return (VM_MEMATTR_WRITE_BACK);
728 
729 	/*
730 	 * Memory map data provided by UEFI via the GetMemoryMap
731 	 * Boot Services API.
732 	 */
733 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
734 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
735 
736 	if (efihdr->descriptor_size == 0)
737 		return (VM_MEMATTR_WRITE_BACK);
738 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
739 
740 	for (i = 0, p = map; i < ndesc; i++,
741 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
742 		if (pa < p->md_phys ||
743 		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
744 			continue;
745 		if (p->md_type == EFI_MD_TYPE_IOMEM ||
746 		    p->md_type == EFI_MD_TYPE_IOPORT)
747 			return (VM_MEMATTR_DEVICE);
748 		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
749 		    p->md_type == EFI_MD_TYPE_RECLAIM)
750 			return (VM_MEMATTR_WRITE_BACK);
751 		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
752 			return (VM_MEMATTR_WRITE_THROUGH);
753 		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
754 			return (VM_MEMATTR_WRITE_COMBINING);
755 		break;
756 	}
757 
758 	return (VM_MEMATTR_DEVICE);
759 }
760 
#ifdef FDT
/*
 * fdt_foreach_mem_region() callback: register the region with physmem as
 * hardware memory.
 */
static void
fdt_physmem_hardware_region_cb(const struct mem_region *mr, void *arg __unused)
{

	physmem_hardware_region(mr->mr_start, mr->mr_size);
}

/*
 * fdt_foreach_reserved_mem() callback: exclude the region from both
 * allocation and kernel dumps.
 */
static void
fdt_physmem_exclude_region_cb(const struct mem_region *mr, void *arg __unused)
{
	const int flags = EXFLAG_NODUMP | EXFLAG_NOALLOC;

	physmem_exclude_region(mr->mr_start, mr->mr_size, flags);
}
#endif
775 
776 void
initarm(struct arm64_bootparams * abp)777 initarm(struct arm64_bootparams *abp)
778 {
779 	struct efi_fb *efifb;
780 	struct pcpu *pcpup;
781 	char *env;
782 #ifdef FDT
783 	phandle_t root;
784 	char dts_version[255];
785 #endif
786 	vm_offset_t lastaddr;
787 	bool valid;
788 
789 	TSRAW(&thread0, TS_ENTER, __func__, NULL);
790 
791 	boot_el = abp->boot_el;
792 
793 	/* Parse loader or FDT boot parameters. Determine last used address. */
794 	lastaddr = parse_boot_param(abp);
795 
796 	identify_cpu(0);
797 	identify_hypervisor_smbios();
798 
799 	update_special_regs(0);
800 
801 	sched_instance_select();
802 	link_elf_ireloc();
803 
804 	/* Set the pcpu data, this is needed by pmap_bootstrap */
805 	pcpup = &pcpu0;
806 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
807 
808 	/*
809 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
810 	 * loaded when entering the kernel from userland.
811 	 */
812 	__asm __volatile(
813 	    "mov x18, %0 \n"
814 	    "msr tpidr_el1, %0" :: "r"(pcpup));
815 
816 	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
817 	PCPU_SET(curthread, &thread0);
818 	PCPU_SET(midr, get_midr());
819 
820 #ifdef FDT
821 	try_load_dtb();
822 #endif
823 
824 	efi_systbl_phys = MD_FETCH(preload_kmdp, MODINFOMD_FW_HANDLE,
825 	    vm_paddr_t);
826 
827 	/* Load the physical memory ranges */
828 	efihdr = (struct efi_map_header *)preload_search_info(preload_kmdp,
829 	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
830 	if (efihdr != NULL)
831 		efi_map_add_entries(efihdr);
832 #ifdef FDT
833 	else {
834 		/* Grab physical memory regions information from device tree. */
835 		if (fdt_foreach_mem_region(fdt_physmem_hardware_region_cb,
836 		    NULL) != 0)
837 			panic("Cannot get physical memory regions");
838 	}
839 	fdt_foreach_reserved_mem(fdt_physmem_exclude_region_cb, NULL);
840 #endif
841 
842 	/* Exclude the EFI framebuffer from our view of physical memory. */
843 	efifb = (struct efi_fb *)preload_search_info(preload_kmdp,
844 	    MODINFO_METADATA | MODINFOMD_EFI_FB);
845 	if (efifb != NULL)
846 		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
847 		    EXFLAG_NOALLOC);
848 
849 	/* Do basic tuning, hz etc */
850 	init_param1();
851 
852 	cache_setup();
853 
854 	/*
855 	 * Perform a staged bootstrap of virtual memory.
856 	 *
857 	 * - First we create the DMAP region. This allows it to be used in
858 	 *   later bootstrapping.
859 	 * - Next exclude memory that is needed in the DMAP region, but must
860 	 *   not be used by FreeBSD.
861 	 * - Lastly complete the bootstrapping. It may use the physical
862 	 *   memory map so any excluded memory must be marked as such before
863 	 *   pmap_bootstrap() is called.
864 	 */
865 	pmap_bootstrap_dmap(lastaddr - KERNBASE);
866 	/*
867 	 * Exclude EFI entries needed in the DMAP, e.g. EFI_MD_TYPE_RECLAIM
868 	 * may contain the ACPI tables but shouldn't be used by the kernel
869 	 */
870 	if (efihdr != NULL)
871 		efi_map_exclude_entries(efihdr);
872 	/*  Do the same for reserve entries in the EFI MEMRESERVE table */
873 	if (efi_systbl_phys != 0)
874 		exclude_efi_memreserve(efi_systbl_phys);
875 	/* Continue bootstrapping pmap */
876 	pmap_bootstrap();
877 
878 	/*
879 	 * We carefully bootstrap the sanitizer map after we've excluded
880 	 * absolutely everything else that could impact phys_avail.  There's not
881 	 * always enough room for the initial shadow map after the kernel, so
882 	 * we'll end up searching for segments that we can safely use.  Those
883 	 * segments also get excluded from phys_avail.
884 	 */
885 #if defined(KASAN) || defined(KMSAN)
886 	pmap_bootstrap_san();
887 #endif
888 
889 	physmem_init_kernel_globals();
890 
891 	valid = bus_probe();
892 
893 	cninit();
894 	set_ttbr0(abp->kern_ttbr0);
895 	pmap_s1_invalidate_all_kernel();
896 
897 	if (!valid)
898 		panic("Invalid bus configuration: %s",
899 		    kern_getenv("kern.cfg.order"));
900 
901 	/* Detect early CPU feature support */
902 	enable_cpu_feat(CPU_FEAT_EARLY_BOOT);
903 
904 	/*
905 	 * Dump the boot metadata. We have to wait for cninit() since console
906 	 * output is required. If it's grossly incorrect the kernel will never
907 	 * make it this far.
908 	 */
909 	if (getenv_is_true("debug.dump_modinfo_at_boot"))
910 		preload_dump();
911 
912 	init_proc0(abp->kern_stack);
913 	msgbufinit(msgbufp, msgbufsize);
914 	mutex_init();
915 	init_param2(physmem);
916 
917 	dbg_init();
918 	kdb_init();
919 #ifdef KDB
920 	if ((boothowto & RB_KDB) != 0)
921 		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
922 #endif
923 
924 	kcsan_cpu_init(0);
925 	kasan_init();
926 	kmsan_init();
927 
928 	env = kern_getenv("kernelname");
929 	if (env != NULL)
930 		strlcpy(kernelname, env, sizeof(kernelname));
931 
932 #ifdef FDT
933 	if (arm64_bus_method == ARM64_BUS_FDT) {
934 		root = OF_finddevice("/");
935 		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
936 			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
937 				printf("WARNING: DTB version is %s while kernel expects %s, "
938 				    "please update the DTB in the ESP\n",
939 				    dts_version,
940 				    LINUX_DTS_VERSION);
941 		} else {
942 			printf("WARNING: Cannot find freebsd,dts-version property, "
943 			    "cannot check DTB compliance\n");
944 		}
945 	}
946 #endif
947 
948 	if (boothowto & RB_VERBOSE) {
949 		if (efihdr != NULL)
950 			efi_map_print_entries(efihdr);
951 		physmem_print_tables();
952 	}
953 
954 	early_boot = 0;
955 
956 	if (bootverbose && kstack_pages != KSTACK_PAGES)
957 		printf("kern.kstack_pages = %d ignored for thread0\n",
958 		    kstack_pages);
959 
960 	TSEXIT();
961 }
962 
963 void
dbg_init(void)964 dbg_init(void)
965 {
966 
967 	/* Clear OS lock */
968 	WRITE_SPECIALREG(oslar_el1, 0);
969 
970 	/* This permits DDB to use debug registers for watchpoints. */
971 	dbg_monitor_init();
972 
973 	/* TODO: Eventually will need to initialize debug registers here. */
974 }
975 
976 #ifdef DDB
977 #include <ddb/ddb.h>
978 
DB_SHOW_COMMAND(specialregs,db_show_spregs)979 DB_SHOW_COMMAND(specialregs, db_show_spregs)
980 {
981 #define	PRINT_REG(reg)	\
982     db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))
983 
984 	PRINT_REG(actlr_el1);
985 	PRINT_REG(afsr0_el1);
986 	PRINT_REG(afsr1_el1);
987 	PRINT_REG(aidr_el1);
988 	PRINT_REG(amair_el1);
989 	PRINT_REG(ccsidr_el1);
990 	PRINT_REG(clidr_el1);
991 	PRINT_REG(contextidr_el1);
992 	PRINT_REG(cpacr_el1);
993 	PRINT_REG(csselr_el1);
994 	PRINT_REG(ctr_el0);
995 	PRINT_REG(currentel);
996 	PRINT_REG(daif);
997 	PRINT_REG(dczid_el0);
998 	PRINT_REG(elr_el1);
999 	PRINT_REG(esr_el1);
1000 	PRINT_REG(far_el1);
1001 #if 0
1002 	/* ARM64TODO: Enable VFP before reading floating-point registers */
1003 	PRINT_REG(fpcr);
1004 	PRINT_REG(fpsr);
1005 #endif
1006 	PRINT_REG(id_aa64afr0_el1);
1007 	PRINT_REG(id_aa64afr1_el1);
1008 	PRINT_REG(id_aa64dfr0_el1);
1009 	PRINT_REG(id_aa64dfr1_el1);
1010 	PRINT_REG(id_aa64isar0_el1);
1011 	PRINT_REG(id_aa64isar1_el1);
1012 	PRINT_REG(id_aa64pfr0_el1);
1013 	PRINT_REG(id_aa64pfr1_el1);
1014 	PRINT_REG(id_afr0_el1);
1015 	PRINT_REG(id_dfr0_el1);
1016 	PRINT_REG(id_isar0_el1);
1017 	PRINT_REG(id_isar1_el1);
1018 	PRINT_REG(id_isar2_el1);
1019 	PRINT_REG(id_isar3_el1);
1020 	PRINT_REG(id_isar4_el1);
1021 	PRINT_REG(id_isar5_el1);
1022 	PRINT_REG(id_mmfr0_el1);
1023 	PRINT_REG(id_mmfr1_el1);
1024 	PRINT_REG(id_mmfr2_el1);
1025 	PRINT_REG(id_mmfr3_el1);
1026 #if 0
1027 	/* Missing from llvm */
1028 	PRINT_REG(id_mmfr4_el1);
1029 #endif
1030 	PRINT_REG(id_pfr0_el1);
1031 	PRINT_REG(id_pfr1_el1);
1032 	PRINT_REG(isr_el1);
1033 	PRINT_REG(mair_el1);
1034 	PRINT_REG(midr_el1);
1035 	PRINT_REG(mpidr_el1);
1036 	PRINT_REG(mvfr0_el1);
1037 	PRINT_REG(mvfr1_el1);
1038 	PRINT_REG(mvfr2_el1);
1039 	PRINT_REG(revidr_el1);
1040 	PRINT_REG(sctlr_el1);
1041 	PRINT_REG(sp_el0);
1042 	PRINT_REG(spsel);
1043 	PRINT_REG(spsr_el1);
1044 	PRINT_REG(tcr_el1);
1045 	PRINT_REG(tpidr_el0);
1046 	PRINT_REG(tpidr_el1);
1047 	PRINT_REG(tpidrro_el0);
1048 	PRINT_REG(ttbr0_el1);
1049 	PRINT_REG(ttbr1_el1);
1050 	PRINT_REG(vbar_el1);
1051 #undef PRINT_REG
1052 }
1053 
DB_SHOW_COMMAND(vtop,db_show_vtop)1054 DB_SHOW_COMMAND(vtop, db_show_vtop)
1055 {
1056 	uint64_t phys;
1057 
1058 	if (have_addr) {
1059 		phys = arm64_address_translate_s1e1r(addr);
1060 		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
1061 		phys = arm64_address_translate_s1e1w(addr);
1062 		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
1063 		phys = arm64_address_translate_s1e0r(addr);
1064 		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
1065 		phys = arm64_address_translate_s1e0w(addr);
1066 		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
1067 	} else
1068 		db_printf("show vtop <virt_addr>\n");
1069 }
1070 #endif
1071 
1072 #undef memset
1073 #undef memmove
1074 #undef memcpy
1075 
1076 void	*memset_std(void *buf, int c, size_t len);
1077 void	*memset_mops(void *buf, int c, size_t len);
1078 void    *memmove_std(void * _Nonnull dst, const void * _Nonnull src,
1079 	    size_t len);
1080 void    *memmove_mops(void * _Nonnull dst, const void * _Nonnull src,
1081 	    size_t len);
1082 void    *memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
1083 	    size_t len);
1084 void    *memcpy_mops(void * _Nonnull dst, const void * _Nonnull src,
1085 	    size_t len);
1086 
1087 DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
1088 {
1089 	return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memset_mops : memset_std);
1090 }
1091 
1092 DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
1093     size_t))
1094 {
1095 	return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memmove_mops : memmove_std);
1096 }
1097 
1098 DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,
1099     size_t))
1100 {
1101 	return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memcpy_mops : memcpy_std);
1102 }
1103