193dd2aa3SAndrew Jones /* 293dd2aa3SAndrew Jones * Initialize machine setup information 393dd2aa3SAndrew Jones * 493dd2aa3SAndrew Jones * Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com> 5dbd38004SZixuan Wang * Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com> 693dd2aa3SAndrew Jones * 793dd2aa3SAndrew Jones * This work is licensed under the terms of the GNU LGPL, version 2. 893dd2aa3SAndrew Jones */ 993dd2aa3SAndrew Jones #include "libcflat.h" 10716cea8aSPaolo Bonzini #include "fwcfg.h" 11716cea8aSPaolo Bonzini #include "alloc_phys.h" 1203b1e457SNadav Amit #include "argv.h" 13dbd38004SZixuan Wang #include "desc.h" 14dbd38004SZixuan Wang #include "apic.h" 15dbd38004SZixuan Wang #include "apic-defs.h" 16dbd38004SZixuan Wang #include "asm/setup.h" 1793dd2aa3SAndrew Jones 18716cea8aSPaolo Bonzini extern char edata; 19716cea8aSPaolo Bonzini 20716cea8aSPaolo Bonzini struct mbi_bootinfo { 21716cea8aSPaolo Bonzini u32 flags; 22716cea8aSPaolo Bonzini u32 mem_lower; 23716cea8aSPaolo Bonzini u32 mem_upper; 24716cea8aSPaolo Bonzini u32 boot_device; 25716cea8aSPaolo Bonzini u32 cmdline; 26716cea8aSPaolo Bonzini u32 mods_count; 27716cea8aSPaolo Bonzini u32 mods_addr; 2848a0145fSPaolo Bonzini u32 reserved[4]; /* 28-43 */ 2948a0145fSPaolo Bonzini u32 mmap_length; 30716cea8aSPaolo Bonzini u32 mmap_addr; 31716cea8aSPaolo Bonzini u32 reserved0[3]; /* 52-63 */ 32716cea8aSPaolo Bonzini u32 bootloader; 33716cea8aSPaolo Bonzini u32 reserved1[5]; /* 68-87 */ 34716cea8aSPaolo Bonzini u32 size; 35716cea8aSPaolo Bonzini }; 36716cea8aSPaolo Bonzini 37716cea8aSPaolo Bonzini struct mbi_module { 38716cea8aSPaolo Bonzini u32 start, end; 39716cea8aSPaolo Bonzini u32 cmdline; 40716cea8aSPaolo Bonzini u32 unused; 41716cea8aSPaolo Bonzini }; 4293dd2aa3SAndrew Jones 4348a0145fSPaolo Bonzini struct mbi_mem { 4448a0145fSPaolo Bonzini u32 size; 4548a0145fSPaolo Bonzini u64 base_addr; 4648a0145fSPaolo Bonzini u64 length; 4748a0145fSPaolo Bonzini u32 type; 4848a0145fSPaolo Bonzini } __attribute__((packed)); 4948a0145fSPaolo Bonzini 503c7d322eSAndrew Jones #define ENV_SIZE 16384 513c7d322eSAndrew Jones 5206846df5SThomas Huth void setup_env(char *env, int size); 5306846df5SThomas Huth void setup_multiboot(struct mbi_bootinfo *bootinfo); 5406846df5SThomas Huth void setup_libcflat(void); 553c7d322eSAndrew Jones 5693dd2aa3SAndrew Jones char *initrd; 5793dd2aa3SAndrew Jones u32 initrd_size; 5893dd2aa3SAndrew Jones 593c7d322eSAndrew Jones static char env[ENV_SIZE]; 6048a0145fSPaolo Bonzini static struct mbi_bootinfo *bootinfo; 613c7d322eSAndrew Jones 6248a0145fSPaolo Bonzini #define HUGEPAGE_SIZE (1 << 21) 6348a0145fSPaolo Bonzini 6448a0145fSPaolo Bonzini #ifdef __x86_64__ 6548a0145fSPaolo Bonzini void find_highmem(void) 6648a0145fSPaolo Bonzini { 6748a0145fSPaolo Bonzini /* Memory above 4 GB is only supported on 64-bit systems. */ 6848a0145fSPaolo Bonzini if (!(bootinfo->flags & 64)) 6948a0145fSPaolo Bonzini return; 7048a0145fSPaolo Bonzini 7148a0145fSPaolo Bonzini u64 upper_end = bootinfo->mem_upper * 1024ull; 7248a0145fSPaolo Bonzini u64 best_start = (uintptr_t) &edata; 7348a0145fSPaolo Bonzini u64 best_end = upper_end; 74eb2db85dSNadav Amit u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM); 75eb2db85dSNadav Amit if (max_end == 0) 76eb2db85dSNadav Amit max_end = -1ull; 7748a0145fSPaolo Bonzini bool found = false; 7848a0145fSPaolo Bonzini 7948a0145fSPaolo Bonzini uintptr_t mmap = bootinfo->mmap_addr; 8048a0145fSPaolo Bonzini while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) { 8148a0145fSPaolo Bonzini struct mbi_mem *mem = (void *)mmap; 8248a0145fSPaolo Bonzini mmap += mem->size + 4; 8348a0145fSPaolo Bonzini if (mem->type != 1) 8448a0145fSPaolo Bonzini continue; 8548a0145fSPaolo Bonzini if (mem->base_addr <= (uintptr_t) &edata || 8648a0145fSPaolo Bonzini (mem->base_addr <= upper_end && mem->base_addr + mem->length <= upper_end)) 8748a0145fSPaolo Bonzini continue; 8848a0145fSPaolo Bonzini if (mem->length < best_end - best_start) 8948a0145fSPaolo Bonzini continue; 90eb2db85dSNadav Amit if (mem->base_addr >= max_end) 91eb2db85dSNadav Amit continue; 9248a0145fSPaolo Bonzini best_start = mem->base_addr; 9348a0145fSPaolo Bonzini best_end = mem->base_addr + mem->length; 94eb2db85dSNadav Amit if (best_end > max_end) 95eb2db85dSNadav Amit best_end = max_end; 9648a0145fSPaolo Bonzini found = true; 9748a0145fSPaolo Bonzini } 9848a0145fSPaolo Bonzini 9948a0145fSPaolo Bonzini if (found) { 10048a0145fSPaolo Bonzini best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE; 10148a0145fSPaolo Bonzini best_end = best_end & -HUGEPAGE_SIZE; 10248a0145fSPaolo Bonzini phys_alloc_init(best_start, best_end - best_start); 10348a0145fSPaolo Bonzini } 10448a0145fSPaolo Bonzini } 105dbd38004SZixuan Wang 106dbd38004SZixuan Wang /* Setup TSS for the current processor, and return TSS offset within GDT */ 1077e33895dSPaolo Bonzini unsigned long setup_tss(u8 *stacktop) 108dbd38004SZixuan Wang { 109dbd38004SZixuan Wang u32 id; 110dbd38004SZixuan Wang tss64_t *tss_entry; 111dbd38004SZixuan Wang 112*d8de5a33SSean Christopherson id = pre_boot_apic_id(); 113dbd38004SZixuan Wang 114dbd38004SZixuan Wang /* Runtime address of current TSS */ 115dbd38004SZixuan Wang tss_entry = &tss[id]; 116dbd38004SZixuan Wang 117dbd38004SZixuan Wang /* Update TSS */ 118dbd38004SZixuan Wang memset((void *)tss_entry, 0, sizeof(tss64_t)); 119dbd38004SZixuan Wang 120dbd38004SZixuan Wang /* Update TSS descriptors; each descriptor takes up 2 entries */ 121dbd38004SZixuan Wang set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0); 122dbd38004SZixuan Wang 123dbd38004SZixuan Wang return TSS_MAIN + id * 16; 124dbd38004SZixuan Wang } 1257e33895dSPaolo Bonzini #else 1267e33895dSPaolo Bonzini /* Setup TSS for the current processor, and return TSS offset within GDT */ 1277e33895dSPaolo Bonzini unsigned long setup_tss(u8 *stacktop) 1287e33895dSPaolo Bonzini { 1297e33895dSPaolo Bonzini u32 id; 1307e33895dSPaolo Bonzini tss32_t *tss_entry; 1317e33895dSPaolo Bonzini 132*d8de5a33SSean Christopherson id = pre_boot_apic_id(); 1337e33895dSPaolo Bonzini 1347e33895dSPaolo Bonzini /* Runtime address of current TSS */ 1357e33895dSPaolo Bonzini tss_entry = &tss[id]; 1367e33895dSPaolo Bonzini 1377e33895dSPaolo Bonzini /* Update TSS */ 1387e33895dSPaolo Bonzini memset((void *)tss_entry, 0, sizeof(tss32_t)); 1397e33895dSPaolo Bonzini tss_entry->ss0 = KERNEL_DS; 1407e33895dSPaolo Bonzini 1417e33895dSPaolo Bonzini /* Update descriptors for TSS and percpu data segment. */ 1427e33895dSPaolo Bonzini set_gdt_entry(TSS_MAIN + id * 8, 1437e33895dSPaolo Bonzini (unsigned long)tss_entry, 0xffff, 0x89, 0); 1447e33895dSPaolo Bonzini set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8, 1457e33895dSPaolo Bonzini (unsigned long)stacktop - 4096, 0xfffff, 0x93, 0xc0); 1467e33895dSPaolo Bonzini 1477e33895dSPaolo Bonzini return TSS_MAIN + id * 8; 1487e33895dSPaolo Bonzini } 14948a0145fSPaolo Bonzini #endif 15048a0145fSPaolo Bonzini 15148a0145fSPaolo Bonzini void setup_multiboot(struct mbi_bootinfo *bi) 15293dd2aa3SAndrew Jones { 153716cea8aSPaolo Bonzini struct mbi_module *mods; 15493dd2aa3SAndrew Jones 15548a0145fSPaolo Bonzini bootinfo = bi; 15648a0145fSPaolo Bonzini 15748a0145fSPaolo Bonzini u64 best_start = (uintptr_t) &edata; 15848a0145fSPaolo Bonzini u64 best_end = bootinfo->mem_upper * 1024ull; 15948a0145fSPaolo Bonzini phys_alloc_init(best_start, best_end - best_start); 160cb67196aSPaolo Bonzini 161716cea8aSPaolo Bonzini if (bootinfo->mods_count != 1) 16293dd2aa3SAndrew Jones return; 16393dd2aa3SAndrew Jones 164716cea8aSPaolo Bonzini mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr; 16593dd2aa3SAndrew Jones 166716cea8aSPaolo Bonzini initrd = (char *)(uintptr_t) mods->start; 167716cea8aSPaolo Bonzini initrd_size = mods->end - mods->start; 16893dd2aa3SAndrew Jones } 1693c7d322eSAndrew Jones 170c98ce6e0SAlexandru Elisei #ifdef CONFIG_EFI 171ad5fb883SZixuan Wang 1724143d8a7SZixuan Wang /* From x86/efi/efistart64.S */ 1734143d8a7SZixuan Wang extern void load_idt(void); 1743298643cSZixuan Wang extern void load_gdt_tss(size_t tss_offset); 1753298643cSZixuan Wang 176b4e8c300SZixuan Wang static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo) 1771ae9072eSZixuan Wang { 1781ae9072eSZixuan Wang int i; 179b4e8c300SZixuan Wang unsigned long free_mem_pages = 0; 180b4e8c300SZixuan Wang unsigned long free_mem_start = 0; 181b4e8c300SZixuan Wang struct efi_boot_memmap *map = &(efi_bootinfo->mem_map); 182b4e8c300SZixuan Wang efi_memory_desc_t *buffer = *map->map; 183b4e8c300SZixuan Wang efi_memory_desc_t *d = NULL; 1841ae9072eSZixuan Wang 1851ae9072eSZixuan Wang /* 1861ae9072eSZixuan Wang * The 'buffer' contains multiple descriptors that describe memory 1871ae9072eSZixuan Wang * regions maintained by UEFI. This code records the largest free 1881ae9072eSZixuan Wang * EFI_CONVENTIONAL_MEMORY region which will be used to set up the 1891ae9072eSZixuan Wang * memory allocator, so that the memory allocator can work in the 1901ae9072eSZixuan Wang * largest free continuous memory region. 1911ae9072eSZixuan Wang */ 192b4e8c300SZixuan Wang for (i = 0; i < *(map->map_size); i += *(map->desc_size)) { 1931ae9072eSZixuan Wang d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]); 1941ae9072eSZixuan Wang if (d->type == EFI_CONVENTIONAL_MEMORY) { 195b4e8c300SZixuan Wang if (free_mem_pages < d->num_pages) { 196b4e8c300SZixuan Wang free_mem_pages = d->num_pages; 197b4e8c300SZixuan Wang free_mem_start = d->phys_addr; 1981ae9072eSZixuan Wang } 1991ae9072eSZixuan Wang } 2001ae9072eSZixuan Wang } 2011ae9072eSZixuan Wang 202b4e8c300SZixuan Wang if (free_mem_pages == 0) { 2031ae9072eSZixuan Wang return EFI_OUT_OF_RESOURCES; 2041ae9072eSZixuan Wang } 2051ae9072eSZixuan Wang 206b4e8c300SZixuan Wang phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT); 207b4e8c300SZixuan Wang 2081ae9072eSZixuan Wang return EFI_SUCCESS; 2091ae9072eSZixuan Wang } 2101ae9072eSZixuan Wang 211b4e8c300SZixuan Wang static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo) 2121ae9072eSZixuan Wang { 2131ae9072eSZixuan Wang efi_status_t status; 214b4e8c300SZixuan Wang struct rsdp_descriptor *rsdp; 2151ae9072eSZixuan Wang 216b4e8c300SZixuan Wang /* 217b4e8c300SZixuan Wang * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used 218b4e8c300SZixuan Wang * by kvm-unit-tests x86's memory allocator. So it is not necessary to 219b4e8c300SZixuan Wang * copy the data structure to another memory region to prevent 220b4e8c300SZixuan Wang * unintentional overwrite. 221b4e8c300SZixuan Wang */ 222b4e8c300SZixuan Wang status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp); 2231ae9072eSZixuan Wang if (status != EFI_SUCCESS) { 2241ae9072eSZixuan Wang return status; 2251ae9072eSZixuan Wang } 2261ae9072eSZixuan Wang 227b4e8c300SZixuan Wang set_efi_rsdp(rsdp); 228706ede18SZixuan Wang 2291ae9072eSZixuan Wang return EFI_SUCCESS; 2301ae9072eSZixuan Wang } 2311ae9072eSZixuan Wang 232e6f65fa4SZixuan Wang /* Defined in cstart64.S or efistart64.S */ 233e6f65fa4SZixuan Wang extern u8 ptl4; 234e6f65fa4SZixuan Wang extern u8 ptl3; 235e6f65fa4SZixuan Wang extern u8 ptl2; 236e6f65fa4SZixuan Wang 237e6f65fa4SZixuan Wang static void setup_page_table(void) 238e6f65fa4SZixuan Wang { 239e6f65fa4SZixuan Wang pgd_t *curr_pt; 240e6f65fa4SZixuan Wang phys_addr_t flags; 241e6f65fa4SZixuan Wang int i; 242e6f65fa4SZixuan Wang 243e6f65fa4SZixuan Wang /* Set default flags */ 244e6f65fa4SZixuan Wang flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 245e6f65fa4SZixuan Wang 246350bf64aSZixuan Wang /* Set AMD SEV C-Bit for page table entries */ 247350bf64aSZixuan Wang flags |= get_amd_sev_c_bit_mask(); 248350bf64aSZixuan Wang 249e6f65fa4SZixuan Wang /* Level 4 */ 250e6f65fa4SZixuan Wang curr_pt = (pgd_t *)&ptl4; 251e6f65fa4SZixuan Wang curr_pt[0] = ((phys_addr_t)&ptl3) | flags; 252e6f65fa4SZixuan Wang /* Level 3 */ 253e6f65fa4SZixuan Wang curr_pt = (pgd_t *)&ptl3; 254e6f65fa4SZixuan Wang for (i = 0; i < 4; i++) { 255e6f65fa4SZixuan Wang curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags; 256e6f65fa4SZixuan Wang } 257e6f65fa4SZixuan Wang /* Level 2 */ 258e6f65fa4SZixuan Wang curr_pt = (pgd_t *)&ptl2; 259e6f65fa4SZixuan Wang flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK; 260e6f65fa4SZixuan Wang for (i = 0; i < 4 * 512; i++) { 261832e1c15SVarad Gautam curr_pt[i] = ((phys_addr_t) i << 21) | flags; 262e6f65fa4SZixuan Wang } 263e6f65fa4SZixuan Wang 264b114aa57SZixuan Wang if (amd_sev_es_enabled()) { 265b114aa57SZixuan Wang setup_ghcb_pte((pgd_t *)&ptl4); 266b114aa57SZixuan Wang } 267b114aa57SZixuan Wang 268e6f65fa4SZixuan Wang /* Load 4-level page table */ 269e6f65fa4SZixuan Wang write_cr3((ulong)&ptl4); 270e6f65fa4SZixuan Wang } 271e6f65fa4SZixuan Wang 2723298643cSZixuan Wang static void setup_gdt_tss(void) 2733298643cSZixuan Wang { 2743298643cSZixuan Wang size_t tss_offset; 2753298643cSZixuan Wang 2763298643cSZixuan Wang /* 64-bit setup_tss does not use the stacktop argument. */ 2773298643cSZixuan Wang tss_offset = setup_tss(NULL); 2783298643cSZixuan Wang load_gdt_tss(tss_offset); 2793298643cSZixuan Wang } 2804143d8a7SZixuan Wang 281b4e8c300SZixuan Wang efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo) 282ad5fb883SZixuan Wang { 283b4e8c300SZixuan Wang efi_status_t status; 2848238fdcaSPaolo Bonzini const char *phase; 285b4e8c300SZixuan Wang 286b4e8c300SZixuan Wang status = setup_memory_allocator(efi_bootinfo); 287b4e8c300SZixuan Wang if (status != EFI_SUCCESS) { 288b4e8c300SZixuan Wang printf("Failed to set up memory allocator: "); 289b4e8c300SZixuan Wang switch (status) { 290b4e8c300SZixuan Wang case EFI_OUT_OF_RESOURCES: 291b4e8c300SZixuan Wang printf("No free memory region\n"); 292b4e8c300SZixuan Wang break; 293b4e8c300SZixuan Wang default: 294b4e8c300SZixuan Wang printf("Unknown error\n"); 295b4e8c300SZixuan Wang break; 296b4e8c300SZixuan Wang } 297b4e8c300SZixuan Wang return status; 298b4e8c300SZixuan Wang } 299b4e8c300SZixuan Wang 300b4e8c300SZixuan Wang status = setup_rsdp(efi_bootinfo); 301b4e8c300SZixuan Wang if (status != EFI_SUCCESS) { 302b4e8c300SZixuan Wang printf("Cannot find RSDP in EFI system table\n"); 303b4e8c300SZixuan Wang return status; 304b4e8c300SZixuan Wang } 305b4e8c300SZixuan Wang 3068238fdcaSPaolo Bonzini phase = "AMD SEV"; 307b4e8c300SZixuan Wang status = setup_amd_sev(); 3088238fdcaSPaolo Bonzini 3098238fdcaSPaolo Bonzini /* Continue if AMD SEV is not supported, but skip SEV-ES setup */ 3108238fdcaSPaolo Bonzini if (status == EFI_SUCCESS) { 3118238fdcaSPaolo Bonzini phase = "AMD SEV-ES"; 3128238fdcaSPaolo Bonzini status = setup_amd_sev_es(); 313b4e8c300SZixuan Wang } 314b4e8c300SZixuan Wang 3158238fdcaSPaolo Bonzini if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) { 3168238fdcaSPaolo Bonzini printf("%s setup failed, error = 0x%lx\n", phase, status); 317b4e8c300SZixuan Wang return status; 318b4e8c300SZixuan Wang } 319b4e8c300SZixuan Wang 320ad5fb883SZixuan Wang reset_apic(); 3213298643cSZixuan Wang setup_gdt_tss(); 3224143d8a7SZixuan Wang setup_idt(); 3234143d8a7SZixuan Wang load_idt(); 324ad5fb883SZixuan Wang mask_pic_interrupts(); 325ad5fb883SZixuan Wang enable_apic(); 326ad5fb883SZixuan Wang enable_x2apic(); 327ad5fb883SZixuan Wang smp_init(); 328e6f65fa4SZixuan Wang setup_page_table(); 329b4e8c300SZixuan Wang 330b4e8c300SZixuan Wang return EFI_SUCCESS; 331ad5fb883SZixuan Wang } 332ad5fb883SZixuan Wang 333c98ce6e0SAlexandru Elisei #endif /* CONFIG_EFI */ 334ad5fb883SZixuan Wang 335716cea8aSPaolo Bonzini void setup_libcflat(void) 3363c7d322eSAndrew Jones { 3373c7d322eSAndrew Jones if (initrd) { 3383c7d322eSAndrew Jones /* environ is currently the only file in the initrd */ 3393c7d322eSAndrew Jones u32 size = MIN(initrd_size, ENV_SIZE); 34003b1e457SNadav Amit const char *str; 34103b1e457SNadav Amit 3423c7d322eSAndrew Jones memcpy(env, initrd, size); 3433c7d322eSAndrew Jones setup_env(env, size); 34403b1e457SNadav Amit if ((str = getenv("BOOTLOADER")) && atol(str) != 0) 34503b1e457SNadav Amit add_setup_arg("bootloader"); 3463c7d322eSAndrew Jones } 3473c7d322eSAndrew Jones } 348