/*
 * Initialize machine setup information
 *
 * Copyright (C) 2017, Red Hat Inc, Andrew Jones
 * Copyright (C) 2021, Google Inc, Zixuan Wang
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.
 */
#include "libcflat.h"
#include "fwcfg.h"
#include "alloc_phys.h"
#include "argv.h"
#include "desc.h"
#include "apic.h"
#include "apic-defs.h"
#include "asm/setup.h"
#include "atomic.h"
#include "pmu.h"
#include "processor.h"
#include "smp.h"

/* Linker-provided symbol; its address is used as the start of free memory. */
extern char edata;

/*
 * Boot information block handed to setup_multiboot().  The numeric comments
 * on the reserved members are the byte offsets they cover.
 */
struct mbi_bootinfo {
	u32 flags;
	u32 mem_lower;
	u32 mem_upper;
	u32 boot_device;
	u32 cmdline;
	u32 mods_count;
	u32 mods_addr;
	u32 reserved[4];   /* 28-43 */
	u32 mmap_length;
	u32 mmap_addr;
	u32 reserved0[3];  /* 52-63 */
	u32 bootloader;
	u32 reserved1[5];  /* 68-87 */
	u32 size;
};

/* One boot module descriptor; [start, end) is the module's address range. */
struct mbi_module {
	u32 start, end;
	u32 cmdline;
	u32 unused;
};

/*
 * One entry of the boot memory map.  'size' does not count the 'size' field
 * itself, which is why find_highmem() advances by size + 4.
 */
struct mbi_mem {
	u32 size;
	u64 base_addr;
	u64 length;
	u32 type;
} __attribute__((packed));

/*
 * Per the Multiboot specification: bit 6 of mbi_bootinfo.flags says that
 * mmap_addr/mmap_length are valid, and a memory map entry of type 1 is
 * available RAM.
 */
#define MBI_FLAG_MMAP		(1u << 6)
#define MBI_MEM_AVAILABLE	1

#define ENV_SIZE 16384

void setup_env(char *env, int size);
void setup_multiboot(struct mbi_bootinfo *bootinfo);
void setup_libcflat(void);

/* Location and size of the single boot module, set by setup_multiboot(). */
char *initrd;
u32 initrd_size;

/* Private copy of the initrd contents, filled in by setup_libcflat(). */
static char env[ENV_SIZE];
/* Boot information pointer saved by setup_multiboot(). */
static struct mbi_bootinfo *bootinfo;

#define HUGEPAGE_SIZE (1 << 21)

#ifdef __x86_64__
/*
 * Scan the boot memory map for a better region for the physical allocator
 * than the initial [&edata, mem_upper * 1024) range, and re-initialize the
 * allocator on it if one is found.
 *
 * An entry is skipped when it is not available RAM, when it starts at or
 * below &edata, when it lies entirely within the initial range, when it is
 * smaller than the best range seen so far, or when it starts at or beyond
 * the limit read from FW_CFG_MAX_RAM (a value of 0 means "no limit").
 * The chosen range is clamped to that limit and then shrunk to
 * HUGEPAGE_SIZE alignment on both ends.
 */
void find_highmem(void)
{
	/* Memory above 4 GB is only supported on 64-bit systems.  */

	/* Nothing to scan if the boot loader did not provide a memory map. */
	if (!(bootinfo->flags & MBI_FLAG_MMAP))
		return;

	u64 upper_end = bootinfo->mem_upper * 1024ull;
	u64 best_start = (uintptr_t) &edata;
	u64 best_end = upper_end;
	u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM);
	if (max_end == 0)
		max_end = -1ull;
	bool found = false;

	uintptr_t mmap = bootinfo->mmap_addr;
	while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) {
		struct mbi_mem *mem = (void *)mmap;

		/* Advance first so that every 'continue' moves to the next entry. */
		mmap += mem->size + 4;
		if (mem->type != MBI_MEM_AVAILABLE)
			continue;
		if (mem->base_addr <= (uintptr_t) &edata ||
		    (mem->base_addr <= upper_end &&
		     mem->base_addr + mem->length <= upper_end))
			continue;
		if (mem->length < best_end - best_start)
			continue;
		if (mem->base_addr >= max_end)
			continue;
		best_start = mem->base_addr;
		best_end = mem->base_addr + mem->length;
		if (best_end > max_end)
			best_end = max_end;
		found = true;
	}

	if (found) {
		/* Round the start up and the end down to a huge page boundary. */
		best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE;
		best_end = best_end & -HUGEPAGE_SIZE;
		phys_alloc_init(best_start, best_end - best_start);
	}
}

/* Setup TSS for the current processor, and return TSS offset within GDT */
unsigned long setup_tss(u8 *stacktop)
{
	u32 id;
	tss64_t *tss_entry;

	id = pre_boot_apic_id();

	/* Runtime address of current TSS */
	tss_entry = &tss[id];

	/* Update TSS */
	memset((void *)tss_entry, 0, sizeof(tss64_t));

	/* Update TSS descriptors; each descriptor takes up 2 entries */
	set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0);

	return TSS_MAIN + id * 16;
}
#else
/* Setup TSS for the current processor, and return TSS offset within GDT */
unsigned long setup_tss(u8 *stacktop)
{
	u32 id;
	tss32_t *tss_entry;

	id = pre_boot_apic_id();

	/* Runtime address of current TSS */
	tss_entry = &tss[id];

	/* Update TSS */
	memset((void *)tss_entry, 0, sizeof(tss32_t));
	tss_entry->ss0 = KERNEL_DS;

	/* Update descriptors for TSS and percpu data segment.  */
	set_gdt_entry(TSS_MAIN + id * 8,
		      (unsigned long)tss_entry, 0xffff, 0x89, 0);
	set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8,
		      (unsigned long)stacktop - PER_CPU_SIZE, 0xfffff, 0x93, 0xc0);

	return TSS_MAIN + id * 8;
}
#endif

/*
 * Save the boot information pointer, initialize the physical allocator on
 * the range from &edata up to mem_upper * 1024, and, when exactly one boot
 * module was supplied, record it as the initrd.
 */
void setup_multiboot(struct mbi_bootinfo *bi)
{
	struct mbi_module *mods;

	bootinfo = bi;

	u64 best_start = (uintptr_t) &edata;
	u64 best_end = bootinfo->mem_upper * 1024ull;
	phys_alloc_init(best_start, best_end - best_start);

	/* Any module count other than one leaves initrd unset. */
	if (bootinfo->mods_count != 1)
		return;

	mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr;

	initrd = (char *)(uintptr_t) mods->start;
	initrd_size = mods->end - mods->start;
}

/* Set up the TSS for the current processor and load the GDT and TSS. */
static void setup_gdt_tss(void)
{
	size_t tss_offset;

	/* 64-bit setup_tss does not use the stacktop argument.  */
	tss_offset = setup_tss(NULL);
	load_gdt_tss(tss_offset);
}

#ifdef CONFIG_EFI

/* Per-CPU data areas; setup_efi() points GS.base at the current CPU's slot. */
static struct percpu_data __percpu_data[MAX_TEST_CPUS];

/* Reload all data segment registers with KERNEL_DS and CS with KERNEL_CS. */
static void setup_segments64(void)
{
	/*
	 * Scratch register for the far-return target address.  It must be an
	 * output operand: the asm writes to it, and input operands may not be
	 * modified.
	 */
	u64 scratch;

	/* Update data segments */
	write_ds(KERNEL_DS);
	write_es(KERNEL_DS);
	write_fs(KERNEL_DS);
	write_gs(KERNEL_DS);
	write_ss(KERNEL_DS);

	/*
	 * Update the code segment by putting it on the stack before the return
	 * address, then doing a far return: this will use the new code segment
	 * along with the address.
	 */
	asm volatile("pushq %1\n\t"
		     "lea 1f(%%rip), %0\n\t"
		     "pushq %0\n\t"
		     "lretq\n\t"
		     "1:"
		     : "=&r" (scratch)
		     : "i" (KERNEL_CS));
}

/*
 * Initialize the physical allocator on the largest EFI_CONVENTIONAL_MEMORY
 * region described by efi_bootinfo->mem_map.
 *
 * Returns EFI_SUCCESS, or EFI_OUT_OF_RESOURCES when the map contains no
 * conventional memory region with a non-zero page count.
 */
static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo)
{
	/* Byte offset into the map; unsigned to match the sizes it is compared to. */
	unsigned long i;
	unsigned long free_mem_pages = 0;
	unsigned long free_mem_start = 0;
	struct efi_boot_memmap *map = &(efi_bootinfo->mem_map);
	efi_memory_desc_t *buffer = *map->map;
	efi_memory_desc_t *d = NULL;

	/*
	 * The 'buffer' contains multiple descriptors that describe memory
	 * regions maintained by UEFI. This code records the largest free
	 * EFI_CONVENTIONAL_MEMORY region which will be used to set up the
	 * memory allocator, so that the memory allocator can work in the
	 * largest free continuous memory region.
	 */
	for (i = 0; i < *(map->map_size); i += *(map->desc_size)) {
		/* Step by desc_size bytes, not by sizeof(efi_memory_desc_t). */
		d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]);
		if (d->type == EFI_CONVENTIONAL_MEMORY) {
			if (free_mem_pages < d->num_pages) {
				free_mem_pages = d->num_pages;
				free_mem_start = d->phys_addr;
			}
		}
	}

	if (free_mem_pages == 0) {
		return EFI_OUT_OF_RESOURCES;
	}

	phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT);

	return EFI_SUCCESS;
}

/*
 * Look up the ACPI RSDP in the EFI configuration tables and register it
 * with set_efi_rsdp().  Returns the lookup status on failure.
 */
static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo)
{
	efi_status_t status;
	struct acpi_table_rsdp *rsdp;

	/*
	 * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used
	 * by kvm-unit-tests x86's memory allocator. So it is not necessary to
	 * copy the data structure to another memory region to prevent
	 * unintentional overwrite.
	 */
	status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp);
	if (status != EFI_SUCCESS) {
		return status;
	}

	set_efi_rsdp(rsdp);

	return EFI_SUCCESS;
}

/* Defined in cstart64.S or efistart64.S */
extern u8 ptl4;
extern u8 ptl3;
extern u8 ptl2;

/*
 * Fill in the ptl4/ptl3/ptl2 tables and load ptl4 into CR3.  The level 2
 * entries map physical address (i << 21) for i in [0, 4 * 512), i.e. the
 * first 4 GiB, using large-page entries.
 */
static void setup_page_table(void)
{
	pgd_t *curr_pt;
	phys_addr_t flags;
	int i;

	/* Set default flags */
	flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;

	/* Set AMD SEV C-Bit for page table entries */
	flags |= get_amd_sev_c_bit_mask();

	/* Level 4 */
	curr_pt = (pgd_t *)&ptl4;
	curr_pt[0] = ((phys_addr_t)&ptl3) | flags;
	/* Level 3 */
	curr_pt = (pgd_t *)&ptl3;
	for (i = 0; i < 4; i++) {
		curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags;
	}
	/* Level 2 */
	curr_pt = (pgd_t *)&ptl2;
	flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK;
	for (i = 0; i < 4 * 512; i++) {
		curr_pt[i] = ((phys_addr_t) i << 21) | flags;
	}

	if (amd_sev_es_enabled()) {
		setup_ghcb_pte((pgd_t *)&ptl4);
	}

	/* Load 4-level page table */
	write_cr3((ulong)&ptl4);
}

/*
 * EFI boot path: set up the memory allocator, RSDP, AMD SEV / SEV-ES,
 * GDT/TSS, segments, IDT, per-CPU data, APIC and page tables, then run
 * bsp_rest_init().
 *
 * Returns EFI_SUCCESS, or the failing step's status after printing a
 * diagnostic.  EFI_UNSUPPORTED from the SEV steps is not treated as a
 * failure.
 */
efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo)
{
	efi_status_t status;
	const char *phase;

	status = setup_memory_allocator(efi_bootinfo);
	if (status != EFI_SUCCESS) {
		printf("Failed to set up memory allocator: ");
		switch (status) {
		case EFI_OUT_OF_RESOURCES:
			printf("No free memory region\n");
			break;
		default:
			printf("Unknown error\n");
			break;
		}
		return status;
	}

	status = setup_rsdp(efi_bootinfo);
	if (status != EFI_SUCCESS) {
		printf("Cannot find RSDP in EFI system table\n");
		return status;
	}

	/* 'phase' names the step in progress for the error message below. */
	phase = "AMD SEV";
	status = setup_amd_sev();

	/* Continue if AMD SEV is not supported, but skip SEV-ES setup */
	if (status == EFI_SUCCESS) {
		phase = "AMD SEV-ES";
		status = setup_amd_sev_es();
	}

	if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) {
		printf("%s setup failed, error = 0x%lx\n", phase, status);
		return status;
	}

	setup_gdt_tss();
	setup_segments64();
	setup_idt();
	load_idt();
	/*
	 * Load GS.base with the per-vCPU data.  This must be done after
	 * loading the IDT as reading the APIC ID may #VC when running
	 * as an SEV-ES guest
	 */
	wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]);
	/*
	 * Resetting the APIC sets the per-vCPU APIC ops and so must be
	 * done after loading GS.base with the per-vCPU data.
	 */
	reset_apic();
	mask_pic_interrupts();
	setup_page_table();
	enable_apic();
	save_id();
	bsp_rest_init();

	return EFI_SUCCESS;
}

#endif /* CONFIG_EFI */

/*
 * If an initrd was recorded, copy at most ENV_SIZE bytes of it into the
 * private 'env' buffer and pass that to setup_env().  If the BOOTLOADER
 * environment variable then parses to a non-zero number, add the
 * "bootloader" setup argument.
 */
void setup_libcflat(void)
{
	if (initrd) {
		/* environ is currently the only file in the initrd */
		u32 size = MIN(initrd_size, ENV_SIZE);
		const char *str;

		memcpy(env, initrd, size);
		setup_env(env, size);
		if ((str = getenv("BOOTLOADER")) && atol(str) != 0)
			add_setup_arg("bootloader");
	}
}

/* Set the bit for the current CPU's APIC ID in online_cpus. */
void save_id(void)
{
	set_bit(apic_id(), online_cpus);
}

/*
 * Per-CPU bring-up sequence ending in ap_online().
 * NOTE(review): presumably the 64-bit entry point for secondary CPUs, called
 * from outside this file - confirm against the startup assembly.
 */
void ap_start64(void)
{
	setup_gdt_tss();
	reset_apic();
	load_idt();
	save_id();
	enable_apic();
	enable_x2apic();
	ap_online();
}

/*
 * Remaining initialization, run at the end of setup_efi(): bring up the
 * other CPUs, enable x2APIC, then initialize SMP and PMU support.
 */
void bsp_rest_init(void)
{
	bringup_aps();
	enable_x2apic();
	smp_init();
	pmu_init();
}