1 /* 2 * Initialize machine setup information 3 * 4 * Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com> 5 * Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com> 6 * 7 * This work is licensed under the terms of the GNU LGPL, version 2. 8 */ 9 #include "libcflat.h" 10 #include "fwcfg.h" 11 #include "alloc_phys.h" 12 #include "argv.h" 13 #include "desc.h" 14 #include "apic.h" 15 #include "apic-defs.h" 16 #include "asm/setup.h" 17 18 extern char edata; 19 20 struct mbi_bootinfo { 21 u32 flags; 22 u32 mem_lower; 23 u32 mem_upper; 24 u32 boot_device; 25 u32 cmdline; 26 u32 mods_count; 27 u32 mods_addr; 28 u32 reserved[4]; /* 28-43 */ 29 u32 mmap_length; 30 u32 mmap_addr; 31 u32 reserved0[3]; /* 52-63 */ 32 u32 bootloader; 33 u32 reserved1[5]; /* 68-87 */ 34 u32 size; 35 }; 36 37 struct mbi_module { 38 u32 start, end; 39 u32 cmdline; 40 u32 unused; 41 }; 42 43 struct mbi_mem { 44 u32 size; 45 u64 base_addr; 46 u64 length; 47 u32 type; 48 } __attribute__((packed)); 49 50 #define ENV_SIZE 16384 51 52 void setup_env(char *env, int size); 53 void setup_multiboot(struct mbi_bootinfo *bootinfo); 54 void setup_libcflat(void); 55 56 char *initrd; 57 u32 initrd_size; 58 59 static char env[ENV_SIZE]; 60 static struct mbi_bootinfo *bootinfo; 61 62 #define HUGEPAGE_SIZE (1 << 21) 63 64 #ifdef __x86_64__ 65 void find_highmem(void) 66 { 67 /* Memory above 4 GB is only supported on 64-bit systems. */ 68 if (!(bootinfo->flags & 64)) 69 return; 70 71 u64 upper_end = bootinfo->mem_upper * 1024ull; 72 u64 best_start = (uintptr_t) &edata; 73 u64 best_end = upper_end; 74 u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM); 75 if (max_end == 0) 76 max_end = -1ull; 77 bool found = false; 78 79 uintptr_t mmap = bootinfo->mmap_addr; 80 while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) { 81 struct mbi_mem *mem = (void *)mmap; 82 mmap += mem->size + 4; 83 if (mem->type != 1) 84 continue; 85 if (mem->base_addr <= (uintptr_t) &edata || 86 (mem->base_addr <= upper_end && mem->base_addr + mem->length <= upper_end)) 87 continue; 88 if (mem->length < best_end - best_start) 89 continue; 90 if (mem->base_addr >= max_end) 91 continue; 92 best_start = mem->base_addr; 93 best_end = mem->base_addr + mem->length; 94 if (best_end > max_end) 95 best_end = max_end; 96 found = true; 97 } 98 99 if (found) { 100 best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE; 101 best_end = best_end & -HUGEPAGE_SIZE; 102 phys_alloc_init(best_start, best_end - best_start); 103 } 104 } 105 106 /* Setup TSS for the current processor, and return TSS offset within GDT */ 107 unsigned long setup_tss(u8 *stacktop) 108 { 109 u32 id; 110 tss64_t *tss_entry; 111 112 id = pre_boot_apic_id(); 113 114 /* Runtime address of current TSS */ 115 tss_entry = &tss[id]; 116 117 /* Update TSS */ 118 memset((void *)tss_entry, 0, sizeof(tss64_t)); 119 120 /* Update TSS descriptors; each descriptor takes up 2 entries */ 121 set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0); 122 123 return TSS_MAIN + id * 16; 124 } 125 #else 126 /* Setup TSS for the current processor, and return TSS offset within GDT */ 127 unsigned long setup_tss(u8 *stacktop) 128 { 129 u32 id; 130 tss32_t *tss_entry; 131 132 id = pre_boot_apic_id(); 133 134 /* Runtime address of current TSS */ 135 tss_entry = &tss[id]; 136 137 /* Update TSS */ 138 memset((void *)tss_entry, 0, sizeof(tss32_t)); 139 tss_entry->ss0 = KERNEL_DS; 140 141 /* Update descriptors for TSS and percpu data segment. */ 142 set_gdt_entry(TSS_MAIN + id * 8, 143 (unsigned long)tss_entry, 0xffff, 0x89, 0); 144 set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8, 145 (unsigned long)stacktop - 4096, 0xfffff, 0x93, 0xc0); 146 147 return TSS_MAIN + id * 8; 148 } 149 #endif 150 151 void setup_multiboot(struct mbi_bootinfo *bi) 152 { 153 struct mbi_module *mods; 154 155 bootinfo = bi; 156 157 u64 best_start = (uintptr_t) &edata; 158 u64 best_end = bootinfo->mem_upper * 1024ull; 159 phys_alloc_init(best_start, best_end - best_start); 160 161 if (bootinfo->mods_count != 1) 162 return; 163 164 mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr; 165 166 initrd = (char *)(uintptr_t) mods->start; 167 initrd_size = mods->end - mods->start; 168 } 169 170 #ifdef CONFIG_EFI 171 172 static struct percpu_data __percpu_data[MAX_TEST_CPUS]; 173 174 static void setup_segments64(void) 175 { 176 /* Update data segments */ 177 write_ds(KERNEL_DS); 178 write_es(KERNEL_DS); 179 write_fs(KERNEL_DS); 180 write_gs(KERNEL_DS); 181 write_ss(KERNEL_DS); 182 183 /* Setup percpu base */ 184 wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]); 185 186 /* 187 * Update the code segment by putting it on the stack before the return 188 * address, then doing a far return: this will use the new code segment 189 * along with the address. 190 */ 191 asm volatile("pushq %1\n\t" 192 "lea 1f(%%rip), %0\n\t" 193 "pushq %0\n\t" 194 "lretq\n\t" 195 "1:" 196 :: "r" ((u64)KERNEL_DS), "i" (KERNEL_CS)); 197 } 198 199 static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo) 200 { 201 int i; 202 unsigned long free_mem_pages = 0; 203 unsigned long free_mem_start = 0; 204 struct efi_boot_memmap *map = &(efi_bootinfo->mem_map); 205 efi_memory_desc_t *buffer = *map->map; 206 efi_memory_desc_t *d = NULL; 207 208 /* 209 * The 'buffer' contains multiple descriptors that describe memory 210 * regions maintained by UEFI. This code records the largest free 211 * EFI_CONVENTIONAL_MEMORY region which will be used to set up the 212 * memory allocator, so that the memory allocator can work in the 213 * largest free continuous memory region. 214 */ 215 for (i = 0; i < *(map->map_size); i += *(map->desc_size)) { 216 d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]); 217 if (d->type == EFI_CONVENTIONAL_MEMORY) { 218 if (free_mem_pages < d->num_pages) { 219 free_mem_pages = d->num_pages; 220 free_mem_start = d->phys_addr; 221 } 222 } 223 } 224 225 if (free_mem_pages == 0) { 226 return EFI_OUT_OF_RESOURCES; 227 } 228 229 phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT); 230 231 return EFI_SUCCESS; 232 } 233 234 static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo) 235 { 236 efi_status_t status; 237 struct rsdp_descriptor *rsdp; 238 239 /* 240 * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used 241 * by kvm-unit-tests x86's memory allocator. So it is not necessary to 242 * copy the data structure to another memory region to prevent 243 * unintentional overwrite. 244 */ 245 status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp); 246 if (status != EFI_SUCCESS) { 247 return status; 248 } 249 250 set_efi_rsdp(rsdp); 251 252 return EFI_SUCCESS; 253 } 254 255 /* Defined in cstart64.S or efistart64.S */ 256 extern u8 ptl4; 257 extern u8 ptl3; 258 extern u8 ptl2; 259 260 static void setup_page_table(void) 261 { 262 pgd_t *curr_pt; 263 phys_addr_t flags; 264 int i; 265 266 /* Set default flags */ 267 flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 268 269 /* Set AMD SEV C-Bit for page table entries */ 270 flags |= get_amd_sev_c_bit_mask(); 271 272 /* Level 4 */ 273 curr_pt = (pgd_t *)&ptl4; 274 curr_pt[0] = ((phys_addr_t)&ptl3) | flags; 275 /* Level 3 */ 276 curr_pt = (pgd_t *)&ptl3; 277 for (i = 0; i < 4; i++) { 278 curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags; 279 } 280 /* Level 2 */ 281 curr_pt = (pgd_t *)&ptl2; 282 flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK; 283 for (i = 0; i < 4 * 512; i++) { 284 curr_pt[i] = ((phys_addr_t) i << 21) | flags; 285 } 286 287 if (amd_sev_es_enabled()) { 288 setup_ghcb_pte((pgd_t *)&ptl4); 289 } 290 291 /* Load 4-level page table */ 292 write_cr3((ulong)&ptl4); 293 } 294 295 static void setup_gdt_tss(void) 296 { 297 size_t tss_offset; 298 299 /* 64-bit setup_tss does not use the stacktop argument. */ 300 tss_offset = setup_tss(NULL); 301 load_gdt_tss(tss_offset); 302 } 303 304 efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo) 305 { 306 efi_status_t status; 307 const char *phase; 308 309 status = setup_memory_allocator(efi_bootinfo); 310 if (status != EFI_SUCCESS) { 311 printf("Failed to set up memory allocator: "); 312 switch (status) { 313 case EFI_OUT_OF_RESOURCES: 314 printf("No free memory region\n"); 315 break; 316 default: 317 printf("Unknown error\n"); 318 break; 319 } 320 return status; 321 } 322 323 status = setup_rsdp(efi_bootinfo); 324 if (status != EFI_SUCCESS) { 325 printf("Cannot find RSDP in EFI system table\n"); 326 return status; 327 } 328 329 phase = "AMD SEV"; 330 status = setup_amd_sev(); 331 332 /* Continue if AMD SEV is not supported, but skip SEV-ES setup */ 333 if (status == EFI_SUCCESS) { 334 phase = "AMD SEV-ES"; 335 status = setup_amd_sev_es(); 336 } 337 338 if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) { 339 printf("%s setup failed, error = 0x%lx\n", phase, status); 340 return status; 341 } 342 343 setup_gdt_tss(); 344 /* 345 * GS.base, which points at the per-vCPU data, must be configured prior 346 * to resetting the APIC, which sets the per-vCPU APIC ops. 347 */ 348 setup_segments64(); 349 reset_apic(); 350 setup_idt(); 351 load_idt(); 352 mask_pic_interrupts(); 353 setup_page_table(); 354 enable_apic(); 355 ap_init(); 356 enable_x2apic(); 357 smp_init(); 358 359 return EFI_SUCCESS; 360 } 361 362 #endif /* CONFIG_EFI */ 363 364 void setup_libcflat(void) 365 { 366 if (initrd) { 367 /* environ is currently the only file in the initrd */ 368 u32 size = MIN(initrd_size, ENV_SIZE); 369 const char *str; 370 371 memcpy(env, initrd, size); 372 setup_env(env, size); 373 if ((str = getenv("BOOTLOADER")) && atol(str) != 0) 374 add_setup_arg("bootloader"); 375 } 376 } 377