1 /* 2 * Initialize machine setup information 3 * 4 * Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com> 5 * Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com> 6 * 7 * This work is licensed under the terms of the GNU LGPL, version 2. 8 */ 9 #include "libcflat.h" 10 #include "fwcfg.h" 11 #include "alloc_phys.h" 12 #include "argv.h" 13 #include "desc.h" 14 #include "apic.h" 15 #include "apic-defs.h" 16 #include "asm/setup.h" 17 #include "atomic.h" 18 #include "pmu.h" 19 #include "processor.h" 20 #include "smp.h" 21 22 extern char edata; 23 24 struct mbi_bootinfo { 25 u32 flags; 26 u32 mem_lower; 27 u32 mem_upper; 28 u32 boot_device; 29 u32 cmdline; 30 u32 mods_count; 31 u32 mods_addr; 32 u32 reserved[4]; /* 28-43 */ 33 u32 mmap_length; 34 u32 mmap_addr; 35 u32 reserved0[3]; /* 52-63 */ 36 u32 bootloader; 37 u32 reserved1[5]; /* 68-87 */ 38 u32 size; 39 }; 40 41 struct mbi_module { 42 u32 start, end; 43 u32 cmdline; 44 u32 unused; 45 }; 46 47 struct mbi_mem { 48 u32 size; 49 u64 base_addr; 50 u64 length; 51 u32 type; 52 } __attribute__((packed)); 53 54 #define ENV_SIZE 16384 55 56 void setup_env(char *env, int size); 57 void setup_multiboot(struct mbi_bootinfo *bootinfo); 58 void setup_libcflat(void); 59 60 char *initrd; 61 u32 initrd_size; 62 63 static char env[ENV_SIZE]; 64 static struct mbi_bootinfo *bootinfo; 65 66 #define HUGEPAGE_SIZE (1 << 21) 67 68 #ifdef __x86_64__ 69 void find_highmem(void) 70 { 71 /* Memory above 4 GB is only supported on 64-bit systems. */ 72 if (!(bootinfo->flags & 64)) 73 return; 74 75 u64 upper_end = bootinfo->mem_upper * 1024ull; 76 u64 best_start = (uintptr_t) &edata; 77 u64 best_end = upper_end; 78 u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM); 79 if (max_end == 0) 80 max_end = -1ull; 81 bool found = false; 82 83 uintptr_t mmap = bootinfo->mmap_addr; 84 while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) { 85 struct mbi_mem *mem = (void *)mmap; 86 mmap += mem->size + 4; 87 if (mem->type != 1) 88 continue; 89 if (mem->base_addr <= (uintptr_t) &edata || 90 (mem->base_addr <= upper_end && mem->base_addr + mem->length <= upper_end)) 91 continue; 92 if (mem->length < best_end - best_start) 93 continue; 94 if (mem->base_addr >= max_end) 95 continue; 96 best_start = mem->base_addr; 97 best_end = mem->base_addr + mem->length; 98 if (best_end > max_end) 99 best_end = max_end; 100 found = true; 101 } 102 103 if (found) { 104 best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE; 105 best_end = best_end & -HUGEPAGE_SIZE; 106 phys_alloc_init(best_start, best_end - best_start); 107 } 108 } 109 110 /* Setup TSS for the current processor, and return TSS offset within GDT */ 111 unsigned long setup_tss(u8 *stacktop) 112 { 113 u32 id; 114 tss64_t *tss_entry; 115 116 id = pre_boot_apic_id(); 117 118 /* Runtime address of current TSS */ 119 tss_entry = &tss[id]; 120 121 /* Update TSS */ 122 memset((void *)tss_entry, 0, sizeof(tss64_t)); 123 124 /* Update TSS descriptors; each descriptor takes up 2 entries */ 125 set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0); 126 127 return TSS_MAIN + id * 16; 128 } 129 #else 130 /* Setup TSS for the current processor, and return TSS offset within GDT */ 131 unsigned long setup_tss(u8 *stacktop) 132 { 133 u32 id; 134 tss32_t *tss_entry; 135 136 id = pre_boot_apic_id(); 137 138 /* Runtime address of current TSS */ 139 tss_entry = &tss[id]; 140 141 /* Update TSS */ 142 memset((void *)tss_entry, 0, sizeof(tss32_t)); 143 tss_entry->ss0 = KERNEL_DS; 144 145 /* Update descriptors for TSS and percpu data segment. */ 146 set_gdt_entry(TSS_MAIN + id * 8, 147 (unsigned long)tss_entry, 0xffff, 0x89, 0); 148 set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8, 149 (unsigned long)stacktop - 4096, 0xfffff, 0x93, 0xc0); 150 151 return TSS_MAIN + id * 8; 152 } 153 #endif 154 155 void setup_multiboot(struct mbi_bootinfo *bi) 156 { 157 struct mbi_module *mods; 158 159 bootinfo = bi; 160 161 u64 best_start = (uintptr_t) &edata; 162 u64 best_end = bootinfo->mem_upper * 1024ull; 163 phys_alloc_init(best_start, best_end - best_start); 164 165 if (bootinfo->mods_count != 1) 166 return; 167 168 mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr; 169 170 initrd = (char *)(uintptr_t) mods->start; 171 initrd_size = mods->end - mods->start; 172 } 173 174 static void setup_gdt_tss(void) 175 { 176 size_t tss_offset; 177 178 /* 64-bit setup_tss does not use the stacktop argument. */ 179 tss_offset = setup_tss(NULL); 180 load_gdt_tss(tss_offset); 181 } 182 183 #ifdef CONFIG_EFI 184 185 static struct percpu_data __percpu_data[MAX_TEST_CPUS]; 186 187 static void setup_segments64(void) 188 { 189 /* Update data segments */ 190 write_ds(KERNEL_DS); 191 write_es(KERNEL_DS); 192 write_fs(KERNEL_DS); 193 write_gs(KERNEL_DS); 194 write_ss(KERNEL_DS); 195 196 197 /* 198 * Update the code segment by putting it on the stack before the return 199 * address, then doing a far return: this will use the new code segment 200 * along with the address. 201 */ 202 asm volatile("pushq %1\n\t" 203 "lea 1f(%%rip), %0\n\t" 204 "pushq %0\n\t" 205 "lretq\n\t" 206 "1:" 207 :: "r" ((u64)KERNEL_DS), "i" (KERNEL_CS)); 208 } 209 210 static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo) 211 { 212 int i; 213 unsigned long free_mem_pages = 0; 214 unsigned long free_mem_start = 0; 215 struct efi_boot_memmap *map = &(efi_bootinfo->mem_map); 216 efi_memory_desc_t *buffer = *map->map; 217 efi_memory_desc_t *d = NULL; 218 219 /* 220 * The 'buffer' contains multiple descriptors that describe memory 221 * regions maintained by UEFI. This code records the largest free 222 * EFI_CONVENTIONAL_MEMORY region which will be used to set up the 223 * memory allocator, so that the memory allocator can work in the 224 * largest free continuous memory region. 225 */ 226 for (i = 0; i < *(map->map_size); i += *(map->desc_size)) { 227 d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]); 228 if (d->type == EFI_CONVENTIONAL_MEMORY) { 229 if (free_mem_pages < d->num_pages) { 230 free_mem_pages = d->num_pages; 231 free_mem_start = d->phys_addr; 232 } 233 } 234 } 235 236 if (free_mem_pages == 0) { 237 return EFI_OUT_OF_RESOURCES; 238 } 239 240 phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT); 241 242 return EFI_SUCCESS; 243 } 244 245 static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo) 246 { 247 efi_status_t status; 248 struct acpi_table_rsdp *rsdp; 249 250 /* 251 * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used 252 * by kvm-unit-tests x86's memory allocator. So it is not necessary to 253 * copy the data structure to another memory region to prevent 254 * unintentional overwrite. 255 */ 256 status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp); 257 if (status != EFI_SUCCESS) { 258 return status; 259 } 260 261 set_efi_rsdp(rsdp); 262 263 return EFI_SUCCESS; 264 } 265 266 /* Defined in cstart64.S or efistart64.S */ 267 extern u8 ptl4; 268 extern u8 ptl3; 269 extern u8 ptl2; 270 271 static void setup_page_table(void) 272 { 273 pgd_t *curr_pt; 274 phys_addr_t flags; 275 int i; 276 277 /* Set default flags */ 278 flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 279 280 /* Set AMD SEV C-Bit for page table entries */ 281 flags |= get_amd_sev_c_bit_mask(); 282 283 /* Level 4 */ 284 curr_pt = (pgd_t *)&ptl4; 285 curr_pt[0] = ((phys_addr_t)&ptl3) | flags; 286 /* Level 3 */ 287 curr_pt = (pgd_t *)&ptl3; 288 for (i = 0; i < 4; i++) { 289 curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags; 290 } 291 /* Level 2 */ 292 curr_pt = (pgd_t *)&ptl2; 293 flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK; 294 for (i = 0; i < 4 * 512; i++) { 295 curr_pt[i] = ((phys_addr_t) i << 21) | flags; 296 } 297 298 if (amd_sev_es_enabled()) { 299 setup_ghcb_pte((pgd_t *)&ptl4); 300 } 301 302 /* Load 4-level page table */ 303 write_cr3((ulong)&ptl4); 304 } 305 306 efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo) 307 { 308 efi_status_t status; 309 const char *phase; 310 311 status = setup_memory_allocator(efi_bootinfo); 312 if (status != EFI_SUCCESS) { 313 printf("Failed to set up memory allocator: "); 314 switch (status) { 315 case EFI_OUT_OF_RESOURCES: 316 printf("No free memory region\n"); 317 break; 318 default: 319 printf("Unknown error\n"); 320 break; 321 } 322 return status; 323 } 324 325 status = setup_rsdp(efi_bootinfo); 326 if (status != EFI_SUCCESS) { 327 printf("Cannot find RSDP in EFI system table\n"); 328 return status; 329 } 330 331 phase = "AMD SEV"; 332 status = setup_amd_sev(); 333 334 /* Continue if AMD SEV is not supported, but skip SEV-ES setup */ 335 if (status == EFI_SUCCESS) { 336 phase = "AMD SEV-ES"; 337 status = setup_amd_sev_es(); 338 } 339 340 if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) { 341 printf("%s setup failed, error = 0x%lx\n", phase, status); 342 return status; 343 } 344 345 setup_gdt_tss(); 346 setup_segments64(); 347 setup_idt(); 348 load_idt(); 349 /* 350 * Load GS.base with the per-vCPU data. This must be done after 351 * loading the IDT as reading the APIC ID may #VC when running 352 * as an SEV-ES guest 353 */ 354 wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]); 355 /* 356 * Resetting the APIC sets the per-vCPU APIC ops and so must be 357 * done after loading GS.base with the per-vCPU data. 358 */ 359 reset_apic(); 360 mask_pic_interrupts(); 361 setup_page_table(); 362 enable_apic(); 363 save_id(); 364 bsp_rest_init(); 365 366 return EFI_SUCCESS; 367 } 368 369 #endif /* CONFIG_EFI */ 370 371 void setup_libcflat(void) 372 { 373 if (initrd) { 374 /* environ is currently the only file in the initrd */ 375 u32 size = MIN(initrd_size, ENV_SIZE); 376 const char *str; 377 378 memcpy(env, initrd, size); 379 setup_env(env, size); 380 if ((str = getenv("BOOTLOADER")) && atol(str) != 0) 381 add_setup_arg("bootloader"); 382 } 383 } 384 385 void save_id(void) 386 { 387 set_bit(apic_id(), online_cpus); 388 } 389 390 void ap_start64(void) 391 { 392 setup_gdt_tss(); 393 reset_apic(); 394 load_idt(); 395 save_id(); 396 enable_apic(); 397 enable_x2apic(); 398 ap_online(); 399 } 400 401 void bsp_rest_init(void) 402 { 403 bringup_aps(); 404 enable_x2apic(); 405 smp_init(); 406 pmu_init(); 407 } 408