1 /* 2 * Initialize machine setup information 3 * 4 * Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com> 5 * Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com> 6 * 7 * This work is licensed under the terms of the GNU LGPL, version 2. 8 */ 9 #include "libcflat.h" 10 #include "fwcfg.h" 11 #include "alloc_phys.h" 12 #include "argv.h" 13 #include "desc.h" 14 #include "apic.h" 15 #include "apic-defs.h" 16 #include "asm/setup.h" 17 #include "atomic.h" 18 #include "processor.h" 19 #include "smp.h" 20 21 extern char edata; 22 23 struct mbi_bootinfo { 24 u32 flags; 25 u32 mem_lower; 26 u32 mem_upper; 27 u32 boot_device; 28 u32 cmdline; 29 u32 mods_count; 30 u32 mods_addr; 31 u32 reserved[4]; /* 28-43 */ 32 u32 mmap_length; 33 u32 mmap_addr; 34 u32 reserved0[3]; /* 52-63 */ 35 u32 bootloader; 36 u32 reserved1[5]; /* 68-87 */ 37 u32 size; 38 }; 39 40 struct mbi_module { 41 u32 start, end; 42 u32 cmdline; 43 u32 unused; 44 }; 45 46 struct mbi_mem { 47 u32 size; 48 u64 base_addr; 49 u64 length; 50 u32 type; 51 } __attribute__((packed)); 52 53 #define ENV_SIZE 16384 54 55 void setup_env(char *env, int size); 56 void setup_multiboot(struct mbi_bootinfo *bootinfo); 57 void setup_libcflat(void); 58 59 char *initrd; 60 u32 initrd_size; 61 62 static char env[ENV_SIZE]; 63 static struct mbi_bootinfo *bootinfo; 64 65 #define HUGEPAGE_SIZE (1 << 21) 66 67 #ifdef __x86_64__ 68 void find_highmem(void) 69 { 70 /* Memory above 4 GB is only supported on 64-bit systems. */ 71 if (!(bootinfo->flags & 64)) 72 return; 73 74 u64 upper_end = bootinfo->mem_upper * 1024ull; 75 u64 best_start = (uintptr_t) &edata; 76 u64 best_end = upper_end; 77 u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM); 78 if (max_end == 0) 79 max_end = -1ull; 80 bool found = false; 81 82 uintptr_t mmap = bootinfo->mmap_addr; 83 while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) { 84 struct mbi_mem *mem = (void *)mmap; 85 mmap += mem->size + 4; 86 if (mem->type != 1) 87 continue; 88 if (mem->base_addr <= (uintptr_t) &edata || 89 (mem->base_addr <= upper_end && mem->base_addr + mem->length <= upper_end)) 90 continue; 91 if (mem->length < best_end - best_start) 92 continue; 93 if (mem->base_addr >= max_end) 94 continue; 95 best_start = mem->base_addr; 96 best_end = mem->base_addr + mem->length; 97 if (best_end > max_end) 98 best_end = max_end; 99 found = true; 100 } 101 102 if (found) { 103 best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE; 104 best_end = best_end & -HUGEPAGE_SIZE; 105 phys_alloc_init(best_start, best_end - best_start); 106 } 107 } 108 109 /* Setup TSS for the current processor, and return TSS offset within GDT */ 110 unsigned long setup_tss(u8 *stacktop) 111 { 112 u32 id; 113 tss64_t *tss_entry; 114 115 id = pre_boot_apic_id(); 116 117 /* Runtime address of current TSS */ 118 tss_entry = &tss[id]; 119 120 /* Update TSS */ 121 memset((void *)tss_entry, 0, sizeof(tss64_t)); 122 123 /* Update TSS descriptors; each descriptor takes up 2 entries */ 124 set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0); 125 126 return TSS_MAIN + id * 16; 127 } 128 #else 129 /* Setup TSS for the current processor, and return TSS offset within GDT */ 130 unsigned long setup_tss(u8 *stacktop) 131 { 132 u32 id; 133 tss32_t *tss_entry; 134 135 id = pre_boot_apic_id(); 136 137 /* Runtime address of current TSS */ 138 tss_entry = &tss[id]; 139 140 /* Update TSS */ 141 memset((void *)tss_entry, 0, sizeof(tss32_t)); 142 tss_entry->ss0 = KERNEL_DS; 143 144 /* Update descriptors for TSS and percpu data segment. */ 145 set_gdt_entry(TSS_MAIN + id * 8, 146 (unsigned long)tss_entry, 0xffff, 0x89, 0); 147 set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8, 148 (unsigned long)stacktop - 4096, 0xfffff, 0x93, 0xc0); 149 150 return TSS_MAIN + id * 8; 151 } 152 #endif 153 154 void setup_multiboot(struct mbi_bootinfo *bi) 155 { 156 struct mbi_module *mods; 157 158 bootinfo = bi; 159 160 u64 best_start = (uintptr_t) &edata; 161 u64 best_end = bootinfo->mem_upper * 1024ull; 162 phys_alloc_init(best_start, best_end - best_start); 163 164 if (bootinfo->mods_count != 1) 165 return; 166 167 mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr; 168 169 initrd = (char *)(uintptr_t) mods->start; 170 initrd_size = mods->end - mods->start; 171 } 172 173 static void setup_gdt_tss(void) 174 { 175 size_t tss_offset; 176 177 /* 64-bit setup_tss does not use the stacktop argument. */ 178 tss_offset = setup_tss(NULL); 179 load_gdt_tss(tss_offset); 180 } 181 182 #ifdef CONFIG_EFI 183 184 static struct percpu_data __percpu_data[MAX_TEST_CPUS]; 185 186 static void setup_segments64(void) 187 { 188 /* Update data segments */ 189 write_ds(KERNEL_DS); 190 write_es(KERNEL_DS); 191 write_fs(KERNEL_DS); 192 write_gs(KERNEL_DS); 193 write_ss(KERNEL_DS); 194 195 /* Setup percpu base */ 196 wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]); 197 198 /* 199 * Update the code segment by putting it on the stack before the return 200 * address, then doing a far return: this will use the new code segment 201 * along with the address. 202 */ 203 asm volatile("pushq %1\n\t" 204 "lea 1f(%%rip), %0\n\t" 205 "pushq %0\n\t" 206 "lretq\n\t" 207 "1:" 208 :: "r" ((u64)KERNEL_DS), "i" (KERNEL_CS)); 209 } 210 211 static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo) 212 { 213 int i; 214 unsigned long free_mem_pages = 0; 215 unsigned long free_mem_start = 0; 216 struct efi_boot_memmap *map = &(efi_bootinfo->mem_map); 217 efi_memory_desc_t *buffer = *map->map; 218 efi_memory_desc_t *d = NULL; 219 220 /* 221 * The 'buffer' contains multiple descriptors that describe memory 222 * regions maintained by UEFI. This code records the largest free 223 * EFI_CONVENTIONAL_MEMORY region which will be used to set up the 224 * memory allocator, so that the memory allocator can work in the 225 * largest free continuous memory region. 226 */ 227 for (i = 0; i < *(map->map_size); i += *(map->desc_size)) { 228 d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]); 229 if (d->type == EFI_CONVENTIONAL_MEMORY) { 230 if (free_mem_pages < d->num_pages) { 231 free_mem_pages = d->num_pages; 232 free_mem_start = d->phys_addr; 233 } 234 } 235 } 236 237 if (free_mem_pages == 0) { 238 return EFI_OUT_OF_RESOURCES; 239 } 240 241 phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT); 242 243 return EFI_SUCCESS; 244 } 245 246 static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo) 247 { 248 efi_status_t status; 249 struct rsdp_descriptor *rsdp; 250 251 /* 252 * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used 253 * by kvm-unit-tests x86's memory allocator. So it is not necessary to 254 * copy the data structure to another memory region to prevent 255 * unintentional overwrite. 256 */ 257 status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp); 258 if (status != EFI_SUCCESS) { 259 return status; 260 } 261 262 set_efi_rsdp(rsdp); 263 264 return EFI_SUCCESS; 265 } 266 267 /* Defined in cstart64.S or efistart64.S */ 268 extern u8 ptl4; 269 extern u8 ptl3; 270 extern u8 ptl2; 271 272 static void setup_page_table(void) 273 { 274 pgd_t *curr_pt; 275 phys_addr_t flags; 276 int i; 277 278 /* Set default flags */ 279 flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK; 280 281 /* Set AMD SEV C-Bit for page table entries */ 282 flags |= get_amd_sev_c_bit_mask(); 283 284 /* Level 4 */ 285 curr_pt = (pgd_t *)&ptl4; 286 curr_pt[0] = ((phys_addr_t)&ptl3) | flags; 287 /* Level 3 */ 288 curr_pt = (pgd_t *)&ptl3; 289 for (i = 0; i < 4; i++) { 290 curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags; 291 } 292 /* Level 2 */ 293 curr_pt = (pgd_t *)&ptl2; 294 flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK; 295 for (i = 0; i < 4 * 512; i++) { 296 curr_pt[i] = ((phys_addr_t) i << 21) | flags; 297 } 298 299 if (amd_sev_es_enabled()) { 300 setup_ghcb_pte((pgd_t *)&ptl4); 301 } 302 303 /* Load 4-level page table */ 304 write_cr3((ulong)&ptl4); 305 } 306 307 efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo) 308 { 309 efi_status_t status; 310 const char *phase; 311 312 status = setup_memory_allocator(efi_bootinfo); 313 if (status != EFI_SUCCESS) { 314 printf("Failed to set up memory allocator: "); 315 switch (status) { 316 case EFI_OUT_OF_RESOURCES: 317 printf("No free memory region\n"); 318 break; 319 default: 320 printf("Unknown error\n"); 321 break; 322 } 323 return status; 324 } 325 326 status = setup_rsdp(efi_bootinfo); 327 if (status != EFI_SUCCESS) { 328 printf("Cannot find RSDP in EFI system table\n"); 329 return status; 330 } 331 332 phase = "AMD SEV"; 333 status = setup_amd_sev(); 334 335 /* Continue if AMD SEV is not supported, but skip SEV-ES setup */ 336 if (status == EFI_SUCCESS) { 337 phase = "AMD SEV-ES"; 338 status = setup_amd_sev_es(); 339 } 340 341 if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) { 342 printf("%s setup failed, error = 0x%lx\n", phase, status); 343 return status; 344 } 345 346 setup_gdt_tss(); 347 /* 348 * GS.base, which points at the per-vCPU data, must be configured prior 349 * to resetting the APIC, which sets the per-vCPU APIC ops. 350 */ 351 setup_segments64(); 352 reset_apic(); 353 setup_idt(); 354 load_idt(); 355 mask_pic_interrupts(); 356 setup_page_table(); 357 enable_apic(); 358 save_id(); 359 ap_init(); 360 enable_x2apic(); 361 smp_init(); 362 363 return EFI_SUCCESS; 364 } 365 366 #endif /* CONFIG_EFI */ 367 368 void setup_libcflat(void) 369 { 370 if (initrd) { 371 /* environ is currently the only file in the initrd */ 372 u32 size = MIN(initrd_size, ENV_SIZE); 373 const char *str; 374 375 memcpy(env, initrd, size); 376 setup_env(env, size); 377 if ((str = getenv("BOOTLOADER")) && atol(str) != 0) 378 add_setup_arg("bootloader"); 379 } 380 } 381 382 void save_id(void) 383 { 384 set_bit(apic_id(), online_cpus); 385 } 386 387 void ap_start64(void) 388 { 389 setup_gdt_tss(); 390 reset_apic(); 391 load_idt(); 392 save_id(); 393 enable_apic(); 394 enable_x2apic(); 395 sti(); 396 asm volatile ("nop"); 397 printf("setup: AP %d online\n", apic_id()); 398 atomic_inc(&cpu_online_count); 399 400 /* Only the BSP runs the test's main(), APs are given work via IPIs. */ 401 for (;;) 402 asm volatile("hlt"); 403 } 404