xref: /kvm-unit-tests/lib/x86/setup.c (revision dca3f4c041143c8e8dc70c6890a19a5730310230)
1 /*
2  * Initialize machine setup information
3  *
4  * Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com>
5  * Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com>
6  *
7  * This work is licensed under the terms of the GNU LGPL, version 2.
8  */
9 #include "libcflat.h"
10 #include "fwcfg.h"
11 #include "alloc_phys.h"
12 #include "argv.h"
13 #include "desc.h"
14 #include "apic.h"
15 #include "apic-defs.h"
16 #include "asm/setup.h"
17 #include "atomic.h"
18 #include "pmu.h"
19 #include "processor.h"
20 #include "smp.h"
21 
/*
 * Symbol defined outside this file; only its address is used, as the
 * lower bound of the memory handed to the physical allocator.
 */
extern char edata;
23 
24 struct mbi_bootinfo {
25 	u32 flags;
26 	u32 mem_lower;
27 	u32 mem_upper;
28 	u32 boot_device;
29 	u32 cmdline;
30 	u32 mods_count;
31 	u32 mods_addr;
32 	u32 reserved[4];   /* 28-43 */
33 	u32 mmap_length;
34 	u32 mmap_addr;
35 	u32 reserved0[3];  /* 52-63 */
36 	u32 bootloader;
37 	u32 reserved1[5];  /* 68-87 */
38 	u32 size;
39 };
40 
41 struct mbi_module {
42 	u32 start, end;
43 	u32 cmdline;
44 	u32 unused;
45 };
46 
47 struct mbi_mem {
48 	u32 size;
49 	u64 base_addr;
50 	u64 length;
51 	u32 type;
52 } __attribute__((packed));
53 
54 #define ENV_SIZE 16384
55 
56 void setup_env(char *env, int size);
57 void setup_multiboot(struct mbi_bootinfo *bootinfo);
58 void setup_libcflat(void);
59 
60 char *initrd;
61 u32 initrd_size;
62 
63 static char env[ENV_SIZE];
64 static struct mbi_bootinfo *bootinfo;
65 
66 #define HUGEPAGE_SIZE (1 << 21)
67 
68 #ifdef __x86_64__
find_highmem(void)69 void find_highmem(void)
70 {
71 	/* Memory above 4 GB is only supported on 64-bit systems.  */
72 	if (!(bootinfo->flags & 64))
73 	    	return;
74 
75 	u64 upper_end = bootinfo->mem_upper * 1024ull;
76 	u64 best_start = (uintptr_t) &edata;
77 	u64 best_end = upper_end;
78 	u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM);
79 	if (max_end == 0)
80 		max_end = -1ull;
81 	bool found = false;
82 
83 	uintptr_t mmap = bootinfo->mmap_addr;
84 	while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) {
85 		struct mbi_mem *mem = (void *)mmap;
86 		mmap += mem->size + 4;
87 		if (mem->type != 1)
88 			continue;
89 		if (mem->base_addr <= (uintptr_t) &edata ||
90 		    (mem->base_addr <= upper_end && mem->base_addr + mem->length <= upper_end))
91 			continue;
92 		if (mem->length < best_end - best_start)
93 			continue;
94 		if (mem->base_addr >= max_end)
95 			continue;
96 		best_start = mem->base_addr;
97 		best_end = mem->base_addr + mem->length;
98 		if (best_end > max_end)
99 			best_end = max_end;
100 		found = true;
101 	}
102 
103 	if (found) {
104 		best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE;
105 		best_end = best_end & -HUGEPAGE_SIZE;
106 		phys_alloc_init(best_start, best_end - best_start);
107 	}
108 }
109 
110 /* Setup TSS for the current processor, and return TSS offset within GDT */
setup_tss(u8 * stacktop)111 unsigned long setup_tss(u8 *stacktop)
112 {
113 	u32 id;
114 	tss64_t *tss_entry;
115 
116 	id = pre_boot_apic_id();
117 
118 	/* Runtime address of current TSS */
119 	tss_entry = &tss[id];
120 
121 	/* Update TSS */
122 	memset((void *)tss_entry, 0, sizeof(tss64_t));
123 
124 	/* Update TSS descriptors; each descriptor takes up 2 entries */
125 	set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0);
126 
127 	return TSS_MAIN + id * 16;
128 }
129 #else
130 /* Setup TSS for the current processor, and return TSS offset within GDT */
setup_tss(u8 * stacktop)131 unsigned long setup_tss(u8 *stacktop)
132 {
133 	u32 id;
134 	tss32_t *tss_entry;
135 
136 	id = pre_boot_apic_id();
137 
138 	/* Runtime address of current TSS */
139 	tss_entry = &tss[id];
140 
141 	/* Update TSS */
142 	memset((void *)tss_entry, 0, sizeof(tss32_t));
143 	tss_entry->ss0 = KERNEL_DS;
144 
145 	/* Update descriptors for TSS and percpu data segment.  */
146 	set_gdt_entry(TSS_MAIN + id * 8,
147 		      (unsigned long)tss_entry, 0xffff, 0x89, 0);
148 	set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8,
149 		      (unsigned long)stacktop - PER_CPU_SIZE, 0xfffff, 0x93, 0xc0);
150 
151 	return TSS_MAIN + id * 8;
152 }
153 #endif
154 
setup_multiboot(struct mbi_bootinfo * bi)155 void setup_multiboot(struct mbi_bootinfo *bi)
156 {
157 	struct mbi_module *mods;
158 
159 	bootinfo = bi;
160 
161 	u64 best_start = (uintptr_t) &edata;
162 	u64 best_end = bootinfo->mem_upper * 1024ull;
163 	phys_alloc_init(best_start, best_end - best_start);
164 
165 	if (bootinfo->mods_count != 1)
166 		return;
167 
168 	mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr;
169 
170 	initrd = (char *)(uintptr_t) mods->start;
171 	initrd_size = mods->end - mods->start;
172 }
173 
setup_gdt_tss(void)174 static void setup_gdt_tss(void)
175 {
176 	size_t tss_offset;
177 
178 	/* 64-bit setup_tss does not use the stacktop argument.  */
179 	tss_offset = setup_tss(NULL);
180 	load_gdt_tss(tss_offset);
181 }
182 
183 #ifdef CONFIG_EFI
184 
185 static struct percpu_data __percpu_data[MAX_TEST_CPUS];
186 
setup_segments64(void)187 static void setup_segments64(void)
188 {
189 	/* Update data segments */
190 	write_ds(KERNEL_DS);
191 	write_es(KERNEL_DS);
192 	write_fs(KERNEL_DS);
193 	write_gs(KERNEL_DS);
194 	write_ss(KERNEL_DS);
195 
196 
197 	/*
198 	 * Update the code segment by putting it on the stack before the return
199 	 * address, then doing a far return: this will use the new code segment
200 	 * along with the address.
201 	 */
202 	asm volatile("pushq %1\n\t"
203 		     "lea 1f(%%rip), %0\n\t"
204 		     "pushq %0\n\t"
205 		     "lretq\n\t"
206 		     "1:"
207 		     :: "r" ((u64)KERNEL_DS), "i" (KERNEL_CS));
208 }
209 
setup_memory_allocator(efi_bootinfo_t * efi_bootinfo)210 static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo)
211 {
212 	int i;
213 	unsigned long free_mem_pages = 0;
214 	unsigned long free_mem_start = 0;
215 	struct efi_boot_memmap *map = &(efi_bootinfo->mem_map);
216 	efi_memory_desc_t *buffer = *map->map;
217 	efi_memory_desc_t *d = NULL;
218 
219 	/*
220 	 * The 'buffer' contains multiple descriptors that describe memory
221 	 * regions maintained by UEFI. This code records the largest free
222 	 * EFI_CONVENTIONAL_MEMORY region which will be used to set up the
223 	 * memory allocator, so that the memory allocator can work in the
224 	 * largest free continuous memory region.
225 	 */
226 	for (i = 0; i < *(map->map_size); i += *(map->desc_size)) {
227 		d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]);
228 		if (d->type == EFI_CONVENTIONAL_MEMORY) {
229 			if (free_mem_pages < d->num_pages) {
230 				free_mem_pages = d->num_pages;
231 				free_mem_start = d->phys_addr;
232 			}
233 		}
234 	}
235 
236 	if (free_mem_pages == 0) {
237 		return EFI_OUT_OF_RESOURCES;
238 	}
239 
240 	phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT);
241 
242 	return EFI_SUCCESS;
243 }
244 
setup_rsdp(efi_bootinfo_t * efi_bootinfo)245 static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo)
246 {
247 	efi_status_t status;
248 	struct acpi_table_rsdp *rsdp;
249 
250 	/*
251 	 * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used
252 	 * by kvm-unit-tests x86's memory allocator. So it is not necessary to
253 	 * copy the data structure to another memory region to prevent
254 	 * unintentional overwrite.
255 	 */
256 	status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp);
257 	if (status != EFI_SUCCESS) {
258 		return status;
259 	}
260 
261 	set_efi_rsdp(rsdp);
262 
263 	return EFI_SUCCESS;
264 }
265 
266 /* Defined in cstart64.S or efistart64.S */
267 extern u8 ptl4;
268 extern u8 ptl3;
269 extern u8 ptl2;
270 
setup_page_table(void)271 static void setup_page_table(void)
272 {
273 	pgd_t *curr_pt;
274 	phys_addr_t flags;
275 	int i;
276 
277 	/* Set default flags */
278 	flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
279 
280 	/* Set AMD SEV C-Bit for page table entries */
281 	flags |= get_amd_sev_c_bit_mask();
282 
283 	/* Level 4 */
284 	curr_pt = (pgd_t *)&ptl4;
285 	curr_pt[0] = ((phys_addr_t)&ptl3) | flags;
286 	/* Level 3 */
287 	curr_pt = (pgd_t *)&ptl3;
288 	for (i = 0; i < 4; i++) {
289 		curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags;
290 	}
291 	/* Level 2 */
292 	curr_pt = (pgd_t *)&ptl2;
293 	flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK;
294 	for (i = 0; i < 4 * 512; i++)	{
295 		curr_pt[i] = ((phys_addr_t) i << 21) | flags;
296 	}
297 
298 	if (amd_sev_es_enabled()) {
299 		setup_ghcb_pte((pgd_t *)&ptl4);
300 	}
301 
302 	/* Load 4-level page table */
303 	write_cr3((ulong)&ptl4);
304 }
305 
setup_efi(efi_bootinfo_t * efi_bootinfo)306 efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo)
307 {
308 	efi_status_t status;
309 	const char *phase;
310 
311 	status = setup_memory_allocator(efi_bootinfo);
312 	if (status != EFI_SUCCESS) {
313 		printf("Failed to set up memory allocator: ");
314 		switch (status) {
315 		case EFI_OUT_OF_RESOURCES:
316 			printf("No free memory region\n");
317 			break;
318 		default:
319 			printf("Unknown error\n");
320 			break;
321 		}
322 		return status;
323 	}
324 
325 	status = setup_rsdp(efi_bootinfo);
326 	if (status != EFI_SUCCESS) {
327 		printf("Cannot find RSDP in EFI system table\n");
328 		return status;
329 	}
330 
331 	phase = "AMD SEV";
332 	status = setup_amd_sev();
333 
334 	/* Continue if AMD SEV is not supported, but skip SEV-ES setup */
335 	if (status == EFI_SUCCESS) {
336 		phase = "AMD SEV-ES";
337 		status = setup_amd_sev_es();
338 	}
339 
340 	if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) {
341 		printf("%s setup failed, error = 0x%lx\n", phase, status);
342 		return status;
343 	}
344 
345 	setup_gdt_tss();
346 	setup_segments64();
347 	setup_idt();
348 	load_idt();
349 	/*
350 	 * Load GS.base with the per-vCPU data.  This must be done after
351 	 * loading the IDT as reading the APIC ID may #VC when running
352 	 * as an SEV-ES guest
353 	 */
354 	wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]);
355 	/*
356 	 * Resetting the APIC sets the per-vCPU APIC ops and so must be
357 	 * done after loading GS.base with the per-vCPU data.
358 	 */
359 	reset_apic();
360 	mask_pic_interrupts();
361 	setup_page_table();
362 	enable_apic();
363 	save_id();
364 	bsp_rest_init();
365 
366 	return EFI_SUCCESS;
367 }
368 
369 #endif /* CONFIG_EFI */
370 
setup_libcflat(void)371 void setup_libcflat(void)
372 {
373 	if (initrd) {
374 		/* environ is currently the only file in the initrd */
375 		u32 size = MIN(initrd_size, ENV_SIZE);
376 		const char *str;
377 
378 		memcpy(env, initrd, size);
379 		setup_env(env, size);
380 		if ((str = getenv("BOOTLOADER")) && atol(str) != 0)
381 			add_setup_arg("bootloader");
382 	}
383 }
384 
save_id(void)385 void save_id(void)
386 {
387 	set_bit(apic_id(), online_cpus);
388 }
389 
/*
 * Per-CPU start-up sequence: load the GDT/TSS, reset the APIC, load the
 * IDT, record this CPU in online_cpus, enable the APIC and x2APIC, and
 * finish with ap_online().  The order of the calls is kept as is.
 */
void ap_start64(void)
{
	/* Descriptor tables and APIC state */
	setup_gdt_tss();
	reset_apic();
	load_idt();

	/* Record this CPU in online_cpus */
	save_id();

	enable_apic();
	enable_x2apic();

	ap_online();
}
400 
/*
 * Remaining initialization steps, run at the end of setup_efi(): bring
 * up the other CPUs, enable x2APIC, then run smp_init() and pmu_init().
 * The order of the calls is kept as is.
 */
void bsp_rest_init(void)
{
	bringup_aps();
	enable_x2apic();

	smp_init();
	pmu_init();
}
408