xref: /kvm-unit-tests/lib/x86/setup.c (revision 0b7501c3b516bde6bd3d74b17e6af76b6aa2a116)
1 /*
2  * Initialize machine setup information
3  *
4  * Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com>
5  * Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com>
6  *
7  * This work is licensed under the terms of the GNU LGPL, version 2.
8  */
9 #include "libcflat.h"
10 #include "fwcfg.h"
11 #include "alloc_phys.h"
12 #include "argv.h"
13 #include "desc.h"
14 #include "apic.h"
15 #include "apic-defs.h"
16 #include "asm/setup.h"
17 #include "atomic.h"
18 #include "processor.h"
19 #include "smp.h"
20 
/*
 * Only the address of this symbol is used in this file: it is taken as the
 * start of the memory handed to the physical allocator.
 * NOTE(review): presumably provided by the linker script at the end of the
 * image's data -- confirm.
 */
extern char edata;
22 
23 struct mbi_bootinfo {
24 	u32 flags;
25 	u32 mem_lower;
26 	u32 mem_upper;
27 	u32 boot_device;
28 	u32 cmdline;
29 	u32 mods_count;
30 	u32 mods_addr;
31 	u32 reserved[4];   /* 28-43 */
32 	u32 mmap_length;
33 	u32 mmap_addr;
34 	u32 reserved0[3];  /* 52-63 */
35 	u32 bootloader;
36 	u32 reserved1[5];  /* 68-87 */
37 	u32 size;
38 };
39 
40 struct mbi_module {
41 	u32 start, end;
42 	u32 cmdline;
43 	u32 unused;
44 };
45 
46 struct mbi_mem {
47 	u32 size;
48 	u64 base_addr;
49 	u64 length;
50 	u32 type;
51 } __attribute__((packed));
52 
53 #define ENV_SIZE 16384
54 
55 void setup_env(char *env, int size);
56 void setup_multiboot(struct mbi_bootinfo *bootinfo);
57 void setup_libcflat(void);
58 
59 char *initrd;
60 u32 initrd_size;
61 
62 static char env[ENV_SIZE];
63 static struct mbi_bootinfo *bootinfo;
64 
65 #define HUGEPAGE_SIZE (1 << 21)
66 
67 #ifdef __x86_64__
68 void find_highmem(void)
69 {
70 	/* Memory above 4 GB is only supported on 64-bit systems.  */
71 	if (!(bootinfo->flags & 64))
72 	    	return;
73 
74 	u64 upper_end = bootinfo->mem_upper * 1024ull;
75 	u64 best_start = (uintptr_t) &edata;
76 	u64 best_end = upper_end;
77 	u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM);
78 	if (max_end == 0)
79 		max_end = -1ull;
80 	bool found = false;
81 
82 	uintptr_t mmap = bootinfo->mmap_addr;
83 	while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) {
84 		struct mbi_mem *mem = (void *)mmap;
85 		mmap += mem->size + 4;
86 		if (mem->type != 1)
87 			continue;
88 		if (mem->base_addr <= (uintptr_t) &edata ||
89 		    (mem->base_addr <= upper_end && mem->base_addr + mem->length <= upper_end))
90 			continue;
91 		if (mem->length < best_end - best_start)
92 			continue;
93 		if (mem->base_addr >= max_end)
94 			continue;
95 		best_start = mem->base_addr;
96 		best_end = mem->base_addr + mem->length;
97 		if (best_end > max_end)
98 			best_end = max_end;
99 		found = true;
100 	}
101 
102 	if (found) {
103 		best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE;
104 		best_end = best_end & -HUGEPAGE_SIZE;
105 		phys_alloc_init(best_start, best_end - best_start);
106 	}
107 }
108 
109 /* Setup TSS for the current processor, and return TSS offset within GDT */
110 unsigned long setup_tss(u8 *stacktop)
111 {
112 	u32 id;
113 	tss64_t *tss_entry;
114 
115 	id = pre_boot_apic_id();
116 
117 	/* Runtime address of current TSS */
118 	tss_entry = &tss[id];
119 
120 	/* Update TSS */
121 	memset((void *)tss_entry, 0, sizeof(tss64_t));
122 
123 	/* Update TSS descriptors; each descriptor takes up 2 entries */
124 	set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0);
125 
126 	return TSS_MAIN + id * 16;
127 }
128 #else
129 /* Setup TSS for the current processor, and return TSS offset within GDT */
130 unsigned long setup_tss(u8 *stacktop)
131 {
132 	u32 id;
133 	tss32_t *tss_entry;
134 
135 	id = pre_boot_apic_id();
136 
137 	/* Runtime address of current TSS */
138 	tss_entry = &tss[id];
139 
140 	/* Update TSS */
141 	memset((void *)tss_entry, 0, sizeof(tss32_t));
142 	tss_entry->ss0 = KERNEL_DS;
143 
144 	/* Update descriptors for TSS and percpu data segment.  */
145 	set_gdt_entry(TSS_MAIN + id * 8,
146 		      (unsigned long)tss_entry, 0xffff, 0x89, 0);
147 	set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8,
148 		      (unsigned long)stacktop - 4096, 0xfffff, 0x93, 0xc0);
149 
150 	return TSS_MAIN + id * 8;
151 }
152 #endif
153 
154 void setup_multiboot(struct mbi_bootinfo *bi)
155 {
156 	struct mbi_module *mods;
157 
158 	bootinfo = bi;
159 
160 	u64 best_start = (uintptr_t) &edata;
161 	u64 best_end = bootinfo->mem_upper * 1024ull;
162 	phys_alloc_init(best_start, best_end - best_start);
163 
164 	if (bootinfo->mods_count != 1)
165 		return;
166 
167 	mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr;
168 
169 	initrd = (char *)(uintptr_t) mods->start;
170 	initrd_size = mods->end - mods->start;
171 }
172 
/*
 * Set up the calling processor's TSS and load the GDT/TSS for it.
 *
 * NULL is passed as the stack top because the 64-bit setup_tss() does not
 * use that argument.
 */
static void setup_gdt_tss(void)
{
	load_gdt_tss(setup_tss(NULL));
}
181 
182 #ifdef CONFIG_EFI
183 
184 static struct percpu_data __percpu_data[MAX_TEST_CPUS];
185 
186 static void setup_segments64(void)
187 {
188 	/* Update data segments */
189 	write_ds(KERNEL_DS);
190 	write_es(KERNEL_DS);
191 	write_fs(KERNEL_DS);
192 	write_gs(KERNEL_DS);
193 	write_ss(KERNEL_DS);
194 
195 	/* Setup percpu base */
196 	wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]);
197 
198 	/*
199 	 * Update the code segment by putting it on the stack before the return
200 	 * address, then doing a far return: this will use the new code segment
201 	 * along with the address.
202 	 */
203 	asm volatile("pushq %1\n\t"
204 		     "lea 1f(%%rip), %0\n\t"
205 		     "pushq %0\n\t"
206 		     "lretq\n\t"
207 		     "1:"
208 		     :: "r" ((u64)KERNEL_DS), "i" (KERNEL_CS));
209 }
210 
211 static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo)
212 {
213 	int i;
214 	unsigned long free_mem_pages = 0;
215 	unsigned long free_mem_start = 0;
216 	struct efi_boot_memmap *map = &(efi_bootinfo->mem_map);
217 	efi_memory_desc_t *buffer = *map->map;
218 	efi_memory_desc_t *d = NULL;
219 
220 	/*
221 	 * The 'buffer' contains multiple descriptors that describe memory
222 	 * regions maintained by UEFI. This code records the largest free
223 	 * EFI_CONVENTIONAL_MEMORY region which will be used to set up the
224 	 * memory allocator, so that the memory allocator can work in the
225 	 * largest free continuous memory region.
226 	 */
227 	for (i = 0; i < *(map->map_size); i += *(map->desc_size)) {
228 		d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]);
229 		if (d->type == EFI_CONVENTIONAL_MEMORY) {
230 			if (free_mem_pages < d->num_pages) {
231 				free_mem_pages = d->num_pages;
232 				free_mem_start = d->phys_addr;
233 			}
234 		}
235 	}
236 
237 	if (free_mem_pages == 0) {
238 		return EFI_OUT_OF_RESOURCES;
239 	}
240 
241 	phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT);
242 
243 	return EFI_SUCCESS;
244 }
245 
246 static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo)
247 {
248 	efi_status_t status;
249 	struct rsdp_descriptor *rsdp;
250 
251 	/*
252 	 * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used
253 	 * by kvm-unit-tests x86's memory allocator. So it is not necessary to
254 	 * copy the data structure to another memory region to prevent
255 	 * unintentional overwrite.
256 	 */
257 	status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp);
258 	if (status != EFI_SUCCESS) {
259 		return status;
260 	}
261 
262 	set_efi_rsdp(rsdp);
263 
264 	return EFI_SUCCESS;
265 }
266 
267 /* Defined in cstart64.S or efistart64.S */
268 extern u8 ptl4;
269 extern u8 ptl3;
270 extern u8 ptl2;
271 
272 static void setup_page_table(void)
273 {
274 	pgd_t *curr_pt;
275 	phys_addr_t flags;
276 	int i;
277 
278 	/* Set default flags */
279 	flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
280 
281 	/* Set AMD SEV C-Bit for page table entries */
282 	flags |= get_amd_sev_c_bit_mask();
283 
284 	/* Level 4 */
285 	curr_pt = (pgd_t *)&ptl4;
286 	curr_pt[0] = ((phys_addr_t)&ptl3) | flags;
287 	/* Level 3 */
288 	curr_pt = (pgd_t *)&ptl3;
289 	for (i = 0; i < 4; i++) {
290 		curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags;
291 	}
292 	/* Level 2 */
293 	curr_pt = (pgd_t *)&ptl2;
294 	flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK;
295 	for (i = 0; i < 4 * 512; i++)	{
296 		curr_pt[i] = ((phys_addr_t) i << 21) | flags;
297 	}
298 
299 	if (amd_sev_es_enabled()) {
300 		setup_ghcb_pte((pgd_t *)&ptl4);
301 	}
302 
303 	/* Load 4-level page table */
304 	write_cr3((ulong)&ptl4);
305 }
306 
307 efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo)
308 {
309 	efi_status_t status;
310 	const char *phase;
311 
312 	status = setup_memory_allocator(efi_bootinfo);
313 	if (status != EFI_SUCCESS) {
314 		printf("Failed to set up memory allocator: ");
315 		switch (status) {
316 		case EFI_OUT_OF_RESOURCES:
317 			printf("No free memory region\n");
318 			break;
319 		default:
320 			printf("Unknown error\n");
321 			break;
322 		}
323 		return status;
324 	}
325 
326 	status = setup_rsdp(efi_bootinfo);
327 	if (status != EFI_SUCCESS) {
328 		printf("Cannot find RSDP in EFI system table\n");
329 		return status;
330 	}
331 
332 	phase = "AMD SEV";
333 	status = setup_amd_sev();
334 
335 	/* Continue if AMD SEV is not supported, but skip SEV-ES setup */
336 	if (status == EFI_SUCCESS) {
337 		phase = "AMD SEV-ES";
338 		status = setup_amd_sev_es();
339 	}
340 
341 	if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) {
342 		printf("%s setup failed, error = 0x%lx\n", phase, status);
343 		return status;
344 	}
345 
346 	setup_gdt_tss();
347 	/*
348 	 * GS.base, which points at the per-vCPU data, must be configured prior
349 	 * to resetting the APIC, which sets the per-vCPU APIC ops.
350 	 */
351 	setup_segments64();
352 	reset_apic();
353 	setup_idt();
354 	load_idt();
355 	mask_pic_interrupts();
356 	setup_page_table();
357 	enable_apic();
358 	save_id();
359 	ap_init();
360 	enable_x2apic();
361 	smp_init();
362 
363 	return EFI_SUCCESS;
364 }
365 
366 #endif /* CONFIG_EFI */
367 
368 void setup_libcflat(void)
369 {
370 	if (initrd) {
371 		/* environ is currently the only file in the initrd */
372 		u32 size = MIN(initrd_size, ENV_SIZE);
373 		const char *str;
374 
375 		memcpy(env, initrd, size);
376 		setup_env(env, size);
377 		if ((str = getenv("BOOTLOADER")) && atol(str) != 0)
378 			add_setup_arg("bootloader");
379 	}
380 }
381 
382 void save_id(void)
383 {
384 	set_bit(apic_id(), online_cpus);
385 }
386 
387 void ap_start64(void)
388 {
389 	setup_gdt_tss();
390 	reset_apic();
391 	load_idt();
392 	save_id();
393 	enable_apic();
394 	enable_x2apic();
395 	sti();
396 	asm volatile ("nop");
397 	printf("setup: AP %d online\n", apic_id());
398 	atomic_inc(&cpu_online_count);
399 
400 	/* Only the BSP runs the test's main(), APs are given work via IPIs. */
401 	for (;;)
402 		asm volatile("hlt");
403 }
404