xref: /kvm-unit-tests/lib/x86/setup.c (revision 1542cd7b194d665f4d7812f549c0168ee6ed2c80)
1 /*
2  * Initialize machine setup information
3  *
4  * Copyright (C) 2017, Red Hat Inc, Andrew Jones <drjones@redhat.com>
5  * Copyright (C) 2021, Google Inc, Zixuan Wang <zixuanwang@google.com>
6  *
7  * This work is licensed under the terms of the GNU LGPL, version 2.
8  */
9 #include "libcflat.h"
10 #include "fwcfg.h"
11 #include "alloc_phys.h"
12 #include "argv.h"
13 #include "desc.h"
14 #include "apic.h"
15 #include "apic-defs.h"
16 #include "asm/setup.h"
17 
/*
 * Only the address of this symbol is used in this file: it serves as the
 * lower bound of the memory handed to the physical allocator.
 * NOTE(review): presumably emitted by the linker script at the end of the
 * loaded image -- confirm.
 */
extern char edata;
19 
20 struct mbi_bootinfo {
21 	u32 flags;
22 	u32 mem_lower;
23 	u32 mem_upper;
24 	u32 boot_device;
25 	u32 cmdline;
26 	u32 mods_count;
27 	u32 mods_addr;
28 	u32 reserved[4];   /* 28-43 */
29 	u32 mmap_length;
30 	u32 mmap_addr;
31 	u32 reserved0[3];  /* 52-63 */
32 	u32 bootloader;
33 	u32 reserved1[5];  /* 68-87 */
34 	u32 size;
35 };
36 
37 struct mbi_module {
38 	u32 start, end;
39 	u32 cmdline;
40 	u32 unused;
41 };
42 
43 struct mbi_mem {
44 	u32 size;
45 	u64 base_addr;
46 	u64 length;
47 	u32 type;
48 } __attribute__((packed));
49 
50 #define ENV_SIZE 16384
51 
52 void setup_env(char *env, int size);
53 void setup_multiboot(struct mbi_bootinfo *bootinfo);
54 void setup_libcflat(void);
55 
56 char *initrd;
57 u32 initrd_size;
58 
59 static char env[ENV_SIZE];
60 static struct mbi_bootinfo *bootinfo;
61 
62 #define HUGEPAGE_SIZE (1 << 21)
63 
64 #ifdef __x86_64__
65 void find_highmem(void)
66 {
67 	/* Memory above 4 GB is only supported on 64-bit systems.  */
68 	if (!(bootinfo->flags & 64))
69 	    	return;
70 
71 	u64 upper_end = bootinfo->mem_upper * 1024ull;
72 	u64 best_start = (uintptr_t) &edata;
73 	u64 best_end = upper_end;
74 	u64 max_end = fwcfg_get_u64(FW_CFG_MAX_RAM);
75 	if (max_end == 0)
76 		max_end = -1ull;
77 	bool found = false;
78 
79 	uintptr_t mmap = bootinfo->mmap_addr;
80 	while (mmap < bootinfo->mmap_addr + bootinfo->mmap_length) {
81 		struct mbi_mem *mem = (void *)mmap;
82 		mmap += mem->size + 4;
83 		if (mem->type != 1)
84 			continue;
85 		if (mem->base_addr <= (uintptr_t) &edata ||
86 		    (mem->base_addr <= upper_end && mem->base_addr + mem->length <= upper_end))
87 			continue;
88 		if (mem->length < best_end - best_start)
89 			continue;
90 		if (mem->base_addr >= max_end)
91 			continue;
92 		best_start = mem->base_addr;
93 		best_end = mem->base_addr + mem->length;
94 		if (best_end > max_end)
95 			best_end = max_end;
96 		found = true;
97 	}
98 
99 	if (found) {
100 		best_start = (best_start + HUGEPAGE_SIZE - 1) & -HUGEPAGE_SIZE;
101 		best_end = best_end & -HUGEPAGE_SIZE;
102 		phys_alloc_init(best_start, best_end - best_start);
103 	}
104 }
105 
106 /* Setup TSS for the current processor, and return TSS offset within GDT */
107 unsigned long setup_tss(u8 *stacktop)
108 {
109 	u32 id;
110 	tss64_t *tss_entry;
111 
112 	id = pre_boot_apic_id();
113 
114 	/* Runtime address of current TSS */
115 	tss_entry = &tss[id];
116 
117 	/* Update TSS */
118 	memset((void *)tss_entry, 0, sizeof(tss64_t));
119 
120 	/* Update TSS descriptors; each descriptor takes up 2 entries */
121 	set_gdt_entry(TSS_MAIN + id * 16, (unsigned long)tss_entry, 0xffff, 0x89, 0);
122 
123 	return TSS_MAIN + id * 16;
124 }
125 #else
126 /* Setup TSS for the current processor, and return TSS offset within GDT */
127 unsigned long setup_tss(u8 *stacktop)
128 {
129 	u32 id;
130 	tss32_t *tss_entry;
131 
132 	id = pre_boot_apic_id();
133 
134 	/* Runtime address of current TSS */
135 	tss_entry = &tss[id];
136 
137 	/* Update TSS */
138 	memset((void *)tss_entry, 0, sizeof(tss32_t));
139 	tss_entry->ss0 = KERNEL_DS;
140 
141 	/* Update descriptors for TSS and percpu data segment.  */
142 	set_gdt_entry(TSS_MAIN + id * 8,
143 		      (unsigned long)tss_entry, 0xffff, 0x89, 0);
144 	set_gdt_entry(TSS_MAIN + MAX_TEST_CPUS * 8 + id * 8,
145 		      (unsigned long)stacktop - 4096, 0xfffff, 0x93, 0xc0);
146 
147 	return TSS_MAIN + id * 8;
148 }
149 #endif
150 
151 void setup_multiboot(struct mbi_bootinfo *bi)
152 {
153 	struct mbi_module *mods;
154 
155 	bootinfo = bi;
156 
157 	u64 best_start = (uintptr_t) &edata;
158 	u64 best_end = bootinfo->mem_upper * 1024ull;
159 	phys_alloc_init(best_start, best_end - best_start);
160 
161 	if (bootinfo->mods_count != 1)
162 		return;
163 
164 	mods = (struct mbi_module *)(uintptr_t) bootinfo->mods_addr;
165 
166 	initrd = (char *)(uintptr_t) mods->start;
167 	initrd_size = mods->end - mods->start;
168 }
169 
170 #ifdef CONFIG_EFI
171 
172 static struct percpu_data __percpu_data[MAX_TEST_CPUS];
173 
174 static void setup_segments64(void)
175 {
176 	/* Update data segments */
177 	write_ds(KERNEL_DS);
178 	write_es(KERNEL_DS);
179 	write_fs(KERNEL_DS);
180 	write_gs(KERNEL_DS);
181 	write_ss(KERNEL_DS);
182 
183 	/* Setup percpu base */
184 	wrmsr(MSR_GS_BASE, (u64)&__percpu_data[pre_boot_apic_id()]);
185 
186 	/*
187 	 * Update the code segment by putting it on the stack before the return
188 	 * address, then doing a far return: this will use the new code segment
189 	 * along with the address.
190 	 */
191 	asm volatile("pushq %1\n\t"
192 		     "lea 1f(%%rip), %0\n\t"
193 		     "pushq %0\n\t"
194 		     "lretq\n\t"
195 		     "1:"
196 		     :: "r" ((u64)KERNEL_DS), "i" (KERNEL_CS));
197 }
198 
199 static efi_status_t setup_memory_allocator(efi_bootinfo_t *efi_bootinfo)
200 {
201 	int i;
202 	unsigned long free_mem_pages = 0;
203 	unsigned long free_mem_start = 0;
204 	struct efi_boot_memmap *map = &(efi_bootinfo->mem_map);
205 	efi_memory_desc_t *buffer = *map->map;
206 	efi_memory_desc_t *d = NULL;
207 
208 	/*
209 	 * The 'buffer' contains multiple descriptors that describe memory
210 	 * regions maintained by UEFI. This code records the largest free
211 	 * EFI_CONVENTIONAL_MEMORY region which will be used to set up the
212 	 * memory allocator, so that the memory allocator can work in the
213 	 * largest free continuous memory region.
214 	 */
215 	for (i = 0; i < *(map->map_size); i += *(map->desc_size)) {
216 		d = (efi_memory_desc_t *)(&((u8 *)buffer)[i]);
217 		if (d->type == EFI_CONVENTIONAL_MEMORY) {
218 			if (free_mem_pages < d->num_pages) {
219 				free_mem_pages = d->num_pages;
220 				free_mem_start = d->phys_addr;
221 			}
222 		}
223 	}
224 
225 	if (free_mem_pages == 0) {
226 		return EFI_OUT_OF_RESOURCES;
227 	}
228 
229 	phys_alloc_init(free_mem_start, free_mem_pages << EFI_PAGE_SHIFT);
230 
231 	return EFI_SUCCESS;
232 }
233 
234 static efi_status_t setup_rsdp(efi_bootinfo_t *efi_bootinfo)
235 {
236 	efi_status_t status;
237 	struct rsdp_descriptor *rsdp;
238 
239 	/*
240 	 * RSDP resides in an EFI_ACPI_RECLAIM_MEMORY region, which is not used
241 	 * by kvm-unit-tests x86's memory allocator. So it is not necessary to
242 	 * copy the data structure to another memory region to prevent
243 	 * unintentional overwrite.
244 	 */
245 	status = efi_get_system_config_table(ACPI_TABLE_GUID, (void **)&rsdp);
246 	if (status != EFI_SUCCESS) {
247 		return status;
248 	}
249 
250 	set_efi_rsdp(rsdp);
251 
252 	return EFI_SUCCESS;
253 }
254 
255 /* Defined in cstart64.S or efistart64.S */
256 extern u8 ptl4;
257 extern u8 ptl3;
258 extern u8 ptl2;
259 
260 static void setup_page_table(void)
261 {
262 	pgd_t *curr_pt;
263 	phys_addr_t flags;
264 	int i;
265 
266 	/* Set default flags */
267 	flags = PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
268 
269 	/* Set AMD SEV C-Bit for page table entries */
270 	flags |= get_amd_sev_c_bit_mask();
271 
272 	/* Level 4 */
273 	curr_pt = (pgd_t *)&ptl4;
274 	curr_pt[0] = ((phys_addr_t)&ptl3) | flags;
275 	/* Level 3 */
276 	curr_pt = (pgd_t *)&ptl3;
277 	for (i = 0; i < 4; i++) {
278 		curr_pt[i] = (((phys_addr_t)&ptl2) + i * PAGE_SIZE) | flags;
279 	}
280 	/* Level 2 */
281 	curr_pt = (pgd_t *)&ptl2;
282 	flags |= PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAGE_SIZE_MASK | PT_GLOBAL_MASK;
283 	for (i = 0; i < 4 * 512; i++)	{
284 		curr_pt[i] = ((phys_addr_t) i << 21) | flags;
285 	}
286 
287 	if (amd_sev_es_enabled()) {
288 		setup_ghcb_pte((pgd_t *)&ptl4);
289 	}
290 
291 	/* Load 4-level page table */
292 	write_cr3((ulong)&ptl4);
293 }
294 
/*
 * Set up the TSS of the current processor and load the GDT and TSS using
 * the descriptor offset returned by setup_tss().
 */
static void setup_gdt_tss(void)
{
	/* NULL is fine: the 64-bit setup_tss does not use the stacktop argument. */
	load_gdt_tss(setup_tss(NULL));
}
303 
304 efi_status_t setup_efi(efi_bootinfo_t *efi_bootinfo)
305 {
306 	efi_status_t status;
307 	const char *phase;
308 
309 	status = setup_memory_allocator(efi_bootinfo);
310 	if (status != EFI_SUCCESS) {
311 		printf("Failed to set up memory allocator: ");
312 		switch (status) {
313 		case EFI_OUT_OF_RESOURCES:
314 			printf("No free memory region\n");
315 			break;
316 		default:
317 			printf("Unknown error\n");
318 			break;
319 		}
320 		return status;
321 	}
322 
323 	status = setup_rsdp(efi_bootinfo);
324 	if (status != EFI_SUCCESS) {
325 		printf("Cannot find RSDP in EFI system table\n");
326 		return status;
327 	}
328 
329 	phase = "AMD SEV";
330 	status = setup_amd_sev();
331 
332 	/* Continue if AMD SEV is not supported, but skip SEV-ES setup */
333 	if (status == EFI_SUCCESS) {
334 		phase = "AMD SEV-ES";
335 		status = setup_amd_sev_es();
336 	}
337 
338 	if (status != EFI_SUCCESS && status != EFI_UNSUPPORTED) {
339 		printf("%s setup failed, error = 0x%lx\n", phase, status);
340 		return status;
341 	}
342 
343 	setup_gdt_tss();
344 	/*
345 	 * GS.base, which points at the per-vCPU data, must be configured prior
346 	 * to resetting the APIC, which sets the per-vCPU APIC ops.
347 	 */
348 	setup_segments64();
349 	reset_apic();
350 	setup_idt();
351 	load_idt();
352 	mask_pic_interrupts();
353 	setup_page_table();
354 	enable_apic();
355 	ap_init();
356 	enable_x2apic();
357 	smp_init();
358 
359 	return EFI_SUCCESS;
360 }
361 
362 #endif /* CONFIG_EFI */
363 
364 void setup_libcflat(void)
365 {
366 	if (initrd) {
367 		/* environ is currently the only file in the initrd */
368 		u32 size = MIN(initrd_size, ENV_SIZE);
369 		const char *str;
370 
371 		memcpy(env, initrd, size);
372 		setup_env(env, size);
373 		if ((str = getenv("BOOTLOADER")) && atol(str) != 0)
374 			add_setup_arg("bootloader");
375 	}
376 }
377