/* kvm-unit-tests: lib/x86/desc.c (revision f3f338619e4938c2509f5c691adc1f331b07c203) */
#include "libcflat.h"
#include "desc.h"
#include "processor.h"
#include "smp.h"
#include <setjmp.h>
#include "apic-defs.h"

/* Boot-related data structures */

/* IDT and IDT descriptor */
idt_entry_t boot_idt[256] = {0};

struct descriptor_table_ptr idt_descr = {
	.limit = sizeof(boot_idt) - 1,
	.base = (unsigned long)boot_idt,
};

#ifndef __x86_64__
/* GDT, TSS and descriptors */
gdt_entry_t gdt[TSS_MAIN / 8 + MAX_TEST_CPUS * 2] = {
	{     0, 0, 0, .type_limit_flags = 0x0000}, /* 0x00 null */
	{0xffff, 0, 0, .type_limit_flags = 0xcf9b}, /* flat 32-bit code segment */
	{0xffff, 0, 0, .type_limit_flags = 0xcf93}, /* flat 32-bit data segment */
	{0xffff, 0, 0, .type_limit_flags = 0xcf1b}, /* flat 32-bit code segment, not present */
	{     0, 0, 0, .type_limit_flags = 0x0000}, /* TSS for task gates */
	{0xffff, 0, 0, .type_limit_flags = 0x8f9b}, /* 16-bit code segment */
	{0xffff, 0, 0, .type_limit_flags = 0x8f93}, /* 16-bit data segment */
	{0xffff, 0, 0, .type_limit_flags = 0xcffb}, /* 32-bit code segment (user) */
	{0xffff, 0, 0, .type_limit_flags = 0xcff3}, /* 32-bit data segment (user) */
};

tss32_t tss[MAX_TEST_CPUS] = {0};
#else
gdt_entry_t gdt[TSS_MAIN / 8 + MAX_TEST_CPUS * 2] = {
	{     0, 0, 0, .type_limit_flags = 0x0000}, /* 0x00 null */
	{0xffff, 0, 0, .type_limit_flags = 0xaf9b}, /* 0x08 64-bit code segment */
	{0xffff, 0, 0, .type_limit_flags = 0xcf93}, /* 0x10 32/64-bit data segment */
	{0xffff, 0, 0, .type_limit_flags = 0xaf1b}, /* 0x18 64-bit code segment, not present */
	{0xffff, 0, 0, .type_limit_flags = 0xcf9b}, /* 0x20 32-bit code segment */
	{0xffff, 0, 0, .type_limit_flags = 0x8f9b}, /* 0x28 16-bit code segment */
	{0xffff, 0, 0, .type_limit_flags = 0x8f93}, /* 0x30 16-bit data segment */
	{0xffff, 0, 0, .type_limit_flags = 0xcffb}, /* 0x38 32-bit code segment (user) */
	{0xffff, 0, 0, .type_limit_flags = 0xcff3}, /* 0x40 32/64-bit data segment (user) */
	{0xffff, 0, 0, .type_limit_flags = 0xaffb}, /* 0x48 64-bit code segment (user) */
};

tss64_t tss[MAX_TEST_CPUS] = {0};
#endif

struct descriptor_table_ptr gdt_descr = {
	.limit = sizeof(gdt) - 1,
	.base = (unsigned long)gdt,
};

#ifndef __x86_64__
__attribute__((regparm(1)))
#endif
void do_handle_exception(struct ex_regs *regs);

/*
 * Fill an idt_entry_t or call gate entry, clearing e_sz bytes first.
 *
 * This can be used for both IDT entries and call gate entries, since the gate
 * descriptor layout is identical to idt_entry_t, except for the absence of
 * .offset2 and .reserved fields. To do so, pass in e_sz according to the gate
 * descriptor size.
 */
void set_desc_entry(idt_entry_t *e, size_t e_sz, void *addr,
		    u16 sel, u16 type, u16 dpl)
{
	memset(e, 0, e_sz);
	e->offset0 = (unsigned long)addr;
	e->selector = sel;
	e->ist = 0;
	e->type = type;
	e->dpl = dpl;
	e->p = 1;
	e->offset1 = (unsigned long)addr >> 16;
#ifdef __x86_64__
	if (e_sz == sizeof(*e))
		e->offset2 = (unsigned long)addr >> 32;
#endif
}
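
/*
 * Illustrative sketch (not part of the original file): per the comment above,
 * the same helper can fill a gate descriptor placed in a GDT slot by passing
 * the gate's size, so fields beyond the gate are never touched.  The slot
 * index and target below are hypothetical; 12 is the 32-bit call-gate type.
 */
#if 0
	idt_entry_t *gate = (idt_entry_t *)&gdt[FREE_GDT_INDEX];	/* hypothetical slot */

	set_desc_entry(gate, 8, call_gate_target, KERNEL_CS, 12, 3);
#endif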

void set_idt_entry(int vec, void *addr, int dpl)
{
	idt_entry_t *e = &boot_idt[vec];
	set_desc_entry(e, sizeof *e, addr, read_cs(), 14, dpl);
}

void set_idt_dpl(int vec, u16 dpl)
{
	idt_entry_t *e = &boot_idt[vec];
	e->dpl = dpl;
}

void set_idt_sel(int vec, u16 sel)
{
	idt_entry_t *e = &boot_idt[vec];
	e->selector = sel;
}

struct ex_record {
	unsigned long rip;
	unsigned long handler;
};

extern struct ex_record exception_table_start, exception_table_end;

const char* exception_mnemonic(int vector)
{
	switch(vector) {
#define VEC(v) case v##_VECTOR: return "#" #v
	VEC(DE);
	VEC(DB);
	VEC(NMI);
	VEC(BP);
	VEC(OF);
	VEC(BR);
	VEC(UD);
	VEC(NM);
	VEC(DF);
	VEC(TS);
	VEC(NP);
	VEC(SS);
	VEC(GP);
	VEC(PF);
	VEC(MF);
	VEC(AC);
	VEC(MC);
	VEC(XM);
	VEC(VE);
	VEC(CP);
	VEC(HV);
	VEC(VC);
	VEC(SX);
	default: return "#??";
#undef VEC
	}
}

void unhandled_exception(struct ex_regs *regs, bool cpu)
{
	printf("Unhandled %sexception %ld %s at ip %016lx\n",
	       cpu ? "cpu " : "", regs->vector,
	       exception_mnemonic(regs->vector), regs->rip);
	if (regs->vector == 14)
		printf("PF at %#lx addr %#lx\n", regs->rip, read_cr2());

	printf("error_code=%04lx      rflags=%08lx      cs=%08lx\n"
	       "rax=%016lx rcx=%016lx rdx=%016lx rbx=%016lx\n"
	       "rbp=%016lx rsi=%016lx rdi=%016lx\n"
#ifdef __x86_64__
	       " r8=%016lx  r9=%016lx r10=%016lx r11=%016lx\n"
	       "r12=%016lx r13=%016lx r14=%016lx r15=%016lx\n"
#endif
	       "cr0=%016lx cr2=%016lx cr3=%016lx cr4=%016lx\n"
#ifdef __x86_64__
	       "cr8=%016lx\n"
#endif
	       ,
	       regs->error_code, regs->rflags, regs->cs,
	       regs->rax, regs->rcx, regs->rdx, regs->rbx,
	       regs->rbp, regs->rsi, regs->rdi,
#ifdef __x86_64__
	       regs->r8, regs->r9, regs->r10, regs->r11,
	       regs->r12, regs->r13, regs->r14, regs->r15,
#endif
	       read_cr0(), read_cr2(), read_cr3(), read_cr4()
#ifdef __x86_64__
	       , read_cr8()
#endif
	);
	dump_frame_stack((void*) regs->rip, (void*) regs->rbp);
	abort();
}

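/*
 * Default handler installed by setup_idt(): stash the vector, RF bit and
 * error code for the current CPU, then redirect RIP to the fixup recorded by
 * ASM_TRY() if the faulting RIP has an exception-table entry; otherwise the
 * exception is fatal.
 */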
static void check_exception_table(struct ex_regs *regs)
{
	struct ex_record *ex;

	this_cpu_write_exception_vector(regs->vector);
	this_cpu_write_exception_rflags_rf((regs->rflags >> 16) & 1);
	this_cpu_write_exception_error_code(regs->error_code);

	for (ex = &exception_table_start; ex != &exception_table_end; ++ex) {
		if (ex->rip == regs->rip) {
			regs->rip = ex->handler;
			return;
		}
	}
	unhandled_exception(regs, false);
}

static handler exception_handlers[32];

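/*
 * Install @fn as the handler for vector @v and return the previously
 * installed handler so callers can restore it.  Only vectors 0-31 are
 * tracked.
 */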
handler handle_exception(u8 v, handler fn)
{
	handler old;

	old = exception_handlers[v];
	if (v < 32)
		exception_handlers[v] = fn;
	return old;
}

#ifndef __x86_64__
__attribute__((regparm(1)))
#endif
void do_handle_exception(struct ex_regs *regs)
{
	if (regs->vector < 32 && exception_handlers[regs->vector]) {
		exception_handlers[regs->vector](regs);
		return;
	}
	unhandled_exception(regs, true);
}

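/*
 * Per-vector entry stubs.  EX() is for vectors that do not push an error
 * code, so a dummy 0 is pushed to keep the stack frame layout uniform;
 * EX_E() is for vectors where the CPU pushes an error code.  Both then push
 * the vector number and jump to the common __handle_exception stub.
 */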
#define EX(NAME, N) extern char NAME##_fault;	\
	asm (".pushsection .text \n\t"		\
	     #NAME"_fault: \n\t"		\
	     "push"W" $0 \n\t"			\
	     "push"W" $"#N" \n\t"		\
	     "jmp __handle_exception \n\t"	\
	     ".popsection")

#define EX_E(NAME, N) extern char NAME##_fault;	\
	asm (".pushsection .text \n\t"		\
	     #NAME"_fault: \n\t"		\
	     "push"W" $"#N" \n\t"		\
	     "jmp __handle_exception \n\t"	\
	     ".popsection")

EX(de, 0);
EX(db, 1);
EX(nmi, 2);
EX(bp, 3);
EX(of, 4);
EX(br, 5);
EX(ud, 6);
EX(nm, 7);
EX_E(df, 8);
EX_E(ts, 10);
EX_E(np, 11);
EX_E(ss, 12);
EX_E(gp, 13);
EX_E(pf, 14);
EX(mf, 16);
EX_E(ac, 17);
EX(mc, 18);
EX(xm, 19);
EX_E(cp, 21);

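/*
 * Common entry point for all stubs above: build a struct ex_regs on the
 * stack, pass its address to do_handle_exception() (in RDI on x86_64, in EAX
 * via regparm(1) on i386), then unwind the frame, drop the vector and error
 * code, and IRET back to the (possibly fixed-up) RIP.
 */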
asm (".pushsection .text \n\t"
     "__handle_exception: \n\t"
#ifdef __x86_64__
     "push %r15; push %r14; push %r13; push %r12 \n\t"
     "push %r11; push %r10; push %r9; push %r8 \n\t"
#endif
     "push %"R "di; push %"R "si; push %"R "bp; sub $"S", %"R "sp \n\t"
     "push %"R "bx; push %"R "dx; push %"R "cx; push %"R "ax \n\t"
#ifdef __x86_64__
     "mov %"R "sp, %"R "di \n\t"
#else
     "mov %"R "sp, %"R "ax \n\t"
#endif
     "call do_handle_exception \n\t"
     "pop %"R "ax; pop %"R "cx; pop %"R "dx; pop %"R "bx \n\t"
     "add $"S", %"R "sp; pop %"R "bp; pop %"R "si; pop %"R "di \n\t"
#ifdef __x86_64__
     "pop %r8; pop %r9; pop %r10; pop %r11 \n\t"
     "pop %r12; pop %r13; pop %r14; pop %r15 \n\t"
#endif
     "add $"S", %"R "sp \n\t"
     "add $"S", %"R "sp \n\t"
     "iret"W" \n\t"
     ".popsection");

static void *idt_handlers[32] = {
	[0] = &de_fault,
	[1] = &db_fault,
	[2] = &nmi_fault,
	[3] = &bp_fault,
	[4] = &of_fault,
	[5] = &br_fault,
	[6] = &ud_fault,
	[7] = &nm_fault,
	[8] = &df_fault,
	[10] = &ts_fault,
	[11] = &np_fault,
	[12] = &ss_fault,
	[13] = &gp_fault,
	[14] = &pf_fault,
	[16] = &mf_fault,
	[17] = &ac_fault,
	[18] = &mc_fault,
	[19] = &xm_fault,
	[21] = &cp_fault,
};

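/* Set by setup_idt() when KVM's forced emulation prefix (KVM_FEP) is usable. */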
bool is_fep_available;

static bool __is_fep_available(void)
{
	/*
	 * Use the non-FEP ASM_TRY() as KVM will inject a #UD on the prefix
	 * itself if forced emulation is not available.
	 */
	asm goto(ASM_TRY("%l[fep_unavailable]")
		 KVM_FEP "nop\n\t"
		 ::: "memory" : fep_unavailable);
	return true;
fep_unavailable:
	return false;
}

void setup_idt(void)
{
	int i;

	for (i = 0; i < 32; i++) {
		if (!idt_handlers[i])
			continue;

		set_idt_entry(i, idt_handlers[i], 0);
		handle_exception(i, check_exception_table);
	}

	load_idt();

	/*
	 * Detect support for forced emulation *after* loading the IDT, as this
	 * will #UD if FEP is unavailable.
	 */
	is_fep_available = __is_fep_available();
}

void load_idt(void)
{
	lidt(&idt_descr);
}

unsigned exception_vector(void)
{
	return this_cpu_read_exception_vector();
}

unsigned exception_error_code(void)
{
	return this_cpu_read_exception_error_code();
}

bool exception_rflags_rf(void)
{
	return this_cpu_read_exception_rflags_rf() & 1;
}

static char intr_alt_stack[4096];

void set_gdt_entry_base(int sel, unsigned long base)
{
	gdt_entry_t *entry = &gdt[sel >> 3];

	/* Setup the descriptor base address */
	entry->base1 = (base & 0xFFFF);
	entry->base2 = (base >> 16) & 0xFF;
	entry->base3 = (base >> 24) & 0xFF;

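	/*
	 * On x86_64, system descriptors (S=0, e.g. a TSS descriptor) are 16
	 * bytes and carry bits 63:32 of the base in the second half.
	 */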
#ifdef __x86_64__
	if (!entry->s) {
		struct system_desc64 *entry16 = (struct system_desc64 *)entry;
		entry16->zero = 0;
		entry16->base4 = base >> 32;
	}
#endif
}

void set_gdt_entry(int sel, unsigned long base, u32 limit, u8 type, u8 flags)
{
	gdt_entry_t *entry = &gdt[sel >> 3];

	/* Setup the descriptor limits, type and flags */
	entry->limit1 = (limit & 0xFFFF);
	entry->type_limit_flags = ((limit & 0xF0000) >> 8) | ((flags & 0xF0) << 8) | type;
	set_gdt_entry_base(sel, base);
}

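/*
 * Rewrite the descriptor type as 0x89 (present, available 32/64-bit TSS) so
 * the selector can be reloaded with LTR; both LTR and hardware task switches
 * mark the TSS descriptor busy.
 */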
void clear_tss_busy(int sel)
{
	gdt_entry_t *entry = &gdt[sel >> 3];

	entry->type_limit_flags &= ~0xFF;
	entry->type_limit_flags |= 0x89;
}

void load_gdt_tss(size_t tss_offset)
{
	lgdt(&gdt_descr);
	ltr(tss_offset);
}

#ifndef __x86_64__
void set_gdt_task_gate(u16 sel, u16 tss_sel)
{
	set_gdt_entry(sel, tss_sel, 0, 0x85, 0); // task, present
}

void set_idt_task_gate(int vec, u16 sel)
{
	idt_entry_t *e = &boot_idt[vec];

	memset(e, 0, sizeof *e);

	e->selector = sel;
	e->ist = 0;
	e->type = 5;
	e->dpl = 0;
	e->p = 1;
}

/*
 * 0 - main task
 * 1 - interrupt task
 */

tss32_t tss_intr;

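/*
 * Prepare the interrupt task's TSS: it runs on intr_alt_stack with flat
 * kernel segments.  CR3 is also stored in the main TSS because a hardware
 * task switch loads CR3 from the incoming TSS.
 */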
void setup_tss32(void)
{
	u16 desc_size = sizeof(tss32_t);

	tss[0].cr3 = read_cr3();
	tss_intr.cr3 = read_cr3();
	tss_intr.ss0 = tss_intr.ss1 = tss_intr.ss2 = 0x10;
	tss_intr.esp = tss_intr.esp0 = tss_intr.esp1 = tss_intr.esp2 =
		(u32)intr_alt_stack + 4096;
	tss_intr.cs = 0x08;
	tss_intr.ds = tss_intr.es = tss_intr.fs = tss_intr.ss = 0x10;
	tss_intr.gs = read_gs();
	tss_intr.iomap_base = (u16)desc_size;
	set_gdt_entry(TSS_INTR, (u32)&tss_intr, desc_size - 1, 0x89, 0);
}

void set_intr_task_gate(int e, void *fn)
{
	tss_intr.eip = (u32)fn;
	set_idt_task_gate(e, TSS_INTR);
}

void setup_alt_stack(void)
{
	setup_tss32();
}

void set_intr_alt_stack(int e, void *fn)
{
	set_intr_task_gate(e, fn);
}

void print_current_tss_info(void)
{
	u16 tr = str();

	if (tr != TSS_MAIN && tr != TSS_INTR)
		printf("Unknown TSS %x\n", tr);
	else
		printf("TR=%x (%s) Main TSS back link %x. Intr TSS back link %x\n",
		       tr, tr == TSS_MAIN ? "main" : "interrupt", tss[0].prev, tss_intr.prev);
}
#else
void set_intr_alt_stack(int e, void *addr)
{
	set_idt_entry(e, addr, 0);
	boot_idt[e].ist = 1;
}

void setup_alt_stack(void)
{
	tss[0].ist1 = (u64)intr_alt_stack + 4096;
}
#endif

static bool exception;
static jmp_buf *exception_jmpbuf;

static void exception_handler_longjmp(void)
{
	longjmp(*exception_jmpbuf, 1);
}

static void exception_handler(struct ex_regs *regs)
{
	/* longjmp must happen after iret, so do not do it now.  */
	exception = true;
	regs->rip = (unsigned long)&exception_handler_longjmp;
	regs->cs = read_cs();
}

bool test_for_exception(unsigned int ex, void (*trigger_func)(void *data),
			void *data)
{
	handler old;
	jmp_buf jmpbuf;
	int ret;

	old = handle_exception(ex, exception_handler);
	ret = set_exception_jmpbuf(jmpbuf);
	if (ret == 0)
		trigger_func(data);
	handle_exception(ex, old);
	return ret;
}
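
/*
 * Usage sketch (illustrative, not part of the original file): tests pass a
 * small trigger callback and check the return value.  The helper names and
 * MSR index below are hypothetical; GP_VECTOR, wrmsr() and report() come
 * from processor.h/libcflat.h.
 */
#if 0
static void bad_wrmsr(void *index)
{
	wrmsr(*(u32 *)index, 0);
}

static void example_probe(void)
{
	u32 msr = 0xdeadbeef;	/* presumably unimplemented MSR */

	report(test_for_exception(GP_VECTOR, bad_wrmsr, &msr),
	       "WRMSR to unknown MSR raises #GP");
}
#endif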

void __set_exception_jmpbuf(jmp_buf *addr)
{
	exception_jmpbuf = addr;
}

gdt_entry_t *get_gdt_entry(u16 sel)
{
	struct descriptor_table_ptr gdt_ptr;
	gdt_entry_t *gdt;

	sgdt(&gdt_ptr);
	gdt = (gdt_entry_t *)gdt_ptr.base;
	return &gdt[sel / 8];
}

gdt_entry_t *get_tss_descr(void)
{
	return get_gdt_entry(str());
}

gdt_entry_t *get_ldt_descr(void)
{
	return get_gdt_entry(sldt());
}

unsigned long get_gdt_entry_base(gdt_entry_t *entry)
{
	unsigned long base;
	base = entry->base1 | ((u32)entry->base2 << 16) | ((u32)entry->base3 << 24);
#ifdef __x86_64__
	if (!entry->s) {
		base |= (u64)((struct system_desc64 *)entry)->base4 << 32;
	}
#endif
	return base;
}

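/*
 * The segment limit field is 20 bits; with the granularity bit set it is in
 * 4 KiB units, so shift it up and fill the low 12 bits.
 */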
unsigned long get_gdt_entry_limit(gdt_entry_t *entry)
{
	unsigned long limit;
	limit = entry->limit1 | ((u32)entry->limit2 << 16);
	if (entry->g) {
		limit = (limit << 12) | 0xFFF;
	}
	return limit;
}