1 #include "libcflat.h"
2 #include "desc.h"
3 #include "processor.h"
4 #include "smp.h"
5 #include <setjmp.h>
6 #include "apic-defs.h"
7
8 /* Boot-related data structures */
9
10 /* IDT and IDT descriptor */
11 idt_entry_t boot_idt[256] = {0};
12
13 struct descriptor_table_ptr idt_descr = {
14 .limit = sizeof(boot_idt) - 1,
15 .base = (unsigned long)boot_idt,
16 };
17
18 #ifndef __x86_64__
19 /* GDT, TSS and descriptors */
20 gdt_entry_t gdt[TSS_MAIN / 8 + MAX_TEST_CPUS * 2] = {
21 { 0, 0, 0, .type_limit_flags = 0x0000}, /* 0x00 null */
22 {0xffff, 0, 0, .type_limit_flags = 0xcf9b}, /* flat 32-bit code segment */
23 {0xffff, 0, 0, .type_limit_flags = 0xcf93}, /* flat 32-bit data segment */
24 {0xffff, 0, 0, .type_limit_flags = 0xcf1b}, /* flat 32-bit code segment, not present */
25 { 0, 0, 0, .type_limit_flags = 0x0000}, /* TSS for task gates */
26 {0xffff, 0, 0, .type_limit_flags = 0x8f9b}, /* 16-bit code segment */
27 {0xffff, 0, 0, .type_limit_flags = 0x8f93}, /* 16-bit data segment */
28 {0xffff, 0, 0, .type_limit_flags = 0xcffb}, /* 32-bit code segment (user) */
29 {0xffff, 0, 0, .type_limit_flags = 0xcff3}, /* 32-bit data segment (user) */
30 };
31
32 tss32_t tss[MAX_TEST_CPUS] = {0};
33 #else
34 gdt_entry_t gdt[TSS_MAIN / 8 + MAX_TEST_CPUS * 2] = {
35 { 0, 0, 0, .type_limit_flags = 0x0000}, /* 0x00 null */
36 {0xffff, 0, 0, .type_limit_flags = 0xaf9b}, /* 0x08 64-bit code segment */
37 {0xffff, 0, 0, .type_limit_flags = 0xcf93}, /* 0x10 32/64-bit data segment */
38 {0xffff, 0, 0, .type_limit_flags = 0xaf1b}, /* 0x18 64-bit code segment, not present */
39 {0xffff, 0, 0, .type_limit_flags = 0xcf9b}, /* 0x20 32-bit code segment */
40 {0xffff, 0, 0, .type_limit_flags = 0x8f9b}, /* 0x28 16-bit code segment */
41 {0xffff, 0, 0, .type_limit_flags = 0x8f93}, /* 0x30 16-bit data segment */
42 {0xffff, 0, 0, .type_limit_flags = 0xcffb}, /* 0x38 32-bit code segment (user) */
43 {0xffff, 0, 0, .type_limit_flags = 0xcff3}, /* 0x40 32/64-bit data segment (user) */
44 {0xffff, 0, 0, .type_limit_flags = 0xaffb}, /* 0x48 64-bit code segment (user) */
45 };
46
47 tss64_t tss[MAX_TEST_CPUS] = {0};
48 #endif
49
50 struct descriptor_table_ptr gdt_descr = {
51 .limit = sizeof(gdt) - 1,
52 .base = (unsigned long)gdt,
53 };
54
55 #ifndef __x86_64__
56 __attribute__((regparm(1)))
57 #endif
58 void do_handle_exception(struct ex_regs *regs);
59
60 /*
61 * Fill an idt_entry_t or call gate entry, clearing e_sz bytes first.
62 *
63 * This can be used for both IDT entries and call gate entries, since the gate
64 * descriptor layout is identical to idt_entry_t, except for the absence of
65 * .offset2 and .reserved fields. To do so, pass in e_sz according to the gate
66 * descriptor size.
67 */
set_desc_entry(idt_entry_t * e,size_t e_sz,void * addr,u16 sel,u16 type,u16 dpl)68 void set_desc_entry(idt_entry_t *e, size_t e_sz, void *addr,
69 u16 sel, u16 type, u16 dpl)
70 {
71 memset(e, 0, e_sz);
72 e->offset0 = (unsigned long)addr;
73 e->selector = sel;
74 e->ist = 0;
75 e->type = type;
76 e->dpl = dpl;
77 e->p = 1;
78 e->offset1 = (unsigned long)addr >> 16;
79 #ifdef __x86_64__
80 if (e_sz == sizeof(*e))
81 e->offset2 = (unsigned long)addr >> 32;
82 #endif
83 }
84
set_idt_entry(int vec,void * addr,int dpl)85 void set_idt_entry(int vec, void *addr, int dpl)
86 {
87 idt_entry_t *e = &boot_idt[vec];
88 set_desc_entry(e, sizeof *e, addr, read_cs(), 14, dpl);
89 }
90
/* Change only the privilege level of the existing gate for vector vec. */
void set_idt_dpl(int vec, u16 dpl)
{
	boot_idt[vec].dpl = dpl;
}
96
/* Change only the segment selector of the existing gate for vector vec. */
void set_idt_sel(int vec, u16 sel)
{
	boot_idt[vec].selector = sel;
}
102
/*
 * One exception-table record.  check_exception_table() matches the faulting
 * instruction pointer against .rip and, on a hit, resumes at .handler.
 */
struct ex_record {
	unsigned long rip;	/* instruction address to match */
	unsigned long handler;	/* address to resume at on a match */
};

/* First record and one-past-last marker of the table; defined outside this file. */
extern struct ex_record exception_table_start;
extern struct ex_record exception_table_end;
109
exception_mnemonic(int vector)110 const char* exception_mnemonic(int vector)
111 {
112 switch(vector) {
113 #define VEC(v) case v##_VECTOR: return "#" #v
114 VEC(DE);
115 VEC(DB);
116 VEC(NMI);
117 VEC(BP);
118 VEC(OF);
119 VEC(BR);
120 VEC(UD);
121 VEC(NM);
122 VEC(DF);
123 VEC(TS);
124 VEC(NP);
125 VEC(SS);
126 VEC(GP);
127 VEC(PF);
128 VEC(MF);
129 VEC(AC);
130 VEC(MC);
131 VEC(XM);
132 VEC(VE);
133 VEC(CP);
134 VEC(HV);
135 VEC(VC);
136 VEC(SX);
137 default: return "#??";
138 #undef VEC
139 }
140 }
141
unhandled_exception(struct ex_regs * regs,bool cpu)142 void unhandled_exception(struct ex_regs *regs, bool cpu)
143 {
144 printf("Unhandled %sexception %ld %s at ip %016lx\n",
145 cpu ? "cpu " : "", regs->vector,
146 exception_mnemonic(regs->vector), regs->rip);
147 if (regs->vector == 14)
148 printf("PF at %#lx addr %#lx\n", regs->rip, read_cr2());
149
150 printf("error_code=%04lx rflags=%08lx cs=%08lx\n"
151 "rax=%016lx rcx=%016lx rdx=%016lx rbx=%016lx\n"
152 "rbp=%016lx rsi=%016lx rdi=%016lx\n"
153 #ifdef __x86_64__
154 " r8=%016lx r9=%016lx r10=%016lx r11=%016lx\n"
155 "r12=%016lx r13=%016lx r14=%016lx r15=%016lx\n"
156 #endif
157 "cr0=%016lx cr2=%016lx cr3=%016lx cr4=%016lx\n"
158 #ifdef __x86_64__
159 "cr8=%016lx\n"
160 #endif
161 ,
162 regs->error_code, regs->rflags, regs->cs,
163 regs->rax, regs->rcx, regs->rdx, regs->rbx,
164 regs->rbp, regs->rsi, regs->rdi,
165 #ifdef __x86_64__
166 regs->r8, regs->r9, regs->r10, regs->r11,
167 regs->r12, regs->r13, regs->r14, regs->r15,
168 #endif
169 read_cr0(), read_cr2(), read_cr3(), read_cr4()
170 #ifdef __x86_64__
171 , read_cr8()
172 #endif
173 );
174 dump_frame_stack((void*) regs->rip, (void*) regs->rbp);
175 abort();
176 }
177
check_exception_table(struct ex_regs * regs)178 static void check_exception_table(struct ex_regs *regs)
179 {
180 struct ex_record *ex;
181
182 this_cpu_write_exception_vector(regs->vector);
183 this_cpu_write_exception_rflags_rf((regs->rflags >> 16) & 1);
184 this_cpu_write_exception_error_code(regs->error_code);
185
186 for (ex = &exception_table_start; ex != &exception_table_end; ++ex) {
187 if (ex->rip == regs->rip) {
188 regs->rip = ex->handler;
189 return;
190 }
191 }
192 unhandled_exception(regs, false);
193 }
194
195 static handler exception_handlers[32];
196
handle_exception(u8 v,handler fn)197 handler handle_exception(u8 v, handler fn)
198 {
199 handler old;
200
201 old = exception_handlers[v];
202 if (v < 32)
203 exception_handlers[v] = fn;
204 return old;
205 }
206
207 #ifndef __x86_64__
208 __attribute__((regparm(1)))
209 #endif
do_handle_exception(struct ex_regs * regs)210 void do_handle_exception(struct ex_regs *regs)
211 {
212 if (regs->vector < 32 && exception_handlers[regs->vector]) {
213 exception_handlers[regs->vector](regs);
214 return;
215 }
216 unhandled_exception(regs, true);
217 }
218
219 #define EX(NAME, N) extern char NAME##_fault; \
220 asm (".pushsection .text \n\t" \
221 #NAME"_fault: \n\t" \
222 "push"W" $0 \n\t" \
223 "push"W" $"#N" \n\t" \
224 "jmp __handle_exception \n\t" \
225 ".popsection")
226
227 #define EX_E(NAME, N) extern char NAME##_fault; \
228 asm (".pushsection .text \n\t" \
229 #NAME"_fault: \n\t" \
230 "push"W" $"#N" \n\t" \
231 "jmp __handle_exception \n\t" \
232 ".popsection")
233
234 EX(de, 0);
235 EX(db, 1);
236 EX(nmi, 2);
237 EX(bp, 3);
238 EX(of, 4);
239 EX(br, 5);
240 EX(ud, 6);
241 EX(nm, 7);
242 EX_E(df, 8);
243 EX_E(ts, 10);
244 EX_E(np, 11);
245 EX_E(ss, 12);
246 EX_E(gp, 13);
247 EX_E(pf, 14);
248 EX(mf, 16);
249 EX_E(ac, 17);
250 EX(mc, 18);
251 EX(xm, 19);
252 EX_E(cp, 21);
253
254 asm (".pushsection .text \n\t"
255 "__handle_exception: \n\t"
256 #ifdef __x86_64__
257 "push %r15; push %r14; push %r13; push %r12 \n\t"
258 "push %r11; push %r10; push %r9; push %r8 \n\t"
259 #endif
260 "push %"R "di; push %"R "si; push %"R "bp; sub $"S", %"R "sp \n\t"
261 "push %"R "bx; push %"R "dx; push %"R "cx; push %"R "ax \n\t"
262 #ifdef __x86_64__
263 "mov %"R "sp, %"R "di \n\t"
264 #else
265 "mov %"R "sp, %"R "ax \n\t"
266 #endif
267 "call do_handle_exception \n\t"
268 "pop %"R "ax; pop %"R "cx; pop %"R "dx; pop %"R "bx \n\t"
269 "add $"S", %"R "sp; pop %"R "bp; pop %"R "si; pop %"R "di \n\t"
270 #ifdef __x86_64__
271 "pop %r8; pop %r9; pop %r10; pop %r11 \n\t"
272 "pop %r12; pop %r13; pop %r14; pop %r15 \n\t"
273 #endif
274 "add $"S", %"R "sp \n\t"
275 "add $"S", %"R "sp \n\t"
276 "iret"W" \n\t"
277 ".popsection");
278
279 static void *idt_handlers[32] = {
280 [0] = &de_fault,
281 [1] = &db_fault,
282 [2] = &nmi_fault,
283 [3] = &bp_fault,
284 [4] = &of_fault,
285 [5] = &br_fault,
286 [6] = &ud_fault,
287 [7] = &nm_fault,
288 [8] = &df_fault,
289 [10] = &ts_fault,
290 [11] = &np_fault,
291 [12] = &ss_fault,
292 [13] = &gp_fault,
293 [14] = &pf_fault,
294 [16] = &mf_fault,
295 [17] = &ac_fault,
296 [18] = &mc_fault,
297 [19] = &xm_fault,
298 [21] = &cp_fault,
299 };
300
301 bool is_fep_available;
302
__is_fep_available(void)303 static bool __is_fep_available(void)
304 {
305 /*
306 * Use the non-FEP ASM_TRY() as KVM will inject a #UD on the prefix
307 * itself if forced emulation is not available.
308 */
309 asm goto(ASM_TRY("%l[fep_unavailable]")
310 KVM_FEP "nop\n\t"
311 ::: "memory" : fep_unavailable);
312 return true;
313 fep_unavailable:
314 return false;
315 }
316
setup_idt(void)317 void setup_idt(void)
318 {
319 int i;
320
321 for (i = 0; i < 32; i++) {
322 if (!idt_handlers[i])
323 continue;
324
325 set_idt_entry(i, idt_handlers[i], 0);
326 handle_exception(i, check_exception_table);
327 }
328
329 load_idt();
330
331 /*
332 * Detect support for forced emulation *after* loading the IDT, as this
333 * will #UD if FEP is unavailable.
334 */
335 is_fep_available = __is_fep_available();
336 }
337
load_idt(void)338 void load_idt(void)
339 {
340 lidt(&idt_descr);
341 }
342
/* Return the exception vector stored in this CPU's exception state. */
unsigned exception_vector(void)
{
	unsigned vec = this_cpu_read_exception_vector();

	return vec;
}
347
/* Return the error code stored in this CPU's exception state. */
unsigned exception_error_code(void)
{
	unsigned code = this_cpu_read_exception_error_code();

	return code;
}
352
/* Return the low bit of the rflags value stored in this CPU's exception state. */
bool exception_rflags_rf(void)
{
	return (this_cpu_read_exception_rflags_rf() & 1) != 0;
}
357
/*
 * Backing storage for the alternate interrupt stack.  Users point the stack
 * at the end of the buffer (intr_alt_stack + 4096).
 */
static char intr_alt_stack[4096];
359
set_gdt_entry_base(int sel,unsigned long base)360 void set_gdt_entry_base(int sel, unsigned long base)
361 {
362 gdt_entry_t *entry = &gdt[sel >> 3];
363
364 /* Setup the descriptor base address */
365 entry->base1 = (base & 0xFFFF);
366 entry->base2 = (base >> 16) & 0xFF;
367 entry->base3 = (base >> 24) & 0xFF;
368
369 #ifdef __x86_64__
370 if (!entry->s) {
371 struct system_desc64 *entry16 = (struct system_desc64 *)entry;
372 entry16->zero = 0;
373 entry16->base4 = base >> 32;
374 }
375 #endif
376 }
377
/*
 * Fill the GDT descriptor selected by sel.
 *
 * @base:  segment base address
 * @limit: 20-bit limit; bits 0-15 go to limit1, bits 16-19 into
 *         type_limit_flags
 * @type:  low byte of type_limit_flags
 * @flags: only the upper nibble (0xF0) is used
 */
void set_gdt_entry(int sel, unsigned long base, u32 limit, u8 type, u8 flags)
{
	gdt_entry_t *desc = &gdt[sel >> 3];
	u32 packed = type;

	packed |= (limit & 0xF0000) >> 8;
	packed |= (flags & 0xF0) << 8;

	desc->limit1 = limit & 0xFFFF;
	desc->type_limit_flags = packed;

	set_gdt_entry_base(sel, base);
}
387
clear_tss_busy(int sel)388 void clear_tss_busy(int sel)
389 {
390 gdt_entry_t *entry = &gdt[sel >> 3];
391
392 entry->type_limit_flags &= ~0xFF;
393 entry->type_limit_flags |= 0x89;
394 }
395
load_gdt_tss(size_t tss_offset)396 void load_gdt_tss(size_t tss_offset)
397 {
398 lgdt(&gdt_descr);
399 ltr(tss_offset);
400 }
401
402 #ifndef __x86_64__
/*
 * Turn GDT slot sel into a task gate: the TSS selector is stored where a
 * segment descriptor keeps its base, with limit and flags zero.
 */
void set_gdt_task_gate(u16 sel, u16 tss_sel)
{
	const u8 task_gate_present = 0x85;	/* task, present */

	set_gdt_entry(sel, tss_sel, 0, task_gate_present, 0);
}
407
/*
 * Replace the IDT entry for vec with a present, DPL-0 gate of type 5 that
 * refers to selector sel.  All other fields are zeroed.
 */
void set_idt_task_gate(int vec, u16 sel)
{
	idt_entry_t *gate = &boot_idt[vec];

	memset(gate, 0, sizeof(*gate));

	gate->type = 5;
	gate->selector = sel;
	gate->dpl = 0;
	gate->ist = 0;
	gate->p = 1;
}
420
421 /*
422 * 0 - main task
423 * 1 - interrupt task
424 */
425
426 tss32_t tss_intr;
427
setup_tss32(void)428 void setup_tss32(void)
429 {
430 u16 desc_size = sizeof(tss32_t);
431
432 tss[0].cr3 = read_cr3();
433 tss_intr.cr3 = read_cr3();
434 tss_intr.ss0 = tss_intr.ss1 = tss_intr.ss2 = 0x10;
435 tss_intr.esp = tss_intr.esp0 = tss_intr.esp1 = tss_intr.esp2 =
436 (u32)intr_alt_stack + 4096;
437 tss_intr.cs = 0x08;
438 tss_intr.ds = tss_intr.es = tss_intr.fs = tss_intr.ss = 0x10;
439 tss_intr.gs = read_gs();
440 tss_intr.iomap_base = (u16)desc_size;
441 set_gdt_entry(TSS_INTR, (u32)&tss_intr, desc_size - 1, 0x89, 0);
442 }
443
set_intr_task_gate(int e,void * fn)444 void set_intr_task_gate(int e, void *fn)
445 {
446 tss_intr.eip = (u32)fn;
447 set_idt_task_gate(e, TSS_INTR);
448 }
449
/* 32-bit: the alternate stack is provided by the interrupt task's TSS. */
void setup_alt_stack(void)
{
	setup_tss32();
}
454
set_intr_alt_stack(int e,void * fn)455 void set_intr_alt_stack(int e, void *fn)
456 {
457 set_intr_task_gate(e, fn);
458 }
459
print_current_tss_info(void)460 void print_current_tss_info(void)
461 {
462 u16 tr = str();
463
464 if (tr != TSS_MAIN && tr != TSS_INTR)
465 printf("Unknown TSS %x\n", tr);
466 else
467 printf("TR=%x (%s) Main TSS back link %x. Intr TSS back link %x\n",
468 tr, tr ? "interrupt" : "main", tss[0].prev, tss_intr.prev);
469 }
470 #else
set_intr_alt_stack(int e,void * addr)471 void set_intr_alt_stack(int e, void *addr)
472 {
473 set_idt_entry(e, addr, 0);
474 boot_idt[e].ist = 1;
475 }
476
setup_alt_stack(void)477 void setup_alt_stack(void)
478 {
479 tss[0].ist1 = (u64)intr_alt_stack + 4096;
480 }
481 #endif
482
/* Set by exception_handler() when the exception under test fires. */
static bool exception;
/* Jump buffer registered through __set_exception_jmpbuf(). */
static jmp_buf *exception_jmpbuf;

/*
 * Landing pad that exception_handler() redirects the interrupted context
 * to; it unwinds to the registered jump buffer with value 1.
 */
static void exception_handler_longjmp(void)
{
	jmp_buf *env = exception_jmpbuf;

	longjmp(*env, 1);
}
490
exception_handler(struct ex_regs * regs)491 static void exception_handler(struct ex_regs *regs)
492 {
493 /* longjmp must happen after iret, so do not do it now. */
494 exception = true;
495 regs->rip = (unsigned long)&exception_handler_longjmp;
496 regs->cs = read_cs();
497 }
498
test_for_exception(unsigned int ex,void (* trigger_func)(void * data),void * data)499 bool test_for_exception(unsigned int ex, void (*trigger_func)(void *data),
500 void *data)
501 {
502 handler old;
503 jmp_buf jmpbuf;
504 int ret;
505
506 old = handle_exception(ex, exception_handler);
507 ret = set_exception_jmpbuf(jmpbuf);
508 if (ret == 0)
509 trigger_func(data);
510 handle_exception(ex, old);
511 return ret;
512 }
513
__set_exception_jmpbuf(jmp_buf * addr)514 void __set_exception_jmpbuf(jmp_buf *addr)
515 {
516 exception_jmpbuf = addr;
517 }
518
get_gdt_entry(u16 sel)519 gdt_entry_t *get_gdt_entry(u16 sel)
520 {
521 struct descriptor_table_ptr gdt_ptr;
522 gdt_entry_t *gdt;
523
524 sgdt(&gdt_ptr);
525 gdt = (gdt_entry_t *)gdt_ptr.base;
526 return &gdt[sel / 8];
527 }
528
get_tss_descr(void)529 gdt_entry_t *get_tss_descr(void)
530 {
531 return get_gdt_entry(str());
532 }
533
get_ldt_descr(void)534 gdt_entry_t *get_ldt_descr(void)
535 {
536 return get_gdt_entry(sldt());
537 }
538
/*
 * Reassemble the base address stored in a GDT descriptor.  On x86-64,
 * descriptors with the S bit clear also contribute base4 as bits 32-63.
 */
unsigned long get_gdt_entry_base(gdt_entry_t *entry)
{
	unsigned long base = entry->base1;

	base |= (u32)entry->base2 << 16;
	base |= (u32)entry->base3 << 24;
#ifdef __x86_64__
	if (!entry->s)
		base |= (u64)((struct system_desc64 *)entry)->base4 << 32;
#endif
	return base;
}
550
/*
 * Return the limit stored in a GDT descriptor.  With the G bit set the
 * 20-bit value is scaled to (limit << 12) | 0xFFF.
 */
unsigned long get_gdt_entry_limit(gdt_entry_t *entry)
{
	unsigned long limit = entry->limit1;

	limit |= (u32)entry->limit2 << 16;
	if (!entry->g)
		return limit;

	return (limit << 12) | 0xFFF;
}
560