xref: /qemu/linux-user/i386/cpu_loop.c (revision 5423e6d3a416342ee5857d02688c30b776574b66)
1 /*
2  *  qemu user cpu loop
3  *
4  *  Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu-common.h"
22 #include "qemu.h"
23 #include "cpu_loop-common.h"
24 #include "signal-common.h"
25 #include "user-mmap.h"
26 
27 /***********************************************************/
28 /* CPUX86 core interface */
29 
30 uint64_t cpu_get_tsc(CPUX86State *env)
31 {
32     return cpu_get_host_ticks();
33 }
34 
/*
 * Encode a descriptor-table entry as two 32-bit words stored at ptr.
 *
 * Word 0: bits 0-15 of addr in the upper half, bits 0-15 of limit in the
 *         lower half.
 * Word 1: bits 16-23 of addr in bits 0-7, bits 24-31 of addr kept in place,
 *         bits 16-19 of limit kept in place, all OR-ed with flags.
 *
 * Both words go through tswap32() before being stored.
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *desc = ptr;
    unsigned int low_word, high_word;

    low_word = (addr << 16) | (limit & 0xffff);

    high_word = ((addr >> 16) & 0xff) | (addr & 0xff000000) |
                (limit & 0x000f0000);
    high_word |= flags;

    desc[0] = tswap32(low_word);
    desc[1] = tswap32(high_word);
}
47 
/*
 * Host-side pointer to the guest interrupt descriptor table.  It is
 * assigned in target_cpu_copy_regs() and indexed by set_idt().
 */
static uint64_t *idt_table;
49 #ifdef TARGET_X86_64
/*
 * Write a four-word (16-byte) gate descriptor at ptr.
 *
 * Word 0: bits 0-15 of addr, with sel in the upper half.
 * Word 1: bits 16-31 of addr, bit 15 set (0x8000), dpl at bit 13 and
 *         type at bit 8.
 * Word 2: bits 32-63 of addr.
 * Word 3: zero.
 *
 * The first three words go through tswap32() before being stored.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (addr & 0xffff) | (sel << 16);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(addr >> 32);
    gate[3] = 0;
}
62 /* only dpl matters as we do only user space emulation */
63 static void set_idt(int n, unsigned int dpl)
64 {
65     set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
66 }
67 #else
/*
 * Write a two-word (8-byte) gate descriptor at ptr.
 *
 * Word 0: bits 0-15 of addr, with sel in the upper half.
 * Word 1: bits 16-31 of addr, bit 15 set (0x8000), dpl at bit 13 and
 *         type at bit 8.
 *
 * Both words go through tswap32() before being stored.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (addr & 0xffff) | (sel << 16);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
78 
79 /* only dpl matters as we do only user space emulation */
80 static void set_idt(int n, unsigned int dpl)
81 {
82     set_gate(idt_table + n, 0, dpl, 0, 0);
83 }
84 #endif
85 
86 static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
87 {
88     target_siginfo_t info = {
89         .si_signo = sig,
90         .si_code = code,
91         ._sifields._sigfault._addr = addr
92     };
93 
94     queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
95 }
96 
97 #ifdef TARGET_X86_64
98 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
99 {
100     /*
101      * For all the vsyscalls, NULL means "don't write anything" not
102      * "write it at address 0".
103      */
104     if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
105         return true;
106     }
107 
108     env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
109     gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
110     return false;
111 }
112 
113 /*
114  * Since v3.1, the kernel traps and emulates the vsyscall page.
115  * Entry points other than the official generate SIGSEGV.
116  */
117 static void emulate_vsyscall(CPUX86State *env)
118 {
119     int syscall;
120     abi_ulong ret;
121     uint64_t caller;
122 
123     /*
124      * Validate the entry point.  We have already validated the page
125      * during translation to get here; now verify the offset.
126      */
127     switch (env->eip & ~TARGET_PAGE_MASK) {
128     case 0x000:
129         syscall = TARGET_NR_gettimeofday;
130         break;
131     case 0x400:
132         syscall = TARGET_NR_time;
133         break;
134     case 0x800:
135         syscall = TARGET_NR_getcpu;
136         break;
137     default:
138         goto sigsegv;
139     }
140 
141     /*
142      * Validate the return address.
143      * Note that the kernel treats this the same as an invalid entry point.
144      */
145     if (get_user_u64(caller, env->regs[R_ESP])) {
146         goto sigsegv;
147     }
148 
149     /*
150      * Validate the the pointer arguments.
151      */
152     switch (syscall) {
153     case TARGET_NR_gettimeofday:
154         if (!write_ok_or_segv(env, env->regs[R_EDI],
155                               sizeof(struct target_timeval)) ||
156             !write_ok_or_segv(env, env->regs[R_ESI],
157                               sizeof(struct target_timezone))) {
158             return;
159         }
160         break;
161     case TARGET_NR_time:
162         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
163             return;
164         }
165         break;
166     case TARGET_NR_getcpu:
167         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
168             !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
169             return;
170         }
171         break;
172     default:
173         g_assert_not_reached();
174     }
175 
176     /*
177      * Perform the syscall.  None of the vsyscalls should need restarting.
178      */
179     ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
180                      env->regs[R_EDX], env->regs[10], env->regs[8],
181                      env->regs[9], 0, 0);
182     g_assert(ret != -TARGET_ERESTARTSYS);
183     g_assert(ret != -TARGET_QEMU_ESIGRETURN);
184     if (ret == -TARGET_EFAULT) {
185         goto sigsegv;
186     }
187     env->regs[R_EAX] = ret;
188 
189     /* Emulate a ret instruction to leave the vsyscall page.  */
190     env->eip = caller;
191     env->regs[R_ESP] += 8;
192     return;
193 
194  sigsegv:
195     /* Like force_sig(SIGSEGV).  */
196     gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
197 }
198 #endif
199 
200 void cpu_loop(CPUX86State *env)
201 {
202     CPUState *cs = env_cpu(env);
203     int trapnr;
204     abi_ulong pc;
205     abi_ulong ret;
206 
207     for(;;) {
208         cpu_exec_start(cs);
209         trapnr = cpu_exec(cs);
210         cpu_exec_end(cs);
211         process_queued_cpu_work(cs);
212 
213         switch(trapnr) {
214         case 0x80:
215             /* linux syscall from int $0x80 */
216             ret = do_syscall(env,
217                              env->regs[R_EAX],
218                              env->regs[R_EBX],
219                              env->regs[R_ECX],
220                              env->regs[R_EDX],
221                              env->regs[R_ESI],
222                              env->regs[R_EDI],
223                              env->regs[R_EBP],
224                              0, 0);
225             if (ret == -TARGET_ERESTARTSYS) {
226                 env->eip -= 2;
227             } else if (ret != -TARGET_QEMU_ESIGRETURN) {
228                 env->regs[R_EAX] = ret;
229             }
230             break;
231 #ifndef TARGET_ABI32
232         case EXCP_SYSCALL:
233             /* linux syscall from syscall instruction */
234             ret = do_syscall(env,
235                              env->regs[R_EAX],
236                              env->regs[R_EDI],
237                              env->regs[R_ESI],
238                              env->regs[R_EDX],
239                              env->regs[10],
240                              env->regs[8],
241                              env->regs[9],
242                              0, 0);
243             if (ret == -TARGET_ERESTARTSYS) {
244                 env->eip -= 2;
245             } else if (ret != -TARGET_QEMU_ESIGRETURN) {
246                 env->regs[R_EAX] = ret;
247             }
248             break;
249 #endif
250 #ifdef TARGET_X86_64
251         case EXCP_VSYSCALL:
252             emulate_vsyscall(env);
253             break;
254 #endif
255         case EXCP0B_NOSEG:
256         case EXCP0C_STACK:
257             gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
258             break;
259         case EXCP0D_GPF:
260             /* XXX: potential problem if ABI32 */
261 #ifndef TARGET_X86_64
262             if (env->eflags & VM_MASK) {
263                 handle_vm86_fault(env);
264                 break;
265             }
266 #endif
267             gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
268             break;
269         case EXCP0E_PAGE:
270             gen_signal(env, TARGET_SIGSEGV,
271                        (env->error_code & 1 ?
272                         TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
273                        env->cr[2]);
274             break;
275         case EXCP00_DIVZ:
276 #ifndef TARGET_X86_64
277             if (env->eflags & VM_MASK) {
278                 handle_vm86_trap(env, trapnr);
279                 break;
280             }
281 #endif
282             gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
283             break;
284         case EXCP01_DB:
285         case EXCP03_INT3:
286 #ifndef TARGET_X86_64
287             if (env->eflags & VM_MASK) {
288                 handle_vm86_trap(env, trapnr);
289                 break;
290             }
291 #endif
292             if (trapnr == EXCP01_DB) {
293                 gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
294             } else {
295                 gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
296             }
297             break;
298         case EXCP04_INTO:
299         case EXCP05_BOUND:
300 #ifndef TARGET_X86_64
301             if (env->eflags & VM_MASK) {
302                 handle_vm86_trap(env, trapnr);
303                 break;
304             }
305 #endif
306             gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
307             break;
308         case EXCP06_ILLOP:
309             gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
310             break;
311         case EXCP_INTERRUPT:
312             /* just indicate that signals should be handled asap */
313             break;
314         case EXCP_DEBUG:
315             gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
316             break;
317         case EXCP_ATOMIC:
318             cpu_exec_step_atomic(cs);
319             break;
320         default:
321             pc = env->segs[R_CS].base + env->eip;
322             EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
323                       (long)pc, trapnr);
324             abort();
325         }
326         process_pending_signals(env);
327     }
328 }
329 
330 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
331 {
332     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
333     env->hflags |= HF_PE_MASK | HF_CPL_MASK;
334     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
335         env->cr[4] |= CR4_OSFXSR_MASK;
336         env->hflags |= HF_OSFXSR_MASK;
337     }
338 #ifndef TARGET_ABI32
339     /* enable 64 bit mode if possible */
340     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
341         fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
342         exit(EXIT_FAILURE);
343     }
344     env->cr[4] |= CR4_PAE_MASK;
345     env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
346     env->hflags |= HF_LMA_MASK;
347 #endif
348 
349     /* flags setup : we activate the IRQs by default as in user mode */
350     env->eflags |= IF_MASK;
351 
352     /* linux register setup */
353 #ifndef TARGET_ABI32
354     env->regs[R_EAX] = regs->rax;
355     env->regs[R_EBX] = regs->rbx;
356     env->regs[R_ECX] = regs->rcx;
357     env->regs[R_EDX] = regs->rdx;
358     env->regs[R_ESI] = regs->rsi;
359     env->regs[R_EDI] = regs->rdi;
360     env->regs[R_EBP] = regs->rbp;
361     env->regs[R_ESP] = regs->rsp;
362     env->eip = regs->rip;
363 #else
364     env->regs[R_EAX] = regs->eax;
365     env->regs[R_EBX] = regs->ebx;
366     env->regs[R_ECX] = regs->ecx;
367     env->regs[R_EDX] = regs->edx;
368     env->regs[R_ESI] = regs->esi;
369     env->regs[R_EDI] = regs->edi;
370     env->regs[R_EBP] = regs->ebp;
371     env->regs[R_ESP] = regs->esp;
372     env->eip = regs->eip;
373 #endif
374 
375     /* linux interrupt setup */
376 #ifndef TARGET_ABI32
377     env->idt.limit = 511;
378 #else
379     env->idt.limit = 255;
380 #endif
381     env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
382                                 PROT_READ|PROT_WRITE,
383                                 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
384     idt_table = g2h_untagged(env->idt.base);
385     set_idt(0, 0);
386     set_idt(1, 0);
387     set_idt(2, 0);
388     set_idt(3, 3);
389     set_idt(4, 3);
390     set_idt(5, 0);
391     set_idt(6, 0);
392     set_idt(7, 0);
393     set_idt(8, 0);
394     set_idt(9, 0);
395     set_idt(10, 0);
396     set_idt(11, 0);
397     set_idt(12, 0);
398     set_idt(13, 0);
399     set_idt(14, 0);
400     set_idt(15, 0);
401     set_idt(16, 0);
402     set_idt(17, 0);
403     set_idt(18, 0);
404     set_idt(19, 0);
405     set_idt(0x80, 3);
406 
407     /* linux segment setup */
408     {
409         uint64_t *gdt_table;
410         env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
411                                     PROT_READ|PROT_WRITE,
412                                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
413         env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
414         gdt_table = g2h_untagged(env->gdt.base);
415 #ifdef TARGET_ABI32
416         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
417                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
418                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
419 #else
420         /* 64 bit code segment */
421         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
422                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
423                  DESC_L_MASK |
424                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
425 #endif
426         write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
427                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
428                  (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
429     }
430     cpu_x86_load_seg(env, R_CS, __USER_CS);
431     cpu_x86_load_seg(env, R_SS, __USER_DS);
432 #ifdef TARGET_ABI32
433     cpu_x86_load_seg(env, R_DS, __USER_DS);
434     cpu_x86_load_seg(env, R_ES, __USER_DS);
435     cpu_x86_load_seg(env, R_FS, __USER_DS);
436     cpu_x86_load_seg(env, R_GS, __USER_DS);
437     /* This hack makes Wine work... */
438     env->segs[R_FS].selector = 0;
439 #else
440     cpu_x86_load_seg(env, R_DS, 0);
441     cpu_x86_load_seg(env, R_ES, 0);
442     cpu_x86_load_seg(env, R_FS, 0);
443     cpu_x86_load_seg(env, R_GS, 0);
444 #endif
445 }
446