1 /* 2 * qemu user cpu loop 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu-common.h" 22 #include "qemu.h" 23 #include "cpu_loop-common.h" 24 #include "signal-common.h" 25 #include "user-mmap.h" 26 27 /***********************************************************/ 28 /* CPUX86 core interface */ 29 30 uint64_t cpu_get_tsc(CPUX86State *env) 31 { 32 return cpu_get_host_ticks(); 33 } 34 35 static void write_dt(void *ptr, unsigned long addr, unsigned long limit, 36 int flags) 37 { 38 unsigned int e1, e2; 39 uint32_t *p; 40 e1 = (addr << 16) | (limit & 0xffff); 41 e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); 42 e2 |= flags; 43 p = ptr; 44 p[0] = tswap32(e1); 45 p[1] = tswap32(e2); 46 } 47 48 static uint64_t *idt_table; 49 #ifdef TARGET_X86_64 50 static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, 51 uint64_t addr, unsigned int sel) 52 { 53 uint32_t *p, e1, e2; 54 e1 = (addr & 0xffff) | (sel << 16); 55 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 56 p = ptr; 57 p[0] = tswap32(e1); 58 p[1] = tswap32(e2); 59 p[2] = tswap32(addr >> 32); 60 p[3] = 0; 61 } 62 /* only dpl matters as we do only user space emulation */ 63 static void set_idt(int n, unsigned int dpl) 64 { 65 set_gate64(idt_table + n * 2, 0, dpl, 0, 0); 66 } 67 #else 68 static void set_gate(void *ptr, unsigned int type, unsigned int dpl, 69 uint32_t addr, unsigned int sel) 70 { 71 uint32_t *p, e1, e2; 72 e1 = (addr & 0xffff) | (sel << 16); 73 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 74 p = ptr; 75 p[0] = tswap32(e1); 76 p[1] = tswap32(e2); 77 } 78 79 /* only dpl matters as we do only user space emulation */ 80 static void set_idt(int n, unsigned int dpl) 81 { 82 set_gate(idt_table + n, 0, dpl, 0, 0); 83 } 84 #endif 85 86 static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr) 87 { 88 target_siginfo_t info = { 89 .si_signo = sig, 90 .si_code = code, 91 ._sifields._sigfault._addr = addr 92 }; 93 94 queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); 95 } 96 97 #ifdef TARGET_X86_64 98 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) 99 { 100 /* 101 * For all the vsyscalls, NULL means "don't write anything" not 102 * "write it at address 0". 103 */ 104 if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) { 105 return true; 106 } 107 108 env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; 109 gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); 110 return false; 111 } 112 113 /* 114 * Since v3.1, the kernel traps and emulates the vsyscall page. 115 * Entry points other than the official generate SIGSEGV. 116 */ 117 static void emulate_vsyscall(CPUX86State *env) 118 { 119 int syscall; 120 abi_ulong ret; 121 uint64_t caller; 122 123 /* 124 * Validate the entry point. We have already validated the page 125 * during translation to get here; now verify the offset. 126 */ 127 switch (env->eip & ~TARGET_PAGE_MASK) { 128 case 0x000: 129 syscall = TARGET_NR_gettimeofday; 130 break; 131 case 0x400: 132 syscall = TARGET_NR_time; 133 break; 134 case 0x800: 135 syscall = TARGET_NR_getcpu; 136 break; 137 default: 138 goto sigsegv; 139 } 140 141 /* 142 * Validate the return address. 143 * Note that the kernel treats this the same as an invalid entry point. 144 */ 145 if (get_user_u64(caller, env->regs[R_ESP])) { 146 goto sigsegv; 147 } 148 149 /* 150 * Validate the the pointer arguments. 151 */ 152 switch (syscall) { 153 case TARGET_NR_gettimeofday: 154 if (!write_ok_or_segv(env, env->regs[R_EDI], 155 sizeof(struct target_timeval)) || 156 !write_ok_or_segv(env, env->regs[R_ESI], 157 sizeof(struct target_timezone))) { 158 return; 159 } 160 break; 161 case TARGET_NR_time: 162 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { 163 return; 164 } 165 break; 166 case TARGET_NR_getcpu: 167 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || 168 !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { 169 return; 170 } 171 break; 172 default: 173 g_assert_not_reached(); 174 } 175 176 /* 177 * Perform the syscall. None of the vsyscalls should need restarting. 178 */ 179 ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], 180 env->regs[R_EDX], env->regs[10], env->regs[8], 181 env->regs[9], 0, 0); 182 g_assert(ret != -TARGET_ERESTARTSYS); 183 g_assert(ret != -TARGET_QEMU_ESIGRETURN); 184 if (ret == -TARGET_EFAULT) { 185 goto sigsegv; 186 } 187 env->regs[R_EAX] = ret; 188 189 /* Emulate a ret instruction to leave the vsyscall page. */ 190 env->eip = caller; 191 env->regs[R_ESP] += 8; 192 return; 193 194 sigsegv: 195 /* Like force_sig(SIGSEGV). */ 196 gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); 197 } 198 #endif 199 200 void cpu_loop(CPUX86State *env) 201 { 202 CPUState *cs = env_cpu(env); 203 int trapnr; 204 abi_ulong pc; 205 abi_ulong ret; 206 207 for(;;) { 208 cpu_exec_start(cs); 209 trapnr = cpu_exec(cs); 210 cpu_exec_end(cs); 211 process_queued_cpu_work(cs); 212 213 switch(trapnr) { 214 case 0x80: 215 /* linux syscall from int $0x80 */ 216 ret = do_syscall(env, 217 env->regs[R_EAX], 218 env->regs[R_EBX], 219 env->regs[R_ECX], 220 env->regs[R_EDX], 221 env->regs[R_ESI], 222 env->regs[R_EDI], 223 env->regs[R_EBP], 224 0, 0); 225 if (ret == -TARGET_ERESTARTSYS) { 226 env->eip -= 2; 227 } else if (ret != -TARGET_QEMU_ESIGRETURN) { 228 env->regs[R_EAX] = ret; 229 } 230 break; 231 #ifndef TARGET_ABI32 232 case EXCP_SYSCALL: 233 /* linux syscall from syscall instruction */ 234 ret = do_syscall(env, 235 env->regs[R_EAX], 236 env->regs[R_EDI], 237 env->regs[R_ESI], 238 env->regs[R_EDX], 239 env->regs[10], 240 env->regs[8], 241 env->regs[9], 242 0, 0); 243 if (ret == -TARGET_ERESTARTSYS) { 244 env->eip -= 2; 245 } else if (ret != -TARGET_QEMU_ESIGRETURN) { 246 env->regs[R_EAX] = ret; 247 } 248 break; 249 #endif 250 #ifdef TARGET_X86_64 251 case EXCP_VSYSCALL: 252 emulate_vsyscall(env); 253 break; 254 #endif 255 case EXCP0B_NOSEG: 256 case EXCP0C_STACK: 257 gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0); 258 break; 259 case EXCP0D_GPF: 260 /* XXX: potential problem if ABI32 */ 261 #ifndef TARGET_X86_64 262 if (env->eflags & VM_MASK) { 263 handle_vm86_fault(env); 264 break; 265 } 266 #endif 267 gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); 268 break; 269 case EXCP0E_PAGE: 270 gen_signal(env, TARGET_SIGSEGV, 271 (env->error_code & 1 ? 272 TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), 273 env->cr[2]); 274 break; 275 case EXCP00_DIVZ: 276 #ifndef TARGET_X86_64 277 if (env->eflags & VM_MASK) { 278 handle_vm86_trap(env, trapnr); 279 break; 280 } 281 #endif 282 gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); 283 break; 284 case EXCP01_DB: 285 case EXCP03_INT3: 286 #ifndef TARGET_X86_64 287 if (env->eflags & VM_MASK) { 288 handle_vm86_trap(env, trapnr); 289 break; 290 } 291 #endif 292 if (trapnr == EXCP01_DB) { 293 gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); 294 } else { 295 gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0); 296 } 297 break; 298 case EXCP04_INTO: 299 case EXCP05_BOUND: 300 #ifndef TARGET_X86_64 301 if (env->eflags & VM_MASK) { 302 handle_vm86_trap(env, trapnr); 303 break; 304 } 305 #endif 306 gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); 307 break; 308 case EXCP06_ILLOP: 309 gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); 310 break; 311 case EXCP_INTERRUPT: 312 /* just indicate that signals should be handled asap */ 313 break; 314 case EXCP_DEBUG: 315 gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0); 316 break; 317 case EXCP_ATOMIC: 318 cpu_exec_step_atomic(cs); 319 break; 320 default: 321 pc = env->segs[R_CS].base + env->eip; 322 EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", 323 (long)pc, trapnr); 324 abort(); 325 } 326 process_pending_signals(env); 327 } 328 } 329 330 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) 331 { 332 env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; 333 env->hflags |= HF_PE_MASK | HF_CPL_MASK; 334 if (env->features[FEAT_1_EDX] & CPUID_SSE) { 335 env->cr[4] |= CR4_OSFXSR_MASK; 336 env->hflags |= HF_OSFXSR_MASK; 337 } 338 #ifndef TARGET_ABI32 339 /* enable 64 bit mode if possible */ 340 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { 341 fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); 342 exit(EXIT_FAILURE); 343 } 344 env->cr[4] |= CR4_PAE_MASK; 345 env->efer |= MSR_EFER_LMA | MSR_EFER_LME; 346 env->hflags |= HF_LMA_MASK; 347 #endif 348 349 /* flags setup : we activate the IRQs by default as in user mode */ 350 env->eflags |= IF_MASK; 351 352 /* linux register setup */ 353 #ifndef TARGET_ABI32 354 env->regs[R_EAX] = regs->rax; 355 env->regs[R_EBX] = regs->rbx; 356 env->regs[R_ECX] = regs->rcx; 357 env->regs[R_EDX] = regs->rdx; 358 env->regs[R_ESI] = regs->rsi; 359 env->regs[R_EDI] = regs->rdi; 360 env->regs[R_EBP] = regs->rbp; 361 env->regs[R_ESP] = regs->rsp; 362 env->eip = regs->rip; 363 #else 364 env->regs[R_EAX] = regs->eax; 365 env->regs[R_EBX] = regs->ebx; 366 env->regs[R_ECX] = regs->ecx; 367 env->regs[R_EDX] = regs->edx; 368 env->regs[R_ESI] = regs->esi; 369 env->regs[R_EDI] = regs->edi; 370 env->regs[R_EBP] = regs->ebp; 371 env->regs[R_ESP] = regs->esp; 372 env->eip = regs->eip; 373 #endif 374 375 /* linux interrupt setup */ 376 #ifndef TARGET_ABI32 377 env->idt.limit = 511; 378 #else 379 env->idt.limit = 255; 380 #endif 381 env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), 382 PROT_READ|PROT_WRITE, 383 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 384 idt_table = g2h_untagged(env->idt.base); 385 set_idt(0, 0); 386 set_idt(1, 0); 387 set_idt(2, 0); 388 set_idt(3, 3); 389 set_idt(4, 3); 390 set_idt(5, 0); 391 set_idt(6, 0); 392 set_idt(7, 0); 393 set_idt(8, 0); 394 set_idt(9, 0); 395 set_idt(10, 0); 396 set_idt(11, 0); 397 set_idt(12, 0); 398 set_idt(13, 0); 399 set_idt(14, 0); 400 set_idt(15, 0); 401 set_idt(16, 0); 402 set_idt(17, 0); 403 set_idt(18, 0); 404 set_idt(19, 0); 405 set_idt(0x80, 3); 406 407 /* linux segment setup */ 408 { 409 uint64_t *gdt_table; 410 env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, 411 PROT_READ|PROT_WRITE, 412 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 413 env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; 414 gdt_table = g2h_untagged(env->gdt.base); 415 #ifdef TARGET_ABI32 416 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 417 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 418 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 419 #else 420 /* 64 bit code segment */ 421 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 422 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 423 DESC_L_MASK | 424 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 425 #endif 426 write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, 427 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 428 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); 429 } 430 cpu_x86_load_seg(env, R_CS, __USER_CS); 431 cpu_x86_load_seg(env, R_SS, __USER_DS); 432 #ifdef TARGET_ABI32 433 cpu_x86_load_seg(env, R_DS, __USER_DS); 434 cpu_x86_load_seg(env, R_ES, __USER_DS); 435 cpu_x86_load_seg(env, R_FS, __USER_DS); 436 cpu_x86_load_seg(env, R_GS, __USER_DS); 437 /* This hack makes Wine work... */ 438 env->segs[R_FS].selector = 0; 439 #else 440 cpu_x86_load_seg(env, R_DS, 0); 441 cpu_x86_load_seg(env, R_ES, 0); 442 cpu_x86_load_seg(env, R_FS, 0); 443 cpu_x86_load_seg(env, R_GS, 0); 444 #endif 445 } 446