/*
 *  emulator main execution loop
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/qemu-print.h"
#include "qapi/error.h"
#include "qapi/type-helpers.h"
#include "hw/core/cpu.h"
#include "accel/tcg/cpu-ldst.h"
#include "accel/tcg/cpu-ops.h"
#include "accel/tcg/helper-retaddr.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/cpu-common.h"
#include "exec/cpu-interrupt.h"
#include "exec/page-protection.h"
#include "exec/mmap-lock.h"
#include "exec/translation-block.h"
#include "tcg/tcg.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/icount.h"
#include "exec/replay-core.h"
#include "system/tcg.h"
#include "exec/helper-proto-common.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "tb-internal.h"
#include "internal-common.h"

/* -icount align implementation. */

typedef struct SyncClocks {
    int64_t diff_clk;
    int64_t last_cpu_icount;
    int64_t realtime_clock;
} SyncClocks;

#if !defined(CONFIG_USER_ONLY)
/* Allow the guest to have a max 3ms advance.
 * The difference between the 2 clocks could therefore
 * oscillate around 0.
 */
#define VM_CLOCK_ADVANCE 3000000
#define THRESHOLD_REDUCE 1.5
#define MAX_DELAY_PRINT_RATE 2000000000LL
#define MAX_NB_PRINTS 100

int64_t max_delay;
int64_t max_advance;

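/*
 * Sleep the host when the guest's instruction-count clock has run more
 * than VM_CLOCK_ADVANCE ahead of the host's real-time clock, keeping
 * the two roughly in sync while -icount align is enabled.
 */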
static void align_clocks(SyncClocks *sc, CPUState *cpu)
{
    int64_t cpu_icount;

    if (!icount_align_option) {
        return;
    }

    cpu_icount = cpu->icount_extra + cpu->neg.icount_decr.u16.low;
    sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
    sc->last_cpu_icount = cpu_icount;

    if (sc->diff_clk > VM_CLOCK_ADVANCE) {
#ifndef _WIN32
        struct timespec sleep_delay, rem_delay;
        sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
        sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
            sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
        } else {
            sc->diff_clk = 0;
        }
#else
        Sleep(sc->diff_clk / SCALE_MS);
        sc->diff_clk = 0;
#endif
    }
}

static void print_delay(const SyncClocks *sc)
{
    static float threshold_delay;
    static int64_t last_realtime_clock;
    static int nb_prints;

    if (icount_align_option &&
        sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
        nb_prints < MAX_NB_PRINTS) {
        if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
            (-sc->diff_clk / (float)1000000000LL <
             (threshold_delay - THRESHOLD_REDUCE))) {
            threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
            qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
                        threshold_delay - 1,
                        threshold_delay);
            nb_prints++;
            last_realtime_clock = sc->realtime_clock;
        }
    }
}

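/*
 * Record the initial difference between the guest's virtual clock and
 * the host's real-time clock, remember the current instruction budget
 * for align_clocks(), update the max_delay/max_advance statistics, and
 * warn (via print_delay) when the guest has fallen behind.
 */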
static void init_delay_params(SyncClocks *sc, CPUState *cpu)
{
    if (!icount_align_option) {
        return;
    }
    sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
    sc->last_cpu_icount
        = cpu->icount_extra + cpu->neg.icount_decr.u16.low;
    if (sc->diff_clk < max_delay) {
        max_delay = sc->diff_clk;
    }
    if (sc->diff_clk > max_advance) {
        max_advance = sc->diff_clk;
    }

    /* Print every 2s max if the guest is late.  We limit the number
       of printed messages to MAX_NB_PRINTS (currently 100). */
    print_delay(sc);
}
#else
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
}

static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
}
#endif /* !CONFIG_USER_ONLY */

struct tb_desc {
    TCGTBCPUState s;
    CPUArchState *env;
    tb_page_addr_t page_addr0;
};

static bool tb_lookup_cmp(const void *p, const void *d)
{
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

    if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->s.pc) &&
        tb_page_addr0(tb) == desc->page_addr0 &&
        tb->cs_base == desc->s.cs_base &&
        tb->flags == desc->s.flags &&
        tb_cflags(tb) == desc->s.cflags) {
        /* check next page if needed */
        tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
        if (tb_phys_page1 == -1) {
            return true;
        } else {
            tb_page_addr_t phys_page1;
            vaddr virt_page1;

            /*
             * We know that the first page matched, and an otherwise valid TB
             * encountered an incomplete instruction at the end of that page,
             * therefore we know that generating a new TB from the current PC
             * must also require reading from the next page -- even if the
             * second pages do not match, and therefore the resulting insn
             * is different for the new TB.  Therefore any exception raised
             * here by the faulting lookup is not premature.
             */
            virt_page1 = TARGET_PAGE_ALIGN(desc->s.pc);
            phys_page1 = get_page_addr_code(desc->env, virt_page1);
            if (tb_phys_page1 == phys_page1) {
                return true;
            }
        }
    }
    return false;
}

static TranslationBlock *tb_htable_lookup(CPUState *cpu, TCGTBCPUState s)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.s = s;
    desc.env = cpu_env(cpu);
    phys_pc = get_page_addr_code(desc.env, s.pc);
    if (phys_pc == -1) {
        return NULL;
    }
    desc.page_addr0 = phys_pc;
    h = tb_hash_func(phys_pc, (s.cflags & CF_PCREL ? 0 : s.pc),
                     s.flags, s.cs_base, s.cflags);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

/**
 * tb_lookup:
 * @cpu: CPU that will execute the returned translation block
 * @s: CPU state (pc, cs_base, flags, cflags) describing the block
 *
 * Look up a translation block inside the QHT using the fields of @s.
 * Uses @cpu's tb_jmp_cache.  Might cause an exception, so have a
 * longjmp destination ready.
 *
 * Returns: an existing translation block or NULL.
 */
static inline TranslationBlock *tb_lookup(CPUState *cpu, TCGTBCPUState s)
{
    TranslationBlock *tb;
    CPUJumpCache *jc;
    uint32_t hash;

    /* we should never be trying to look up an INVALID tb */
    tcg_debug_assert(!(s.cflags & CF_INVALID));

    hash = tb_jmp_cache_hash_func(s.pc);
    jc = cpu->tb_jmp_cache;

    tb = qatomic_read(&jc->array[hash].tb);
    if (likely(tb &&
               jc->array[hash].pc == s.pc &&
               tb->cs_base == s.cs_base &&
               tb->flags == s.flags &&
               tb_cflags(tb) == s.cflags)) {
        goto hit;
    }

    tb = tb_htable_lookup(cpu, s);
    if (tb == NULL) {
        return NULL;
    }

    jc->array[hash].pc = s.pc;
    qatomic_set(&jc->array[hash].tb, tb);

hit:
    /*
     * As long as tb is not NULL, the contents are consistent.  Therefore,
     * the virtual PC has to match for non-CF_PCREL translations.
     */
    assert((tb_cflags(tb) & CF_PCREL) || tb->pc == s.pc);
    return tb;
}

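/*
 * Write a CPU_LOG_EXEC trace line for the TB about to be executed and,
 * if CPU_LOG_TB_CPU is also enabled, dump the current CPU state to the
 * log as well.
 */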
static void log_cpu_exec(vaddr pc, CPUState *cpu,
                         const TranslationBlock *tb)
{
    if (qemu_log_in_addr_range(pc)) {
        qemu_log_mask(CPU_LOG_EXEC,
                      "Trace %d: %p [%08" PRIx64
                      "/%016" VADDR_PRIx "/%08x/%08x] %s\n",
                      cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
                      tb->flags, tb->cflags, lookup_symbol(pc));

        if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                int flags = CPU_DUMP_CCOP;

                if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
                    flags |= CPU_DUMP_FPU;
                }
                if (qemu_loglevel_mask(CPU_LOG_TB_VPU)) {
                    flags |= CPU_DUMP_VPU;
                }
                cpu_dump_state(cpu, logfile, flags);
                qemu_log_unlock(logfile);
            }
        }
    }
}

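/*
 * Check whether @pc hits a gdb or guest breakpoint.  Returns true with
 * EXCP_DEBUG pending when execution must stop at this insn.  If some
 * other breakpoint lives in the same page, adjust *cflags so that the
 * following TBs contain a single instruction and return to the lookup
 * helper, re-checking for the breakpoint before each insn in the page.
 */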
static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
                                       uint32_t *cflags)
{
    CPUBreakpoint *bp;
    bool match_page = false;

    /*
     * Singlestep overrides breakpoints.
     * This requirement is visible in the record-replay tests, where
     * we would fail to make forward progress in reverse-continue.
     *
     * TODO: gdb singlestep should only override gdb breakpoints,
     * so that one could (gdb) singlestep into the guest kernel's
     * architectural breakpoint handler.
     */
    if (cpu->singlestep_enabled) {
        return false;
    }

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        /*
         * If we have an exact pc match, trigger the breakpoint.
         * Otherwise, note matches within the page.
         */
        if (pc == bp->pc) {
            bool match_bp = false;

            if (bp->flags & BP_GDB) {
                match_bp = true;
            } else if (bp->flags & BP_CPU) {
#ifdef CONFIG_USER_ONLY
                g_assert_not_reached();
#else
                const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
                assert(tcg_ops->debug_check_breakpoint);
                match_bp = tcg_ops->debug_check_breakpoint(cpu);
#endif
            }

            if (match_bp) {
                cpu->exception_index = EXCP_DEBUG;
                return true;
            }
        } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
            match_page = true;
        }
    }

    /*
     * Within the same page as a breakpoint, single-step,
     * returning to helper_lookup_tb_ptr after each insn looking
     * for the actual breakpoint.
     *
     * TODO: Perhaps better to record all of the TBs associated
     * with a given virtual page that contains a breakpoint, and
     * then invalidate them when a new overlapping breakpoint is
     * set on the page.  Non-overlapping TBs would not be
     * invalidated, nor would any TB need to be invalidated as
     * breakpoints are removed.
     */
    if (match_page) {
        *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | CF_BP_PAGE | 1;
    }
    return false;
}

static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc,
                                         uint32_t *cflags)
{
    return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
        check_for_breakpoints_slow(cpu, pc, cflags);
}

/**
 * helper_lookup_tb_ptr: quick check for next tb
 * @env: current cpu state
 *
 * Look for an existing TB matching the current cpu state.
 * If found, return the code pointer.  If not found, return
 * the tcg epilogue so that we return into cpu_tb_exec.
 */
const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    TranslationBlock *tb;

    /*
     * By definition we've just finished a TB, so I/O is OK.
     * Avoid the possibility of calling cpu_io_recompile() if
     * a page table walk triggered by tb_lookup() calling
     * probe_access_internal() happens to touch an MMIO device.
     * The next TB, if we chain to it, will clear the flag again.
     */
    cpu->neg.can_do_io = true;

    TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
    s.cflags = curr_cflags(cpu);

    if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
        cpu_loop_exit(cpu);
    }

    tb = tb_lookup(cpu, s);
    if (tb == NULL) {
        return tcg_code_gen_epilogue;
    }

    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
        log_cpu_exec(s.pc, cpu, tb);
    }

    return tb->tc.ptr;
}

/* Return the current PC from CPU, which may be cached in TB. */
static vaddr log_pc(CPUState *cpu, const TranslationBlock *tb)
{
    if (tb_cflags(tb) & CF_PCREL) {
        return cpu->cc->get_pc(cpu);
    } else {
        return tb->pc;
    }
}

/* Execute a TB, and fix up the CPU state afterwards if necessary */
/*
 * Disable CFI checks.
 * TCG creates binary blobs at runtime, with the transformed code.
 * A TB is a blob of binary code, created at runtime and called with an
 * indirect function call.  Since such a function did not exist at compile
 * time, the CFI runtime has no way to verify its signature and would fail.
 * TCG is not considered a security-sensitive part of QEMU, so this does not
 * affect the impact of CFI in environments with high security requirements.
 */
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
    uintptr_t ret;
    TranslationBlock *last_tb;
    const void *tb_ptr = itb->tc.ptr;

    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
        log_cpu_exec(log_pc(cpu, itb), cpu, itb);
    }

    qemu_thread_jit_execute();
    ret = tcg_qemu_tb_exec(cpu_env(cpu), tb_ptr);
    cpu->neg.can_do_io = true;
    qemu_plugin_disable_mem_helpers(cpu);
    /*
     * TODO: Delay swapping back to the read-write region of the TB
     * until we actually need to modify the TB.  The read-only copy,
     * coming from the rx region, shares the same host TLB entry as
     * the code that executed the exit_tb opcode that arrived here.
     * If we insist on touching both the RX and the RW pages, we
     * double the host TLB pressure.
     */
    last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
    *tb_exit = ret & TB_EXIT_MASK;

    trace_exec_tb_exit(last_tb, *tb_exit);

    if (*tb_exit > TB_EXIT_IDX1) {
        /*
         * We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
        CPUClass *cc = cpu->cc;
        const TCGCPUOps *tcg_ops = cc->tcg_ops;

        if (tcg_ops->synchronize_from_tb) {
            tcg_ops->synchronize_from_tb(cpu, last_tb);
        } else {
            tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
            assert(cc->set_pc);
            cc->set_pc(cpu, last_tb->pc);
        }
        if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
            vaddr pc = log_pc(cpu, last_tb);
            if (qemu_log_in_addr_range(pc)) {
                qemu_log("Stopped execution of TB chain before %p [%016"
                         VADDR_PRIx "] %s\n",
                         last_tb->tc.ptr, pc, lookup_symbol(pc));
            }
        }
    }

    /*
     * If gdb single-step, and we haven't raised another exception,
     * raise a debug exception.  Single-step with another exception
     * is handled in cpu_handle_exception.
     */
    if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
        cpu->exception_index = EXCP_DEBUG;
        cpu_loop_exit(cpu);
    }

    return last_tb;
}

static void cpu_exec_enter(CPUState *cpu)
{
    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;

    if (tcg_ops->cpu_exec_enter) {
        tcg_ops->cpu_exec_enter(cpu);
    }
}

static void cpu_exec_exit(CPUState *cpu)
{
    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;

    if (tcg_ops->cpu_exec_exit) {
        tcg_ops->cpu_exec_exit(cpu);
    }
}

static void cpu_exec_longjmp_cleanup(CPUState *cpu)
{
    /* Non-buggy compilers preserve this; assert the correct value. */
    g_assert(cpu == current_cpu);

#ifdef CONFIG_USER_ONLY
    clear_helper_retaddr();
    if (have_mmap_lock()) {
        mmap_unlock();
    }
#else
    /*
     * For softmmu, a tlb_fill fault during translation will land here,
     * and we need to release any page locks held.  In system mode we
     * have one tcg_ctx per thread, so we know it was this cpu doing
     * the translation.
     *
     * Alternative 1: Install a cleanup to be called via an exception
     * handling safe longjmp.  It seems plausible that all our hosts
     * support such a thing.  We'd have to properly register unwind info
     * for the JIT for EH, rather than just for GDB.
     *
     * Alternative 2: Set and restore cpu->jmp_env in tb_gen_code to
     * capture the cpu_loop_exit longjmp, perform the cleanup, and
     * jump again to arrive here.
     */
    if (tcg_ctx->gen_tb) {
        tb_unlock_pages(tcg_ctx->gen_tb);
        tcg_ctx->gen_tb = NULL;
    }
#endif
    if (bql_locked()) {
        bql_unlock();
    }
    assert_no_pages_locked();
}

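/*
 * Execute exactly one guest instruction inside the exclusive region.
 * This is how an atomic operation that cannot be performed natively on
 * the host is emulated: every other vCPU is stopped, the instruction
 * is executed serially (CF_PARALLEL cleared), and the exclusive region
 * is left again before resuming normal execution.
 */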
void cpu_exec_step_atomic(CPUState *cpu)
{
    TranslationBlock *tb;
    int tb_exit;

    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
        s.cflags = curr_cflags(cpu);

        /* Execute in a serial context. */
        s.cflags &= ~CF_PARALLEL;
        /* After 1 insn, return and release the exclusive lock. */
        s.cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
        /*
         * No need to check_for_breakpoints here.
         * We only arrive in cpu_exec_step_atomic after beginning execution
         * of an insn that includes an atomic operation we can't handle.
         * Any breakpoint for this insn will have been recognized earlier.
         */

        tb = tb_lookup(cpu, s);
        if (tb == NULL) {
            mmap_lock();
            tb = tb_gen_code(cpu, s);
            mmap_unlock();
        }

        cpu_exec_enter(cpu);
        /* execute the generated code */
        trace_exec_tb(tb, s.pc);
        cpu_tb_exec(cpu, tb, &tb_exit);
        cpu_exec_exit(cpu);
    } else {
        cpu_exec_longjmp_cleanup(cpu);
    }

    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjump out of either the codegen or
     * the execution.
     */
    g_assert(cpu_in_exclusive_context(cpu));
    cpu->running = false;
    end_exclusive();
}

void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    /*
     * Get the rx view of the structure, from which we find the
     * executable code address, and tb_target_set_jmp_target can
     * produce a pc-relative displacement to jmp_target_addr[n].
     */
    const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb);
    uintptr_t offset = tb->jmp_insn_offset[n];
    uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset;
    uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;

    tb->jmp_target_addr[n] = addr;
    tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw);
}

static inline void tb_add_jump(TranslationBlock *tb, int n,
                               TranslationBlock *tb_next)
{
    uintptr_t old;

    qemu_thread_jit_write();
    assert(n < ARRAY_SIZE(tb->jmp_list_next));
    qemu_spin_lock(&tb_next->jmp_lock);

    /* make sure the destination TB is valid */
    if (tb_next->cflags & CF_INVALID) {
        goto out_unlock_next;
    }
    /* Atomically claim the jump destination slot only if it was NULL */
    old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                          (uintptr_t)tb_next);
    if (old) {
        goto out_unlock_next;
    }

    /* patch the native jump address */
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);

    /* add in TB jmp list */
    tb->jmp_list_next[n] = tb_next->jmp_list_head;
    tb_next->jmp_list_head = (uintptr_t)tb | n;

    qemu_spin_unlock(&tb_next->jmp_lock);

    qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
                  tb->tc.ptr, n, tb_next->tc.ptr);
    return;

 out_unlock_next:
    qemu_spin_unlock(&tb_next->jmp_lock);
}

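/*
 * Return true if the CPU is halted and should stay halted, in which
 * case the caller leaves cpu_exec() with EXCP_HALTED.  In system mode
 * the target hook decides whether a pending interrupt wakes the CPU.
 */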
static inline bool cpu_handle_halt(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
        const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
        bool leave_halt = tcg_ops->cpu_exec_halt(cpu);

        if (!leave_halt) {
            return true;
        }

        cpu->halted = 0;
    }
#endif /* !CONFIG_USER_ONLY */

    return false;
}

static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
    CPUWatchpoint *wp;

    if (!cpu->watchpoint_hit) {
        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    if (tcg_ops->debug_excp_handler) {
        tcg_ops->debug_excp_handler(cpu);
    }
}

static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
                                  | CF_NOIRQ | 1;
        }
#endif
        return false;
    }

    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    }

#if defined(CONFIG_USER_ONLY)
    /*
     * If user mode only, we simulate a fake exception which will be
     * handled outside the cpu execution loop.
     */
    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
    if (tcg_ops->fake_user_interrupt) {
        tcg_ops->fake_user_interrupt(cpu);
    }
    *ret = cpu->exception_index;
    cpu->exception_index = -1;
    return true;
#else
    if (replay_exception()) {
        const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;

        bql_lock();
        tcg_ops->do_interrupt(cpu);
        bql_unlock();
        cpu->exception_index = -1;

        if (unlikely(cpu->singlestep_enabled)) {
            /*
             * After processing the exception, ensure an EXCP_DEBUG is
             * raised when single-stepping so that GDB doesn't miss the
             * next instruction.
             */
            *ret = EXCP_DEBUG;
            cpu_handle_debug_exception(cpu);
            return true;
        }
    } else if (!replay_has_interrupt()) {
        /* give a chance to iothread in replay mode */
        *ret = EXCP_INTERRUPT;
        return true;
    }
#endif

    return false;
}

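/*
 * With icount enabled, return true once the instruction budget for
 * this vCPU is exhausted, so that we go back to the main loop and a
 * new budget can be assigned before executing further TBs.
 */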
static inline bool icount_exit_request(CPUState *cpu)
{
    if (!icount_enabled()) {
        return false;
    }
    if (cpu->cflags_next_tb != -1 && !(cpu->cflags_next_tb & CF_USE_ICOUNT)) {
        return false;
    }
    return cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0;
}

static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    /*
     * If we have requested custom cflags with CF_NOIRQ we should
     * skip checking here.  Any pending interrupts will get picked up
     * by the next TB we execute under normal cflags.
     */
    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
        return false;
    }

    /*
     * Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also smp_wmb in cpu_exit()).
     */
    qatomic_set_mb(&cpu->neg.icount_decr.u16.high, 0);

    if (unlikely(qatomic_read(&cpu->interrupt_request))) {
        int interrupt_request;
        bql_lock();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
        }
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            bql_unlock();
            return true;
        }
#if !defined(CONFIG_USER_ONLY)
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
            replay_interrupt();
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            bql_unlock();
            return true;
        } else {
            const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;

            if (interrupt_request & CPU_INTERRUPT_RESET) {
                replay_interrupt();
                tcg_ops->cpu_exec_reset(cpu);
                bql_unlock();
                return true;
            }

            /*
             * The target hook has 3 exit conditions:
             * False when the interrupt isn't processed,
             * True when it is, and we should restart on a new TB,
             * and it may also leave via longjmp through cpu_loop_exit.
             */
            if (tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (!tcg_ops->need_replay_interrupt ||
                    tcg_ops->need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                if (unlikely(cpu->singlestep_enabled)) {
                    cpu->exception_index = EXCP_DEBUG;
                    bql_unlock();
                    return true;
                }
                cpu->exception_index = -1;
                *last_tb = NULL;
            }
            /*
             * The target hook may have updated 'cpu->interrupt_request';
             * reload the 'interrupt_request' value.
             */
            interrupt_request = cpu->interrupt_request;
        }
#endif /* !CONFIG_USER_ONLY */
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /*
             * Ensure that no TB jump will be modified, as
             * the program flow was changed.
             */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        bql_unlock();
    }

    /* Finally, check if we need to exit to the main loop. */
    if (unlikely(qatomic_read(&cpu->exit_request)) || icount_exit_request(cpu)) {
        qatomic_set(&cpu->exit_request, 0);
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}

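/*
 * Execute one TB and, unless execution was interrupted by an exit
 * request, remember it in *last_tb so the next TB can be chained to
 * it.  When the icount budget runs out, refill the decrementer and,
 * if fewer instructions remain than the next TB would execute, request
 * a suitably shortened TB via cpu->cflags_next_tb.
 */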
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    vaddr pc, TranslationBlock **last_tb,
                                    int *tb_exit)
{
    trace_exec_tb(tb, pc);
    tb = cpu_tb_exec(cpu, tb, tb_exit);
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    if (cpu_loop_exit_requested(cpu)) {
        /*
         * Something asked us to stop executing chained TBs; just
         * continue round the main loop.  Whatever requested the exit
         * will also have set something else (eg exit_request or
         * interrupt_request) which will be handled by
         * cpu_handle_interrupt.  cpu_handle_interrupt will also
         * clear cpu->neg.icount_decr.u16.high.
         */
        return;
    }

    /* Instruction counter expired. */
    assert(icount_enabled());
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    icount_update(cpu);
    /* Refill decrementer and continue execution. */
    int32_t insns_left = MIN(0xffff, cpu->icount_budget);
    cpu->neg.icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    /*
     * If the next tb has more instructions than we have left to
     * execute we need to ensure we find/generate a TB with exactly
     * insns_left instructions in it.
     */
    if (insns_left > 0 && insns_left < tb->icount) {
        assert(insns_left <= CF_COUNT_MASK);
        assert(cpu->icount_extra == 0);
        cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
    }
#endif
}

/* main execution loop */

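/*
 * Handle any pending exception, then repeatedly look up (or generate)
 * and execute TBs, chaining them together where possible, until
 * cpu_handle_exception() reports an exit condition, which becomes the
 * return value.
 */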
static int __attribute__((noinline))
cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
{
    int ret;

    /* if an exception is pending, we execute it here */
    while (!cpu_handle_exception(cpu, &ret)) {
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;

        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb;
            TCGTBCPUState s = cpu->cc->tcg_ops->get_tb_cpu_state(cpu);
            s.cflags = cpu->cflags_next_tb;

            /*
             * When requested, use an exact setting for cflags for the next
             * execution.  This is used for icount, precise smc, and stop-
             * after-access watchpoints.  Since this request should never
             * have CF_INVALID set, -1 is a convenient invalid value that
             * does not require tcg headers for cpu_common_reset.
             */
            if (s.cflags == -1) {
                s.cflags = curr_cflags(cpu);
            } else {
                cpu->cflags_next_tb = -1;
            }

            if (check_for_breakpoints(cpu, s.pc, &s.cflags)) {
                break;
            }

            tb = tb_lookup(cpu, s);
            if (tb == NULL) {
                CPUJumpCache *jc;
                uint32_t h;

                mmap_lock();
                tb = tb_gen_code(cpu, s);
                mmap_unlock();

                /*
                 * Add the new TB to the virtual-pc hash table
                 * for fast lookup next time.
                 */
                h = tb_jmp_cache_hash_func(s.pc);
                jc = cpu->tb_jmp_cache;
                jc->array[h].pc = s.pc;
                qatomic_set(&jc->array[h].tb, tb);
            }

#ifndef CONFIG_USER_ONLY
            /*
             * We don't take care of direct jumps when address mapping
             * changes in system emulation.  So it's not safe to make a
             * direct jump to a TB spanning two pages because the mapping
             * for the second page can change.
             */
            if (tb_page_addr1(tb) != -1) {
                last_tb = NULL;
            }
#endif
            /* See if we can patch the calling TB. */
            if (last_tb) {
                tb_add_jump(last_tb, tb_exit, tb);
            }

            cpu_loop_exec_tb(cpu, tb, s.pc, &last_tb, &tb_exit);

            /*
             * Try to align the host and virtual clocks
             * if the guest is ahead of real time.
             */
            align_clocks(sc, cpu);
        }
    }
    return ret;
}

static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
{
    /* Prepare setjmp context for exception handling. */
    if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
        cpu_exec_longjmp_cleanup(cpu);
    }

    return cpu_exec_loop(cpu, sc);
}

int cpu_exec(CPUState *cpu)
{
    int ret;
    SyncClocks sc = { 0 };

    /* replay_interrupt may need current_cpu */
    current_cpu = cpu;

    if (cpu_handle_halt(cpu)) {
        return EXCP_HALTED;
    }

    RCU_READ_LOCK_GUARD();
    cpu_exec_enter(cpu);

    /*
     * Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
     * what we have to do is sleep until it is 0.  As for the
     * advance/delay we gain here, we try to fix it next time.
     */
    init_delay_params(&sc, cpu);

    ret = cpu_exec_setjmp(cpu, &sc);

    cpu_exec_exit(cpu);
    return ret;
}

bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
{
    static bool tcg_target_initialized;

    if (!tcg_target_initialized) {
        /* Check mandatory TCGCPUOps handlers */
        const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
#ifndef CONFIG_USER_ONLY
        assert(tcg_ops->cpu_exec_halt);
        assert(tcg_ops->cpu_exec_interrupt);
        assert(tcg_ops->cpu_exec_reset);
#endif /* !CONFIG_USER_ONLY */
        assert(tcg_ops->translate_code);
        assert(tcg_ops->get_tb_cpu_state);
        assert(tcg_ops->mmu_index);
        tcg_ops->initialize();
        tcg_target_initialized = true;
    }

    cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
    tlb_init(cpu);
#ifndef CONFIG_USER_ONLY
    tcg_iommu_init_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */
    /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */

    return true;
}

/* undo the initializations in reverse order */
void tcg_exec_unrealizefn(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    tcg_iommu_free_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    tlb_destroy(cpu);
    g_free_rcu(cpu->tb_jmp_cache, rcu);
}