/*
 *  User emulator execution
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "accel/tcg/cpu-ops.h"
#include "disas/disas.h"
#include "exec/vaddr.h"
#include "exec/tlb-flags.h"
#include "tcg/tcg.h"
#include "qemu/bitops.h"
#include "qemu/rcu.h"
#include "accel/tcg/cpu-ldst-common.h"
#include "accel/tcg/helper-retaddr.h"
#include "accel/tcg/probe.h"
#include "user/cpu_loop.h"
#include "user/guest-host.h"
#include "qemu/main-loop.h"
#include "user/page-protection.h"
#include "exec/page-protection.h"
#include "exec/helper-proto-common.h"
#include "qemu/atomic128.h"
#include "qemu/bswap.h"
#include "qemu/int128.h"
#include "trace.h"
#include "tcg/tcg-ldst.h"
#include "backend-ldst.h"
#include "internal-common.h"
#include "tb-internal.h"

/*
 * Per-thread marker consulted by adjust_signal_pc() below: 0 selects its
 * "fault within generated code" case, 1 its "host read for translation"
 * case, and any other value is taken as a helper's host return address.
 */
__thread uintptr_t helper_retaddr;

//#define DEBUG_SIGNAL

/*
 * Record pending interrupt bits for @cpu.
 *
 * The caller must hold the BQL (asserted).  @mask is OR-ed into
 * cpu->interrupt_request, after which icount_decr.u16.high is set to -1
 * with qatomic_set().
 * NOTE(review): the -1 store presumably makes generated code leave the
 * current TB at its next check -- confirm against the TB prologue code.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(bql_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
}

/*
 * Adjust the pc to pass to cpu_restore_state; return the memop type.
*/
MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
{
    switch (helper_retaddr) {
    default:
        /*
         * Fault during host memory operation within a helper function.
         * The helper's host return address, saved here, gives us a
         * pointer into the generated code that will unwind to the
         * correct guest pc.
         */
        *pc = helper_retaddr;
        break;

    case 0:
        /*
         * Fault during host memory operation within generated code.
         * (Or, an unrelated bug within qemu, but we can't tell from here).
         *
         * We take the host pc from the signal frame.  However, we cannot
         * use that value directly.  Within cpu_restore_state_from_tb, we
         * assume PC comes from GETPC(), as used by the helper functions,
         * so we adjust the address by -GETPC_ADJ to form an address that
         * is within the call insn, so that the address does not accidentally
         * match the beginning of the next guest insn.  However, when the
         * pc comes from the signal frame it points to the actual faulting
         * host memory insn and not the return from a call insn.
         *
         * Therefore, adjust to compensate for what will be done later
         * by cpu_restore_state_from_tb.
         */
        *pc += GETPC_ADJ;
        break;

    case 1:
        /*
         * Fault during host read for translation, or loosely, "execution".
         *
         * The guest pc is already pointing to the start of the TB for which
         * code is being generated.  If the guest translator manages the
         * page crossings correctly, this is exactly the correct address
         * (and if the translator doesn't handle page boundaries correctly
         * there's little we can do about that here).  Therefore, do not
         * trigger the unwinder.
         */
        *pc = 0;
        return MMU_INST_FETCH;
    }

    /* Cases 0 and "default" are data accesses; direction comes from caller. */
    return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
}

/**
 * handle_sigsegv_accerr_write:
 * @cpu: the cpu context
 * @old_set: the sigset_t from the signal ucontext_t
 * @host_pc: the host pc, adjusted for the signal
 * @guest_addr: the guest address of the fault
 *
 * Return true if the write fault has been handled, and should be re-tried.
 *
 * Note that it is important that we don't call page_unprotect() unless
 * this is really a "write to nonwritable page" fault, because
 * page_unprotect() assumes that if it is called for an access to
 * a page that's writable this means we had two threads racing and
 * another thread got there first and already made the page writable;
 * so we will retry the access.  If we were to call page_unprotect()
 * for some other kind of fault that should really be passed to the
 * guest, we'd end up in an infinite loop of retrying the faulting access.
 */
bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
                                 uintptr_t host_pc, vaddr guest_addr)
{
    switch (page_unprotect(cpu, guest_addr, host_pc)) {
    case 0:
        /*
         * Fault not caused by a page marked unwritable to protect
         * cached translations, must be the guest binary's problem.
         */
        return false;
    case 1:
        /*
         * Fault caused by protection of cached translation; TBs
         * invalidated, so resume execution.
         */
        return true;
    case 2:
        /*
         * Fault caused by protection of cached translation, and the
         * currently executing TB was modified and must be exited immediately.
         */
        /* Restore the pre-signal mask before leaving the handler context. */
        sigprocmask(SIG_SETMASK, old_set, NULL);
        cpu_loop_exit_noexc(cpu);
        /* NORETURN */
    default:
        g_assert_not_reached();
    }
}

/*
 * One contiguous guest address range [itree.start, itree.last] together
 * with the PAGE_* flags that apply to it.  Nodes are released with
 * g_free_rcu() through the @rcu member.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;
    IntervalTreeNode itree;
    int flags;
} PageFlagsNode;

/* Interval tree holding every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;

/* Return the first node overlapping [start,last], or NULL if there is none. */
static PageFlagsNode *pageflags_find(vaddr start, vaddr last)
{
    IntervalTreeNode *n;

    n = interval_tree_iter_first(&pageflags_root, start, last);
    return n ? container_of(n, PageFlagsNode, itree) : NULL;
}

/* Return the node following @p that overlaps [start,last], or NULL. */
static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last)
{
    IntervalTreeNode *n;

    n = interval_tree_iter_next(&p->itree, start, last);
    return n ? container_of(n, PageFlagsNode, itree) : NULL;
}

/*
 * Call @fn(@priv, start, end, flags) for every recorded region, with the
 * mmap lock held for the whole walk.  @end is the node's last address
 * plus one.  The walk stops at the first non-zero return from @fn, and
 * that value is returned; otherwise 0 is returned.
 */
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    IntervalTreeNode *n;
    int rc = 0;

    mmap_lock();
    for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
         n != NULL;
         n = interval_tree_iter_next(n, 0, -1)) {
        PageFlagsNode *p = container_of(n, PageFlagsNode, itree);

        rc = fn(priv, n->start, n->last + 1, p->flags);
        if (rc != 0) {
            break;
        }
    }
    mmap_unlock();

    return rc;
}

/*
 * walk_memory_regions() callback for page_dump(): print one region as
 * "start-end size rwx" to the FILE passed in @opaque.  Always returns 0
 * so that the walk continues.
 */
static int dump_region(void *opaque, vaddr start, vaddr end, int prot)
{
    FILE *f = opaque;
    uint64_t mask;
    int width;

    /* Choose 8 or 16 hex digits depending on the guest address space size. */
    if (guest_addr_max <= UINT32_MAX) {
        mask = UINT32_MAX, width = 8;
    } else {
        mask = UINT64_MAX, width = 16;
    }

    fprintf(f, "%0*" PRIx64 "-%0*" PRIx64 " %0*" PRIx64 " %c%c%c\n",
            width, start & mask,
            width, end & mask,
            width, (end - start) & mask,
            ((prot & PAGE_READ) ? 'r' : '-'),
            ((prot & PAGE_WRITE) ? 'w' : '-'),
            ((prot & PAGE_EXEC) ? 'x' : '-'));
    return 0;
}

/* dump memory mappings */
void page_dump(FILE *f)
{
    /* Same column width selection as dump_region(). */
    int width = guest_addr_max <= UINT32_MAX ? 8 : 16;

    fprintf(f, "%-*s %-*s %-*s %s\n",
            width, "start", width, "end", width, "size", "prot");
    walk_memory_regions(f, dump_region);
}

/*
 * Return the PAGE_* flags recorded for @address, or 0 if no region
 * covers it.
 */
int page_get_flags(vaddr address)
{
    PageFlagsNode *p = pageflags_find(address, address);

    /*
     * See util/interval-tree.c re lockless lookups: no false positives but
     * there are false negatives.  If we find nothing, retry with the mmap
     * lock acquired.
     */
    if (p) {
        return p->flags;
    }
    /* The first lookup was already made under the lock: the miss is real. */
    if (have_mmap_lock()) {
        return 0;
    }

    mmap_lock();
    p = pageflags_find(address, address);
    mmap_unlock();
    return p ? p->flags : 0;
}

/* A subroutine of page_set_flags: insert a new node for [start,last]. */
static void pageflags_create(vaddr start, vaddr last, int flags)
{
    PageFlagsNode *p = g_new(PageFlagsNode, 1);

    p->itree.start = start;
    p->itree.last = last;
    p->flags = flags;
    interval_tree_insert(&p->itree, &pageflags_root);
}

/*
 * A subroutine of page_set_flags: remove everything in [start,last].
 * Returns true if any node that overlapped the range had PAGE_EXEC set.
 */
static bool pageflags_unset(vaddr start, vaddr last)
{
    bool inval_tb = false;

    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        vaddr p_last;

        if (!p) {
            break;
        }

        if (p->flags & PAGE_EXEC) {
            inval_tb = true;
        }

        /* Take the node out of the tree before editing its bounds. */
        interval_tree_remove(&p->itree, &pageflags_root);
        p_last = p->itree.last;

        if (p->itree.start < start) {
            /* Truncate the node from the end, or split out the middle. */
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            if (last < p_last) {
                pageflags_create(last + 1, p_last, p->flags);
                break;
            }
        } else if (p_last <= last) {
            /* Range completely covers node -- remove it. */
            g_free_rcu(p, rcu);
        } else {
            /* Truncate the node from the start. */
            p->itree.start = last + 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            break;
        }
    }

    return inval_tb;
}

/*
 * A subroutine of page_set_flags: nothing overlaps [start,last],
 * but check adjacent mappings and maybe merge into a single range.
 */
static void pageflags_create_merge(vaddr start, vaddr last, int flags)
{
    PageFlagsNode *next = NULL, *prev = NULL;

    /* Look at the byte just below @start, unless @start is address 0. */
    if (start > 0) {
        prev = pageflags_find(start - 1, start - 1);
        if (prev) {
            if (prev->flags == flags) {
                interval_tree_remove(&prev->itree, &pageflags_root);
            } else {
                prev = NULL;
            }
        }
    }
    /* Look at the byte just above @last, unless that would wrap to 0. */
    if (last + 1 != 0) {
        next = pageflags_find(last + 1, last + 1);
        if (next) {
            if (next->flags == flags) {
                interval_tree_remove(&next->itree, &pageflags_root);
            } else {
                next = NULL;
            }
        }
    }

    /* At this point prev/next are non-NULL only if they can be merged. */
    if (prev) {
        if (next) {
            prev->itree.last = next->itree.last;
            g_free_rcu(next, rcu);
        } else {
            prev->itree.last = last;
        }
        interval_tree_insert(&prev->itree, &pageflags_root);
    } else if (next) {
        next->itree.start = start;
        interval_tree_insert(&next->itree, &pageflags_root);
    } else {
        pageflags_create(start, last, flags);
    }
}

/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)

/* A subroutine of page_set_flags: add flags to [start,last].
*/ 364 static bool pageflags_set_clear(vaddr start, vaddr last, 365 int set_flags, int clear_flags) 366 { 367 PageFlagsNode *p; 368 vaddr p_start, p_last; 369 int p_flags, merge_flags; 370 bool inval_tb = false; 371 372 restart: 373 p = pageflags_find(start, last); 374 if (!p) { 375 if (set_flags) { 376 pageflags_create_merge(start, last, set_flags); 377 } 378 goto done; 379 } 380 381 p_start = p->itree.start; 382 p_last = p->itree.last; 383 p_flags = p->flags; 384 /* Using mprotect on a page does not change sticky bits. */ 385 merge_flags = (p_flags & ~clear_flags) | set_flags; 386 387 /* 388 * Need to flush if an overlapping executable region 389 * removes exec, or adds write. 390 */ 391 if ((p_flags & PAGE_EXEC) 392 && (!(merge_flags & PAGE_EXEC) 393 || (merge_flags & ~p_flags & PAGE_WRITE))) { 394 inval_tb = true; 395 } 396 397 /* 398 * If there is an exact range match, update and return without 399 * attempting to merge with adjacent regions. 400 */ 401 if (start == p_start && last == p_last) { 402 if (merge_flags) { 403 p->flags = merge_flags; 404 } else { 405 interval_tree_remove(&p->itree, &pageflags_root); 406 g_free_rcu(p, rcu); 407 } 408 goto done; 409 } 410 411 /* 412 * If sticky bits affect the original mapping, then we must be more 413 * careful about the existing intervals and the separate flags. 
414 */ 415 if (set_flags != merge_flags) { 416 if (p_start < start) { 417 interval_tree_remove(&p->itree, &pageflags_root); 418 p->itree.last = start - 1; 419 interval_tree_insert(&p->itree, &pageflags_root); 420 421 if (last < p_last) { 422 if (merge_flags) { 423 pageflags_create(start, last, merge_flags); 424 } 425 pageflags_create(last + 1, p_last, p_flags); 426 } else { 427 if (merge_flags) { 428 pageflags_create(start, p_last, merge_flags); 429 } 430 if (p_last < last) { 431 start = p_last + 1; 432 goto restart; 433 } 434 } 435 } else { 436 if (start < p_start && set_flags) { 437 pageflags_create(start, p_start - 1, set_flags); 438 } 439 if (last < p_last) { 440 interval_tree_remove(&p->itree, &pageflags_root); 441 p->itree.start = last + 1; 442 interval_tree_insert(&p->itree, &pageflags_root); 443 if (merge_flags) { 444 pageflags_create(start, last, merge_flags); 445 } 446 } else { 447 if (merge_flags) { 448 p->flags = merge_flags; 449 } else { 450 interval_tree_remove(&p->itree, &pageflags_root); 451 g_free_rcu(p, rcu); 452 } 453 if (p_last < last) { 454 start = p_last + 1; 455 goto restart; 456 } 457 } 458 } 459 goto done; 460 } 461 462 /* If flags are not changing for this range, incorporate it. */ 463 if (set_flags == p_flags) { 464 if (start < p_start) { 465 interval_tree_remove(&p->itree, &pageflags_root); 466 p->itree.start = start; 467 interval_tree_insert(&p->itree, &pageflags_root); 468 } 469 if (p_last < last) { 470 start = p_last + 1; 471 goto restart; 472 } 473 goto done; 474 } 475 476 /* Maybe split out head and/or tail ranges with the original flags. 
*/ 477 interval_tree_remove(&p->itree, &pageflags_root); 478 if (p_start < start) { 479 p->itree.last = start - 1; 480 interval_tree_insert(&p->itree, &pageflags_root); 481 482 if (p_last < last) { 483 goto restart; 484 } 485 if (last < p_last) { 486 pageflags_create(last + 1, p_last, p_flags); 487 } 488 } else if (last < p_last) { 489 p->itree.start = last + 1; 490 interval_tree_insert(&p->itree, &pageflags_root); 491 } else { 492 g_free_rcu(p, rcu); 493 goto restart; 494 } 495 if (set_flags) { 496 pageflags_create(start, last, set_flags); 497 } 498 499 done: 500 return inval_tb; 501 } 502 503 void page_set_flags(vaddr start, vaddr last, int flags) 504 { 505 bool reset = false; 506 bool inval_tb = false; 507 508 /* This function should never be called with addresses outside the 509 guest address space. If this assert fires, it probably indicates 510 a missing call to h2g_valid. */ 511 assert(start <= last); 512 assert(last <= guest_addr_max); 513 /* Only set PAGE_ANON with new mappings. */ 514 assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET)); 515 assert_memory_lock(); 516 517 start &= TARGET_PAGE_MASK; 518 last |= ~TARGET_PAGE_MASK; 519 520 if (!(flags & PAGE_VALID)) { 521 flags = 0; 522 } else { 523 reset = flags & PAGE_RESET; 524 flags &= ~PAGE_RESET; 525 if (flags & PAGE_WRITE) { 526 flags |= PAGE_WRITE_ORG; 527 } 528 } 529 530 if (!flags || reset) { 531 page_reset_target_data(start, last); 532 inval_tb |= pageflags_unset(start, last); 533 } 534 if (flags) { 535 inval_tb |= pageflags_set_clear(start, last, flags, 536 ~(reset ? 
0 : PAGE_STICKY)); 537 } 538 if (inval_tb) { 539 tb_invalidate_phys_range(NULL, start, last); 540 } 541 } 542 543 bool page_check_range(vaddr start, vaddr len, int flags) 544 { 545 vaddr last; 546 int locked; /* tri-state: =0: unlocked, +1: global, -1: local */ 547 bool ret; 548 549 if (len == 0) { 550 return true; /* trivial length */ 551 } 552 553 last = start + len - 1; 554 if (last < start) { 555 return false; /* wrap around */ 556 } 557 558 locked = have_mmap_lock(); 559 while (true) { 560 PageFlagsNode *p = pageflags_find(start, last); 561 int missing; 562 563 if (!p) { 564 if (!locked) { 565 /* 566 * Lockless lookups have false negatives. 567 * Retry with the lock held. 568 */ 569 mmap_lock(); 570 locked = -1; 571 p = pageflags_find(start, last); 572 } 573 if (!p) { 574 ret = false; /* entire region invalid */ 575 break; 576 } 577 } 578 if (start < p->itree.start) { 579 ret = false; /* initial bytes invalid */ 580 break; 581 } 582 583 missing = flags & ~p->flags; 584 if (missing & ~PAGE_WRITE) { 585 ret = false; /* page doesn't match */ 586 break; 587 } 588 if (missing & PAGE_WRITE) { 589 if (!(p->flags & PAGE_WRITE_ORG)) { 590 ret = false; /* page not writable */ 591 break; 592 } 593 /* Asking about writable, but has been protected: undo. */ 594 if (!page_unprotect(NULL, start, 0)) { 595 ret = false; 596 break; 597 } 598 /* TODO: page_unprotect should take a range, not a single page. */ 599 if (last - start < TARGET_PAGE_SIZE) { 600 ret = true; /* ok */ 601 break; 602 } 603 start += TARGET_PAGE_SIZE; 604 continue; 605 } 606 607 if (last <= p->itree.last) { 608 ret = true; /* ok */ 609 break; 610 } 611 start = p->itree.last + 1; 612 } 613 614 /* Release the lock if acquired locally. 
*/ 615 if (locked < 0) { 616 mmap_unlock(); 617 } 618 return ret; 619 } 620 621 bool page_check_range_empty(vaddr start, vaddr last) 622 { 623 assert(last >= start); 624 assert_memory_lock(); 625 return pageflags_find(start, last) == NULL; 626 } 627 628 vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align) 629 { 630 vaddr len_m1, align_m1; 631 632 assert(min <= max); 633 assert(max <= guest_addr_max); 634 assert(len != 0); 635 assert(is_power_of_2(align)); 636 assert_memory_lock(); 637 638 len_m1 = len - 1; 639 align_m1 = align - 1; 640 641 /* Iteratively narrow the search region. */ 642 while (1) { 643 PageFlagsNode *p; 644 645 /* Align min and double-check there's enough space remaining. */ 646 min = (min + align_m1) & ~align_m1; 647 if (min > max) { 648 return -1; 649 } 650 if (len_m1 > max - min) { 651 return -1; 652 } 653 654 p = pageflags_find(min, min + len_m1); 655 if (p == NULL) { 656 /* Found! */ 657 return min; 658 } 659 if (max <= p->itree.last) { 660 /* Existing allocation fills the remainder of the search region. */ 661 return -1; 662 } 663 /* Skip across existing allocation. */ 664 min = p->itree.last + 1; 665 } 666 } 667 668 void tb_lock_page0(tb_page_addr_t address) 669 { 670 PageFlagsNode *p; 671 vaddr start, last; 672 int host_page_size = qemu_real_host_page_size(); 673 int prot; 674 675 assert_memory_lock(); 676 677 if (host_page_size <= TARGET_PAGE_SIZE) { 678 start = address & TARGET_PAGE_MASK; 679 last = start + TARGET_PAGE_SIZE - 1; 680 } else { 681 start = address & -host_page_size; 682 last = start + host_page_size - 1; 683 } 684 685 p = pageflags_find(start, last); 686 if (!p) { 687 return; 688 } 689 prot = p->flags; 690 691 if (unlikely(p->itree.last < last)) { 692 /* More than one protection region covers the one host page. 
*/ 693 assert(TARGET_PAGE_SIZE < host_page_size); 694 while ((p = pageflags_next(p, start, last)) != NULL) { 695 prot |= p->flags; 696 } 697 } 698 699 if (prot & PAGE_WRITE) { 700 pageflags_set_clear(start, last, 0, PAGE_WRITE); 701 mprotect(g2h_untagged(start), last - start + 1, 702 prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE); 703 } 704 } 705 706 /* 707 * Called from signal handler: invalidate the code and unprotect the 708 * page. Return 0 if the fault was not handled, 1 if it was handled, 709 * and 2 if it was handled but the caller must cause the TB to be 710 * immediately exited. (We can only return 2 if the 'pc' argument is 711 * non-zero.) 712 */ 713 int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc) 714 { 715 PageFlagsNode *p; 716 bool current_tb_invalidated; 717 718 assert((cpu == NULL) == (pc == 0)); 719 720 /* 721 * Technically this isn't safe inside a signal handler. However we 722 * know this only ever happens in a synchronous SEGV handler, so in 723 * practice it seems to be ok. 724 */ 725 mmap_lock(); 726 727 p = pageflags_find(address, address); 728 729 /* If this address was not really writable, nothing to do. */ 730 if (!p || !(p->flags & PAGE_WRITE_ORG)) { 731 mmap_unlock(); 732 return 0; 733 } 734 735 current_tb_invalidated = false; 736 if (p->flags & PAGE_WRITE) { 737 /* 738 * If the page is actually marked WRITE then assume this is because 739 * this thread raced with another one which got here first and 740 * set the page to PAGE_WRITE and did the TB invalidate for us. 
741 */ 742 if (pc && cpu->cc->tcg_ops->precise_smc) { 743 TranslationBlock *current_tb = tcg_tb_lookup(pc); 744 if (current_tb) { 745 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; 746 } 747 } 748 } else { 749 int host_page_size = qemu_real_host_page_size(); 750 vaddr start, len, i; 751 int prot; 752 753 if (host_page_size <= TARGET_PAGE_SIZE) { 754 start = address & TARGET_PAGE_MASK; 755 len = TARGET_PAGE_SIZE; 756 prot = p->flags | PAGE_WRITE; 757 pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0); 758 current_tb_invalidated = 759 tb_invalidate_phys_page_unwind(cpu, start, pc); 760 } else { 761 start = address & -host_page_size; 762 len = host_page_size; 763 prot = 0; 764 765 for (i = 0; i < len; i += TARGET_PAGE_SIZE) { 766 vaddr addr = start + i; 767 768 p = pageflags_find(addr, addr); 769 if (p) { 770 prot |= p->flags; 771 if (p->flags & PAGE_WRITE_ORG) { 772 prot |= PAGE_WRITE; 773 pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1, 774 PAGE_WRITE, 0); 775 } 776 } 777 /* 778 * Since the content will be modified, we must invalidate 779 * the corresponding translated code. 780 */ 781 current_tb_invalidated |= 782 tb_invalidate_phys_page_unwind(cpu, addr, pc); 783 } 784 } 785 if (prot & PAGE_EXEC) { 786 prot = (prot & ~PAGE_EXEC) | PAGE_READ; 787 } 788 mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX); 789 } 790 mmap_unlock(); 791 792 /* If current TB was invalidated return to main loop */ 793 return current_tb_invalidated ? 
2 : 1; 794 } 795 796 static int probe_access_internal(CPUArchState *env, vaddr addr, 797 int fault_size, MMUAccessType access_type, 798 bool nonfault, uintptr_t ra) 799 { 800 int acc_flag; 801 bool maperr; 802 803 switch (access_type) { 804 case MMU_DATA_STORE: 805 acc_flag = PAGE_WRITE_ORG; 806 break; 807 case MMU_DATA_LOAD: 808 acc_flag = PAGE_READ; 809 break; 810 case MMU_INST_FETCH: 811 acc_flag = PAGE_EXEC; 812 break; 813 default: 814 g_assert_not_reached(); 815 } 816 817 if (guest_addr_valid_untagged(addr)) { 818 int page_flags = page_get_flags(addr); 819 if (page_flags & acc_flag) { 820 if (access_type != MMU_INST_FETCH 821 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) { 822 return TLB_MMIO; 823 } 824 return 0; /* success */ 825 } 826 maperr = !(page_flags & PAGE_VALID); 827 } else { 828 maperr = true; 829 } 830 831 if (nonfault) { 832 return TLB_INVALID_MASK; 833 } 834 835 cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); 836 } 837 838 int probe_access_flags(CPUArchState *env, vaddr addr, int size, 839 MMUAccessType access_type, int mmu_idx, 840 bool nonfault, void **phost, uintptr_t ra) 841 { 842 int flags; 843 844 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 845 flags = probe_access_internal(env, addr, size, access_type, nonfault, ra); 846 *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr); 847 return flags; 848 } 849 850 void *probe_access(CPUArchState *env, vaddr addr, int size, 851 MMUAccessType access_type, int mmu_idx, uintptr_t ra) 852 { 853 int flags; 854 855 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 856 flags = probe_access_internal(env, addr, size, access_type, false, ra); 857 g_assert((flags & ~TLB_MMIO) == 0); 858 859 return size ? 
g2h(env_cpu(env), addr) : NULL; 860 } 861 862 void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr, 863 MMUAccessType access_type, int mmu_idx) 864 { 865 return g2h(env_cpu(env), addr); 866 } 867 868 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, 869 void **hostp) 870 { 871 int flags; 872 873 flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0); 874 g_assert(flags == 0); 875 876 if (hostp) { 877 *hostp = g2h_untagged(addr); 878 } 879 return addr; 880 } 881 882 /* 883 * Allocate chunks of target data together. For the only current user, 884 * if we allocate one hunk per page, we have overhead of 40/128 or 40%. 885 * Therefore, allocate memory for 64 pages at a time for overhead < 1%. 886 */ 887 #define TPD_PAGES 64 888 #define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES) 889 890 typedef struct TargetPageDataNode { 891 struct rcu_head rcu; 892 IntervalTreeNode itree; 893 char data[] __attribute__((aligned)); 894 } TargetPageDataNode; 895 896 static IntervalTreeRoot targetdata_root; 897 static size_t target_page_data_size; 898 899 void page_reset_target_data(vaddr start, vaddr last) 900 { 901 IntervalTreeNode *n, *next; 902 size_t size = target_page_data_size; 903 904 if (likely(size == 0)) { 905 return; 906 } 907 908 assert_memory_lock(); 909 910 start &= TARGET_PAGE_MASK; 911 last |= ~TARGET_PAGE_MASK; 912 913 for (n = interval_tree_iter_first(&targetdata_root, start, last), 914 next = n ? interval_tree_iter_next(n, start, last) : NULL; 915 n != NULL; 916 n = next, 917 next = next ? 
interval_tree_iter_next(n, start, last) : NULL) { 918 vaddr n_start, n_last, p_ofs, p_len; 919 TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree); 920 921 if (n->start >= start && n->last <= last) { 922 interval_tree_remove(n, &targetdata_root); 923 g_free_rcu(t, rcu); 924 continue; 925 } 926 927 if (n->start < start) { 928 n_start = start; 929 p_ofs = (start - n->start) >> TARGET_PAGE_BITS; 930 } else { 931 n_start = n->start; 932 p_ofs = 0; 933 } 934 n_last = MIN(last, n->last); 935 p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; 936 937 memset(t->data + p_ofs * size, 0, p_len * size); 938 } 939 } 940 941 void *page_get_target_data(vaddr address, size_t size) 942 { 943 IntervalTreeNode *n; 944 TargetPageDataNode *t; 945 vaddr page, region, p_ofs; 946 947 /* Remember the size from the first call, and it should be constant. */ 948 if (unlikely(target_page_data_size != size)) { 949 assert(target_page_data_size == 0); 950 target_page_data_size = size; 951 } 952 953 page = address & TARGET_PAGE_MASK; 954 region = address & TBD_MASK; 955 956 n = interval_tree_iter_first(&targetdata_root, page, page); 957 if (!n) { 958 /* 959 * See util/interval-tree.c re lockless lookups: no false positives 960 * but there are false negatives. If we find nothing, retry with 961 * the mmap lock acquired. We also need the lock for the 962 * allocation + insert. 963 */ 964 mmap_lock(); 965 n = interval_tree_iter_first(&targetdata_root, page, page); 966 if (!n) { 967 t = g_malloc0(sizeof(TargetPageDataNode) + TPD_PAGES * size); 968 n = &t->itree; 969 n->start = region; 970 n->last = region | ~TBD_MASK; 971 interval_tree_insert(n, &targetdata_root); 972 } 973 mmap_unlock(); 974 } 975 976 t = container_of(n, TargetPageDataNode, itree); 977 p_ofs = (page - region) >> TARGET_PAGE_BITS; 978 return t->data + p_ofs * size; 979 } 980 981 /* The system-mode versions of these helpers are in cputlb.c. 
*/ 982 983 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr, 984 MemOp mop, uintptr_t ra, MMUAccessType type) 985 { 986 int a_bits = memop_alignment_bits(mop); 987 void *ret; 988 989 /* Enforce guest required alignment. */ 990 if (unlikely(addr & ((1 << a_bits) - 1))) { 991 cpu_loop_exit_sigbus(cpu, addr, type, ra); 992 } 993 994 ret = g2h(cpu, addr); 995 set_helper_retaddr(ra); 996 return ret; 997 } 998 999 /* physical memory access (slow version, mainly for debug) */ 1000 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, 1001 void *ptr, size_t len, bool is_write) 1002 { 1003 int flags; 1004 vaddr l, page; 1005 uint8_t *buf = ptr; 1006 ssize_t written; 1007 int ret = -1; 1008 int fd = -1; 1009 1010 mmap_lock(); 1011 1012 while (len > 0) { 1013 page = addr & TARGET_PAGE_MASK; 1014 l = (page + TARGET_PAGE_SIZE) - addr; 1015 if (l > len) { 1016 l = len; 1017 } 1018 flags = page_get_flags(page); 1019 if (!(flags & PAGE_VALID)) { 1020 goto out_close; 1021 } 1022 if (is_write) { 1023 if (flags & PAGE_WRITE) { 1024 memcpy(g2h(cpu, addr), buf, l); 1025 } else { 1026 /* Bypass the host page protection using ptrace. */ 1027 if (fd == -1) { 1028 fd = open("/proc/self/mem", O_WRONLY); 1029 if (fd == -1) { 1030 goto out; 1031 } 1032 } 1033 /* 1034 * If there is a TranslationBlock and we weren't bypassing the 1035 * host page protection, the memcpy() above would SEGV, 1036 * ultimately leading to page_unprotect(). So invalidate the 1037 * translations manually. Both invalidation and pwrite() must 1038 * be under mmap_lock() in order to prevent the creation of 1039 * another TranslationBlock in between. 1040 */ 1041 tb_invalidate_phys_range(NULL, addr, addr + l - 1); 1042 written = pwrite(fd, buf, l, 1043 (off_t)(uintptr_t)g2h_untagged(addr)); 1044 if (written != l) { 1045 goto out_close; 1046 } 1047 } 1048 } else if (flags & PAGE_READ) { 1049 memcpy(buf, g2h(cpu, addr), l); 1050 } else { 1051 /* Bypass the host page protection using ptrace. 
*/ 1052 if (fd == -1) { 1053 fd = open("/proc/self/mem", O_RDONLY); 1054 if (fd == -1) { 1055 goto out; 1056 } 1057 } 1058 if (pread(fd, buf, l, 1059 (off_t)(uintptr_t)g2h_untagged(addr)) != l) { 1060 goto out_close; 1061 } 1062 } 1063 len -= l; 1064 buf += l; 1065 addr += l; 1066 } 1067 ret = 0; 1068 out_close: 1069 if (fd != -1) { 1070 close(fd); 1071 } 1072 out: 1073 mmap_unlock(); 1074 1075 return ret; 1076 } 1077 1078 #include "ldst_atomicity.c.inc" 1079 1080 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1081 uintptr_t ra, MMUAccessType access_type) 1082 { 1083 void *haddr; 1084 uint8_t ret; 1085 1086 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1087 haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type); 1088 ret = ldub_p(haddr); 1089 clear_helper_retaddr(); 1090 return ret; 1091 } 1092 1093 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1094 uintptr_t ra, MMUAccessType access_type) 1095 { 1096 void *haddr; 1097 uint16_t ret; 1098 MemOp mop = get_memop(oi); 1099 1100 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1101 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1102 ret = load_atom_2(cpu, ra, haddr, mop); 1103 clear_helper_retaddr(); 1104 1105 if (mop & MO_BSWAP) { 1106 ret = bswap16(ret); 1107 } 1108 return ret; 1109 } 1110 1111 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1112 uintptr_t ra, MMUAccessType access_type) 1113 { 1114 void *haddr; 1115 uint32_t ret; 1116 MemOp mop = get_memop(oi); 1117 1118 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1119 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1120 ret = load_atom_4(cpu, ra, haddr, mop); 1121 clear_helper_retaddr(); 1122 1123 if (mop & MO_BSWAP) { 1124 ret = bswap32(ret); 1125 } 1126 return ret; 1127 } 1128 1129 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1130 uintptr_t ra, MMUAccessType access_type) 1131 { 1132 void *haddr; 1133 uint64_t ret; 1134 MemOp mop = get_memop(oi); 1135 1136 
cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1137 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1138 ret = load_atom_8(cpu, ra, haddr, mop); 1139 clear_helper_retaddr(); 1140 1141 if (mop & MO_BSWAP) { 1142 ret = bswap64(ret); 1143 } 1144 return ret; 1145 } 1146 1147 static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr, 1148 MemOpIdx oi, uintptr_t ra) 1149 { 1150 void *haddr; 1151 Int128 ret; 1152 MemOp mop = get_memop(oi); 1153 1154 tcg_debug_assert((mop & MO_SIZE) == MO_128); 1155 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1156 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD); 1157 ret = load_atom_16(cpu, ra, haddr, mop); 1158 clear_helper_retaddr(); 1159 1160 if (mop & MO_BSWAP) { 1161 ret = bswap128(ret); 1162 } 1163 return ret; 1164 } 1165 1166 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val, 1167 MemOpIdx oi, uintptr_t ra) 1168 { 1169 void *haddr; 1170 1171 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1172 haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE); 1173 stb_p(haddr, val); 1174 clear_helper_retaddr(); 1175 } 1176 1177 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val, 1178 MemOpIdx oi, uintptr_t ra) 1179 { 1180 void *haddr; 1181 MemOp mop = get_memop(oi); 1182 1183 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1184 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1185 1186 if (mop & MO_BSWAP) { 1187 val = bswap16(val); 1188 } 1189 store_atom_2(cpu, ra, haddr, mop, val); 1190 clear_helper_retaddr(); 1191 } 1192 1193 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val, 1194 MemOpIdx oi, uintptr_t ra) 1195 { 1196 void *haddr; 1197 MemOp mop = get_memop(oi); 1198 1199 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1200 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1201 1202 if (mop & MO_BSWAP) { 1203 val = bswap32(val); 1204 } 1205 store_atom_4(cpu, ra, haddr, mop, val); 1206 clear_helper_retaddr(); 1207 } 1208 1209 static void do_st8_mmu(CPUState *cpu, 
vaddr addr, uint64_t val, 1210 MemOpIdx oi, uintptr_t ra) 1211 { 1212 void *haddr; 1213 MemOp mop = get_memop(oi); 1214 1215 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1216 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1217 1218 if (mop & MO_BSWAP) { 1219 val = bswap64(val); 1220 } 1221 store_atom_8(cpu, ra, haddr, mop, val); 1222 clear_helper_retaddr(); 1223 } 1224 1225 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val, 1226 MemOpIdx oi, uintptr_t ra) 1227 { 1228 void *haddr; 1229 MemOpIdx mop = get_memop(oi); 1230 1231 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1232 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1233 1234 if (mop & MO_BSWAP) { 1235 val = bswap128(val); 1236 } 1237 store_atom_16(cpu, ra, haddr, mop, val); 1238 clear_helper_retaddr(); 1239 } 1240 1241 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr, 1242 MemOpIdx oi, uintptr_t ra) 1243 { 1244 return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1245 } 1246 1247 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr, 1248 MemOpIdx oi, uintptr_t ra) 1249 { 1250 return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1251 } 1252 1253 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr, 1254 MemOpIdx oi, uintptr_t ra) 1255 { 1256 return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1257 } 1258 1259 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr, 1260 MemOpIdx oi, uintptr_t ra) 1261 { 1262 return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1263 } 1264 1265 #include "ldst_common.c.inc" 1266 1267 /* 1268 * Do not allow unaligned operations to proceed. Return the host address. 1269 */ 1270 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, 1271 int size, uintptr_t retaddr) 1272 { 1273 MemOp mop = get_memop(oi); 1274 int a_bits = memop_alignment_bits(mop); 1275 void *ret; 1276 1277 /* Enforce guest required alignment. 
*/ 1278 if (unlikely(addr & ((1 << a_bits) - 1))) { 1279 cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr); 1280 } 1281 1282 /* Enforce qemu required alignment. */ 1283 if (unlikely(addr & (size - 1))) { 1284 cpu_loop_exit_atomic(cpu, retaddr); 1285 } 1286 1287 ret = g2h(cpu, addr); 1288 set_helper_retaddr(retaddr); 1289 return ret; 1290 } 1291 1292 #include "atomic_common.c.inc" 1293 1294 /* 1295 * First set of functions passes in OI and RETADDR. 1296 * This makes them callable from other helpers. 1297 */ 1298 1299 #define ATOMIC_NAME(X) \ 1300 glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu) 1301 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) 1302 1303 #define DATA_SIZE 1 1304 #include "atomic_template.h" 1305 1306 #define DATA_SIZE 2 1307 #include "atomic_template.h" 1308 1309 #define DATA_SIZE 4 1310 #include "atomic_template.h" 1311 1312 #ifdef CONFIG_ATOMIC64 1313 #define DATA_SIZE 8 1314 #include "atomic_template.h" 1315 #endif 1316 1317 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128 1318 #define DATA_SIZE 16 1319 #include "atomic_template.h" 1320 #endif 1321