1 /* 2 * User emulator execution 3 * 4 * Copyright (c) 2003-2005 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 #include "accel/tcg/cpu-ops.h" 21 #include "disas/disas.h" 22 #include "cpu.h" 23 #include "exec/vaddr.h" 24 #include "exec/tlb-flags.h" 25 #include "tcg/tcg.h" 26 #include "qemu/bitops.h" 27 #include "qemu/rcu.h" 28 #include "accel/tcg/cpu-ldst.h" 29 #include "accel/tcg/helper-retaddr.h" 30 #include "accel/tcg/probe.h" 31 #include "user/cpu_loop.h" 32 #include "qemu/main-loop.h" 33 #include "user/page-protection.h" 34 #include "exec/page-protection.h" 35 #include "exec/helper-proto.h" 36 #include "qemu/atomic128.h" 37 #include "qemu/bswap.h" 38 #include "qemu/int128.h" 39 #include "trace.h" 40 #include "tcg/tcg-ldst.h" 41 #include "backend-ldst.h" 42 #include "internal-common.h" 43 #include "tb-internal.h" 44 45 __thread uintptr_t helper_retaddr; 46 47 //#define DEBUG_SIGNAL 48 49 void cpu_interrupt(CPUState *cpu, int mask) 50 { 51 g_assert(bql_locked()); 52 cpu->interrupt_request |= mask; 53 qatomic_set(&cpu->neg.icount_decr.u16.high, -1); 54 } 55 56 /* 57 * Adjust the pc to pass to cpu_restore_state; return the memop type. 58 */ 59 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) 60 { 61 switch (helper_retaddr) { 62 default: 63 /* 64 * Fault during host memory operation within a helper function. 65 * The helper's host return address, saved here, gives us a 66 * pointer into the generated code that will unwind to the 67 * correct guest pc. 68 */ 69 *pc = helper_retaddr; 70 break; 71 72 case 0: 73 /* 74 * Fault during host memory operation within generated code. 75 * (Or, a unrelated bug within qemu, but we can't tell from here). 76 * 77 * We take the host pc from the signal frame. However, we cannot 78 * use that value directly. Within cpu_restore_state_from_tb, we 79 * assume PC comes from GETPC(), as used by the helper functions, 80 * so we adjust the address by -GETPC_ADJ to form an address that 81 * is within the call insn, so that the address does not accidentally 82 * match the beginning of the next guest insn. However, when the 83 * pc comes from the signal frame it points to the actual faulting 84 * host memory insn and not the return from a call insn. 85 * 86 * Therefore, adjust to compensate for what will be done later 87 * by cpu_restore_state_from_tb. 88 */ 89 *pc += GETPC_ADJ; 90 break; 91 92 case 1: 93 /* 94 * Fault during host read for translation, or loosely, "execution". 95 * 96 * The guest pc is already pointing to the start of the TB for which 97 * code is being generated. If the guest translator manages the 98 * page crossings correctly, this is exactly the correct address 99 * (and if the translator doesn't handle page boundaries correctly 100 * there's little we can do about that here). Therefore, do not 101 * trigger the unwinder. 102 */ 103 *pc = 0; 104 return MMU_INST_FETCH; 105 } 106 107 return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; 108 } 109 110 /** 111 * handle_sigsegv_accerr_write: 112 * @cpu: the cpu context 113 * @old_set: the sigset_t from the signal ucontext_t 114 * @host_pc: the host pc, adjusted for the signal 115 * @guest_addr: the guest address of the fault 116 * 117 * Return true if the write fault has been handled, and should be re-tried. 118 * 119 * Note that it is important that we don't call page_unprotect() unless 120 * this is really a "write to nonwritable page" fault, because 121 * page_unprotect() assumes that if it is called for an access to 122 * a page that's writable this means we had two threads racing and 123 * another thread got there first and already made the page writable; 124 * so we will retry the access. If we were to call page_unprotect() 125 * for some other kind of fault that should really be passed to the 126 * guest, we'd end up in an infinite loop of retrying the faulting access. 127 */ 128 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, 129 uintptr_t host_pc, abi_ptr guest_addr) 130 { 131 switch (page_unprotect(cpu, guest_addr, host_pc)) { 132 case 0: 133 /* 134 * Fault not caused by a page marked unwritable to protect 135 * cached translations, must be the guest binary's problem. 136 */ 137 return false; 138 case 1: 139 /* 140 * Fault caused by protection of cached translation; TBs 141 * invalidated, so resume execution. 142 */ 143 return true; 144 case 2: 145 /* 146 * Fault caused by protection of cached translation, and the 147 * currently executing TB was modified and must be exited immediately. 148 */ 149 sigprocmask(SIG_SETMASK, old_set, NULL); 150 cpu_loop_exit_noexc(cpu); 151 /* NORETURN */ 152 default: 153 g_assert_not_reached(); 154 } 155 } 156 157 typedef struct PageFlagsNode { 158 struct rcu_head rcu; 159 IntervalTreeNode itree; 160 int flags; 161 } PageFlagsNode; 162 163 static IntervalTreeRoot pageflags_root; 164 165 static PageFlagsNode *pageflags_find(vaddr start, vaddr last) 166 { 167 IntervalTreeNode *n; 168 169 n = interval_tree_iter_first(&pageflags_root, start, last); 170 return n ? container_of(n, PageFlagsNode, itree) : NULL; 171 } 172 173 static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last) 174 { 175 IntervalTreeNode *n; 176 177 n = interval_tree_iter_next(&p->itree, start, last); 178 return n ? container_of(n, PageFlagsNode, itree) : NULL; 179 } 180 181 int walk_memory_regions(void *priv, walk_memory_regions_fn fn) 182 { 183 IntervalTreeNode *n; 184 int rc = 0; 185 186 mmap_lock(); 187 for (n = interval_tree_iter_first(&pageflags_root, 0, -1); 188 n != NULL; 189 n = interval_tree_iter_next(n, 0, -1)) { 190 PageFlagsNode *p = container_of(n, PageFlagsNode, itree); 191 192 rc = fn(priv, n->start, n->last + 1, p->flags); 193 if (rc != 0) { 194 break; 195 } 196 } 197 mmap_unlock(); 198 199 return rc; 200 } 201 202 static int dump_region(void *opaque, vaddr start, vaddr end, int prot) 203 { 204 FILE *f = opaque; 205 206 fprintf(f, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr 207 " " TARGET_ABI_FMT_ptr " %c%c%c\n", 208 (abi_ptr)start, (abi_ptr)end, (abi_ptr)(end - start), 209 ((prot & PAGE_READ) ? 'r' : '-'), 210 ((prot & PAGE_WRITE) ? 'w' : '-'), 211 ((prot & PAGE_EXEC) ? 'x' : '-')); 212 return 0; 213 } 214 215 /* dump memory mappings */ 216 void page_dump(FILE *f) 217 { 218 const int length = sizeof(abi_ptr) * 2; 219 220 fprintf(f, "%-*s %-*s %-*s %s\n", 221 length, "start", length, "end", length, "size", "prot"); 222 walk_memory_regions(f, dump_region); 223 } 224 225 int page_get_flags(vaddr address) 226 { 227 PageFlagsNode *p = pageflags_find(address, address); 228 229 /* 230 * See util/interval-tree.c re lockless lookups: no false positives but 231 * there are false negatives. If we find nothing, retry with the mmap 232 * lock acquired. 233 */ 234 if (p) { 235 return p->flags; 236 } 237 if (have_mmap_lock()) { 238 return 0; 239 } 240 241 mmap_lock(); 242 p = pageflags_find(address, address); 243 mmap_unlock(); 244 return p ? p->flags : 0; 245 } 246 247 /* A subroutine of page_set_flags: insert a new node for [start,last]. */ 248 static void pageflags_create(vaddr start, vaddr last, int flags) 249 { 250 PageFlagsNode *p = g_new(PageFlagsNode, 1); 251 252 p->itree.start = start; 253 p->itree.last = last; 254 p->flags = flags; 255 interval_tree_insert(&p->itree, &pageflags_root); 256 } 257 258 /* A subroutine of page_set_flags: remove everything in [start,last]. */ 259 static bool pageflags_unset(vaddr start, vaddr last) 260 { 261 bool inval_tb = false; 262 263 while (true) { 264 PageFlagsNode *p = pageflags_find(start, last); 265 vaddr p_last; 266 267 if (!p) { 268 break; 269 } 270 271 if (p->flags & PAGE_EXEC) { 272 inval_tb = true; 273 } 274 275 interval_tree_remove(&p->itree, &pageflags_root); 276 p_last = p->itree.last; 277 278 if (p->itree.start < start) { 279 /* Truncate the node from the end, or split out the middle. */ 280 p->itree.last = start - 1; 281 interval_tree_insert(&p->itree, &pageflags_root); 282 if (last < p_last) { 283 pageflags_create(last + 1, p_last, p->flags); 284 break; 285 } 286 } else if (p_last <= last) { 287 /* Range completely covers node -- remove it. */ 288 g_free_rcu(p, rcu); 289 } else { 290 /* Truncate the node from the start. */ 291 p->itree.start = last + 1; 292 interval_tree_insert(&p->itree, &pageflags_root); 293 break; 294 } 295 } 296 297 return inval_tb; 298 } 299 300 /* 301 * A subroutine of page_set_flags: nothing overlaps [start,last], 302 * but check adjacent mappings and maybe merge into a single range. 303 */ 304 static void pageflags_create_merge(vaddr start, vaddr last, int flags) 305 { 306 PageFlagsNode *next = NULL, *prev = NULL; 307 308 if (start > 0) { 309 prev = pageflags_find(start - 1, start - 1); 310 if (prev) { 311 if (prev->flags == flags) { 312 interval_tree_remove(&prev->itree, &pageflags_root); 313 } else { 314 prev = NULL; 315 } 316 } 317 } 318 if (last + 1 != 0) { 319 next = pageflags_find(last + 1, last + 1); 320 if (next) { 321 if (next->flags == flags) { 322 interval_tree_remove(&next->itree, &pageflags_root); 323 } else { 324 next = NULL; 325 } 326 } 327 } 328 329 if (prev) { 330 if (next) { 331 prev->itree.last = next->itree.last; 332 g_free_rcu(next, rcu); 333 } else { 334 prev->itree.last = last; 335 } 336 interval_tree_insert(&prev->itree, &pageflags_root); 337 } else if (next) { 338 next->itree.start = start; 339 interval_tree_insert(&next->itree, &pageflags_root); 340 } else { 341 pageflags_create(start, last, flags); 342 } 343 } 344 345 /* 346 * Allow the target to decide if PAGE_TARGET_[12] may be reset. 347 * By default, they are not kept. 348 */ 349 #ifndef PAGE_TARGET_STICKY 350 #define PAGE_TARGET_STICKY 0 351 #endif 352 #define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY) 353 354 /* A subroutine of page_set_flags: add flags to [start,last]. */ 355 static bool pageflags_set_clear(vaddr start, vaddr last, 356 int set_flags, int clear_flags) 357 { 358 PageFlagsNode *p; 359 vaddr p_start, p_last; 360 int p_flags, merge_flags; 361 bool inval_tb = false; 362 363 restart: 364 p = pageflags_find(start, last); 365 if (!p) { 366 if (set_flags) { 367 pageflags_create_merge(start, last, set_flags); 368 } 369 goto done; 370 } 371 372 p_start = p->itree.start; 373 p_last = p->itree.last; 374 p_flags = p->flags; 375 /* Using mprotect on a page does not change sticky bits. */ 376 merge_flags = (p_flags & ~clear_flags) | set_flags; 377 378 /* 379 * Need to flush if an overlapping executable region 380 * removes exec, or adds write. 381 */ 382 if ((p_flags & PAGE_EXEC) 383 && (!(merge_flags & PAGE_EXEC) 384 || (merge_flags & ~p_flags & PAGE_WRITE))) { 385 inval_tb = true; 386 } 387 388 /* 389 * If there is an exact range match, update and return without 390 * attempting to merge with adjacent regions. 391 */ 392 if (start == p_start && last == p_last) { 393 if (merge_flags) { 394 p->flags = merge_flags; 395 } else { 396 interval_tree_remove(&p->itree, &pageflags_root); 397 g_free_rcu(p, rcu); 398 } 399 goto done; 400 } 401 402 /* 403 * If sticky bits affect the original mapping, then we must be more 404 * careful about the existing intervals and the separate flags. 405 */ 406 if (set_flags != merge_flags) { 407 if (p_start < start) { 408 interval_tree_remove(&p->itree, &pageflags_root); 409 p->itree.last = start - 1; 410 interval_tree_insert(&p->itree, &pageflags_root); 411 412 if (last < p_last) { 413 if (merge_flags) { 414 pageflags_create(start, last, merge_flags); 415 } 416 pageflags_create(last + 1, p_last, p_flags); 417 } else { 418 if (merge_flags) { 419 pageflags_create(start, p_last, merge_flags); 420 } 421 if (p_last < last) { 422 start = p_last + 1; 423 goto restart; 424 } 425 } 426 } else { 427 if (start < p_start && set_flags) { 428 pageflags_create(start, p_start - 1, set_flags); 429 } 430 if (last < p_last) { 431 interval_tree_remove(&p->itree, &pageflags_root); 432 p->itree.start = last + 1; 433 interval_tree_insert(&p->itree, &pageflags_root); 434 if (merge_flags) { 435 pageflags_create(start, last, merge_flags); 436 } 437 } else { 438 if (merge_flags) { 439 p->flags = merge_flags; 440 } else { 441 interval_tree_remove(&p->itree, &pageflags_root); 442 g_free_rcu(p, rcu); 443 } 444 if (p_last < last) { 445 start = p_last + 1; 446 goto restart; 447 } 448 } 449 } 450 goto done; 451 } 452 453 /* If flags are not changing for this range, incorporate it. */ 454 if (set_flags == p_flags) { 455 if (start < p_start) { 456 interval_tree_remove(&p->itree, &pageflags_root); 457 p->itree.start = start; 458 interval_tree_insert(&p->itree, &pageflags_root); 459 } 460 if (p_last < last) { 461 start = p_last + 1; 462 goto restart; 463 } 464 goto done; 465 } 466 467 /* Maybe split out head and/or tail ranges with the original flags. */ 468 interval_tree_remove(&p->itree, &pageflags_root); 469 if (p_start < start) { 470 p->itree.last = start - 1; 471 interval_tree_insert(&p->itree, &pageflags_root); 472 473 if (p_last < last) { 474 goto restart; 475 } 476 if (last < p_last) { 477 pageflags_create(last + 1, p_last, p_flags); 478 } 479 } else if (last < p_last) { 480 p->itree.start = last + 1; 481 interval_tree_insert(&p->itree, &pageflags_root); 482 } else { 483 g_free_rcu(p, rcu); 484 goto restart; 485 } 486 if (set_flags) { 487 pageflags_create(start, last, set_flags); 488 } 489 490 done: 491 return inval_tb; 492 } 493 494 void page_set_flags(vaddr start, vaddr last, int flags) 495 { 496 bool reset = false; 497 bool inval_tb = false; 498 499 /* This function should never be called with addresses outside the 500 guest address space. If this assert fires, it probably indicates 501 a missing call to h2g_valid. */ 502 assert(start <= last); 503 assert(last <= GUEST_ADDR_MAX); 504 /* Only set PAGE_ANON with new mappings. */ 505 assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET)); 506 assert_memory_lock(); 507 508 start &= TARGET_PAGE_MASK; 509 last |= ~TARGET_PAGE_MASK; 510 511 if (!(flags & PAGE_VALID)) { 512 flags = 0; 513 } else { 514 reset = flags & PAGE_RESET; 515 flags &= ~PAGE_RESET; 516 if (flags & PAGE_WRITE) { 517 flags |= PAGE_WRITE_ORG; 518 } 519 } 520 521 if (!flags || reset) { 522 page_reset_target_data(start, last); 523 inval_tb |= pageflags_unset(start, last); 524 } 525 if (flags) { 526 inval_tb |= pageflags_set_clear(start, last, flags, 527 ~(reset ? 0 : PAGE_STICKY)); 528 } 529 if (inval_tb) { 530 tb_invalidate_phys_range(NULL, start, last); 531 } 532 } 533 534 bool page_check_range(vaddr start, vaddr len, int flags) 535 { 536 vaddr last; 537 int locked; /* tri-state: =0: unlocked, +1: global, -1: local */ 538 bool ret; 539 540 if (len == 0) { 541 return true; /* trivial length */ 542 } 543 544 last = start + len - 1; 545 if (last < start) { 546 return false; /* wrap around */ 547 } 548 549 locked = have_mmap_lock(); 550 while (true) { 551 PageFlagsNode *p = pageflags_find(start, last); 552 int missing; 553 554 if (!p) { 555 if (!locked) { 556 /* 557 * Lockless lookups have false negatives. 558 * Retry with the lock held. 559 */ 560 mmap_lock(); 561 locked = -1; 562 p = pageflags_find(start, last); 563 } 564 if (!p) { 565 ret = false; /* entire region invalid */ 566 break; 567 } 568 } 569 if (start < p->itree.start) { 570 ret = false; /* initial bytes invalid */ 571 break; 572 } 573 574 missing = flags & ~p->flags; 575 if (missing & ~PAGE_WRITE) { 576 ret = false; /* page doesn't match */ 577 break; 578 } 579 if (missing & PAGE_WRITE) { 580 if (!(p->flags & PAGE_WRITE_ORG)) { 581 ret = false; /* page not writable */ 582 break; 583 } 584 /* Asking about writable, but has been protected: undo. */ 585 if (!page_unprotect(NULL, start, 0)) { 586 ret = false; 587 break; 588 } 589 /* TODO: page_unprotect should take a range, not a single page. */ 590 if (last - start < TARGET_PAGE_SIZE) { 591 ret = true; /* ok */ 592 break; 593 } 594 start += TARGET_PAGE_SIZE; 595 continue; 596 } 597 598 if (last <= p->itree.last) { 599 ret = true; /* ok */ 600 break; 601 } 602 start = p->itree.last + 1; 603 } 604 605 /* Release the lock if acquired locally. */ 606 if (locked < 0) { 607 mmap_unlock(); 608 } 609 return ret; 610 } 611 612 bool page_check_range_empty(vaddr start, vaddr last) 613 { 614 assert(last >= start); 615 assert_memory_lock(); 616 return pageflags_find(start, last) == NULL; 617 } 618 619 vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align) 620 { 621 vaddr len_m1, align_m1; 622 623 assert(min <= max); 624 assert(max <= GUEST_ADDR_MAX); 625 assert(len != 0); 626 assert(is_power_of_2(align)); 627 assert_memory_lock(); 628 629 len_m1 = len - 1; 630 align_m1 = align - 1; 631 632 /* Iteratively narrow the search region. */ 633 while (1) { 634 PageFlagsNode *p; 635 636 /* Align min and double-check there's enough space remaining. */ 637 min = (min + align_m1) & ~align_m1; 638 if (min > max) { 639 return -1; 640 } 641 if (len_m1 > max - min) { 642 return -1; 643 } 644 645 p = pageflags_find(min, min + len_m1); 646 if (p == NULL) { 647 /* Found! */ 648 return min; 649 } 650 if (max <= p->itree.last) { 651 /* Existing allocation fills the remainder of the search region. */ 652 return -1; 653 } 654 /* Skip across existing allocation. */ 655 min = p->itree.last + 1; 656 } 657 } 658 659 void tb_lock_page0(tb_page_addr_t address) 660 { 661 PageFlagsNode *p; 662 vaddr start, last; 663 int host_page_size = qemu_real_host_page_size(); 664 int prot; 665 666 assert_memory_lock(); 667 668 if (host_page_size <= TARGET_PAGE_SIZE) { 669 start = address & TARGET_PAGE_MASK; 670 last = start + TARGET_PAGE_SIZE - 1; 671 } else { 672 start = address & -host_page_size; 673 last = start + host_page_size - 1; 674 } 675 676 p = pageflags_find(start, last); 677 if (!p) { 678 return; 679 } 680 prot = p->flags; 681 682 if (unlikely(p->itree.last < last)) { 683 /* More than one protection region covers the one host page. */ 684 assert(TARGET_PAGE_SIZE < host_page_size); 685 while ((p = pageflags_next(p, start, last)) != NULL) { 686 prot |= p->flags; 687 } 688 } 689 690 if (prot & PAGE_WRITE) { 691 pageflags_set_clear(start, last, 0, PAGE_WRITE); 692 mprotect(g2h_untagged(start), last - start + 1, 693 prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE); 694 } 695 } 696 697 /* 698 * Called from signal handler: invalidate the code and unprotect the 699 * page. Return 0 if the fault was not handled, 1 if it was handled, 700 * and 2 if it was handled but the caller must cause the TB to be 701 * immediately exited. (We can only return 2 if the 'pc' argument is 702 * non-zero.) 703 */ 704 int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc) 705 { 706 PageFlagsNode *p; 707 bool current_tb_invalidated; 708 709 assert((cpu == NULL) == (pc == 0)); 710 711 /* 712 * Technically this isn't safe inside a signal handler. However we 713 * know this only ever happens in a synchronous SEGV handler, so in 714 * practice it seems to be ok. 715 */ 716 mmap_lock(); 717 718 p = pageflags_find(address, address); 719 720 /* If this address was not really writable, nothing to do. */ 721 if (!p || !(p->flags & PAGE_WRITE_ORG)) { 722 mmap_unlock(); 723 return 0; 724 } 725 726 current_tb_invalidated = false; 727 if (p->flags & PAGE_WRITE) { 728 /* 729 * If the page is actually marked WRITE then assume this is because 730 * this thread raced with another one which got here first and 731 * set the page to PAGE_WRITE and did the TB invalidate for us. 732 */ 733 if (pc && cpu->cc->tcg_ops->precise_smc) { 734 TranslationBlock *current_tb = tcg_tb_lookup(pc); 735 if (current_tb) { 736 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; 737 } 738 } 739 } else { 740 int host_page_size = qemu_real_host_page_size(); 741 vaddr start, len, i; 742 int prot; 743 744 if (host_page_size <= TARGET_PAGE_SIZE) { 745 start = address & TARGET_PAGE_MASK; 746 len = TARGET_PAGE_SIZE; 747 prot = p->flags | PAGE_WRITE; 748 pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0); 749 current_tb_invalidated = 750 tb_invalidate_phys_page_unwind(cpu, start, pc); 751 } else { 752 start = address & -host_page_size; 753 len = host_page_size; 754 prot = 0; 755 756 for (i = 0; i < len; i += TARGET_PAGE_SIZE) { 757 vaddr addr = start + i; 758 759 p = pageflags_find(addr, addr); 760 if (p) { 761 prot |= p->flags; 762 if (p->flags & PAGE_WRITE_ORG) { 763 prot |= PAGE_WRITE; 764 pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1, 765 PAGE_WRITE, 0); 766 } 767 } 768 /* 769 * Since the content will be modified, we must invalidate 770 * the corresponding translated code. 771 */ 772 current_tb_invalidated |= 773 tb_invalidate_phys_page_unwind(cpu, addr, pc); 774 } 775 } 776 if (prot & PAGE_EXEC) { 777 prot = (prot & ~PAGE_EXEC) | PAGE_READ; 778 } 779 mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX); 780 } 781 mmap_unlock(); 782 783 /* If current TB was invalidated return to main loop */ 784 return current_tb_invalidated ? 2 : 1; 785 } 786 787 static int probe_access_internal(CPUArchState *env, vaddr addr, 788 int fault_size, MMUAccessType access_type, 789 bool nonfault, uintptr_t ra) 790 { 791 int acc_flag; 792 bool maperr; 793 794 switch (access_type) { 795 case MMU_DATA_STORE: 796 acc_flag = PAGE_WRITE_ORG; 797 break; 798 case MMU_DATA_LOAD: 799 acc_flag = PAGE_READ; 800 break; 801 case MMU_INST_FETCH: 802 acc_flag = PAGE_EXEC; 803 break; 804 default: 805 g_assert_not_reached(); 806 } 807 808 if (guest_addr_valid_untagged(addr)) { 809 int page_flags = page_get_flags(addr); 810 if (page_flags & acc_flag) { 811 if (access_type != MMU_INST_FETCH 812 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) { 813 return TLB_MMIO; 814 } 815 return 0; /* success */ 816 } 817 maperr = !(page_flags & PAGE_VALID); 818 } else { 819 maperr = true; 820 } 821 822 if (nonfault) { 823 return TLB_INVALID_MASK; 824 } 825 826 cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); 827 } 828 829 int probe_access_flags(CPUArchState *env, vaddr addr, int size, 830 MMUAccessType access_type, int mmu_idx, 831 bool nonfault, void **phost, uintptr_t ra) 832 { 833 int flags; 834 835 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 836 flags = probe_access_internal(env, addr, size, access_type, nonfault, ra); 837 *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr); 838 return flags; 839 } 840 841 void *probe_access(CPUArchState *env, vaddr addr, int size, 842 MMUAccessType access_type, int mmu_idx, uintptr_t ra) 843 { 844 int flags; 845 846 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 847 flags = probe_access_internal(env, addr, size, access_type, false, ra); 848 g_assert((flags & ~TLB_MMIO) == 0); 849 850 return size ? g2h(env_cpu(env), addr) : NULL; 851 } 852 853 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, 854 void **hostp) 855 { 856 int flags; 857 858 flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0); 859 g_assert(flags == 0); 860 861 if (hostp) { 862 *hostp = g2h_untagged(addr); 863 } 864 return addr; 865 } 866 867 #ifdef TARGET_PAGE_DATA_SIZE 868 /* 869 * Allocate chunks of target data together. For the only current user, 870 * if we allocate one hunk per page, we have overhead of 40/128 or 40%. 871 * Therefore, allocate memory for 64 pages at a time for overhead < 1%. 872 */ 873 #define TPD_PAGES 64 874 #define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES) 875 876 typedef struct TargetPageDataNode { 877 struct rcu_head rcu; 878 IntervalTreeNode itree; 879 char data[] __attribute__((aligned)); 880 } TargetPageDataNode; 881 882 static IntervalTreeRoot targetdata_root; 883 884 void page_reset_target_data(vaddr start, vaddr last) 885 { 886 IntervalTreeNode *n, *next; 887 888 assert_memory_lock(); 889 890 start &= TARGET_PAGE_MASK; 891 last |= ~TARGET_PAGE_MASK; 892 893 for (n = interval_tree_iter_first(&targetdata_root, start, last), 894 next = n ? interval_tree_iter_next(n, start, last) : NULL; 895 n != NULL; 896 n = next, 897 next = next ? interval_tree_iter_next(n, start, last) : NULL) { 898 vaddr n_start, n_last, p_ofs, p_len; 899 TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree); 900 901 if (n->start >= start && n->last <= last) { 902 interval_tree_remove(n, &targetdata_root); 903 g_free_rcu(t, rcu); 904 continue; 905 } 906 907 if (n->start < start) { 908 n_start = start; 909 p_ofs = (start - n->start) >> TARGET_PAGE_BITS; 910 } else { 911 n_start = n->start; 912 p_ofs = 0; 913 } 914 n_last = MIN(last, n->last); 915 p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; 916 917 memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0, 918 p_len * TARGET_PAGE_DATA_SIZE); 919 } 920 } 921 922 void *page_get_target_data(vaddr address) 923 { 924 IntervalTreeNode *n; 925 TargetPageDataNode *t; 926 vaddr page, region, p_ofs; 927 928 page = address & TARGET_PAGE_MASK; 929 region = address & TBD_MASK; 930 931 n = interval_tree_iter_first(&targetdata_root, page, page); 932 if (!n) { 933 /* 934 * See util/interval-tree.c re lockless lookups: no false positives 935 * but there are false negatives. If we find nothing, retry with 936 * the mmap lock acquired. We also need the lock for the 937 * allocation + insert. 938 */ 939 mmap_lock(); 940 n = interval_tree_iter_first(&targetdata_root, page, page); 941 if (!n) { 942 t = g_malloc0(sizeof(TargetPageDataNode) 943 + TPD_PAGES * TARGET_PAGE_DATA_SIZE); 944 n = &t->itree; 945 n->start = region; 946 n->last = region | ~TBD_MASK; 947 interval_tree_insert(n, &targetdata_root); 948 } 949 mmap_unlock(); 950 } 951 952 t = container_of(n, TargetPageDataNode, itree); 953 p_ofs = (page - region) >> TARGET_PAGE_BITS; 954 return t->data + p_ofs * TARGET_PAGE_DATA_SIZE; 955 } 956 #else 957 void page_reset_target_data(vaddr start, vaddr last) { } 958 #endif /* TARGET_PAGE_DATA_SIZE */ 959 960 /* The system-mode versions of these helpers are in cputlb.c. */ 961 962 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr, 963 MemOp mop, uintptr_t ra, MMUAccessType type) 964 { 965 int a_bits = memop_alignment_bits(mop); 966 void *ret; 967 968 /* Enforce guest required alignment. */ 969 if (unlikely(addr & ((1 << a_bits) - 1))) { 970 cpu_loop_exit_sigbus(cpu, addr, type, ra); 971 } 972 973 ret = g2h(cpu, addr); 974 set_helper_retaddr(ra); 975 return ret; 976 } 977 978 /* physical memory access (slow version, mainly for debug) */ 979 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, 980 void *ptr, size_t len, bool is_write) 981 { 982 int flags; 983 vaddr l, page; 984 uint8_t *buf = ptr; 985 ssize_t written; 986 int ret = -1; 987 int fd = -1; 988 989 mmap_lock(); 990 991 while (len > 0) { 992 page = addr & TARGET_PAGE_MASK; 993 l = (page + TARGET_PAGE_SIZE) - addr; 994 if (l > len) { 995 l = len; 996 } 997 flags = page_get_flags(page); 998 if (!(flags & PAGE_VALID)) { 999 goto out_close; 1000 } 1001 if (is_write) { 1002 if (flags & PAGE_WRITE) { 1003 memcpy(g2h(cpu, addr), buf, l); 1004 } else { 1005 /* Bypass the host page protection using ptrace. */ 1006 if (fd == -1) { 1007 fd = open("/proc/self/mem", O_WRONLY); 1008 if (fd == -1) { 1009 goto out; 1010 } 1011 } 1012 /* 1013 * If there is a TranslationBlock and we weren't bypassing the 1014 * host page protection, the memcpy() above would SEGV, 1015 * ultimately leading to page_unprotect(). So invalidate the 1016 * translations manually. Both invalidation and pwrite() must 1017 * be under mmap_lock() in order to prevent the creation of 1018 * another TranslationBlock in between. 1019 */ 1020 tb_invalidate_phys_range(NULL, addr, addr + l - 1); 1021 written = pwrite(fd, buf, l, 1022 (off_t)(uintptr_t)g2h_untagged(addr)); 1023 if (written != l) { 1024 goto out_close; 1025 } 1026 } 1027 } else if (flags & PAGE_READ) { 1028 memcpy(buf, g2h(cpu, addr), l); 1029 } else { 1030 /* Bypass the host page protection using ptrace. */ 1031 if (fd == -1) { 1032 fd = open("/proc/self/mem", O_RDONLY); 1033 if (fd == -1) { 1034 goto out; 1035 } 1036 } 1037 if (pread(fd, buf, l, 1038 (off_t)(uintptr_t)g2h_untagged(addr)) != l) { 1039 goto out_close; 1040 } 1041 } 1042 len -= l; 1043 buf += l; 1044 addr += l; 1045 } 1046 ret = 0; 1047 out_close: 1048 if (fd != -1) { 1049 close(fd); 1050 } 1051 out: 1052 mmap_unlock(); 1053 1054 return ret; 1055 } 1056 1057 #include "ldst_atomicity.c.inc" 1058 1059 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1060 uintptr_t ra, MMUAccessType access_type) 1061 { 1062 void *haddr; 1063 uint8_t ret; 1064 1065 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1066 haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type); 1067 ret = ldub_p(haddr); 1068 clear_helper_retaddr(); 1069 return ret; 1070 } 1071 1072 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1073 uintptr_t ra, MMUAccessType access_type) 1074 { 1075 void *haddr; 1076 uint16_t ret; 1077 MemOp mop = get_memop(oi); 1078 1079 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1080 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1081 ret = load_atom_2(cpu, ra, haddr, mop); 1082 clear_helper_retaddr(); 1083 1084 if (mop & MO_BSWAP) { 1085 ret = bswap16(ret); 1086 } 1087 return ret; 1088 } 1089 1090 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1091 uintptr_t ra, MMUAccessType access_type) 1092 { 1093 void *haddr; 1094 uint32_t ret; 1095 MemOp mop = get_memop(oi); 1096 1097 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1098 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1099 ret = load_atom_4(cpu, ra, haddr, mop); 1100 clear_helper_retaddr(); 1101 1102 if (mop & MO_BSWAP) { 1103 ret = bswap32(ret); 1104 } 1105 return ret; 1106 } 1107 1108 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1109 uintptr_t ra, MMUAccessType access_type) 1110 { 1111 void *haddr; 1112 uint64_t ret; 1113 MemOp mop = get_memop(oi); 1114 1115 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1116 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1117 ret = load_atom_8(cpu, ra, haddr, mop); 1118 clear_helper_retaddr(); 1119 1120 if (mop & MO_BSWAP) { 1121 ret = bswap64(ret); 1122 } 1123 return ret; 1124 } 1125 1126 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr, 1127 MemOpIdx oi, uintptr_t ra) 1128 { 1129 void *haddr; 1130 Int128 ret; 1131 MemOp mop = get_memop(oi); 1132 1133 tcg_debug_assert((mop & MO_SIZE) == MO_128); 1134 cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD); 1135 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD); 1136 ret = load_atom_16(cpu, ra, haddr, mop); 1137 clear_helper_retaddr(); 1138 1139 if (mop & MO_BSWAP) { 1140 ret = bswap128(ret); 1141 } 1142 return ret; 1143 } 1144 1145 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val, 1146 MemOpIdx oi, uintptr_t ra) 1147 { 1148 void *haddr; 1149 1150 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1151 haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE); 1152 stb_p(haddr, val); 1153 clear_helper_retaddr(); 1154 } 1155 1156 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val, 1157 MemOpIdx oi, uintptr_t ra) 1158 { 1159 void *haddr; 1160 MemOp mop = get_memop(oi); 1161 1162 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1163 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1164 1165 if (mop & MO_BSWAP) { 1166 val = bswap16(val); 1167 } 1168 store_atom_2(cpu, ra, haddr, mop, val); 1169 clear_helper_retaddr(); 1170 } 1171 1172 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val, 1173 MemOpIdx oi, uintptr_t ra) 1174 { 1175 void *haddr; 1176 MemOp mop = get_memop(oi); 1177 1178 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1179 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1180 1181 if (mop & MO_BSWAP) { 1182 val = bswap32(val); 1183 } 1184 store_atom_4(cpu, ra, haddr, mop, val); 1185 clear_helper_retaddr(); 1186 } 1187 1188 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val, 1189 MemOpIdx oi, uintptr_t ra) 1190 { 1191 void *haddr; 1192 MemOp mop = get_memop(oi); 1193 1194 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1195 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1196 1197 if (mop & MO_BSWAP) { 1198 val = bswap64(val); 1199 } 1200 store_atom_8(cpu, ra, haddr, mop, val); 1201 clear_helper_retaddr(); 1202 } 1203 1204 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val, 1205 MemOpIdx oi, uintptr_t ra) 1206 { 1207 void *haddr; 1208 MemOpIdx mop = get_memop(oi); 1209 1210 cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST); 1211 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1212 1213 if (mop & MO_BSWAP) { 1214 val = bswap128(val); 1215 } 1216 store_atom_16(cpu, ra, haddr, mop, val); 1217 clear_helper_retaddr(); 1218 } 1219 1220 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr, 1221 MemOpIdx oi, uintptr_t ra) 1222 { 1223 return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1224 } 1225 1226 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr, 1227 MemOpIdx oi, uintptr_t ra) 1228 { 1229 return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1230 } 1231 1232 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr, 1233 MemOpIdx oi, uintptr_t ra) 1234 { 1235 return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1236 } 1237 1238 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr, 1239 MemOpIdx oi, uintptr_t ra) 1240 { 1241 return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH); 1242 } 1243 1244 #include "ldst_common.c.inc" 1245 1246 /* 1247 * Do not allow unaligned operations to proceed. Return the host address. 1248 */ 1249 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, 1250 int size, uintptr_t retaddr) 1251 { 1252 MemOp mop = get_memop(oi); 1253 int a_bits = memop_alignment_bits(mop); 1254 void *ret; 1255 1256 /* Enforce guest required alignment. */ 1257 if (unlikely(addr & ((1 << a_bits) - 1))) { 1258 cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr); 1259 } 1260 1261 /* Enforce qemu required alignment. */ 1262 if (unlikely(addr & (size - 1))) { 1263 cpu_loop_exit_atomic(cpu, retaddr); 1264 } 1265 1266 ret = g2h(cpu, addr); 1267 set_helper_retaddr(retaddr); 1268 return ret; 1269 } 1270 1271 #include "atomic_common.c.inc" 1272 1273 /* 1274 * First set of functions passes in OI and RETADDR. 1275 * This makes them callable from other helpers. 1276 */ 1277 1278 #define ATOMIC_NAME(X) \ 1279 glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu) 1280 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) 1281 1282 #define DATA_SIZE 1 1283 #include "atomic_template.h" 1284 1285 #define DATA_SIZE 2 1286 #include "atomic_template.h" 1287 1288 #define DATA_SIZE 4 1289 #include "atomic_template.h" 1290 1291 #ifdef CONFIG_ATOMIC64 1292 #define DATA_SIZE 8 1293 #include "atomic_template.h" 1294 #endif 1295 1296 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128 1297 #define DATA_SIZE 16 1298 #include "atomic_template.h" 1299 #endif 1300