1 /* 2 * User emulator execution 3 * 4 * Copyright (c) 2003-2005 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 #include "accel/tcg/cpu-ops.h" 21 #include "disas/disas.h" 22 #include "exec/vaddr.h" 23 #include "exec/exec-all.h" 24 #include "tcg/tcg.h" 25 #include "qemu/bitops.h" 26 #include "qemu/rcu.h" 27 #include "exec/cpu_ldst.h" 28 #include "user/cpu_loop.h" 29 #include "qemu/main-loop.h" 30 #include "user/page-protection.h" 31 #include "exec/page-protection.h" 32 #include "exec/helper-proto.h" 33 #include "qemu/atomic128.h" 34 #include "qemu/bswap.h" 35 #include "qemu/int128.h" 36 #include "trace.h" 37 #include "tcg/tcg-ldst.h" 38 #include "internal-common.h" 39 #include "internal-target.h" 40 #include "tb-internal.h" 41 42 __thread uintptr_t helper_retaddr; 43 44 //#define DEBUG_SIGNAL 45 46 void cpu_interrupt(CPUState *cpu, int mask) 47 { 48 g_assert(bql_locked()); 49 cpu->interrupt_request |= mask; 50 qatomic_set(&cpu->neg.icount_decr.u16.high, -1); 51 } 52 53 /* 54 * Adjust the pc to pass to cpu_restore_state; return the memop type. 55 */ 56 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) 57 { 58 switch (helper_retaddr) { 59 default: 60 /* 61 * Fault during host memory operation within a helper function. 62 * The helper's host return address, saved here, gives us a 63 * pointer into the generated code that will unwind to the 64 * correct guest pc. 65 */ 66 *pc = helper_retaddr; 67 break; 68 69 case 0: 70 /* 71 * Fault during host memory operation within generated code. 72 * (Or, a unrelated bug within qemu, but we can't tell from here). 73 * 74 * We take the host pc from the signal frame. However, we cannot 75 * use that value directly. Within cpu_restore_state_from_tb, we 76 * assume PC comes from GETPC(), as used by the helper functions, 77 * so we adjust the address by -GETPC_ADJ to form an address that 78 * is within the call insn, so that the address does not accidentally 79 * match the beginning of the next guest insn. However, when the 80 * pc comes from the signal frame it points to the actual faulting 81 * host memory insn and not the return from a call insn. 82 * 83 * Therefore, adjust to compensate for what will be done later 84 * by cpu_restore_state_from_tb. 85 */ 86 *pc += GETPC_ADJ; 87 break; 88 89 case 1: 90 /* 91 * Fault during host read for translation, or loosely, "execution". 92 * 93 * The guest pc is already pointing to the start of the TB for which 94 * code is being generated. If the guest translator manages the 95 * page crossings correctly, this is exactly the correct address 96 * (and if the translator doesn't handle page boundaries correctly 97 * there's little we can do about that here). Therefore, do not 98 * trigger the unwinder. 99 */ 100 *pc = 0; 101 return MMU_INST_FETCH; 102 } 103 104 return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; 105 } 106 107 /** 108 * handle_sigsegv_accerr_write: 109 * @cpu: the cpu context 110 * @old_set: the sigset_t from the signal ucontext_t 111 * @host_pc: the host pc, adjusted for the signal 112 * @guest_addr: the guest address of the fault 113 * 114 * Return true if the write fault has been handled, and should be re-tried. 115 * 116 * Note that it is important that we don't call page_unprotect() unless 117 * this is really a "write to nonwritable page" fault, because 118 * page_unprotect() assumes that if it is called for an access to 119 * a page that's writable this means we had two threads racing and 120 * another thread got there first and already made the page writable; 121 * so we will retry the access. If we were to call page_unprotect() 122 * for some other kind of fault that should really be passed to the 123 * guest, we'd end up in an infinite loop of retrying the faulting access. 124 */ 125 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, 126 uintptr_t host_pc, abi_ptr guest_addr) 127 { 128 switch (page_unprotect(guest_addr, host_pc)) { 129 case 0: 130 /* 131 * Fault not caused by a page marked unwritable to protect 132 * cached translations, must be the guest binary's problem. 133 */ 134 return false; 135 case 1: 136 /* 137 * Fault caused by protection of cached translation; TBs 138 * invalidated, so resume execution. 139 */ 140 return true; 141 case 2: 142 /* 143 * Fault caused by protection of cached translation, and the 144 * currently executing TB was modified and must be exited immediately. 145 */ 146 sigprocmask(SIG_SETMASK, old_set, NULL); 147 cpu_loop_exit_noexc(cpu); 148 /* NORETURN */ 149 default: 150 g_assert_not_reached(); 151 } 152 } 153 154 typedef struct PageFlagsNode { 155 struct rcu_head rcu; 156 IntervalTreeNode itree; 157 int flags; 158 } PageFlagsNode; 159 160 static IntervalTreeRoot pageflags_root; 161 162 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last) 163 { 164 IntervalTreeNode *n; 165 166 n = interval_tree_iter_first(&pageflags_root, start, last); 167 return n ? container_of(n, PageFlagsNode, itree) : NULL; 168 } 169 170 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start, 171 target_ulong last) 172 { 173 IntervalTreeNode *n; 174 175 n = interval_tree_iter_next(&p->itree, start, last); 176 return n ? container_of(n, PageFlagsNode, itree) : NULL; 177 } 178 179 int walk_memory_regions(void *priv, walk_memory_regions_fn fn) 180 { 181 IntervalTreeNode *n; 182 int rc = 0; 183 184 mmap_lock(); 185 for (n = interval_tree_iter_first(&pageflags_root, 0, -1); 186 n != NULL; 187 n = interval_tree_iter_next(n, 0, -1)) { 188 PageFlagsNode *p = container_of(n, PageFlagsNode, itree); 189 190 rc = fn(priv, n->start, n->last + 1, p->flags); 191 if (rc != 0) { 192 break; 193 } 194 } 195 mmap_unlock(); 196 197 return rc; 198 } 199 200 static int dump_region(void *priv, target_ulong start, 201 target_ulong end, unsigned long prot) 202 { 203 FILE *f = (FILE *)priv; 204 205 fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n", 206 start, end, end - start, 207 ((prot & PAGE_READ) ? 'r' : '-'), 208 ((prot & PAGE_WRITE) ? 'w' : '-'), 209 ((prot & PAGE_EXEC) ? 'x' : '-')); 210 return 0; 211 } 212 213 /* dump memory mappings */ 214 void page_dump(FILE *f) 215 { 216 const int length = sizeof(target_ulong) * 2; 217 218 fprintf(f, "%-*s %-*s %-*s %s\n", 219 length, "start", length, "end", length, "size", "prot"); 220 walk_memory_regions(f, dump_region); 221 } 222 223 int page_get_flags(target_ulong address) 224 { 225 PageFlagsNode *p = pageflags_find(address, address); 226 227 /* 228 * See util/interval-tree.c re lockless lookups: no false positives but 229 * there are false negatives. If we find nothing, retry with the mmap 230 * lock acquired. 231 */ 232 if (p) { 233 return p->flags; 234 } 235 if (have_mmap_lock()) { 236 return 0; 237 } 238 239 mmap_lock(); 240 p = pageflags_find(address, address); 241 mmap_unlock(); 242 return p ? p->flags : 0; 243 } 244 245 /* A subroutine of page_set_flags: insert a new node for [start,last]. */ 246 static void pageflags_create(target_ulong start, target_ulong last, int flags) 247 { 248 PageFlagsNode *p = g_new(PageFlagsNode, 1); 249 250 p->itree.start = start; 251 p->itree.last = last; 252 p->flags = flags; 253 interval_tree_insert(&p->itree, &pageflags_root); 254 } 255 256 /* A subroutine of page_set_flags: remove everything in [start,last]. */ 257 static bool pageflags_unset(target_ulong start, target_ulong last) 258 { 259 bool inval_tb = false; 260 261 while (true) { 262 PageFlagsNode *p = pageflags_find(start, last); 263 target_ulong p_last; 264 265 if (!p) { 266 break; 267 } 268 269 if (p->flags & PAGE_EXEC) { 270 inval_tb = true; 271 } 272 273 interval_tree_remove(&p->itree, &pageflags_root); 274 p_last = p->itree.last; 275 276 if (p->itree.start < start) { 277 /* Truncate the node from the end, or split out the middle. */ 278 p->itree.last = start - 1; 279 interval_tree_insert(&p->itree, &pageflags_root); 280 if (last < p_last) { 281 pageflags_create(last + 1, p_last, p->flags); 282 break; 283 } 284 } else if (p_last <= last) { 285 /* Range completely covers node -- remove it. */ 286 g_free_rcu(p, rcu); 287 } else { 288 /* Truncate the node from the start. */ 289 p->itree.start = last + 1; 290 interval_tree_insert(&p->itree, &pageflags_root); 291 break; 292 } 293 } 294 295 return inval_tb; 296 } 297 298 /* 299 * A subroutine of page_set_flags: nothing overlaps [start,last], 300 * but check adjacent mappings and maybe merge into a single range. 301 */ 302 static void pageflags_create_merge(target_ulong start, target_ulong last, 303 int flags) 304 { 305 PageFlagsNode *next = NULL, *prev = NULL; 306 307 if (start > 0) { 308 prev = pageflags_find(start - 1, start - 1); 309 if (prev) { 310 if (prev->flags == flags) { 311 interval_tree_remove(&prev->itree, &pageflags_root); 312 } else { 313 prev = NULL; 314 } 315 } 316 } 317 if (last + 1 != 0) { 318 next = pageflags_find(last + 1, last + 1); 319 if (next) { 320 if (next->flags == flags) { 321 interval_tree_remove(&next->itree, &pageflags_root); 322 } else { 323 next = NULL; 324 } 325 } 326 } 327 328 if (prev) { 329 if (next) { 330 prev->itree.last = next->itree.last; 331 g_free_rcu(next, rcu); 332 } else { 333 prev->itree.last = last; 334 } 335 interval_tree_insert(&prev->itree, &pageflags_root); 336 } else if (next) { 337 next->itree.start = start; 338 interval_tree_insert(&next->itree, &pageflags_root); 339 } else { 340 pageflags_create(start, last, flags); 341 } 342 } 343 344 /* 345 * Allow the target to decide if PAGE_TARGET_[12] may be reset. 346 * By default, they are not kept. 347 */ 348 #ifndef PAGE_TARGET_STICKY 349 #define PAGE_TARGET_STICKY 0 350 #endif 351 #define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY) 352 353 /* A subroutine of page_set_flags: add flags to [start,last]. */ 354 static bool pageflags_set_clear(target_ulong start, target_ulong last, 355 int set_flags, int clear_flags) 356 { 357 PageFlagsNode *p; 358 target_ulong p_start, p_last; 359 int p_flags, merge_flags; 360 bool inval_tb = false; 361 362 restart: 363 p = pageflags_find(start, last); 364 if (!p) { 365 if (set_flags) { 366 pageflags_create_merge(start, last, set_flags); 367 } 368 goto done; 369 } 370 371 p_start = p->itree.start; 372 p_last = p->itree.last; 373 p_flags = p->flags; 374 /* Using mprotect on a page does not change sticky bits. */ 375 merge_flags = (p_flags & ~clear_flags) | set_flags; 376 377 /* 378 * Need to flush if an overlapping executable region 379 * removes exec, or adds write. 380 */ 381 if ((p_flags & PAGE_EXEC) 382 && (!(merge_flags & PAGE_EXEC) 383 || (merge_flags & ~p_flags & PAGE_WRITE))) { 384 inval_tb = true; 385 } 386 387 /* 388 * If there is an exact range match, update and return without 389 * attempting to merge with adjacent regions. 390 */ 391 if (start == p_start && last == p_last) { 392 if (merge_flags) { 393 p->flags = merge_flags; 394 } else { 395 interval_tree_remove(&p->itree, &pageflags_root); 396 g_free_rcu(p, rcu); 397 } 398 goto done; 399 } 400 401 /* 402 * If sticky bits affect the original mapping, then we must be more 403 * careful about the existing intervals and the separate flags. 404 */ 405 if (set_flags != merge_flags) { 406 if (p_start < start) { 407 interval_tree_remove(&p->itree, &pageflags_root); 408 p->itree.last = start - 1; 409 interval_tree_insert(&p->itree, &pageflags_root); 410 411 if (last < p_last) { 412 if (merge_flags) { 413 pageflags_create(start, last, merge_flags); 414 } 415 pageflags_create(last + 1, p_last, p_flags); 416 } else { 417 if (merge_flags) { 418 pageflags_create(start, p_last, merge_flags); 419 } 420 if (p_last < last) { 421 start = p_last + 1; 422 goto restart; 423 } 424 } 425 } else { 426 if (start < p_start && set_flags) { 427 pageflags_create(start, p_start - 1, set_flags); 428 } 429 if (last < p_last) { 430 interval_tree_remove(&p->itree, &pageflags_root); 431 p->itree.start = last + 1; 432 interval_tree_insert(&p->itree, &pageflags_root); 433 if (merge_flags) { 434 pageflags_create(start, last, merge_flags); 435 } 436 } else { 437 if (merge_flags) { 438 p->flags = merge_flags; 439 } else { 440 interval_tree_remove(&p->itree, &pageflags_root); 441 g_free_rcu(p, rcu); 442 } 443 if (p_last < last) { 444 start = p_last + 1; 445 goto restart; 446 } 447 } 448 } 449 goto done; 450 } 451 452 /* If flags are not changing for this range, incorporate it. */ 453 if (set_flags == p_flags) { 454 if (start < p_start) { 455 interval_tree_remove(&p->itree, &pageflags_root); 456 p->itree.start = start; 457 interval_tree_insert(&p->itree, &pageflags_root); 458 } 459 if (p_last < last) { 460 start = p_last + 1; 461 goto restart; 462 } 463 goto done; 464 } 465 466 /* Maybe split out head and/or tail ranges with the original flags. */ 467 interval_tree_remove(&p->itree, &pageflags_root); 468 if (p_start < start) { 469 p->itree.last = start - 1; 470 interval_tree_insert(&p->itree, &pageflags_root); 471 472 if (p_last < last) { 473 goto restart; 474 } 475 if (last < p_last) { 476 pageflags_create(last + 1, p_last, p_flags); 477 } 478 } else if (last < p_last) { 479 p->itree.start = last + 1; 480 interval_tree_insert(&p->itree, &pageflags_root); 481 } else { 482 g_free_rcu(p, rcu); 483 goto restart; 484 } 485 if (set_flags) { 486 pageflags_create(start, last, set_flags); 487 } 488 489 done: 490 return inval_tb; 491 } 492 493 void page_set_flags(target_ulong start, target_ulong last, int flags) 494 { 495 bool reset = false; 496 bool inval_tb = false; 497 498 /* This function should never be called with addresses outside the 499 guest address space. If this assert fires, it probably indicates 500 a missing call to h2g_valid. */ 501 assert(start <= last); 502 assert(last <= GUEST_ADDR_MAX); 503 /* Only set PAGE_ANON with new mappings. */ 504 assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET)); 505 assert_memory_lock(); 506 507 start &= TARGET_PAGE_MASK; 508 last |= ~TARGET_PAGE_MASK; 509 510 if (!(flags & PAGE_VALID)) { 511 flags = 0; 512 } else { 513 reset = flags & PAGE_RESET; 514 flags &= ~PAGE_RESET; 515 if (flags & PAGE_WRITE) { 516 flags |= PAGE_WRITE_ORG; 517 } 518 } 519 520 if (!flags || reset) { 521 page_reset_target_data(start, last); 522 inval_tb |= pageflags_unset(start, last); 523 } 524 if (flags) { 525 inval_tb |= pageflags_set_clear(start, last, flags, 526 ~(reset ? 0 : PAGE_STICKY)); 527 } 528 if (inval_tb) { 529 tb_invalidate_phys_range(start, last); 530 } 531 } 532 533 bool page_check_range(target_ulong start, target_ulong len, int flags) 534 { 535 target_ulong last; 536 int locked; /* tri-state: =0: unlocked, +1: global, -1: local */ 537 bool ret; 538 539 if (len == 0) { 540 return true; /* trivial length */ 541 } 542 543 last = start + len - 1; 544 if (last < start) { 545 return false; /* wrap around */ 546 } 547 548 locked = have_mmap_lock(); 549 while (true) { 550 PageFlagsNode *p = pageflags_find(start, last); 551 int missing; 552 553 if (!p) { 554 if (!locked) { 555 /* 556 * Lockless lookups have false negatives. 557 * Retry with the lock held. 558 */ 559 mmap_lock(); 560 locked = -1; 561 p = pageflags_find(start, last); 562 } 563 if (!p) { 564 ret = false; /* entire region invalid */ 565 break; 566 } 567 } 568 if (start < p->itree.start) { 569 ret = false; /* initial bytes invalid */ 570 break; 571 } 572 573 missing = flags & ~p->flags; 574 if (missing & ~PAGE_WRITE) { 575 ret = false; /* page doesn't match */ 576 break; 577 } 578 if (missing & PAGE_WRITE) { 579 if (!(p->flags & PAGE_WRITE_ORG)) { 580 ret = false; /* page not writable */ 581 break; 582 } 583 /* Asking about writable, but has been protected: undo. */ 584 if (!page_unprotect(start, 0)) { 585 ret = false; 586 break; 587 } 588 /* TODO: page_unprotect should take a range, not a single page. */ 589 if (last - start < TARGET_PAGE_SIZE) { 590 ret = true; /* ok */ 591 break; 592 } 593 start += TARGET_PAGE_SIZE; 594 continue; 595 } 596 597 if (last <= p->itree.last) { 598 ret = true; /* ok */ 599 break; 600 } 601 start = p->itree.last + 1; 602 } 603 604 /* Release the lock if acquired locally. */ 605 if (locked < 0) { 606 mmap_unlock(); 607 } 608 return ret; 609 } 610 611 bool page_check_range_empty(target_ulong start, target_ulong last) 612 { 613 assert(last >= start); 614 assert_memory_lock(); 615 return pageflags_find(start, last) == NULL; 616 } 617 618 target_ulong page_find_range_empty(target_ulong min, target_ulong max, 619 target_ulong len, target_ulong align) 620 { 621 target_ulong len_m1, align_m1; 622 623 assert(min <= max); 624 assert(max <= GUEST_ADDR_MAX); 625 assert(len != 0); 626 assert(is_power_of_2(align)); 627 assert_memory_lock(); 628 629 len_m1 = len - 1; 630 align_m1 = align - 1; 631 632 /* Iteratively narrow the search region. */ 633 while (1) { 634 PageFlagsNode *p; 635 636 /* Align min and double-check there's enough space remaining. */ 637 min = (min + align_m1) & ~align_m1; 638 if (min > max) { 639 return -1; 640 } 641 if (len_m1 > max - min) { 642 return -1; 643 } 644 645 p = pageflags_find(min, min + len_m1); 646 if (p == NULL) { 647 /* Found! */ 648 return min; 649 } 650 if (max <= p->itree.last) { 651 /* Existing allocation fills the remainder of the search region. */ 652 return -1; 653 } 654 /* Skip across existing allocation. */ 655 min = p->itree.last + 1; 656 } 657 } 658 659 void page_protect(tb_page_addr_t address) 660 { 661 PageFlagsNode *p; 662 target_ulong start, last; 663 int host_page_size = qemu_real_host_page_size(); 664 int prot; 665 666 assert_memory_lock(); 667 668 if (host_page_size <= TARGET_PAGE_SIZE) { 669 start = address & TARGET_PAGE_MASK; 670 last = start + TARGET_PAGE_SIZE - 1; 671 } else { 672 start = address & -host_page_size; 673 last = start + host_page_size - 1; 674 } 675 676 p = pageflags_find(start, last); 677 if (!p) { 678 return; 679 } 680 prot = p->flags; 681 682 if (unlikely(p->itree.last < last)) { 683 /* More than one protection region covers the one host page. */ 684 assert(TARGET_PAGE_SIZE < host_page_size); 685 while ((p = pageflags_next(p, start, last)) != NULL) { 686 prot |= p->flags; 687 } 688 } 689 690 if (prot & PAGE_WRITE) { 691 pageflags_set_clear(start, last, 0, PAGE_WRITE); 692 mprotect(g2h_untagged(start), last - start + 1, 693 prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE); 694 } 695 } 696 697 /* 698 * Called from signal handler: invalidate the code and unprotect the 699 * page. Return 0 if the fault was not handled, 1 if it was handled, 700 * and 2 if it was handled but the caller must cause the TB to be 701 * immediately exited. (We can only return 2 if the 'pc' argument is 702 * non-zero.) 703 */ 704 int page_unprotect(tb_page_addr_t address, uintptr_t pc) 705 { 706 PageFlagsNode *p; 707 bool current_tb_invalidated; 708 709 /* 710 * Technically this isn't safe inside a signal handler. However we 711 * know this only ever happens in a synchronous SEGV handler, so in 712 * practice it seems to be ok. 713 */ 714 mmap_lock(); 715 716 p = pageflags_find(address, address); 717 718 /* If this address was not really writable, nothing to do. */ 719 if (!p || !(p->flags & PAGE_WRITE_ORG)) { 720 mmap_unlock(); 721 return 0; 722 } 723 724 current_tb_invalidated = false; 725 if (p->flags & PAGE_WRITE) { 726 /* 727 * If the page is actually marked WRITE then assume this is because 728 * this thread raced with another one which got here first and 729 * set the page to PAGE_WRITE and did the TB invalidate for us. 730 */ 731 #ifdef TARGET_HAS_PRECISE_SMC 732 TranslationBlock *current_tb = tcg_tb_lookup(pc); 733 if (current_tb) { 734 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; 735 } 736 #endif 737 } else { 738 int host_page_size = qemu_real_host_page_size(); 739 target_ulong start, len, i; 740 int prot; 741 742 if (host_page_size <= TARGET_PAGE_SIZE) { 743 start = address & TARGET_PAGE_MASK; 744 len = TARGET_PAGE_SIZE; 745 prot = p->flags | PAGE_WRITE; 746 pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0); 747 current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc); 748 } else { 749 start = address & -host_page_size; 750 len = host_page_size; 751 prot = 0; 752 753 for (i = 0; i < len; i += TARGET_PAGE_SIZE) { 754 target_ulong addr = start + i; 755 756 p = pageflags_find(addr, addr); 757 if (p) { 758 prot |= p->flags; 759 if (p->flags & PAGE_WRITE_ORG) { 760 prot |= PAGE_WRITE; 761 pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1, 762 PAGE_WRITE, 0); 763 } 764 } 765 /* 766 * Since the content will be modified, we must invalidate 767 * the corresponding translated code. 768 */ 769 current_tb_invalidated |= 770 tb_invalidate_phys_page_unwind(addr, pc); 771 } 772 } 773 if (prot & PAGE_EXEC) { 774 prot = (prot & ~PAGE_EXEC) | PAGE_READ; 775 } 776 mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX); 777 } 778 mmap_unlock(); 779 780 /* If current TB was invalidated return to main loop */ 781 return current_tb_invalidated ? 2 : 1; 782 } 783 784 static int probe_access_internal(CPUArchState *env, vaddr addr, 785 int fault_size, MMUAccessType access_type, 786 bool nonfault, uintptr_t ra) 787 { 788 int acc_flag; 789 bool maperr; 790 791 switch (access_type) { 792 case MMU_DATA_STORE: 793 acc_flag = PAGE_WRITE_ORG; 794 break; 795 case MMU_DATA_LOAD: 796 acc_flag = PAGE_READ; 797 break; 798 case MMU_INST_FETCH: 799 acc_flag = PAGE_EXEC; 800 break; 801 default: 802 g_assert_not_reached(); 803 } 804 805 if (guest_addr_valid_untagged(addr)) { 806 int page_flags = page_get_flags(addr); 807 if (page_flags & acc_flag) { 808 if (access_type != MMU_INST_FETCH 809 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) { 810 return TLB_MMIO; 811 } 812 return 0; /* success */ 813 } 814 maperr = !(page_flags & PAGE_VALID); 815 } else { 816 maperr = true; 817 } 818 819 if (nonfault) { 820 return TLB_INVALID_MASK; 821 } 822 823 cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); 824 } 825 826 int probe_access_flags(CPUArchState *env, vaddr addr, int size, 827 MMUAccessType access_type, int mmu_idx, 828 bool nonfault, void **phost, uintptr_t ra) 829 { 830 int flags; 831 832 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 833 flags = probe_access_internal(env, addr, size, access_type, nonfault, ra); 834 *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr); 835 return flags; 836 } 837 838 void *probe_access(CPUArchState *env, vaddr addr, int size, 839 MMUAccessType access_type, int mmu_idx, uintptr_t ra) 840 { 841 int flags; 842 843 g_assert(-(addr | TARGET_PAGE_MASK) >= size); 844 flags = probe_access_internal(env, addr, size, access_type, false, ra); 845 g_assert((flags & ~TLB_MMIO) == 0); 846 847 return size ? g2h(env_cpu(env), addr) : NULL; 848 } 849 850 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr, 851 void **hostp) 852 { 853 int flags; 854 855 flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0); 856 g_assert(flags == 0); 857 858 if (hostp) { 859 *hostp = g2h_untagged(addr); 860 } 861 return addr; 862 } 863 864 #ifdef TARGET_PAGE_DATA_SIZE 865 /* 866 * Allocate chunks of target data together. For the only current user, 867 * if we allocate one hunk per page, we have overhead of 40/128 or 40%. 868 * Therefore, allocate memory for 64 pages at a time for overhead < 1%. 869 */ 870 #define TPD_PAGES 64 871 #define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES) 872 873 typedef struct TargetPageDataNode { 874 struct rcu_head rcu; 875 IntervalTreeNode itree; 876 char data[] __attribute__((aligned)); 877 } TargetPageDataNode; 878 879 static IntervalTreeRoot targetdata_root; 880 881 void page_reset_target_data(target_ulong start, target_ulong last) 882 { 883 IntervalTreeNode *n, *next; 884 885 assert_memory_lock(); 886 887 start &= TARGET_PAGE_MASK; 888 last |= ~TARGET_PAGE_MASK; 889 890 for (n = interval_tree_iter_first(&targetdata_root, start, last), 891 next = n ? interval_tree_iter_next(n, start, last) : NULL; 892 n != NULL; 893 n = next, 894 next = next ? interval_tree_iter_next(n, start, last) : NULL) { 895 target_ulong n_start, n_last, p_ofs, p_len; 896 TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree); 897 898 if (n->start >= start && n->last <= last) { 899 interval_tree_remove(n, &targetdata_root); 900 g_free_rcu(t, rcu); 901 continue; 902 } 903 904 if (n->start < start) { 905 n_start = start; 906 p_ofs = (start - n->start) >> TARGET_PAGE_BITS; 907 } else { 908 n_start = n->start; 909 p_ofs = 0; 910 } 911 n_last = MIN(last, n->last); 912 p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; 913 914 memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0, 915 p_len * TARGET_PAGE_DATA_SIZE); 916 } 917 } 918 919 void *page_get_target_data(target_ulong address) 920 { 921 IntervalTreeNode *n; 922 TargetPageDataNode *t; 923 target_ulong page, region, p_ofs; 924 925 page = address & TARGET_PAGE_MASK; 926 region = address & TBD_MASK; 927 928 n = interval_tree_iter_first(&targetdata_root, page, page); 929 if (!n) { 930 /* 931 * See util/interval-tree.c re lockless lookups: no false positives 932 * but there are false negatives. If we find nothing, retry with 933 * the mmap lock acquired. We also need the lock for the 934 * allocation + insert. 935 */ 936 mmap_lock(); 937 n = interval_tree_iter_first(&targetdata_root, page, page); 938 if (!n) { 939 t = g_malloc0(sizeof(TargetPageDataNode) 940 + TPD_PAGES * TARGET_PAGE_DATA_SIZE); 941 n = &t->itree; 942 n->start = region; 943 n->last = region | ~TBD_MASK; 944 interval_tree_insert(n, &targetdata_root); 945 } 946 mmap_unlock(); 947 } 948 949 t = container_of(n, TargetPageDataNode, itree); 950 p_ofs = (page - region) >> TARGET_PAGE_BITS; 951 return t->data + p_ofs * TARGET_PAGE_DATA_SIZE; 952 } 953 #else 954 void page_reset_target_data(target_ulong start, target_ulong last) { } 955 #endif /* TARGET_PAGE_DATA_SIZE */ 956 957 /* The system-mode versions of these helpers are in cputlb.c. */ 958 959 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr, 960 MemOp mop, uintptr_t ra, MMUAccessType type) 961 { 962 int a_bits = memop_alignment_bits(mop); 963 void *ret; 964 965 /* Enforce guest required alignment. */ 966 if (unlikely(addr & ((1 << a_bits) - 1))) { 967 cpu_loop_exit_sigbus(cpu, addr, type, ra); 968 } 969 970 ret = g2h(cpu, addr); 971 set_helper_retaddr(ra); 972 return ret; 973 } 974 975 /* physical memory access (slow version, mainly for debug) */ 976 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr, 977 void *ptr, size_t len, bool is_write) 978 { 979 int flags; 980 vaddr l, page; 981 uint8_t *buf = ptr; 982 ssize_t written; 983 int ret = -1; 984 int fd = -1; 985 986 mmap_lock(); 987 988 while (len > 0) { 989 page = addr & TARGET_PAGE_MASK; 990 l = (page + TARGET_PAGE_SIZE) - addr; 991 if (l > len) { 992 l = len; 993 } 994 flags = page_get_flags(page); 995 if (!(flags & PAGE_VALID)) { 996 goto out_close; 997 } 998 if (is_write) { 999 if (flags & PAGE_WRITE) { 1000 memcpy(g2h(cpu, addr), buf, l); 1001 } else { 1002 /* Bypass the host page protection using ptrace. */ 1003 if (fd == -1) { 1004 fd = open("/proc/self/mem", O_WRONLY); 1005 if (fd == -1) { 1006 goto out; 1007 } 1008 } 1009 /* 1010 * If there is a TranslationBlock and we weren't bypassing the 1011 * host page protection, the memcpy() above would SEGV, 1012 * ultimately leading to page_unprotect(). So invalidate the 1013 * translations manually. Both invalidation and pwrite() must 1014 * be under mmap_lock() in order to prevent the creation of 1015 * another TranslationBlock in between. 1016 */ 1017 tb_invalidate_phys_range(addr, addr + l - 1); 1018 written = pwrite(fd, buf, l, 1019 (off_t)(uintptr_t)g2h_untagged(addr)); 1020 if (written != l) { 1021 goto out_close; 1022 } 1023 } 1024 } else if (flags & PAGE_READ) { 1025 memcpy(buf, g2h(cpu, addr), l); 1026 } else { 1027 /* Bypass the host page protection using ptrace. */ 1028 if (fd == -1) { 1029 fd = open("/proc/self/mem", O_RDONLY); 1030 if (fd == -1) { 1031 goto out; 1032 } 1033 } 1034 if (pread(fd, buf, l, 1035 (off_t)(uintptr_t)g2h_untagged(addr)) != l) { 1036 goto out_close; 1037 } 1038 } 1039 len -= l; 1040 buf += l; 1041 addr += l; 1042 } 1043 ret = 0; 1044 out_close: 1045 if (fd != -1) { 1046 close(fd); 1047 } 1048 out: 1049 mmap_unlock(); 1050 1051 return ret; 1052 } 1053 1054 #include "ldst_atomicity.c.inc" 1055 1056 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1057 uintptr_t ra, MMUAccessType access_type) 1058 { 1059 void *haddr; 1060 uint8_t ret; 1061 1062 cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); 1063 haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type); 1064 ret = ldub_p(haddr); 1065 clear_helper_retaddr(); 1066 return ret; 1067 } 1068 1069 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1070 uintptr_t ra, MMUAccessType access_type) 1071 { 1072 void *haddr; 1073 uint16_t ret; 1074 MemOp mop = get_memop(oi); 1075 1076 cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); 1077 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1078 ret = load_atom_2(cpu, ra, haddr, mop); 1079 clear_helper_retaddr(); 1080 1081 if (mop & MO_BSWAP) { 1082 ret = bswap16(ret); 1083 } 1084 return ret; 1085 } 1086 1087 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1088 uintptr_t ra, MMUAccessType access_type) 1089 { 1090 void *haddr; 1091 uint32_t ret; 1092 MemOp mop = get_memop(oi); 1093 1094 cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); 1095 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1096 ret = load_atom_4(cpu, ra, haddr, mop); 1097 clear_helper_retaddr(); 1098 1099 if (mop & MO_BSWAP) { 1100 ret = bswap32(ret); 1101 } 1102 return ret; 1103 } 1104 1105 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi, 1106 uintptr_t ra, MMUAccessType access_type) 1107 { 1108 void *haddr; 1109 uint64_t ret; 1110 MemOp mop = get_memop(oi); 1111 1112 cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); 1113 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type); 1114 ret = load_atom_8(cpu, ra, haddr, mop); 1115 clear_helper_retaddr(); 1116 1117 if (mop & MO_BSWAP) { 1118 ret = bswap64(ret); 1119 } 1120 return ret; 1121 } 1122 1123 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr, 1124 MemOpIdx oi, uintptr_t ra) 1125 { 1126 void *haddr; 1127 Int128 ret; 1128 MemOp mop = get_memop(oi); 1129 1130 tcg_debug_assert((mop & MO_SIZE) == MO_128); 1131 cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); 1132 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD); 1133 ret = load_atom_16(cpu, ra, haddr, mop); 1134 clear_helper_retaddr(); 1135 1136 if (mop & MO_BSWAP) { 1137 ret = bswap128(ret); 1138 } 1139 return ret; 1140 } 1141 1142 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val, 1143 MemOpIdx oi, uintptr_t ra) 1144 { 1145 void *haddr; 1146 1147 cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); 1148 haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE); 1149 stb_p(haddr, val); 1150 clear_helper_retaddr(); 1151 } 1152 1153 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val, 1154 MemOpIdx oi, uintptr_t ra) 1155 { 1156 void *haddr; 1157 MemOp mop = get_memop(oi); 1158 1159 cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); 1160 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1161 1162 if (mop & MO_BSWAP) { 1163 val = bswap16(val); 1164 } 1165 store_atom_2(cpu, ra, haddr, mop, val); 1166 clear_helper_retaddr(); 1167 } 1168 1169 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val, 1170 MemOpIdx oi, uintptr_t ra) 1171 { 1172 void *haddr; 1173 MemOp mop = get_memop(oi); 1174 1175 cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); 1176 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1177 1178 if (mop & MO_BSWAP) { 1179 val = bswap32(val); 1180 } 1181 store_atom_4(cpu, ra, haddr, mop, val); 1182 clear_helper_retaddr(); 1183 } 1184 1185 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val, 1186 MemOpIdx oi, uintptr_t ra) 1187 { 1188 void *haddr; 1189 MemOp mop = get_memop(oi); 1190 1191 cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); 1192 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1193 1194 if (mop & MO_BSWAP) { 1195 val = bswap64(val); 1196 } 1197 store_atom_8(cpu, ra, haddr, mop, val); 1198 clear_helper_retaddr(); 1199 } 1200 1201 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val, 1202 MemOpIdx oi, uintptr_t ra) 1203 { 1204 void *haddr; 1205 MemOpIdx mop = get_memop(oi); 1206 1207 cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); 1208 haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE); 1209 1210 if (mop & MO_BSWAP) { 1211 val = bswap128(val); 1212 } 1213 store_atom_16(cpu, ra, haddr, mop, val); 1214 clear_helper_retaddr(); 1215 } 1216 1217 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr) 1218 { 1219 uint32_t ret; 1220 1221 set_helper_retaddr(1); 1222 ret = ldub_p(g2h_untagged(ptr)); 1223 clear_helper_retaddr(); 1224 return ret; 1225 } 1226 1227 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr) 1228 { 1229 uint32_t ret; 1230 1231 set_helper_retaddr(1); 1232 ret = lduw_p(g2h_untagged(ptr)); 1233 clear_helper_retaddr(); 1234 return ret; 1235 } 1236 1237 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr) 1238 { 1239 uint32_t ret; 1240 1241 set_helper_retaddr(1); 1242 ret = ldl_p(g2h_untagged(ptr)); 1243 clear_helper_retaddr(); 1244 return ret; 1245 } 1246 1247 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr) 1248 { 1249 uint64_t ret; 1250 1251 set_helper_retaddr(1); 1252 ret = ldq_p(g2h_untagged(ptr)); 1253 clear_helper_retaddr(); 1254 return ret; 1255 } 1256 1257 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, 1258 MemOpIdx oi, uintptr_t ra) 1259 { 1260 void *haddr; 1261 uint8_t ret; 1262 1263 haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH); 1264 ret = ldub_p(haddr); 1265 clear_helper_retaddr(); 1266 return ret; 1267 } 1268 1269 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr, 1270 MemOpIdx oi, uintptr_t ra) 1271 { 1272 void *haddr; 1273 uint16_t ret; 1274 1275 haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH); 1276 ret = lduw_p(haddr); 1277 clear_helper_retaddr(); 1278 if (get_memop(oi) & MO_BSWAP) { 1279 ret = bswap16(ret); 1280 } 1281 return ret; 1282 } 1283 1284 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr, 1285 MemOpIdx oi, uintptr_t ra) 1286 { 1287 void *haddr; 1288 uint32_t ret; 1289 1290 haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH); 1291 ret = ldl_p(haddr); 1292 clear_helper_retaddr(); 1293 if (get_memop(oi) & MO_BSWAP) { 1294 ret = bswap32(ret); 1295 } 1296 return ret; 1297 } 1298 1299 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr, 1300 MemOpIdx oi, uintptr_t ra) 1301 { 1302 void *haddr; 1303 uint64_t ret; 1304 1305 haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD); 1306 ret = ldq_p(haddr); 1307 clear_helper_retaddr(); 1308 if (get_memop(oi) & MO_BSWAP) { 1309 ret = bswap64(ret); 1310 } 1311 return ret; 1312 } 1313 1314 #include "ldst_common.c.inc" 1315 1316 /* 1317 * Do not allow unaligned operations to proceed. Return the host address. 1318 */ 1319 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, 1320 int size, uintptr_t retaddr) 1321 { 1322 MemOp mop = get_memop(oi); 1323 int a_bits = memop_alignment_bits(mop); 1324 void *ret; 1325 1326 /* Enforce guest required alignment. */ 1327 if (unlikely(addr & ((1 << a_bits) - 1))) { 1328 cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr); 1329 } 1330 1331 /* Enforce qemu required alignment. */ 1332 if (unlikely(addr & (size - 1))) { 1333 cpu_loop_exit_atomic(cpu, retaddr); 1334 } 1335 1336 ret = g2h(cpu, addr); 1337 set_helper_retaddr(retaddr); 1338 return ret; 1339 } 1340 1341 #include "atomic_common.c.inc" 1342 1343 /* 1344 * First set of functions passes in OI and RETADDR. 1345 * This makes them callable from other helpers. 1346 */ 1347 1348 #define ATOMIC_NAME(X) \ 1349 glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu) 1350 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) 1351 1352 #define DATA_SIZE 1 1353 #include "atomic_template.h" 1354 1355 #define DATA_SIZE 2 1356 #include "atomic_template.h" 1357 1358 #define DATA_SIZE 4 1359 #include "atomic_template.h" 1360 1361 #ifdef CONFIG_ATOMIC64 1362 #define DATA_SIZE 8 1363 #include "atomic_template.h" 1364 #endif 1365 1366 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128 1367 #define DATA_SIZE 16 1368 #include "atomic_template.h" 1369 #endif 1370