/*
 * User emulator execution
 *
 * Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "accel/tcg/cpu-ops.h"
#include "disas/disas.h"
#include "exec/vaddr.h"
#include "exec/tlb-flags.h"
#include "tcg/tcg.h"
#include "qemu/bitops.h"
#include "qemu/rcu.h"
#include "accel/tcg/cpu-ldst-common.h"
#include "accel/tcg/helper-retaddr.h"
#include "accel/tcg/probe.h"
#include "user/cpu_loop.h"
#include "user/guest-host.h"
#include "qemu/main-loop.h"
#include "user/page-protection.h"
#include "exec/page-protection.h"
#include "exec/helper-proto-common.h"
#include "qemu/atomic128.h"
#include "qemu/bswap.h"
#include "qemu/int128.h"
#include "trace.h"
#include "tcg/tcg-ldst.h"
#include "backend-ldst.h"
#include "internal-common.h"
#include "tb-internal.h"

__thread uintptr_t helper_retaddr;

//#define DEBUG_SIGNAL

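/*
 * User-mode cpu_interrupt: record the request bits and set icount_decr
 * so that generated code stops at its next interrupt check.
 */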
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(bql_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
}

/*
 * Adjust the pc to pass to cpu_restore_state; return the memop type.
 */
MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
{
    switch (helper_retaddr) {
    default:
        /*
         * Fault during host memory operation within a helper function.
         * The helper's host return address, saved here, gives us a
         * pointer into the generated code that will unwind to the
         * correct guest pc.
         */
        *pc = helper_retaddr;
        break;

    case 0:
        /*
         * Fault during host memory operation within generated code.
         * (Or, an unrelated bug within qemu, but we can't tell from here).
         *
         * We take the host pc from the signal frame. However, we cannot
         * use that value directly. Within cpu_restore_state_from_tb, we
         * assume PC comes from GETPC(), as used by the helper functions,
         * so we adjust the address by -GETPC_ADJ to form an address that
         * is within the call insn, so that the address does not accidentally
         * match the beginning of the next guest insn. However, when the
         * pc comes from the signal frame it points to the actual faulting
         * host memory insn and not the return from a call insn.
         *
         * Therefore, adjust to compensate for what will be done later
         * by cpu_restore_state_from_tb.
         */
        *pc += GETPC_ADJ;
        break;

    case 1:
        /*
         * Fault during host read for translation, or loosely, "execution".
         *
         * The guest pc is already pointing to the start of the TB for which
         * code is being generated. If the guest translator manages the
         * page crossings correctly, this is exactly the correct address
         * (and if the translator doesn't handle page boundaries correctly
         * there's little we can do about that here). Therefore, do not
         * trigger the unwinder.
         */
        *pc = 0;
        return MMU_INST_FETCH;
    }

    return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
}

109
110 /**
111 * handle_sigsegv_accerr_write:
112 * @cpu: the cpu context
113 * @old_set: the sigset_t from the signal ucontext_t
114 * @host_pc: the host pc, adjusted for the signal
115 * @guest_addr: the guest address of the fault
116 *
117 * Return true if the write fault has been handled, and should be re-tried.
118 *
119 * Note that it is important that we don't call page_unprotect() unless
120 * this is really a "write to nonwritable page" fault, because
121 * page_unprotect() assumes that if it is called for an access to
122 * a page that's writable this means we had two threads racing and
123 * another thread got there first and already made the page writable;
124 * so we will retry the access. If we were to call page_unprotect()
125 * for some other kind of fault that should really be passed to the
126 * guest, we'd end up in an infinite loop of retrying the faulting access.
127 */
handle_sigsegv_accerr_write(CPUState * cpu,sigset_t * old_set,uintptr_t host_pc,vaddr guest_addr)128 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
129 uintptr_t host_pc, vaddr guest_addr)
130 {
131 switch (page_unprotect(cpu, guest_addr, host_pc)) {
132 case 0:
133 /*
134 * Fault not caused by a page marked unwritable to protect
135 * cached translations, must be the guest binary's problem.
136 */
137 return false;
138 case 1:
139 /*
140 * Fault caused by protection of cached translation; TBs
141 * invalidated, so resume execution.
142 */
143 return true;
144 case 2:
145 /*
146 * Fault caused by protection of cached translation, and the
147 * currently executing TB was modified and must be exited immediately.
148 */
149 sigprocmask(SIG_SETMASK, old_set, NULL);
150 cpu_loop_exit_noexc(cpu);
151 /* NORETURN */
152 default:
153 g_assert_not_reached();
154 }
155 }
156
157 typedef struct PageFlagsNode {
158 struct rcu_head rcu;
159 IntervalTreeNode itree;
160 int flags;
161 } PageFlagsNode;
162
163 static IntervalTreeRoot pageflags_root;
164
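/* Find the pageflags node covering any part of [start,last], if any. */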
static PageFlagsNode *pageflags_find(vaddr start, vaddr last)
{
    IntervalTreeNode *n;

    n = interval_tree_iter_first(&pageflags_root, start, last);
    return n ? container_of(n, PageFlagsNode, itree) : NULL;
}

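/* Return the next pageflags node after @p that intersects [start,last]. */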
static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last)
{
    IntervalTreeNode *n;

    n = interval_tree_iter_next(&p->itree, start, last);
    return n ? container_of(n, PageFlagsNode, itree) : NULL;
}

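/*
 * Walk all mapped regions under the mmap lock, invoking @fn on each;
 * stop and return the first non-zero value that @fn returns.
 */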
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    IntervalTreeNode *n;
    int rc = 0;

    mmap_lock();
    for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
         n != NULL;
         n = interval_tree_iter_next(n, 0, -1)) {
        PageFlagsNode *p = container_of(n, PageFlagsNode, itree);

        rc = fn(priv, n->start, n->last + 1, p->flags);
        if (rc != 0) {
            break;
        }
    }
    mmap_unlock();

    return rc;
}

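/* walk_memory_regions() callback for page_dump(): print one mapped region. */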
static int dump_region(void *opaque, vaddr start, vaddr end, int prot)
{
    FILE *f = opaque;
    uint64_t mask;
    int width;

    if (guest_addr_max <= UINT32_MAX) {
        mask = UINT32_MAX, width = 8;
    } else {
        mask = UINT64_MAX, width = 16;
    }

    fprintf(f, "%0*" PRIx64 "-%0*" PRIx64 " %0*" PRIx64 " %c%c%c\n",
            width, start & mask,
            width, end & mask,
            width, (end - start) & mask,
            ((prot & PAGE_READ) ? 'r' : '-'),
            ((prot & PAGE_WRITE) ? 'w' : '-'),
            ((prot & PAGE_EXEC) ? 'x' : '-'));
    return 0;
}

/* dump memory mappings */
void page_dump(FILE *f)
{
    int width = guest_addr_max <= UINT32_MAX ? 8 : 16;

    fprintf(f, "%-*s %-*s %-*s %s\n",
            width, "start", width, "end", width, "size", "prot");
    walk_memory_regions(f, dump_region);
}

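/* Return the flags for a single guest address, or 0 if it is unmapped. */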
int page_get_flags(vaddr address)
{
    PageFlagsNode *p = pageflags_find(address, address);

    /*
     * See util/interval-tree.c re lockless lookups: no false positives but
     * there are false negatives. If we find nothing, retry with the mmap
     * lock acquired.
     */
    if (p) {
        return p->flags;
    }
    if (have_mmap_lock()) {
        return 0;
    }

    mmap_lock();
    p = pageflags_find(address, address);
    mmap_unlock();
    return p ? p->flags : 0;
}

/* A subroutine of page_set_flags: insert a new node for [start,last]. */
static void pageflags_create(vaddr start, vaddr last, int flags)
{
    PageFlagsNode *p = g_new(PageFlagsNode, 1);

    p->itree.start = start;
    p->itree.last = last;
    p->flags = flags;
    interval_tree_insert(&p->itree, &pageflags_root);
}

/* A subroutine of page_set_flags: remove everything in [start,last]. */
static bool pageflags_unset(vaddr start, vaddr last)
{
    bool inval_tb = false;

    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        vaddr p_last;

        if (!p) {
            break;
        }

        if (p->flags & PAGE_EXEC) {
            inval_tb = true;
        }

        interval_tree_remove(&p->itree, &pageflags_root);
        p_last = p->itree.last;

        if (p->itree.start < start) {
            /* Truncate the node from the end, or split out the middle. */
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            if (last < p_last) {
                pageflags_create(last + 1, p_last, p->flags);
                break;
            }
        } else if (p_last <= last) {
            /* Range completely covers node -- remove it. */
            g_free_rcu(p, rcu);
        } else {
            /* Truncate the node from the start. */
            p->itree.start = last + 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            break;
        }
    }

    return inval_tb;
}

/*
 * A subroutine of page_set_flags: nothing overlaps [start,last],
 * but check adjacent mappings and maybe merge into a single range.
 */
static void pageflags_create_merge(vaddr start, vaddr last, int flags)
{
    PageFlagsNode *next = NULL, *prev = NULL;

    if (start > 0) {
        prev = pageflags_find(start - 1, start - 1);
        if (prev) {
            if (prev->flags == flags) {
                interval_tree_remove(&prev->itree, &pageflags_root);
            } else {
                prev = NULL;
            }
        }
    }
    if (last + 1 != 0) {
        next = pageflags_find(last + 1, last + 1);
        if (next) {
            if (next->flags == flags) {
                interval_tree_remove(&next->itree, &pageflags_root);
            } else {
                next = NULL;
            }
        }
    }

    if (prev) {
        if (next) {
            prev->itree.last = next->itree.last;
            g_free_rcu(next, rcu);
        } else {
            prev->itree.last = last;
        }
        interval_tree_insert(&prev->itree, &pageflags_root);
    } else if (next) {
        next->itree.start = start;
        interval_tree_insert(&next->itree, &pageflags_root);
    } else {
        pageflags_create(start, last, flags);
    }
}

/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY 0
#endif
#define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)

/* A subroutine of page_set_flags: add flags to [start,last]. */
static bool pageflags_set_clear(vaddr start, vaddr last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    vaddr p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

 restart:
    p = pageflags_find(start, last);
    if (!p) {
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}

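/*
 * Modify the flags of the pages in [start,last] and invalidate any cached
 * translations if necessary. PAGE_WRITE_ORG is added automatically when
 * PAGE_WRITE is requested; the mmap lock must be held.
 */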
void page_set_flags(vaddr start, vaddr last, int flags)
{
    bool reset = false;
    bool inval_tb = false;

    /* This function should never be called with addresses outside the
       guest address space. If this assert fires, it probably indicates
       a missing call to h2g_valid. */
    assert(start <= last);
    assert(last <= guest_addr_max);
    /* Only set PAGE_ANON with new mappings. */
    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
    assert_memory_lock();

    start &= TARGET_PAGE_MASK;
    last |= ~TARGET_PAGE_MASK;

    if (!(flags & PAGE_VALID)) {
        flags = 0;
    } else {
        reset = flags & PAGE_RESET;
        flags &= ~PAGE_RESET;
        if (flags & PAGE_WRITE) {
            flags |= PAGE_WRITE_ORG;
        }
    }

    if (!flags || reset) {
        page_reset_target_data(start, last);
        inval_tb |= pageflags_unset(start, last);
    }
    if (flags) {
        inval_tb |= pageflags_set_clear(start, last, flags,
                                        ~(reset ? 0 : PAGE_STICKY));
    }
    if (inval_tb) {
        tb_invalidate_phys_range(NULL, start, last);
    }
}

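/*
 * Return true if every byte of [start, start+len) has all of @flags.
 * A PAGE_WRITE check also undoes the write protection guarding cached
 * translations, via page_unprotect().
 */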
bool page_check_range(vaddr start, vaddr len, int flags)
{
    vaddr last;
    int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
    bool ret;

    if (len == 0) {
        return true;  /* trivial length */
    }

    last = start + len - 1;
    if (last < start) {
        return false; /* wrap around */
    }

    locked = have_mmap_lock();
    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        int missing;

        if (!p) {
            if (!locked) {
                /*
                 * Lockless lookups have false negatives.
                 * Retry with the lock held.
                 */
                mmap_lock();
                locked = -1;
                p = pageflags_find(start, last);
            }
            if (!p) {
                ret = false; /* entire region invalid */
                break;
            }
        }
        if (start < p->itree.start) {
            ret = false; /* initial bytes invalid */
            break;
        }

        missing = flags & ~p->flags;
        if (missing & ~PAGE_WRITE) {
            ret = false; /* page doesn't match */
            break;
        }
        if (missing & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG)) {
                ret = false; /* page not writable */
                break;
            }
            /* Asking about writable, but has been protected: undo. */
            if (!page_unprotect(NULL, start, 0)) {
                ret = false;
                break;
            }
            /* TODO: page_unprotect should take a range, not a single page. */
            if (last - start < TARGET_PAGE_SIZE) {
                ret = true; /* ok */
                break;
            }
            start += TARGET_PAGE_SIZE;
            continue;
        }

        if (last <= p->itree.last) {
            ret = true; /* ok */
            break;
        }
        start = p->itree.last + 1;
    }

    /* Release the lock if acquired locally. */
    if (locked < 0) {
        mmap_unlock();
    }
    return ret;
}

bool page_check_range_empty(vaddr start, vaddr last)
{
    assert(last >= start);
    assert_memory_lock();
    return pageflags_find(start, last) == NULL;
}

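/*
 * Find an unmapped range of at least @len bytes within [min, max],
 * with the result aligned to @align; return -1 if none exists.
 */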
vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align)
{
    vaddr len_m1, align_m1;

    assert(min <= max);
    assert(max <= guest_addr_max);
    assert(len != 0);
    assert(is_power_of_2(align));
    assert_memory_lock();

    len_m1 = len - 1;
    align_m1 = align - 1;

    /* Iteratively narrow the search region. */
    while (1) {
        PageFlagsNode *p;

        /* Align min and double-check there's enough space remaining. */
        min = (min + align_m1) & ~align_m1;
        if (min > max) {
            return -1;
        }
        if (len_m1 > max - min) {
            return -1;
        }

        p = pageflags_find(min, min + len_m1);
        if (p == NULL) {
            /* Found! */
            return min;
        }
        if (max <= p->itree.last) {
            /* Existing allocation fills the remainder of the search region. */
            return -1;
        }
        /* Skip across existing allocation. */
        min = p->itree.last + 1;
    }
}

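/*
 * Write-protect the host page(s) backing @address so that guest writes
 * fault and invalidate the translations cached for that page.
 */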
void tb_lock_page0(tb_page_addr_t address)
{
    PageFlagsNode *p;
    vaddr start, last;
    int host_page_size = qemu_real_host_page_size();
    int prot;

    assert_memory_lock();

    if (host_page_size <= TARGET_PAGE_SIZE) {
        start = address & TARGET_PAGE_MASK;
        last = start + TARGET_PAGE_SIZE - 1;
    } else {
        start = address & -host_page_size;
        last = start + host_page_size - 1;
    }

    p = pageflags_find(start, last);
    if (!p) {
        return;
    }
    prot = p->flags;

    if (unlikely(p->itree.last < last)) {
        /* More than one protection region covers the one host page. */
        assert(TARGET_PAGE_SIZE < host_page_size);
        while ((p = pageflags_next(p, start, last)) != NULL) {
            prot |= p->flags;
        }
    }

    if (prot & PAGE_WRITE) {
        pageflags_set_clear(start, last, 0, PAGE_WRITE);
        mprotect(g2h_untagged(start), last - start + 1,
                 prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
    }
}

/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 */
int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    assert((cpu == NULL) == (pc == 0));

    /*
     * Technically this isn't safe inside a signal handler. However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
        if (pc && cpu->cc->tcg_ops->precise_smc) {
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
        }
    } else {
        int host_page_size = qemu_real_host_page_size();
        vaddr start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated =
                tb_invalidate_phys_page_unwind(cpu, start, pc);
        } else {
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                vaddr addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(cpu, addr, pc);
            }
        }
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}

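/*
 * Check that an access of @fault_size bytes at @addr is permitted for
 * @access_type. Return 0 (or TLB_MMIO if plugin callbacks are enabled)
 * on success, TLB_INVALID_MASK if @nonfault, otherwise raise SIGSEGV.
 */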
static int probe_access_internal(CPUArchState *env, vaddr addr,
                                 int fault_size, MMUAccessType access_type,
                                 bool nonfault, uintptr_t ra)
{
    int acc_flag;
    bool maperr;

    switch (access_type) {
    case MMU_DATA_STORE:
        acc_flag = PAGE_WRITE_ORG;
        break;
    case MMU_DATA_LOAD:
        acc_flag = PAGE_READ;
        break;
    case MMU_INST_FETCH:
        acc_flag = PAGE_EXEC;
        break;
    default:
        g_assert_not_reached();
    }

    if (guest_addr_valid_untagged(addr)) {
        int page_flags = page_get_flags(addr);
        if (page_flags & acc_flag) {
            if (access_type != MMU_INST_FETCH
                && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
                return TLB_MMIO;
            }
            return 0; /* success */
        }
        maperr = !(page_flags & PAGE_VALID);
    } else {
        maperr = true;
    }

    if (nonfault) {
        return TLB_INVALID_MASK;
    }

    cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
}

int probe_access_flags(CPUArchState *env, vaddr addr, int size,
                       MMUAccessType access_type, int mmu_idx,
                       bool nonfault, void **phost, uintptr_t ra)
{
    int flags;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
    flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
    *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
    return flags;
}

void *probe_access(CPUArchState *env, vaddr addr, int size,
                   MMUAccessType access_type, int mmu_idx, uintptr_t ra)
{
    int flags;

    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
    flags = probe_access_internal(env, addr, size, access_type, false, ra);
    g_assert((flags & ~TLB_MMIO) == 0);

    return size ? g2h(env_cpu(env), addr) : NULL;
}

void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr,
                        MMUAccessType access_type, int mmu_idx)
{
    return g2h(env_cpu(env), addr);
}

tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
                                        void **hostp)
{
    int flags;

    flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
    g_assert(flags == 0);

    if (hostp) {
        *hostp = g2h_untagged(addr);
    }
    return addr;
}

/*
 * Allocate chunks of target data together. For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128, or about 31%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 */
#define TPD_PAGES 64
#define TBD_MASK (TARGET_PAGE_MASK * TPD_PAGES)

typedef struct TargetPageDataNode {
    struct rcu_head rcu;
    IntervalTreeNode itree;
    char data[] __attribute__((aligned));
} TargetPageDataNode;

static IntervalTreeRoot targetdata_root;
static size_t target_page_data_size;

void page_reset_target_data(vaddr start, vaddr last)
{
    IntervalTreeNode *n, *next;
    size_t size = target_page_data_size;

    if (likely(size == 0)) {
        return;
    }

    assert_memory_lock();

    start &= TARGET_PAGE_MASK;
    last |= ~TARGET_PAGE_MASK;

    for (n = interval_tree_iter_first(&targetdata_root, start, last),
         next = n ? interval_tree_iter_next(n, start, last) : NULL;
         n != NULL;
         n = next,
         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
        vaddr n_start, n_last, p_ofs, p_len;
        TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);

        if (n->start >= start && n->last <= last) {
            interval_tree_remove(n, &targetdata_root);
            g_free_rcu(t, rcu);
            continue;
        }

        if (n->start < start) {
            n_start = start;
            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
        } else {
            n_start = n->start;
            p_ofs = 0;
        }
        n_last = MIN(last, n->last);
        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;

        memset(t->data + p_ofs * size, 0, p_len * size);
    }
}

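/*
 * Return a pointer to @size bytes of per-page target data for @address,
 * allocating zeroed storage for the surrounding 64-page region on first use.
 */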
void *page_get_target_data(vaddr address, size_t size)
{
    IntervalTreeNode *n;
    TargetPageDataNode *t;
    vaddr page, region, p_ofs;

    /* Remember the size from the first call, and it should be constant. */
    if (unlikely(target_page_data_size != size)) {
        assert(target_page_data_size == 0);
        target_page_data_size = size;
    }

    page = address & TARGET_PAGE_MASK;
    region = address & TBD_MASK;

    n = interval_tree_iter_first(&targetdata_root, page, page);
    if (!n) {
        /*
         * See util/interval-tree.c re lockless lookups: no false positives
         * but there are false negatives. If we find nothing, retry with
         * the mmap lock acquired. We also need the lock for the
         * allocation + insert.
         */
        mmap_lock();
        n = interval_tree_iter_first(&targetdata_root, page, page);
        if (!n) {
            t = g_malloc0(sizeof(TargetPageDataNode) + TPD_PAGES * size);
            n = &t->itree;
            n->start = region;
            n->last = region | ~TBD_MASK;
            interval_tree_insert(n, &targetdata_root);
        }
        mmap_unlock();
    }

    t = container_of(n, TargetPageDataNode, itree);
    p_ofs = (page - region) >> TARGET_PAGE_BITS;
    return t->data + p_ofs * size;
}

/* The system-mode versions of these helpers are in cputlb.c. */

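/*
 * Enforce the guest's alignment requirement for @mop, then translate
 * @addr to a host pointer. helper_retaddr is set so that a host fault
 * taken during the access unwinds to the correct guest state.
 */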
static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
                            MemOp mop, uintptr_t ra, MMUAccessType type)
{
    int a_bits = memop_alignment_bits(mop);
    void *ret;

    /* Enforce guest required alignment. */
    if (unlikely(addr & ((1 << a_bits) - 1))) {
        cpu_loop_exit_sigbus(cpu, addr, type, ra);
    }

    ret = g2h(cpu, addr);
    set_helper_retaddr(ra);
    return ret;
}

/* physical memory access (slow version, mainly for debug) */
int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
                        void *ptr, size_t len, bool is_write)
{
    int flags;
    vaddr l, page;
    uint8_t *buf = ptr;
    ssize_t written;
    int ret = -1;
    int fd = -1;

    mmap_lock();

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len) {
            l = len;
        }
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID)) {
            goto out_close;
        }
        if (is_write) {
            if (flags & PAGE_WRITE) {
                memcpy(g2h(cpu, addr), buf, l);
            } else {
                /* Bypass the host page protection using ptrace. */
                if (fd == -1) {
                    fd = open("/proc/self/mem", O_WRONLY);
                    if (fd == -1) {
                        goto out;
                    }
                }
                /*
                 * If there is a TranslationBlock and we weren't bypassing the
                 * host page protection, the memcpy() above would SEGV,
                 * ultimately leading to page_unprotect(). So invalidate the
                 * translations manually. Both invalidation and pwrite() must
                 * be under mmap_lock() in order to prevent the creation of
                 * another TranslationBlock in between.
                 */
                tb_invalidate_phys_range(NULL, addr, addr + l - 1);
                written = pwrite(fd, buf, l,
                                 (off_t)(uintptr_t)g2h_untagged(addr));
                if (written != l) {
                    goto out_close;
                }
            }
        } else if (flags & PAGE_READ) {
            memcpy(buf, g2h(cpu, addr), l);
        } else {
            /* Bypass the host page protection using ptrace. */
            if (fd == -1) {
                fd = open("/proc/self/mem", O_RDONLY);
                if (fd == -1) {
                    goto out;
                }
            }
            if (pread(fd, buf, l,
                      (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
                goto out_close;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    ret = 0;
out_close:
    if (fd != -1) {
        close(fd);
    }
out:
    mmap_unlock();

    return ret;
}

#include "ldst_atomicity.c.inc"

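/*
 * Load helpers: look up the host address (setting helper_retaddr),
 * perform the host load with the required atomicity, then byte-swap
 * the result if the MemOp asks for the opposite endianness.
 */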
static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
                          uintptr_t ra, MMUAccessType access_type)
{
    void *haddr;
    uint8_t ret;

    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
    haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
    ret = ldub_p(haddr);
    clear_helper_retaddr();
    return ret;
}

static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
                           uintptr_t ra, MMUAccessType access_type)
{
    void *haddr;
    uint16_t ret;
    MemOp mop = get_memop(oi);

    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
    ret = load_atom_2(cpu, ra, haddr, mop);
    clear_helper_retaddr();

    if (mop & MO_BSWAP) {
        ret = bswap16(ret);
    }
    return ret;
}

static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
                           uintptr_t ra, MMUAccessType access_type)
{
    void *haddr;
    uint32_t ret;
    MemOp mop = get_memop(oi);

    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
    ret = load_atom_4(cpu, ra, haddr, mop);
    clear_helper_retaddr();

    if (mop & MO_BSWAP) {
        ret = bswap32(ret);
    }
    return ret;
}

static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
                           uintptr_t ra, MMUAccessType access_type)
{
    void *haddr;
    uint64_t ret;
    MemOp mop = get_memop(oi);

    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
    ret = load_atom_8(cpu, ra, haddr, mop);
    clear_helper_retaddr();

    if (mop & MO_BSWAP) {
        ret = bswap64(ret);
    }
    return ret;
}

static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
                          MemOpIdx oi, uintptr_t ra)
{
    void *haddr;
    Int128 ret;
    MemOp mop = get_memop(oi);

    tcg_debug_assert((mop & MO_SIZE) == MO_128);
    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
    ret = load_atom_16(cpu, ra, haddr, mop);
    clear_helper_retaddr();

    if (mop & MO_BSWAP) {
        ret = bswap128(ret);
    }
    return ret;
}

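/*
 * Store helpers: byte-swap the value first if the MemOp requires it,
 * then perform the host store with the required atomicity.
 */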
static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
                       MemOpIdx oi, uintptr_t ra)
{
    void *haddr;

    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
    haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
    stb_p(haddr, val);
    clear_helper_retaddr();
}

static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
                       MemOpIdx oi, uintptr_t ra)
{
    void *haddr;
    MemOp mop = get_memop(oi);

    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);

    if (mop & MO_BSWAP) {
        val = bswap16(val);
    }
    store_atom_2(cpu, ra, haddr, mop, val);
    clear_helper_retaddr();
}

static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
                       MemOpIdx oi, uintptr_t ra)
{
    void *haddr;
    MemOp mop = get_memop(oi);

    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);

    if (mop & MO_BSWAP) {
        val = bswap32(val);
    }
    store_atom_4(cpu, ra, haddr, mop, val);
    clear_helper_retaddr();
}

static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
                       MemOpIdx oi, uintptr_t ra)
{
    void *haddr;
    MemOp mop = get_memop(oi);

    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);

    if (mop & MO_BSWAP) {
        val = bswap64(val);
    }
    store_atom_8(cpu, ra, haddr, mop, val);
    clear_helper_retaddr();
}

static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
                        MemOpIdx oi, uintptr_t ra)
{
    void *haddr;
    MemOp mop = get_memop(oi);

    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);

    if (mop & MO_BSWAP) {
        val = bswap128(val);
    }
    store_atom_16(cpu, ra, haddr, mop, val);
    clear_helper_retaddr();
}

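/*
 * Code loads: same as the data loads above, but with MMU_INST_FETCH.
 * A zero retaddr is replaced with 1 so that adjust_signal_pc treats a
 * fault during the fetch as an instruction fetch rather than a data load.
 */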
uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
                         MemOpIdx oi, uintptr_t ra)
{
    return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}

uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
                          MemOpIdx oi, uintptr_t ra)
{
    return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}

uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
                          MemOpIdx oi, uintptr_t ra)
{
    return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}

uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
                          MemOpIdx oi, uintptr_t ra)
{
    return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
}

#include "ldst_common.c.inc"

/*
 * Do not allow unaligned operations to proceed. Return the host address.
 */
static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
                               int size, uintptr_t retaddr)
{
    MemOp mop = get_memop(oi);
    int a_bits = memop_alignment_bits(mop);
    void *ret;

    /* Enforce guest required alignment. */
    if (unlikely(addr & ((1 << a_bits) - 1))) {
        cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
    }

    /* Enforce qemu required alignment. */
    if (unlikely(addr & (size - 1))) {
        cpu_loop_exit_atomic(cpu, retaddr);
    }

    ret = g2h(cpu, addr);
    set_helper_retaddr(retaddr);
    return ret;
}

#include "atomic_common.c.inc"

/*
 * First set of functions passes in OI and RETADDR.
 * This makes them callable from other helpers.
 */

#define ATOMIC_NAME(X) \
    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)

#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif