xref: /qemu/accel/tcg/user-exec.c (revision 84307cd6027c4602913177ff09aeefa4743b7234)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "accel/tcg/cpu-ops.h"
21 #include "disas/disas.h"
22 #include "cpu.h"
23 #include "exec/vaddr.h"
24 #include "exec/tlb-flags.h"
25 #include "tcg/tcg.h"
26 #include "qemu/bitops.h"
27 #include "qemu/rcu.h"
28 #include "accel/tcg/cpu-ldst.h"
29 #include "accel/tcg/probe.h"
30 #include "user/cpu_loop.h"
31 #include "qemu/main-loop.h"
32 #include "user/page-protection.h"
33 #include "exec/page-protection.h"
34 #include "exec/helper-proto.h"
35 #include "qemu/atomic128.h"
36 #include "qemu/bswap.h"
37 #include "qemu/int128.h"
38 #include "trace.h"
39 #include "tcg/tcg-ldst.h"
40 #include "backend-ldst.h"
41 #include "internal-common.h"
42 #include "tb-internal.h"
43 
/*
 * Per-thread marker consulted by adjust_signal_pc() when a host fault
 * is being handled:
 *   0     - no helper return address recorded; the fault is attributed
 *           to generated code.
 *   1     - fault during a host read for translation.
 *   other - host return address of the helper performing the access
 *           (cpu_mmu_lookup() records it via set_helper_retaddr()).
 */
__thread uintptr_t helper_retaddr;
45 
46 //#define DEBUG_SIGNAL
47 
48 void cpu_interrupt(CPUState *cpu, int mask)
49 {
50     g_assert(bql_locked());
51     cpu->interrupt_request |= mask;
52     qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
53 }
54 
55 /*
56  * Adjust the pc to pass to cpu_restore_state; return the memop type.
57  */
58 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
59 {
60     switch (helper_retaddr) {
61     default:
62         /*
63          * Fault during host memory operation within a helper function.
64          * The helper's host return address, saved here, gives us a
65          * pointer into the generated code that will unwind to the
66          * correct guest pc.
67          */
68         *pc = helper_retaddr;
69         break;
70 
71     case 0:
72         /*
73          * Fault during host memory operation within generated code.
74          * (Or, a unrelated bug within qemu, but we can't tell from here).
75          *
76          * We take the host pc from the signal frame.  However, we cannot
77          * use that value directly.  Within cpu_restore_state_from_tb, we
78          * assume PC comes from GETPC(), as used by the helper functions,
79          * so we adjust the address by -GETPC_ADJ to form an address that
80          * is within the call insn, so that the address does not accidentally
81          * match the beginning of the next guest insn.  However, when the
82          * pc comes from the signal frame it points to the actual faulting
83          * host memory insn and not the return from a call insn.
84          *
85          * Therefore, adjust to compensate for what will be done later
86          * by cpu_restore_state_from_tb.
87          */
88         *pc += GETPC_ADJ;
89         break;
90 
91     case 1:
92         /*
93          * Fault during host read for translation, or loosely, "execution".
94          *
95          * The guest pc is already pointing to the start of the TB for which
96          * code is being generated.  If the guest translator manages the
97          * page crossings correctly, this is exactly the correct address
98          * (and if the translator doesn't handle page boundaries correctly
99          * there's little we can do about that here).  Therefore, do not
100          * trigger the unwinder.
101          */
102         *pc = 0;
103         return MMU_INST_FETCH;
104     }
105 
106     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
107 }
108 
109 /**
110  * handle_sigsegv_accerr_write:
111  * @cpu: the cpu context
112  * @old_set: the sigset_t from the signal ucontext_t
113  * @host_pc: the host pc, adjusted for the signal
114  * @guest_addr: the guest address of the fault
115  *
116  * Return true if the write fault has been handled, and should be re-tried.
117  *
118  * Note that it is important that we don't call page_unprotect() unless
119  * this is really a "write to nonwritable page" fault, because
120  * page_unprotect() assumes that if it is called for an access to
121  * a page that's writable this means we had two threads racing and
122  * another thread got there first and already made the page writable;
123  * so we will retry the access. If we were to call page_unprotect()
124  * for some other kind of fault that should really be passed to the
125  * guest, we'd end up in an infinite loop of retrying the faulting access.
126  */
127 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
128                                  uintptr_t host_pc, abi_ptr guest_addr)
129 {
130     switch (page_unprotect(cpu, guest_addr, host_pc)) {
131     case 0:
132         /*
133          * Fault not caused by a page marked unwritable to protect
134          * cached translations, must be the guest binary's problem.
135          */
136         return false;
137     case 1:
138         /*
139          * Fault caused by protection of cached translation; TBs
140          * invalidated, so resume execution.
141          */
142         return true;
143     case 2:
144         /*
145          * Fault caused by protection of cached translation, and the
146          * currently executing TB was modified and must be exited immediately.
147          */
148         sigprocmask(SIG_SETMASK, old_set, NULL);
149         cpu_loop_exit_noexc(cpu);
150         /* NORETURN */
151     default:
152         g_assert_not_reached();
153     }
154 }
155 
/*
 * One entry of the page-flags interval tree: an inclusive address range
 * [itree.start, itree.last] whose pages all share the same flags.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when a node is dropped */
    IntervalTreeNode itree;     /* linkage and [start, last] key in the tree */
    int flags;                  /* PAGE_* bits for the whole range */
} PageFlagsNode;

/* Root of the interval tree holding every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;
163 
164 static PageFlagsNode *pageflags_find(vaddr start, vaddr last)
165 {
166     IntervalTreeNode *n;
167 
168     n = interval_tree_iter_first(&pageflags_root, start, last);
169     return n ? container_of(n, PageFlagsNode, itree) : NULL;
170 }
171 
172 static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last)
173 {
174     IntervalTreeNode *n;
175 
176     n = interval_tree_iter_next(&p->itree, start, last);
177     return n ? container_of(n, PageFlagsNode, itree) : NULL;
178 }
179 
180 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
181 {
182     IntervalTreeNode *n;
183     int rc = 0;
184 
185     mmap_lock();
186     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
187          n != NULL;
188          n = interval_tree_iter_next(n, 0, -1)) {
189         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
190 
191         rc = fn(priv, n->start, n->last + 1, p->flags);
192         if (rc != 0) {
193             break;
194         }
195     }
196     mmap_unlock();
197 
198     return rc;
199 }
200 
201 static int dump_region(void *opaque, vaddr start, vaddr end, int prot)
202 {
203     FILE *f = opaque;
204 
205     fprintf(f, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr
206             " " TARGET_ABI_FMT_ptr " %c%c%c\n",
207             (abi_ptr)start, (abi_ptr)end, (abi_ptr)(end - start),
208             ((prot & PAGE_READ) ? 'r' : '-'),
209             ((prot & PAGE_WRITE) ? 'w' : '-'),
210             ((prot & PAGE_EXEC) ? 'x' : '-'));
211     return 0;
212 }
213 
214 /* dump memory mappings */
215 void page_dump(FILE *f)
216 {
217     const int length = sizeof(abi_ptr) * 2;
218 
219     fprintf(f, "%-*s %-*s %-*s %s\n",
220             length, "start", length, "end", length, "size", "prot");
221     walk_memory_regions(f, dump_region);
222 }
223 
224 int page_get_flags(vaddr address)
225 {
226     PageFlagsNode *p = pageflags_find(address, address);
227 
228     /*
229      * See util/interval-tree.c re lockless lookups: no false positives but
230      * there are false negatives.  If we find nothing, retry with the mmap
231      * lock acquired.
232      */
233     if (p) {
234         return p->flags;
235     }
236     if (have_mmap_lock()) {
237         return 0;
238     }
239 
240     mmap_lock();
241     p = pageflags_find(address, address);
242     mmap_unlock();
243     return p ? p->flags : 0;
244 }
245 
246 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
247 static void pageflags_create(vaddr start, vaddr last, int flags)
248 {
249     PageFlagsNode *p = g_new(PageFlagsNode, 1);
250 
251     p->itree.start = start;
252     p->itree.last = last;
253     p->flags = flags;
254     interval_tree_insert(&p->itree, &pageflags_root);
255 }
256 
257 /* A subroutine of page_set_flags: remove everything in [start,last]. */
258 static bool pageflags_unset(vaddr start, vaddr last)
259 {
260     bool inval_tb = false;
261 
262     while (true) {
263         PageFlagsNode *p = pageflags_find(start, last);
264         vaddr p_last;
265 
266         if (!p) {
267             break;
268         }
269 
270         if (p->flags & PAGE_EXEC) {
271             inval_tb = true;
272         }
273 
274         interval_tree_remove(&p->itree, &pageflags_root);
275         p_last = p->itree.last;
276 
277         if (p->itree.start < start) {
278             /* Truncate the node from the end, or split out the middle. */
279             p->itree.last = start - 1;
280             interval_tree_insert(&p->itree, &pageflags_root);
281             if (last < p_last) {
282                 pageflags_create(last + 1, p_last, p->flags);
283                 break;
284             }
285         } else if (p_last <= last) {
286             /* Range completely covers node -- remove it. */
287             g_free_rcu(p, rcu);
288         } else {
289             /* Truncate the node from the start. */
290             p->itree.start = last + 1;
291             interval_tree_insert(&p->itree, &pageflags_root);
292             break;
293         }
294     }
295 
296     return inval_tb;
297 }
298 
299 /*
300  * A subroutine of page_set_flags: nothing overlaps [start,last],
301  * but check adjacent mappings and maybe merge into a single range.
302  */
303 static void pageflags_create_merge(vaddr start, vaddr last, int flags)
304 {
305     PageFlagsNode *next = NULL, *prev = NULL;
306 
307     if (start > 0) {
308         prev = pageflags_find(start - 1, start - 1);
309         if (prev) {
310             if (prev->flags == flags) {
311                 interval_tree_remove(&prev->itree, &pageflags_root);
312             } else {
313                 prev = NULL;
314             }
315         }
316     }
317     if (last + 1 != 0) {
318         next = pageflags_find(last + 1, last + 1);
319         if (next) {
320             if (next->flags == flags) {
321                 interval_tree_remove(&next->itree, &pageflags_root);
322             } else {
323                 next = NULL;
324             }
325         }
326     }
327 
328     if (prev) {
329         if (next) {
330             prev->itree.last = next->itree.last;
331             g_free_rcu(next, rcu);
332         } else {
333             prev->itree.last = last;
334         }
335         interval_tree_insert(&prev->itree, &pageflags_root);
336     } else if (next) {
337         next->itree.start = start;
338         interval_tree_insert(&next->itree, &pageflags_root);
339     } else {
340         pageflags_create(start, last, flags);
341     }
342 }
343 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
/*
 * Flag bits that page_set_flags() leaves untouched on existing mappings
 * unless the caller passed PAGE_RESET: they are excluded from the
 * clear mask handed to pageflags_set_clear().
 */
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
352 
/*
 * A subroutine of page_set_flags (also used by tb_lock_page0 and
 * page_unprotect): update the flags of [start,last].
 *
 * For each existing node overlapping the range, the new flags are
 * (old & ~clear_flags) | set_flags; nodes are trimmed, split or freed
 * as needed so that only the covered part changes.  Parts of the range
 * with no existing node get a new node with set_flags, provided
 * set_flags is non-zero.
 *
 * Returns true if an overlapping executable node loses PAGE_EXEC or
 * gains PAGE_WRITE, i.e. the caller needs to invalidate TBs.
 */
static bool pageflags_set_clear(vaddr start, vaddr last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    vaddr p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

    /* Re-entered with an advanced 'start' until the range is consumed. */
 restart:
    p = pageflags_find(start, last);
    if (!p) {
        /* Nothing overlaps what remains: create it, merging neighbours. */
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    /* Snapshot the node, since it may be modified or freed below. */
    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Keep the head of the node, below 'start', unchanged. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range lies strictly inside the node: middle + tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    /* Continue with whatever follows this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* Gap before the node: it only ever receives set_flags. */
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Keep the tail of the node, above 'last', unchanged. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* The node is wholly inside the range. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Extend the node downwards to cover the leading gap. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            /* The trimmed node no longer overlaps; look again. */
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node entirely covered: drop it and look again. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
492 
493 void page_set_flags(vaddr start, vaddr last, int flags)
494 {
495     bool reset = false;
496     bool inval_tb = false;
497 
498     /* This function should never be called with addresses outside the
499        guest address space.  If this assert fires, it probably indicates
500        a missing call to h2g_valid.  */
501     assert(start <= last);
502     assert(last <= GUEST_ADDR_MAX);
503     /* Only set PAGE_ANON with new mappings. */
504     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
505     assert_memory_lock();
506 
507     start &= TARGET_PAGE_MASK;
508     last |= ~TARGET_PAGE_MASK;
509 
510     if (!(flags & PAGE_VALID)) {
511         flags = 0;
512     } else {
513         reset = flags & PAGE_RESET;
514         flags &= ~PAGE_RESET;
515         if (flags & PAGE_WRITE) {
516             flags |= PAGE_WRITE_ORG;
517         }
518     }
519 
520     if (!flags || reset) {
521         page_reset_target_data(start, last);
522         inval_tb |= pageflags_unset(start, last);
523     }
524     if (flags) {
525         inval_tb |= pageflags_set_clear(start, last, flags,
526                                         ~(reset ? 0 : PAGE_STICKY));
527     }
528     if (inval_tb) {
529         tb_invalidate_phys_range(NULL, start, last);
530     }
531 }
532 
533 bool page_check_range(vaddr start, vaddr len, int flags)
534 {
535     vaddr last;
536     int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
537     bool ret;
538 
539     if (len == 0) {
540         return true;  /* trivial length */
541     }
542 
543     last = start + len - 1;
544     if (last < start) {
545         return false; /* wrap around */
546     }
547 
548     locked = have_mmap_lock();
549     while (true) {
550         PageFlagsNode *p = pageflags_find(start, last);
551         int missing;
552 
553         if (!p) {
554             if (!locked) {
555                 /*
556                  * Lockless lookups have false negatives.
557                  * Retry with the lock held.
558                  */
559                 mmap_lock();
560                 locked = -1;
561                 p = pageflags_find(start, last);
562             }
563             if (!p) {
564                 ret = false; /* entire region invalid */
565                 break;
566             }
567         }
568         if (start < p->itree.start) {
569             ret = false; /* initial bytes invalid */
570             break;
571         }
572 
573         missing = flags & ~p->flags;
574         if (missing & ~PAGE_WRITE) {
575             ret = false; /* page doesn't match */
576             break;
577         }
578         if (missing & PAGE_WRITE) {
579             if (!(p->flags & PAGE_WRITE_ORG)) {
580                 ret = false; /* page not writable */
581                 break;
582             }
583             /* Asking about writable, but has been protected: undo. */
584             if (!page_unprotect(NULL, start, 0)) {
585                 ret = false;
586                 break;
587             }
588             /* TODO: page_unprotect should take a range, not a single page. */
589             if (last - start < TARGET_PAGE_SIZE) {
590                 ret = true; /* ok */
591                 break;
592             }
593             start += TARGET_PAGE_SIZE;
594             continue;
595         }
596 
597         if (last <= p->itree.last) {
598             ret = true; /* ok */
599             break;
600         }
601         start = p->itree.last + 1;
602     }
603 
604     /* Release the lock if acquired locally. */
605     if (locked < 0) {
606         mmap_unlock();
607     }
608     return ret;
609 }
610 
611 bool page_check_range_empty(vaddr start, vaddr last)
612 {
613     assert(last >= start);
614     assert_memory_lock();
615     return pageflags_find(start, last) == NULL;
616 }
617 
618 vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align)
619 {
620     vaddr len_m1, align_m1;
621 
622     assert(min <= max);
623     assert(max <= GUEST_ADDR_MAX);
624     assert(len != 0);
625     assert(is_power_of_2(align));
626     assert_memory_lock();
627 
628     len_m1 = len - 1;
629     align_m1 = align - 1;
630 
631     /* Iteratively narrow the search region. */
632     while (1) {
633         PageFlagsNode *p;
634 
635         /* Align min and double-check there's enough space remaining. */
636         min = (min + align_m1) & ~align_m1;
637         if (min > max) {
638             return -1;
639         }
640         if (len_m1 > max - min) {
641             return -1;
642         }
643 
644         p = pageflags_find(min, min + len_m1);
645         if (p == NULL) {
646             /* Found! */
647             return min;
648         }
649         if (max <= p->itree.last) {
650             /* Existing allocation fills the remainder of the search region. */
651             return -1;
652         }
653         /* Skip across existing allocation. */
654         min = p->itree.last + 1;
655     }
656 }
657 
658 void tb_lock_page0(tb_page_addr_t address)
659 {
660     PageFlagsNode *p;
661     vaddr start, last;
662     int host_page_size = qemu_real_host_page_size();
663     int prot;
664 
665     assert_memory_lock();
666 
667     if (host_page_size <= TARGET_PAGE_SIZE) {
668         start = address & TARGET_PAGE_MASK;
669         last = start + TARGET_PAGE_SIZE - 1;
670     } else {
671         start = address & -host_page_size;
672         last = start + host_page_size - 1;
673     }
674 
675     p = pageflags_find(start, last);
676     if (!p) {
677         return;
678     }
679     prot = p->flags;
680 
681     if (unlikely(p->itree.last < last)) {
682         /* More than one protection region covers the one host page. */
683         assert(TARGET_PAGE_SIZE < host_page_size);
684         while ((p = pageflags_next(p, start, last)) != NULL) {
685             prot |= p->flags;
686         }
687     }
688 
689     if (prot & PAGE_WRITE) {
690         pageflags_set_clear(start, last, 0, PAGE_WRITE);
691         mprotect(g2h_untagged(start), last - start + 1,
692                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
693     }
694 }
695 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @cpu and @pc must be supplied together: either both are set, or
 * @cpu is NULL and @pc is 0 (as page_check_range() does).
 */
int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    assert((cpu == NULL) == (pc == 0));

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
        if (pc && cpu->cc->tcg_ops->precise_smc) {
            /* Report whether the TB we are running in was invalidated. */
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
        }
    } else {
        int host_page_size = qemu_real_host_page_size();
        vaddr start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* One target page is the unit of protection. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated =
                tb_invalidate_phys_page_unwind(cpu, start, pc);
        } else {
            /*
             * The host page spans several target pages: process each of
             * them and accumulate the union of their flags for mprotect.
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                vaddr addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(cpu, addr, pc);
            }
        }
        /* The host mapping is given read access in place of exec. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
785 
786 static int probe_access_internal(CPUArchState *env, vaddr addr,
787                                  int fault_size, MMUAccessType access_type,
788                                  bool nonfault, uintptr_t ra)
789 {
790     int acc_flag;
791     bool maperr;
792 
793     switch (access_type) {
794     case MMU_DATA_STORE:
795         acc_flag = PAGE_WRITE_ORG;
796         break;
797     case MMU_DATA_LOAD:
798         acc_flag = PAGE_READ;
799         break;
800     case MMU_INST_FETCH:
801         acc_flag = PAGE_EXEC;
802         break;
803     default:
804         g_assert_not_reached();
805     }
806 
807     if (guest_addr_valid_untagged(addr)) {
808         int page_flags = page_get_flags(addr);
809         if (page_flags & acc_flag) {
810             if (access_type != MMU_INST_FETCH
811                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
812                 return TLB_MMIO;
813             }
814             return 0; /* success */
815         }
816         maperr = !(page_flags & PAGE_VALID);
817     } else {
818         maperr = true;
819     }
820 
821     if (nonfault) {
822         return TLB_INVALID_MASK;
823     }
824 
825     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
826 }
827 
828 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
829                        MMUAccessType access_type, int mmu_idx,
830                        bool nonfault, void **phost, uintptr_t ra)
831 {
832     int flags;
833 
834     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
835     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
836     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
837     return flags;
838 }
839 
840 void *probe_access(CPUArchState *env, vaddr addr, int size,
841                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
842 {
843     int flags;
844 
845     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
846     flags = probe_access_internal(env, addr, size, access_type, false, ra);
847     g_assert((flags & ~TLB_MMIO) == 0);
848 
849     return size ? g2h(env_cpu(env), addr) : NULL;
850 }
851 
852 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
853                                         void **hostp)
854 {
855     int flags;
856 
857     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
858     g_assert(flags == 0);
859 
860     if (hostp) {
861         *hostp = g2h_untagged(addr);
862     }
863     return addr;
864 }
865 
866 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 or
 * roughly 31%.  Therefore, allocate memory for 64 pages at a time for
 * overhead < 1%.
 */
#define TPD_PAGES  64
/* Address mask selecting the TPD_PAGES-page region that holds a page. */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)

/*
 * One entry of the target-data interval tree: covers one region of
 * TPD_PAGES target pages, with TARGET_PAGE_DATA_SIZE bytes of data per
 * page stored in the trailing array.
 */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;        /* used by g_free_rcu() when a node is dropped */
    IntervalTreeNode itree;     /* linkage and [start, last] key in the tree */
    char data[] __attribute__((aligned));   /* per-page data, page order */
} TargetPageDataNode;

/* Root of the interval tree holding every TargetPageDataNode. */
static IntervalTreeRoot targetdata_root;
882 
883 void page_reset_target_data(vaddr start, vaddr last)
884 {
885     IntervalTreeNode *n, *next;
886 
887     assert_memory_lock();
888 
889     start &= TARGET_PAGE_MASK;
890     last |= ~TARGET_PAGE_MASK;
891 
892     for (n = interval_tree_iter_first(&targetdata_root, start, last),
893          next = n ? interval_tree_iter_next(n, start, last) : NULL;
894          n != NULL;
895          n = next,
896          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
897         vaddr n_start, n_last, p_ofs, p_len;
898         TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
899 
900         if (n->start >= start && n->last <= last) {
901             interval_tree_remove(n, &targetdata_root);
902             g_free_rcu(t, rcu);
903             continue;
904         }
905 
906         if (n->start < start) {
907             n_start = start;
908             p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
909         } else {
910             n_start = n->start;
911             p_ofs = 0;
912         }
913         n_last = MIN(last, n->last);
914         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
915 
916         memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
917                p_len * TARGET_PAGE_DATA_SIZE);
918     }
919 }
920 
921 void *page_get_target_data(vaddr address)
922 {
923     IntervalTreeNode *n;
924     TargetPageDataNode *t;
925     vaddr page, region, p_ofs;
926 
927     page = address & TARGET_PAGE_MASK;
928     region = address & TBD_MASK;
929 
930     n = interval_tree_iter_first(&targetdata_root, page, page);
931     if (!n) {
932         /*
933          * See util/interval-tree.c re lockless lookups: no false positives
934          * but there are false negatives.  If we find nothing, retry with
935          * the mmap lock acquired.  We also need the lock for the
936          * allocation + insert.
937          */
938         mmap_lock();
939         n = interval_tree_iter_first(&targetdata_root, page, page);
940         if (!n) {
941             t = g_malloc0(sizeof(TargetPageDataNode)
942                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
943             n = &t->itree;
944             n->start = region;
945             n->last = region | ~TBD_MASK;
946             interval_tree_insert(n, &targetdata_root);
947         }
948         mmap_unlock();
949     }
950 
951     t = container_of(n, TargetPageDataNode, itree);
952     p_ofs = (page - region) >> TARGET_PAGE_BITS;
953     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
954 }
955 #else
956 void page_reset_target_data(vaddr start, vaddr last) { }
957 #endif /* TARGET_PAGE_DATA_SIZE */
958 
959 /* The system-mode versions of these helpers are in cputlb.c.  */
960 
961 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
962                             MemOp mop, uintptr_t ra, MMUAccessType type)
963 {
964     int a_bits = memop_alignment_bits(mop);
965     void *ret;
966 
967     /* Enforce guest required alignment.  */
968     if (unlikely(addr & ((1 << a_bits) - 1))) {
969         cpu_loop_exit_sigbus(cpu, addr, type, ra);
970     }
971 
972     ret = g2h(cpu, addr);
973     set_helper_retaddr(ra);
974     return ret;
975 }
976 
977 /* physical memory access (slow version, mainly for debug) */
978 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
979                         void *ptr, size_t len, bool is_write)
980 {
981     int flags;
982     vaddr l, page;
983     uint8_t *buf = ptr;
984     ssize_t written;
985     int ret = -1;
986     int fd = -1;
987 
988     mmap_lock();
989 
990     while (len > 0) {
991         page = addr & TARGET_PAGE_MASK;
992         l = (page + TARGET_PAGE_SIZE) - addr;
993         if (l > len) {
994             l = len;
995         }
996         flags = page_get_flags(page);
997         if (!(flags & PAGE_VALID)) {
998             goto out_close;
999         }
1000         if (is_write) {
1001             if (flags & PAGE_WRITE) {
1002                 memcpy(g2h(cpu, addr), buf, l);
1003             } else {
1004                 /* Bypass the host page protection using ptrace. */
1005                 if (fd == -1) {
1006                     fd = open("/proc/self/mem", O_WRONLY);
1007                     if (fd == -1) {
1008                         goto out;
1009                     }
1010                 }
1011                 /*
1012                  * If there is a TranslationBlock and we weren't bypassing the
1013                  * host page protection, the memcpy() above would SEGV,
1014                  * ultimately leading to page_unprotect(). So invalidate the
1015                  * translations manually. Both invalidation and pwrite() must
1016                  * be under mmap_lock() in order to prevent the creation of
1017                  * another TranslationBlock in between.
1018                  */
1019                 tb_invalidate_phys_range(NULL, addr, addr + l - 1);
1020                 written = pwrite(fd, buf, l,
1021                                  (off_t)(uintptr_t)g2h_untagged(addr));
1022                 if (written != l) {
1023                     goto out_close;
1024                 }
1025             }
1026         } else if (flags & PAGE_READ) {
1027             memcpy(buf, g2h(cpu, addr), l);
1028         } else {
1029             /* Bypass the host page protection using ptrace. */
1030             if (fd == -1) {
1031                 fd = open("/proc/self/mem", O_RDONLY);
1032                 if (fd == -1) {
1033                     goto out;
1034                 }
1035             }
1036             if (pread(fd, buf, l,
1037                       (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
1038                 goto out_close;
1039             }
1040         }
1041         len -= l;
1042         buf += l;
1043         addr += l;
1044     }
1045     ret = 0;
1046 out_close:
1047     if (fd != -1) {
1048         close(fd);
1049     }
1050 out:
1051     mmap_unlock();
1052 
1053     return ret;
1054 }
1055 
1056 #include "ldst_atomicity.c.inc"
1057 
1058 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1059                           uintptr_t ra, MMUAccessType access_type)
1060 {
1061     void *haddr;
1062     uint8_t ret;
1063 
1064     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1065     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
1066     ret = ldub_p(haddr);
1067     clear_helper_retaddr();
1068     return ret;
1069 }
1070 
1071 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1072                            uintptr_t ra, MMUAccessType access_type)
1073 {
1074     void *haddr;
1075     uint16_t ret;
1076     MemOp mop = get_memop(oi);
1077 
1078     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1079     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1080     ret = load_atom_2(cpu, ra, haddr, mop);
1081     clear_helper_retaddr();
1082 
1083     if (mop & MO_BSWAP) {
1084         ret = bswap16(ret);
1085     }
1086     return ret;
1087 }
1088 
1089 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1090                            uintptr_t ra, MMUAccessType access_type)
1091 {
1092     void *haddr;
1093     uint32_t ret;
1094     MemOp mop = get_memop(oi);
1095 
1096     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1097     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1098     ret = load_atom_4(cpu, ra, haddr, mop);
1099     clear_helper_retaddr();
1100 
1101     if (mop & MO_BSWAP) {
1102         ret = bswap32(ret);
1103     }
1104     return ret;
1105 }
1106 
1107 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1108                            uintptr_t ra, MMUAccessType access_type)
1109 {
1110     void *haddr;
1111     uint64_t ret;
1112     MemOp mop = get_memop(oi);
1113 
1114     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1115     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1116     ret = load_atom_8(cpu, ra, haddr, mop);
1117     clear_helper_retaddr();
1118 
1119     if (mop & MO_BSWAP) {
1120         ret = bswap64(ret);
1121     }
1122     return ret;
1123 }
1124 
1125 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1126                           MemOpIdx oi, uintptr_t ra)
1127 {
1128     void *haddr;
1129     Int128 ret;
1130     MemOp mop = get_memop(oi);
1131 
1132     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1133     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1134     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1135     ret = load_atom_16(cpu, ra, haddr, mop);
1136     clear_helper_retaddr();
1137 
1138     if (mop & MO_BSWAP) {
1139         ret = bswap128(ret);
1140     }
1141     return ret;
1142 }
1143 
1144 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1145                        MemOpIdx oi, uintptr_t ra)
1146 {
1147     void *haddr;
1148 
1149     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1150     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1151     stb_p(haddr, val);
1152     clear_helper_retaddr();
1153 }
1154 
1155 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1156                        MemOpIdx oi, uintptr_t ra)
1157 {
1158     void *haddr;
1159     MemOp mop = get_memop(oi);
1160 
1161     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1162     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1163 
1164     if (mop & MO_BSWAP) {
1165         val = bswap16(val);
1166     }
1167     store_atom_2(cpu, ra, haddr, mop, val);
1168     clear_helper_retaddr();
1169 }
1170 
1171 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1172                        MemOpIdx oi, uintptr_t ra)
1173 {
1174     void *haddr;
1175     MemOp mop = get_memop(oi);
1176 
1177     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1178     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1179 
1180     if (mop & MO_BSWAP) {
1181         val = bswap32(val);
1182     }
1183     store_atom_4(cpu, ra, haddr, mop, val);
1184     clear_helper_retaddr();
1185 }
1186 
1187 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1188                        MemOpIdx oi, uintptr_t ra)
1189 {
1190     void *haddr;
1191     MemOp mop = get_memop(oi);
1192 
1193     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1194     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1195 
1196     if (mop & MO_BSWAP) {
1197         val = bswap64(val);
1198     }
1199     store_atom_8(cpu, ra, haddr, mop, val);
1200     clear_helper_retaddr();
1201 }
1202 
1203 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1204                         MemOpIdx oi, uintptr_t ra)
1205 {
1206     void *haddr;
1207     MemOpIdx mop = get_memop(oi);
1208 
1209     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1210     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1211 
1212     if (mop & MO_BSWAP) {
1213         val = bswap128(val);
1214     }
1215     store_atom_16(cpu, ra, haddr, mop, val);
1216     clear_helper_retaddr();
1217 }
1218 
1219 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
1220                          MemOpIdx oi, uintptr_t ra)
1221 {
1222     return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1223 }
1224 
1225 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
1226                           MemOpIdx oi, uintptr_t ra)
1227 {
1228     return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1229 }
1230 
1231 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
1232                           MemOpIdx oi, uintptr_t ra)
1233 {
1234     return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1235 }
1236 
1237 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
1238                           MemOpIdx oi, uintptr_t ra)
1239 {
1240     return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1241 }
1242 
1243 #include "ldst_common.c.inc"
1244 
1245 /*
1246  * Do not allow unaligned operations to proceed.  Return the host address.
1247  */
1248 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1249                                int size, uintptr_t retaddr)
1250 {
1251     MemOp mop = get_memop(oi);
1252     int a_bits = memop_alignment_bits(mop);
1253     void *ret;
1254 
1255     /* Enforce guest required alignment.  */
1256     if (unlikely(addr & ((1 << a_bits) - 1))) {
1257         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1258     }
1259 
1260     /* Enforce qemu required alignment.  */
1261     if (unlikely(addr & (size - 1))) {
1262         cpu_loop_exit_atomic(cpu, retaddr);
1263     }
1264 
1265     ret = g2h(cpu, addr);
1266     set_helper_retaddr(retaddr);
1267     return ret;
1268 }
1269 
1270 #include "atomic_common.c.inc"
1271 
1272 /*
1273  * First set of functions passes in OI and RETADDR.
1274  * This makes them callable from other helpers.
1275  */
1276 
1277 #define ATOMIC_NAME(X) \
1278     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
1279 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
1280 
1281 #define DATA_SIZE 1
1282 #include "atomic_template.h"
1283 
1284 #define DATA_SIZE 2
1285 #include "atomic_template.h"
1286 
1287 #define DATA_SIZE 4
1288 #include "atomic_template.h"
1289 
1290 #ifdef CONFIG_ATOMIC64
1291 #define DATA_SIZE 8
1292 #include "atomic_template.h"
1293 #endif
1294 
1295 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
1296 #define DATA_SIZE 16
1297 #include "atomic_template.h"
1298 #endif
1299