xref: /qemu/accel/tcg/user-exec.c (revision fb5c28e1955537228fe59a901e6cf6258da682d5)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "hw/core/tcg-cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg.h"
24 #include "qemu/bitops.h"
25 #include "qemu/rcu.h"
26 #include "exec/cpu_ldst.h"
27 #include "qemu/main-loop.h"
28 #include "user/page-protection.h"
29 #include "exec/page-protection.h"
30 #include "exec/helper-proto.h"
31 #include "qemu/atomic128.h"
32 #include "trace.h"
33 #include "tcg/tcg-ldst.h"
34 #include "internal-common.h"
35 #include "internal-target.h"
36 #include "tb-internal.h"
37 
/*
 * Per-thread value consulted by adjust_signal_pc() to classify a host
 * fault: 0 means the fault is attributed to generated code, 1 means it
 * happened while reading guest memory for translation, and any other
 * value is used directly as the host pc to unwind from.
 */
__thread uintptr_t helper_retaddr;
39 
40 //#define DEBUG_SIGNAL
41 
42 void cpu_interrupt(CPUState *cpu, int mask)
43 {
44     g_assert(bql_locked());
45     cpu->interrupt_request |= mask;
46     qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
47 }
48 
49 /*
50  * Adjust the pc to pass to cpu_restore_state; return the memop type.
51  */
52 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
53 {
54     switch (helper_retaddr) {
55     default:
56         /*
57          * Fault during host memory operation within a helper function.
58          * The helper's host return address, saved here, gives us a
59          * pointer into the generated code that will unwind to the
60          * correct guest pc.
61          */
62         *pc = helper_retaddr;
63         break;
64 
65     case 0:
66         /*
67          * Fault during host memory operation within generated code.
68          * (Or, a unrelated bug within qemu, but we can't tell from here).
69          *
70          * We take the host pc from the signal frame.  However, we cannot
71          * use that value directly.  Within cpu_restore_state_from_tb, we
72          * assume PC comes from GETPC(), as used by the helper functions,
73          * so we adjust the address by -GETPC_ADJ to form an address that
74          * is within the call insn, so that the address does not accidentally
75          * match the beginning of the next guest insn.  However, when the
76          * pc comes from the signal frame it points to the actual faulting
77          * host memory insn and not the return from a call insn.
78          *
79          * Therefore, adjust to compensate for what will be done later
80          * by cpu_restore_state_from_tb.
81          */
82         *pc += GETPC_ADJ;
83         break;
84 
85     case 1:
86         /*
87          * Fault during host read for translation, or loosely, "execution".
88          *
89          * The guest pc is already pointing to the start of the TB for which
90          * code is being generated.  If the guest translator manages the
91          * page crossings correctly, this is exactly the correct address
92          * (and if the translator doesn't handle page boundaries correctly
93          * there's little we can do about that here).  Therefore, do not
94          * trigger the unwinder.
95          */
96         *pc = 0;
97         return MMU_INST_FETCH;
98     }
99 
100     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
101 }
102 
103 /**
104  * handle_sigsegv_accerr_write:
105  * @cpu: the cpu context
106  * @old_set: the sigset_t from the signal ucontext_t
107  * @host_pc: the host pc, adjusted for the signal
108  * @guest_addr: the guest address of the fault
109  *
110  * Return true if the write fault has been handled, and should be re-tried.
111  *
112  * Note that it is important that we don't call page_unprotect() unless
113  * this is really a "write to nonwritable page" fault, because
114  * page_unprotect() assumes that if it is called for an access to
115  * a page that's writable this means we had two threads racing and
116  * another thread got there first and already made the page writable;
117  * so we will retry the access. If we were to call page_unprotect()
118  * for some other kind of fault that should really be passed to the
119  * guest, we'd end up in an infinite loop of retrying the faulting access.
120  */
121 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
122                                  uintptr_t host_pc, abi_ptr guest_addr)
123 {
124     switch (page_unprotect(guest_addr, host_pc)) {
125     case 0:
126         /*
127          * Fault not caused by a page marked unwritable to protect
128          * cached translations, must be the guest binary's problem.
129          */
130         return false;
131     case 1:
132         /*
133          * Fault caused by protection of cached translation; TBs
134          * invalidated, so resume execution.
135          */
136         return true;
137     case 2:
138         /*
139          * Fault caused by protection of cached translation, and the
140          * currently executing TB was modified and must be exited immediately.
141          */
142         sigprocmask(SIG_SETMASK, old_set, NULL);
143         cpu_loop_exit_noexc(cpu);
144         /* NORETURN */
145     default:
146         g_assert_not_reached();
147     }
148 }
149 
/*
 * One node per contiguous guest address range [itree.start, itree.last]
 * whose pages share the same flags.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;    /* used by g_free_rcu() when the node is released */
    IntervalTreeNode itree; /* range key; links the node into pageflags_root */
    int flags;              /* PAGE_* bits that apply to the whole range */
} PageFlagsNode;

/* Interval tree holding every PageFlagsNode. */
static IntervalTreeRoot pageflags_root;
157 
158 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
159 {
160     IntervalTreeNode *n;
161 
162     n = interval_tree_iter_first(&pageflags_root, start, last);
163     return n ? container_of(n, PageFlagsNode, itree) : NULL;
164 }
165 
166 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
167                                      target_ulong last)
168 {
169     IntervalTreeNode *n;
170 
171     n = interval_tree_iter_next(&p->itree, start, last);
172     return n ? container_of(n, PageFlagsNode, itree) : NULL;
173 }
174 
175 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
176 {
177     IntervalTreeNode *n;
178     int rc = 0;
179 
180     mmap_lock();
181     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
182          n != NULL;
183          n = interval_tree_iter_next(n, 0, -1)) {
184         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
185 
186         rc = fn(priv, n->start, n->last + 1, p->flags);
187         if (rc != 0) {
188             break;
189         }
190     }
191     mmap_unlock();
192 
193     return rc;
194 }
195 
196 static int dump_region(void *priv, target_ulong start,
197                        target_ulong end, unsigned long prot)
198 {
199     FILE *f = (FILE *)priv;
200 
201     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
202             start, end, end - start,
203             ((prot & PAGE_READ) ? 'r' : '-'),
204             ((prot & PAGE_WRITE) ? 'w' : '-'),
205             ((prot & PAGE_EXEC) ? 'x' : '-'));
206     return 0;
207 }
208 
209 /* dump memory mappings */
210 void page_dump(FILE *f)
211 {
212     const int length = sizeof(target_ulong) * 2;
213 
214     fprintf(f, "%-*s %-*s %-*s %s\n",
215             length, "start", length, "end", length, "size", "prot");
216     walk_memory_regions(f, dump_region);
217 }
218 
219 int page_get_flags(target_ulong address)
220 {
221     PageFlagsNode *p = pageflags_find(address, address);
222 
223     /*
224      * See util/interval-tree.c re lockless lookups: no false positives but
225      * there are false negatives.  If we find nothing, retry with the mmap
226      * lock acquired.
227      */
228     if (p) {
229         return p->flags;
230     }
231     if (have_mmap_lock()) {
232         return 0;
233     }
234 
235     mmap_lock();
236     p = pageflags_find(address, address);
237     mmap_unlock();
238     return p ? p->flags : 0;
239 }
240 
241 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
242 static void pageflags_create(target_ulong start, target_ulong last, int flags)
243 {
244     PageFlagsNode *p = g_new(PageFlagsNode, 1);
245 
246     p->itree.start = start;
247     p->itree.last = last;
248     p->flags = flags;
249     interval_tree_insert(&p->itree, &pageflags_root);
250 }
251 
/*
 * A subroutine of page_set_flags: remove everything in [start,last].
 *
 * Nodes that extend beyond the range are truncated or split so that only
 * the part inside [start,last] disappears.
 *
 * Returns true if any node overlapping the range had PAGE_EXEC set;
 * page_set_flags() uses this to decide whether to invalidate TBs.
 */
static bool pageflags_unset(target_ulong start, target_ulong last)
{
    bool inval_tb = false;

    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        target_ulong p_last;

        if (!p) {
            /* Nothing overlaps the range any more. */
            break;
        }

        if (p->flags & PAGE_EXEC) {
            inval_tb = true;
        }

        /* Unlink first: the node's bounds are edited below. */
        interval_tree_remove(&p->itree, &pageflags_root);
        /* Remember the original end; p->itree.last may be overwritten. */
        p_last = p->itree.last;

        if (p->itree.start < start) {
            /* Truncate the node from the end, or split out the middle. */
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            if (last < p_last) {
                /* Node extended past the range: recreate the tail piece. */
                pageflags_create(last + 1, p_last, p->flags);
                break;
            }
        } else if (p_last <= last) {
            /* Range completely covers node -- remove it. */
            g_free_rcu(p, rcu);
        } else {
            /* Truncate the node from the start. */
            p->itree.start = last + 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            break;
        }
    }

    return inval_tb;
}
293 
294 /*
295  * A subroutine of page_set_flags: nothing overlaps [start,last],
296  * but check adjacent mappings and maybe merge into a single range.
297  */
298 static void pageflags_create_merge(target_ulong start, target_ulong last,
299                                    int flags)
300 {
301     PageFlagsNode *next = NULL, *prev = NULL;
302 
303     if (start > 0) {
304         prev = pageflags_find(start - 1, start - 1);
305         if (prev) {
306             if (prev->flags == flags) {
307                 interval_tree_remove(&prev->itree, &pageflags_root);
308             } else {
309                 prev = NULL;
310             }
311         }
312     }
313     if (last + 1 != 0) {
314         next = pageflags_find(last + 1, last + 1);
315         if (next) {
316             if (next->flags == flags) {
317                 interval_tree_remove(&next->itree, &pageflags_root);
318             } else {
319                 next = NULL;
320             }
321         }
322     }
323 
324     if (prev) {
325         if (next) {
326             prev->itree.last = next->itree.last;
327             g_free_rcu(next, rcu);
328         } else {
329             prev->itree.last = last;
330         }
331         interval_tree_insert(&prev->itree, &pageflags_root);
332     } else if (next) {
333         next->itree.start = start;
334         interval_tree_insert(&next->itree, &pageflags_root);
335     } else {
336         pageflags_create(start, last, flags);
337     }
338 }
339 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 *
 * PAGE_STICKY is the set of flag bits that page_set_flags() excludes from
 * its clear mask, so they survive on an existing mapping unless the
 * caller passes PAGE_RESET.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
348 
/*
 * A subroutine of page_set_flags: add flags to [start,last].
 *
 * For each existing node overlapping the range, the resulting flags are
 * (old_flags & ~clear_flags) | set_flags.  Parts of the range that have
 * no existing node receive @set_flags (no node is created when
 * @set_flags is 0).  Existing nodes are split or trimmed as needed so
 * that addresses outside [start,last] keep their original flags.
 *
 * Returns true if an overlapping node had PAGE_EXEC and the update either
 * removes PAGE_EXEC or newly adds PAGE_WRITE.
 */
static bool pageflags_set_clear(target_ulong start, target_ulong last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    target_ulong p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

    /* Re-entered with an advanced 'start' or after the tree has changed. */
 restart:
    p = pageflags_find(start, last);
    if (!p) {
        /* Nothing (left) overlapping: create the remainder, if any flags. */
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    /* Snapshot the node; its bounds and flags may be rewritten below. */
    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Node begins before the range: keep its head unchanged. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range is strictly inside the node: middle + tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                /* Range reaches the node's end (or beyond). */
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    /* Continue with whatever follows this node. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* Gap before the node gets set_flags only (no old flags). */
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Node extends past the range: keep its tail unchanged. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* Node lies wholly inside the range: update in place. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Grow the node downward to cover the gap before it. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        /* Keep the head with the original flags. */
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            /* More of the range lies beyond this node: look again. */
            goto restart;
        }
        if (last < p_last) {
            /* Keep the tail with the original flags. */
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        /* Keep only the tail with the original flags. */
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node entirely replaced: drop it and look for further overlaps. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
488 
489 void page_set_flags(target_ulong start, target_ulong last, int flags)
490 {
491     bool reset = false;
492     bool inval_tb = false;
493 
494     /* This function should never be called with addresses outside the
495        guest address space.  If this assert fires, it probably indicates
496        a missing call to h2g_valid.  */
497     assert(start <= last);
498     assert(last <= GUEST_ADDR_MAX);
499     /* Only set PAGE_ANON with new mappings. */
500     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
501     assert_memory_lock();
502 
503     start &= TARGET_PAGE_MASK;
504     last |= ~TARGET_PAGE_MASK;
505 
506     if (!(flags & PAGE_VALID)) {
507         flags = 0;
508     } else {
509         reset = flags & PAGE_RESET;
510         flags &= ~PAGE_RESET;
511         if (flags & PAGE_WRITE) {
512             flags |= PAGE_WRITE_ORG;
513         }
514     }
515 
516     if (!flags || reset) {
517         page_reset_target_data(start, last);
518         inval_tb |= pageflags_unset(start, last);
519     }
520     if (flags) {
521         inval_tb |= pageflags_set_clear(start, last, flags,
522                                         ~(reset ? 0 : PAGE_STICKY));
523     }
524     if (inval_tb) {
525         tb_invalidate_phys_range(start, last);
526     }
527 }
528 
/*
 * Check that the guest range of @len bytes starting at @start is covered
 * by recorded mappings that provide every bit in @flags.
 *
 * Returns true for a zero-length range, false if start + len - 1 wraps.
 * If PAGE_WRITE is requested on a mapping that has PAGE_WRITE_ORG but not
 * PAGE_WRITE, page_unprotect() is called, one target page at a time.
 *
 * May be called with or without the mmap lock; if a lookup misses while
 * unlocked, the lock is taken for the remainder of the check and
 * released before returning.
 */
bool page_check_range(target_ulong start, target_ulong len, int flags)
{
    target_ulong last;
    int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
    bool ret;

    if (len == 0) {
        return true;  /* trivial length */
    }

    last = start + len - 1;
    if (last < start) {
        return false; /* wrap around */
    }

    locked = have_mmap_lock();
    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        int missing;

        if (!p) {
            if (!locked) {
                /*
                 * Lockless lookups have false negatives.
                 * Retry with the lock held.
                 */
                mmap_lock();
                locked = -1;
                p = pageflags_find(start, last);
            }
            if (!p) {
                ret = false; /* entire region invalid */
                break;
            }
        }
        if (start < p->itree.start) {
            ret = false; /* initial bytes invalid */
            break;
        }

        /* Requested bits that this mapping does not currently provide. */
        missing = flags & ~p->flags;
        if (missing & ~PAGE_WRITE) {
            ret = false; /* page doesn't match */
            break;
        }
        if (missing & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG)) {
                ret = false; /* page not writable */
                break;
            }
            /* Asking about writable, but has been protected: undo. */
            if (!page_unprotect(start, 0)) {
                ret = false;
                break;
            }
            /* TODO: page_unprotect should take a range, not a single page. */
            if (last - start < TARGET_PAGE_SIZE) {
                ret = true; /* ok */
                break;
            }
            /* Advance one target page and look the mapping up again. */
            start += TARGET_PAGE_SIZE;
            continue;
        }

        if (last <= p->itree.last) {
            ret = true; /* ok */
            break;
        }
        /* This mapping is fine; continue just past its end. */
        start = p->itree.last + 1;
    }

    /* Release the lock if acquired locally. */
    if (locked < 0) {
        mmap_unlock();
    }
    return ret;
}
606 
607 bool page_check_range_empty(target_ulong start, target_ulong last)
608 {
609     assert(last >= start);
610     assert_memory_lock();
611     return pageflags_find(start, last) == NULL;
612 }
613 
614 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
615                                    target_ulong len, target_ulong align)
616 {
617     target_ulong len_m1, align_m1;
618 
619     assert(min <= max);
620     assert(max <= GUEST_ADDR_MAX);
621     assert(len != 0);
622     assert(is_power_of_2(align));
623     assert_memory_lock();
624 
625     len_m1 = len - 1;
626     align_m1 = align - 1;
627 
628     /* Iteratively narrow the search region. */
629     while (1) {
630         PageFlagsNode *p;
631 
632         /* Align min and double-check there's enough space remaining. */
633         min = (min + align_m1) & ~align_m1;
634         if (min > max) {
635             return -1;
636         }
637         if (len_m1 > max - min) {
638             return -1;
639         }
640 
641         p = pageflags_find(min, min + len_m1);
642         if (p == NULL) {
643             /* Found! */
644             return min;
645         }
646         if (max <= p->itree.last) {
647             /* Existing allocation fills the remainder of the search region. */
648             return -1;
649         }
650         /* Skip across existing allocation. */
651         min = p->itree.last + 1;
652     }
653 }
654 
655 void page_protect(tb_page_addr_t address)
656 {
657     PageFlagsNode *p;
658     target_ulong start, last;
659     int host_page_size = qemu_real_host_page_size();
660     int prot;
661 
662     assert_memory_lock();
663 
664     if (host_page_size <= TARGET_PAGE_SIZE) {
665         start = address & TARGET_PAGE_MASK;
666         last = start + TARGET_PAGE_SIZE - 1;
667     } else {
668         start = address & -host_page_size;
669         last = start + host_page_size - 1;
670     }
671 
672     p = pageflags_find(start, last);
673     if (!p) {
674         return;
675     }
676     prot = p->flags;
677 
678     if (unlikely(p->itree.last < last)) {
679         /* More than one protection region covers the one host page. */
680         assert(TARGET_PAGE_SIZE < host_page_size);
681         while ((p = pageflags_next(p, start, last)) != NULL) {
682             prot |= p->flags;
683         }
684     }
685 
686     if (prot & PAGE_WRITE) {
687         pageflags_set_clear(start, last, 0, PAGE_WRITE);
688         mprotect(g2h_untagged(start), last - start + 1,
689                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
690     }
691 }
692 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @address: guest address of the faulting write
 * @pc: host pc of the fault, passed on to the TB lookup / invalidation
 *      helpers; callers that are not in generated code pass 0
 */
int page_unprotect(tb_page_addr_t address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
#ifdef TARGET_HAS_PRECISE_SMC
        /* Still report whether the TB at 'pc' has been marked invalid. */
        TranslationBlock *current_tb = tcg_tb_lookup(pc);
        if (current_tb) {
            current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
        }
#endif
    } else {
        int host_page_size = qemu_real_host_page_size();
        target_ulong start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* One target page is the unit of protection. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
        } else {
            /*
             * The host page spans several target pages: walk each of
             * them, collecting the union of their flags for mprotect().
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                target_ulong addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        /* Restore write on pages that were originally writable. */
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
        }
        /* Host protection: executable guest pages are mapped readable. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
779 
780 static int probe_access_internal(CPUArchState *env, vaddr addr,
781                                  int fault_size, MMUAccessType access_type,
782                                  bool nonfault, uintptr_t ra)
783 {
784     int acc_flag;
785     bool maperr;
786 
787     switch (access_type) {
788     case MMU_DATA_STORE:
789         acc_flag = PAGE_WRITE_ORG;
790         break;
791     case MMU_DATA_LOAD:
792         acc_flag = PAGE_READ;
793         break;
794     case MMU_INST_FETCH:
795         acc_flag = PAGE_EXEC;
796         break;
797     default:
798         g_assert_not_reached();
799     }
800 
801     if (guest_addr_valid_untagged(addr)) {
802         int page_flags = page_get_flags(addr);
803         if (page_flags & acc_flag) {
804             if (access_type != MMU_INST_FETCH
805                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
806                 return TLB_MMIO;
807             }
808             return 0; /* success */
809         }
810         maperr = !(page_flags & PAGE_VALID);
811     } else {
812         maperr = true;
813     }
814 
815     if (nonfault) {
816         return TLB_INVALID_MASK;
817     }
818 
819     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
820 }
821 
822 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
823                        MMUAccessType access_type, int mmu_idx,
824                        bool nonfault, void **phost, uintptr_t ra)
825 {
826     int flags;
827 
828     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
829     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
830     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
831     return flags;
832 }
833 
834 void *probe_access(CPUArchState *env, vaddr addr, int size,
835                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
836 {
837     int flags;
838 
839     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
840     flags = probe_access_internal(env, addr, size, access_type, false, ra);
841     g_assert((flags & ~TLB_MMIO) == 0);
842 
843     return size ? g2h(env_cpu(env), addr) : NULL;
844 }
845 
846 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
847                                         void **hostp)
848 {
849     int flags;
850 
851     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
852     g_assert(flags == 0);
853 
854     if (hostp) {
855         *hostp = g2h_untagged(addr);
856     }
857     return addr;
858 }
859 
860 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 or about 31%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 */
#define TPD_PAGES  64
/* Address mask selecting the TPD_PAGES-page-aligned region of an address. */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)

/* One chunk of target data, keyed by the guest address region it covers. */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;    /* used by g_free_rcu() when the chunk is released */
    IntervalTreeNode itree; /* region key; links the chunk into targetdata_root */
    /* TPD_PAGES entries of TARGET_PAGE_DATA_SIZE bytes each, one per page. */
    char data[] __attribute__((aligned));
} TargetPageDataNode;

/* Interval tree holding every TargetPageDataNode. */
static IntervalTreeRoot targetdata_root;
876 
/*
 * Discard the target data for guest pages in [start,last]; the range is
 * widened to target page boundaries.
 *
 * Chunks lying wholly inside the range are unlinked and freed via RCU.
 * Chunks only partly covered stay in place and have just the covered
 * pages' data zeroed.  The memory lock must be held (asserted).
 */
void page_reset_target_data(target_ulong start, target_ulong last)
{
    IntervalTreeNode *n, *next;

    assert_memory_lock();

    start &= TARGET_PAGE_MASK;
    last |= ~TARGET_PAGE_MASK;

    /*
     * 'next' is fetched one step ahead of 'n' because the body may remove
     * 'n' from the tree.
     */
    for (n = interval_tree_iter_first(&targetdata_root, start, last),
         next = n ? interval_tree_iter_next(n, start, last) : NULL;
         n != NULL;
         n = next,
         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
        target_ulong n_start, n_last, p_ofs, p_len;
        TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);

        /* Chunk entirely inside the range: drop it. */
        if (n->start >= start && n->last <= last) {
            interval_tree_remove(n, &targetdata_root);
            g_free_rcu(t, rcu);
            continue;
        }

        /* Partial overlap: work out the first covered page in the chunk. */
        if (n->start < start) {
            n_start = start;
            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
        } else {
            n_start = n->start;
            p_ofs = 0;
        }
        /* ... and how many pages of the chunk are covered. */
        n_last = MIN(last, n->last);
        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;

        memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
               p_len * TARGET_PAGE_DATA_SIZE);
    }
}
914 
915 void *page_get_target_data(target_ulong address)
916 {
917     IntervalTreeNode *n;
918     TargetPageDataNode *t;
919     target_ulong page, region, p_ofs;
920 
921     page = address & TARGET_PAGE_MASK;
922     region = address & TBD_MASK;
923 
924     n = interval_tree_iter_first(&targetdata_root, page, page);
925     if (!n) {
926         /*
927          * See util/interval-tree.c re lockless lookups: no false positives
928          * but there are false negatives.  If we find nothing, retry with
929          * the mmap lock acquired.  We also need the lock for the
930          * allocation + insert.
931          */
932         mmap_lock();
933         n = interval_tree_iter_first(&targetdata_root, page, page);
934         if (!n) {
935             t = g_malloc0(sizeof(TargetPageDataNode)
936                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
937             n = &t->itree;
938             n->start = region;
939             n->last = region | ~TBD_MASK;
940             interval_tree_insert(n, &targetdata_root);
941         }
942         mmap_unlock();
943     }
944 
945     t = container_of(n, TargetPageDataNode, itree);
946     p_ofs = (page - region) >> TARGET_PAGE_BITS;
947     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
948 }
949 #else
950 void page_reset_target_data(target_ulong start, target_ulong last) { }
951 #endif /* TARGET_PAGE_DATA_SIZE */
952 
953 /* The system-mode versions of these helpers are in cputlb.c.  */
954 
955 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
956                             MemOp mop, uintptr_t ra, MMUAccessType type)
957 {
958     int a_bits = memop_alignment_bits(mop);
959     void *ret;
960 
961     /* Enforce guest required alignment.  */
962     if (unlikely(addr & ((1 << a_bits) - 1))) {
963         cpu_loop_exit_sigbus(cpu, addr, type, ra);
964     }
965 
966     ret = g2h(cpu, addr);
967     set_helper_retaddr(ra);
968     return ret;
969 }
970 
971 #include "ldst_atomicity.c.inc"
972 
973 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
974                           uintptr_t ra, MMUAccessType access_type)
975 {
976     void *haddr;
977     uint8_t ret;
978 
979     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
980     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
981     ret = ldub_p(haddr);
982     clear_helper_retaddr();
983     return ret;
984 }
985 
986 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
987                            uintptr_t ra, MMUAccessType access_type)
988 {
989     void *haddr;
990     uint16_t ret;
991     MemOp mop = get_memop(oi);
992 
993     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
994     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
995     ret = load_atom_2(cpu, ra, haddr, mop);
996     clear_helper_retaddr();
997 
998     if (mop & MO_BSWAP) {
999         ret = bswap16(ret);
1000     }
1001     return ret;
1002 }
1003 
1004 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1005                            uintptr_t ra, MMUAccessType access_type)
1006 {
1007     void *haddr;
1008     uint32_t ret;
1009     MemOp mop = get_memop(oi);
1010 
1011     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1012     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1013     ret = load_atom_4(cpu, ra, haddr, mop);
1014     clear_helper_retaddr();
1015 
1016     if (mop & MO_BSWAP) {
1017         ret = bswap32(ret);
1018     }
1019     return ret;
1020 }
1021 
1022 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1023                            uintptr_t ra, MMUAccessType access_type)
1024 {
1025     void *haddr;
1026     uint64_t ret;
1027     MemOp mop = get_memop(oi);
1028 
1029     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1030     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1031     ret = load_atom_8(cpu, ra, haddr, mop);
1032     clear_helper_retaddr();
1033 
1034     if (mop & MO_BSWAP) {
1035         ret = bswap64(ret);
1036     }
1037     return ret;
1038 }
1039 
1040 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1041                           MemOpIdx oi, uintptr_t ra)
1042 {
1043     void *haddr;
1044     Int128 ret;
1045     MemOp mop = get_memop(oi);
1046 
1047     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1048     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1049     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1050     ret = load_atom_16(cpu, ra, haddr, mop);
1051     clear_helper_retaddr();
1052 
1053     if (mop & MO_BSWAP) {
1054         ret = bswap128(ret);
1055     }
1056     return ret;
1057 }
1058 
1059 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1060                        MemOpIdx oi, uintptr_t ra)
1061 {
1062     void *haddr;
1063 
1064     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1065     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1066     stb_p(haddr, val);
1067     clear_helper_retaddr();
1068 }
1069 
1070 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1071                        MemOpIdx oi, uintptr_t ra)
1072 {
1073     void *haddr;
1074     MemOp mop = get_memop(oi);
1075 
1076     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1077     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1078 
1079     if (mop & MO_BSWAP) {
1080         val = bswap16(val);
1081     }
1082     store_atom_2(cpu, ra, haddr, mop, val);
1083     clear_helper_retaddr();
1084 }
1085 
1086 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1087                        MemOpIdx oi, uintptr_t ra)
1088 {
1089     void *haddr;
1090     MemOp mop = get_memop(oi);
1091 
1092     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1093     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1094 
1095     if (mop & MO_BSWAP) {
1096         val = bswap32(val);
1097     }
1098     store_atom_4(cpu, ra, haddr, mop, val);
1099     clear_helper_retaddr();
1100 }
1101 
1102 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1103                        MemOpIdx oi, uintptr_t ra)
1104 {
1105     void *haddr;
1106     MemOp mop = get_memop(oi);
1107 
1108     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1109     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1110 
1111     if (mop & MO_BSWAP) {
1112         val = bswap64(val);
1113     }
1114     store_atom_8(cpu, ra, haddr, mop, val);
1115     clear_helper_retaddr();
1116 }
1117 
1118 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1119                         MemOpIdx oi, uintptr_t ra)
1120 {
1121     void *haddr;
1122     MemOpIdx mop = get_memop(oi);
1123 
1124     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1125     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1126 
1127     if (mop & MO_BSWAP) {
1128         val = bswap128(val);
1129     }
1130     store_atom_16(cpu, ra, haddr, mop, val);
1131     clear_helper_retaddr();
1132 }
1133 
1134 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
1135 {
1136     uint32_t ret;
1137 
1138     set_helper_retaddr(1);
1139     ret = ldub_p(g2h_untagged(ptr));
1140     clear_helper_retaddr();
1141     return ret;
1142 }
1143 
1144 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
1145 {
1146     uint32_t ret;
1147 
1148     set_helper_retaddr(1);
1149     ret = lduw_p(g2h_untagged(ptr));
1150     clear_helper_retaddr();
1151     return ret;
1152 }
1153 
1154 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
1155 {
1156     uint32_t ret;
1157 
1158     set_helper_retaddr(1);
1159     ret = ldl_p(g2h_untagged(ptr));
1160     clear_helper_retaddr();
1161     return ret;
1162 }
1163 
1164 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
1165 {
1166     uint64_t ret;
1167 
1168     set_helper_retaddr(1);
1169     ret = ldq_p(g2h_untagged(ptr));
1170     clear_helper_retaddr();
1171     return ret;
1172 }
1173 
1174 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
1175                          MemOpIdx oi, uintptr_t ra)
1176 {
1177     void *haddr;
1178     uint8_t ret;
1179 
1180     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1181     ret = ldub_p(haddr);
1182     clear_helper_retaddr();
1183     return ret;
1184 }
1185 
1186 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
1187                           MemOpIdx oi, uintptr_t ra)
1188 {
1189     void *haddr;
1190     uint16_t ret;
1191 
1192     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1193     ret = lduw_p(haddr);
1194     clear_helper_retaddr();
1195     if (get_memop(oi) & MO_BSWAP) {
1196         ret = bswap16(ret);
1197     }
1198     return ret;
1199 }
1200 
1201 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
1202                           MemOpIdx oi, uintptr_t ra)
1203 {
1204     void *haddr;
1205     uint32_t ret;
1206 
1207     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1208     ret = ldl_p(haddr);
1209     clear_helper_retaddr();
1210     if (get_memop(oi) & MO_BSWAP) {
1211         ret = bswap32(ret);
1212     }
1213     return ret;
1214 }
1215 
1216 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
1217                           MemOpIdx oi, uintptr_t ra)
1218 {
1219     void *haddr;
1220     uint64_t ret;
1221 
1222     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
1223     ret = ldq_p(haddr);
1224     clear_helper_retaddr();
1225     if (get_memop(oi) & MO_BSWAP) {
1226         ret = bswap64(ret);
1227     }
1228     return ret;
1229 }
1230 
1231 #include "ldst_common.c.inc"
1232 
1233 /*
1234  * Do not allow unaligned operations to proceed.  Return the host address.
1235  */
1236 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1237                                int size, uintptr_t retaddr)
1238 {
1239     MemOp mop = get_memop(oi);
1240     int a_bits = memop_alignment_bits(mop);
1241     void *ret;
1242 
1243     /* Enforce guest required alignment.  */
1244     if (unlikely(addr & ((1 << a_bits) - 1))) {
1245         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1246     }
1247 
1248     /* Enforce qemu required alignment.  */
1249     if (unlikely(addr & (size - 1))) {
1250         cpu_loop_exit_atomic(cpu, retaddr);
1251     }
1252 
1253     ret = g2h(cpu, addr);
1254     set_helper_retaddr(retaddr);
1255     return ret;
1256 }
1257 
1258 #include "atomic_common.c.inc"
1259 
/*
 * First set of functions passes in OI and RETADDR.
 * This makes them callable from other helpers.
 *
 * The functions are generated by including atomic_template.h once per
 * operand size, with DATA_SIZE selecting the size in bytes.
 */

/*
 * Build the generated function name: cpu_atomic_<X><SUFFIX><END>_mmu.
 * SUFFIX and END are not defined in this file; presumably
 * atomic_template.h defines them per DATA_SIZE -- TODO confirm.
 */
#define ATOMIC_NAME(X) \
    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
/*
 * Cleanup hook for the template: clears helper_retaddr, which
 * atomic_mmu_lookup() sets before returning the host address.
 */
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)

/*
 * DATA_SIZE is redefined before each include without an #undef here;
 * presumably atomic_template.h undefines it at its end -- TODO confirm.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8-byte variants are only instantiated when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/*
 * 16-byte variants are only instantiated when CONFIG_ATOMIC128 is defined
 * or HAVE_CMPXCHG128 is non-zero.
 */
#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
1287