xref: /qemu/accel/tcg/user-exec.c (revision 513823e7521a09ed7ad1e32e6454bac3b2cbf52d)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "hw/core/tcg-cpu-ops.h"
21 #include "disas/disas.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg.h"
24 #include "qemu/bitops.h"
25 #include "qemu/rcu.h"
26 #include "exec/cpu_ldst.h"
27 #include "user/cpu_loop.h"
28 #include "qemu/main-loop.h"
29 #include "user/page-protection.h"
30 #include "exec/page-protection.h"
31 #include "exec/helper-proto.h"
32 #include "qemu/atomic128.h"
33 #include "trace.h"
34 #include "tcg/tcg-ldst.h"
35 #include "internal-common.h"
36 #include "internal-target.h"
37 #include "tb-internal.h"
38 
/*
 * Per-thread marker consulted by adjust_signal_pc() to classify a host
 * fault:
 *   0     - the fault happened in generated code;
 *   1     - the fault happened while reading guest code for translation;
 *   other - the value is the host return address of the helper that
 *           was performing the memory access.
 * NOTE(review): presumably written via set_helper_retaddr() and
 * clear_helper_retaddr(), which are defined elsewhere -- confirm.
 */
__thread uintptr_t helper_retaddr;
40 
41 //#define DEBUG_SIGNAL
42 
43 void cpu_interrupt(CPUState *cpu, int mask)
44 {
45     g_assert(bql_locked());
46     cpu->interrupt_request |= mask;
47     qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
48 }
49 
50 /*
51  * Adjust the pc to pass to cpu_restore_state; return the memop type.
52  */
53 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
54 {
55     switch (helper_retaddr) {
56     default:
57         /*
58          * Fault during host memory operation within a helper function.
59          * The helper's host return address, saved here, gives us a
60          * pointer into the generated code that will unwind to the
61          * correct guest pc.
62          */
63         *pc = helper_retaddr;
64         break;
65 
66     case 0:
67         /*
68          * Fault during host memory operation within generated code.
69          * (Or, a unrelated bug within qemu, but we can't tell from here).
70          *
71          * We take the host pc from the signal frame.  However, we cannot
72          * use that value directly.  Within cpu_restore_state_from_tb, we
73          * assume PC comes from GETPC(), as used by the helper functions,
74          * so we adjust the address by -GETPC_ADJ to form an address that
75          * is within the call insn, so that the address does not accidentally
76          * match the beginning of the next guest insn.  However, when the
77          * pc comes from the signal frame it points to the actual faulting
78          * host memory insn and not the return from a call insn.
79          *
80          * Therefore, adjust to compensate for what will be done later
81          * by cpu_restore_state_from_tb.
82          */
83         *pc += GETPC_ADJ;
84         break;
85 
86     case 1:
87         /*
88          * Fault during host read for translation, or loosely, "execution".
89          *
90          * The guest pc is already pointing to the start of the TB for which
91          * code is being generated.  If the guest translator manages the
92          * page crossings correctly, this is exactly the correct address
93          * (and if the translator doesn't handle page boundaries correctly
94          * there's little we can do about that here).  Therefore, do not
95          * trigger the unwinder.
96          */
97         *pc = 0;
98         return MMU_INST_FETCH;
99     }
100 
101     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
102 }
103 
104 /**
105  * handle_sigsegv_accerr_write:
106  * @cpu: the cpu context
107  * @old_set: the sigset_t from the signal ucontext_t
108  * @host_pc: the host pc, adjusted for the signal
109  * @guest_addr: the guest address of the fault
110  *
111  * Return true if the write fault has been handled, and should be re-tried.
112  *
113  * Note that it is important that we don't call page_unprotect() unless
114  * this is really a "write to nonwritable page" fault, because
115  * page_unprotect() assumes that if it is called for an access to
116  * a page that's writable this means we had two threads racing and
117  * another thread got there first and already made the page writable;
118  * so we will retry the access. If we were to call page_unprotect()
119  * for some other kind of fault that should really be passed to the
120  * guest, we'd end up in an infinite loop of retrying the faulting access.
121  */
122 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
123                                  uintptr_t host_pc, abi_ptr guest_addr)
124 {
125     switch (page_unprotect(guest_addr, host_pc)) {
126     case 0:
127         /*
128          * Fault not caused by a page marked unwritable to protect
129          * cached translations, must be the guest binary's problem.
130          */
131         return false;
132     case 1:
133         /*
134          * Fault caused by protection of cached translation; TBs
135          * invalidated, so resume execution.
136          */
137         return true;
138     case 2:
139         /*
140          * Fault caused by protection of cached translation, and the
141          * currently executing TB was modified and must be exited immediately.
142          */
143         sigprocmask(SIG_SETMASK, old_set, NULL);
144         cpu_loop_exit_noexc(cpu);
145         /* NORETURN */
146     default:
147         g_assert_not_reached();
148     }
149 }
150 
/*
 * One contiguous guest address range whose pages all carry the same
 * PAGE_* flags.  Nodes are kept in the pageflags_root interval tree.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;      /* used by g_free_rcu() when a node is dropped */
    IntervalTreeNode itree;   /* inclusive range [itree.start, itree.last] */
    int flags;                /* PAGE_* bits that apply to the whole range */
} PageFlagsNode;

/* Interval tree of every PageFlagsNode, i.e. the guest page-flags map. */
static IntervalTreeRoot pageflags_root;
158 
159 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
160 {
161     IntervalTreeNode *n;
162 
163     n = interval_tree_iter_first(&pageflags_root, start, last);
164     return n ? container_of(n, PageFlagsNode, itree) : NULL;
165 }
166 
167 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
168                                      target_ulong last)
169 {
170     IntervalTreeNode *n;
171 
172     n = interval_tree_iter_next(&p->itree, start, last);
173     return n ? container_of(n, PageFlagsNode, itree) : NULL;
174 }
175 
176 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
177 {
178     IntervalTreeNode *n;
179     int rc = 0;
180 
181     mmap_lock();
182     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
183          n != NULL;
184          n = interval_tree_iter_next(n, 0, -1)) {
185         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
186 
187         rc = fn(priv, n->start, n->last + 1, p->flags);
188         if (rc != 0) {
189             break;
190         }
191     }
192     mmap_unlock();
193 
194     return rc;
195 }
196 
197 static int dump_region(void *priv, target_ulong start,
198                        target_ulong end, unsigned long prot)
199 {
200     FILE *f = (FILE *)priv;
201 
202     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
203             start, end, end - start,
204             ((prot & PAGE_READ) ? 'r' : '-'),
205             ((prot & PAGE_WRITE) ? 'w' : '-'),
206             ((prot & PAGE_EXEC) ? 'x' : '-'));
207     return 0;
208 }
209 
210 /* dump memory mappings */
211 void page_dump(FILE *f)
212 {
213     const int length = sizeof(target_ulong) * 2;
214 
215     fprintf(f, "%-*s %-*s %-*s %s\n",
216             length, "start", length, "end", length, "size", "prot");
217     walk_memory_regions(f, dump_region);
218 }
219 
220 int page_get_flags(target_ulong address)
221 {
222     PageFlagsNode *p = pageflags_find(address, address);
223 
224     /*
225      * See util/interval-tree.c re lockless lookups: no false positives but
226      * there are false negatives.  If we find nothing, retry with the mmap
227      * lock acquired.
228      */
229     if (p) {
230         return p->flags;
231     }
232     if (have_mmap_lock()) {
233         return 0;
234     }
235 
236     mmap_lock();
237     p = pageflags_find(address, address);
238     mmap_unlock();
239     return p ? p->flags : 0;
240 }
241 
242 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
243 static void pageflags_create(target_ulong start, target_ulong last, int flags)
244 {
245     PageFlagsNode *p = g_new(PageFlagsNode, 1);
246 
247     p->itree.start = start;
248     p->itree.last = last;
249     p->flags = flags;
250     interval_tree_insert(&p->itree, &pageflags_root);
251 }
252 
253 /* A subroutine of page_set_flags: remove everything in [start,last]. */
254 static bool pageflags_unset(target_ulong start, target_ulong last)
255 {
256     bool inval_tb = false;
257 
258     while (true) {
259         PageFlagsNode *p = pageflags_find(start, last);
260         target_ulong p_last;
261 
262         if (!p) {
263             break;
264         }
265 
266         if (p->flags & PAGE_EXEC) {
267             inval_tb = true;
268         }
269 
270         interval_tree_remove(&p->itree, &pageflags_root);
271         p_last = p->itree.last;
272 
273         if (p->itree.start < start) {
274             /* Truncate the node from the end, or split out the middle. */
275             p->itree.last = start - 1;
276             interval_tree_insert(&p->itree, &pageflags_root);
277             if (last < p_last) {
278                 pageflags_create(last + 1, p_last, p->flags);
279                 break;
280             }
281         } else if (p_last <= last) {
282             /* Range completely covers node -- remove it. */
283             g_free_rcu(p, rcu);
284         } else {
285             /* Truncate the node from the start. */
286             p->itree.start = last + 1;
287             interval_tree_insert(&p->itree, &pageflags_root);
288             break;
289         }
290     }
291 
292     return inval_tb;
293 }
294 
295 /*
296  * A subroutine of page_set_flags: nothing overlaps [start,last],
297  * but check adjacent mappings and maybe merge into a single range.
298  */
299 static void pageflags_create_merge(target_ulong start, target_ulong last,
300                                    int flags)
301 {
302     PageFlagsNode *next = NULL, *prev = NULL;
303 
304     if (start > 0) {
305         prev = pageflags_find(start - 1, start - 1);
306         if (prev) {
307             if (prev->flags == flags) {
308                 interval_tree_remove(&prev->itree, &pageflags_root);
309             } else {
310                 prev = NULL;
311             }
312         }
313     }
314     if (last + 1 != 0) {
315         next = pageflags_find(last + 1, last + 1);
316         if (next) {
317             if (next->flags == flags) {
318                 interval_tree_remove(&next->itree, &pageflags_root);
319             } else {
320                 next = NULL;
321             }
322         }
323     }
324 
325     if (prev) {
326         if (next) {
327             prev->itree.last = next->itree.last;
328             g_free_rcu(next, rcu);
329         } else {
330             prev->itree.last = last;
331         }
332         interval_tree_insert(&prev->itree, &pageflags_root);
333     } else if (next) {
334         next->itree.start = start;
335         interval_tree_insert(&next->itree, &pageflags_root);
336     } else {
337         pageflags_create(start, last, flags);
338     }
339 }
340 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 *
 * PAGE_STICKY is the set of flag bits that page_set_flags() leaves
 * untouched on an existing mapping unless PAGE_RESET was requested.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
349 
350 /* A subroutine of page_set_flags: add flags to [start,last]. */
351 static bool pageflags_set_clear(target_ulong start, target_ulong last,
352                                 int set_flags, int clear_flags)
353 {
354     PageFlagsNode *p;
355     target_ulong p_start, p_last;
356     int p_flags, merge_flags;
357     bool inval_tb = false;
358 
359  restart:
360     p = pageflags_find(start, last);
361     if (!p) {
362         if (set_flags) {
363             pageflags_create_merge(start, last, set_flags);
364         }
365         goto done;
366     }
367 
368     p_start = p->itree.start;
369     p_last = p->itree.last;
370     p_flags = p->flags;
371     /* Using mprotect on a page does not change sticky bits. */
372     merge_flags = (p_flags & ~clear_flags) | set_flags;
373 
374     /*
375      * Need to flush if an overlapping executable region
376      * removes exec, or adds write.
377      */
378     if ((p_flags & PAGE_EXEC)
379         && (!(merge_flags & PAGE_EXEC)
380             || (merge_flags & ~p_flags & PAGE_WRITE))) {
381         inval_tb = true;
382     }
383 
384     /*
385      * If there is an exact range match, update and return without
386      * attempting to merge with adjacent regions.
387      */
388     if (start == p_start && last == p_last) {
389         if (merge_flags) {
390             p->flags = merge_flags;
391         } else {
392             interval_tree_remove(&p->itree, &pageflags_root);
393             g_free_rcu(p, rcu);
394         }
395         goto done;
396     }
397 
398     /*
399      * If sticky bits affect the original mapping, then we must be more
400      * careful about the existing intervals and the separate flags.
401      */
402     if (set_flags != merge_flags) {
403         if (p_start < start) {
404             interval_tree_remove(&p->itree, &pageflags_root);
405             p->itree.last = start - 1;
406             interval_tree_insert(&p->itree, &pageflags_root);
407 
408             if (last < p_last) {
409                 if (merge_flags) {
410                     pageflags_create(start, last, merge_flags);
411                 }
412                 pageflags_create(last + 1, p_last, p_flags);
413             } else {
414                 if (merge_flags) {
415                     pageflags_create(start, p_last, merge_flags);
416                 }
417                 if (p_last < last) {
418                     start = p_last + 1;
419                     goto restart;
420                 }
421             }
422         } else {
423             if (start < p_start && set_flags) {
424                 pageflags_create(start, p_start - 1, set_flags);
425             }
426             if (last < p_last) {
427                 interval_tree_remove(&p->itree, &pageflags_root);
428                 p->itree.start = last + 1;
429                 interval_tree_insert(&p->itree, &pageflags_root);
430                 if (merge_flags) {
431                     pageflags_create(start, last, merge_flags);
432                 }
433             } else {
434                 if (merge_flags) {
435                     p->flags = merge_flags;
436                 } else {
437                     interval_tree_remove(&p->itree, &pageflags_root);
438                     g_free_rcu(p, rcu);
439                 }
440                 if (p_last < last) {
441                     start = p_last + 1;
442                     goto restart;
443                 }
444             }
445         }
446         goto done;
447     }
448 
449     /* If flags are not changing for this range, incorporate it. */
450     if (set_flags == p_flags) {
451         if (start < p_start) {
452             interval_tree_remove(&p->itree, &pageflags_root);
453             p->itree.start = start;
454             interval_tree_insert(&p->itree, &pageflags_root);
455         }
456         if (p_last < last) {
457             start = p_last + 1;
458             goto restart;
459         }
460         goto done;
461     }
462 
463     /* Maybe split out head and/or tail ranges with the original flags. */
464     interval_tree_remove(&p->itree, &pageflags_root);
465     if (p_start < start) {
466         p->itree.last = start - 1;
467         interval_tree_insert(&p->itree, &pageflags_root);
468 
469         if (p_last < last) {
470             goto restart;
471         }
472         if (last < p_last) {
473             pageflags_create(last + 1, p_last, p_flags);
474         }
475     } else if (last < p_last) {
476         p->itree.start = last + 1;
477         interval_tree_insert(&p->itree, &pageflags_root);
478     } else {
479         g_free_rcu(p, rcu);
480         goto restart;
481     }
482     if (set_flags) {
483         pageflags_create(start, last, set_flags);
484     }
485 
486  done:
487     return inval_tb;
488 }
489 
490 void page_set_flags(target_ulong start, target_ulong last, int flags)
491 {
492     bool reset = false;
493     bool inval_tb = false;
494 
495     /* This function should never be called with addresses outside the
496        guest address space.  If this assert fires, it probably indicates
497        a missing call to h2g_valid.  */
498     assert(start <= last);
499     assert(last <= GUEST_ADDR_MAX);
500     /* Only set PAGE_ANON with new mappings. */
501     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
502     assert_memory_lock();
503 
504     start &= TARGET_PAGE_MASK;
505     last |= ~TARGET_PAGE_MASK;
506 
507     if (!(flags & PAGE_VALID)) {
508         flags = 0;
509     } else {
510         reset = flags & PAGE_RESET;
511         flags &= ~PAGE_RESET;
512         if (flags & PAGE_WRITE) {
513             flags |= PAGE_WRITE_ORG;
514         }
515     }
516 
517     if (!flags || reset) {
518         page_reset_target_data(start, last);
519         inval_tb |= pageflags_unset(start, last);
520     }
521     if (flags) {
522         inval_tb |= pageflags_set_clear(start, last, flags,
523                                         ~(reset ? 0 : PAGE_STICKY));
524     }
525     if (inval_tb) {
526         tb_invalidate_phys_range(start, last);
527     }
528 }
529 
530 bool page_check_range(target_ulong start, target_ulong len, int flags)
531 {
532     target_ulong last;
533     int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
534     bool ret;
535 
536     if (len == 0) {
537         return true;  /* trivial length */
538     }
539 
540     last = start + len - 1;
541     if (last < start) {
542         return false; /* wrap around */
543     }
544 
545     locked = have_mmap_lock();
546     while (true) {
547         PageFlagsNode *p = pageflags_find(start, last);
548         int missing;
549 
550         if (!p) {
551             if (!locked) {
552                 /*
553                  * Lockless lookups have false negatives.
554                  * Retry with the lock held.
555                  */
556                 mmap_lock();
557                 locked = -1;
558                 p = pageflags_find(start, last);
559             }
560             if (!p) {
561                 ret = false; /* entire region invalid */
562                 break;
563             }
564         }
565         if (start < p->itree.start) {
566             ret = false; /* initial bytes invalid */
567             break;
568         }
569 
570         missing = flags & ~p->flags;
571         if (missing & ~PAGE_WRITE) {
572             ret = false; /* page doesn't match */
573             break;
574         }
575         if (missing & PAGE_WRITE) {
576             if (!(p->flags & PAGE_WRITE_ORG)) {
577                 ret = false; /* page not writable */
578                 break;
579             }
580             /* Asking about writable, but has been protected: undo. */
581             if (!page_unprotect(start, 0)) {
582                 ret = false;
583                 break;
584             }
585             /* TODO: page_unprotect should take a range, not a single page. */
586             if (last - start < TARGET_PAGE_SIZE) {
587                 ret = true; /* ok */
588                 break;
589             }
590             start += TARGET_PAGE_SIZE;
591             continue;
592         }
593 
594         if (last <= p->itree.last) {
595             ret = true; /* ok */
596             break;
597         }
598         start = p->itree.last + 1;
599     }
600 
601     /* Release the lock if acquired locally. */
602     if (locked < 0) {
603         mmap_unlock();
604     }
605     return ret;
606 }
607 
608 bool page_check_range_empty(target_ulong start, target_ulong last)
609 {
610     assert(last >= start);
611     assert_memory_lock();
612     return pageflags_find(start, last) == NULL;
613 }
614 
615 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
616                                    target_ulong len, target_ulong align)
617 {
618     target_ulong len_m1, align_m1;
619 
620     assert(min <= max);
621     assert(max <= GUEST_ADDR_MAX);
622     assert(len != 0);
623     assert(is_power_of_2(align));
624     assert_memory_lock();
625 
626     len_m1 = len - 1;
627     align_m1 = align - 1;
628 
629     /* Iteratively narrow the search region. */
630     while (1) {
631         PageFlagsNode *p;
632 
633         /* Align min and double-check there's enough space remaining. */
634         min = (min + align_m1) & ~align_m1;
635         if (min > max) {
636             return -1;
637         }
638         if (len_m1 > max - min) {
639             return -1;
640         }
641 
642         p = pageflags_find(min, min + len_m1);
643         if (p == NULL) {
644             /* Found! */
645             return min;
646         }
647         if (max <= p->itree.last) {
648             /* Existing allocation fills the remainder of the search region. */
649             return -1;
650         }
651         /* Skip across existing allocation. */
652         min = p->itree.last + 1;
653     }
654 }
655 
656 void page_protect(tb_page_addr_t address)
657 {
658     PageFlagsNode *p;
659     target_ulong start, last;
660     int host_page_size = qemu_real_host_page_size();
661     int prot;
662 
663     assert_memory_lock();
664 
665     if (host_page_size <= TARGET_PAGE_SIZE) {
666         start = address & TARGET_PAGE_MASK;
667         last = start + TARGET_PAGE_SIZE - 1;
668     } else {
669         start = address & -host_page_size;
670         last = start + host_page_size - 1;
671     }
672 
673     p = pageflags_find(start, last);
674     if (!p) {
675         return;
676     }
677     prot = p->flags;
678 
679     if (unlikely(p->itree.last < last)) {
680         /* More than one protection region covers the one host page. */
681         assert(TARGET_PAGE_SIZE < host_page_size);
682         while ((p = pageflags_next(p, start, last)) != NULL) {
683             prot |= p->flags;
684         }
685     }
686 
687     if (prot & PAGE_WRITE) {
688         pageflags_set_clear(start, last, 0, PAGE_WRITE);
689         mprotect(g2h_untagged(start), last - start + 1,
690                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
691     }
692 }
693 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @address: guest address that was written to.
 * @pc: host pc of the faulting access, passed on to
 *      tb_invalidate_phys_page_unwind(); page_check_range() passes 0.
 */
int page_unprotect(tb_page_addr_t address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
#ifdef TARGET_HAS_PRECISE_SMC
        /* Report 2 if the TB containing @pc is among those invalidated. */
        TranslationBlock *current_tb = tcg_tb_lookup(pc);
        if (current_tb) {
            current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
        }
#endif
    } else {
        int host_page_size = qemu_real_host_page_size();
        target_ulong start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* The host can change protection of this target page alone. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
        } else {
            /*
             * One host page spans several target pages: the whole host
             * page is unprotected, so handle every target page in it.
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                target_ulong addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    /* Accumulate the union of flags for the host mprotect. */
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
        }
        /* On the host, guest-executable pages are mapped readable only. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
780 
781 static int probe_access_internal(CPUArchState *env, vaddr addr,
782                                  int fault_size, MMUAccessType access_type,
783                                  bool nonfault, uintptr_t ra)
784 {
785     int acc_flag;
786     bool maperr;
787 
788     switch (access_type) {
789     case MMU_DATA_STORE:
790         acc_flag = PAGE_WRITE_ORG;
791         break;
792     case MMU_DATA_LOAD:
793         acc_flag = PAGE_READ;
794         break;
795     case MMU_INST_FETCH:
796         acc_flag = PAGE_EXEC;
797         break;
798     default:
799         g_assert_not_reached();
800     }
801 
802     if (guest_addr_valid_untagged(addr)) {
803         int page_flags = page_get_flags(addr);
804         if (page_flags & acc_flag) {
805             if (access_type != MMU_INST_FETCH
806                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
807                 return TLB_MMIO;
808             }
809             return 0; /* success */
810         }
811         maperr = !(page_flags & PAGE_VALID);
812     } else {
813         maperr = true;
814     }
815 
816     if (nonfault) {
817         return TLB_INVALID_MASK;
818     }
819 
820     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
821 }
822 
823 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
824                        MMUAccessType access_type, int mmu_idx,
825                        bool nonfault, void **phost, uintptr_t ra)
826 {
827     int flags;
828 
829     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
830     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
831     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
832     return flags;
833 }
834 
835 void *probe_access(CPUArchState *env, vaddr addr, int size,
836                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
837 {
838     int flags;
839 
840     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
841     flags = probe_access_internal(env, addr, size, access_type, false, ra);
842     g_assert((flags & ~TLB_MMIO) == 0);
843 
844     return size ? g2h(env_cpu(env), addr) : NULL;
845 }
846 
847 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
848                                         void **hostp)
849 {
850     int flags;
851 
852     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
853     g_assert(flags == 0);
854 
855     if (hostp) {
856         *hostp = g2h_untagged(addr);
857     }
858     return addr;
859 }
860 
861 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128, or
 * roughly 31%.  Therefore, allocate memory for 64 pages at a time for
 * overhead < 1%.
 *
 * TPD_PAGES is the number of pages per chunk.  TBD_MASK masks an
 * address down to the start of its TPD_PAGES-page chunk.
 */
#define TPD_PAGES  64
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
869 
/*
 * Per-page target data for one chunk of TPD_PAGES consecutive pages,
 * TARGET_PAGE_DATA_SIZE bytes per page, stored back to back in data[].
 */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;      /* used by g_free_rcu() when a chunk is dropped */
    IntervalTreeNode itree;   /* inclusive guest address range of the chunk */
    char data[] __attribute__((aligned));   /* the per-page data slots */
} TargetPageDataNode;

/* Interval tree of all allocated TargetPageDataNode chunks. */
static IntervalTreeRoot targetdata_root;
877 
878 void page_reset_target_data(target_ulong start, target_ulong last)
879 {
880     IntervalTreeNode *n, *next;
881 
882     assert_memory_lock();
883 
884     start &= TARGET_PAGE_MASK;
885     last |= ~TARGET_PAGE_MASK;
886 
887     for (n = interval_tree_iter_first(&targetdata_root, start, last),
888          next = n ? interval_tree_iter_next(n, start, last) : NULL;
889          n != NULL;
890          n = next,
891          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
892         target_ulong n_start, n_last, p_ofs, p_len;
893         TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
894 
895         if (n->start >= start && n->last <= last) {
896             interval_tree_remove(n, &targetdata_root);
897             g_free_rcu(t, rcu);
898             continue;
899         }
900 
901         if (n->start < start) {
902             n_start = start;
903             p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
904         } else {
905             n_start = n->start;
906             p_ofs = 0;
907         }
908         n_last = MIN(last, n->last);
909         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
910 
911         memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
912                p_len * TARGET_PAGE_DATA_SIZE);
913     }
914 }
915 
916 void *page_get_target_data(target_ulong address)
917 {
918     IntervalTreeNode *n;
919     TargetPageDataNode *t;
920     target_ulong page, region, p_ofs;
921 
922     page = address & TARGET_PAGE_MASK;
923     region = address & TBD_MASK;
924 
925     n = interval_tree_iter_first(&targetdata_root, page, page);
926     if (!n) {
927         /*
928          * See util/interval-tree.c re lockless lookups: no false positives
929          * but there are false negatives.  If we find nothing, retry with
930          * the mmap lock acquired.  We also need the lock for the
931          * allocation + insert.
932          */
933         mmap_lock();
934         n = interval_tree_iter_first(&targetdata_root, page, page);
935         if (!n) {
936             t = g_malloc0(sizeof(TargetPageDataNode)
937                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
938             n = &t->itree;
939             n->start = region;
940             n->last = region | ~TBD_MASK;
941             interval_tree_insert(n, &targetdata_root);
942         }
943         mmap_unlock();
944     }
945 
946     t = container_of(n, TargetPageDataNode, itree);
947     p_ofs = (page - region) >> TARGET_PAGE_BITS;
948     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
949 }
950 #else
951 void page_reset_target_data(target_ulong start, target_ulong last) { }
952 #endif /* TARGET_PAGE_DATA_SIZE */
953 
954 /* The system-mode versions of these helpers are in cputlb.c.  */
955 
956 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
957                             MemOp mop, uintptr_t ra, MMUAccessType type)
958 {
959     int a_bits = memop_alignment_bits(mop);
960     void *ret;
961 
962     /* Enforce guest required alignment.  */
963     if (unlikely(addr & ((1 << a_bits) - 1))) {
964         cpu_loop_exit_sigbus(cpu, addr, type, ra);
965     }
966 
967     ret = g2h(cpu, addr);
968     set_helper_retaddr(ra);
969     return ret;
970 }
971 
972 #include "ldst_atomicity.c.inc"
973 
974 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
975                           uintptr_t ra, MMUAccessType access_type)
976 {
977     void *haddr;
978     uint8_t ret;
979 
980     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
981     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
982     ret = ldub_p(haddr);
983     clear_helper_retaddr();
984     return ret;
985 }
986 
987 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
988                            uintptr_t ra, MMUAccessType access_type)
989 {
990     void *haddr;
991     uint16_t ret;
992     MemOp mop = get_memop(oi);
993 
994     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
995     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
996     ret = load_atom_2(cpu, ra, haddr, mop);
997     clear_helper_retaddr();
998 
999     if (mop & MO_BSWAP) {
1000         ret = bswap16(ret);
1001     }
1002     return ret;
1003 }
1004 
1005 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1006                            uintptr_t ra, MMUAccessType access_type)
1007 {
1008     void *haddr;
1009     uint32_t ret;
1010     MemOp mop = get_memop(oi);
1011 
1012     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1013     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1014     ret = load_atom_4(cpu, ra, haddr, mop);
1015     clear_helper_retaddr();
1016 
1017     if (mop & MO_BSWAP) {
1018         ret = bswap32(ret);
1019     }
1020     return ret;
1021 }
1022 
1023 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1024                            uintptr_t ra, MMUAccessType access_type)
1025 {
1026     void *haddr;
1027     uint64_t ret;
1028     MemOp mop = get_memop(oi);
1029 
1030     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1031     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1032     ret = load_atom_8(cpu, ra, haddr, mop);
1033     clear_helper_retaddr();
1034 
1035     if (mop & MO_BSWAP) {
1036         ret = bswap64(ret);
1037     }
1038     return ret;
1039 }
1040 
1041 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1042                           MemOpIdx oi, uintptr_t ra)
1043 {
1044     void *haddr;
1045     Int128 ret;
1046     MemOp mop = get_memop(oi);
1047 
1048     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1049     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1050     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1051     ret = load_atom_16(cpu, ra, haddr, mop);
1052     clear_helper_retaddr();
1053 
1054     if (mop & MO_BSWAP) {
1055         ret = bswap128(ret);
1056     }
1057     return ret;
1058 }
1059 
1060 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1061                        MemOpIdx oi, uintptr_t ra)
1062 {
1063     void *haddr;
1064 
1065     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1066     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1067     stb_p(haddr, val);
1068     clear_helper_retaddr();
1069 }
1070 
1071 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1072                        MemOpIdx oi, uintptr_t ra)
1073 {
1074     void *haddr;
1075     MemOp mop = get_memop(oi);
1076 
1077     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1078     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1079 
1080     if (mop & MO_BSWAP) {
1081         val = bswap16(val);
1082     }
1083     store_atom_2(cpu, ra, haddr, mop, val);
1084     clear_helper_retaddr();
1085 }
1086 
1087 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1088                        MemOpIdx oi, uintptr_t ra)
1089 {
1090     void *haddr;
1091     MemOp mop = get_memop(oi);
1092 
1093     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1094     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1095 
1096     if (mop & MO_BSWAP) {
1097         val = bswap32(val);
1098     }
1099     store_atom_4(cpu, ra, haddr, mop, val);
1100     clear_helper_retaddr();
1101 }
1102 
1103 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1104                        MemOpIdx oi, uintptr_t ra)
1105 {
1106     void *haddr;
1107     MemOp mop = get_memop(oi);
1108 
1109     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1110     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1111 
1112     if (mop & MO_BSWAP) {
1113         val = bswap64(val);
1114     }
1115     store_atom_8(cpu, ra, haddr, mop, val);
1116     clear_helper_retaddr();
1117 }
1118 
1119 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1120                         MemOpIdx oi, uintptr_t ra)
1121 {
1122     void *haddr;
1123     MemOpIdx mop = get_memop(oi);
1124 
1125     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1126     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1127 
1128     if (mop & MO_BSWAP) {
1129         val = bswap128(val);
1130     }
1131     store_atom_16(cpu, ra, haddr, mop, val);
1132     clear_helper_retaddr();
1133 }
1134 
1135 uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
1136 {
1137     uint32_t ret;
1138 
1139     set_helper_retaddr(1);
1140     ret = ldub_p(g2h_untagged(ptr));
1141     clear_helper_retaddr();
1142     return ret;
1143 }
1144 
1145 uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr ptr)
1146 {
1147     uint32_t ret;
1148 
1149     set_helper_retaddr(1);
1150     ret = lduw_p(g2h_untagged(ptr));
1151     clear_helper_retaddr();
1152     return ret;
1153 }
1154 
1155 uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr ptr)
1156 {
1157     uint32_t ret;
1158 
1159     set_helper_retaddr(1);
1160     ret = ldl_p(g2h_untagged(ptr));
1161     clear_helper_retaddr();
1162     return ret;
1163 }
1164 
1165 uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
1166 {
1167     uint64_t ret;
1168 
1169     set_helper_retaddr(1);
1170     ret = ldq_p(g2h_untagged(ptr));
1171     clear_helper_retaddr();
1172     return ret;
1173 }
1174 
1175 uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
1176                          MemOpIdx oi, uintptr_t ra)
1177 {
1178     void *haddr;
1179     uint8_t ret;
1180 
1181     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1182     ret = ldub_p(haddr);
1183     clear_helper_retaddr();
1184     return ret;
1185 }
1186 
1187 uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
1188                           MemOpIdx oi, uintptr_t ra)
1189 {
1190     void *haddr;
1191     uint16_t ret;
1192 
1193     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1194     ret = lduw_p(haddr);
1195     clear_helper_retaddr();
1196     if (get_memop(oi) & MO_BSWAP) {
1197         ret = bswap16(ret);
1198     }
1199     return ret;
1200 }
1201 
1202 uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
1203                           MemOpIdx oi, uintptr_t ra)
1204 {
1205     void *haddr;
1206     uint32_t ret;
1207 
1208     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_INST_FETCH);
1209     ret = ldl_p(haddr);
1210     clear_helper_retaddr();
1211     if (get_memop(oi) & MO_BSWAP) {
1212         ret = bswap32(ret);
1213     }
1214     return ret;
1215 }
1216 
1217 uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
1218                           MemOpIdx oi, uintptr_t ra)
1219 {
1220     void *haddr;
1221     uint64_t ret;
1222 
1223     haddr = cpu_mmu_lookup(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
1224     ret = ldq_p(haddr);
1225     clear_helper_retaddr();
1226     if (get_memop(oi) & MO_BSWAP) {
1227         ret = bswap64(ret);
1228     }
1229     return ret;
1230 }
1231 
1232 #include "ldst_common.c.inc"
1233 
1234 /*
1235  * Do not allow unaligned operations to proceed.  Return the host address.
1236  */
1237 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1238                                int size, uintptr_t retaddr)
1239 {
1240     MemOp mop = get_memop(oi);
1241     int a_bits = memop_alignment_bits(mop);
1242     void *ret;
1243 
1244     /* Enforce guest required alignment.  */
1245     if (unlikely(addr & ((1 << a_bits) - 1))) {
1246         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1247     }
1248 
1249     /* Enforce qemu required alignment.  */
1250     if (unlikely(addr & (size - 1))) {
1251         cpu_loop_exit_atomic(cpu, retaddr);
1252     }
1253 
1254     ret = g2h(cpu, addr);
1255     set_helper_retaddr(retaddr);
1256     return ret;
1257 }
1258 
1259 #include "atomic_common.c.inc"
1260 
1261 /*
1262  * First set of functions passes in OI and RETADDR.
1263  * This makes them callable from other helpers.
1264  */
1265 
/*
 * Name of a generated atomic helper: "cpu_atomic_" X, followed by the
 * SUFFIX and END tokens, followed by "_mmu".
 * NOTE(review): SUFFIX and END are not defined here; presumably
 * atomic_template.h defines them from DATA_SIZE -- confirm.
 */
#define ATOMIC_NAME(X) \
    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
/*
 * Cleanup hook for the template: clears helper_retaddr, which
 * atomic_mmu_lookup() sets before returning the host address.
 */
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)

/*
 * Instantiate the template once per access size.
 * NOTE(review): DATA_SIZE is redefined before each include with no #undef
 * here; presumably atomic_template.h undefines it at its end -- confirm.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8-byte helpers are only generated when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/*
 * 16-byte helpers are generated when CONFIG_ATOMIC128 is defined or
 * HAVE_CMPXCHG128 is non-zero.
 */
#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
1288