xref: /qemu/accel/tcg/user-exec.c (revision 43625e35d9319821f6d51cbf2798991bca533b26)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "accel/tcg/cpu-ops.h"
21 #include "disas/disas.h"
22 #include "cpu.h"
23 #include "exec/vaddr.h"
24 #include "exec/exec-all.h"
25 #include "exec/tlb-flags.h"
26 #include "tcg/tcg.h"
27 #include "qemu/bitops.h"
28 #include "qemu/rcu.h"
29 #include "accel/tcg/cpu-ldst.h"
30 #include "user/cpu_loop.h"
31 #include "qemu/main-loop.h"
32 #include "user/page-protection.h"
33 #include "exec/page-protection.h"
34 #include "exec/helper-proto.h"
35 #include "qemu/atomic128.h"
36 #include "qemu/bswap.h"
37 #include "qemu/int128.h"
38 #include "trace.h"
39 #include "tcg/tcg-ldst.h"
40 #include "backend-ldst.h"
41 #include "internal-common.h"
42 #include "internal-target.h"
43 #include "tb-internal.h"
44 
45 __thread uintptr_t helper_retaddr;
46 
47 //#define DEBUG_SIGNAL
48 
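/*
 * User-mode version of cpu_interrupt: record the request and make
 * icount_decr negative so that generated code exits to the main loop
 * at its next interrupt check.
 */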
49 void cpu_interrupt(CPUState *cpu, int mask)
50 {
51     g_assert(bql_locked());
52     cpu->interrupt_request |= mask;
53     qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
54 }
55 
56 /*
57  * Adjust the pc to pass to cpu_restore_state; return the MMU access type.
58  */
59 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
60 {
61     switch (helper_retaddr) {
62     default:
63         /*
64          * Fault during host memory operation within a helper function.
65          * The helper's host return address, saved here, gives us a
66          * pointer into the generated code that will unwind to the
67          * correct guest pc.
68          */
69         *pc = helper_retaddr;
70         break;
71 
72     case 0:
73         /*
74          * Fault during host memory operation within generated code.
75          * (Or an unrelated bug within qemu, but we can't tell from here).
76          *
77          * We take the host pc from the signal frame.  However, we cannot
78          * use that value directly.  Within cpu_restore_state_from_tb, we
79          * assume PC comes from GETPC(), as used by the helper functions,
80          * so we adjust the address by -GETPC_ADJ to form an address that
81          * is within the call insn, so that the address does not accidentally
82          * match the beginning of the next guest insn.  However, when the
83          * pc comes from the signal frame it points to the actual faulting
84          * host memory insn and not the return from a call insn.
85          *
86          * Therefore, adjust to compensate for what will be done later
87          * by cpu_restore_state_from_tb.
88          */
89         *pc += GETPC_ADJ;
90         break;
91 
92     case 1:
93         /*
94          * Fault during host read for translation, or loosely, "execution".
95          *
96          * The guest pc is already pointing to the start of the TB for which
97          * code is being generated.  If the guest translator manages the
98          * page crossings correctly, this is exactly the correct address
99          * (and if the translator doesn't handle page boundaries correctly
100          * there's little we can do about that here).  Therefore, do not
101          * trigger the unwinder.
102          */
103         *pc = 0;
104         return MMU_INST_FETCH;
105     }
106 
107     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
108 }
109 
110 /**
111  * handle_sigsegv_accerr_write:
112  * @cpu: the cpu context
113  * @old_set: the sigset_t from the signal ucontext_t
114  * @host_pc: the host pc, adjusted for the signal
115  * @guest_addr: the guest address of the fault
116  *
117  * Return true if the write fault was handled and the access should be retried.
118  *
119  * Note that it is important that we don't call page_unprotect() unless
120  * this is really a "write to nonwritable page" fault, because
121  * page_unprotect() assumes that if it is called for an access to
122  * a page that's writable this means we had two threads racing and
123  * another thread got there first and already made the page writable;
124  * so we will retry the access. If we were to call page_unprotect()
125  * for some other kind of fault that should really be passed to the
126  * guest, we'd end up in an infinite loop of retrying the faulting access.
127  */
128 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
129                                  uintptr_t host_pc, abi_ptr guest_addr)
130 {
131     switch (page_unprotect(cpu, guest_addr, host_pc)) {
132     case 0:
133         /*
134          * Fault not caused by a page marked unwritable to protect
135          * cached translations, must be the guest binary's problem.
136          * cached translations; it must be the guest binary's problem.
137         return false;
138     case 1:
139         /*
140          * Fault caused by protection of cached translation; TBs
141          * invalidated, so resume execution.
142          */
143         return true;
144     case 2:
145         /*
146          * Fault caused by protection of cached translation, and the
147          * currently executing TB was modified and must be exited immediately.
148          */
149         sigprocmask(SIG_SETMASK, old_set, NULL);
150         cpu_loop_exit_noexc(cpu);
151         /* NORETURN */
152     default:
153         g_assert_not_reached();
154     }
155 }
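
/*
 * Illustrative sketch (not part of the original file): a host SIGSEGV
 * handler would typically use the two helpers above roughly as follows
 * (variable names are approximate; see host_signal_handler in
 * linux-user/signal.c for the real code):
 *
 *     pc = host_signal_pc(uc);
 *     access_type = adjust_signal_pc(&pc, is_write);
 *     if (is_write
 *         && info->si_code == SEGV_ACCERR
 *         && handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask,
 *                                        pc, guest_addr)) {
 *         return;   // retry the faulting access
 *     }
 *     cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc);
 */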
156 
157 typedef struct PageFlagsNode {
158     struct rcu_head rcu;
159     IntervalTreeNode itree;
160     int flags;
161 } PageFlagsNode;
162 
163 static IntervalTreeRoot pageflags_root;
164 
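/*
 * Return the first pageflags node overlapping [start,last], or NULL.
 * Lookups may be performed without the mmap lock; see the comment in
 * page_get_flags about false negatives.
 */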
165 static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
166 {
167     IntervalTreeNode *n;
168 
169     n = interval_tree_iter_first(&pageflags_root, start, last);
170     return n ? container_of(n, PageFlagsNode, itree) : NULL;
171 }
172 
173 static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
174                                      target_ulong last)
175 {
176     IntervalTreeNode *n;
177 
178     n = interval_tree_iter_next(&p->itree, start, last);
179     return n ? container_of(n, PageFlagsNode, itree) : NULL;
180 }
181 
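/*
 * Walk every mapped region under the mmap lock, calling
 * fn(priv, start, end, flags) for each; stop early and return the first
 * nonzero value returned by fn.
 */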
182 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
183 {
184     IntervalTreeNode *n;
185     int rc = 0;
186 
187     mmap_lock();
188     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
189          n != NULL;
190          n = interval_tree_iter_next(n, 0, -1)) {
191         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
192 
193         rc = fn(priv, n->start, n->last + 1, p->flags);
194         if (rc != 0) {
195             break;
196         }
197     }
198     mmap_unlock();
199 
200     return rc;
201 }
202 
203 static int dump_region(void *priv, target_ulong start,
204                        target_ulong end, unsigned long prot)
205 {
206     FILE *f = (FILE *)priv;
207 
208     fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx" "TARGET_FMT_lx" %c%c%c\n",
209             start, end, end - start,
210             ((prot & PAGE_READ) ? 'r' : '-'),
211             ((prot & PAGE_WRITE) ? 'w' : '-'),
212             ((prot & PAGE_EXEC) ? 'x' : '-'));
213     return 0;
214 }
215 
216 /* dump memory mappings */
217 void page_dump(FILE *f)
218 {
219     const int length = sizeof(target_ulong) * 2;
220 
221     fprintf(f, "%-*s %-*s %-*s %s\n",
222             length, "start", length, "end", length, "size", "prot");
223     walk_memory_regions(f, dump_region);
224 }
225 
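/* Return the flags for the page containing @address, or 0 if unmapped. */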
226 int page_get_flags(target_ulong address)
227 {
228     PageFlagsNode *p = pageflags_find(address, address);
229 
230     /*
231      * See util/interval-tree.c re lockless lookups: no false positives but
232      * there are false negatives.  If we find nothing, retry with the mmap
233      * lock acquired.
234      */
235     if (p) {
236         return p->flags;
237     }
238     if (have_mmap_lock()) {
239         return 0;
240     }
241 
242     mmap_lock();
243     p = pageflags_find(address, address);
244     mmap_unlock();
245     return p ? p->flags : 0;
246 }
247 
248 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
249 static void pageflags_create(target_ulong start, target_ulong last, int flags)
250 {
251     PageFlagsNode *p = g_new(PageFlagsNode, 1);
252 
253     p->itree.start = start;
254     p->itree.last = last;
255     p->flags = flags;
256     interval_tree_insert(&p->itree, &pageflags_root);
257 }
258 
259 /* A subroutine of page_set_flags: remove everything in [start,last]. */
260 static bool pageflags_unset(target_ulong start, target_ulong last)
261 {
262     bool inval_tb = false;
263 
264     while (true) {
265         PageFlagsNode *p = pageflags_find(start, last);
266         target_ulong p_last;
267 
268         if (!p) {
269             break;
270         }
271 
272         if (p->flags & PAGE_EXEC) {
273             inval_tb = true;
274         }
275 
276         interval_tree_remove(&p->itree, &pageflags_root);
277         p_last = p->itree.last;
278 
279         if (p->itree.start < start) {
280             /* Truncate the node from the end, or split out the middle. */
281             p->itree.last = start - 1;
282             interval_tree_insert(&p->itree, &pageflags_root);
283             if (last < p_last) {
284                 pageflags_create(last + 1, p_last, p->flags);
285                 break;
286             }
287         } else if (p_last <= last) {
288             /* Range completely covers node -- remove it. */
289             g_free_rcu(p, rcu);
290         } else {
291             /* Truncate the node from the start. */
292             p->itree.start = last + 1;
293             interval_tree_insert(&p->itree, &pageflags_root);
294             break;
295         }
296     }
297 
298     return inval_tb;
299 }
300 
301 /*
302  * A subroutine of page_set_flags: nothing overlaps [start,last],
303  * but check adjacent mappings and merge them when the flags match.
304  */
305 static void pageflags_create_merge(target_ulong start, target_ulong last,
306                                    int flags)
307 {
308     PageFlagsNode *next = NULL, *prev = NULL;
309 
310     if (start > 0) {
311         prev = pageflags_find(start - 1, start - 1);
312         if (prev) {
313             if (prev->flags == flags) {
314                 interval_tree_remove(&prev->itree, &pageflags_root);
315             } else {
316                 prev = NULL;
317             }
318         }
319     }
320     if (last + 1 != 0) {
321         next = pageflags_find(last + 1, last + 1);
322         if (next) {
323             if (next->flags == flags) {
324                 interval_tree_remove(&next->itree, &pageflags_root);
325             } else {
326                 next = NULL;
327             }
328         }
329     }
330 
331     if (prev) {
332         if (next) {
333             prev->itree.last = next->itree.last;
334             g_free_rcu(next, rcu);
335         } else {
336             prev->itree.last = last;
337         }
338         interval_tree_insert(&prev->itree, &pageflags_root);
339     } else if (next) {
340         next->itree.start = start;
341         interval_tree_insert(&next->itree, &pageflags_root);
342     } else {
343         pageflags_create(start, last, flags);
344     }
345 }
346 
347 /*
348  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
349  * By default, they are not kept.
350  */
351 #ifndef PAGE_TARGET_STICKY
352 #define PAGE_TARGET_STICKY  0
353 #endif
354 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
355 
356 /* A subroutine of page_set_flags: set and clear flags in [start,last]. */
357 static bool pageflags_set_clear(target_ulong start, target_ulong last,
358                                 int set_flags, int clear_flags)
359 {
360     PageFlagsNode *p;
361     target_ulong p_start, p_last;
362     int p_flags, merge_flags;
363     bool inval_tb = false;
364 
365  restart:
366     p = pageflags_find(start, last);
367     if (!p) {
368         if (set_flags) {
369             pageflags_create_merge(start, last, set_flags);
370         }
371         goto done;
372     }
373 
374     p_start = p->itree.start;
375     p_last = p->itree.last;
376     p_flags = p->flags;
377     /* Using mprotect on a page does not change sticky bits. */
378     merge_flags = (p_flags & ~clear_flags) | set_flags;
379 
380     /*
381      * Need to flush if an overlapping executable region
382      * removes exec, or adds write.
383      */
384     if ((p_flags & PAGE_EXEC)
385         && (!(merge_flags & PAGE_EXEC)
386             || (merge_flags & ~p_flags & PAGE_WRITE))) {
387         inval_tb = true;
388     }
389 
390     /*
391      * If there is an exact range match, update and return without
392      * attempting to merge with adjacent regions.
393      */
394     if (start == p_start && last == p_last) {
395         if (merge_flags) {
396             p->flags = merge_flags;
397         } else {
398             interval_tree_remove(&p->itree, &pageflags_root);
399             g_free_rcu(p, rcu);
400         }
401         goto done;
402     }
403 
404     /*
405      * If sticky bits affect the original mapping, then we must be more
406      * careful about the existing intervals and the separate flags.
407      */
408     if (set_flags != merge_flags) {
409         if (p_start < start) {
410             interval_tree_remove(&p->itree, &pageflags_root);
411             p->itree.last = start - 1;
412             interval_tree_insert(&p->itree, &pageflags_root);
413 
414             if (last < p_last) {
415                 if (merge_flags) {
416                     pageflags_create(start, last, merge_flags);
417                 }
418                 pageflags_create(last + 1, p_last, p_flags);
419             } else {
420                 if (merge_flags) {
421                     pageflags_create(start, p_last, merge_flags);
422                 }
423                 if (p_last < last) {
424                     start = p_last + 1;
425                     goto restart;
426                 }
427             }
428         } else {
429             if (start < p_start && set_flags) {
430                 pageflags_create(start, p_start - 1, set_flags);
431             }
432             if (last < p_last) {
433                 interval_tree_remove(&p->itree, &pageflags_root);
434                 p->itree.start = last + 1;
435                 interval_tree_insert(&p->itree, &pageflags_root);
436                 if (merge_flags) {
437                     pageflags_create(start, last, merge_flags);
438                 }
439             } else {
440                 if (merge_flags) {
441                     p->flags = merge_flags;
442                 } else {
443                     interval_tree_remove(&p->itree, &pageflags_root);
444                     g_free_rcu(p, rcu);
445                 }
446                 if (p_last < last) {
447                     start = p_last + 1;
448                     goto restart;
449                 }
450             }
451         }
452         goto done;
453     }
454 
455     /* If flags are not changing for this range, incorporate it. */
456     if (set_flags == p_flags) {
457         if (start < p_start) {
458             interval_tree_remove(&p->itree, &pageflags_root);
459             p->itree.start = start;
460             interval_tree_insert(&p->itree, &pageflags_root);
461         }
462         if (p_last < last) {
463             start = p_last + 1;
464             goto restart;
465         }
466         goto done;
467     }
468 
469     /* Maybe split out head and/or tail ranges with the original flags. */
470     interval_tree_remove(&p->itree, &pageflags_root);
471     if (p_start < start) {
472         p->itree.last = start - 1;
473         interval_tree_insert(&p->itree, &pageflags_root);
474 
475         if (p_last < last) {
476             goto restart;
477         }
478         if (last < p_last) {
479             pageflags_create(last + 1, p_last, p_flags);
480         }
481     } else if (last < p_last) {
482         p->itree.start = last + 1;
483         interval_tree_insert(&p->itree, &pageflags_root);
484     } else {
485         g_free_rcu(p, rcu);
486         goto restart;
487     }
488     if (set_flags) {
489         pageflags_create(start, last, set_flags);
490     }
491 
492  done:
493     return inval_tb;
494 }
495 
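/*
 * Update the flags for all pages in [start,last].  Passing flags without
 * PAGE_VALID drops the range entirely; PAGE_RESET additionally discards
 * sticky per-page state.  Cached translations are invalidated as needed.
 */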
496 void page_set_flags(target_ulong start, target_ulong last, int flags)
497 {
498     bool reset = false;
499     bool inval_tb = false;
500 
501     /* This function should never be called with addresses outside the
502        guest address space.  If this assert fires, it probably indicates
503        a missing call to h2g_valid.  */
504     assert(start <= last);
505     assert(last <= GUEST_ADDR_MAX);
506     /* Only set PAGE_ANON with new mappings. */
507     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
508     assert_memory_lock();
509 
510     start &= TARGET_PAGE_MASK;
511     last |= ~TARGET_PAGE_MASK;
512 
513     if (!(flags & PAGE_VALID)) {
514         flags = 0;
515     } else {
516         reset = flags & PAGE_RESET;
517         flags &= ~PAGE_RESET;
518         if (flags & PAGE_WRITE) {
519             flags |= PAGE_WRITE_ORG;
520         }
521     }
522 
523     if (!flags || reset) {
524         page_reset_target_data(start, last);
525         inval_tb |= pageflags_unset(start, last);
526     }
527     if (flags) {
528         inval_tb |= pageflags_set_clear(start, last, flags,
529                                         ~(reset ? 0 : PAGE_STICKY));
530     }
531     if (inval_tb) {
532         tb_invalidate_phys_range(start, last);
533     }
534 }
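
/*
 * Illustrative only (not part of the original file): a new anonymous
 * mapping would typically be registered roughly as
 *
 *     page_set_flags(start, start + len - 1,
 *                    prot_flags | PAGE_VALID | PAGE_RESET | PAGE_ANON);
 *
 * where prot_flags holds the PAGE_READ/WRITE/EXEC bits, whereas an
 * mprotect() would pass prot_flags | PAGE_VALID without PAGE_RESET so
 * that the sticky bits above are preserved.  See linux-user/mmap.c for
 * the real callers.
 */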
535 
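/*
 * Return true if every page in [start, start + len) has at least the
 * access rights requested in @flags.  A PAGE_WRITE check may unprotect
 * pages that were write-protected to guard cached translations.
 */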
536 bool page_check_range(target_ulong start, target_ulong len, int flags)
537 {
538     target_ulong last;
539     int locked;  /* tri-state: 0 unlocked, +1 already held, -1 taken here */
540     bool ret;
541 
542     if (len == 0) {
543         return true;  /* trivial length */
544     }
545 
546     last = start + len - 1;
547     if (last < start) {
548         return false; /* wrap around */
549     }
550 
551     locked = have_mmap_lock();
552     while (true) {
553         PageFlagsNode *p = pageflags_find(start, last);
554         int missing;
555 
556         if (!p) {
557             if (!locked) {
558                 /*
559                  * Lockless lookups have false negatives.
560                  * Retry with the lock held.
561                  */
562                 mmap_lock();
563                 locked = -1;
564                 p = pageflags_find(start, last);
565             }
566             if (!p) {
567                 ret = false; /* entire region invalid */
568                 break;
569             }
570         }
571         if (start < p->itree.start) {
572             ret = false; /* initial bytes invalid */
573             break;
574         }
575 
576         missing = flags & ~p->flags;
577         if (missing & ~PAGE_WRITE) {
578             ret = false; /* page doesn't match */
579             break;
580         }
581         if (missing & PAGE_WRITE) {
582             if (!(p->flags & PAGE_WRITE_ORG)) {
583                 ret = false; /* page not writable */
584                 break;
585             }
586             /* Asking about writable, but has been protected: undo. */
587             if (!page_unprotect(NULL, start, 0)) {
588                 ret = false;
589                 break;
590             }
591             /* TODO: page_unprotect should take a range, not a single page. */
592             if (last - start < TARGET_PAGE_SIZE) {
593                 ret = true; /* ok */
594                 break;
595             }
596             start += TARGET_PAGE_SIZE;
597             continue;
598         }
599 
600         if (last <= p->itree.last) {
601             ret = true; /* ok */
602             break;
603         }
604         start = p->itree.last + 1;
605     }
606 
607     /* Release the lock if acquired locally. */
608     if (locked < 0) {
609         mmap_unlock();
610     }
611     return ret;
612 }
613 
614 bool page_check_range_empty(target_ulong start, target_ulong last)
615 {
616     assert(last >= start);
617     assert_memory_lock();
618     return pageflags_find(start, last) == NULL;
619 }
620 
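/*
 * Return the lowest @align-aligned address in [min,max] at which @len
 * bytes are unmapped, or -1 if no such range exists.
 */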
621 target_ulong page_find_range_empty(target_ulong min, target_ulong max,
622                                    target_ulong len, target_ulong align)
623 {
624     target_ulong len_m1, align_m1;
625 
626     assert(min <= max);
627     assert(max <= GUEST_ADDR_MAX);
628     assert(len != 0);
629     assert(is_power_of_2(align));
630     assert_memory_lock();
631 
632     len_m1 = len - 1;
633     align_m1 = align - 1;
634 
635     /* Iteratively narrow the search region. */
636     while (1) {
637         PageFlagsNode *p;
638 
639         /* Align min and double-check there's enough space remaining. */
640         min = (min + align_m1) & ~align_m1;
641         if (min > max) {
642             return -1;
643         }
644         if (len_m1 > max - min) {
645             return -1;
646         }
647 
648         p = pageflags_find(min, min + len_m1);
649         if (p == NULL) {
650             /* Found! */
651             return min;
652         }
653         if (max <= p->itree.last) {
654             /* Existing allocation fills the remainder of the search region. */
655             return -1;
656         }
657         /* Skip across existing allocation. */
658         min = p->itree.last + 1;
659     }
660 }
661 
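/*
 * Write-protect the host page containing @address (if currently writable)
 * so that a later guest write faults into page_unprotect(), keeping
 * cached translations coherent.
 */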
662 void tb_lock_page0(tb_page_addr_t address)
663 {
664     PageFlagsNode *p;
665     target_ulong start, last;
666     int host_page_size = qemu_real_host_page_size();
667     int prot;
668 
669     assert_memory_lock();
670 
671     if (host_page_size <= TARGET_PAGE_SIZE) {
672         start = address & TARGET_PAGE_MASK;
673         last = start + TARGET_PAGE_SIZE - 1;
674     } else {
675         start = address & -host_page_size;
676         last = start + host_page_size - 1;
677     }
678 
679     p = pageflags_find(start, last);
680     if (!p) {
681         return;
682     }
683     prot = p->flags;
684 
685     if (unlikely(p->itree.last < last)) {
686         /* More than one protection region covers the one host page. */
687         assert(TARGET_PAGE_SIZE < host_page_size);
688         while ((p = pageflags_next(p, start, last)) != NULL) {
689             prot |= p->flags;
690         }
691     }
692 
693     if (prot & PAGE_WRITE) {
694         pageflags_set_clear(start, last, 0, PAGE_WRITE);
695         mprotect(g2h_untagged(start), last - start + 1,
696                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
697     }
698 }
699 
700 /*
701  * Called from signal handler: invalidate the code and unprotect the
702  * page. Return 0 if the fault was not handled, 1 if it was handled,
703  * and 2 if it was handled but the caller must cause the TB to be
704  * immediately exited. (We can only return 2 if the 'pc' argument is
705  * non-zero.)
706  */
707 int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc)
708 {
709     PageFlagsNode *p;
710     bool current_tb_invalidated;
711 
712     assert((cpu == NULL) == (pc == 0));
713 
714     /*
715      * Technically this isn't safe inside a signal handler.  However we
716      * know this only ever happens in a synchronous SEGV handler, so in
717      * practice it seems to be ok.
718      */
719     mmap_lock();
720 
721     p = pageflags_find(address, address);
722 
723     /* If this address was not really writable, nothing to do. */
724     if (!p || !(p->flags & PAGE_WRITE_ORG)) {
725         mmap_unlock();
726         return 0;
727     }
728 
729     current_tb_invalidated = false;
730     if (p->flags & PAGE_WRITE) {
731         /*
732          * If the page is actually marked WRITE then assume this is because
733          * this thread raced with another one which got here first and
734          * set the page to PAGE_WRITE and did the TB invalidate for us.
735          */
736 #ifdef TARGET_HAS_PRECISE_SMC
737         TranslationBlock *current_tb = tcg_tb_lookup(pc);
738         if (current_tb) {
739             current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
740         }
741 #endif
742     } else {
743         int host_page_size = qemu_real_host_page_size();
744         target_ulong start, len, i;
745         int prot;
746 
747         if (host_page_size <= TARGET_PAGE_SIZE) {
748             start = address & TARGET_PAGE_MASK;
749             len = TARGET_PAGE_SIZE;
750             prot = p->flags | PAGE_WRITE;
751             pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
752             current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
753         } else {
754             start = address & -host_page_size;
755             len = host_page_size;
756             prot = 0;
757 
758             for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
759                 target_ulong addr = start + i;
760 
761                 p = pageflags_find(addr, addr);
762                 if (p) {
763                     prot |= p->flags;
764                     if (p->flags & PAGE_WRITE_ORG) {
765                         prot |= PAGE_WRITE;
766                         pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
767                                             PAGE_WRITE, 0);
768                     }
769                 }
770                 /*
771                  * Since the content will be modified, we must invalidate
772                  * the corresponding translated code.
773                  */
774                 current_tb_invalidated |=
775                     tb_invalidate_phys_page_unwind(addr, pc);
776             }
777         }
778         if (prot & PAGE_EXEC) {
779             prot = (prot & ~PAGE_EXEC) | PAGE_READ;
780         }
781         mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
782     }
783     mmap_unlock();
784 
785     /* If the current TB was invalidated, return to the main loop. */
786     return current_tb_invalidated ? 2 : 1;
787 }
788 
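/*
 * Common access check for the probe_access* family: return 0 when the
 * page flags permit the access (or TLB_MMIO when plugin memory callbacks
 * must observe it), TLB_INVALID_MASK when @nonfault is set, and otherwise
 * raise SIGSEGV on behalf of the guest.
 */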
789 static int probe_access_internal(CPUArchState *env, vaddr addr,
790                                  int fault_size, MMUAccessType access_type,
791                                  bool nonfault, uintptr_t ra)
792 {
793     int acc_flag;
794     bool maperr;
795 
796     switch (access_type) {
797     case MMU_DATA_STORE:
798         acc_flag = PAGE_WRITE_ORG;
799         break;
800     case MMU_DATA_LOAD:
801         acc_flag = PAGE_READ;
802         break;
803     case MMU_INST_FETCH:
804         acc_flag = PAGE_EXEC;
805         break;
806     default:
807         g_assert_not_reached();
808     }
809 
810     if (guest_addr_valid_untagged(addr)) {
811         int page_flags = page_get_flags(addr);
812         if (page_flags & acc_flag) {
813             if (access_type != MMU_INST_FETCH
814                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
815                 return TLB_MMIO;
816             }
817             return 0; /* success */
818         }
819         maperr = !(page_flags & PAGE_VALID);
820     } else {
821         maperr = true;
822     }
823 
824     if (nonfault) {
825         return TLB_INVALID_MASK;
826     }
827 
828     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
829 }
830 
831 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
832                        MMUAccessType access_type, int mmu_idx,
833                        bool nonfault, void **phost, uintptr_t ra)
834 {
835     int flags;
836 
837     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
838     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
839     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
840     return flags;
841 }
842 
843 void *probe_access(CPUArchState *env, vaddr addr, int size,
844                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
845 {
846     int flags;
847 
848     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
849     flags = probe_access_internal(env, addr, size, access_type, false, ra);
850     g_assert((flags & ~TLB_MMIO) == 0);
851 
852     return size ? g2h(env_cpu(env), addr) : NULL;
853 }
854 
855 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
856                                         void **hostp)
857 {
858     int flags;
859 
860     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
861     g_assert(flags == 0);
862 
863     if (hostp) {
864         *hostp = g2h_untagged(addr);
865     }
866     return addr;
867 }
868 
869 #ifdef TARGET_PAGE_DATA_SIZE
870 /*
871  * Allocate chunks of target data together.  For the only current user,
872  * if we allocate one chunk per page, we have overhead of 40/128, about 31%.
873  * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
874  */
875 #define TPD_PAGES  64
876 #define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
877 
878 typedef struct TargetPageDataNode {
879     struct rcu_head rcu;
880     IntervalTreeNode itree;
881     char data[] __attribute__((aligned));
882 } TargetPageDataNode;
883 
884 static IntervalTreeRoot targetdata_root;
885 
886 void page_reset_target_data(target_ulong start, target_ulong last)
887 {
888     IntervalTreeNode *n, *next;
889 
890     assert_memory_lock();
891 
892     start &= TARGET_PAGE_MASK;
893     last |= ~TARGET_PAGE_MASK;
894 
895     for (n = interval_tree_iter_first(&targetdata_root, start, last),
896          next = n ? interval_tree_iter_next(n, start, last) : NULL;
897          n != NULL;
898          n = next,
899          next = next ? interval_tree_iter_next(n, start, last) : NULL) {
900         target_ulong n_start, n_last, p_ofs, p_len;
901         TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
902 
903         if (n->start >= start && n->last <= last) {
904             interval_tree_remove(n, &targetdata_root);
905             g_free_rcu(t, rcu);
906             continue;
907         }
908 
909         if (n->start < start) {
910             n_start = start;
911             p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
912         } else {
913             n_start = n->start;
914             p_ofs = 0;
915         }
916         n_last = MIN(last, n->last);
917         p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
918 
919         memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
920                p_len * TARGET_PAGE_DATA_SIZE);
921     }
922 }
923 
924 void *page_get_target_data(target_ulong address)
925 {
926     IntervalTreeNode *n;
927     TargetPageDataNode *t;
928     target_ulong page, region, p_ofs;
929 
930     page = address & TARGET_PAGE_MASK;
931     region = address & TBD_MASK;
932 
933     n = interval_tree_iter_first(&targetdata_root, page, page);
934     if (!n) {
935         /*
936          * See util/interval-tree.c re lockless lookups: no false positives
937          * but there are false negatives.  If we find nothing, retry with
938          * the mmap lock acquired.  We also need the lock for the
939          * allocation + insert.
940          */
941         mmap_lock();
942         n = interval_tree_iter_first(&targetdata_root, page, page);
943         if (!n) {
944             t = g_malloc0(sizeof(TargetPageDataNode)
945                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
946             n = &t->itree;
947             n->start = region;
948             n->last = region | ~TBD_MASK;
949             interval_tree_insert(n, &targetdata_root);
950         }
951         mmap_unlock();
952     }
953 
954     t = container_of(n, TargetPageDataNode, itree);
955     p_ofs = (page - region) >> TARGET_PAGE_BITS;
956     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
957 }
958 #else
959 void page_reset_target_data(target_ulong start, target_ulong last) { }
960 #endif /* TARGET_PAGE_DATA_SIZE */
961 
962 /* The system-mode versions of these helpers are in cputlb.c.  */
963 
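/*
 * Enforce the guest alignment required by @mop (raising SIGBUS on
 * failure) and translate @addr to a host pointer.  @ra is recorded as
 * helper_retaddr so a host fault can be unwound; the caller must call
 * clear_helper_retaddr() once the access is complete.
 */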
964 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
965                             MemOp mop, uintptr_t ra, MMUAccessType type)
966 {
967     int a_bits = memop_alignment_bits(mop);
968     void *ret;
969 
970     /* Enforce guest required alignment.  */
971     if (unlikely(addr & ((1 << a_bits) - 1))) {
972         cpu_loop_exit_sigbus(cpu, addr, type, ra);
973     }
974 
975     ret = g2h(cpu, addr);
976     set_helper_retaddr(ra);
977     return ret;
978 }
979 
980 /* physical memory access (slow version, mainly for debug) */
981 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
982                         void *ptr, size_t len, bool is_write)
983 {
984     int flags;
985     vaddr l, page;
986     uint8_t *buf = ptr;
987     ssize_t written;
988     int ret = -1;
989     int fd = -1;
990 
991     mmap_lock();
992 
993     while (len > 0) {
994         page = addr & TARGET_PAGE_MASK;
995         l = (page + TARGET_PAGE_SIZE) - addr;
996         if (l > len) {
997             l = len;
998         }
999         flags = page_get_flags(page);
1000         if (!(flags & PAGE_VALID)) {
1001             goto out_close;
1002         }
1003         if (is_write) {
1004             if (flags & PAGE_WRITE) {
1005                 memcpy(g2h(cpu, addr), buf, l);
1006             } else {
1007                 /* Bypass the host page protection via /proc/self/mem. */
1008                 if (fd == -1) {
1009                     fd = open("/proc/self/mem", O_WRONLY);
1010                     if (fd == -1) {
1011                         goto out;
1012                     }
1013                 }
1014                 /*
1015                  * If there is a TranslationBlock and we weren't bypassing the
1016                  * host page protection, the memcpy() above would SEGV,
1017                  * ultimately leading to page_unprotect(). So invalidate the
1018                  * translations manually. Both invalidation and pwrite() must
1019                  * be under mmap_lock() in order to prevent the creation of
1020                  * another TranslationBlock in between.
1021                  */
1022                 tb_invalidate_phys_range(addr, addr + l - 1);
1023                 written = pwrite(fd, buf, l,
1024                                  (off_t)(uintptr_t)g2h_untagged(addr));
1025                 if (written != l) {
1026                     goto out_close;
1027                 }
1028             }
1029         } else if (flags & PAGE_READ) {
1030             memcpy(buf, g2h(cpu, addr), l);
1031         } else {
1032             /* Bypass the host page protection via /proc/self/mem. */
1033             if (fd == -1) {
1034                 fd = open("/proc/self/mem", O_RDONLY);
1035                 if (fd == -1) {
1036                     goto out;
1037                 }
1038             }
1039             if (pread(fd, buf, l,
1040                       (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
1041                 goto out_close;
1042             }
1043         }
1044         len -= l;
1045         buf += l;
1046         addr += l;
1047     }
1048     ret = 0;
1049 out_close:
1050     if (fd != -1) {
1051         close(fd);
1052     }
1053 out:
1054     mmap_unlock();
1055 
1056     return ret;
1057 }
1058 
1059 #include "ldst_atomicity.c.inc"
1060 
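/*
 * Slow-path loads and stores: honour the required memory ordering via
 * cpu_req_mo(), translate the address, perform the access with the
 * atomicity demanded by the MemOp, and byte-swap when MO_BSWAP is set.
 */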
1061 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1062                           uintptr_t ra, MMUAccessType access_type)
1063 {
1064     void *haddr;
1065     uint8_t ret;
1066 
1067     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1068     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
1069     ret = ldub_p(haddr);
1070     clear_helper_retaddr();
1071     return ret;
1072 }
1073 
1074 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1075                            uintptr_t ra, MMUAccessType access_type)
1076 {
1077     void *haddr;
1078     uint16_t ret;
1079     MemOp mop = get_memop(oi);
1080 
1081     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1082     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1083     ret = load_atom_2(cpu, ra, haddr, mop);
1084     clear_helper_retaddr();
1085 
1086     if (mop & MO_BSWAP) {
1087         ret = bswap16(ret);
1088     }
1089     return ret;
1090 }
1091 
1092 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1093                            uintptr_t ra, MMUAccessType access_type)
1094 {
1095     void *haddr;
1096     uint32_t ret;
1097     MemOp mop = get_memop(oi);
1098 
1099     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1100     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1101     ret = load_atom_4(cpu, ra, haddr, mop);
1102     clear_helper_retaddr();
1103 
1104     if (mop & MO_BSWAP) {
1105         ret = bswap32(ret);
1106     }
1107     return ret;
1108 }
1109 
1110 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1111                            uintptr_t ra, MMUAccessType access_type)
1112 {
1113     void *haddr;
1114     uint64_t ret;
1115     MemOp mop = get_memop(oi);
1116 
1117     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1118     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1119     ret = load_atom_8(cpu, ra, haddr, mop);
1120     clear_helper_retaddr();
1121 
1122     if (mop & MO_BSWAP) {
1123         ret = bswap64(ret);
1124     }
1125     return ret;
1126 }
1127 
1128 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1129                           MemOpIdx oi, uintptr_t ra)
1130 {
1131     void *haddr;
1132     Int128 ret;
1133     MemOp mop = get_memop(oi);
1134 
1135     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1136     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1137     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1138     ret = load_atom_16(cpu, ra, haddr, mop);
1139     clear_helper_retaddr();
1140 
1141     if (mop & MO_BSWAP) {
1142         ret = bswap128(ret);
1143     }
1144     return ret;
1145 }
1146 
1147 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1148                        MemOpIdx oi, uintptr_t ra)
1149 {
1150     void *haddr;
1151 
1152     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1153     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1154     stb_p(haddr, val);
1155     clear_helper_retaddr();
1156 }
1157 
1158 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1159                        MemOpIdx oi, uintptr_t ra)
1160 {
1161     void *haddr;
1162     MemOp mop = get_memop(oi);
1163 
1164     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1165     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1166 
1167     if (mop & MO_BSWAP) {
1168         val = bswap16(val);
1169     }
1170     store_atom_2(cpu, ra, haddr, mop, val);
1171     clear_helper_retaddr();
1172 }
1173 
1174 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1175                        MemOpIdx oi, uintptr_t ra)
1176 {
1177     void *haddr;
1178     MemOp mop = get_memop(oi);
1179 
1180     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1181     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1182 
1183     if (mop & MO_BSWAP) {
1184         val = bswap32(val);
1185     }
1186     store_atom_4(cpu, ra, haddr, mop, val);
1187     clear_helper_retaddr();
1188 }
1189 
1190 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1191                        MemOpIdx oi, uintptr_t ra)
1192 {
1193     void *haddr;
1194     MemOp mop = get_memop(oi);
1195 
1196     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1197     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1198 
1199     if (mop & MO_BSWAP) {
1200         val = bswap64(val);
1201     }
1202     store_atom_8(cpu, ra, haddr, mop, val);
1203     clear_helper_retaddr();
1204 }
1205 
1206 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1207                         MemOpIdx oi, uintptr_t ra)
1208 {
1209     void *haddr;
1210     MemOp mop = get_memop(oi);
1211 
1212     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1213     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1214 
1215     if (mop & MO_BSWAP) {
1216         val = bswap128(val);
1217     }
1218     store_atom_16(cpu, ra, haddr, mop, val);
1219     clear_helper_retaddr();
1220 }
1221 
1222 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
1223                          MemOpIdx oi, uintptr_t ra)
1224 {
1225     return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1226 }
1227 
1228 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
1229                           MemOpIdx oi, uintptr_t ra)
1230 {
1231     return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1232 }
1233 
1234 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
1235                           MemOpIdx oi, uintptr_t ra)
1236 {
1237     return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1238 }
1239 
1240 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
1241                           MemOpIdx oi, uintptr_t ra)
1242 {
1243     return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1244 }
1245 
1246 #include "ldst_common.c.inc"
1247 
1248 /*
1249  * Do not allow unaligned operations to proceed.  Return the host address.
1250  */
1251 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1252                                int size, uintptr_t retaddr)
1253 {
1254     MemOp mop = get_memop(oi);
1255     int a_bits = memop_alignment_bits(mop);
1256     void *ret;
1257 
1258     /* Enforce guest required alignment.  */
1259     if (unlikely(addr & ((1 << a_bits) - 1))) {
1260         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1261     }
1262 
1263     /* Enforce qemu required alignment.  */
1264     if (unlikely(addr & (size - 1))) {
1265         cpu_loop_exit_atomic(cpu, retaddr);
1266     }
1267 
1268     ret = g2h(cpu, addr);
1269     set_helper_retaddr(retaddr);
1270     return ret;
1271 }
1272 
1273 #include "atomic_common.c.inc"
1274 
1275 /*
1276  * First set of functions passes in OI and RETADDR.
1277  * This makes them callable from other helpers.
1278  */
1279 
1280 #define ATOMIC_NAME(X) \
1281     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
1282 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
1283 
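/*
 * Each inclusion of atomic_template.h below instantiates the
 * cpu_atomic_*_mmu helpers (e.g. cmpxchg and fetch-op variants) for one
 * operand size, all funnelled through atomic_mmu_lookup() above.
 */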
1284 #define DATA_SIZE 1
1285 #include "atomic_template.h"
1286 
1287 #define DATA_SIZE 2
1288 #include "atomic_template.h"
1289 
1290 #define DATA_SIZE 4
1291 #include "atomic_template.h"
1292 
1293 #ifdef CONFIG_ATOMIC64
1294 #define DATA_SIZE 8
1295 #include "atomic_template.h"
1296 #endif
1297 
1298 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
1299 #define DATA_SIZE 16
1300 #include "atomic_template.h"
1301 #endif
1302