xref: /qemu/accel/tcg/user-exec.c (revision 0baf907b718e1602383b973de7822c25db4c4a36)
1 /*
2  *  User emulator execution
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "accel/tcg/cpu-ops.h"
21 #include "disas/disas.h"
22 #include "cpu.h"
23 #include "exec/vaddr.h"
24 #include "exec/tlb-flags.h"
25 #include "tcg/tcg.h"
26 #include "qemu/bitops.h"
27 #include "qemu/rcu.h"
28 #include "accel/tcg/cpu-ldst.h"
29 #include "accel/tcg/helper-retaddr.h"
30 #include "accel/tcg/probe.h"
31 #include "user/cpu_loop.h"
32 #include "qemu/main-loop.h"
33 #include "user/page-protection.h"
34 #include "exec/page-protection.h"
35 #include "exec/helper-proto.h"
36 #include "qemu/atomic128.h"
37 #include "qemu/bswap.h"
38 #include "qemu/int128.h"
39 #include "trace.h"
40 #include "tcg/tcg-ldst.h"
41 #include "backend-ldst.h"
42 #include "internal-common.h"
43 #include "tb-internal.h"
44 
/*
 * Per-thread marker consulted by adjust_signal_pc() when a host fault is
 * taken: 0 means the fault is attributed to generated code, 1 means it
 * happened during a host read for translation, and any other value is
 * used as the host return address of the helper that was running.
 */
__thread uintptr_t helper_retaddr;
46 
47 //#define DEBUG_SIGNAL
48 
/*
 * Record the pending interrupt bits in @mask on @cpu.
 *
 * The caller must hold the BQL (asserted).  After or-ing @mask into
 * cpu->interrupt_request, the high half of icount_decr is atomically
 * set to -1; presumably this makes the running TB exit promptly so the
 * request is noticed -- NOTE(review): confirm against the TB prologue.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(bql_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
}
55 
56 /*
57  * Adjust the pc to pass to cpu_restore_state; return the memop type.
58  */
59 MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
60 {
61     switch (helper_retaddr) {
62     default:
63         /*
64          * Fault during host memory operation within a helper function.
65          * The helper's host return address, saved here, gives us a
66          * pointer into the generated code that will unwind to the
67          * correct guest pc.
68          */
69         *pc = helper_retaddr;
70         break;
71 
72     case 0:
73         /*
74          * Fault during host memory operation within generated code.
75          * (Or, a unrelated bug within qemu, but we can't tell from here).
76          *
77          * We take the host pc from the signal frame.  However, we cannot
78          * use that value directly.  Within cpu_restore_state_from_tb, we
79          * assume PC comes from GETPC(), as used by the helper functions,
80          * so we adjust the address by -GETPC_ADJ to form an address that
81          * is within the call insn, so that the address does not accidentally
82          * match the beginning of the next guest insn.  However, when the
83          * pc comes from the signal frame it points to the actual faulting
84          * host memory insn and not the return from a call insn.
85          *
86          * Therefore, adjust to compensate for what will be done later
87          * by cpu_restore_state_from_tb.
88          */
89         *pc += GETPC_ADJ;
90         break;
91 
92     case 1:
93         /*
94          * Fault during host read for translation, or loosely, "execution".
95          *
96          * The guest pc is already pointing to the start of the TB for which
97          * code is being generated.  If the guest translator manages the
98          * page crossings correctly, this is exactly the correct address
99          * (and if the translator doesn't handle page boundaries correctly
100          * there's little we can do about that here).  Therefore, do not
101          * trigger the unwinder.
102          */
103         *pc = 0;
104         return MMU_INST_FETCH;
105     }
106 
107     return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
108 }
109 
110 /**
111  * handle_sigsegv_accerr_write:
112  * @cpu: the cpu context
113  * @old_set: the sigset_t from the signal ucontext_t
114  * @host_pc: the host pc, adjusted for the signal
115  * @guest_addr: the guest address of the fault
116  *
117  * Return true if the write fault has been handled, and should be re-tried.
118  *
119  * Note that it is important that we don't call page_unprotect() unless
120  * this is really a "write to nonwritable page" fault, because
121  * page_unprotect() assumes that if it is called for an access to
122  * a page that's writable this means we had two threads racing and
123  * another thread got there first and already made the page writable;
124  * so we will retry the access. If we were to call page_unprotect()
125  * for some other kind of fault that should really be passed to the
126  * guest, we'd end up in an infinite loop of retrying the faulting access.
127  */
128 bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
129                                  uintptr_t host_pc, abi_ptr guest_addr)
130 {
131     switch (page_unprotect(cpu, guest_addr, host_pc)) {
132     case 0:
133         /*
134          * Fault not caused by a page marked unwritable to protect
135          * cached translations, must be the guest binary's problem.
136          */
137         return false;
138     case 1:
139         /*
140          * Fault caused by protection of cached translation; TBs
141          * invalidated, so resume execution.
142          */
143         return true;
144     case 2:
145         /*
146          * Fault caused by protection of cached translation, and the
147          * currently executing TB was modified and must be exited immediately.
148          */
149         sigprocmask(SIG_SETMASK, old_set, NULL);
150         cpu_loop_exit_noexc(cpu);
151         /* NORETURN */
152     default:
153         g_assert_not_reached();
154     }
155 }
156 
/*
 * One entry of the guest page-flags interval tree: a contiguous guest
 * address range, inclusive at both ends, whose pages share one set of
 * PAGE_* flag bits.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;        /* used when the node is freed via g_free_rcu() */
    IntervalTreeNode itree;     /* itree.start .. itree.last, both inclusive */
    int flags;                  /* PAGE_* bits for every page in the range */
} PageFlagsNode;

/* Root of the interval tree holding all PageFlagsNode entries. */
static IntervalTreeRoot pageflags_root;
164 
165 static PageFlagsNode *pageflags_find(vaddr start, vaddr last)
166 {
167     IntervalTreeNode *n;
168 
169     n = interval_tree_iter_first(&pageflags_root, start, last);
170     return n ? container_of(n, PageFlagsNode, itree) : NULL;
171 }
172 
173 static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last)
174 {
175     IntervalTreeNode *n;
176 
177     n = interval_tree_iter_next(&p->itree, start, last);
178     return n ? container_of(n, PageFlagsNode, itree) : NULL;
179 }
180 
181 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
182 {
183     IntervalTreeNode *n;
184     int rc = 0;
185 
186     mmap_lock();
187     for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
188          n != NULL;
189          n = interval_tree_iter_next(n, 0, -1)) {
190         PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
191 
192         rc = fn(priv, n->start, n->last + 1, p->flags);
193         if (rc != 0) {
194             break;
195         }
196     }
197     mmap_unlock();
198 
199     return rc;
200 }
201 
202 static int dump_region(void *opaque, vaddr start, vaddr end, int prot)
203 {
204     FILE *f = opaque;
205 
206     fprintf(f, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr
207             " " TARGET_ABI_FMT_ptr " %c%c%c\n",
208             (abi_ptr)start, (abi_ptr)end, (abi_ptr)(end - start),
209             ((prot & PAGE_READ) ? 'r' : '-'),
210             ((prot & PAGE_WRITE) ? 'w' : '-'),
211             ((prot & PAGE_EXEC) ? 'x' : '-'));
212     return 0;
213 }
214 
215 /* dump memory mappings */
216 void page_dump(FILE *f)
217 {
218     const int length = sizeof(abi_ptr) * 2;
219 
220     fprintf(f, "%-*s %-*s %-*s %s\n",
221             length, "start", length, "end", length, "size", "prot");
222     walk_memory_regions(f, dump_region);
223 }
224 
225 int page_get_flags(vaddr address)
226 {
227     PageFlagsNode *p = pageflags_find(address, address);
228 
229     /*
230      * See util/interval-tree.c re lockless lookups: no false positives but
231      * there are false negatives.  If we find nothing, retry with the mmap
232      * lock acquired.
233      */
234     if (p) {
235         return p->flags;
236     }
237     if (have_mmap_lock()) {
238         return 0;
239     }
240 
241     mmap_lock();
242     p = pageflags_find(address, address);
243     mmap_unlock();
244     return p ? p->flags : 0;
245 }
246 
247 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
248 static void pageflags_create(vaddr start, vaddr last, int flags)
249 {
250     PageFlagsNode *p = g_new(PageFlagsNode, 1);
251 
252     p->itree.start = start;
253     p->itree.last = last;
254     p->flags = flags;
255     interval_tree_insert(&p->itree, &pageflags_root);
256 }
257 
/*
 * A subroutine of page_set_flags: remove everything in [start,last].
 *
 * Nodes that only partially overlap the range are trimmed, or split in
 * two, so that the parts lying outside [start,last] keep their flags.
 *
 * Returns true if any overlapping node had PAGE_EXEC set; page_set_flags
 * uses this to decide whether translated code must be invalidated.
 */
static bool pageflags_unset(vaddr start, vaddr last)
{
    bool inval_tb = false;

    /* Process overlapping nodes one at a time until none remain. */
    while (true) {
        PageFlagsNode *p = pageflags_find(start, last);
        vaddr p_last;

        if (!p) {
            break;
        }

        if (p->flags & PAGE_EXEC) {
            inval_tb = true;
        }

        /* Take the node out of the tree before changing its bounds. */
        interval_tree_remove(&p->itree, &pageflags_root);
        p_last = p->itree.last;

        if (p->itree.start < start) {
            /* Truncate the node from the end, or split out the middle. */
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            if (last < p_last) {
                /* The node extended past the range: re-create its tail. */
                pageflags_create(last + 1, p_last, p->flags);
                break;
            }
        } else if (p_last <= last) {
            /* Range completely covers node -- remove it. */
            g_free_rcu(p, rcu);
        } else {
            /* Truncate the node from the start. */
            p->itree.start = last + 1;
            interval_tree_insert(&p->itree, &pageflags_root);
            break;
        }
    }

    return inval_tb;
}
299 
/*
 * A subroutine of page_set_flags: nothing overlaps [start,last],
 * but check adjacent mappings and maybe merge into a single range.
 *
 * A neighbour is merged only when its flags equal @flags exactly.
 * The outcome is one of: extend the previous node (possibly swallowing
 * the next one too), extend the next node downwards, or create a new
 * node.
 */
static void pageflags_create_merge(vaddr start, vaddr last, int flags)
{
    PageFlagsNode *next = NULL, *prev = NULL;

    /* Look at the byte just below the range, unless start is address 0. */
    if (start > 0) {
        prev = pageflags_find(start - 1, start - 1);
        if (prev) {
            if (prev->flags == flags) {
                /* Mergeable: take it out of the tree so it can be resized. */
                interval_tree_remove(&prev->itree, &pageflags_root);
            } else {
                prev = NULL;
            }
        }
    }
    /* Look at the byte just above the range, unless last + 1 wraps to 0. */
    if (last + 1 != 0) {
        next = pageflags_find(last + 1, last + 1);
        if (next) {
            if (next->flags == flags) {
                interval_tree_remove(&next->itree, &pageflags_root);
            } else {
                next = NULL;
            }
        }
    }

    if (prev) {
        if (next) {
            /* Both neighbours match: prev absorbs the range and next. */
            prev->itree.last = next->itree.last;
            g_free_rcu(next, rcu);
        } else {
            prev->itree.last = last;
        }
        interval_tree_insert(&prev->itree, &pageflags_root);
    } else if (next) {
        next->itree.start = start;
        interval_tree_insert(&next->itree, &pageflags_root);
    } else {
        pageflags_create(start, last, flags);
    }
}
344 
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
/*
 * Flag bits that page_set_flags() preserves on existing mappings unless
 * the caller passes PAGE_RESET.
 */
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
353 
/*
 * A subroutine of page_set_flags: update the flags of [start,last].
 *
 * For each existing node overlapping the range, the new flags are
 * (old_flags & ~clear_flags) | set_flags.  Existing nodes are split as
 * needed so that only the part inside [start,last] changes.  When no
 * node overlaps the (remaining) range and @set_flags is non-zero, a
 * node with @set_flags is created, merging with equal neighbours.
 *
 * The function handles one overlapping node per pass and jumps back to
 * "restart" while part of the range is still unprocessed.
 *
 * Returns true if an overlapping PAGE_EXEC region loses PAGE_EXEC or
 * gains PAGE_WRITE, i.e. translated code needs to be invalidated.
 */
static bool pageflags_set_clear(vaddr start, vaddr last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    vaddr p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

 restart:
    p = pageflags_find(start, last);
    if (!p) {
        /* Nothing overlaps: create the range only if there is something to set. */
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            /* No flags left: the mapping entry disappears. */
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Keep the head of the node, below the range, unchanged. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range lies strictly inside the node: middle plus tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                if (p_last < last) {
                    /* Range continues past this node: handle the rest. */
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* Fill any gap before the node with the plain set_flags. */
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Node extends past the range: keep its tail unchanged. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* Node ends within the range: update or drop it whole. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Grow the node downwards to cover the start of the range. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        if (p_last < last) {
            /* More nodes may overlap the range: look again. */
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node lies entirely within the range: discard it and look again. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
493 
494 void page_set_flags(vaddr start, vaddr last, int flags)
495 {
496     bool reset = false;
497     bool inval_tb = false;
498 
499     /* This function should never be called with addresses outside the
500        guest address space.  If this assert fires, it probably indicates
501        a missing call to h2g_valid.  */
502     assert(start <= last);
503     assert(last <= GUEST_ADDR_MAX);
504     /* Only set PAGE_ANON with new mappings. */
505     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
506     assert_memory_lock();
507 
508     start &= TARGET_PAGE_MASK;
509     last |= ~TARGET_PAGE_MASK;
510 
511     if (!(flags & PAGE_VALID)) {
512         flags = 0;
513     } else {
514         reset = flags & PAGE_RESET;
515         flags &= ~PAGE_RESET;
516         if (flags & PAGE_WRITE) {
517             flags |= PAGE_WRITE_ORG;
518         }
519     }
520 
521     if (!flags || reset) {
522         page_reset_target_data(start, last);
523         inval_tb |= pageflags_unset(start, last);
524     }
525     if (flags) {
526         inval_tb |= pageflags_set_clear(start, last, flags,
527                                         ~(reset ? 0 : PAGE_STICKY));
528     }
529     if (inval_tb) {
530         tb_invalidate_phys_range(NULL, start, last);
531     }
532 }
533 
534 bool page_check_range(vaddr start, vaddr len, int flags)
535 {
536     vaddr last;
537     int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
538     bool ret;
539 
540     if (len == 0) {
541         return true;  /* trivial length */
542     }
543 
544     last = start + len - 1;
545     if (last < start) {
546         return false; /* wrap around */
547     }
548 
549     locked = have_mmap_lock();
550     while (true) {
551         PageFlagsNode *p = pageflags_find(start, last);
552         int missing;
553 
554         if (!p) {
555             if (!locked) {
556                 /*
557                  * Lockless lookups have false negatives.
558                  * Retry with the lock held.
559                  */
560                 mmap_lock();
561                 locked = -1;
562                 p = pageflags_find(start, last);
563             }
564             if (!p) {
565                 ret = false; /* entire region invalid */
566                 break;
567             }
568         }
569         if (start < p->itree.start) {
570             ret = false; /* initial bytes invalid */
571             break;
572         }
573 
574         missing = flags & ~p->flags;
575         if (missing & ~PAGE_WRITE) {
576             ret = false; /* page doesn't match */
577             break;
578         }
579         if (missing & PAGE_WRITE) {
580             if (!(p->flags & PAGE_WRITE_ORG)) {
581                 ret = false; /* page not writable */
582                 break;
583             }
584             /* Asking about writable, but has been protected: undo. */
585             if (!page_unprotect(NULL, start, 0)) {
586                 ret = false;
587                 break;
588             }
589             /* TODO: page_unprotect should take a range, not a single page. */
590             if (last - start < TARGET_PAGE_SIZE) {
591                 ret = true; /* ok */
592                 break;
593             }
594             start += TARGET_PAGE_SIZE;
595             continue;
596         }
597 
598         if (last <= p->itree.last) {
599             ret = true; /* ok */
600             break;
601         }
602         start = p->itree.last + 1;
603     }
604 
605     /* Release the lock if acquired locally. */
606     if (locked < 0) {
607         mmap_unlock();
608     }
609     return ret;
610 }
611 
612 bool page_check_range_empty(vaddr start, vaddr last)
613 {
614     assert(last >= start);
615     assert_memory_lock();
616     return pageflags_find(start, last) == NULL;
617 }
618 
619 vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align)
620 {
621     vaddr len_m1, align_m1;
622 
623     assert(min <= max);
624     assert(max <= GUEST_ADDR_MAX);
625     assert(len != 0);
626     assert(is_power_of_2(align));
627     assert_memory_lock();
628 
629     len_m1 = len - 1;
630     align_m1 = align - 1;
631 
632     /* Iteratively narrow the search region. */
633     while (1) {
634         PageFlagsNode *p;
635 
636         /* Align min and double-check there's enough space remaining. */
637         min = (min + align_m1) & ~align_m1;
638         if (min > max) {
639             return -1;
640         }
641         if (len_m1 > max - min) {
642             return -1;
643         }
644 
645         p = pageflags_find(min, min + len_m1);
646         if (p == NULL) {
647             /* Found! */
648             return min;
649         }
650         if (max <= p->itree.last) {
651             /* Existing allocation fills the remainder of the search region. */
652             return -1;
653         }
654         /* Skip across existing allocation. */
655         min = p->itree.last + 1;
656     }
657 }
658 
659 void tb_lock_page0(tb_page_addr_t address)
660 {
661     PageFlagsNode *p;
662     vaddr start, last;
663     int host_page_size = qemu_real_host_page_size();
664     int prot;
665 
666     assert_memory_lock();
667 
668     if (host_page_size <= TARGET_PAGE_SIZE) {
669         start = address & TARGET_PAGE_MASK;
670         last = start + TARGET_PAGE_SIZE - 1;
671     } else {
672         start = address & -host_page_size;
673         last = start + host_page_size - 1;
674     }
675 
676     p = pageflags_find(start, last);
677     if (!p) {
678         return;
679     }
680     prot = p->flags;
681 
682     if (unlikely(p->itree.last < last)) {
683         /* More than one protection region covers the one host page. */
684         assert(TARGET_PAGE_SIZE < host_page_size);
685         while ((p = pageflags_next(p, start, last)) != NULL) {
686             prot |= p->flags;
687         }
688     }
689 
690     if (prot & PAGE_WRITE) {
691         pageflags_set_clear(start, last, 0, PAGE_WRITE);
692         mprotect(g2h_untagged(start), last - start + 1,
693                  prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
694     }
695 }
696 
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @cpu and @pc must either both be given or both be NULL/0 (asserted).
 * The mmap lock is taken for the duration of the call.
 */
int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    assert((cpu == NULL) == (pc == 0));

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
        if (pc && cpu->cc->tcg_ops->precise_smc) {
            /* Report whether the TB containing pc has been marked invalid. */
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
        }
    } else {
        int host_page_size = qemu_real_host_page_size();
        vaddr start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* A single target page is the unit of protection. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated =
                tb_invalidate_phys_page_unwind(cpu, start, pc);
        } else {
            /*
             * The host page spans several target pages: visit each one,
             * accumulating the union of their flags in prot.
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                vaddr addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(cpu, addr, pc);
            }
        }
        /* On the host, guest-executable pages are mapped readable instead. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
786 
787 static int probe_access_internal(CPUArchState *env, vaddr addr,
788                                  int fault_size, MMUAccessType access_type,
789                                  bool nonfault, uintptr_t ra)
790 {
791     int acc_flag;
792     bool maperr;
793 
794     switch (access_type) {
795     case MMU_DATA_STORE:
796         acc_flag = PAGE_WRITE_ORG;
797         break;
798     case MMU_DATA_LOAD:
799         acc_flag = PAGE_READ;
800         break;
801     case MMU_INST_FETCH:
802         acc_flag = PAGE_EXEC;
803         break;
804     default:
805         g_assert_not_reached();
806     }
807 
808     if (guest_addr_valid_untagged(addr)) {
809         int page_flags = page_get_flags(addr);
810         if (page_flags & acc_flag) {
811             if (access_type != MMU_INST_FETCH
812                 && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
813                 return TLB_MMIO;
814             }
815             return 0; /* success */
816         }
817         maperr = !(page_flags & PAGE_VALID);
818     } else {
819         maperr = true;
820     }
821 
822     if (nonfault) {
823         return TLB_INVALID_MASK;
824     }
825 
826     cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
827 }
828 
829 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
830                        MMUAccessType access_type, int mmu_idx,
831                        bool nonfault, void **phost, uintptr_t ra)
832 {
833     int flags;
834 
835     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
836     flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
837     *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
838     return flags;
839 }
840 
841 void *probe_access(CPUArchState *env, vaddr addr, int size,
842                    MMUAccessType access_type, int mmu_idx, uintptr_t ra)
843 {
844     int flags;
845 
846     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
847     flags = probe_access_internal(env, addr, size, access_type, false, ra);
848     g_assert((flags & ~TLB_MMIO) == 0);
849 
850     return size ? g2h(env_cpu(env), addr) : NULL;
851 }
852 
853 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
854                                         void **hostp)
855 {
856     int flags;
857 
858     flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
859     g_assert(flags == 0);
860 
861     if (hostp) {
862         *hostp = g2h_untagged(addr);
863     }
864     return addr;
865 }
866 
867 #ifdef TARGET_PAGE_DATA_SIZE
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128 or about 31%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 */
#define TPD_PAGES  64
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)

/*
 * One node holds the target data for a group of TPD_PAGES consecutive
 * pages, aligned according to TBD_MASK.
 */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;        /* used when the node is freed via g_free_rcu() */
    IntervalTreeNode itree;     /* guest range covered, inclusive */
    /* TPD_PAGES slots of TARGET_PAGE_DATA_SIZE bytes each. */
    char data[] __attribute__((aligned));
} TargetPageDataNode;

/* Root of the interval tree holding all TargetPageDataNode entries. */
static IntervalTreeRoot targetdata_root;
883 
/*
 * Discard the per-page target data for the pages covering [start,last].
 *
 * Nodes lying entirely inside the range are removed and freed; for a
 * node that only partially overlaps, the slots of the affected pages
 * are zeroed in place.  Must be called with the mmap lock held.
 */
void page_reset_target_data(vaddr start, vaddr last)
{
    IntervalTreeNode *n, *next;

    assert_memory_lock();

    /* Widen the range to whole target pages. */
    start &= TARGET_PAGE_MASK;
    last |= ~TARGET_PAGE_MASK;

    /*
     * The successor is fetched before the body runs, because the body
     * may remove the current node from the tree.
     */
    for (n = interval_tree_iter_first(&targetdata_root, start, last),
         next = n ? interval_tree_iter_next(n, start, last) : NULL;
         n != NULL;
         n = next,
         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
        vaddr n_start, n_last, p_ofs, p_len;
        TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);

        /* Node fully covered by the range: drop it altogether. */
        if (n->start >= start && n->last <= last) {
            interval_tree_remove(n, &targetdata_root);
            g_free_rcu(t, rcu);
            continue;
        }

        /* Partial overlap: compute the first affected page slot... */
        if (n->start < start) {
            n_start = start;
            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
        } else {
            n_start = n->start;
            p_ofs = 0;
        }
        /* ...and the number of affected pages within this node. */
        n_last = MIN(last, n->last);
        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;

        memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0,
               p_len * TARGET_PAGE_DATA_SIZE);
    }
}
921 
922 void *page_get_target_data(vaddr address)
923 {
924     IntervalTreeNode *n;
925     TargetPageDataNode *t;
926     vaddr page, region, p_ofs;
927 
928     page = address & TARGET_PAGE_MASK;
929     region = address & TBD_MASK;
930 
931     n = interval_tree_iter_first(&targetdata_root, page, page);
932     if (!n) {
933         /*
934          * See util/interval-tree.c re lockless lookups: no false positives
935          * but there are false negatives.  If we find nothing, retry with
936          * the mmap lock acquired.  We also need the lock for the
937          * allocation + insert.
938          */
939         mmap_lock();
940         n = interval_tree_iter_first(&targetdata_root, page, page);
941         if (!n) {
942             t = g_malloc0(sizeof(TargetPageDataNode)
943                           + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
944             n = &t->itree;
945             n->start = region;
946             n->last = region | ~TBD_MASK;
947             interval_tree_insert(n, &targetdata_root);
948         }
949         mmap_unlock();
950     }
951 
952     t = container_of(n, TargetPageDataNode, itree);
953     p_ofs = (page - region) >> TARGET_PAGE_BITS;
954     return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
955 }
956 #else
957 void page_reset_target_data(vaddr start, vaddr last) { }
958 #endif /* TARGET_PAGE_DATA_SIZE */
959 
960 /* The system-mode versions of these helpers are in cputlb.c.  */
961 
962 static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
963                             MemOp mop, uintptr_t ra, MMUAccessType type)
964 {
965     int a_bits = memop_alignment_bits(mop);
966     void *ret;
967 
968     /* Enforce guest required alignment.  */
969     if (unlikely(addr & ((1 << a_bits) - 1))) {
970         cpu_loop_exit_sigbus(cpu, addr, type, ra);
971     }
972 
973     ret = g2h(cpu, addr);
974     set_helper_retaddr(ra);
975     return ret;
976 }
977 
978 /* physical memory access (slow version, mainly for debug) */
979 int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
980                         void *ptr, size_t len, bool is_write)
981 {
982     int flags;
983     vaddr l, page;
984     uint8_t *buf = ptr;
985     ssize_t written;
986     int ret = -1;
987     int fd = -1;
988 
989     mmap_lock();
990 
991     while (len > 0) {
992         page = addr & TARGET_PAGE_MASK;
993         l = (page + TARGET_PAGE_SIZE) - addr;
994         if (l > len) {
995             l = len;
996         }
997         flags = page_get_flags(page);
998         if (!(flags & PAGE_VALID)) {
999             goto out_close;
1000         }
1001         if (is_write) {
1002             if (flags & PAGE_WRITE) {
1003                 memcpy(g2h(cpu, addr), buf, l);
1004             } else {
1005                 /* Bypass the host page protection using ptrace. */
1006                 if (fd == -1) {
1007                     fd = open("/proc/self/mem", O_WRONLY);
1008                     if (fd == -1) {
1009                         goto out;
1010                     }
1011                 }
1012                 /*
1013                  * If there is a TranslationBlock and we weren't bypassing the
1014                  * host page protection, the memcpy() above would SEGV,
1015                  * ultimately leading to page_unprotect(). So invalidate the
1016                  * translations manually. Both invalidation and pwrite() must
1017                  * be under mmap_lock() in order to prevent the creation of
1018                  * another TranslationBlock in between.
1019                  */
1020                 tb_invalidate_phys_range(NULL, addr, addr + l - 1);
1021                 written = pwrite(fd, buf, l,
1022                                  (off_t)(uintptr_t)g2h_untagged(addr));
1023                 if (written != l) {
1024                     goto out_close;
1025                 }
1026             }
1027         } else if (flags & PAGE_READ) {
1028             memcpy(buf, g2h(cpu, addr), l);
1029         } else {
1030             /* Bypass the host page protection using ptrace. */
1031             if (fd == -1) {
1032                 fd = open("/proc/self/mem", O_RDONLY);
1033                 if (fd == -1) {
1034                     goto out;
1035                 }
1036             }
1037             if (pread(fd, buf, l,
1038                       (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
1039                 goto out_close;
1040             }
1041         }
1042         len -= l;
1043         buf += l;
1044         addr += l;
1045     }
1046     ret = 0;
1047 out_close:
1048     if (fd != -1) {
1049         close(fd);
1050     }
1051 out:
1052     mmap_unlock();
1053 
1054     return ret;
1055 }
1056 
1057 #include "ldst_atomicity.c.inc"
1058 
1059 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1060                           uintptr_t ra, MMUAccessType access_type)
1061 {
1062     void *haddr;
1063     uint8_t ret;
1064 
1065     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1066     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
1067     ret = ldub_p(haddr);
1068     clear_helper_retaddr();
1069     return ret;
1070 }
1071 
1072 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1073                            uintptr_t ra, MMUAccessType access_type)
1074 {
1075     void *haddr;
1076     uint16_t ret;
1077     MemOp mop = get_memop(oi);
1078 
1079     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1080     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1081     ret = load_atom_2(cpu, ra, haddr, mop);
1082     clear_helper_retaddr();
1083 
1084     if (mop & MO_BSWAP) {
1085         ret = bswap16(ret);
1086     }
1087     return ret;
1088 }
1089 
1090 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1091                            uintptr_t ra, MMUAccessType access_type)
1092 {
1093     void *haddr;
1094     uint32_t ret;
1095     MemOp mop = get_memop(oi);
1096 
1097     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1098     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1099     ret = load_atom_4(cpu, ra, haddr, mop);
1100     clear_helper_retaddr();
1101 
1102     if (mop & MO_BSWAP) {
1103         ret = bswap32(ret);
1104     }
1105     return ret;
1106 }
1107 
1108 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1109                            uintptr_t ra, MMUAccessType access_type)
1110 {
1111     void *haddr;
1112     uint64_t ret;
1113     MemOp mop = get_memop(oi);
1114 
1115     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1116     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1117     ret = load_atom_8(cpu, ra, haddr, mop);
1118     clear_helper_retaddr();
1119 
1120     if (mop & MO_BSWAP) {
1121         ret = bswap64(ret);
1122     }
1123     return ret;
1124 }
1125 
1126 static Int128 do_ld16_mmu(CPUState *cpu, abi_ptr addr,
1127                           MemOpIdx oi, uintptr_t ra)
1128 {
1129     void *haddr;
1130     Int128 ret;
1131     MemOp mop = get_memop(oi);
1132 
1133     tcg_debug_assert((mop & MO_SIZE) == MO_128);
1134     cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1135     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1136     ret = load_atom_16(cpu, ra, haddr, mop);
1137     clear_helper_retaddr();
1138 
1139     if (mop & MO_BSWAP) {
1140         ret = bswap128(ret);
1141     }
1142     return ret;
1143 }
1144 
1145 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1146                        MemOpIdx oi, uintptr_t ra)
1147 {
1148     void *haddr;
1149 
1150     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1151     haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1152     stb_p(haddr, val);
1153     clear_helper_retaddr();
1154 }
1155 
1156 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1157                        MemOpIdx oi, uintptr_t ra)
1158 {
1159     void *haddr;
1160     MemOp mop = get_memop(oi);
1161 
1162     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1163     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1164 
1165     if (mop & MO_BSWAP) {
1166         val = bswap16(val);
1167     }
1168     store_atom_2(cpu, ra, haddr, mop, val);
1169     clear_helper_retaddr();
1170 }
1171 
1172 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1173                        MemOpIdx oi, uintptr_t ra)
1174 {
1175     void *haddr;
1176     MemOp mop = get_memop(oi);
1177 
1178     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1179     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1180 
1181     if (mop & MO_BSWAP) {
1182         val = bswap32(val);
1183     }
1184     store_atom_4(cpu, ra, haddr, mop, val);
1185     clear_helper_retaddr();
1186 }
1187 
1188 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1189                        MemOpIdx oi, uintptr_t ra)
1190 {
1191     void *haddr;
1192     MemOp mop = get_memop(oi);
1193 
1194     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1195     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1196 
1197     if (mop & MO_BSWAP) {
1198         val = bswap64(val);
1199     }
1200     store_atom_8(cpu, ra, haddr, mop, val);
1201     clear_helper_retaddr();
1202 }
1203 
1204 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1205                         MemOpIdx oi, uintptr_t ra)
1206 {
1207     void *haddr;
1208     MemOpIdx mop = get_memop(oi);
1209 
1210     cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1211     haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1212 
1213     if (mop & MO_BSWAP) {
1214         val = bswap128(val);
1215     }
1216     store_atom_16(cpu, ra, haddr, mop, val);
1217     clear_helper_retaddr();
1218 }
1219 
1220 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
1221                          MemOpIdx oi, uintptr_t ra)
1222 {
1223     return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1224 }
1225 
1226 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
1227                           MemOpIdx oi, uintptr_t ra)
1228 {
1229     return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1230 }
1231 
1232 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
1233                           MemOpIdx oi, uintptr_t ra)
1234 {
1235     return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1236 }
1237 
1238 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
1239                           MemOpIdx oi, uintptr_t ra)
1240 {
1241     return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1242 }
1243 
1244 #include "ldst_common.c.inc"
1245 
1246 /*
1247  * Do not allow unaligned operations to proceed.  Return the host address.
1248  */
1249 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1250                                int size, uintptr_t retaddr)
1251 {
1252     MemOp mop = get_memop(oi);
1253     int a_bits = memop_alignment_bits(mop);
1254     void *ret;
1255 
1256     /* Enforce guest required alignment.  */
1257     if (unlikely(addr & ((1 << a_bits) - 1))) {
1258         cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1259     }
1260 
1261     /* Enforce qemu required alignment.  */
1262     if (unlikely(addr & (size - 1))) {
1263         cpu_loop_exit_atomic(cpu, retaddr);
1264     }
1265 
1266     ret = g2h(cpu, addr);
1267     set_helper_retaddr(retaddr);
1268     return ret;
1269 }
1270 
1271 #include "atomic_common.c.inc"
1272 
1273 /*
1274  * First set of functions passes in OI and RETADDR.
1275  * This makes them callable from other helpers.
1276  */
1277 
1278 #define ATOMIC_NAME(X) \
1279     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
1280 #define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)
1281 
1282 #define DATA_SIZE 1
1283 #include "atomic_template.h"
1284 
1285 #define DATA_SIZE 2
1286 #include "atomic_template.h"
1287 
1288 #define DATA_SIZE 4
1289 #include "atomic_template.h"
1290 
1291 #ifdef CONFIG_ATOMIC64
1292 #define DATA_SIZE 8
1293 #include "atomic_template.h"
1294 #endif
1295 
1296 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
1297 #define DATA_SIZE 16
1298 #include "atomic_template.h"
1299 #endif
1300