1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "accel/tcg/cpu-ops.h"
23 #include "exec/exec-all.h"
24 #include "exec/page-protection.h"
25 #include "system/memory.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/tb-flush.h"
29 #include "system/ram_addr.h"
30 #include "exec/mmu-access-type.h"
31 #include "exec/tlb-common.h"
32 #include "exec/vaddr.h"
33 #include "tcg/tcg.h"
34 #include "qemu/error-report.h"
35 #include "exec/log.h"
36 #include "exec/helper-proto-common.h"
37 #include "exec/tlb-flags.h"
38 #include "qemu/atomic.h"
39 #include "qemu/atomic128.h"
40 #include "tb-internal.h"
41 #include "trace.h"
42 #include "tb-hash.h"
44 #include "tlb-bounds.h"
45 #include "internal-common.h"
46 #include "internal-target.h"
47 #ifdef CONFIG_PLUGIN
48 #include "qemu/plugin-memory.h"
49 #endif
50 #include "tcg/tcg-ldst.h"
51 
52 
53 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
54 /* #define DEBUG_TLB */
55 /* #define DEBUG_TLB_LOG */
56 
57 #ifdef DEBUG_TLB
58 # define DEBUG_TLB_GATE 1
59 # ifdef DEBUG_TLB_LOG
60 #  define DEBUG_TLB_LOG_GATE 1
61 # else
62 #  define DEBUG_TLB_LOG_GATE 0
63 # endif
64 #else
65 # define DEBUG_TLB_GATE 0
66 # define DEBUG_TLB_LOG_GATE 0
67 #endif
68 
69 #define tlb_debug(fmt, ...) do { \
70     if (DEBUG_TLB_LOG_GATE) { \
71         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
72                       ## __VA_ARGS__); \
73     } else if (DEBUG_TLB_GATE) { \
74         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
75     } \
76 } while (0)
77 
78 #define assert_cpu_is_self(cpu) do {                              \
79         if (DEBUG_TLB_GATE) {                                     \
80             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
81         }                                                         \
82     } while (0)
83 
84 /* run_on_cpu_data.target_ptr should always be big enough for a
85  * vaddr even on 32 bit builds
86  */
87 QEMU_BUILD_BUG_ON(sizeof(vaddr) > sizeof(run_on_cpu_data));
88 
89 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
90  */
91 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
92 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
93 
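/*
 * fast->mask is (n_entries - 1) << CPU_TLB_ENTRY_BITS, i.e. the byte size
 * of the table minus one entry; the helpers below rely on that encoding.
 */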
94 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
95 {
96     return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
97 }
98 
99 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
100 {
101     return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
102 }
103 
104 static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
105                                     MMUAccessType access_type)
106 {
107     /* Do not rearrange the CPUTLBEntry structure members. */
108     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
109                       MMU_DATA_LOAD * sizeof(uintptr_t));
110     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
111                       MMU_DATA_STORE * sizeof(uintptr_t));
112     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
113                       MMU_INST_FETCH * sizeof(uintptr_t));
114 
115     const uintptr_t *ptr = &entry->addr_idx[access_type];
116     /* The indexed field might be .addr_write, so use qatomic_read. */
117     return qatomic_read(ptr);
118 }
119 
120 static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry)
121 {
122     return tlb_read_idx(entry, MMU_DATA_STORE);
123 }
124 
125 /* Find the TLB index corresponding to the mmu_idx + address pair.  */
126 static inline uintptr_t tlb_index(CPUState *cpu, uintptr_t mmu_idx,
127                                   vaddr addr)
128 {
129     uintptr_t size_mask = cpu->neg.tlb.f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
130 
131     return (addr >> TARGET_PAGE_BITS) & size_mask;
132 }
133 
134 /* Find the TLB entry corresponding to the mmu_idx + address pair.  */
135 static inline CPUTLBEntry *tlb_entry(CPUState *cpu, uintptr_t mmu_idx,
136                                      vaddr addr)
137 {
138     return &cpu->neg.tlb.f[mmu_idx].table[tlb_index(cpu, mmu_idx, addr)];
139 }
140 
141 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
142                              size_t max_entries)
143 {
144     desc->window_begin_ns = ns;
145     desc->window_max_entries = max_entries;
146 }
147 
148 static void tb_jmp_cache_clear_page(CPUState *cpu, vaddr page_addr)
149 {
150     CPUJumpCache *jc = cpu->tb_jmp_cache;
151     int i, i0;
152 
153     if (unlikely(!jc)) {
154         return;
155     }
156 
157     i0 = tb_jmp_cache_hash_page(page_addr);
158     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
159         qatomic_set(&jc->array[i0 + i].tb, NULL);
160     }
161 }
162 
163 /**
164  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
165  * @desc: The CPUTLBDesc portion of the TLB
166  * @fast: The CPUTLBDescFast portion of the same TLB
167  *
168  * Called with tlb_c.lock held.
169  *
170  * We have two main constraints when resizing a TLB: (1) we only resize it
171  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
172  * the array or unnecessarily flushing it), which means we do not control how
173  * frequently the resizing can occur; (2) we don't have access to the guest's
174  * future scheduling decisions, and therefore have to decide the magnitude of
175  * the resize based on past observations.
176  *
177  * In general, a memory-hungry process can benefit greatly from an appropriately
178  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
179  * we just have to make the TLB as large as possible; while an oversized TLB
180  * results in minimal TLB miss rates, it also takes longer to be flushed
181  * (flushes can be _very_ frequent), and the reduced locality can also hurt
182  * performance.
183  *
184  * To achieve near-optimal performance for all kinds of workloads, we:
185  *
186  * 1. Aggressively increase the size of the TLB when the use rate of the
187  * TLB being flushed is high, since it is likely that in the near future this
188  * memory-hungry process will execute again, and its memory hungriness will
189  * probably be similar.
190  *
191  * 2. Slowly reduce the size of the TLB as the use rate declines over a
192  * reasonably large time window. The rationale is that if in such a time window
193  * we have not observed a high TLB use rate, it is likely that we won't observe
194  * it in the near future. In that case, once a time window expires we downsize
195  * the TLB to match the maximum use rate observed in the window.
196  *
197  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
198  * since in that range performance is likely near-optimal. Recall that the TLB
199  * is direct mapped, so we want the use rate to be low (or at least not too
200  * high), since otherwise we are likely to have a significant amount of
201  * conflict misses.
202  */
203 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
204                                   int64_t now)
205 {
206     size_t old_size = tlb_n_entries(fast);
207     size_t rate;
208     size_t new_size = old_size;
209     int64_t window_len_ms = 100;
210     int64_t window_len_ns = window_len_ms * 1000 * 1000;
211     bool window_expired = now > desc->window_begin_ns + window_len_ns;
212 
213     if (desc->n_used_entries > desc->window_max_entries) {
214         desc->window_max_entries = desc->n_used_entries;
215     }
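    /* Peak use rate, in percent, of the current table over this window. */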
216     rate = desc->window_max_entries * 100 / old_size;
217 
218     if (rate > 70) {
219         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
220     } else if (rate < 30 && window_expired) {
221         size_t ceil = pow2ceil(desc->window_max_entries);
222         size_t expected_rate = desc->window_max_entries * 100 / ceil;
223 
224         /*
225          * Avoid undersizing when the max number of entries seen is just below
226          * a pow2. For instance, if max_entries == 1025, the expected use rate
227          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
228          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
229  * later. Thus, make sure that the expected use rate remains below 70%
230  * (and since we double the size, the lowest rate we'd expect to get
231  * is 35%, which is still in the 30-70% range where we consider the
232  * size appropriate).
233          */
234         if (expected_rate > 70) {
235             ceil *= 2;
236         }
237         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
238     }
239 
240     if (new_size == old_size) {
241         if (window_expired) {
242             tlb_window_reset(desc, now, desc->n_used_entries);
243         }
244         return;
245     }
246 
247     g_free(fast->table);
248     g_free(desc->fulltlb);
249 
250     tlb_window_reset(desc, now, 0);
251     /* desc->n_used_entries is cleared by the caller */
252     fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
253     fast->table = g_try_new(CPUTLBEntry, new_size);
254     desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
255 
256     /*
257      * If the allocations fail, try smaller sizes. We just freed some
258      * memory, so going back to half of new_size has a good chance of working.
259      * Increased memory pressure elsewhere in the system might cause the
260      * allocations to fail though, so we progressively reduce the allocation
261      * size, aborting if we cannot even allocate the smallest TLB we support.
262      */
263     while (fast->table == NULL || desc->fulltlb == NULL) {
264         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
265             error_report("%s: %s", __func__, strerror(errno));
266             abort();
267         }
268         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
269         fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
270 
271         g_free(fast->table);
272         g_free(desc->fulltlb);
273         fast->table = g_try_new(CPUTLBEntry, new_size);
274         desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
275     }
276 }
277 
278 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
279 {
280     desc->n_used_entries = 0;
281     desc->large_page_addr = -1;
282     desc->large_page_mask = -1;
283     desc->vindex = 0;
284     memset(fast->table, -1, sizeof_tlb(fast));
285     memset(desc->vtable, -1, sizeof(desc->vtable));
286 }
287 
288 static void tlb_flush_one_mmuidx_locked(CPUState *cpu, int mmu_idx,
289                                         int64_t now)
290 {
291     CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx];
292     CPUTLBDescFast *fast = &cpu->neg.tlb.f[mmu_idx];
293 
294     tlb_mmu_resize_locked(desc, fast, now);
295     tlb_mmu_flush_locked(desc, fast);
296 }
297 
298 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
299 {
300     size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
301 
302     tlb_window_reset(desc, now, 0);
303     desc->n_used_entries = 0;
304     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
305     fast->table = g_new(CPUTLBEntry, n_entries);
306     desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
307     tlb_mmu_flush_locked(desc, fast);
308 }
309 
310 static inline void tlb_n_used_entries_inc(CPUState *cpu, uintptr_t mmu_idx)
311 {
312     cpu->neg.tlb.d[mmu_idx].n_used_entries++;
313 }
314 
315 static inline void tlb_n_used_entries_dec(CPUState *cpu, uintptr_t mmu_idx)
316 {
317     cpu->neg.tlb.d[mmu_idx].n_used_entries--;
318 }
319 
320 void tlb_init(CPUState *cpu)
321 {
322     int64_t now = get_clock_realtime();
323     int i;
324 
325     qemu_spin_init(&cpu->neg.tlb.c.lock);
326 
327     /* All tlbs are initialized flushed. */
328     cpu->neg.tlb.c.dirty = 0;
329 
330     for (i = 0; i < NB_MMU_MODES; i++) {
331         tlb_mmu_init(&cpu->neg.tlb.d[i], &cpu->neg.tlb.f[i], now);
332     }
333 }
334 
335 void tlb_destroy(CPUState *cpu)
336 {
337     int i;
338 
339     qemu_spin_destroy(&cpu->neg.tlb.c.lock);
340     for (i = 0; i < NB_MMU_MODES; i++) {
341         CPUTLBDesc *desc = &cpu->neg.tlb.d[i];
342         CPUTLBDescFast *fast = &cpu->neg.tlb.f[i];
343 
344         g_free(fast->table);
345         g_free(desc->fulltlb);
346     }
347 }
348 
349 /* flush_all_helper: run fn across all cpus other than src
350  *
351  * The helpers are queued via async_run_on_cpu.  Callers that need a
352  * synchronisation point follow this with async_safe_run_on_cpu on the
353  * src cpu, so that all queued work is finished before execution
354  * starts again.
355  */
356 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
357                              run_on_cpu_data d)
358 {
359     CPUState *cpu;
360 
361     CPU_FOREACH(cpu) {
362         if (cpu != src) {
363             async_run_on_cpu(cpu, fn, d);
364         }
365     }
366 }
367 
368 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
369 {
370     uint16_t asked = data.host_int;
371     uint16_t all_dirty, work, to_clean;
372     int64_t now = get_clock_realtime();
373 
374     assert_cpu_is_self(cpu);
375 
376     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
377 
378     qemu_spin_lock(&cpu->neg.tlb.c.lock);
379 
380     all_dirty = cpu->neg.tlb.c.dirty;
381     to_clean = asked & all_dirty;
382     all_dirty &= ~to_clean;
383     cpu->neg.tlb.c.dirty = all_dirty;
384 
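    /*
     * Walk the requested-and-dirty bits: "work &= work - 1" clears the
     * lowest set bit and ctz32() names the mmu_idx it corresponds to.
     */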
385     for (work = to_clean; work != 0; work &= work - 1) {
386         int mmu_idx = ctz32(work);
387         tlb_flush_one_mmuidx_locked(cpu, mmu_idx, now);
388     }
389 
390     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
391 
392     tcg_flush_jmp_cache(cpu);
393 
394     if (to_clean == ALL_MMUIDX_BITS) {
395         qatomic_set(&cpu->neg.tlb.c.full_flush_count,
396                     cpu->neg.tlb.c.full_flush_count + 1);
397     } else {
398         qatomic_set(&cpu->neg.tlb.c.part_flush_count,
399                     cpu->neg.tlb.c.part_flush_count + ctpop16(to_clean));
400         if (to_clean != asked) {
401             qatomic_set(&cpu->neg.tlb.c.elide_flush_count,
402                         cpu->neg.tlb.c.elide_flush_count +
403                         ctpop16(asked & ~to_clean));
404         }
405     }
406 }
407 
408 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
409 {
410     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
411 
412     assert_cpu_is_self(cpu);
413 
414     tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
415 }
416 
417 void tlb_flush(CPUState *cpu)
418 {
419     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
420 }
421 
422 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
423 {
424     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
425 
426     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
427 
428     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
429     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
430 }
431 
432 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
433 {
434     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
435 }
436 
437 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
438                                       vaddr page, vaddr mask)
439 {
440     page &= mask;
441     mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
442 
443     return (page == (tlb_entry->addr_read & mask) ||
444             page == (tlb_addr_write(tlb_entry) & mask) ||
445             page == (tlb_entry->addr_code & mask));
446 }
447 
448 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page)
449 {
450     return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
451 }
452 
453 /**
454  * tlb_entry_is_empty - return true if the entry is not in use
455  * @te: pointer to CPUTLBEntry
456  */
457 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
458 {
459     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
460 }
461 
462 /* Called with tlb_c.lock held */
463 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
464                                         vaddr page,
465                                         vaddr mask)
466 {
467     if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
468         memset(tlb_entry, -1, sizeof(*tlb_entry));
469         return true;
470     }
471     return false;
472 }
473 
474 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, vaddr page)
475 {
476     return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
477 }
478 
479 /* Called with tlb_c.lock held */
480 static void tlb_flush_vtlb_page_mask_locked(CPUState *cpu, int mmu_idx,
481                                             vaddr page,
482                                             vaddr mask)
483 {
484     CPUTLBDesc *d = &cpu->neg.tlb.d[mmu_idx];
485     int k;
486 
487     assert_cpu_is_self(cpu);
488     for (k = 0; k < CPU_VTLB_SIZE; k++) {
489         if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
490             tlb_n_used_entries_dec(cpu, mmu_idx);
491         }
492     }
493 }
494 
495 static inline void tlb_flush_vtlb_page_locked(CPUState *cpu, int mmu_idx,
496                                               vaddr page)
497 {
498     tlb_flush_vtlb_page_mask_locked(cpu, mmu_idx, page, -1);
499 }
500 
501 static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
502 {
503     vaddr lp_addr = cpu->neg.tlb.d[midx].large_page_addr;
504     vaddr lp_mask = cpu->neg.tlb.d[midx].large_page_mask;
505 
506     /* Check if we need to flush due to large pages.  */
507     if ((page & lp_mask) == lp_addr) {
508         tlb_debug("forcing full flush midx %d (%016"
509                   VADDR_PRIx "/%016" VADDR_PRIx ")\n",
510                   midx, lp_addr, lp_mask);
511         tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
512     } else {
513         if (tlb_flush_entry_locked(tlb_entry(cpu, midx, page), page)) {
514             tlb_n_used_entries_dec(cpu, midx);
515         }
516         tlb_flush_vtlb_page_locked(cpu, midx, page);
517     }
518 }
519 
520 /**
521  * tlb_flush_page_by_mmuidx_async_0:
522  * @cpu: cpu on which to flush
523  * @addr: page of virtual address to flush
524  * @idxmap: set of mmu_idx to flush
525  *
526  * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
527  * at @addr from the tlbs indicated by @idxmap from @cpu.
528  */
529 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
530                                              vaddr addr,
531                                              uint16_t idxmap)
532 {
533     int mmu_idx;
534 
535     assert_cpu_is_self(cpu);
536 
537     tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
538 
539     qemu_spin_lock(&cpu->neg.tlb.c.lock);
540     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
541         if ((idxmap >> mmu_idx) & 1) {
542             tlb_flush_page_locked(cpu, mmu_idx, addr);
543         }
544     }
545     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
546 
547     /*
548      * Discard jump cache entries for any tb which might potentially
549      * overlap the flushed page, which includes the previous page.
550      */
551     tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
552     tb_jmp_cache_clear_page(cpu, addr);
553 }
554 
555 /**
556  * tlb_flush_page_by_mmuidx_async_1:
557  * @cpu: cpu on which to flush
558  * @data: encoded addr + idxmap
559  *
560  * Helper for tlb_flush_page_by_mmuidx and friends, called through
561  * async_run_on_cpu.  The idxmap parameter is encoded in the page
562  * offset of the target_ptr field.  This limits the set of mmu_idx
563  * that can be passed via this method.
564  */
565 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
566                                              run_on_cpu_data data)
567 {
568     vaddr addr_and_idxmap = data.target_ptr;
569     vaddr addr = addr_and_idxmap & TARGET_PAGE_MASK;
570     uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
571 
572     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
573 }
574 
575 typedef struct {
576     vaddr addr;
577     uint16_t idxmap;
578 } TLBFlushPageByMMUIdxData;
579 
580 /**
581  * tlb_flush_page_by_mmuidx_async_2:
582  * @cpu: cpu on which to flush
583  * @data: allocated addr + idxmap
584  *
585  * Helper for tlb_flush_page_by_mmuidx and friends, called through
586  * async_run_on_cpu.  The addr+idxmap parameters are stored in a
587  * TLBFlushPageByMMUIdxData structure that has been allocated
588  * specifically for this helper.  Free the structure when done.
589  */
590 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
591                                              run_on_cpu_data data)
592 {
593     TLBFlushPageByMMUIdxData *d = data.host_ptr;
594 
595     tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
596     g_free(d);
597 }
598 
599 void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
600 {
601     tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
602 
603     assert_cpu_is_self(cpu);
604 
605     /* This should already be page aligned */
606     addr &= TARGET_PAGE_MASK;
607 
608     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
609 }
610 
611 void tlb_flush_page(CPUState *cpu, vaddr addr)
612 {
613     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
614 }
615 
616 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
617                                               vaddr addr,
618                                               uint16_t idxmap)
619 {
620     tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
621 
622     /* This should already be page aligned */
623     addr &= TARGET_PAGE_MASK;
624 
625     /*
626      * Allocate memory to hold addr+idxmap only when needed.
627      * See tlb_flush_page_by_mmuidx for details.
628      */
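    /*
     * When idxmap fits in the page-offset bits of the page-aligned addr,
     * both values can be packed into one target_ptr with no allocation.
     */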
629     if (idxmap < TARGET_PAGE_SIZE) {
630         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
631                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
632         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
633                               RUN_ON_CPU_TARGET_PTR(addr | idxmap));
634     } else {
635         CPUState *dst_cpu;
636         TLBFlushPageByMMUIdxData *d;
637 
638         /* Allocate a separate data block for each destination cpu.  */
639         CPU_FOREACH(dst_cpu) {
640             if (dst_cpu != src_cpu) {
641                 d = g_new(TLBFlushPageByMMUIdxData, 1);
642                 d->addr = addr;
643                 d->idxmap = idxmap;
644                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
645                                  RUN_ON_CPU_HOST_PTR(d));
646             }
647         }
648 
649         d = g_new(TLBFlushPageByMMUIdxData, 1);
650         d->addr = addr;
651         d->idxmap = idxmap;
652         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
653                               RUN_ON_CPU_HOST_PTR(d));
654     }
655 }
656 
657 void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
658 {
659     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
660 }
661 
662 static void tlb_flush_range_locked(CPUState *cpu, int midx,
663                                    vaddr addr, vaddr len,
664                                    unsigned bits)
665 {
666     CPUTLBDesc *d = &cpu->neg.tlb.d[midx];
667     CPUTLBDescFast *f = &cpu->neg.tlb.f[midx];
668     vaddr mask = MAKE_64BIT_MASK(0, bits);
669 
670     /*
671      * If @bits is smaller than the tlb size, there may be multiple entries
672      * within the TLB; otherwise all addresses that match under @mask hit
673      * the same TLB entry.
674      * TODO: Perhaps allow bits to be a few bits less than the size.
675      * For now, just flush the entire TLB.
676      *
677      * If @len is larger than the tlb size, then it will take longer to
678      * test all of the entries in the TLB than it will to flush it all.
679      */
680     if (mask < f->mask || len > f->mask) {
681         tlb_debug("forcing full flush midx %d ("
682                   "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
683                   midx, addr, mask, len);
684         tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
685         return;
686     }
687 
688     /*
689      * Check if we need to flush due to large pages.
690      * Because large_page_mask contains all 1's from the msb,
691      * we only need to test the end of the range.
692      */
693     if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
694         tlb_debug("forcing full flush midx %d ("
695                   "%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n",
696                   midx, d->large_page_addr, d->large_page_mask);
697         tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
698         return;
699     }
700 
701     for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
702         vaddr page = addr + i;
703         CPUTLBEntry *entry = tlb_entry(cpu, midx, page);
704 
705         if (tlb_flush_entry_mask_locked(entry, page, mask)) {
706             tlb_n_used_entries_dec(cpu, midx);
707         }
708         tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
709     }
710 }
711 
712 typedef struct {
713     vaddr addr;
714     vaddr len;
715     uint16_t idxmap;
716     uint16_t bits;
717 } TLBFlushRangeData;
718 
719 static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
720                                               TLBFlushRangeData d)
721 {
722     int mmu_idx;
723 
724     assert_cpu_is_self(cpu);
725 
726     tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n",
727               d.addr, d.bits, d.len, d.idxmap);
728 
729     qemu_spin_lock(&cpu->neg.tlb.c.lock);
730     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
731         if ((d.idxmap >> mmu_idx) & 1) {
732             tlb_flush_range_locked(cpu, mmu_idx, d.addr, d.len, d.bits);
733         }
734     }
735     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
736 
737     /*
738      * If the length is larger than the jump cache size, then it will take
739      * longer to clear each entry individually than it will to clear it all.
740      */
741     if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
742         tcg_flush_jmp_cache(cpu);
743         return;
744     }
745 
746     /*
747      * Discard jump cache entries for any tb which might potentially
748      * overlap the flushed pages, which includes the previous.
749      * overlap the flushed pages, which includes the previous page.
750     d.addr -= TARGET_PAGE_SIZE;
751     for (vaddr i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
752         tb_jmp_cache_clear_page(cpu, d.addr);
753         d.addr += TARGET_PAGE_SIZE;
754     }
755 }
756 
757 static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
758                                               run_on_cpu_data data)
759 {
760     TLBFlushRangeData *d = data.host_ptr;
761     tlb_flush_range_by_mmuidx_async_0(cpu, *d);
762     g_free(d);
763 }
764 
765 void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
766                                vaddr len, uint16_t idxmap,
767                                unsigned bits)
768 {
769     TLBFlushRangeData d;
770 
771     assert_cpu_is_self(cpu);
772 
773     /*
774      * If all bits are significant, and len is small,
775      * this devolves to tlb_flush_page.
776      */
777     if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
778         tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
779         return;
780     }
781     /* If no page bits are significant, this devolves to tlb_flush. */
782     if (bits < TARGET_PAGE_BITS) {
783         tlb_flush_by_mmuidx(cpu, idxmap);
784         return;
785     }
786 
787     /* This should already be page aligned */
788     d.addr = addr & TARGET_PAGE_MASK;
789     d.len = len;
790     d.idxmap = idxmap;
791     d.bits = bits;
792 
793     tlb_flush_range_by_mmuidx_async_0(cpu, d);
794 }
795 
796 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
797                                    uint16_t idxmap, unsigned bits)
798 {
799     tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
800 }
801 
802 void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
803                                                vaddr addr,
804                                                vaddr len,
805                                                uint16_t idxmap,
806                                                unsigned bits)
807 {
808     TLBFlushRangeData d, *p;
809     CPUState *dst_cpu;
810 
811     /*
812      * If all bits are significant, and len is small,
813      * this devolves to tlb_flush_page.
814      */
815     if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
816         tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
817         return;
818     }
819     /* If no page bits are significant, this devolves to tlb_flush. */
820     if (bits < TARGET_PAGE_BITS) {
821         tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
822         return;
823     }
824 
825     /* This should already be page aligned */
826     d.addr = addr & TARGET_PAGE_MASK;
827     d.len = len;
828     d.idxmap = idxmap;
829     d.bits = bits;
830 
831     /* Allocate a separate data block for each destination cpu.  */
832     CPU_FOREACH(dst_cpu) {
833         if (dst_cpu != src_cpu) {
834             p = g_memdup(&d, sizeof(d));
835             async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1,
836                              RUN_ON_CPU_HOST_PTR(p));
837         }
838     }
839 
840     p = g_memdup(&d, sizeof(d));
841     async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1,
842                           RUN_ON_CPU_HOST_PTR(p));
843 }
844 
845 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
846                                                    vaddr addr,
847                                                    uint16_t idxmap,
848                                                    unsigned bits)
849 {
850     tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
851                                               idxmap, bits);
852 }
853 
854 /* update the TLBs so that writes to code in the physical page 'ram_addr'
855    can be detected */
856 void tlb_protect_code(ram_addr_t ram_addr)
857 {
858     cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
859                                              TARGET_PAGE_SIZE,
860                                              DIRTY_MEMORY_CODE);
861 }
862 
863 /* update the TLB so that writes in physical page 'ram_addr' are no longer
864    tested for self-modifying code */
865 void tlb_unprotect_code(ram_addr_t ram_addr)
866 {
867     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
868 }
869 
870 
871 /*
872  * Dirty write flag handling
873  *
874  * When the TCG code writes to a location it looks up the address in
875  * the TLB and uses that data to compute the final address. If any of
876  * the lower bits of the address are set then the slow path is forced.
877  * There are a number of reasons to do this but for normal RAM the
878  * most usual is detecting writes to code regions which may invalidate
879  * generated code.
880  *
881  * Other vCPUs might be reading their TLBs during guest execution, so we update
882  * te->addr_write with qatomic_set. We don't need to worry about this for
883  * oversized guests as MTTCG is disabled for them.
884  *
885  * Called with tlb_c.lock held.
886  */
887 static void tlb_reset_dirty_range_locked(CPUTLBEntryFull *full, CPUTLBEntry *ent,
888                                          uintptr_t start, uintptr_t length)
889 {
890     const uintptr_t addr = ent->addr_write;
891     int flags = addr | full->slow_flags[MMU_DATA_STORE];
892 
893     flags &= TLB_INVALID_MASK | TLB_MMIO | TLB_DISCARD_WRITE | TLB_NOTDIRTY;
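    /*
     * Only plain RAM entries need the notdirty trap; entries that are
     * already invalid, MMIO, discard-write or notdirty are left alone.
     */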
894     if (flags == 0) {
895         uintptr_t host = (addr & TARGET_PAGE_MASK) + ent->addend;
896         if ((host - start) < length) {
897             qatomic_set(&ent->addr_write, addr | TLB_NOTDIRTY);
898         }
899     }
900 }
901 
902 /*
903  * Called with tlb_c.lock held.
904  * Called only from the vCPU context, i.e. the TLB's owner thread.
905  */
906 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
907 {
908     *d = *s;
909 }
910 
911 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
912  * the target vCPU).
913  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
914  * thing actually updated is the target TLB entry ->addr_write flags.
915  */
916 void tlb_reset_dirty(CPUState *cpu, uintptr_t start, uintptr_t length)
917 {
918     int mmu_idx;
919 
920     qemu_spin_lock(&cpu->neg.tlb.c.lock);
921     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
922         CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx];
923         CPUTLBDescFast *fast = &cpu->neg.tlb.f[mmu_idx];
924         unsigned int n = tlb_n_entries(fast);
925         unsigned int i;
926 
927         for (i = 0; i < n; i++) {
928             tlb_reset_dirty_range_locked(&desc->fulltlb[i], &fast->table[i],
929                                          start, length);
930         }
931 
932         for (i = 0; i < CPU_VTLB_SIZE; i++) {
933             tlb_reset_dirty_range_locked(&desc->vfulltlb[i], &desc->vtable[i],
934                                          start, length);
935         }
936     }
937     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
938 }
939 
940 /* Called with tlb_c.lock held */
941 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
942                                          vaddr addr)
943 {
944     if (tlb_entry->addr_write == (addr | TLB_NOTDIRTY)) {
945         tlb_entry->addr_write = addr;
946     }
947 }
948 
949 /* update the TLB corresponding to virtual page 'addr'
950    so that it is no longer dirty */
951 static void tlb_set_dirty(CPUState *cpu, vaddr addr)
952 {
953     int mmu_idx;
954 
955     assert_cpu_is_self(cpu);
956 
957     addr &= TARGET_PAGE_MASK;
958     qemu_spin_lock(&cpu->neg.tlb.c.lock);
959     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
960         tlb_set_dirty1_locked(tlb_entry(cpu, mmu_idx, addr), addr);
961     }
962 
963     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
964         int k;
965         for (k = 0; k < CPU_VTLB_SIZE; k++) {
966             tlb_set_dirty1_locked(&cpu->neg.tlb.d[mmu_idx].vtable[k], addr);
967         }
968     }
969     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
970 }
971 
972 /* Our TLB does not support large pages, so remember the area covered by
973    large pages and trigger a full TLB flush if these are invalidated.  */
974 static void tlb_add_large_page(CPUState *cpu, int mmu_idx,
975                                vaddr addr, uint64_t size)
976 {
977     vaddr lp_addr = cpu->neg.tlb.d[mmu_idx].large_page_addr;
978     vaddr lp_mask = ~(size - 1);
979 
980     if (lp_addr == (vaddr)-1) {
981         /* No previous large page.  */
982         lp_addr = addr;
983     } else {
984         /* Extend the existing region to include the new page.
985            This is a compromise between unnecessary flushes and
986            the cost of maintaining a full variable size TLB.  */
987         lp_mask &= cpu->neg.tlb.d[mmu_idx].large_page_mask;
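        /* Widen the mask until lp_addr and addr fall in the same region. */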
988         while (((lp_addr ^ addr) & lp_mask) != 0) {
989             lp_mask <<= 1;
990         }
991     }
992     cpu->neg.tlb.d[mmu_idx].large_page_addr = lp_addr & lp_mask;
993     cpu->neg.tlb.d[mmu_idx].large_page_mask = lp_mask;
994 }
995 
996 static inline void tlb_set_compare(CPUTLBEntryFull *full, CPUTLBEntry *ent,
997                                    vaddr address, int flags,
998                                    MMUAccessType access_type, bool enable)
999 {
1000     if (enable) {
1001         address |= flags & TLB_FLAGS_MASK;
1002         flags &= TLB_SLOW_FLAGS_MASK;
1003         if (flags) {
1004             address |= TLB_FORCE_SLOW;
1005         }
1006     } else {
1007         address = -1;
1008         flags = 0;
1009     }
1010     ent->addr_idx[access_type] = address;
1011     full->slow_flags[access_type] = flags;
1012 }
1013 
1014 /*
1015  * Add a new TLB entry. At most one entry for a given virtual address
1016  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
1017  * supplied size is only used by tlb_flush_page.
1018  *
1019  * Called from TCG-generated code, which is under an RCU read-side
1020  * critical section.
1021  */
1022 void tlb_set_page_full(CPUState *cpu, int mmu_idx,
1023                        vaddr addr, CPUTLBEntryFull *full)
1024 {
1025     CPUTLB *tlb = &cpu->neg.tlb;
1026     CPUTLBDesc *desc = &tlb->d[mmu_idx];
1027     MemoryRegionSection *section;
1028     unsigned int index, read_flags, write_flags;
1029     uintptr_t addend;
1030     CPUTLBEntry *te, tn;
1031     hwaddr iotlb, xlat, sz, paddr_page;
1032     vaddr addr_page;
1033     int asidx, wp_flags, prot;
1034     bool is_ram, is_romd;
1035 
1036     assert_cpu_is_self(cpu);
1037 
1038     if (full->lg_page_size <= TARGET_PAGE_BITS) {
1039         sz = TARGET_PAGE_SIZE;
1040     } else {
1041         sz = (hwaddr)1 << full->lg_page_size;
1042         tlb_add_large_page(cpu, mmu_idx, addr, sz);
1043     }
1044     addr_page = addr & TARGET_PAGE_MASK;
1045     paddr_page = full->phys_addr & TARGET_PAGE_MASK;
1046 
1047     prot = full->prot;
1048     asidx = cpu_asidx_from_attrs(cpu, full->attrs);
1049     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
1050                                                 &xlat, &sz, full->attrs, &prot);
1051     assert(sz >= TARGET_PAGE_SIZE);
1052 
1053     tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
1054               " prot=%x idx=%d\n",
1055               addr, full->phys_addr, prot, mmu_idx);
1056 
1057     read_flags = full->tlb_fill_flags;
1058     if (full->lg_page_size < TARGET_PAGE_BITS) {
1059         /* Repeat the MMU check and TLB fill on every access.  */
1060         read_flags |= TLB_INVALID_MASK;
1061     }
1062 
1063     is_ram = memory_region_is_ram(section->mr);
1064     is_romd = memory_region_is_romd(section->mr);
1065 
1066     if (is_ram || is_romd) {
1067         /* RAM and ROMD both have associated host memory. */
1068         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
1069     } else {
1070         /* I/O does not; force the host address to NULL. */
1071         addend = 0;
1072     }
1073 
1074     write_flags = read_flags;
1075     if (is_ram) {
1076         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1077         assert(!(iotlb & ~TARGET_PAGE_MASK));
1078         /*
1079          * Computing is_clean is expensive; avoid all that unless
1080          * the page is actually writable.
1081          */
1082         if (prot & PAGE_WRITE) {
1083             if (section->readonly) {
1084                 write_flags |= TLB_DISCARD_WRITE;
1085             } else if (cpu_physical_memory_is_clean(iotlb)) {
1086                 write_flags |= TLB_NOTDIRTY;
1087             }
1088         }
1089     } else {
1090         /* I/O or ROMD */
1091         iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
1092         /*
1093          * Writes to romd devices must go through MMIO to enable write.
1094          * Reads to romd devices go through the ram_ptr found above,
1095          * but of course reads to I/O must go through MMIO.
1096          */
1097         write_flags |= TLB_MMIO;
1098         if (!is_romd) {
1099             read_flags = write_flags;
1100         }
1101     }
1102 
1103     wp_flags = cpu_watchpoint_address_matches(cpu, addr_page,
1104                                               TARGET_PAGE_SIZE);
1105 
1106     index = tlb_index(cpu, mmu_idx, addr_page);
1107     te = tlb_entry(cpu, mmu_idx, addr_page);
1108 
1109     /*
1110      * Hold the TLB lock for the rest of the function. We could acquire/release
1111      * the lock several times in the function, but it is faster to amortize the
1112      * acquisition cost by acquiring it just once. Note that this leads to
1113      * a longer critical section, but this is not a concern since the TLB lock
1114      * is unlikely to be contended.
1115      */
1116     qemu_spin_lock(&tlb->c.lock);
1117 
1118     /* Note that the tlb is no longer clean.  */
1119     tlb->c.dirty |= 1 << mmu_idx;
1120 
1121     /* Make sure there's no cached translation for the new page.  */
1122     tlb_flush_vtlb_page_locked(cpu, mmu_idx, addr_page);
1123 
1124     /*
1125      * Only evict the old entry to the victim tlb if it's for a
1126      * different page; otherwise just overwrite the stale data.
1127      */
1128     if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) {
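        /* Victim slots are recycled round-robin via the incrementing vindex. */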
1129         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
1130         CPUTLBEntry *tv = &desc->vtable[vidx];
1131 
1132         /* Evict the old entry into the victim tlb.  */
1133         copy_tlb_helper_locked(tv, te);
1134         desc->vfulltlb[vidx] = desc->fulltlb[index];
1135         tlb_n_used_entries_dec(cpu, mmu_idx);
1136     }
1137 
1138     /* refill the tlb */
1139     /*
1140      * When memory region is ram, iotlb contains a TARGET_PAGE_BITS
1141      * aligned ram_addr_t of the page base of the target RAM.
1142      * Otherwise, iotlb contains
1143      *  - a physical section number in the lower TARGET_PAGE_BITS
1144      *  - the offset within section->mr of the page base (I/O, ROMD) with the
1145      *    TARGET_PAGE_BITS masked off.
1146      * We subtract addr_page (which is page aligned and thus won't
1147      * disturb the low bits) to give an offset which can be added to the
1148      * (non-page-aligned) vaddr of the eventual memory access to get
1149      * the MemoryRegion offset for the access. Note that the vaddr we
1150      * subtract here is that of the page base, and not the same as the
1151      * vaddr we add back in io_prepare()/get_page_addr_code().
1152      */
1153     desc->fulltlb[index] = *full;
1154     full = &desc->fulltlb[index];
1155     full->xlat_section = iotlb - addr_page;
1156     full->phys_addr = paddr_page;
1157 
1158     /* Now calculate the new entry */
1159     tn.addend = addend - addr_page;
1160 
1161     tlb_set_compare(full, &tn, addr_page, read_flags,
1162                     MMU_INST_FETCH, prot & PAGE_EXEC);
1163 
1164     if (wp_flags & BP_MEM_READ) {
1165         read_flags |= TLB_WATCHPOINT;
1166     }
1167     tlb_set_compare(full, &tn, addr_page, read_flags,
1168                     MMU_DATA_LOAD, prot & PAGE_READ);
1169 
1170     if (prot & PAGE_WRITE_INV) {
1171         write_flags |= TLB_INVALID_MASK;
1172     }
1173     if (wp_flags & BP_MEM_WRITE) {
1174         write_flags |= TLB_WATCHPOINT;
1175     }
1176     tlb_set_compare(full, &tn, addr_page, write_flags,
1177                     MMU_DATA_STORE, prot & PAGE_WRITE);
1178 
1179     copy_tlb_helper_locked(te, &tn);
1180     tlb_n_used_entries_inc(cpu, mmu_idx);
1181     qemu_spin_unlock(&tlb->c.lock);
1182 }
1183 
1184 void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
1185                              hwaddr paddr, MemTxAttrs attrs, int prot,
1186                              int mmu_idx, vaddr size)
1187 {
1188     CPUTLBEntryFull full = {
1189         .phys_addr = paddr,
1190         .attrs = attrs,
1191         .prot = prot,
1192         .lg_page_size = ctz64(size)
1193     };
1194 
1195     assert(is_power_of_2(size));
1196     tlb_set_page_full(cpu, mmu_idx, addr, &full);
1197 }
1198 
1199 void tlb_set_page(CPUState *cpu, vaddr addr,
1200                   hwaddr paddr, int prot,
1201                   int mmu_idx, vaddr size)
1202 {
1203     tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED,
1204                             prot, mmu_idx, size);
1205 }
1206 
1207 /**
1208  * tlb_hit_page: return true if page aligned @addr is a hit against the
1209  * TLB entry @tlb_addr
1210  *
1211  * @addr: virtual address to test (must be page aligned)
1212  * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
1213  */
1214 static inline bool tlb_hit_page(uint64_t tlb_addr, vaddr addr)
1215 {
1216     return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK));
1217 }
1218 
1219 /**
1220  * tlb_hit: return true if @addr is a hit against the TLB entry @tlb_addr
1221  *
1222  * @addr: virtual address to test (need not be page aligned)
1223  * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
1224  */
1225 static inline bool tlb_hit(uint64_t tlb_addr, vaddr addr)
1226 {
1227     return tlb_hit_page(tlb_addr, addr & TARGET_PAGE_MASK);
1228 }
1229 
1230 /*
1231  * Note: tlb_fill_align() can trigger a resize of the TLB.
1232  * This means that all of the caller's prior references to the TLB table
1233  * (e.g. CPUTLBEntry pointers) must be discarded and looked up again
1234  * (e.g. via tlb_entry()).
1235  */
1236 static bool tlb_fill_align(CPUState *cpu, vaddr addr, MMUAccessType type,
1237                            int mmu_idx, MemOp memop, int size,
1238                            bool probe, uintptr_t ra)
1239 {
1240     const TCGCPUOps *ops = cpu->cc->tcg_ops;
1241     CPUTLBEntryFull full;
1242 
1243     if (ops->tlb_fill_align) {
1244         if (ops->tlb_fill_align(cpu, &full, addr, type, mmu_idx,
1245                                 memop, size, probe, ra)) {
1246             tlb_set_page_full(cpu, mmu_idx, addr, &full);
1247             return true;
1248         }
1249     } else {
1250         /* Legacy behaviour is alignment before paging. */
1251         if (addr & ((1u << memop_alignment_bits(memop)) - 1)) {
1252             ops->do_unaligned_access(cpu, addr, type, mmu_idx, ra);
1253         }
1254         if (ops->tlb_fill(cpu, addr, size, type, mmu_idx, probe, ra)) {
1255             return true;
1256         }
1257     }
1258     assert(probe);
1259     return false;
1260 }
1261 
1262 static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
1263                                         MMUAccessType access_type,
1264                                         int mmu_idx, uintptr_t retaddr)
1265 {
1266     cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
1267                                           mmu_idx, retaddr);
1268 }
1269 
1270 static MemoryRegionSection *
1271 io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
1272            MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
1273 {
1274     MemoryRegionSection *section;
1275     hwaddr mr_offset;
1276 
1277     section = iotlb_to_section(cpu, xlat, attrs);
1278     mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
1279     cpu->mem_io_pc = retaddr;
1280     if (!cpu->neg.can_do_io) {
1281         cpu_io_recompile(cpu, retaddr);
1282     }
1283 
1284     *out_offset = mr_offset;
1285     return section;
1286 }
1287 
1288 static void io_failed(CPUState *cpu, CPUTLBEntryFull *full, vaddr addr,
1289                       unsigned size, MMUAccessType access_type, int mmu_idx,
1290                       MemTxResult response, uintptr_t retaddr)
1291 {
1292     if (!cpu->ignore_memory_transaction_failures
1293         && cpu->cc->tcg_ops->do_transaction_failed) {
1294         hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
1295 
1296         cpu->cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
1297                                                 access_type, mmu_idx,
1298                                                 full->attrs, response, retaddr);
1299     }
1300 }
1301 
1302 /* Return true if ADDR is present in the victim tlb, and has been copied
1303    back to the main tlb.  */
1304 static bool victim_tlb_hit(CPUState *cpu, size_t mmu_idx, size_t index,
1305                            MMUAccessType access_type, vaddr page)
1306 {
1307     size_t vidx;
1308 
1309     assert_cpu_is_self(cpu);
1310     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1311         CPUTLBEntry *vtlb = &cpu->neg.tlb.d[mmu_idx].vtable[vidx];
1312         uint64_t cmp = tlb_read_idx(vtlb, access_type);
1313 
1314         if (cmp == page) {
1315             /* Found entry in victim tlb, swap tlb and iotlb.  */
1316             CPUTLBEntry tmptlb, *tlb = &cpu->neg.tlb.f[mmu_idx].table[index];
1317 
1318             qemu_spin_lock(&cpu->neg.tlb.c.lock);
1319             copy_tlb_helper_locked(&tmptlb, tlb);
1320             copy_tlb_helper_locked(tlb, vtlb);
1321             copy_tlb_helper_locked(vtlb, &tmptlb);
1322             qemu_spin_unlock(&cpu->neg.tlb.c.lock);
1323 
1324             CPUTLBEntryFull *f1 = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1325             CPUTLBEntryFull *f2 = &cpu->neg.tlb.d[mmu_idx].vfulltlb[vidx];
1326             CPUTLBEntryFull tmpf;
1327             tmpf = *f1; *f1 = *f2; *f2 = tmpf;
1328             return true;
1329         }
1330     }
1331     return false;
1332 }
1333 
1334 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1335                            CPUTLBEntryFull *full, uintptr_t retaddr)
1336 {
1337     ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
1338 
1339     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1340 
1341     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1342         tb_invalidate_phys_range_fast(ram_addr, size, retaddr);
1343     }
1344 
1345     /*
1346      * Set both VGA and migration bits for simplicity and to remove
1347      * the notdirty callback faster.
1348      */
1349     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1350 
1351     /* We remove the notdirty callback only if the code has been flushed. */
1352     if (!cpu_physical_memory_is_clean(ram_addr)) {
1353         trace_memory_notdirty_set_dirty(mem_vaddr);
1354         tlb_set_dirty(cpu, mem_vaddr);
1355     }
1356 }
1357 
1358 static int probe_access_internal(CPUState *cpu, vaddr addr,
1359                                  int fault_size, MMUAccessType access_type,
1360                                  int mmu_idx, bool nonfault,
1361                                  void **phost, CPUTLBEntryFull **pfull,
1362                                  uintptr_t retaddr, bool check_mem_cbs)
1363 {
1364     uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1365     CPUTLBEntry *entry = tlb_entry(cpu, mmu_idx, addr);
1366     uint64_t tlb_addr = tlb_read_idx(entry, access_type);
1367     vaddr page_addr = addr & TARGET_PAGE_MASK;
1368     int flags = TLB_FLAGS_MASK & ~TLB_FORCE_SLOW;
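    /* Plugin memory callbacks force data accesses onto the slow (MMIO) path. */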
1369     bool force_mmio = check_mem_cbs && cpu_plugin_mem_cbs_enabled(cpu);
1370     CPUTLBEntryFull *full;
1371 
1372     if (!tlb_hit_page(tlb_addr, page_addr)) {
1373         if (!victim_tlb_hit(cpu, mmu_idx, index, access_type, page_addr)) {
1374             if (!tlb_fill_align(cpu, addr, access_type, mmu_idx,
1375                                 0, fault_size, nonfault, retaddr)) {
1376                 /* Non-faulting page table read failed.  */
1377                 *phost = NULL;
1378                 *pfull = NULL;
1379                 return TLB_INVALID_MASK;
1380             }
1381 
1382             /* TLB resize via tlb_fill_align may have moved the entry.  */
1383             index = tlb_index(cpu, mmu_idx, addr);
1384             entry = tlb_entry(cpu, mmu_idx, addr);
1385 
1386             /*
1387              * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
1388              * to force the next access through tlb_fill_align.  We've just
1389              * called tlb_fill_align, so we know that this entry *is* valid.
1390              */
1391             flags &= ~TLB_INVALID_MASK;
1392         }
1393         tlb_addr = tlb_read_idx(entry, access_type);
1394     }
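    /* Keep only the flag bits actually present in this TLB entry. */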
1395     flags &= tlb_addr;
1396 
1397     *pfull = full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1398     flags |= full->slow_flags[access_type];
1399 
1400     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
1401     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY | TLB_CHECK_ALIGNED))
1402         || (access_type != MMU_INST_FETCH && force_mmio)) {
1403         *phost = NULL;
1404         return TLB_MMIO;
1405     }
1406 
1407     /* Everything else is RAM. */
1408     *phost = (void *)((uintptr_t)addr + entry->addend);
1409     return flags;
1410 }
1411 
1412 int probe_access_full(CPUArchState *env, vaddr addr, int size,
1413                       MMUAccessType access_type, int mmu_idx,
1414                       bool nonfault, void **phost, CPUTLBEntryFull **pfull,
1415                       uintptr_t retaddr)
1416 {
1417     int flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1418                                       mmu_idx, nonfault, phost, pfull, retaddr,
1419                                       true);
1420 
1421     /* Handle clean RAM pages.  */
1422     if (unlikely(flags & TLB_NOTDIRTY)) {
1423         int dirtysize = size == 0 ? 1 : size;
1424         notdirty_write(env_cpu(env), addr, dirtysize, *pfull, retaddr);
1425         flags &= ~TLB_NOTDIRTY;
1426     }
1427 
1428     return flags;
1429 }
1430 
1431 int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
1432                           MMUAccessType access_type, int mmu_idx,
1433                           void **phost, CPUTLBEntryFull **pfull)
1434 {
1435     void *discard_phost;
1436     CPUTLBEntryFull *discard_tlb;
1437 
1438     /* privately handle users that don't need full results */
1439     phost = phost ? phost : &discard_phost;
1440     pfull = pfull ? pfull : &discard_tlb;
1441 
1442     int flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1443                                       mmu_idx, true, phost, pfull, 0, false);
1444 
1445     /* Handle clean RAM pages.  */
1446     if (unlikely(flags & TLB_NOTDIRTY)) {
1447         int dirtysize = size == 0 ? 1 : size;
1448         notdirty_write(env_cpu(env), addr, dirtysize, *pfull, 0);
1449         flags &= ~TLB_NOTDIRTY;
1450     }
1451 
1452     return flags;
1453 }
1454 
1455 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
1456                        MMUAccessType access_type, int mmu_idx,
1457                        bool nonfault, void **phost, uintptr_t retaddr)
1458 {
1459     CPUTLBEntryFull *full;
1460     int flags;
1461 
1462     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1463 
1464     flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1465                                   mmu_idx, nonfault, phost, &full, retaddr,
1466                                   true);
1467 
1468     /* Handle clean RAM pages. */
1469     if (unlikely(flags & TLB_NOTDIRTY)) {
1470         int dirtysize = size == 0 ? 1 : size;
1471         notdirty_write(env_cpu(env), addr, dirtysize, full, retaddr);
1472         flags &= ~TLB_NOTDIRTY;
1473     }
1474 
1475     return flags;
1476 }
1477 
1478 void *probe_access(CPUArchState *env, vaddr addr, int size,
1479                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1480 {
1481     CPUTLBEntryFull *full;
1482     void *host;
1483     int flags;
1484 
1485     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1486 
1487     flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1488                                   mmu_idx, false, &host, &full, retaddr,
1489                                   true);
1490 
1491     /* Per the interface, size == 0 merely faults the access. */
1492     if (size == 0) {
1493         return NULL;
1494     }
1495 
1496     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1497         /* Handle watchpoints.  */
1498         if (flags & TLB_WATCHPOINT) {
1499             int wp_access = (access_type == MMU_DATA_STORE
1500                              ? BP_MEM_WRITE : BP_MEM_READ);
1501             cpu_check_watchpoint(env_cpu(env), addr, size,
1502                                  full->attrs, wp_access, retaddr);
1503         }
1504 
1505         /* Handle clean RAM pages.  */
1506         if (flags & TLB_NOTDIRTY) {
1507             notdirty_write(env_cpu(env), addr, size, full, retaddr);
1508         }
1509     }
1510 
1511     return host;
1512 }
1513 
1514 void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr,
1515                         MMUAccessType access_type, int mmu_idx)
1516 {
1517     CPUTLBEntryFull *full;
1518     void *host;
1519     int flags;
1520 
1521     flags = probe_access_internal(env_cpu(env), addr, 0, access_type,
1522                                   mmu_idx, true, &host, &full, 0, false);
1523 
1524     /* No combination of flags is expected by the caller. */
1525     return flags ? NULL : host;
1526 }
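
/*
 * Illustrative sketch (hypothetical caller): tlb_vaddr_to_host() is the
 * simplest fast path -- any flag at all forces NULL, so the caller only
 * needs a generic slow-path fallback such as the cpu_ld*_mmuidx_ra()
 * accessors.
 *
 *    void *p = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);
 *    uint32_t val = p ? ldl_p(p)
 *                     : cpu_ldl_mmuidx_ra(env, addr, mmu_idx, GETPC());
 */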
1527 
1528 /*
1529  * Return a ram_addr_t for the virtual address for execution.
1530  *
1531  * Return -1 if we can't translate and execute from an entire page
1532  * of RAM.  This will force us to execute by loading and translating
1533  * one insn at a time, without caching.
1534  *
1535  * NOTE: This function will trigger an exception if the page is
1536  * not executable.
1537  */
1538 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
1539                                         void **hostp)
1540 {
1541     CPUTLBEntryFull *full;
1542     void *p;
1543 
1544     (void)probe_access_internal(env_cpu(env), addr, 1, MMU_INST_FETCH,
1545                                 cpu_mmu_index(env_cpu(env), true), false,
1546                                 &p, &full, 0, false);
1547     if (p == NULL) {
1548         return -1;
1549     }
1550 
1551     if (full->lg_page_size < TARGET_PAGE_BITS) {
1552         return -1;
1553     }
1554 
1555     if (hostp) {
1556         *hostp = p;
1557     }
1558     return qemu_ram_addr_from_host_nofail(p);
1559 }
1560 
1561 /* Load/store with atomicity primitives. */
1562 #include "ldst_atomicity.c.inc"
1563 
1564 #ifdef CONFIG_PLUGIN
1565 /*
1566  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1567  * This should be a hot path as we will have just looked this path up
1568  * in the softmmu lookup code (or helper). We don't handle re-fills or
1569  * checking the victim table. This is purely informational.
1570  *
1571  * The one corner case is i/o write, which can cause changes to the
1572  * address space.  Those changes, and the corresponding tlb flush,
1573  * should be delayed until the next TB, so even then this ought not fail.
1574  * But check, just in case.
1575  */
1576 bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
1577                        bool is_store, struct qemu_plugin_hwaddr *data)
1578 {
1579     CPUTLBEntry *tlbe = tlb_entry(cpu, mmu_idx, addr);
1580     uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1581     MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
1582     uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
1583     CPUTLBEntryFull *full;
1584 
1585     if (unlikely(!tlb_hit(tlb_addr, addr))) {
1586         return false;
1587     }
1588 
1589     full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1590     data->phys_addr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
1591 
1592     /* We must have an iotlb entry for MMIO */
1593     if (tlb_addr & TLB_MMIO) {
1594         MemoryRegionSection *section =
1595             iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
1596                              full->attrs);
1597         data->is_io = true;
1598         data->mr = section->mr;
1599     } else {
1600         data->is_io = false;
1601         data->mr = NULL;
1602     }
1603     return true;
1604 }
1605 #endif
1606 
1607 /*
1608  * Probe for a load/store operation.
1609  * Return the host address and the relevant TLB flags.
1610  */
1611 
1612 typedef struct MMULookupPageData {
1613     CPUTLBEntryFull *full;
1614     void *haddr;
1615     vaddr addr;
1616     int flags;
1617     int size;
1618 } MMULookupPageData;
1619 
1620 typedef struct MMULookupLocals {
1621     MMULookupPageData page[2];
1622     MemOp memop;
1623     int mmu_idx;
1624 } MMULookupLocals;
1625 
1626 /**
1627  * mmu_lookup1: translate one page
1628  * @cpu: generic cpu state
1629  * @data: lookup parameters
1630  * @memop: memory operation for the access, or 0
1631  * @mmu_idx: virtual address context
1632  * @access_type: load/store/code
1633  * @ra: return address into tcg generated code, or 0
1634  *
1635  * Resolve the translation for the one page at @data.addr, filling in
1636  * the rest of @data with the results.  If the translation fails,
1637  * tlb_fill_align will longjmp out.  Return true if the softmmu tlb for
1638  * @mmu_idx may have been resized.
1639  */
1640 static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, MemOp memop,
1641                         int mmu_idx, MMUAccessType access_type, uintptr_t ra)
1642 {
1643     vaddr addr = data->addr;
1644     uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1645     CPUTLBEntry *entry = tlb_entry(cpu, mmu_idx, addr);
1646     uint64_t tlb_addr = tlb_read_idx(entry, access_type);
1647     bool maybe_resized = false;
1648     CPUTLBEntryFull *full;
1649     int flags;
1650 
1651     /* If the TLB entry is for a different page, reload and try again.  */
1652     if (!tlb_hit(tlb_addr, addr)) {
1653         if (!victim_tlb_hit(cpu, mmu_idx, index, access_type,
1654                             addr & TARGET_PAGE_MASK)) {
1655             tlb_fill_align(cpu, addr, access_type, mmu_idx,
1656                            memop, data->size, false, ra);
1657             maybe_resized = true;
1658             index = tlb_index(cpu, mmu_idx, addr);
1659             entry = tlb_entry(cpu, mmu_idx, addr);
1660         }
1661         tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
1662     }
1663 
1664     full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1665     flags = tlb_addr & (TLB_FLAGS_MASK & ~TLB_FORCE_SLOW);
1666     flags |= full->slow_flags[access_type];
1667 
1668     if (likely(!maybe_resized)) {
1669         /* Alignment has not been checked by tlb_fill_align. */
1670         int a_bits = memop_alignment_bits(memop);
1671 
1672         /*
1673          * This alignment check differs from the one above, in that this is
1674          * based on the atomicity of the operation. The intended use case is
1675          * the ARM memory type field of each PTE, where accesses to pages with
1676          * Device memory type require alignment.
1677          */
1678         if (unlikely(flags & TLB_CHECK_ALIGNED)) {
1679             int at_bits = memop_atomicity_bits(memop);
1680             a_bits = MAX(a_bits, at_bits);
1681         }
1682         if (unlikely(addr & ((1 << a_bits) - 1))) {
1683             cpu_unaligned_access(cpu, addr, access_type, mmu_idx, ra);
1684         }
1685     }
1686 
1687     data->full = full;
1688     data->flags = flags;
1689     /* Compute haddr speculatively; depending on flags it might be invalid. */
1690     data->haddr = (void *)((uintptr_t)addr + entry->addend);
1691 
1692     return maybe_resized;
1693 }
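
/*
 * Worked example for the alignment check above (values assumed for
 * illustration): with memop = MO_ALIGN | MO_64, memop_alignment_bits()
 * yields 3, so any address with one of the low three bits set takes
 * cpu_unaligned_access().  If the page additionally carries
 * TLB_CHECK_ALIGNED (e.g. Arm Device memory), memop_atomicity_bits()
 * can raise the requirement further, but never lower it, because of
 * the MAX() above.
 */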
1694 
1695 /**
1696  * mmu_watch_or_dirty
1697  * @cpu: generic cpu state
1698  * @data: lookup parameters
1699  * @access_type: load/store/code
1700  * @ra: return address into tcg generated code, or 0
1701  *
1702  * Trigger watchpoints for @data.addr:@data.size;
1703  * record writes to protected clean pages.
1704  */
1705 static void mmu_watch_or_dirty(CPUState *cpu, MMULookupPageData *data,
1706                                MMUAccessType access_type, uintptr_t ra)
1707 {
1708     CPUTLBEntryFull *full = data->full;
1709     vaddr addr = data->addr;
1710     int flags = data->flags;
1711     int size = data->size;
1712 
1713     /* On watchpoint hit, this will longjmp out.  */
1714     if (flags & TLB_WATCHPOINT) {
1715         int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
1716         cpu_check_watchpoint(cpu, addr, size, full->attrs, wp, ra);
1717         flags &= ~TLB_WATCHPOINT;
1718     }
1719 
1720     /* Note that notdirty is only set for writes. */
1721     if (flags & TLB_NOTDIRTY) {
1722         notdirty_write(cpu, addr, size, full, ra);
1723         flags &= ~TLB_NOTDIRTY;
1724     }
1725     data->flags = flags;
1726 }
1727 
1728 /**
1729  * mmu_lookup: translate page(s)
1730  * @cpu: generic cpu state
1731  * @addr: virtual address
1732  * @oi: combined mmu_idx and MemOp
1733  * @ra: return address into tcg generated code, or 0
1734  * @access_type: load/store/code
1735  * @l: output result
1736  *
1737  * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
1738  * bytes.  Return true if the lookup crosses a page boundary.
1739  */
1740 static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1741                        uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
1742 {
1743     bool crosspage;
1744     int flags;
1745 
1746     l->memop = get_memop(oi);
1747     l->mmu_idx = get_mmuidx(oi);
1748 
1749     tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
1750 
1751     l->page[0].addr = addr;
1752     l->page[0].size = memop_size(l->memop);
1753     l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
1754     l->page[1].size = 0;
1755     crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
1756 
1757     if (likely(!crosspage)) {
1758         mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
1759 
1760         flags = l->page[0].flags;
1761         if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1762             mmu_watch_or_dirty(cpu, &l->page[0], type, ra);
1763         }
1764         if (unlikely(flags & TLB_BSWAP)) {
1765             l->memop ^= MO_BSWAP;
1766         }
1767     } else {
1768         /* Finish computing the page-crossing split. */
1769         int size0 = l->page[1].addr - addr;
1770         l->page[1].size = l->page[0].size - size0;
1771         l->page[0].size = size0;
1772 
1773         /*
1774          * Look up both pages, recognizing exceptions from either.  If the
1775          * second lookup may have resized, refresh the first CPUTLBEntryFull.
1776          */
1777         mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
1778         if (mmu_lookup1(cpu, &l->page[1], 0, l->mmu_idx, type, ra)) {
1779             uintptr_t index = tlb_index(cpu, l->mmu_idx, addr);
1780             l->page[0].full = &cpu->neg.tlb.d[l->mmu_idx].fulltlb[index];
1781         }
1782 
1783         flags = l->page[0].flags | l->page[1].flags;
1784         if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1785             mmu_watch_or_dirty(cpu, &l->page[0], type, ra);
1786             mmu_watch_or_dirty(cpu, &l->page[1], type, ra);
1787         }
1788 
1789         /*
1790          * Since target/sparc is the only user of TLB_BSWAP, and all
1791          * Sparc accesses are aligned, any treatment across two pages
1792          * would be arbitrary.  Refuse it until there's a use.
1793          */
1794         tcg_debug_assert((flags & TLB_BSWAP) == 0);
1795     }
1796 
1797     return crosspage;
1798 }
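
/*
 * Worked example of the page-crossing split above (4KiB pages assumed):
 * an 8-byte access at addr = 0x1ffd gives
 *    page[1].addr = (0x1ffd + 8 - 1) & TARGET_PAGE_MASK = 0x2000
 *    page[0].size = 0x2000 - 0x1ffd = 3
 *    page[1].size = 8 - 3 = 5
 * so three bytes come from the first page and five from the second.
 */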
1799 
1800 /*
1801  * Probe for an atomic operation.  Do not allow unaligned operations,
1802  * or i/o operations to proceed.  Return the host address.
1803  */
1804 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1805                                int size, uintptr_t retaddr)
1806 {
1807     uintptr_t mmu_idx = get_mmuidx(oi);
1808     MemOp mop = get_memop(oi);
1809     uintptr_t index;
1810     CPUTLBEntry *tlbe;
1811     vaddr tlb_addr;
1812     void *hostaddr;
1813     CPUTLBEntryFull *full;
1814     bool did_tlb_fill = false;
1815 
1816     tcg_debug_assert(mmu_idx < NB_MMU_MODES);
1817 
1818     /* Adjust the given return address.  */
1819     retaddr -= GETPC_ADJ;
1820 
1821     index = tlb_index(cpu, mmu_idx, addr);
1822     tlbe = tlb_entry(cpu, mmu_idx, addr);
1823 
1824     /* Check TLB entry and enforce page permissions.  */
1825     tlb_addr = tlb_addr_write(tlbe);
1826     if (!tlb_hit(tlb_addr, addr)) {
1827         if (!victim_tlb_hit(cpu, mmu_idx, index, MMU_DATA_STORE,
1828                             addr & TARGET_PAGE_MASK)) {
1829             tlb_fill_align(cpu, addr, MMU_DATA_STORE, mmu_idx,
1830                            mop, size, false, retaddr);
1831             did_tlb_fill = true;
1832             index = tlb_index(cpu, mmu_idx, addr);
1833             tlbe = tlb_entry(cpu, mmu_idx, addr);
1834         }
1835         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1836     }
1837 
1838     /*
1839      * Let the guest notice RMW on a write-only page.
1840      * We have just verified that the page is writable.
1841      * Subpage lookups may have left TLB_INVALID_MASK set,
1842      * but addr_read will only be -1 if PAGE_READ was unset.
1843      */
1844     if (unlikely(tlbe->addr_read == -1)) {
1845         tlb_fill_align(cpu, addr, MMU_DATA_LOAD, mmu_idx,
1846                        0, size, false, retaddr);
1847         /*
1848          * Since we don't support reads and writes to different
1849          * addresses, and we do have the proper page loaded for
1850          * write, this shouldn't ever return.
1851          */
1852         g_assert_not_reached();
1853     }
1854 
1855     /* Enforce guest required alignment, if not handled by tlb_fill_align. */
1856     if (!did_tlb_fill && (addr & ((1 << memop_alignment_bits(mop)) - 1))) {
1857         cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr);
1858     }
1859 
1860     /* Enforce qemu required alignment.  */
1861     if (unlikely(addr & (size - 1))) {
1862         /*
1863          * We get here if guest alignment was not requested, or was not
1864          * enforced by cpu_unaligned_access or tlb_fill_align above.
1865          * We might widen the access and emulate, but for now
1866          * mark an exception and exit the cpu loop.
1867          */
1868         goto stop_the_world;
1869     }
1870 
1871     /* Collect tlb flags for read. */
1872     tlb_addr |= tlbe->addr_read;
1873 
1874     /* Notice an IO access or a needs-MMU-lookup access */
1875     if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
1876         /* There's really nothing that can be done to
1877            support this apart from stop-the-world.  */
1878         goto stop_the_world;
1879     }
1880 
1881     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1882     full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1883 
1884     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1885         notdirty_write(cpu, addr, size, full, retaddr);
1886     }
1887 
1888     if (unlikely(tlb_addr & TLB_FORCE_SLOW)) {
1889         int wp_flags = 0;
1890 
1891         if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) {
1892             wp_flags |= BP_MEM_WRITE;
1893         }
1894         if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) {
1895             wp_flags |= BP_MEM_READ;
1896         }
1897         if (wp_flags) {
1898             cpu_check_watchpoint(cpu, addr, size,
1899                                  full->attrs, wp_flags, retaddr);
1900         }
1901     }
1902 
1903     return hostaddr;
1904 
1905  stop_the_world:
1906     cpu_loop_exit_atomic(cpu, retaddr);
1907 }
1908 
1909 /*
1910  * Load Helpers
1911  *
1912  * We support two different access types. SOFTMMU_CODE_ACCESS is
1913  * specifically for reading instructions from system memory. It is
1914  * called by the translation loop and in some helpers where the code
1915  * is disassembled. It shouldn't be called directly by guest code.
1916  *
1917  * For the benefit of TCG generated code, we want to avoid the
1918  * complication of ABI-specific return type promotion and always
1919  * return a value extended to the register size of the host. This is
1920  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1921  * data, and for that we always have uint64_t.
1922  *
1923  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1924  */
1925 
1926 /**
1927  * do_ld_mmio_beN:
1928  * @cpu: generic cpu state
1929  * @full: page parameters
1930  * @ret_be: accumulated data
1931  * @addr: virtual address
1932  * @size: number of bytes
1933  * @mmu_idx: virtual address context
1934  * @ra: return address into tcg generated code, or 0
1935  * Context: BQL held
1936  *
1937  * Load @size bytes from @addr, which is memory-mapped i/o.
1938  * The bytes are concatenated in big-endian order with @ret_be.
1939  */
1940 static uint64_t int_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1941                                 uint64_t ret_be, vaddr addr, int size,
1942                                 int mmu_idx, MMUAccessType type, uintptr_t ra,
1943                                 MemoryRegion *mr, hwaddr mr_offset)
1944 {
1945     do {
1946         MemOp this_mop;
1947         unsigned this_size;
1948         uint64_t val;
1949         MemTxResult r;
1950 
1951         /* Read aligned pieces up to 8 bytes. */
1952         this_mop = ctz32(size | (int)addr | 8);
1953         this_size = 1 << this_mop;
1954         this_mop |= MO_BE;
1955 
1956         r = memory_region_dispatch_read(mr, mr_offset, &val,
1957                                         this_mop, full->attrs);
1958         if (unlikely(r != MEMTX_OK)) {
1959             io_failed(cpu, full, addr, this_size, type, mmu_idx, r, ra);
1960         }
1961         if (this_size == 8) {
1962             return val;
1963         }
1964 
1965         ret_be = (ret_be << (this_size * 8)) | val;
1966         addr += this_size;
1967         mr_offset += this_size;
1968         size -= this_size;
1969     } while (size);
1970 
1971     return ret_be;
1972 }
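
/*
 * Worked example of the piece selection above (addresses assumed):
 * for addr = 0x...1002 and size = 6,
 *    pass 1: ctz32(6 | 2 | 8) = 1 -> one 2-byte big-endian read
 *    pass 2: addr = 0x...1004, size = 4: ctz32(4 | 4 | 8) = 2 -> 4-byte read
 * i.e. each iteration reads the largest naturally aligned piece, capped
 * at 8 bytes, that still fits the remaining size.
 */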
1973 
1974 static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1975                                uint64_t ret_be, vaddr addr, int size,
1976                                int mmu_idx, MMUAccessType type, uintptr_t ra)
1977 {
1978     MemoryRegionSection *section;
1979     MemoryRegion *mr;
1980     hwaddr mr_offset;
1981     MemTxAttrs attrs;
1982 
1983     tcg_debug_assert(size > 0 && size <= 8);
1984 
1985     attrs = full->attrs;
1986     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
1987     mr = section->mr;
1988 
1989     BQL_LOCK_GUARD();
1990     return int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx,
1991                            type, ra, mr, mr_offset);
1992 }
1993 
1994 static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1995                                uint64_t ret_be, vaddr addr, int size,
1996                                int mmu_idx, uintptr_t ra)
1997 {
1998     MemoryRegionSection *section;
1999     MemoryRegion *mr;
2000     hwaddr mr_offset;
2001     MemTxAttrs attrs;
2002     uint64_t a, b;
2003 
2004     tcg_debug_assert(size > 8 && size <= 16);
2005 
2006     attrs = full->attrs;
2007     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2008     mr = section->mr;
2009 
2010     BQL_LOCK_GUARD();
2011     a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx,
2012                         MMU_DATA_LOAD, ra, mr, mr_offset);
2013     b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx,
2014                         MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
2015     return int128_make128(b, a);
2016 }
2017 
2018 /**
2019  * do_ld_bytes_beN
2020  * @p: translation parameters
2021  * @ret_be: accumulated data
2022  *
2023  * Load @p->size bytes from @p->haddr, which is RAM.
2024  * The bytes are concatenated in big-endian order with @ret_be.
2025  */
2026 static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
2027 {
2028     uint8_t *haddr = p->haddr;
2029     int i, size = p->size;
2030 
2031     for (i = 0; i < size; i++) {
2032         ret_be = (ret_be << 8) | haddr[i];
2033     }
2034     return ret_be;
2035 }
2036 
2037 /**
2038  * do_ld_parts_beN
2039  * @p: translation parameters
2040  * @ret_be: accumulated data
2041  *
2042  * As do_ld_bytes_beN, but atomically on each aligned part.
2043  */
2044 static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
2045 {
2046     void *haddr = p->haddr;
2047     int size = p->size;
2048 
2049     do {
2050         uint64_t x;
2051         int n;
2052 
2053         /*
2054          * Find minimum of alignment and size.
2055          * This is slightly stronger than required by MO_ATOM_SUBALIGN, which
2056          * would have only checked the low bits of addr|size once at the start,
2057          * but is just as easy.
2058          */
2059         switch (((uintptr_t)haddr | size) & 7) {
2060         case 4:
2061             x = cpu_to_be32(load_atomic4(haddr));
2062             ret_be = (ret_be << 32) | x;
2063             n = 4;
2064             break;
2065         case 2:
2066         case 6:
2067             x = cpu_to_be16(load_atomic2(haddr));
2068             ret_be = (ret_be << 16) | x;
2069             n = 2;
2070             break;
2071         default:
2072             x = *(uint8_t *)haddr;
2073             ret_be = (ret_be << 8) | x;
2074             n = 1;
2075             break;
2076         case 0:
2077             g_assert_not_reached();
2078         }
2079         haddr += n;
2080         size -= n;
2081     } while (size != 0);
2082     return ret_be;
2083 }
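
/*
 * Worked example for the switch above (host address assumed): with
 * haddr ending in ...2 and size = 6, (haddr | size) & 7 == 6, so a
 * 2-byte atomic load is used first; the remainder then has haddr
 * ending in ...4 and size = 4, giving case 4 and one atomic 4-byte
 * load.  Case 0 (an 8-byte-aligned span whose size is a multiple of
 * 8) is asserted not to reach this loop.
 */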
2084 
2085 /**
2086  * do_ld_whole_be4
2087  * @p: translation parameters
2088  * @ret_be: accumulated data
2089  *
2090  * As do_ld_bytes_beN, but with one atomic load.
2091  * Four aligned bytes are guaranteed to cover the load.
2092  */
2093 static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
2094 {
2095     int o = p->addr & 3;
2096     uint32_t x = load_atomic4(p->haddr - o);
2097 
2098     x = cpu_to_be32(x);
2099     x <<= o * 8;
2100     x >>= (4 - p->size) * 8;
2101     return (ret_be << (p->size * 8)) | x;
2102 }
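
/*
 * Worked example for the shifts above (values assumed): with p->addr
 * ending in ...1 and p->size = 3, o = 1.  The aligned word at
 * haddr - 1 is loaded atomically; after cpu_to_be32() the unwanted
 * leading byte is discarded by "x <<= 8" and the three wanted bytes
 * are right-aligned by "x >>= (4 - 3) * 8" before being merged into
 * ret_be.
 */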
2103 
2104 /**
2105  * do_ld_whole_be8
2106  * @p: translation parameters
2107  * @ret_be: accumulated data
2108  *
2109  * As do_ld_bytes_beN, but with one atomic load.
2110  * Eight aligned bytes are guaranteed to cover the load.
2111  */
2112 static uint64_t do_ld_whole_be8(CPUState *cpu, uintptr_t ra,
2113                                 MMULookupPageData *p, uint64_t ret_be)
2114 {
2115     int o = p->addr & 7;
2116     uint64_t x = load_atomic8_or_exit(cpu, ra, p->haddr - o);
2117 
2118     x = cpu_to_be64(x);
2119     x <<= o * 8;
2120     x >>= (8 - p->size) * 8;
2121     return (ret_be << (p->size * 8)) | x;
2122 }
2123 
2124 /**
2125  * do_ld_whole_be16
2126  * @p: translation parameters
2127  * @ret_be: accumulated data
2128  *
2129  * As do_ld_bytes_beN, but with one atomic load.
2130  * 16 aligned bytes are guaranteed to cover the load.
2131  */
2132 static Int128 do_ld_whole_be16(CPUState *cpu, uintptr_t ra,
2133                                MMULookupPageData *p, uint64_t ret_be)
2134 {
2135     int o = p->addr & 15;
2136     Int128 x, y = load_atomic16_or_exit(cpu, ra, p->haddr - o);
2137     int size = p->size;
2138 
2139     if (!HOST_BIG_ENDIAN) {
2140         y = bswap128(y);
2141     }
2142     y = int128_lshift(y, o * 8);
2143     y = int128_urshift(y, (16 - size) * 8);
2144     x = int128_make64(ret_be);
2145     x = int128_lshift(x, size * 8);
2146     return int128_or(x, y);
2147 }
2148 
2149 /*
2150  * Wrapper for the above.
2151  */
2152 static uint64_t do_ld_beN(CPUState *cpu, MMULookupPageData *p,
2153                           uint64_t ret_be, int mmu_idx, MMUAccessType type,
2154                           MemOp mop, uintptr_t ra)
2155 {
2156     MemOp atom;
2157     unsigned tmp, half_size;
2158 
2159     if (unlikely(p->flags & TLB_MMIO)) {
2160         return do_ld_mmio_beN(cpu, p->full, ret_be, p->addr, p->size,
2161                               mmu_idx, type, ra);
2162     }
2163 
2164     /*
2165      * It is a given that we cross a page and therefore there is no
2166      * atomicity for the load as a whole, but subobjects may need attention.
2167      */
2168     atom = mop & MO_ATOM_MASK;
2169     switch (atom) {
2170     case MO_ATOM_SUBALIGN:
2171         return do_ld_parts_beN(p, ret_be);
2172 
2173     case MO_ATOM_IFALIGN_PAIR:
2174     case MO_ATOM_WITHIN16_PAIR:
2175         tmp = mop & MO_SIZE;
2176         tmp = tmp ? tmp - 1 : 0;
2177         half_size = 1 << tmp;
2178         if (atom == MO_ATOM_IFALIGN_PAIR
2179             ? p->size == half_size
2180             : p->size >= half_size) {
2181             if (!HAVE_al8_fast && p->size < 4) {
2182                 return do_ld_whole_be4(p, ret_be);
2183             } else {
2184                 return do_ld_whole_be8(cpu, ra, p, ret_be);
2185             }
2186         }
2187         /* fall through */
2188 
2189     case MO_ATOM_IFALIGN:
2190     case MO_ATOM_WITHIN16:
2191     case MO_ATOM_NONE:
2192         return do_ld_bytes_beN(p, ret_be);
2193 
2194     default:
2195         g_assert_not_reached();
2196     }
2197 }
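
/*
 * Worked example for the pair cases above (sizes assumed): for a 4-byte
 * load, mop & MO_SIZE is MO_32, so half_size = 2.  With
 * MO_ATOM_IFALIGN_PAIR the whole-word atomic path is taken only when
 * exactly two bytes land on this page; with MO_ATOM_WITHIN16_PAIR it is
 * taken whenever at least two bytes land here.  Otherwise the bytes are
 * simply loaded one at a time by do_ld_bytes_beN().
 */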
2198 
2199 /*
2200  * Wrapper for the above, for 8 < size < 16.
2201  */
2202 static Int128 do_ld16_beN(CPUState *cpu, MMULookupPageData *p,
2203                           uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
2204 {
2205     int size = p->size;
2206     uint64_t b;
2207     MemOp atom;
2208 
2209     if (unlikely(p->flags & TLB_MMIO)) {
2210         return do_ld16_mmio_beN(cpu, p->full, a, p->addr, size, mmu_idx, ra);
2211     }
2212 
2213     /*
2214      * It is a given that we cross a page and therefore there is no
2215      * atomicity for the load as a whole, but subobjects may need attention.
2216      */
2217     atom = mop & MO_ATOM_MASK;
2218     switch (atom) {
2219     case MO_ATOM_SUBALIGN:
2220         p->size = size - 8;
2221         a = do_ld_parts_beN(p, a);
2222         p->haddr += size - 8;
2223         p->size = 8;
2224         b = do_ld_parts_beN(p, 0);
2225         break;
2226 
2227     case MO_ATOM_WITHIN16_PAIR:
2228         /* Since size > 8, this is the half that must be atomic. */
2229         return do_ld_whole_be16(cpu, ra, p, a);
2230 
2231     case MO_ATOM_IFALIGN_PAIR:
2232         /*
2233          * Since size > 8, both halves are misaligned,
2234          * and so neither is atomic.
2235          */
2236     case MO_ATOM_IFALIGN:
2237     case MO_ATOM_WITHIN16:
2238     case MO_ATOM_NONE:
2239         p->size = size - 8;
2240         a = do_ld_bytes_beN(p, a);
2241         b = ldq_be_p(p->haddr + size - 8);
2242         break;
2243 
2244     default:
2245         g_assert_not_reached();
2246     }
2247 
2248     return int128_make128(b, a);
2249 }
2250 
2251 static uint8_t do_ld_1(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2252                        MMUAccessType type, uintptr_t ra)
2253 {
2254     if (unlikely(p->flags & TLB_MMIO)) {
2255         return do_ld_mmio_beN(cpu, p->full, 0, p->addr, 1, mmu_idx, type, ra);
2256     } else {
2257         return *(uint8_t *)p->haddr;
2258     }
2259 }
2260 
2261 static uint16_t do_ld_2(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2262                         MMUAccessType type, MemOp memop, uintptr_t ra)
2263 {
2264     uint16_t ret;
2265 
2266     if (unlikely(p->flags & TLB_MMIO)) {
2267         ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 2, mmu_idx, type, ra);
2268         if ((memop & MO_BSWAP) == MO_LE) {
2269             ret = bswap16(ret);
2270         }
2271     } else {
2272         /* Perform the load host endian, then swap if necessary. */
2273         ret = load_atom_2(cpu, ra, p->haddr, memop);
2274         if (memop & MO_BSWAP) {
2275             ret = bswap16(ret);
2276         }
2277     }
2278     return ret;
2279 }
2280 
2281 static uint32_t do_ld_4(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2282                         MMUAccessType type, MemOp memop, uintptr_t ra)
2283 {
2284     uint32_t ret;
2285 
2286     if (unlikely(p->flags & TLB_MMIO)) {
2287         ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 4, mmu_idx, type, ra);
2288         if ((memop & MO_BSWAP) == MO_LE) {
2289             ret = bswap32(ret);
2290         }
2291     } else {
2292         /* Perform the load host endian. */
2293         ret = load_atom_4(cpu, ra, p->haddr, memop);
2294         if (memop & MO_BSWAP) {
2295             ret = bswap32(ret);
2296         }
2297     }
2298     return ret;
2299 }
2300 
2301 static uint64_t do_ld_8(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2302                         MMUAccessType type, MemOp memop, uintptr_t ra)
2303 {
2304     uint64_t ret;
2305 
2306     if (unlikely(p->flags & TLB_MMIO)) {
2307         ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 8, mmu_idx, type, ra);
2308         if ((memop & MO_BSWAP) == MO_LE) {
2309             ret = bswap64(ret);
2310         }
2311     } else {
2312         /* Perform the load host endian. */
2313         ret = load_atom_8(cpu, ra, p->haddr, memop);
2314         if (memop & MO_BSWAP) {
2315             ret = bswap64(ret);
2316         }
2317     }
2318     return ret;
2319 }
2320 
2321 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2322                           uintptr_t ra, MMUAccessType access_type)
2323 {
2324     MMULookupLocals l;
2325     bool crosspage;
2326 
2327     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2328     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2329     tcg_debug_assert(!crosspage);
2330 
2331     return do_ld_1(cpu, &l.page[0], l.mmu_idx, access_type, ra);
2332 }
2333 
2334 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2335                            uintptr_t ra, MMUAccessType access_type)
2336 {
2337     MMULookupLocals l;
2338     bool crosspage;
2339     uint16_t ret;
2340     uint8_t a, b;
2341 
2342     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2343     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2344     if (likely(!crosspage)) {
2345         return do_ld_2(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2346     }
2347 
2348     a = do_ld_1(cpu, &l.page[0], l.mmu_idx, access_type, ra);
2349     b = do_ld_1(cpu, &l.page[1], l.mmu_idx, access_type, ra);
2350 
2351     if ((l.memop & MO_BSWAP) == MO_LE) {
2352         ret = a | (b << 8);
2353     } else {
2354         ret = b | (a << 8);
2355     }
2356     return ret;
2357 }
2358 
2359 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2360                            uintptr_t ra, MMUAccessType access_type)
2361 {
2362     MMULookupLocals l;
2363     bool crosspage;
2364     uint32_t ret;
2365 
2366     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2367     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2368     if (likely(!crosspage)) {
2369         return do_ld_4(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2370     }
2371 
2372     ret = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
2373     ret = do_ld_beN(cpu, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
2374     if ((l.memop & MO_BSWAP) == MO_LE) {
2375         ret = bswap32(ret);
2376     }
2377     return ret;
2378 }
2379 
2380 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2381                            uintptr_t ra, MMUAccessType access_type)
2382 {
2383     MMULookupLocals l;
2384     bool crosspage;
2385     uint64_t ret;
2386 
2387     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2388     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2389     if (likely(!crosspage)) {
2390         return do_ld_8(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2391     }
2392 
2393     ret = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
2394     ret = do_ld_beN(cpu, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
2395     if ((l.memop & MO_BSWAP) == MO_LE) {
2396         ret = bswap64(ret);
2397     }
2398     return ret;
2399 }
2400 
2401 static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
2402                           MemOpIdx oi, uintptr_t ra)
2403 {
2404     MMULookupLocals l;
2405     bool crosspage;
2406     uint64_t a, b;
2407     Int128 ret;
2408     int first;
2409 
2410     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2411     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_LOAD, &l);
2412     if (likely(!crosspage)) {
2413         if (unlikely(l.page[0].flags & TLB_MMIO)) {
2414             ret = do_ld16_mmio_beN(cpu, l.page[0].full, 0, addr, 16,
2415                                    l.mmu_idx, ra);
2416             if ((l.memop & MO_BSWAP) == MO_LE) {
2417                 ret = bswap128(ret);
2418             }
2419         } else {
2420             /* Perform the load host endian. */
2421             ret = load_atom_16(cpu, ra, l.page[0].haddr, l.memop);
2422             if (l.memop & MO_BSWAP) {
2423                 ret = bswap128(ret);
2424             }
2425         }
2426         return ret;
2427     }
2428 
2429     first = l.page[0].size;
2430     if (first == 8) {
2431         MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
2432 
2433         a = do_ld_8(cpu, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
2434         b = do_ld_8(cpu, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
2435         if ((mop8 & MO_BSWAP) == MO_LE) {
2436             ret = int128_make128(a, b);
2437         } else {
2438             ret = int128_make128(b, a);
2439         }
2440         return ret;
2441     }
2442 
2443     if (first < 8) {
2444         a = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx,
2445                       MMU_DATA_LOAD, l.memop, ra);
2446         ret = do_ld16_beN(cpu, &l.page[1], a, l.mmu_idx, l.memop, ra);
2447     } else {
2448         ret = do_ld16_beN(cpu, &l.page[0], 0, l.mmu_idx, l.memop, ra);
2449         b = int128_getlo(ret);
2450         ret = int128_lshift(ret, l.page[1].size * 8);
2451         a = int128_gethi(ret);
2452         b = do_ld_beN(cpu, &l.page[1], b, l.mmu_idx,
2453                       MMU_DATA_LOAD, l.memop, ra);
2454         ret = int128_make128(b, a);
2455     }
2456     if ((l.memop & MO_BSWAP) == MO_LE) {
2457         ret = bswap128(ret);
2458     }
2459     return ret;
2460 }
2461 
2462 /*
2463  * Store Helpers
2464  */
2465 
2466 /**
2467  * do_st_mmio_leN:
2468  * @cpu: generic cpu state
2469  * @full: page parameters
2470  * @val_le: data to store
2471  * @addr: virtual address
2472  * @size: number of bytes
2473  * @mmu_idx: virtual address context
2474  * @ra: return address into tcg generated code, or 0
2475  * Context: BQL held
2476  *
2477  * Store @size bytes at @addr, which is memory-mapped i/o.
2478  * The bytes to store are extracted in little-endian order from @val_le;
2479  * return the bytes of @val_le beyond @p->size that have not been stored.
2480  */
2481 static uint64_t int_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2482                                 uint64_t val_le, vaddr addr, int size,
2483                                 int mmu_idx, uintptr_t ra,
2484                                 MemoryRegion *mr, hwaddr mr_offset)
2485 {
2486     do {
2487         MemOp this_mop;
2488         unsigned this_size;
2489         MemTxResult r;
2490 
2491         /* Store aligned pieces up to 8 bytes. */
2492         this_mop = ctz32(size | (int)addr | 8);
2493         this_size = 1 << this_mop;
2494         this_mop |= MO_LE;
2495 
2496         r = memory_region_dispatch_write(mr, mr_offset, val_le,
2497                                          this_mop, full->attrs);
2498         if (unlikely(r != MEMTX_OK)) {
2499             io_failed(cpu, full, addr, this_size, MMU_DATA_STORE,
2500                       mmu_idx, r, ra);
2501         }
2502         if (this_size == 8) {
2503             return 0;
2504         }
2505 
2506         val_le >>= this_size * 8;
2507         addr += this_size;
2508         mr_offset += this_size;
2509         size -= this_size;
2510     } while (size);
2511 
2512     return val_le;
2513 }
2514 
2515 static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2516                                uint64_t val_le, vaddr addr, int size,
2517                                int mmu_idx, uintptr_t ra)
2518 {
2519     MemoryRegionSection *section;
2520     hwaddr mr_offset;
2521     MemoryRegion *mr;
2522     MemTxAttrs attrs;
2523 
2524     tcg_debug_assert(size > 0 && size <= 8);
2525 
2526     attrs = full->attrs;
2527     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2528     mr = section->mr;
2529 
2530     BQL_LOCK_GUARD();
2531     return int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx,
2532                            ra, mr, mr_offset);
2533 }
2534 
2535 static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2536                                  Int128 val_le, vaddr addr, int size,
2537                                  int mmu_idx, uintptr_t ra)
2538 {
2539     MemoryRegionSection *section;
2540     MemoryRegion *mr;
2541     hwaddr mr_offset;
2542     MemTxAttrs attrs;
2543 
2544     tcg_debug_assert(size > 8 && size <= 16);
2545 
2546     attrs = full->attrs;
2547     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2548     mr = section->mr;
2549 
2550     BQL_LOCK_GUARD();
2551     int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8,
2552                     mmu_idx, ra, mr, mr_offset);
2553     return int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8,
2554                            size - 8, mmu_idx, ra, mr, mr_offset + 8);
2555 }
2556 
2557 /*
2558  * Wrapper for the above.
2559  */
2560 static uint64_t do_st_leN(CPUState *cpu, MMULookupPageData *p,
2561                           uint64_t val_le, int mmu_idx,
2562                           MemOp mop, uintptr_t ra)
2563 {
2564     MemOp atom;
2565     unsigned tmp, half_size;
2566 
2567     if (unlikely(p->flags & TLB_MMIO)) {
2568         return do_st_mmio_leN(cpu, p->full, val_le, p->addr,
2569                               p->size, mmu_idx, ra);
2570     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2571         return val_le >> (p->size * 8);
2572     }
2573 
2574     /*
2575      * It is a given that we cross a page and therefore there is no atomicity
2576      * for the store as a whole, but subobjects may need attention.
2577      */
2578     atom = mop & MO_ATOM_MASK;
2579     switch (atom) {
2580     case MO_ATOM_SUBALIGN:
2581         return store_parts_leN(p->haddr, p->size, val_le);
2582 
2583     case MO_ATOM_IFALIGN_PAIR:
2584     case MO_ATOM_WITHIN16_PAIR:
2585         tmp = mop & MO_SIZE;
2586         tmp = tmp ? tmp - 1 : 0;
2587         half_size = 1 << tmp;
2588         if (atom == MO_ATOM_IFALIGN_PAIR
2589             ? p->size == half_size
2590             : p->size >= half_size) {
2591             if (!HAVE_al8_fast && p->size <= 4) {
2592                 return store_whole_le4(p->haddr, p->size, val_le);
2593             } else if (HAVE_al8) {
2594                 return store_whole_le8(p->haddr, p->size, val_le);
2595             } else {
2596                 cpu_loop_exit_atomic(cpu, ra);
2597             }
2598         }
2599         /* fall through */
2600 
2601     case MO_ATOM_IFALIGN:
2602     case MO_ATOM_WITHIN16:
2603     case MO_ATOM_NONE:
2604         return store_bytes_leN(p->haddr, p->size, val_le);
2605 
2606     default:
2607         g_assert_not_reached();
2608     }
2609 }
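
/*
 * Worked example of the return-value contract (value assumed): a
 * cross-page 4-byte store of 0x11223344 with page[0].size = 1 first
 * stores the low byte 0x44 on the first page and returns 0x00112233;
 * do_st4_mmu then passes that remainder to the second call, which
 * stores 0x33, 0x22, 0x11 on the second page, preserving the overall
 * little-endian byte order.
 */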
2610 
2611 /*
2612  * Wrapper for the above, for 8 < size < 16.
2613  */
2614 static uint64_t do_st16_leN(CPUState *cpu, MMULookupPageData *p,
2615                             Int128 val_le, int mmu_idx,
2616                             MemOp mop, uintptr_t ra)
2617 {
2618     int size = p->size;
2619     MemOp atom;
2620 
2621     if (unlikely(p->flags & TLB_MMIO)) {
2622         return do_st16_mmio_leN(cpu, p->full, val_le, p->addr,
2623                                 size, mmu_idx, ra);
2624     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2625         return int128_gethi(val_le) >> ((size - 8) * 8);
2626     }
2627 
2628     /*
2629      * It is a given that we cross a page and therefore there is no atomicity
2630      * for the store as a whole, but subobjects may need attention.
2631      */
2632     atom = mop & MO_ATOM_MASK;
2633     switch (atom) {
2634     case MO_ATOM_SUBALIGN:
2635         store_parts_leN(p->haddr, 8, int128_getlo(val_le));
2636         return store_parts_leN(p->haddr + 8, p->size - 8,
2637                                int128_gethi(val_le));
2638 
2639     case MO_ATOM_WITHIN16_PAIR:
2640         /* Since size > 8, this is the half that must be atomic. */
2641         if (!HAVE_CMPXCHG128) {
2642             cpu_loop_exit_atomic(cpu, ra);
2643         }
2644         return store_whole_le16(p->haddr, p->size, val_le);
2645 
2646     case MO_ATOM_IFALIGN_PAIR:
2647         /*
2648          * Since size > 8, both halves are misaligned,
2649          * and so neither is atomic.
2650          */
2651     case MO_ATOM_IFALIGN:
2652     case MO_ATOM_WITHIN16:
2653     case MO_ATOM_NONE:
2654         stq_le_p(p->haddr, int128_getlo(val_le));
2655         return store_bytes_leN(p->haddr + 8, p->size - 8,
2656                                int128_gethi(val_le));
2657 
2658     default:
2659         g_assert_not_reached();
2660     }
2661 }
2662 
2663 static void do_st_1(CPUState *cpu, MMULookupPageData *p, uint8_t val,
2664                     int mmu_idx, uintptr_t ra)
2665 {
2666     if (unlikely(p->flags & TLB_MMIO)) {
2667         do_st_mmio_leN(cpu, p->full, val, p->addr, 1, mmu_idx, ra);
2668     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2669         /* nothing */
2670     } else {
2671         *(uint8_t *)p->haddr = val;
2672     }
2673 }
2674 
2675 static void do_st_2(CPUState *cpu, MMULookupPageData *p, uint16_t val,
2676                     int mmu_idx, MemOp memop, uintptr_t ra)
2677 {
2678     if (unlikely(p->flags & TLB_MMIO)) {
2679         if ((memop & MO_BSWAP) != MO_LE) {
2680             val = bswap16(val);
2681         }
2682         do_st_mmio_leN(cpu, p->full, val, p->addr, 2, mmu_idx, ra);
2683     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2684         /* nothing */
2685     } else {
2686         /* Swap to host endian if necessary, then store. */
2687         if (memop & MO_BSWAP) {
2688             val = bswap16(val);
2689         }
2690         store_atom_2(cpu, ra, p->haddr, memop, val);
2691     }
2692 }
2693 
2694 static void do_st_4(CPUState *cpu, MMULookupPageData *p, uint32_t val,
2695                     int mmu_idx, MemOp memop, uintptr_t ra)
2696 {
2697     if (unlikely(p->flags & TLB_MMIO)) {
2698         if ((memop & MO_BSWAP) != MO_LE) {
2699             val = bswap32(val);
2700         }
2701         do_st_mmio_leN(cpu, p->full, val, p->addr, 4, mmu_idx, ra);
2702     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2703         /* nothing */
2704     } else {
2705         /* Swap to host endian if necessary, then store. */
2706         if (memop & MO_BSWAP) {
2707             val = bswap32(val);
2708         }
2709         store_atom_4(cpu, ra, p->haddr, memop, val);
2710     }
2711 }
2712 
2713 static void do_st_8(CPUState *cpu, MMULookupPageData *p, uint64_t val,
2714                     int mmu_idx, MemOp memop, uintptr_t ra)
2715 {
2716     if (unlikely(p->flags & TLB_MMIO)) {
2717         if ((memop & MO_BSWAP) != MO_LE) {
2718             val = bswap64(val);
2719         }
2720         do_st_mmio_leN(cpu, p->full, val, p->addr, 8, mmu_idx, ra);
2721     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2722         /* nothing */
2723     } else {
2724         /* Swap to host endian if necessary, then store. */
2725         if (memop & MO_BSWAP) {
2726             val = bswap64(val);
2727         }
2728         store_atom_8(cpu, ra, p->haddr, memop, val);
2729     }
2730 }
2731 
2732 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
2733                        MemOpIdx oi, uintptr_t ra)
2734 {
2735     MMULookupLocals l;
2736     bool crosspage;
2737 
2738     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2739     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2740     tcg_debug_assert(!crosspage);
2741 
2742     do_st_1(cpu, &l.page[0], val, l.mmu_idx, ra);
2743 }
2744 
2745 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
2746                        MemOpIdx oi, uintptr_t ra)
2747 {
2748     MMULookupLocals l;
2749     bool crosspage;
2750     uint8_t a, b;
2751 
2752     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2753     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2754     if (likely(!crosspage)) {
2755         do_st_2(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2756         return;
2757     }
2758 
2759     if ((l.memop & MO_BSWAP) == MO_LE) {
2760         a = val, b = val >> 8;
2761     } else {
2762         b = val, a = val >> 8;
2763     }
2764     do_st_1(cpu, &l.page[0], a, l.mmu_idx, ra);
2765     do_st_1(cpu, &l.page[1], b, l.mmu_idx, ra);
2766 }
2767 
2768 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
2769                        MemOpIdx oi, uintptr_t ra)
2770 {
2771     MMULookupLocals l;
2772     bool crosspage;
2773 
2774     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2775     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2776     if (likely(!crosspage)) {
2777         do_st_4(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2778         return;
2779     }
2780 
2781     /* Swap to little endian for simplicity, then store by bytes. */
2782     if ((l.memop & MO_BSWAP) != MO_LE) {
2783         val = bswap32(val);
2784     }
2785     val = do_st_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2786     (void) do_st_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2787 }
2788 
2789 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
2790                        MemOpIdx oi, uintptr_t ra)
2791 {
2792     MMULookupLocals l;
2793     bool crosspage;
2794 
2795     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2796     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2797     if (likely(!crosspage)) {
2798         do_st_8(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2799         return;
2800     }
2801 
2802     /* Swap to little endian for simplicity, then store by bytes. */
2803     if ((l.memop & MO_BSWAP) != MO_LE) {
2804         val = bswap64(val);
2805     }
2806     val = do_st_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2807     (void) do_st_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2808 }
2809 
2810 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
2811                         MemOpIdx oi, uintptr_t ra)
2812 {
2813     MMULookupLocals l;
2814     bool crosspage;
2815     uint64_t a, b;
2816     int first;
2817 
2818     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2819     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2820     if (likely(!crosspage)) {
2821         if (unlikely(l.page[0].flags & TLB_MMIO)) {
2822             if ((l.memop & MO_BSWAP) != MO_LE) {
2823                 val = bswap128(val);
2824             }
2825             do_st16_mmio_leN(cpu, l.page[0].full, val, addr, 16, l.mmu_idx, ra);
2826         } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
2827             /* nothing */
2828         } else {
2829             /* Swap to host endian if necessary, then store. */
2830             if (l.memop & MO_BSWAP) {
2831                 val = bswap128(val);
2832             }
2833             store_atom_16(cpu, ra, l.page[0].haddr, l.memop, val);
2834         }
2835         return;
2836     }
2837 
2838     first = l.page[0].size;
2839     if (first == 8) {
2840         MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
2841 
2842         if (l.memop & MO_BSWAP) {
2843             val = bswap128(val);
2844         }
2845         if (HOST_BIG_ENDIAN) {
2846             b = int128_getlo(val), a = int128_gethi(val);
2847         } else {
2848             a = int128_getlo(val), b = int128_gethi(val);
2849         }
2850         do_st_8(cpu, &l.page[0], a, l.mmu_idx, mop8, ra);
2851         do_st_8(cpu, &l.page[1], b, l.mmu_idx, mop8, ra);
2852         return;
2853     }
2854 
2855     if ((l.memop & MO_BSWAP) != MO_LE) {
2856         val = bswap128(val);
2857     }
2858     if (first < 8) {
2859         do_st_leN(cpu, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
2860         val = int128_urshift(val, first * 8);
2861         do_st16_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2862     } else {
2863         b = do_st16_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2864         do_st_leN(cpu, &l.page[1], b, l.mmu_idx, l.memop, ra);
2865     }
2866 }
2867 
2868 #include "ldst_common.c.inc"
2869 
2870 /*
2871  * First set of functions passes in OI and RETADDR.
2872  * This makes them callable from other helpers.
2873  */
2874 
2875 #define ATOMIC_NAME(X) \
2876     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
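
/*
 * For example (names as generated by atomic_template.h): the 4-byte
 * instantiation produces functions such as cpu_atomic_cmpxchgl_le_mmu
 * and cpu_atomic_cmpxchgl_be_mmu from ATOMIC_NAME(cmpxchg).
 */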
2877 
2878 #define ATOMIC_MMU_CLEANUP
2879 
2880 #include "atomic_common.c.inc"
2881 
2882 #define DATA_SIZE 1
2883 #include "atomic_template.h"
2884 
2885 #define DATA_SIZE 2
2886 #include "atomic_template.h"
2887 
2888 #define DATA_SIZE 4
2889 #include "atomic_template.h"
2890 
2891 #ifdef CONFIG_ATOMIC64
2892 #define DATA_SIZE 8
2893 #include "atomic_template.h"
2894 #endif
2895 
2896 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
2897 #define DATA_SIZE 16
2898 #include "atomic_template.h"
2899 #endif
2900 
2901 /* Code access functions.  */
2902 
2903 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
2904                          MemOpIdx oi, uintptr_t retaddr)
2905 {
2906     return do_ld1_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2907 }
2908 
2909 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
2910                           MemOpIdx oi, uintptr_t retaddr)
2911 {
2912     return do_ld2_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2913 }
2914 
2915 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
2916                           MemOpIdx oi, uintptr_t retaddr)
2917 {
2918     return do_ld4_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2919 }
2920 
2921 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
2922                           MemOpIdx oi, uintptr_t retaddr)
2923 {
2924     return do_ld8_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2925 }
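
/*
 * Illustrative sketch (hypothetical translator code): instruction fetch
 * reaches these helpers with a MemOpIdx built for the instruction-fetch
 * mmu index, e.g.
 *
 *    MemOpIdx oi = make_memop_idx(MO_TEUL,
 *                                 cpu_mmu_index(env_cpu(env), true));
 *    uint32_t insn = cpu_ldl_code_mmu(env, pc, oi, 0);
 */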
2926