xref: /qemu/accel/tcg/cputlb.c (revision 6c1ae457a17a9462fb89ef1f30ad7da5266bfea6)
1 /*
2  *  Common CPU TLB handling
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/main-loop.h"
22 #include "accel/tcg/cpu-ops.h"
23 #include "exec/exec-all.h"
24 #include "exec/page-protection.h"
25 #include "system/memory.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/cputlb.h"
28 #include "exec/tb-flush.h"
29 #include "system/ram_addr.h"
30 #include "exec/mmu-access-type.h"
31 #include "exec/tlb-common.h"
32 #include "exec/vaddr.h"
33 #include "tcg/tcg.h"
34 #include "qemu/error-report.h"
35 #include "exec/log.h"
36 #include "exec/helper-proto-common.h"
37 #include "qemu/atomic.h"
38 #include "qemu/atomic128.h"
39 #include "tb-internal.h"
40 #include "trace.h"
41 #include "tb-hash.h"
42 #include "tb-internal.h"
43 #include "internal-common.h"
44 #include "internal-target.h"
45 #ifdef CONFIG_PLUGIN
46 #include "qemu/plugin-memory.h"
47 #endif
48 #include "tcg/tcg-ldst.h"
49 
50 /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
51 /* #define DEBUG_TLB */
52 /* #define DEBUG_TLB_LOG */
53 
54 #ifdef DEBUG_TLB
55 # define DEBUG_TLB_GATE 1
56 # ifdef DEBUG_TLB_LOG
57 #  define DEBUG_TLB_LOG_GATE 1
58 # else
59 #  define DEBUG_TLB_LOG_GATE 0
60 # endif
61 #else
62 # define DEBUG_TLB_GATE 0
63 # define DEBUG_TLB_LOG_GATE 0
64 #endif
65 
66 #define tlb_debug(fmt, ...) do { \
67     if (DEBUG_TLB_LOG_GATE) { \
68         qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
69                       ## __VA_ARGS__); \
70     } else if (DEBUG_TLB_GATE) { \
71         fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
72     } \
73 } while (0)
74 
75 #define assert_cpu_is_self(cpu) do {                              \
76         if (DEBUG_TLB_GATE) {                                     \
77             g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
78         }                                                         \
79     } while (0)
80 
81 /* run_on_cpu_data.target_ptr should always be big enough for a
82  * vaddr even on 32 bit builds
83  */
84 QEMU_BUILD_BUG_ON(sizeof(vaddr) > sizeof(run_on_cpu_data));
85 
86 /* We currently can't handle more than 16 bits in the MMUIDX bitmask.
87  */
88 QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
89 #define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
90 
91 static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
92 {
93     return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
94 }
95 
96 static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
97 {
98     return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
99 }
100 
101 static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
102                                     MMUAccessType access_type)
103 {
104     /* Do not rearrange the CPUTLBEntry structure members. */
105     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
106                       MMU_DATA_LOAD * sizeof(uintptr_t));
107     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
108                       MMU_DATA_STORE * sizeof(uintptr_t));
109     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
110                       MMU_INST_FETCH * sizeof(uintptr_t));
111 
112     const uintptr_t *ptr = &entry->addr_idx[access_type];
113     /* The index may select .addr_write, so use qatomic_read. */
114     return qatomic_read(ptr);
115 }
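/*
 * Illustrative note (added; not part of the original file): the
 * QEMU_BUILD_BUG_ONs above pin the CPUTLBEntry field order to the
 * MMUAccessType enum values, so all three comparators can be fetched
 * through the addr_idx[] view with the access type as index, e.g.
 *
 *     tlb_read_idx(entry, MMU_DATA_LOAD)   == entry->addr_read
 *     tlb_read_idx(entry, MMU_DATA_STORE)  == entry->addr_write
 *     tlb_read_idx(entry, MMU_INST_FETCH)  == entry->addr_code
 */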
116 
117 static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry)
118 {
119     return tlb_read_idx(entry, MMU_DATA_STORE);
120 }
121 
122 /* Find the TLB index corresponding to the mmu_idx + address pair.  */
123 static inline uintptr_t tlb_index(CPUState *cpu, uintptr_t mmu_idx,
124                                   vaddr addr)
125 {
126     uintptr_t size_mask = cpu->neg.tlb.f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
127 
128     return (addr >> TARGET_PAGE_BITS) & size_mask;
129 }
130 
131 /* Find the TLB entry corresponding to the mmu_idx + address pair.  */
132 static inline CPUTLBEntry *tlb_entry(CPUState *cpu, uintptr_t mmu_idx,
133                                      vaddr addr)
134 {
135     return &cpu->neg.tlb.f[mmu_idx].table[tlb_index(cpu, mmu_idx, addr)];
136 }
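/*
 * Worked example (added; not from the original source): assuming
 * TARGET_PAGE_BITS == 12 and a 256-entry TLB, fast->mask is
 * (256 - 1) << CPU_TLB_ENTRY_BITS, so tlb_index() reduces to
 * (addr >> 12) & 255: the TLB is direct mapped on the virtual page
 * number, and two pages whose VPNs differ by a multiple of 256 share
 * a slot and evict each other (a conflict miss).
 */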
137 
138 static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
139                              size_t max_entries)
140 {
141     desc->window_begin_ns = ns;
142     desc->window_max_entries = max_entries;
143 }
144 
145 static void tb_jmp_cache_clear_page(CPUState *cpu, vaddr page_addr)
146 {
147     CPUJumpCache *jc = cpu->tb_jmp_cache;
148     int i, i0;
149 
150     if (unlikely(!jc)) {
151         return;
152     }
153 
154     i0 = tb_jmp_cache_hash_page(page_addr);
155     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
156         qatomic_set(&jc->array[i0 + i].tb, NULL);
157     }
158 }
159 
160 /**
161  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
162  * @desc: The CPUTLBDesc portion of the TLB
163  * @fast: The CPUTLBDescFast portion of the same TLB
164  *
165  * Called with tlb_lock held.
166  *
167  * We have two main constraints when resizing a TLB: (1) we only resize it
168  * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
169  * the array or unnecessarily flushing it), which means we do not control how
170  * frequently the resizing can occur; (2) we don't have access to the guest's
171  * future scheduling decisions, and therefore have to decide the magnitude of
172  * the resize based on past observations.
173  *
174  * In general, a memory-hungry process can benefit greatly from an appropriately
175  * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
176  * we just have to make the TLB as large as possible; while an oversized TLB
177  * results in minimal TLB miss rates, it also takes longer to be flushed
178  * (flushes can be _very_ frequent), and the reduced locality can also hurt
179  * performance.
180  *
181  * To achieve near-optimal performance for all kinds of workloads, we:
182  *
183  * 1. Aggressively increase the size of the TLB when the use rate of the
184  * TLB being flushed is high, since it is likely that in the near future this
185  * memory-hungry process will execute again, and its memory hungriness will
186  * probably be similar.
187  *
188  * 2. Slowly reduce the size of the TLB as the use rate declines over a
189  * reasonably large time window. The rationale is that if in such a time window
190  * we have not observed a high TLB use rate, it is likely that we won't observe
191  * it in the near future. In that case, once a time window expires we downsize
192  * the TLB to match the maximum use rate observed in the window.
193  *
194  * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
195  * since in that range performance is likely near-optimal. Recall that the TLB
196  * is direct mapped, so we want the use rate to be low (or at least not too
197  * high), since otherwise we are likely to have a significant amount of
198  * conflict misses.
199  */
200 static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
201                                   int64_t now)
202 {
203     size_t old_size = tlb_n_entries(fast);
204     size_t rate;
205     size_t new_size = old_size;
206     int64_t window_len_ms = 100;
207     int64_t window_len_ns = window_len_ms * 1000 * 1000;
208     bool window_expired = now > desc->window_begin_ns + window_len_ns;
209 
210     if (desc->n_used_entries > desc->window_max_entries) {
211         desc->window_max_entries = desc->n_used_entries;
212     }
213     rate = desc->window_max_entries * 100 / old_size;
214 
215     if (rate > 70) {
216         new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
217     } else if (rate < 30 && window_expired) {
218         size_t ceil = pow2ceil(desc->window_max_entries);
219         size_t expected_rate = desc->window_max_entries * 100 / ceil;
220 
221         /*
222          * Avoid undersizing when the max number of entries seen is just below
223          * a pow2. For instance, if max_entries == 1025, the expected use rate
224          * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
225          * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
226          * later. Thus, make sure that the expected use rate remains below 70%.
227          * (and since we double the size, that means the lowest rate we'd
228          * expect to get is 35%, which is still in the 30-70% range where
229          * we consider that the size is appropriate.)
230          */
231         if (expected_rate > 70) {
232             ceil *= 2;
233         }
234         new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
235     }
236 
237     if (new_size == old_size) {
238         if (window_expired) {
239             tlb_window_reset(desc, now, desc->n_used_entries);
240         }
241         return;
242     }
243 
244     g_free(fast->table);
245     g_free(desc->fulltlb);
246 
247     tlb_window_reset(desc, now, 0);
248     /* desc->n_used_entries is cleared by the caller */
249     fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
250     fast->table = g_try_new(CPUTLBEntry, new_size);
251     desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
252 
253     /*
254      * If the allocations fail, try smaller sizes. We just freed some
255      * memory, so going back to half of new_size has a good chance of working.
256      * Increased memory pressure elsewhere in the system might cause the
257      * allocations to fail though, so we progressively reduce the allocation
258      * size, aborting if we cannot even allocate the smallest TLB we support.
259      */
260     while (fast->table == NULL || desc->fulltlb == NULL) {
261         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
262             error_report("%s: %s", __func__, strerror(errno));
263             abort();
264         }
265         new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
266         fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
267 
268         g_free(fast->table);
269         g_free(desc->fulltlb);
270         fast->table = g_try_new(CPUTLBEntry, new_size);
271         desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
272     }
273 }
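/*
 * Worked example (added; not from the original source) of the heuristic
 * above: with old_size == 1024 and window_max_entries == 800, rate is
 * 78 (> 70), so the table doubles to 2048 (capped at
 * 1 << CPU_TLB_DYN_MAX_BITS).  Conversely, with old_size == 4096,
 * window_max_entries == 900 and an expired window, rate is 21 (< 30);
 * pow2ceil(900) == 1024 would give an expected rate of 87 (> 70), so
 * the ceiling is doubled and the table shrinks to 2048 instead,
 * leaving an expected use rate of about 44%.
 */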
274 
275 static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
276 {
277     desc->n_used_entries = 0;
278     desc->large_page_addr = -1;
279     desc->large_page_mask = -1;
280     desc->vindex = 0;
281     memset(fast->table, -1, sizeof_tlb(fast));
282     memset(desc->vtable, -1, sizeof(desc->vtable));
283 }
284 
285 static void tlb_flush_one_mmuidx_locked(CPUState *cpu, int mmu_idx,
286                                         int64_t now)
287 {
288     CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx];
289     CPUTLBDescFast *fast = &cpu->neg.tlb.f[mmu_idx];
290 
291     tlb_mmu_resize_locked(desc, fast, now);
292     tlb_mmu_flush_locked(desc, fast);
293 }
294 
295 static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
296 {
297     size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
298 
299     tlb_window_reset(desc, now, 0);
300     desc->n_used_entries = 0;
301     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
302     fast->table = g_new(CPUTLBEntry, n_entries);
303     desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
304     tlb_mmu_flush_locked(desc, fast);
305 }
306 
307 static inline void tlb_n_used_entries_inc(CPUState *cpu, uintptr_t mmu_idx)
308 {
309     cpu->neg.tlb.d[mmu_idx].n_used_entries++;
310 }
311 
312 static inline void tlb_n_used_entries_dec(CPUState *cpu, uintptr_t mmu_idx)
313 {
314     cpu->neg.tlb.d[mmu_idx].n_used_entries--;
315 }
316 
317 void tlb_init(CPUState *cpu)
318 {
319     int64_t now = get_clock_realtime();
320     int i;
321 
322     qemu_spin_init(&cpu->neg.tlb.c.lock);
323 
324     /* All tlbs are initialized flushed. */
325     cpu->neg.tlb.c.dirty = 0;
326 
327     for (i = 0; i < NB_MMU_MODES; i++) {
328         tlb_mmu_init(&cpu->neg.tlb.d[i], &cpu->neg.tlb.f[i], now);
329     }
330 }
331 
332 void tlb_destroy(CPUState *cpu)
333 {
334     int i;
335 
336     qemu_spin_destroy(&cpu->neg.tlb.c.lock);
337     for (i = 0; i < NB_MMU_MODES; i++) {
338         CPUTLBDesc *desc = &cpu->neg.tlb.d[i];
339         CPUTLBDescFast *fast = &cpu->neg.tlb.f[i];
340 
341         g_free(fast->table);
342         g_free(desc->fulltlb);
343     }
344 }
345 
346 /* flush_all_helper: run fn across all cpus
347  *
348  * Queue fn as asynchronous work on every cpu other than @src.  Callers
349  * that need a synchronisation point then queue their own copy on @src
350  * as "safe" work, so that all queued work is finished before execution
351  * starts again.
352  */
353 static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
354                              run_on_cpu_data d)
355 {
356     CPUState *cpu;
357 
358     CPU_FOREACH(cpu) {
359         if (cpu != src) {
360             async_run_on_cpu(cpu, fn, d);
361         }
362     }
363 }
364 
365 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
366 {
367     uint16_t asked = data.host_int;
368     uint16_t all_dirty, work, to_clean;
369     int64_t now = get_clock_realtime();
370 
371     assert_cpu_is_self(cpu);
372 
373     tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
374 
375     qemu_spin_lock(&cpu->neg.tlb.c.lock);
376 
377     all_dirty = cpu->neg.tlb.c.dirty;
378     to_clean = asked & all_dirty;
379     all_dirty &= ~to_clean;
380     cpu->neg.tlb.c.dirty = all_dirty;
381 
382     for (work = to_clean; work != 0; work &= work - 1) {
383         int mmu_idx = ctz32(work);
384         tlb_flush_one_mmuidx_locked(cpu, mmu_idx, now);
385     }
386 
387     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
388 
389     tcg_flush_jmp_cache(cpu);
390 
391     if (to_clean == ALL_MMUIDX_BITS) {
392         qatomic_set(&cpu->neg.tlb.c.full_flush_count,
393                     cpu->neg.tlb.c.full_flush_count + 1);
394     } else {
395         qatomic_set(&cpu->neg.tlb.c.part_flush_count,
396                     cpu->neg.tlb.c.part_flush_count + ctpop16(to_clean));
397         if (to_clean != asked) {
398             qatomic_set(&cpu->neg.tlb.c.elide_flush_count,
399                         cpu->neg.tlb.c.elide_flush_count +
400                         ctpop16(asked & ~to_clean));
401         }
402     }
403 }
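/*
 * Illustrative sketch (added; not part of the build): the loop above
 * walks the set bits of to_clean with the clear-lowest-bit idiom.  In
 * generic form, with a hypothetical visit() callback:
 */
#if 0
static void for_each_set_bit_example(uint16_t bits, void (*visit)(int))
{
    while (bits != 0) {
        int idx = ctz32(bits);   /* index of the lowest set bit */
        visit(idx);
        bits &= bits - 1;        /* clear that bit */
    }
}
#endif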
404 
405 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
406 {
407     tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
408 
409     assert_cpu_is_self(cpu);
410 
411     tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
412 }
413 
414 void tlb_flush(CPUState *cpu)
415 {
416     tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
417 }
418 
419 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
420 {
421     const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
422 
423     tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
424 
425     flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
426     async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
427 }
428 
429 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
430 {
431     tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
432 }
433 
434 static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
435                                       vaddr page, vaddr mask)
436 {
437     page &= mask;
438     mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
439 
440     return (page == (tlb_entry->addr_read & mask) ||
441             page == (tlb_addr_write(tlb_entry) & mask) ||
442             page == (tlb_entry->addr_code & mask));
443 }
444 
445 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page)
446 {
447     return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
448 }
449 
450 /**
451  * tlb_entry_is_empty - return true if the entry is not in use
452  * @te: pointer to CPUTLBEntry
453  */
454 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
455 {
456     return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
457 }
458 
459 /* Called with tlb_c.lock held */
460 static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
461                                         vaddr page,
462                                         vaddr mask)
463 {
464     if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
465         memset(tlb_entry, -1, sizeof(*tlb_entry));
466         return true;
467     }
468     return false;
469 }
470 
471 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, vaddr page)
472 {
473     return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
474 }
475 
476 /* Called with tlb_c.lock held */
477 static void tlb_flush_vtlb_page_mask_locked(CPUState *cpu, int mmu_idx,
478                                             vaddr page,
479                                             vaddr mask)
480 {
481     CPUTLBDesc *d = &cpu->neg.tlb.d[mmu_idx];
482     int k;
483 
484     assert_cpu_is_self(cpu);
485     for (k = 0; k < CPU_VTLB_SIZE; k++) {
486         if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
487             tlb_n_used_entries_dec(cpu, mmu_idx);
488         }
489     }
490 }
491 
492 static inline void tlb_flush_vtlb_page_locked(CPUState *cpu, int mmu_idx,
493                                               vaddr page)
494 {
495     tlb_flush_vtlb_page_mask_locked(cpu, mmu_idx, page, -1);
496 }
497 
498 static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
499 {
500     vaddr lp_addr = cpu->neg.tlb.d[midx].large_page_addr;
501     vaddr lp_mask = cpu->neg.tlb.d[midx].large_page_mask;
502 
503     /* Check if we need to flush due to large pages.  */
504     if ((page & lp_mask) == lp_addr) {
505         tlb_debug("forcing full flush midx %d (%016"
506                   VADDR_PRIx "/%016" VADDR_PRIx ")\n",
507                   midx, lp_addr, lp_mask);
508         tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
509     } else {
510         if (tlb_flush_entry_locked(tlb_entry(cpu, midx, page), page)) {
511             tlb_n_used_entries_dec(cpu, midx);
512         }
513         tlb_flush_vtlb_page_locked(cpu, midx, page);
514     }
515 }
516 
517 /**
518  * tlb_flush_page_by_mmuidx_async_0:
519  * @cpu: cpu on which to flush
520  * @addr: page of virtual address to flush
521  * @idxmap: set of mmu_idx to flush
522  *
523  * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
524  * at @addr from the tlbs indicated by @idxmap from @cpu.
525  */
526 static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
527                                              vaddr addr,
528                                              uint16_t idxmap)
529 {
530     int mmu_idx;
531 
532     assert_cpu_is_self(cpu);
533 
534     tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
535 
536     qemu_spin_lock(&cpu->neg.tlb.c.lock);
537     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
538         if ((idxmap >> mmu_idx) & 1) {
539             tlb_flush_page_locked(cpu, mmu_idx, addr);
540         }
541     }
542     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
543 
544     /*
545      * Discard jump cache entries for any tb which might potentially
546      * overlap the flushed page, which includes the previous.
547      */
548     tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
549     tb_jmp_cache_clear_page(cpu, addr);
550 }
551 
552 /**
553  * tlb_flush_page_by_mmuidx_async_1:
554  * @cpu: cpu on which to flush
555  * @data: encoded addr + idxmap
556  *
557  * Helper for tlb_flush_page_by_mmuidx and friends, called through
558  * async_run_on_cpu.  The idxmap parameter is encoded in the page
559  * offset of the target_ptr field.  This limits the set of mmu_idx
560  * that can be passed via this method.
561  */
562 static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
563                                              run_on_cpu_data data)
564 {
565     vaddr addr_and_idxmap = data.target_ptr;
566     vaddr addr = addr_and_idxmap & TARGET_PAGE_MASK;
567     uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
568 
569     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
570 }
571 
572 typedef struct {
573     vaddr addr;
574     uint16_t idxmap;
575 } TLBFlushPageByMMUIdxData;
576 
577 /**
578  * tlb_flush_page_by_mmuidx_async_2:
579  * @cpu: cpu on which to flush
580  * @data: allocated addr + idxmap
581  *
582  * Helper for tlb_flush_page_by_mmuidx and friends, called through
583  * async_run_on_cpu.  The addr+idxmap parameters are stored in a
584  * TLBFlushPageByMMUIdxData structure that has been allocated
585  * specifically for this helper.  Free the structure when done.
586  */
587 static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
588                                              run_on_cpu_data data)
589 {
590     TLBFlushPageByMMUIdxData *d = data.host_ptr;
591 
592     tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
593     g_free(d);
594 }
595 
596 void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
597 {
598     tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
599 
600     assert_cpu_is_self(cpu);
601 
602     /* This should already be page aligned */
603     addr &= TARGET_PAGE_MASK;
604 
605     tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
606 }
607 
608 void tlb_flush_page(CPUState *cpu, vaddr addr)
609 {
610     tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
611 }
612 
613 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
614                                               vaddr addr,
615                                               uint16_t idxmap)
616 {
617     tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
618 
619     /* This should already be page aligned */
620     addr &= TARGET_PAGE_MASK;
621 
622     /*
623      * Allocate memory to hold addr+idxmap only when needed.
624      * See tlb_flush_page_by_mmuidx for details.
625      */
626     if (idxmap < TARGET_PAGE_SIZE) {
627         flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
628                          RUN_ON_CPU_TARGET_PTR(addr | idxmap));
629         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
630                               RUN_ON_CPU_TARGET_PTR(addr | idxmap));
631     } else {
632         CPUState *dst_cpu;
633         TLBFlushPageByMMUIdxData *d;
634 
635         /* Allocate a separate data block for each destination cpu.  */
636         CPU_FOREACH(dst_cpu) {
637             if (dst_cpu != src_cpu) {
638                 d = g_new(TLBFlushPageByMMUIdxData, 1);
639                 d->addr = addr;
640                 d->idxmap = idxmap;
641                 async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
642                                  RUN_ON_CPU_HOST_PTR(d));
643             }
644         }
645 
646         d = g_new(TLBFlushPageByMMUIdxData, 1);
647         d->addr = addr;
648         d->idxmap = idxmap;
649         async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
650                               RUN_ON_CPU_HOST_PTR(d));
651     }
652 }
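/*
 * Illustrative sketch (added; not part of the build): when idxmap fits
 * in the page offset (idxmap < TARGET_PAGE_SIZE), the address and the
 * idxmap travel packed in a single run_on_cpu_data.target_ptr and are
 * split again on the destination, as tlb_flush_page_by_mmuidx_async_1
 * does above:
 */
#if 0
static vaddr encode_page_idxmap_example(vaddr addr, uint16_t idxmap)
{
    /* addr is page aligned, so its low TARGET_PAGE_BITS bits are free */
    return (addr & TARGET_PAGE_MASK) | idxmap;
}

static void decode_page_idxmap_example(vaddr packed,
                                       vaddr *addr, uint16_t *idxmap)
{
    *addr = packed & TARGET_PAGE_MASK;
    *idxmap = packed & ~TARGET_PAGE_MASK;
}
#endif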
653 
654 void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
655 {
656     tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
657 }
658 
659 static void tlb_flush_range_locked(CPUState *cpu, int midx,
660                                    vaddr addr, vaddr len,
661                                    unsigned bits)
662 {
663     CPUTLBDesc *d = &cpu->neg.tlb.d[midx];
664     CPUTLBDescFast *f = &cpu->neg.tlb.f[midx];
665     vaddr mask = MAKE_64BIT_MASK(0, bits);
666 
667     /*
668      * If @bits is smaller than the tlb size, there may be multiple entries
669      * within the TLB; otherwise all addresses that match under @mask hit
670      * the same TLB entry.
671      * TODO: Perhaps allow bits to be a few bits less than the size.
672      * For now, just flush the entire TLB.
673      *
674      * If @len is larger than the tlb size, then it will take longer to
675      * test all of the entries in the TLB than it will to flush it all.
676      */
677     if (mask < f->mask || len > f->mask) {
678         tlb_debug("forcing full flush midx %d ("
679                   "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
680                   midx, addr, mask, len);
681         tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
682         return;
683     }
684 
685     /*
686      * Check if we need to flush due to large pages.
687      * Because large_page_mask contains all 1's from the msb,
688      * we only need to test the end of the range.
689      */
690     if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
691         tlb_debug("forcing full flush midx %d ("
692                   "%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n",
693                   midx, d->large_page_addr, d->large_page_mask);
694         tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
695         return;
696     }
697 
698     for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
699         vaddr page = addr + i;
700         CPUTLBEntry *entry = tlb_entry(cpu, midx, page);
701 
702         if (tlb_flush_entry_mask_locked(entry, page, mask)) {
703             tlb_n_used_entries_dec(cpu, midx);
704         }
705         tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
706     }
707 }
708 
709 typedef struct {
710     vaddr addr;
711     vaddr len;
712     uint16_t idxmap;
713     uint16_t bits;
714 } TLBFlushRangeData;
715 
716 static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
717                                               TLBFlushRangeData d)
718 {
719     int mmu_idx;
720 
721     assert_cpu_is_self(cpu);
722 
723     tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n",
724               d.addr, d.bits, d.len, d.idxmap);
725 
726     qemu_spin_lock(&cpu->neg.tlb.c.lock);
727     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
728         if ((d.idxmap >> mmu_idx) & 1) {
729             tlb_flush_range_locked(cpu, mmu_idx, d.addr, d.len, d.bits);
730         }
731     }
732     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
733 
734     /*
735      * If the length is larger than the jump cache size, then it will take
736      * longer to clear each entry individually than it will to clear it all.
737      */
738     if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
739         tcg_flush_jmp_cache(cpu);
740         return;
741     }
742 
743     /*
744      * Discard jump cache entries for any tb which might potentially
745      * overlap the flushed pages, which includes the previous.
746      */
747     d.addr -= TARGET_PAGE_SIZE;
748     for (vaddr i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
749         tb_jmp_cache_clear_page(cpu, d.addr);
750         d.addr += TARGET_PAGE_SIZE;
751     }
752 }
753 
754 static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
755                                               run_on_cpu_data data)
756 {
757     TLBFlushRangeData *d = data.host_ptr;
758     tlb_flush_range_by_mmuidx_async_0(cpu, *d);
759     g_free(d);
760 }
761 
762 void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
763                                vaddr len, uint16_t idxmap,
764                                unsigned bits)
765 {
766     TLBFlushRangeData d;
767 
768     assert_cpu_is_self(cpu);
769 
770     /*
771      * If all bits are significant, and len is small,
772      * this devolves to tlb_flush_page.
773      */
774     if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
775         tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
776         return;
777     }
778     /* If no page bits are significant, this devolves to tlb_flush. */
779     if (bits < TARGET_PAGE_BITS) {
780         tlb_flush_by_mmuidx(cpu, idxmap);
781         return;
782     }
783 
784     /* This should already be page aligned */
785     d.addr = addr & TARGET_PAGE_MASK;
786     d.len = len;
787     d.idxmap = idxmap;
788     d.bits = bits;
789 
790     tlb_flush_range_by_mmuidx_async_0(cpu, d);
791 }
792 
793 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
794                                    uint16_t idxmap, unsigned bits)
795 {
796     tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
797 }
798 
799 void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
800                                                vaddr addr,
801                                                vaddr len,
802                                                uint16_t idxmap,
803                                                unsigned bits)
804 {
805     TLBFlushRangeData d, *p;
806     CPUState *dst_cpu;
807 
808     /*
809      * If all bits are significant, and len is small,
810      * this devolves to tlb_flush_page.
811      */
812     if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
813         tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
814         return;
815     }
816     /* If no page bits are significant, this devolves to tlb_flush. */
817     if (bits < TARGET_PAGE_BITS) {
818         tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
819         return;
820     }
821 
822     /* This should already be page aligned */
823     d.addr = addr & TARGET_PAGE_MASK;
824     d.len = len;
825     d.idxmap = idxmap;
826     d.bits = bits;
827 
828     /* Allocate a separate data block for each destination cpu.  */
829     CPU_FOREACH(dst_cpu) {
830         if (dst_cpu != src_cpu) {
831             p = g_memdup(&d, sizeof(d));
832             async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1,
833                              RUN_ON_CPU_HOST_PTR(p));
834         }
835     }
836 
837     p = g_memdup(&d, sizeof(d));
838     async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1,
839                           RUN_ON_CPU_HOST_PTR(p));
840 }
841 
842 void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
843                                                    vaddr addr,
844                                                    uint16_t idxmap,
845                                                    unsigned bits)
846 {
847     tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
848                                               idxmap, bits);
849 }
850 
851 /* update dirty-memory tracking so that writes to code in the physical
852    page 'ram_addr' can be detected */
853 void tlb_protect_code(ram_addr_t ram_addr)
854 {
855     cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
856                                              TARGET_PAGE_SIZE,
857                                              DIRTY_MEMORY_CODE);
858 }
859 
860 /* update dirty tracking so that writes in physical page 'ram_addr' are no
861    longer tested for self-modifying code */
862 void tlb_unprotect_code(ram_addr_t ram_addr)
863 {
864     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
865 }
866 
867 
868 /*
869  * Dirty write flag handling
870  *
871  * When the TCG code writes to a location it looks up the address in
872  * the TLB and uses that data to compute the final address. If any of
873  * the lower bits of the address are set then the slow path is forced.
874  * There are a number of reasons to do this but for normal RAM the
875  * most usual is detecting writes to code regions which may invalidate
876  * generated code.
877  *
878  * Other vCPUs might be reading their TLBs during guest execution, so we update
879  * te->addr_write with qatomic_set. We don't need to worry about this for
880  * oversized guests as MTTCG is disabled for them.
881  *
882  * Called with tlb_c.lock held.
883  */
884 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
885                                          uintptr_t start, uintptr_t length)
886 {
887     uintptr_t addr = tlb_entry->addr_write;
888 
889     if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
890                  TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
891         addr &= TARGET_PAGE_MASK;
892         addr += tlb_entry->addend;
893         if ((addr - start) < length) {
894             qatomic_set(&tlb_entry->addr_write,
895                         tlb_entry->addr_write | TLB_NOTDIRTY);
896         }
897     }
898 }
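/*
 * Illustrative note (added; not part of the original file): for
 * RAM-backed pages, tlb_set_page_full() stores
 * addend == host_page_address - guest_page_address, so
 * host_address == guest_vaddr + tlb_entry->addend.  The code above uses
 * that identity to convert the page-aligned comparator into a host
 * address before testing it against [start, start + length) and
 * tagging the entry with TLB_NOTDIRTY.
 */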
899 
900 /*
901  * Called with tlb_c.lock held.
902  * Called only from the vCPU context, i.e. the TLB's owner thread.
903  */
904 static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
905 {
906     *d = *s;
907 }
908 
909 /* This is a cross vCPU call (i.e. another vCPU resetting the flags of
910  * the target vCPU).
911  * We must take tlb_c.lock to avoid racing with another vCPU update. The only
912  * thing actually updated is the target TLB entry ->addr_write flags.
913  */
914 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
915 {
916     int mmu_idx;
917 
918     qemu_spin_lock(&cpu->neg.tlb.c.lock);
919     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
920         unsigned int i;
921         unsigned int n = tlb_n_entries(&cpu->neg.tlb.f[mmu_idx]);
922 
923         for (i = 0; i < n; i++) {
924             tlb_reset_dirty_range_locked(&cpu->neg.tlb.f[mmu_idx].table[i],
925                                          start1, length);
926         }
927 
928         for (i = 0; i < CPU_VTLB_SIZE; i++) {
929             tlb_reset_dirty_range_locked(&cpu->neg.tlb.d[mmu_idx].vtable[i],
930                                          start1, length);
931         }
932     }
933     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
934 }
935 
936 /* Called with tlb_c.lock held */
937 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
938                                          vaddr addr)
939 {
940     if (tlb_entry->addr_write == (addr | TLB_NOTDIRTY)) {
941         tlb_entry->addr_write = addr;
942     }
943 }
944 
945 /* update the TLB entries corresponding to virtual page vaddr
946    so that writes to it no longer take the dirty-tracking slow path */
947 static void tlb_set_dirty(CPUState *cpu, vaddr addr)
948 {
949     int mmu_idx;
950 
951     assert_cpu_is_self(cpu);
952 
953     addr &= TARGET_PAGE_MASK;
954     qemu_spin_lock(&cpu->neg.tlb.c.lock);
955     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
956         tlb_set_dirty1_locked(tlb_entry(cpu, mmu_idx, addr), addr);
957     }
958 
959     for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
960         int k;
961         for (k = 0; k < CPU_VTLB_SIZE; k++) {
962             tlb_set_dirty1_locked(&cpu->neg.tlb.d[mmu_idx].vtable[k], addr);
963         }
964     }
965     qemu_spin_unlock(&cpu->neg.tlb.c.lock);
966 }
967 
968 /* Our TLB does not support large pages, so remember the area covered by
969    large pages and trigger a full TLB flush if these are invalidated.  */
970 static void tlb_add_large_page(CPUState *cpu, int mmu_idx,
971                                vaddr addr, uint64_t size)
972 {
973     vaddr lp_addr = cpu->neg.tlb.d[mmu_idx].large_page_addr;
974     vaddr lp_mask = ~(size - 1);
975 
976     if (lp_addr == (vaddr)-1) {
977         /* No previous large page.  */
978         lp_addr = addr;
979     } else {
980         /* Extend the existing region to include the new page.
981            This is a compromise between unnecessary flushes and
982            the cost of maintaining a full variable size TLB.  */
983         lp_mask &= cpu->neg.tlb.d[mmu_idx].large_page_mask;
984         while (((lp_addr ^ addr) & lp_mask) != 0) {
985             lp_mask <<= 1;
986         }
987     }
988     cpu->neg.tlb.d[mmu_idx].large_page_addr = lp_addr & lp_mask;
989     cpu->neg.tlb.d[mmu_idx].large_page_mask = lp_mask;
990 }
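/*
 * Worked example (added; not from the original source; only the low 32
 * bits of the masks are shown): suppose a 2 MiB page was recorded at
 * 0x40000000 (lp_mask == 0xffe00000) and another 2 MiB page is added at
 * 0x40400000.  (lp_addr ^ addr) & lp_mask is non-zero, so lp_mask is
 * shifted left until both addresses fall into one region; the loop
 * stops at lp_mask == 0xff800000, i.e. an 8 MiB region based at
 * 0x40000000.  A later tlb_flush_page() anywhere in that region forces
 * a full flush of this mmu_idx.
 */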
991 
992 static inline void tlb_set_compare(CPUTLBEntryFull *full, CPUTLBEntry *ent,
993                                    vaddr address, int flags,
994                                    MMUAccessType access_type, bool enable)
995 {
996     if (enable) {
997         address |= flags & TLB_FLAGS_MASK;
998         flags &= TLB_SLOW_FLAGS_MASK;
999         if (flags) {
1000             address |= TLB_FORCE_SLOW;
1001         }
1002     } else {
1003         address = -1;
1004         flags = 0;
1005     }
1006     ent->addr_idx[access_type] = address;
1007     full->slow_flags[access_type] = flags;
1008 }
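/*
 * Illustrative note (added; not part of the original file): flags in
 * TLB_FLAGS_MASK fit in the low bits of the comparator itself, so the
 * fast path sees them during the address compare for free; flags that
 * only fit in TLB_SLOW_FLAGS_MASK are parked in full->slow_flags[],
 * with TLB_FORCE_SLOW set in the comparator so the fast path still
 * knows it must fall back to the slow path to pick them up.
 */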
1009 
1010 /*
1011  * Add a new TLB entry. At most one entry for a given virtual address
1012  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
1013  * supplied size is only used by tlb_flush_page.
1014  *
1015  * Called from TCG-generated code, which is under an RCU read-side
1016  * critical section.
1017  */
1018 void tlb_set_page_full(CPUState *cpu, int mmu_idx,
1019                        vaddr addr, CPUTLBEntryFull *full)
1020 {
1021     CPUTLB *tlb = &cpu->neg.tlb;
1022     CPUTLBDesc *desc = &tlb->d[mmu_idx];
1023     MemoryRegionSection *section;
1024     unsigned int index, read_flags, write_flags;
1025     uintptr_t addend;
1026     CPUTLBEntry *te, tn;
1027     hwaddr iotlb, xlat, sz, paddr_page;
1028     vaddr addr_page;
1029     int asidx, wp_flags, prot;
1030     bool is_ram, is_romd;
1031 
1032     assert_cpu_is_self(cpu);
1033 
1034     if (full->lg_page_size <= TARGET_PAGE_BITS) {
1035         sz = TARGET_PAGE_SIZE;
1036     } else {
1037         sz = (hwaddr)1 << full->lg_page_size;
1038         tlb_add_large_page(cpu, mmu_idx, addr, sz);
1039     }
1040     addr_page = addr & TARGET_PAGE_MASK;
1041     paddr_page = full->phys_addr & TARGET_PAGE_MASK;
1042 
1043     prot = full->prot;
1044     asidx = cpu_asidx_from_attrs(cpu, full->attrs);
1045     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
1046                                                 &xlat, &sz, full->attrs, &prot);
1047     assert(sz >= TARGET_PAGE_SIZE);
1048 
1049     tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
1050               " prot=%x idx=%d\n",
1051               addr, full->phys_addr, prot, mmu_idx);
1052 
1053     read_flags = full->tlb_fill_flags;
1054     if (full->lg_page_size < TARGET_PAGE_BITS) {
1055         /* Repeat the MMU check and TLB fill on every access.  */
1056         read_flags |= TLB_INVALID_MASK;
1057     }
1058 
1059     is_ram = memory_region_is_ram(section->mr);
1060     is_romd = memory_region_is_romd(section->mr);
1061 
1062     if (is_ram || is_romd) {
1063         /* RAM and ROMD both have associated host memory. */
1064         addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
1065     } else {
1066         /* I/O does not; force the host address to NULL. */
1067         addend = 0;
1068     }
1069 
1070     write_flags = read_flags;
1071     if (is_ram) {
1072         iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1073         assert(!(iotlb & ~TARGET_PAGE_MASK));
1074         /*
1075          * Computing is_clean is expensive; avoid all that unless
1076          * the page is actually writable.
1077          */
1078         if (prot & PAGE_WRITE) {
1079             if (section->readonly) {
1080                 write_flags |= TLB_DISCARD_WRITE;
1081             } else if (cpu_physical_memory_is_clean(iotlb)) {
1082                 write_flags |= TLB_NOTDIRTY;
1083             }
1084         }
1085     } else {
1086         /* I/O or ROMD */
1087         iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
1088         /*
1089          * Writes to romd devices must go through MMIO to enable write.
1090          * Reads to romd devices go through the ram_ptr found above,
1091          * but of course reads to I/O must go through MMIO.
1092          */
1093         write_flags |= TLB_MMIO;
1094         if (!is_romd) {
1095             read_flags = write_flags;
1096         }
1097     }
1098 
1099     wp_flags = cpu_watchpoint_address_matches(cpu, addr_page,
1100                                               TARGET_PAGE_SIZE);
1101 
1102     index = tlb_index(cpu, mmu_idx, addr_page);
1103     te = tlb_entry(cpu, mmu_idx, addr_page);
1104 
1105     /*
1106      * Hold the TLB lock for the rest of the function. We could acquire/release
1107      * the lock several times in the function, but it is faster to amortize the
1108      * acquisition cost by acquiring it just once. Note that this leads to
1109      * a longer critical section, but this is not a concern since the TLB lock
1110      * is unlikely to be contended.
1111      */
1112     qemu_spin_lock(&tlb->c.lock);
1113 
1114     /* Note that the tlb is no longer clean.  */
1115     tlb->c.dirty |= 1 << mmu_idx;
1116 
1117     /* Make sure there's no cached translation for the new page.  */
1118     tlb_flush_vtlb_page_locked(cpu, mmu_idx, addr_page);
1119 
1120     /*
1121      * Only evict the old entry to the victim tlb if it's for a
1122      * different page; otherwise just overwrite the stale data.
1123      */
1124     if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) {
1125         unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
1126         CPUTLBEntry *tv = &desc->vtable[vidx];
1127 
1128         /* Evict the old entry into the victim tlb.  */
1129         copy_tlb_helper_locked(tv, te);
1130         desc->vfulltlb[vidx] = desc->fulltlb[index];
1131         tlb_n_used_entries_dec(cpu, mmu_idx);
1132     }
1133 
1134     /* refill the tlb */
1135     /*
1136      * When memory region is ram, iotlb contains a TARGET_PAGE_BITS
1137      * aligned ram_addr_t of the page base of the target RAM.
1138      * Otherwise, iotlb contains
1139      *  - a physical section number in the lower TARGET_PAGE_BITS
1140      *  - the offset within section->mr of the page base (I/O, ROMD) with the
1141      *    TARGET_PAGE_BITS masked off.
1142      * We subtract addr_page (which is page aligned and thus won't
1143      * disturb the low bits) to give an offset which can be added to the
1144      * (non-page-aligned) vaddr of the eventual memory access to get
1145      * the MemoryRegion offset for the access. Note that the vaddr we
1146      * subtract here is that of the page base, and not the same as the
1147      * vaddr we add back in io_prepare()/get_page_addr_code().
1148      */
1149     desc->fulltlb[index] = *full;
1150     full = &desc->fulltlb[index];
1151     full->xlat_section = iotlb - addr_page;
1152     full->phys_addr = paddr_page;
1153 
1154     /* Now calculate the new entry */
1155     tn.addend = addend - addr_page;
1156 
1157     tlb_set_compare(full, &tn, addr_page, read_flags,
1158                     MMU_INST_FETCH, prot & PAGE_EXEC);
1159 
1160     if (wp_flags & BP_MEM_READ) {
1161         read_flags |= TLB_WATCHPOINT;
1162     }
1163     tlb_set_compare(full, &tn, addr_page, read_flags,
1164                     MMU_DATA_LOAD, prot & PAGE_READ);
1165 
1166     if (prot & PAGE_WRITE_INV) {
1167         write_flags |= TLB_INVALID_MASK;
1168     }
1169     if (wp_flags & BP_MEM_WRITE) {
1170         write_flags |= TLB_WATCHPOINT;
1171     }
1172     tlb_set_compare(full, &tn, addr_page, write_flags,
1173                     MMU_DATA_STORE, prot & PAGE_WRITE);
1174 
1175     copy_tlb_helper_locked(te, &tn);
1176     tlb_n_used_entries_inc(cpu, mmu_idx);
1177     qemu_spin_unlock(&tlb->c.lock);
1178 }
1179 
1180 void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
1181                              hwaddr paddr, MemTxAttrs attrs, int prot,
1182                              int mmu_idx, vaddr size)
1183 {
1184     CPUTLBEntryFull full = {
1185         .phys_addr = paddr,
1186         .attrs = attrs,
1187         .prot = prot,
1188         .lg_page_size = ctz64(size)
1189     };
1190 
1191     assert(is_power_of_2(size));
1192     tlb_set_page_full(cpu, mmu_idx, addr, &full);
1193 }
1194 
1195 void tlb_set_page(CPUState *cpu, vaddr addr,
1196                   hwaddr paddr, int prot,
1197                   int mmu_idx, vaddr size)
1198 {
1199     tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED,
1200                             prot, mmu_idx, size);
1201 }
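/*
 * Illustrative sketch (added; not part of the build): the general shape
 * of a target tlb_fill hook that resolves a translation and installs it
 * with tlb_set_page_full().  my_mmu_translate() is a hypothetical
 * stand-in for a target's page-table walker.
 */
#if 0
static bool example_tlb_fill(CPUState *cs, vaddr addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool probe, uintptr_t retaddr)
{
    CPUTLBEntryFull full = { };

    /* Hypothetical walker filling phys_addr, prot, lg_page_size, attrs. */
    if (!my_mmu_translate(cs, addr, access_type, mmu_idx, &full)) {
        if (probe) {
            return false;               /* let the caller handle the miss */
        }
        /* A real target would raise its architectural fault here. */
        cpu_loop_exit_restore(cs, retaddr);
    }
    tlb_set_page_full(cs, mmu_idx, addr, &full);
    return true;
}
#endif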
1202 
1203 /**
1204  * tlb_hit_page: return true if page aligned @addr is a hit against the
1205  * TLB entry @tlb_addr
1206  *
1207  * @addr: virtual address to test (must be page aligned)
1208  * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
1209  */
1210 static inline bool tlb_hit_page(uint64_t tlb_addr, vaddr addr)
1211 {
1212     return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK));
1213 }
1214 
1215 /**
1216  * tlb_hit: return true if @addr is a hit against the TLB entry @tlb_addr
1217  *
1218  * @addr: virtual address to test (need not be page aligned)
1219  * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
1220  */
1221 static inline bool tlb_hit(uint64_t tlb_addr, vaddr addr)
1222 {
1223     return tlb_hit_page(tlb_addr, addr & TARGET_PAGE_MASK);
1224 }
1225 
1226 /*
1227  * Note: tlb_fill_align() can trigger a resize of the TLB.
1228  * This means that all of the caller's prior references to the TLB table
1229  * (e.g. CPUTLBEntry pointers) must be discarded and looked up again
1230  * (e.g. via tlb_entry()).
1231  */
1232 static bool tlb_fill_align(CPUState *cpu, vaddr addr, MMUAccessType type,
1233                            int mmu_idx, MemOp memop, int size,
1234                            bool probe, uintptr_t ra)
1235 {
1236     const TCGCPUOps *ops = cpu->cc->tcg_ops;
1237     CPUTLBEntryFull full;
1238 
1239     if (ops->tlb_fill_align) {
1240         if (ops->tlb_fill_align(cpu, &full, addr, type, mmu_idx,
1241                                 memop, size, probe, ra)) {
1242             tlb_set_page_full(cpu, mmu_idx, addr, &full);
1243             return true;
1244         }
1245     } else {
1246         /* Legacy behaviour is alignment before paging. */
1247         if (addr & ((1u << memop_alignment_bits(memop)) - 1)) {
1248             ops->do_unaligned_access(cpu, addr, type, mmu_idx, ra);
1249         }
1250         if (ops->tlb_fill(cpu, addr, size, type, mmu_idx, probe, ra)) {
1251             return true;
1252         }
1253     }
1254     assert(probe);
1255     return false;
1256 }
1257 
1258 static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
1259                                         MMUAccessType access_type,
1260                                         int mmu_idx, uintptr_t retaddr)
1261 {
1262     cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
1263                                           mmu_idx, retaddr);
1264 }
1265 
1266 static MemoryRegionSection *
1267 io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
1268            MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
1269 {
1270     MemoryRegionSection *section;
1271     hwaddr mr_offset;
1272 
1273     section = iotlb_to_section(cpu, xlat, attrs);
1274     mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
1275     cpu->mem_io_pc = retaddr;
1276     if (!cpu->neg.can_do_io) {
1277         cpu_io_recompile(cpu, retaddr);
1278     }
1279 
1280     *out_offset = mr_offset;
1281     return section;
1282 }
1283 
1284 static void io_failed(CPUState *cpu, CPUTLBEntryFull *full, vaddr addr,
1285                       unsigned size, MMUAccessType access_type, int mmu_idx,
1286                       MemTxResult response, uintptr_t retaddr)
1287 {
1288     if (!cpu->ignore_memory_transaction_failures
1289         && cpu->cc->tcg_ops->do_transaction_failed) {
1290         hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
1291 
1292         cpu->cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
1293                                                 access_type, mmu_idx,
1294                                                 full->attrs, response, retaddr);
1295     }
1296 }
1297 
1298 /* Return true if ADDR is present in the victim tlb, and has been copied
1299    back to the main tlb.  */
1300 static bool victim_tlb_hit(CPUState *cpu, size_t mmu_idx, size_t index,
1301                            MMUAccessType access_type, vaddr page)
1302 {
1303     size_t vidx;
1304 
1305     assert_cpu_is_self(cpu);
1306     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1307         CPUTLBEntry *vtlb = &cpu->neg.tlb.d[mmu_idx].vtable[vidx];
1308         uint64_t cmp = tlb_read_idx(vtlb, access_type);
1309 
1310         if (cmp == page) {
1311             /* Found entry in victim tlb, swap tlb and iotlb.  */
1312             CPUTLBEntry tmptlb, *tlb = &cpu->neg.tlb.f[mmu_idx].table[index];
1313 
1314             qemu_spin_lock(&cpu->neg.tlb.c.lock);
1315             copy_tlb_helper_locked(&tmptlb, tlb);
1316             copy_tlb_helper_locked(tlb, vtlb);
1317             copy_tlb_helper_locked(vtlb, &tmptlb);
1318             qemu_spin_unlock(&cpu->neg.tlb.c.lock);
1319 
1320             CPUTLBEntryFull *f1 = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1321             CPUTLBEntryFull *f2 = &cpu->neg.tlb.d[mmu_idx].vfulltlb[vidx];
1322             CPUTLBEntryFull tmpf;
1323             tmpf = *f1; *f1 = *f2; *f2 = tmpf;
1324             return true;
1325         }
1326     }
1327     return false;
1328 }
1329 
1330 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1331                            CPUTLBEntryFull *full, uintptr_t retaddr)
1332 {
1333     ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
1334 
1335     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1336 
1337     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1338         tb_invalidate_phys_range_fast(ram_addr, size, retaddr);
1339     }
1340 
1341     /*
1342      * Set both VGA and migration bits for simplicity and to remove
1343      * the notdirty callback faster.
1344      */
1345     cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1346 
1347     /* We remove the notdirty callback only if the code has been flushed. */
1348     if (!cpu_physical_memory_is_clean(ram_addr)) {
1349         trace_memory_notdirty_set_dirty(mem_vaddr);
1350         tlb_set_dirty(cpu, mem_vaddr);
1351     }
1352 }
1353 
1354 static int probe_access_internal(CPUState *cpu, vaddr addr,
1355                                  int fault_size, MMUAccessType access_type,
1356                                  int mmu_idx, bool nonfault,
1357                                  void **phost, CPUTLBEntryFull **pfull,
1358                                  uintptr_t retaddr, bool check_mem_cbs)
1359 {
1360     uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1361     CPUTLBEntry *entry = tlb_entry(cpu, mmu_idx, addr);
1362     uint64_t tlb_addr = tlb_read_idx(entry, access_type);
1363     vaddr page_addr = addr & TARGET_PAGE_MASK;
1364     int flags = TLB_FLAGS_MASK & ~TLB_FORCE_SLOW;
1365     bool force_mmio = check_mem_cbs && cpu_plugin_mem_cbs_enabled(cpu);
1366     CPUTLBEntryFull *full;
1367 
1368     if (!tlb_hit_page(tlb_addr, page_addr)) {
1369         if (!victim_tlb_hit(cpu, mmu_idx, index, access_type, page_addr)) {
1370             if (!tlb_fill_align(cpu, addr, access_type, mmu_idx,
1371                                 0, fault_size, nonfault, retaddr)) {
1372                 /* Non-faulting page table read failed.  */
1373                 *phost = NULL;
1374                 *pfull = NULL;
1375                 return TLB_INVALID_MASK;
1376             }
1377 
1378             /* TLB resize via tlb_fill_align may have moved the entry.  */
1379             index = tlb_index(cpu, mmu_idx, addr);
1380             entry = tlb_entry(cpu, mmu_idx, addr);
1381 
1382             /*
1383              * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
1384              * to force the next access through tlb_fill_align.  We've just
1385              * called tlb_fill_align, so we know that this entry *is* valid.
1386              */
1387             flags &= ~TLB_INVALID_MASK;
1388         }
1389         tlb_addr = tlb_read_idx(entry, access_type);
1390     }
1391     flags &= tlb_addr;
1392 
1393     *pfull = full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1394     flags |= full->slow_flags[access_type];
1395 
1396     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
1397     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY | TLB_CHECK_ALIGNED))
1398         || (access_type != MMU_INST_FETCH && force_mmio)) {
1399         *phost = NULL;
1400         return TLB_MMIO;
1401     }
1402 
1403     /* Everything else is RAM. */
1404     *phost = (void *)((uintptr_t)addr + entry->addend);
1405     return flags;
1406 }
1407 
1408 int probe_access_full(CPUArchState *env, vaddr addr, int size,
1409                       MMUAccessType access_type, int mmu_idx,
1410                       bool nonfault, void **phost, CPUTLBEntryFull **pfull,
1411                       uintptr_t retaddr)
1412 {
1413     int flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1414                                       mmu_idx, nonfault, phost, pfull, retaddr,
1415                                       true);
1416 
1417     /* Handle clean RAM pages.  */
1418     if (unlikely(flags & TLB_NOTDIRTY)) {
1419         int dirtysize = size == 0 ? 1 : size;
1420         notdirty_write(env_cpu(env), addr, dirtysize, *pfull, retaddr);
1421         flags &= ~TLB_NOTDIRTY;
1422     }
1423 
1424     return flags;
1425 }
1426 
1427 int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
1428                           MMUAccessType access_type, int mmu_idx,
1429                           void **phost, CPUTLBEntryFull **pfull)
1430 {
1431     void *discard_phost;
1432     CPUTLBEntryFull *discard_tlb;
1433 
1434     /* privately handle users that don't need full results */
1435     phost = phost ? phost : &discard_phost;
1436     pfull = pfull ? pfull : &discard_tlb;
1437 
1438     int flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1439                                       mmu_idx, true, phost, pfull, 0, false);
1440 
1441     /* Handle clean RAM pages.  */
1442     if (unlikely(flags & TLB_NOTDIRTY)) {
1443         int dirtysize = size == 0 ? 1 : size;
1444         notdirty_write(env_cpu(env), addr, dirtysize, *pfull, 0);
1445         flags &= ~TLB_NOTDIRTY;
1446     }
1447 
1448     return flags;
1449 }
1450 
1451 int probe_access_flags(CPUArchState *env, vaddr addr, int size,
1452                        MMUAccessType access_type, int mmu_idx,
1453                        bool nonfault, void **phost, uintptr_t retaddr)
1454 {
1455     CPUTLBEntryFull *full;
1456     int flags;
1457 
1458     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1459 
1460     flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1461                                   mmu_idx, nonfault, phost, &full, retaddr,
1462                                   true);
1463 
1464     /* Handle clean RAM pages. */
1465     if (unlikely(flags & TLB_NOTDIRTY)) {
1466         int dirtysize = size == 0 ? 1 : size;
1467         notdirty_write(env_cpu(env), addr, dirtysize, full, retaddr);
1468         flags &= ~TLB_NOTDIRTY;
1469     }
1470 
1471     return flags;
1472 }
1473 
1474 void *probe_access(CPUArchState *env, vaddr addr, int size,
1475                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1476 {
1477     CPUTLBEntryFull *full;
1478     void *host;
1479     int flags;
1480 
1481     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1482 
1483     flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1484                                   mmu_idx, false, &host, &full, retaddr,
1485                                   true);
1486 
1487     /* Per the interface, size == 0 merely faults the access. */
1488     if (size == 0) {
1489         return NULL;
1490     }
1491 
1492     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1493         /* Handle watchpoints.  */
1494         if (flags & TLB_WATCHPOINT) {
1495             int wp_access = (access_type == MMU_DATA_STORE
1496                              ? BP_MEM_WRITE : BP_MEM_READ);
1497             cpu_check_watchpoint(env_cpu(env), addr, size,
1498                                  full->attrs, wp_access, retaddr);
1499         }
1500 
1501         /* Handle clean RAM pages.  */
1502         if (flags & TLB_NOTDIRTY) {
1503             notdirty_write(env_cpu(env), addr, size, full, retaddr);
1504         }
1505     }
1506 
1507     return host;
1508 }
1509 
1510 void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr,
1511                         MMUAccessType access_type, int mmu_idx)
1512 {
1513     CPUTLBEntryFull *full;
1514     void *host;
1515     int flags;
1516 
1517     flags = probe_access_internal(env_cpu(env), addr, 0, access_type,
1518                                   mmu_idx, true, &host, &full, 0, false);
1519 
1520     /* No combination of flags is expected by the caller. */
1521     return flags ? NULL : host;
1522 }
1523 
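/*
 * Illustrative sketch, not part of the original file: tlb_vaddr_to_host()
 * is an optional fast path.  The helper below is hypothetical; whenever
 * either lookup returns NULL, the caller must fall back to the regular
 * load/store API, which handles MMIO, watchpoints and TLB refills.
 */
static inline void sketch_copy_byte(CPUArchState *env, vaddr dst, vaddr src,
                                    int mmu_idx, uintptr_t ra)
{
    void *s = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
    void *d = tlb_vaddr_to_host(env, dst, MMU_DATA_STORE, mmu_idx);

    if (s && d) {
        /* Both pages are plain RAM already present in the TLB. */
        *(uint8_t *)d = *(uint8_t *)s;
    } else {
        /* Slow path via the full load/store helpers. */
        cpu_stb_mmuidx_ra(env, dst,
                          cpu_ldub_mmuidx_ra(env, src, mmu_idx, ra),
                          mmu_idx, ra);
    }
}
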
1524 /*
1525  * Return a ram_addr_t for the virtual address for execution.
1526  *
1527  * Return -1 if we can't translate and execute from an entire page
1528  * of RAM.  This will force us to execute by loading and translating
1529  * one insn at a time, without caching.
1530  *
1531  * NOTE: This function will trigger an exception if the page is
1532  * not executable.
1533  */
1534 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
1535                                         void **hostp)
1536 {
1537     CPUTLBEntryFull *full;
1538     void *p;
1539 
1540     (void)probe_access_internal(env_cpu(env), addr, 1, MMU_INST_FETCH,
1541                                 cpu_mmu_index(env_cpu(env), true), false,
1542                                 &p, &full, 0, false);
1543     if (p == NULL) {
1544         return -1;
1545     }
1546 
1547     if (full->lg_page_size < TARGET_PAGE_BITS) {
1548         return -1;
1549     }
1550 
1551     if (hostp) {
1552         *hostp = p;
1553     }
1554     return qemu_ram_addr_from_host_nofail(p);
1555 }
1556 
1557 /* Load/store with atomicity primitives. */
1558 #include "ldst_atomicity.c.inc"
1559 
1560 #ifdef CONFIG_PLUGIN
1561 /*
1562  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1563  * This should be a hot path as we will have just looked this address up
1564  * in the softmmu lookup code (or helper). We don't handle re-fills or
1565  * checking the victim table. This is purely informational.
1566  *
1567  * The one corner case is i/o write, which can cause changes to the
1568  * address space.  Those changes, and the corresponding tlb flush,
1569  * should be delayed until the next TB, so even then this ought not fail.
1570  * But check, just in case.
1571  */
1572 bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
1573                        bool is_store, struct qemu_plugin_hwaddr *data)
1574 {
1575     CPUTLBEntry *tlbe = tlb_entry(cpu, mmu_idx, addr);
1576     uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1577     MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
1578     uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
1579     CPUTLBEntryFull *full;
1580 
1581     if (unlikely(!tlb_hit(tlb_addr, addr))) {
1582         return false;
1583     }
1584 
1585     full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1586     data->phys_addr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
1587 
1588     /* We must have an iotlb entry for MMIO */
1589     if (tlb_addr & TLB_MMIO) {
1590         MemoryRegionSection *section =
1591             iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
1592                              full->attrs);
1593         data->is_io = true;
1594         data->mr = section->mr;
1595     } else {
1596         data->is_io = false;
1597         data->mr = NULL;
1598     }
1599     return true;
1600 }
1601 #endif
1602 
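/*
 * Illustrative sketch, not part of the original file: how a TCG plugin ends
 * up exercising tlb_plugin_lookup() above.  The callback below is
 * hypothetical plugin code (it would live in a plugin built against
 * qemu-plugin.h, not here); qemu_plugin_get_hwaddr() is the public plugin
 * API that is backed by this lookup in system mode.
 */
static void sketch_plugin_mem_cb(unsigned int cpu_index,
                                 qemu_plugin_meminfo_t info,
                                 uint64_t vaddr, void *udata)
{
    struct qemu_plugin_hwaddr *hw = qemu_plugin_get_hwaddr(info, vaddr);

    if (hw && !qemu_plugin_hwaddr_is_io(hw)) {
        uint64_t paddr = qemu_plugin_hwaddr_phys_addr(hw);
        /* ... record the RAM physical address ... */
        (void)paddr;
    }
}
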
1603 /*
1604  * Probe for a load/store operation.
1605  * Return the host address and collect the TLB flags into @flags.
1606  */
1607 
1608 typedef struct MMULookupPageData {
1609     CPUTLBEntryFull *full;
1610     void *haddr;
1611     vaddr addr;
1612     int flags;
1613     int size;
1614 } MMULookupPageData;
1615 
1616 typedef struct MMULookupLocals {
1617     MMULookupPageData page[2];
1618     MemOp memop;
1619     int mmu_idx;
1620 } MMULookupLocals;
1621 
1622 /**
1623  * mmu_lookup1: translate one page
1624  * @cpu: generic cpu state
1625  * @data: lookup parameters
1626  * @memop: memory operation for the access, or 0
1627  * @mmu_idx: virtual address context
1628  * @access_type: load/store/code
1629  * @ra: return address into tcg generated code, or 0
1630  *
1631  * Resolve the translation for the one page at @data.addr, filling in
1632  * the rest of @data with the results.  If the translation fails,
1633  * tlb_fill_align will longjmp out.  Return true if the softmmu tlb for
1634  * @mmu_idx may have been resized.
1635  */
1636 static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, MemOp memop,
1637                         int mmu_idx, MMUAccessType access_type, uintptr_t ra)
1638 {
1639     vaddr addr = data->addr;
1640     uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1641     CPUTLBEntry *entry = tlb_entry(cpu, mmu_idx, addr);
1642     uint64_t tlb_addr = tlb_read_idx(entry, access_type);
1643     bool maybe_resized = false;
1644     CPUTLBEntryFull *full;
1645     int flags;
1646 
1647     /* If the TLB entry is for a different page, reload and try again.  */
1648     if (!tlb_hit(tlb_addr, addr)) {
1649         if (!victim_tlb_hit(cpu, mmu_idx, index, access_type,
1650                             addr & TARGET_PAGE_MASK)) {
1651             tlb_fill_align(cpu, addr, access_type, mmu_idx,
1652                            memop, data->size, false, ra);
1653             maybe_resized = true;
1654             index = tlb_index(cpu, mmu_idx, addr);
1655             entry = tlb_entry(cpu, mmu_idx, addr);
1656         }
1657         tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
1658     }
1659 
1660     full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1661     flags = tlb_addr & (TLB_FLAGS_MASK & ~TLB_FORCE_SLOW);
1662     flags |= full->slow_flags[access_type];
1663 
1664     if (likely(!maybe_resized)) {
1665         /* Alignment has not been checked by tlb_fill_align. */
1666         int a_bits = memop_alignment_bits(memop);
1667 
1668         /*
1669          * This alignment check differs from the one above, in that this is
1670          * based on the atomicity of the operation. The intended use case is
1671          * the ARM memory type field of each PTE, where access to pages with
1672          * Device memory type require alignment.
1673          */
1674         if (unlikely(flags & TLB_CHECK_ALIGNED)) {
1675             int at_bits = memop_atomicity_bits(memop);
1676             a_bits = MAX(a_bits, at_bits);
1677         }
1678         if (unlikely(addr & ((1 << a_bits) - 1))) {
1679             cpu_unaligned_access(cpu, addr, access_type, mmu_idx, ra);
1680         }
1681     }
1682 
1683     data->full = full;
1684     data->flags = flags;
1685     /* Compute haddr speculatively; depending on flags it might be invalid. */
1686     data->haddr = (void *)((uintptr_t)addr + entry->addend);
1687 
1688     return maybe_resized;
1689 }
1690 
1691 /**
1692  * mmu_watch_or_dirty
1693  * @cpu: generic cpu state
1694  * @data: lookup parameters
1695  * @access_type: load/store/code
1696  * @ra: return address into tcg generated code, or 0
1697  *
1698  * Trigger watchpoints for @data.addr:@data.size;
1699  * record writes to protected clean pages.
1700  */
1701 static void mmu_watch_or_dirty(CPUState *cpu, MMULookupPageData *data,
1702                                MMUAccessType access_type, uintptr_t ra)
1703 {
1704     CPUTLBEntryFull *full = data->full;
1705     vaddr addr = data->addr;
1706     int flags = data->flags;
1707     int size = data->size;
1708 
1709     /* On watchpoint hit, this will longjmp out.  */
1710     if (flags & TLB_WATCHPOINT) {
1711         int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
1712         cpu_check_watchpoint(cpu, addr, size, full->attrs, wp, ra);
1713         flags &= ~TLB_WATCHPOINT;
1714     }
1715 
1716     /* Note that notdirty is only set for writes. */
1717     if (flags & TLB_NOTDIRTY) {
1718         notdirty_write(cpu, addr, size, full, ra);
1719         flags &= ~TLB_NOTDIRTY;
1720     }
1721     data->flags = flags;
1722 }
1723 
1724 /**
1725  * mmu_lookup: translate page(s)
1726  * @cpu: generic cpu state
1727  * @addr: virtual address
1728  * @oi: combined mmu_idx and MemOp
1729  * @ra: return address into tcg generated code, or 0
1730  * @access_type: load/store/code
1731  * @l: output result
1732  *
1733  * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
1734  * bytes.  Return true if the lookup crosses a page boundary.
1735  */
1736 static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1737                        uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
1738 {
1739     bool crosspage;
1740     int flags;
1741 
1742     l->memop = get_memop(oi);
1743     l->mmu_idx = get_mmuidx(oi);
1744 
1745     tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
1746 
1747     l->page[0].addr = addr;
1748     l->page[0].size = memop_size(l->memop);
1749     l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
1750     l->page[1].size = 0;
1751     crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
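    /*
     * Worked example: with 4KiB target pages, addr == 0x2ffe and a 4-byte
     * access give page[1].addr == 0x3000, so crosspage is nonzero; the
     * split in the crosspage branch below then leaves 2 bytes on each page.
     */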
1752 
1753     if (likely(!crosspage)) {
1754         mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
1755 
1756         flags = l->page[0].flags;
1757         if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1758             mmu_watch_or_dirty(cpu, &l->page[0], type, ra);
1759         }
1760         if (unlikely(flags & TLB_BSWAP)) {
1761             l->memop ^= MO_BSWAP;
1762         }
1763     } else {
1764         /* Finish computing the page-crossing split. */
1765         int size0 = l->page[1].addr - addr;
1766         l->page[1].size = l->page[0].size - size0;
1767         l->page[0].size = size0;
1768 
1769         /*
1770          * Lookup both pages, recognizing exceptions from either.  If the
1771          * second lookup potentially resized, refresh first CPUTLBEntryFull.
1772          */
1773         mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
1774         if (mmu_lookup1(cpu, &l->page[1], 0, l->mmu_idx, type, ra)) {
1775             uintptr_t index = tlb_index(cpu, l->mmu_idx, addr);
1776             l->page[0].full = &cpu->neg.tlb.d[l->mmu_idx].fulltlb[index];
1777         }
1778 
1779         flags = l->page[0].flags | l->page[1].flags;
1780         if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1781             mmu_watch_or_dirty(cpu, &l->page[0], type, ra);
1782             mmu_watch_or_dirty(cpu, &l->page[1], type, ra);
1783         }
1784 
1785         /*
1786          * Since target/sparc is the only user of TLB_BSWAP, and all
1787          * Sparc accesses are aligned, any treatment across two pages
1788          * would be arbitrary.  Refuse it until there's a use.
1789          */
1790         tcg_debug_assert((flags & TLB_BSWAP) == 0);
1791     }
1792 
1793     return crosspage;
1794 }
1795 
1796 /*
1797  * Probe for an atomic operation.  Do not allow unaligned operations,
1798  * or io operations to proceed.  Return the host address.
1799  */
1800 static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1801                                int size, uintptr_t retaddr)
1802 {
1803     uintptr_t mmu_idx = get_mmuidx(oi);
1804     MemOp mop = get_memop(oi);
1805     uintptr_t index;
1806     CPUTLBEntry *tlbe;
1807     vaddr tlb_addr;
1808     void *hostaddr;
1809     CPUTLBEntryFull *full;
1810     bool did_tlb_fill = false;
1811 
1812     tcg_debug_assert(mmu_idx < NB_MMU_MODES);
1813 
1814     /* Adjust the given return address.  */
1815     retaddr -= GETPC_ADJ;
1816 
1817     index = tlb_index(cpu, mmu_idx, addr);
1818     tlbe = tlb_entry(cpu, mmu_idx, addr);
1819 
1820     /* Check TLB entry and enforce page permissions.  */
1821     tlb_addr = tlb_addr_write(tlbe);
1822     if (!tlb_hit(tlb_addr, addr)) {
1823         if (!victim_tlb_hit(cpu, mmu_idx, index, MMU_DATA_STORE,
1824                             addr & TARGET_PAGE_MASK)) {
1825             tlb_fill_align(cpu, addr, MMU_DATA_STORE, mmu_idx,
1826                            mop, size, false, retaddr);
1827             did_tlb_fill = true;
1828             index = tlb_index(cpu, mmu_idx, addr);
1829             tlbe = tlb_entry(cpu, mmu_idx, addr);
1830         }
1831         tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1832     }
1833 
1834     /*
1835      * Let the guest notice RMW on a write-only page.
1836      * We have just verified that the page is writable.
1837      * Subpage lookups may have left TLB_INVALID_MASK set,
1838      * but addr_read will only be -1 if PAGE_READ was unset.
1839      */
1840     if (unlikely(tlbe->addr_read == -1)) {
1841         tlb_fill_align(cpu, addr, MMU_DATA_LOAD, mmu_idx,
1842                        0, size, false, retaddr);
1843         /*
1844          * Since we don't support reads and writes to different
1845          * addresses, and we do have the proper page loaded for
1846          * write, this shouldn't ever return.
1847          */
1848         g_assert_not_reached();
1849     }
1850 
1851     /* Enforce guest required alignment, if not handled by tlb_fill_align. */
1852     if (!did_tlb_fill && (addr & ((1 << memop_alignment_bits(mop)) - 1))) {
1853         cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr);
1854     }
1855 
1856     /* Enforce qemu required alignment.  */
1857     if (unlikely(addr & (size - 1))) {
1858         /*
1859          * We get here if guest alignment was not requested, or was not
1860          * enforced by cpu_unaligned_access or tlb_fill_align above.
1861          * We might widen the access and emulate, but for now
1862          * mark an exception and exit the cpu loop.
1863          */
1864         goto stop_the_world;
1865     }
1866 
1867     /* Collect tlb flags for read. */
1868     tlb_addr |= tlbe->addr_read;
1869 
1870     /* Notice an IO access or a needs-MMU-lookup access */
1871     if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
1872         /* There's really nothing that can be done to
1873            support this apart from stop-the-world.  */
1874         goto stop_the_world;
1875     }
1876 
1877     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1878     full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1879 
1880     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1881         notdirty_write(cpu, addr, size, full, retaddr);
1882     }
1883 
1884     if (unlikely(tlb_addr & TLB_FORCE_SLOW)) {
1885         int wp_flags = 0;
1886 
1887         if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) {
1888             wp_flags |= BP_MEM_WRITE;
1889         }
1890         if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) {
1891             wp_flags |= BP_MEM_READ;
1892         }
1893         if (wp_flags) {
1894             cpu_check_watchpoint(cpu, addr, size,
1895                                  full->attrs, wp_flags, retaddr);
1896         }
1897     }
1898 
1899     return hostaddr;
1900 
1901  stop_the_world:
1902     cpu_loop_exit_atomic(cpu, retaddr);
1903 }
1904 
1905 /*
1906  * Load Helpers
1907  *
1908  * We support two different access types. SOFTMMU_CODE_ACCESS is
1909  * specifically for reading instructions from system memory. It is
1910  * called by the translation loop and in some helpers where the code
1911  * is disassembled. It shouldn't be called directly by guest code.
1912  *
1913  * For the benefit of TCG generated code, we want to avoid the
1914  * complication of ABI-specific return type promotion and always
1915  * return a value extended to the register size of the host. This is
1916  * tcg_target_long, except in the case of a 32-bit host and 64-bit
1917  * data, and for that we always have uint64_t.
1918  *
1919  * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1920  */
1921 
1922 /**
1923  * do_ld_mmio_beN:
1924  * @cpu: generic cpu state
1925  * @full: page parameters
1926  * @ret_be: accumulated data
1927  * @addr: virtual address
1928  * @size: number of bytes
1929  * @mmu_idx: virtual address context
1930  * @ra: return address into tcg generated code, or 0
1931  * Context: BQL held
1932  *
1933  * Load @size bytes from @addr, which is memory-mapped i/o.
1934  * The bytes are concatenated in big-endian order with @ret_be.
1935  */
1936 static uint64_t int_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1937                                 uint64_t ret_be, vaddr addr, int size,
1938                                 int mmu_idx, MMUAccessType type, uintptr_t ra,
1939                                 MemoryRegion *mr, hwaddr mr_offset)
1940 {
1941     do {
1942         MemOp this_mop;
1943         unsigned this_size;
1944         uint64_t val;
1945         MemTxResult r;
1946 
1947         /* Read aligned pieces up to 8 bytes. */
1948         this_mop = ctz32(size | (int)addr | 8);
1949         this_size = 1 << this_mop;
1950         this_mop |= MO_BE;
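        /*
         * Worked example: an addr ending in 0x3 with size == 5 gives
         * ctz32(5 | 3 | 8) == 0, i.e. a 1-byte read, after which the
         * address is 4-aligned and the remaining 4 bytes are read with
         * one 4-byte access; each piece is naturally aligned and <= 8.
         */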
1951 
1952         r = memory_region_dispatch_read(mr, mr_offset, &val,
1953                                         this_mop, full->attrs);
1954         if (unlikely(r != MEMTX_OK)) {
1955             io_failed(cpu, full, addr, this_size, type, mmu_idx, r, ra);
1956         }
1957         if (this_size == 8) {
1958             return val;
1959         }
1960 
1961         ret_be = (ret_be << (this_size * 8)) | val;
1962         addr += this_size;
1963         mr_offset += this_size;
1964         size -= this_size;
1965     } while (size);
1966 
1967     return ret_be;
1968 }
1969 
1970 static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1971                                uint64_t ret_be, vaddr addr, int size,
1972                                int mmu_idx, MMUAccessType type, uintptr_t ra)
1973 {
1974     MemoryRegionSection *section;
1975     MemoryRegion *mr;
1976     hwaddr mr_offset;
1977     MemTxAttrs attrs;
1978 
1979     tcg_debug_assert(size > 0 && size <= 8);
1980 
1981     attrs = full->attrs;
1982     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
1983     mr = section->mr;
1984 
1985     BQL_LOCK_GUARD();
1986     return int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx,
1987                            type, ra, mr, mr_offset);
1988 }
1989 
1990 static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1991                                uint64_t ret_be, vaddr addr, int size,
1992                                int mmu_idx, uintptr_t ra)
1993 {
1994     MemoryRegionSection *section;
1995     MemoryRegion *mr;
1996     hwaddr mr_offset;
1997     MemTxAttrs attrs;
1998     uint64_t a, b;
1999 
2000     tcg_debug_assert(size > 8 && size <= 16);
2001 
2002     attrs = full->attrs;
2003     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2004     mr = section->mr;
2005 
2006     BQL_LOCK_GUARD();
2007     a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx,
2008                         MMU_DATA_LOAD, ra, mr, mr_offset);
2009     b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx,
2010                         MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
2011     return int128_make128(b, a);
2012 }
2013 
2014 /**
2015  * do_ld_bytes_beN
2016  * @p: translation parameters
2017  * @ret_be: accumulated data
2018  *
2019  * Load @p->size bytes from @p->haddr, which is RAM.
2020  * The bytes are concatenated in big-endian order with @ret_be.
2021  */
2022 static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
2023 {
2024     uint8_t *haddr = p->haddr;
2025     int i, size = p->size;
2026 
2027     for (i = 0; i < size; i++) {
2028         ret_be = (ret_be << 8) | haddr[i];
2029     }
2030     return ret_be;
2031 }
2032 
2033 /**
2034  * do_ld_parts_beN
2035  * @p: translation parameters
2036  * @ret_be: accumulated data
2037  *
2038  * As do_ld_bytes_beN, but atomically on each aligned part.
2039  */
2040 static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
2041 {
2042     void *haddr = p->haddr;
2043     int size = p->size;
2044 
2045     do {
2046         uint64_t x;
2047         int n;
2048 
2049         /*
2050          * Find minimum of alignment and size.
2051          * This is slightly stronger than required by MO_ATOM_SUBALIGN, which
2052          * would have only checked the low bits of addr|size once at the start,
2053          * but is just as easy.
2054          */
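        /*
         * Worked example: haddr ending in 0x2 with size == 6 selects
         * case 6 (one atomic 2-byte load); haddr is then 4-aligned and
         * the remaining 4 bytes are read with one atomic 4-byte load.
         */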
2055         switch (((uintptr_t)haddr | size) & 7) {
2056         case 4:
2057             x = cpu_to_be32(load_atomic4(haddr));
2058             ret_be = (ret_be << 32) | x;
2059             n = 4;
2060             break;
2061         case 2:
2062         case 6:
2063             x = cpu_to_be16(load_atomic2(haddr));
2064             ret_be = (ret_be << 16) | x;
2065             n = 2;
2066             break;
2067         default:
2068             x = *(uint8_t *)haddr;
2069             ret_be = (ret_be << 8) | x;
2070             n = 1;
2071             break;
2072         case 0:
2073             g_assert_not_reached();
2074         }
2075         haddr += n;
2076         size -= n;
2077     } while (size != 0);
2078     return ret_be;
2079 }
2080 
2081 /**
2082  * do_ld_whole_be4
2083  * @p: translation parameters
2084  * @ret_be: accumulated data
2085  *
2086  * As do_ld_bytes_beN, but with one atomic load.
2087  * Four aligned bytes are guaranteed to cover the load.
2088  */
2089 static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
2090 {
2091     int o = p->addr & 3;
2092     uint32_t x = load_atomic4(p->haddr - o);
2093 
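    /*
     * Worked example: o == 1 and p->size == 3 load the aligned word
     * containing the wanted bytes; the left shift below discards the
     * byte preceding them and the right shift leaves the three wanted
     * bytes in the low 24 bits, to be appended to @ret_be.
     */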
2094     x = cpu_to_be32(x);
2095     x <<= o * 8;
2096     x >>= (4 - p->size) * 8;
2097     return (ret_be << (p->size * 8)) | x;
2098 }
2099 
2100 /**
2101  * do_ld_whole_be8
2102  * @p: translation parameters
2103  * @ret_be: accumulated data
2104  *
2105  * As do_ld_bytes_beN, but with one atomic load.
2106  * Eight aligned bytes are guaranteed to cover the load.
2107  */
2108 static uint64_t do_ld_whole_be8(CPUState *cpu, uintptr_t ra,
2109                                 MMULookupPageData *p, uint64_t ret_be)
2110 {
2111     int o = p->addr & 7;
2112     uint64_t x = load_atomic8_or_exit(cpu, ra, p->haddr - o);
2113 
2114     x = cpu_to_be64(x);
2115     x <<= o * 8;
2116     x >>= (8 - p->size) * 8;
2117     return (ret_be << (p->size * 8)) | x;
2118 }
2119 
2120 /**
2121  * do_ld_whole_be16
2122  * @p: translation parameters
2123  * @ret_be: accumulated data
2124  *
2125  * As do_ld_bytes_beN, but with one atomic load.
2126  * 16 aligned bytes are guaranteed to cover the load.
2127  */
2128 static Int128 do_ld_whole_be16(CPUState *cpu, uintptr_t ra,
2129                                MMULookupPageData *p, uint64_t ret_be)
2130 {
2131     int o = p->addr & 15;
2132     Int128 x, y = load_atomic16_or_exit(cpu, ra, p->haddr - o);
2133     int size = p->size;
2134 
2135     if (!HOST_BIG_ENDIAN) {
2136         y = bswap128(y);
2137     }
2138     y = int128_lshift(y, o * 8);
2139     y = int128_urshift(y, (16 - size) * 8);
2140     x = int128_make64(ret_be);
2141     x = int128_lshift(x, size * 8);
2142     return int128_or(x, y);
2143 }
2144 
2145 /*
2146  * Wrapper for the above.
2147  */
2148 static uint64_t do_ld_beN(CPUState *cpu, MMULookupPageData *p,
2149                           uint64_t ret_be, int mmu_idx, MMUAccessType type,
2150                           MemOp mop, uintptr_t ra)
2151 {
2152     MemOp atom;
2153     unsigned tmp, half_size;
2154 
2155     if (unlikely(p->flags & TLB_MMIO)) {
2156         return do_ld_mmio_beN(cpu, p->full, ret_be, p->addr, p->size,
2157                               mmu_idx, type, ra);
2158     }
2159 
2160     /*
2161      * It is a given that we cross a page and therefore there is no
2162      * atomicity for the load as a whole, but subobjects may need attention.
2163      */
2164     atom = mop & MO_ATOM_MASK;
2165     switch (atom) {
2166     case MO_ATOM_SUBALIGN:
2167         return do_ld_parts_beN(p, ret_be);
2168 
2169     case MO_ATOM_IFALIGN_PAIR:
2170     case MO_ATOM_WITHIN16_PAIR:
2171         tmp = mop & MO_SIZE;
2172         tmp = tmp ? tmp - 1 : 0;
2173         half_size = 1 << tmp;
2174         if (atom == MO_ATOM_IFALIGN_PAIR
2175             ? p->size == half_size
2176             : p->size >= half_size) {
2177             if (!HAVE_al8_fast && p->size < 4) {
2178                 return do_ld_whole_be4(p, ret_be);
2179             } else {
2180                 return do_ld_whole_be8(cpu, ra, p, ret_be);
2181             }
2182         }
2183         /* fall through */
2184 
2185     case MO_ATOM_IFALIGN:
2186     case MO_ATOM_WITHIN16:
2187     case MO_ATOM_NONE:
2188         return do_ld_bytes_beN(p, ret_be);
2189 
2190     default:
2191         g_assert_not_reached();
2192     }
2193 }
2194 
2195 /*
2196  * Wrapper for the above, for 8 < size < 16.
2197  */
2198 static Int128 do_ld16_beN(CPUState *cpu, MMULookupPageData *p,
2199                           uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
2200 {
2201     int size = p->size;
2202     uint64_t b;
2203     MemOp atom;
2204 
2205     if (unlikely(p->flags & TLB_MMIO)) {
2206         return do_ld16_mmio_beN(cpu, p->full, a, p->addr, size, mmu_idx, ra);
2207     }
2208 
2209     /*
2210      * It is a given that we cross a page and therefore there is no
2211      * atomicity for the load as a whole, but subobjects may need attention.
2212      */
2213     atom = mop & MO_ATOM_MASK;
2214     switch (atom) {
2215     case MO_ATOM_SUBALIGN:
2216         p->size = size - 8;
2217         a = do_ld_parts_beN(p, a);
2218         p->haddr += size - 8;
2219         p->size = 8;
2220         b = do_ld_parts_beN(p, 0);
2221         break;
2222 
2223     case MO_ATOM_WITHIN16_PAIR:
2224         /* Since size > 8, this is the half that must be atomic. */
2225         return do_ld_whole_be16(cpu, ra, p, a);
2226 
2227     case MO_ATOM_IFALIGN_PAIR:
2228         /*
2229          * Since size > 8, both halves are misaligned,
2230          * and so neither is atomic.
2231          */
2232     case MO_ATOM_IFALIGN:
2233     case MO_ATOM_WITHIN16:
2234     case MO_ATOM_NONE:
2235         p->size = size - 8;
2236         a = do_ld_bytes_beN(p, a);
2237         b = ldq_be_p(p->haddr + size - 8);
2238         break;
2239 
2240     default:
2241         g_assert_not_reached();
2242     }
2243 
2244     return int128_make128(b, a);
2245 }
2246 
2247 static uint8_t do_ld_1(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2248                        MMUAccessType type, uintptr_t ra)
2249 {
2250     if (unlikely(p->flags & TLB_MMIO)) {
2251         return do_ld_mmio_beN(cpu, p->full, 0, p->addr, 1, mmu_idx, type, ra);
2252     } else {
2253         return *(uint8_t *)p->haddr;
2254     }
2255 }
2256 
2257 static uint16_t do_ld_2(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2258                         MMUAccessType type, MemOp memop, uintptr_t ra)
2259 {
2260     uint16_t ret;
2261 
2262     if (unlikely(p->flags & TLB_MMIO)) {
2263         ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 2, mmu_idx, type, ra);
2264         if ((memop & MO_BSWAP) == MO_LE) {
2265             ret = bswap16(ret);
2266         }
2267     } else {
2268         /* Perform the load host endian, then swap if necessary. */
2269         ret = load_atom_2(cpu, ra, p->haddr, memop);
2270         if (memop & MO_BSWAP) {
2271             ret = bswap16(ret);
2272         }
2273     }
2274     return ret;
2275 }
2276 
2277 static uint32_t do_ld_4(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2278                         MMUAccessType type, MemOp memop, uintptr_t ra)
2279 {
2280     uint32_t ret;
2281 
2282     if (unlikely(p->flags & TLB_MMIO)) {
2283         ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 4, mmu_idx, type, ra);
2284         if ((memop & MO_BSWAP) == MO_LE) {
2285             ret = bswap32(ret);
2286         }
2287     } else {
2288         /* Perform the load host endian. */
2289         ret = load_atom_4(cpu, ra, p->haddr, memop);
2290         if (memop & MO_BSWAP) {
2291             ret = bswap32(ret);
2292         }
2293     }
2294     return ret;
2295 }
2296 
2297 static uint64_t do_ld_8(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2298                         MMUAccessType type, MemOp memop, uintptr_t ra)
2299 {
2300     uint64_t ret;
2301 
2302     if (unlikely(p->flags & TLB_MMIO)) {
2303         ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 8, mmu_idx, type, ra);
2304         if ((memop & MO_BSWAP) == MO_LE) {
2305             ret = bswap64(ret);
2306         }
2307     } else {
2308         /* Perform the load host endian. */
2309         ret = load_atom_8(cpu, ra, p->haddr, memop);
2310         if (memop & MO_BSWAP) {
2311             ret = bswap64(ret);
2312         }
2313     }
2314     return ret;
2315 }
2316 
2317 static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2318                           uintptr_t ra, MMUAccessType access_type)
2319 {
2320     MMULookupLocals l;
2321     bool crosspage;
2322 
2323     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2324     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2325     tcg_debug_assert(!crosspage);
2326 
2327     return do_ld_1(cpu, &l.page[0], l.mmu_idx, access_type, ra);
2328 }
2329 
2330 static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2331                            uintptr_t ra, MMUAccessType access_type)
2332 {
2333     MMULookupLocals l;
2334     bool crosspage;
2335     uint16_t ret;
2336     uint8_t a, b;
2337 
2338     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2339     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2340     if (likely(!crosspage)) {
2341         return do_ld_2(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2342     }
2343 
2344     a = do_ld_1(cpu, &l.page[0], l.mmu_idx, access_type, ra);
2345     b = do_ld_1(cpu, &l.page[1], l.mmu_idx, access_type, ra);
2346 
2347     if ((l.memop & MO_BSWAP) == MO_LE) {
2348         ret = a | (b << 8);
2349     } else {
2350         ret = b | (a << 8);
2351     }
2352     return ret;
2353 }
2354 
2355 static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2356                            uintptr_t ra, MMUAccessType access_type)
2357 {
2358     MMULookupLocals l;
2359     bool crosspage;
2360     uint32_t ret;
2361 
2362     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2363     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2364     if (likely(!crosspage)) {
2365         return do_ld_4(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2366     }
2367 
2368     ret = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
2369     ret = do_ld_beN(cpu, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
2370     if ((l.memop & MO_BSWAP) == MO_LE) {
2371         ret = bswap32(ret);
2372     }
2373     return ret;
2374 }
2375 
2376 static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2377                            uintptr_t ra, MMUAccessType access_type)
2378 {
2379     MMULookupLocals l;
2380     bool crosspage;
2381     uint64_t ret;
2382 
2383     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2384     crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2385     if (likely(!crosspage)) {
2386         return do_ld_8(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2387     }
2388 
2389     ret = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
2390     ret = do_ld_beN(cpu, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
2391     if ((l.memop & MO_BSWAP) == MO_LE) {
2392         ret = bswap64(ret);
2393     }
2394     return ret;
2395 }
2396 
2397 static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
2398                           MemOpIdx oi, uintptr_t ra)
2399 {
2400     MMULookupLocals l;
2401     bool crosspage;
2402     uint64_t a, b;
2403     Int128 ret;
2404     int first;
2405 
2406     cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
2407     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_LOAD, &l);
2408     if (likely(!crosspage)) {
2409         if (unlikely(l.page[0].flags & TLB_MMIO)) {
2410             ret = do_ld16_mmio_beN(cpu, l.page[0].full, 0, addr, 16,
2411                                    l.mmu_idx, ra);
2412             if ((l.memop & MO_BSWAP) == MO_LE) {
2413                 ret = bswap128(ret);
2414             }
2415         } else {
2416             /* Perform the load host endian. */
2417             ret = load_atom_16(cpu, ra, l.page[0].haddr, l.memop);
2418             if (l.memop & MO_BSWAP) {
2419                 ret = bswap128(ret);
2420             }
2421         }
2422         return ret;
2423     }
2424 
2425     first = l.page[0].size;
2426     if (first == 8) {
2427         MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
2428 
2429         a = do_ld_8(cpu, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
2430         b = do_ld_8(cpu, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
2431         if ((mop8 & MO_BSWAP) == MO_LE) {
2432             ret = int128_make128(a, b);
2433         } else {
2434             ret = int128_make128(b, a);
2435         }
2436         return ret;
2437     }
2438 
2439     if (first < 8) {
2440         a = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx,
2441                       MMU_DATA_LOAD, l.memop, ra);
2442         ret = do_ld16_beN(cpu, &l.page[1], a, l.mmu_idx, l.memop, ra);
2443     } else {
2444         ret = do_ld16_beN(cpu, &l.page[0], 0, l.mmu_idx, l.memop, ra);
2445         b = int128_getlo(ret);
2446         ret = int128_lshift(ret, l.page[1].size * 8);
2447         a = int128_gethi(ret);
2448         b = do_ld_beN(cpu, &l.page[1], b, l.mmu_idx,
2449                       MMU_DATA_LOAD, l.memop, ra);
2450         ret = int128_make128(b, a);
2451     }
2452     if ((l.memop & MO_BSWAP) == MO_LE) {
2453         ret = bswap128(ret);
2454     }
2455     return ret;
2456 }
2457 
2458 /*
2459  * Store Helpers
2460  */
2461 
2462 /**
2463  * do_st_mmio_leN:
2464  * @cpu: generic cpu state
2465  * @full: page parameters
2466  * @val_le: data to store
2467  * @addr: virtual address
2468  * @size: number of bytes
2469  * @mmu_idx: virtual address context
2470  * @ra: return address into tcg generated code, or 0
2471  * Context: BQL held
2472  *
2473  * Store @size bytes at @addr, which is memory-mapped i/o.
2474  * The bytes to store are extracted in little-endian order from @val_le;
2475  * return the bytes of @val_le beyond @size that have not been stored.
2476  */
2477 static uint64_t int_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2478                                 uint64_t val_le, vaddr addr, int size,
2479                                 int mmu_idx, uintptr_t ra,
2480                                 MemoryRegion *mr, hwaddr mr_offset)
2481 {
2482     do {
2483         MemOp this_mop;
2484         unsigned this_size;
2485         MemTxResult r;
2486 
2487         /* Store aligned pieces up to 8 bytes. */
2488         this_mop = ctz32(size | (int)addr | 8);
2489         this_size = 1 << this_mop;
2490         this_mop |= MO_LE;
2491 
2492         r = memory_region_dispatch_write(mr, mr_offset, val_le,
2493                                          this_mop, full->attrs);
2494         if (unlikely(r != MEMTX_OK)) {
2495             io_failed(cpu, full, addr, this_size, MMU_DATA_STORE,
2496                       mmu_idx, r, ra);
2497         }
2498         if (this_size == 8) {
2499             return 0;
2500         }
2501 
2502         val_le >>= this_size * 8;
2503         addr += this_size;
2504         mr_offset += this_size;
2505         size -= this_size;
2506     } while (size);
2507 
2508     return val_le;
2509 }
2510 
2511 static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2512                                uint64_t val_le, vaddr addr, int size,
2513                                int mmu_idx, uintptr_t ra)
2514 {
2515     MemoryRegionSection *section;
2516     hwaddr mr_offset;
2517     MemoryRegion *mr;
2518     MemTxAttrs attrs;
2519 
2520     tcg_debug_assert(size > 0 && size <= 8);
2521 
2522     attrs = full->attrs;
2523     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2524     mr = section->mr;
2525 
2526     BQL_LOCK_GUARD();
2527     return int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx,
2528                            ra, mr, mr_offset);
2529 }
2530 
2531 static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2532                                  Int128 val_le, vaddr addr, int size,
2533                                  int mmu_idx, uintptr_t ra)
2534 {
2535     MemoryRegionSection *section;
2536     MemoryRegion *mr;
2537     hwaddr mr_offset;
2538     MemTxAttrs attrs;
2539 
2540     tcg_debug_assert(size > 8 && size <= 16);
2541 
2542     attrs = full->attrs;
2543     section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2544     mr = section->mr;
2545 
2546     BQL_LOCK_GUARD();
2547     int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8,
2548                     mmu_idx, ra, mr, mr_offset);
2549     return int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8,
2550                            size - 8, mmu_idx, ra, mr, mr_offset + 8);
2551 }
2552 
2553 /*
2554  * Wrapper for the above.
2555  */
2556 static uint64_t do_st_leN(CPUState *cpu, MMULookupPageData *p,
2557                           uint64_t val_le, int mmu_idx,
2558                           MemOp mop, uintptr_t ra)
2559 {
2560     MemOp atom;
2561     unsigned tmp, half_size;
2562 
2563     if (unlikely(p->flags & TLB_MMIO)) {
2564         return do_st_mmio_leN(cpu, p->full, val_le, p->addr,
2565                               p->size, mmu_idx, ra);
2566     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2567         return val_le >> (p->size * 8);
2568     }
2569 
2570     /*
2571      * It is a given that we cross a page and therefore there is no atomicity
2572      * for the store as a whole, but subobjects may need attention.
2573      */
2574     atom = mop & MO_ATOM_MASK;
2575     switch (atom) {
2576     case MO_ATOM_SUBALIGN:
2577         return store_parts_leN(p->haddr, p->size, val_le);
2578 
2579     case MO_ATOM_IFALIGN_PAIR:
2580     case MO_ATOM_WITHIN16_PAIR:
2581         tmp = mop & MO_SIZE;
2582         tmp = tmp ? tmp - 1 : 0;
2583         half_size = 1 << tmp;
2584         if (atom == MO_ATOM_IFALIGN_PAIR
2585             ? p->size == half_size
2586             : p->size >= half_size) {
2587             if (!HAVE_al8_fast && p->size <= 4) {
2588                 return store_whole_le4(p->haddr, p->size, val_le);
2589             } else if (HAVE_al8) {
2590                 return store_whole_le8(p->haddr, p->size, val_le);
2591             } else {
2592                 cpu_loop_exit_atomic(cpu, ra);
2593             }
2594         }
2595         /* fall through */
2596 
2597     case MO_ATOM_IFALIGN:
2598     case MO_ATOM_WITHIN16:
2599     case MO_ATOM_NONE:
2600         return store_bytes_leN(p->haddr, p->size, val_le);
2601 
2602     default:
2603         g_assert_not_reached();
2604     }
2605 }
2606 
2607 /*
2608  * Wrapper for the above, for 8 < size < 16.
2609  */
2610 static uint64_t do_st16_leN(CPUState *cpu, MMULookupPageData *p,
2611                             Int128 val_le, int mmu_idx,
2612                             MemOp mop, uintptr_t ra)
2613 {
2614     int size = p->size;
2615     MemOp atom;
2616 
2617     if (unlikely(p->flags & TLB_MMIO)) {
2618         return do_st16_mmio_leN(cpu, p->full, val_le, p->addr,
2619                                 size, mmu_idx, ra);
2620     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2621         return int128_gethi(val_le) >> ((size - 8) * 8);
2622     }
2623 
2624     /*
2625      * It is a given that we cross a page and therefore there is no atomicity
2626      * for the store as a whole, but subobjects may need attention.
2627      */
2628     atom = mop & MO_ATOM_MASK;
2629     switch (atom) {
2630     case MO_ATOM_SUBALIGN:
2631         store_parts_leN(p->haddr, 8, int128_getlo(val_le));
2632         return store_parts_leN(p->haddr + 8, p->size - 8,
2633                                int128_gethi(val_le));
2634 
2635     case MO_ATOM_WITHIN16_PAIR:
2636         /* Since size > 8, this is the half that must be atomic. */
2637         if (!HAVE_CMPXCHG128) {
2638             cpu_loop_exit_atomic(cpu, ra);
2639         }
2640         return store_whole_le16(p->haddr, p->size, val_le);
2641 
2642     case MO_ATOM_IFALIGN_PAIR:
2643         /*
2644          * Since size > 8, both halves are misaligned,
2645          * and so neither is atomic.
2646          */
2647     case MO_ATOM_IFALIGN:
2648     case MO_ATOM_WITHIN16:
2649     case MO_ATOM_NONE:
2650         stq_le_p(p->haddr, int128_getlo(val_le));
2651         return store_bytes_leN(p->haddr + 8, p->size - 8,
2652                                int128_gethi(val_le));
2653 
2654     default:
2655         g_assert_not_reached();
2656     }
2657 }
2658 
2659 static void do_st_1(CPUState *cpu, MMULookupPageData *p, uint8_t val,
2660                     int mmu_idx, uintptr_t ra)
2661 {
2662     if (unlikely(p->flags & TLB_MMIO)) {
2663         do_st_mmio_leN(cpu, p->full, val, p->addr, 1, mmu_idx, ra);
2664     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2665         /* nothing */
2666     } else {
2667         *(uint8_t *)p->haddr = val;
2668     }
2669 }
2670 
2671 static void do_st_2(CPUState *cpu, MMULookupPageData *p, uint16_t val,
2672                     int mmu_idx, MemOp memop, uintptr_t ra)
2673 {
2674     if (unlikely(p->flags & TLB_MMIO)) {
2675         if ((memop & MO_BSWAP) != MO_LE) {
2676             val = bswap16(val);
2677         }
2678         do_st_mmio_leN(cpu, p->full, val, p->addr, 2, mmu_idx, ra);
2679     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2680         /* nothing */
2681     } else {
2682         /* Swap to host endian if necessary, then store. */
2683         if (memop & MO_BSWAP) {
2684             val = bswap16(val);
2685         }
2686         store_atom_2(cpu, ra, p->haddr, memop, val);
2687     }
2688 }
2689 
2690 static void do_st_4(CPUState *cpu, MMULookupPageData *p, uint32_t val,
2691                     int mmu_idx, MemOp memop, uintptr_t ra)
2692 {
2693     if (unlikely(p->flags & TLB_MMIO)) {
2694         if ((memop & MO_BSWAP) != MO_LE) {
2695             val = bswap32(val);
2696         }
2697         do_st_mmio_leN(cpu, p->full, val, p->addr, 4, mmu_idx, ra);
2698     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2699         /* nothing */
2700     } else {
2701         /* Swap to host endian if necessary, then store. */
2702         if (memop & MO_BSWAP) {
2703             val = bswap32(val);
2704         }
2705         store_atom_4(cpu, ra, p->haddr, memop, val);
2706     }
2707 }
2708 
2709 static void do_st_8(CPUState *cpu, MMULookupPageData *p, uint64_t val,
2710                     int mmu_idx, MemOp memop, uintptr_t ra)
2711 {
2712     if (unlikely(p->flags & TLB_MMIO)) {
2713         if ((memop & MO_BSWAP) != MO_LE) {
2714             val = bswap64(val);
2715         }
2716         do_st_mmio_leN(cpu, p->full, val, p->addr, 8, mmu_idx, ra);
2717     } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2718         /* nothing */
2719     } else {
2720         /* Swap to host endian if necessary, then store. */
2721         if (memop & MO_BSWAP) {
2722             val = bswap64(val);
2723         }
2724         store_atom_8(cpu, ra, p->haddr, memop, val);
2725     }
2726 }
2727 
2728 static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
2729                        MemOpIdx oi, uintptr_t ra)
2730 {
2731     MMULookupLocals l;
2732     bool crosspage;
2733 
2734     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2735     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2736     tcg_debug_assert(!crosspage);
2737 
2738     do_st_1(cpu, &l.page[0], val, l.mmu_idx, ra);
2739 }
2740 
2741 static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
2742                        MemOpIdx oi, uintptr_t ra)
2743 {
2744     MMULookupLocals l;
2745     bool crosspage;
2746     uint8_t a, b;
2747 
2748     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2749     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2750     if (likely(!crosspage)) {
2751         do_st_2(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2752         return;
2753     }
2754 
2755     if ((l.memop & MO_BSWAP) == MO_LE) {
2756         a = val, b = val >> 8;
2757     } else {
2758         b = val, a = val >> 8;
2759     }
2760     do_st_1(cpu, &l.page[0], a, l.mmu_idx, ra);
2761     do_st_1(cpu, &l.page[1], b, l.mmu_idx, ra);
2762 }
2763 
2764 static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
2765                        MemOpIdx oi, uintptr_t ra)
2766 {
2767     MMULookupLocals l;
2768     bool crosspage;
2769 
2770     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2771     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2772     if (likely(!crosspage)) {
2773         do_st_4(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2774         return;
2775     }
2776 
2777     /* Swap to little endian for simplicity, then store by bytes. */
2778     if ((l.memop & MO_BSWAP) != MO_LE) {
2779         val = bswap32(val);
2780     }
2781     val = do_st_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2782     (void) do_st_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2783 }
2784 
2785 static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
2786                        MemOpIdx oi, uintptr_t ra)
2787 {
2788     MMULookupLocals l;
2789     bool crosspage;
2790 
2791     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2792     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2793     if (likely(!crosspage)) {
2794         do_st_8(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2795         return;
2796     }
2797 
2798     /* Swap to little endian for simplicity, then store by bytes. */
2799     if ((l.memop & MO_BSWAP) != MO_LE) {
2800         val = bswap64(val);
2801     }
2802     val = do_st_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2803     (void) do_st_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2804 }
2805 
2806 static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
2807                         MemOpIdx oi, uintptr_t ra)
2808 {
2809     MMULookupLocals l;
2810     bool crosspage;
2811     uint64_t a, b;
2812     int first;
2813 
2814     cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
2815     crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2816     if (likely(!crosspage)) {
2817         if (unlikely(l.page[0].flags & TLB_MMIO)) {
2818             if ((l.memop & MO_BSWAP) != MO_LE) {
2819                 val = bswap128(val);
2820             }
2821             do_st16_mmio_leN(cpu, l.page[0].full, val, addr, 16, l.mmu_idx, ra);
2822         } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
2823             /* nothing */
2824         } else {
2825             /* Swap to host endian if necessary, then store. */
2826             if (l.memop & MO_BSWAP) {
2827                 val = bswap128(val);
2828             }
2829             store_atom_16(cpu, ra, l.page[0].haddr, l.memop, val);
2830         }
2831         return;
2832     }
2833 
2834     first = l.page[0].size;
2835     if (first == 8) {
2836         MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
2837 
2838         if (l.memop & MO_BSWAP) {
2839             val = bswap128(val);
2840         }
2841         if (HOST_BIG_ENDIAN) {
2842             b = int128_getlo(val), a = int128_gethi(val);
2843         } else {
2844             a = int128_getlo(val), b = int128_gethi(val);
2845         }
2846         do_st_8(cpu, &l.page[0], a, l.mmu_idx, mop8, ra);
2847         do_st_8(cpu, &l.page[1], b, l.mmu_idx, mop8, ra);
2848         return;
2849     }
2850 
2851     if ((l.memop & MO_BSWAP) != MO_LE) {
2852         val = bswap128(val);
2853     }
2854     if (first < 8) {
2855         do_st_leN(cpu, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
2856         val = int128_urshift(val, first * 8);
2857         do_st16_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2858     } else {
2859         b = do_st16_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2860         do_st_leN(cpu, &l.page[1], b, l.mmu_idx, l.memop, ra);
2861     }
2862 }
2863 
2864 #include "ldst_common.c.inc"
2865 
2866 /*
2867  * First set of functions passes in OI and RETADDR.
2868  * This makes them callable from other helpers.
2869  */
2870 
2871 #define ATOMIC_NAME(X) \
2872     glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
2873 
2874 #define ATOMIC_MMU_CLEANUP
2875 
2876 #include "atomic_common.c.inc"
2877 
2878 #define DATA_SIZE 1
2879 #include "atomic_template.h"
2880 
2881 #define DATA_SIZE 2
2882 #include "atomic_template.h"
2883 
2884 #define DATA_SIZE 4
2885 #include "atomic_template.h"
2886 
2887 #ifdef CONFIG_ATOMIC64
2888 #define DATA_SIZE 8
2889 #include "atomic_template.h"
2890 #endif
2891 
2892 #if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
2893 #define DATA_SIZE 16
2894 #include "atomic_template.h"
2895 #endif
2896 
2897 /* Code access functions.  */
2898 
2899 uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
2900                          MemOpIdx oi, uintptr_t retaddr)
2901 {
2902     return do_ld1_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2903 }
2904 
2905 uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
2906                           MemOpIdx oi, uintptr_t retaddr)
2907 {
2908     return do_ld2_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2909 }
2910 
2911 uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
2912                           MemOpIdx oi, uintptr_t retaddr)
2913 {
2914     return do_ld4_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2915 }
2916 
2917 uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
2918                           MemOpIdx oi, uintptr_t retaddr)
2919 {
2920     return do_ld8_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2921 }
2922