xref: /qemu/accel/tcg/tb-maint.c (revision cc944932ecef3b7a56ae62d89dd92fb9e56c5cc8)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "exec/cputlb.h"
24 #include "exec/log.h"
25 #include "exec/exec-all.h"
26 #include "exec/page-protection.h"
27 #include "exec/mmap-lock.h"
28 #include "exec/tb-flush.h"
29 #include "exec/target_page.h"
30 #include "tb-internal.h"
31 #include "system/tcg.h"
32 #include "tcg/tcg.h"
33 #include "tb-hash.h"
34 #include "tb-context.h"
35 #include "tb-internal.h"
36 #include "internal-common.h"
37 #include "internal-target.h"
38 #ifdef CONFIG_USER_ONLY
39 #include "user/page-protection.h"
40 #endif
41 
42 
/*
 * List iterators for lists of tagged pointers in TranslationBlock.
 *
 * Every link is a uintptr_t: the pointer to the next TranslationBlock with
 * its least significant bit used as a tag.  On each iteration @tb holds the
 * untagged pointer and @n the tag; @n is then used as the index into
 * @tb->@field to fetch the following link.  Iteration ends at the first
 * link whose untagged pointer is NULL.
 *
 * @head is expanded twice, so it must not have side effects.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Iterate over the tagged list rooted at @head_tb->jmp_list_head. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
51 
52 static bool tb_cmp(const void *ap, const void *bp)
53 {
54     const TranslationBlock *a = ap;
55     const TranslationBlock *b = bp;
56 
57     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
58             a->cs_base == b->cs_base &&
59             a->flags == b->flags &&
60             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
61             tb_page_addr0(a) == tb_page_addr0(b) &&
62             tb_page_addr1(a) == tb_page_addr1(b));
63 }
64 
65 void tb_htable_init(void)
66 {
67     unsigned int mode = QHT_MODE_AUTO_RESIZE;
68 
69     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
70 }
71 
72 typedef struct PageDesc PageDesc;
73 
74 #ifdef CONFIG_USER_ONLY
75 
/*
 * In user-mode page locks aren't used; mmap_lock is enough.
 * The check therefore only verifies that mmap_lock is held; the @pd
 * argument is not expanded.
 */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())

/* There are no per-page locks in user-mode, so there is nothing to take. */
static inline void tb_lock_pages(const TranslationBlock *tb) { }
82 
83 /*
84  * For user-only, since we are protecting all of memory with a single lock,
85  * and because the two pages of a TranslationBlock are always contiguous,
86  * use a single data structure to record all TranslationBlocks.
87  */
88 static IntervalTreeRoot tb_root;
89 
90 static void tb_remove_all(void)
91 {
92     assert_memory_lock();
93     memset(&tb_root, 0, sizeof(tb_root));
94 }
95 
96 /* Call with mmap_lock held. */
97 static void tb_record(TranslationBlock *tb)
98 {
99     vaddr addr;
100     int flags;
101 
102     assert_memory_lock();
103     tb->itree.last = tb->itree.start + tb->size - 1;
104 
105     /* translator_loop() must have made all TB pages non-writable */
106     addr = tb_page_addr0(tb);
107     flags = page_get_flags(addr);
108     assert(!(flags & PAGE_WRITE));
109 
110     addr = tb_page_addr1(tb);
111     if (addr != -1) {
112         flags = page_get_flags(addr);
113         assert(!(flags & PAGE_WRITE));
114     }
115 
116     interval_tree_insert(&tb->itree, &tb_root);
117 }
118 
119 /* Call with mmap_lock held. */
120 static void tb_remove(TranslationBlock *tb)
121 {
122     assert_memory_lock();
123     interval_tree_remove(&tb->itree, &tb_root);
124 }
125 
/*
 * TODO: For now, still shared with translate-all.c for system mode.
 *
 * Visit every TB recorded in tb_root that overlaps [@start, @last].
 * @pagedesc is not expanded by this variant.  @N always holds the
 * successor of @T and is fetched before the loop body runs, so advancing
 * does not need to touch @T again after the body.
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
    for (T = foreach_tb_first(start, last),             \
         N = foreach_tb_next(T, start, last);           \
         T != NULL;                                     \
         T = N, N = foreach_tb_next(N, start, last))

/* Cursor type for PAGE_FOR_EACH_TB: the TB to be visited next. */
typedef TranslationBlock *PageForEachNext;
134 
135 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
136                                         tb_page_addr_t last)
137 {
138     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
139     return n ? container_of(n, TranslationBlock, itree) : NULL;
140 }
141 
142 static PageForEachNext foreach_tb_next(PageForEachNext tb,
143                                        tb_page_addr_t start,
144                                        tb_page_addr_t last)
145 {
146     IntervalTreeNode *n;
147 
148     if (tb) {
149         n = interval_tree_iter_next(&tb->itree, start, last);
150         if (n) {
151             return container_of(n, TranslationBlock, itree);
152         }
153     }
154     return NULL;
155 }
156 
157 #else
158 /*
159  * In system mode we want L1_MAP to be based on ram offsets.
160  */
161 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
162 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
163 #else
164 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
165 #endif
166 
167 /* Size of the L2 (and L3, etc) page tables.  */
168 #define V_L2_BITS 10
169 #define V_L2_SIZE (1 << V_L2_BITS)
170 
171 /*
172  * L1 Mapping properties
173  */
174 static int v_l1_size;
175 static int v_l1_shift;
176 static int v_l2_levels;
177 
178 /*
179  * The bottom level has pointers to PageDesc, and is indexed by
180  * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
181  */
182 #define V_L1_MIN_BITS 4
183 #define V_L1_MAX_BITS (V_L2_BITS + 3)
184 #define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
185 
186 static void *l1_map[V_L1_MAX_SIZE];
187 
/* Per-page descriptor; the leaves of l1_map are arrays of these. */
struct PageDesc {
    /* Spinlock taken and released through page_lock()/page_unlock(). */
    QemuSpin lock;
    /*
     * list of TBs intersecting this ram page: a tagged pointer to the
     * first TB, continued through TranslationBlock.page_next[]
     * (see PAGE_FOR_EACH_TB).
     */
    uintptr_t first_tb;
};
193 
194 void page_table_config_init(void)
195 {
196     uint32_t v_l1_bits;
197 
198     assert(TARGET_PAGE_BITS);
199     /* The bits remaining after N lower levels of page tables.  */
200     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
201     if (v_l1_bits < V_L1_MIN_BITS) {
202         v_l1_bits += V_L2_BITS;
203     }
204 
205     v_l1_size = 1 << v_l1_bits;
206     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
207     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
208 
209     assert(v_l1_bits <= V_L1_MAX_BITS);
210     assert(v_l1_shift % V_L2_BITS == 0);
211     assert(v_l2_levels >= 0);
212 }
213 
214 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
215 {
216     PageDesc *pd;
217     void **lp;
218 
219     /* Level 1.  Always allocated.  */
220     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
221 
222     /* Level 2..N-1.  */
223     for (int i = v_l2_levels; i > 0; i--) {
224         void **p = qatomic_rcu_read(lp);
225 
226         if (p == NULL) {
227             void *existing;
228 
229             if (!alloc) {
230                 return NULL;
231             }
232             p = g_new0(void *, V_L2_SIZE);
233             existing = qatomic_cmpxchg(lp, NULL, p);
234             if (unlikely(existing)) {
235                 g_free(p);
236                 p = existing;
237             }
238         }
239 
240         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
241     }
242 
243     pd = qatomic_rcu_read(lp);
244     if (pd == NULL) {
245         void *existing;
246 
247         if (!alloc) {
248             return NULL;
249         }
250 
251         pd = g_new0(PageDesc, V_L2_SIZE);
252         for (int i = 0; i < V_L2_SIZE; i++) {
253             qemu_spin_init(&pd[i].lock);
254         }
255 
256         existing = qatomic_cmpxchg(lp, NULL, pd);
257         if (unlikely(existing)) {
258             for (int i = 0; i < V_L2_SIZE; i++) {
259                 qemu_spin_destroy(&pd[i].lock);
260             }
261             g_free(pd);
262             pd = existing;
263         }
264     }
265 
266     return pd + (index & (V_L2_SIZE - 1));
267 }
268 
269 static inline PageDesc *page_find(tb_page_addr_t index)
270 {
271     return page_find_alloc(index, false);
272 }
273 
274 /**
275  * struct page_entry - page descriptor entry
276  * @pd:     pointer to the &struct PageDesc of the page this entry represents
277  * @index:  page index of the page
278  * @locked: whether the page is locked
279  *
280  * This struct helps us keep track of the locked state of a page, without
281  * bloating &struct PageDesc.
282  *
283  * A page lock protects accesses to all fields of &struct PageDesc.
284  *
285  * See also: &struct page_collection.
286  */
287 struct page_entry {
288     PageDesc *pd;
289     tb_page_addr_t index;
290     bool locked;
291 };
292 
293 /**
294  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
295  * @tree:   Binary search tree (BST) of the pages, with key == page index
296  * @max:    Pointer to the page in @tree with the highest page index
297  *
298  * To avoid deadlock we lock pages in ascending order of page index.
299  * When operating on a set of pages, we need to keep track of them so that
300  * we can lock them in order and also unlock them later. For this we collect
301  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
302  * @tree implementation we use does not provide an O(1) operation to obtain the
303  * highest-ranked element, we use @max to keep track of the inserted page
304  * with the highest index. This is valuable because if a page is not in
305  * the tree and its index is higher than @max's, then we can lock it
306  * without breaking the locking order rule.
307  *
308  * Note on naming: 'struct page_set' would be shorter, but we already have a few
309  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
310  *
311  * See also: page_collection_lock().
312  */
313 struct page_collection {
314     QTree *tree;
315     struct page_entry *max;
316 };
317 
/* Cursor type for PAGE_FOR_EACH_TB: the tag bit of the current link. */
typedef int PageForEachNext;
/*
 * Walk the TBs linked from @pagedesc->first_tb through page_next[].
 * The @start and @last arguments are accepted but not expanded here.
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
321 
322 #ifdef CONFIG_DEBUG_TCG
323 
324 static __thread GHashTable *ht_pages_locked_debug;
325 
326 static void ht_pages_locked_debug_init(void)
327 {
328     if (ht_pages_locked_debug) {
329         return;
330     }
331     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
332 }
333 
334 static bool page_is_locked(const PageDesc *pd)
335 {
336     PageDesc *found;
337 
338     ht_pages_locked_debug_init();
339     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
340     return !!found;
341 }
342 
343 static void page_lock__debug(PageDesc *pd)
344 {
345     ht_pages_locked_debug_init();
346     g_assert(!page_is_locked(pd));
347     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
348 }
349 
350 static void page_unlock__debug(const PageDesc *pd)
351 {
352     bool removed;
353 
354     ht_pages_locked_debug_init();
355     g_assert(page_is_locked(pd));
356     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
357     g_assert(removed);
358 }
359 
360 static void do_assert_page_locked(const PageDesc *pd,
361                                   const char *file, int line)
362 {
363     if (unlikely(!page_is_locked(pd))) {
364         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
365                      pd, file, line);
366         abort();
367     }
368 }
369 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
370 
371 void assert_no_pages_locked(void)
372 {
373     ht_pages_locked_debug_init();
374     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
375 }
376 
377 #else /* !CONFIG_DEBUG_TCG */
378 
/* Without CONFIG_DEBUG_TCG the lock-tracking hooks are empty. */
static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }
static inline void assert_page_locked(const PageDesc *pd) { }
382 
383 #endif /* CONFIG_DEBUG_TCG */
384 
385 static void page_lock(PageDesc *pd)
386 {
387     page_lock__debug(pd);
388     qemu_spin_lock(&pd->lock);
389 }
390 
391 /* Like qemu_spin_trylock, returns false on success */
392 static bool page_trylock(PageDesc *pd)
393 {
394     bool busy = qemu_spin_trylock(&pd->lock);
395     if (!busy) {
396         page_lock__debug(pd);
397     }
398     return busy;
399 }
400 
401 static void page_unlock(PageDesc *pd)
402 {
403     qemu_spin_unlock(&pd->lock);
404     page_unlock__debug(pd);
405 }
406 
407 void tb_lock_page0(tb_page_addr_t paddr)
408 {
409     page_lock(page_find_alloc(paddr >> TARGET_PAGE_BITS, true));
410 }
411 
412 void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
413 {
414     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
415     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
416     PageDesc *pd0, *pd1;
417 
418     if (pindex0 == pindex1) {
419         /* Identical pages, and the first page is already locked. */
420         return;
421     }
422 
423     pd1 = page_find_alloc(pindex1, true);
424     if (pindex0 < pindex1) {
425         /* Correct locking order, we may block. */
426         page_lock(pd1);
427         return;
428     }
429 
430     /* Incorrect locking order, we cannot block lest we deadlock. */
431     if (!page_trylock(pd1)) {
432         return;
433     }
434 
435     /*
436      * Drop the lock on page0 and get both page locks in the right order.
437      * Restart translation via longjmp.
438      */
439     pd0 = page_find_alloc(pindex0, false);
440     page_unlock(pd0);
441     page_lock(pd1);
442     page_lock(pd0);
443     siglongjmp(tcg_ctx->jmp_trans, -3);
444 }
445 
446 void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
447 {
448     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
449     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
450 
451     if (pindex0 != pindex1) {
452         page_unlock(page_find_alloc(pindex1, false));
453     }
454 }
455 
456 static void tb_lock_pages(TranslationBlock *tb)
457 {
458     tb_page_addr_t paddr0 = tb_page_addr0(tb);
459     tb_page_addr_t paddr1 = tb_page_addr1(tb);
460     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
461     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
462 
463     if (unlikely(paddr0 == -1)) {
464         return;
465     }
466     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
467         if (pindex0 < pindex1) {
468             page_lock(page_find_alloc(pindex0, true));
469             page_lock(page_find_alloc(pindex1, true));
470             return;
471         }
472         page_lock(page_find_alloc(pindex1, true));
473     }
474     page_lock(page_find_alloc(pindex0, true));
475 }
476 
477 void tb_unlock_pages(TranslationBlock *tb)
478 {
479     tb_page_addr_t paddr0 = tb_page_addr0(tb);
480     tb_page_addr_t paddr1 = tb_page_addr1(tb);
481     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
482     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
483 
484     if (unlikely(paddr0 == -1)) {
485         return;
486     }
487     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
488         page_unlock(page_find_alloc(pindex1, false));
489     }
490     page_unlock(page_find_alloc(pindex0, false));
491 }
492 
493 static inline struct page_entry *
494 page_entry_new(PageDesc *pd, tb_page_addr_t index)
495 {
496     struct page_entry *pe = g_malloc(sizeof(*pe));
497 
498     pe->index = index;
499     pe->pd = pd;
500     pe->locked = false;
501     return pe;
502 }
503 
504 static void page_entry_destroy(gpointer p)
505 {
506     struct page_entry *pe = p;
507 
508     g_assert(pe->locked);
509     page_unlock(pe->pd);
510     g_free(pe);
511 }
512 
513 /* returns false on success */
514 static bool page_entry_trylock(struct page_entry *pe)
515 {
516     bool busy = page_trylock(pe->pd);
517     if (!busy) {
518         g_assert(!pe->locked);
519         pe->locked = true;
520     }
521     return busy;
522 }
523 
524 static void do_page_entry_lock(struct page_entry *pe)
525 {
526     page_lock(pe->pd);
527     g_assert(!pe->locked);
528     pe->locked = true;
529 }
530 
531 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
532 {
533     struct page_entry *pe = value;
534 
535     do_page_entry_lock(pe);
536     return FALSE;
537 }
538 
539 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
540 {
541     struct page_entry *pe = value;
542 
543     if (pe->locked) {
544         pe->locked = false;
545         page_unlock(pe->pd);
546     }
547     return FALSE;
548 }
549 
550 /*
551  * Trylock a page, and if successful, add the page to a collection.
552  * Returns true ("busy") if the page could not be locked; false otherwise.
553  */
554 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
555 {
556     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
557     struct page_entry *pe;
558     PageDesc *pd;
559 
560     pe = q_tree_lookup(set->tree, &index);
561     if (pe) {
562         return false;
563     }
564 
565     pd = page_find(index);
566     if (pd == NULL) {
567         return false;
568     }
569 
570     pe = page_entry_new(pd, index);
571     q_tree_insert(set->tree, &pe->index, pe);
572 
573     /*
574      * If this is either (1) the first insertion or (2) a page whose index
575      * is higher than any other so far, just lock the page and move on.
576      */
577     if (set->max == NULL || pe->index > set->max->index) {
578         set->max = pe;
579         do_page_entry_lock(pe);
580         return false;
581     }
582     /*
583      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
584      * locks in order.
585      */
586     return page_entry_trylock(pe);
587 }
588 
589 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
590 {
591     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
592     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
593 
594     if (a == b) {
595         return 0;
596     } else if (a < b) {
597         return -1;
598     }
599     return 1;
600 }
601 
602 /*
603  * Lock a range of pages ([@start,@last]) as well as the pages of all
604  * intersecting TBs.
605  * Locking order: acquire locks in ascending order of page index.
606  */
607 static struct page_collection *page_collection_lock(tb_page_addr_t start,
608                                                     tb_page_addr_t last)
609 {
610     struct page_collection *set = g_malloc(sizeof(*set));
611     tb_page_addr_t index;
612     PageDesc *pd;
613 
614     start >>= TARGET_PAGE_BITS;
615     last >>= TARGET_PAGE_BITS;
616     g_assert(start <= last);
617 
618     set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
619                                 page_entry_destroy);
620     set->max = NULL;
621     assert_no_pages_locked();
622 
623  retry:
624     q_tree_foreach(set->tree, page_entry_lock, NULL);
625 
626     for (index = start; index <= last; index++) {
627         TranslationBlock *tb;
628         PageForEachNext n;
629 
630         pd = page_find(index);
631         if (pd == NULL) {
632             continue;
633         }
634         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
635             q_tree_foreach(set->tree, page_entry_unlock, NULL);
636             goto retry;
637         }
638         assert_page_locked(pd);
639         PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
640             if (page_trylock_add(set, tb_page_addr0(tb)) ||
641                 (tb_page_addr1(tb) != -1 &&
642                  page_trylock_add(set, tb_page_addr1(tb)))) {
643                 /* drop all locks, and reacquire in order */
644                 q_tree_foreach(set->tree, page_entry_unlock, NULL);
645                 goto retry;
646             }
647         }
648     }
649     return set;
650 }
651 
652 static void page_collection_unlock(struct page_collection *set)
653 {
654     /* entries are unlocked and freed via page_entry_destroy */
655     q_tree_destroy(set->tree);
656     g_free(set);
657 }
658 
659 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
660 static void tb_remove_all_1(int level, void **lp)
661 {
662     int i;
663 
664     if (*lp == NULL) {
665         return;
666     }
667     if (level == 0) {
668         PageDesc *pd = *lp;
669 
670         for (i = 0; i < V_L2_SIZE; ++i) {
671             page_lock(&pd[i]);
672             pd[i].first_tb = (uintptr_t)NULL;
673             page_unlock(&pd[i]);
674         }
675     } else {
676         void **pp = *lp;
677 
678         for (i = 0; i < V_L2_SIZE; ++i) {
679             tb_remove_all_1(level - 1, pp + i);
680         }
681     }
682 }
683 
684 static void tb_remove_all(void)
685 {
686     int i, l1_sz = v_l1_size;
687 
688     for (i = 0; i < l1_sz; i++) {
689         tb_remove_all_1(v_l2_levels, l1_map + i);
690     }
691 }
692 
693 /*
694  * Add the tb in the target page and protect it if necessary.
695  * Called with @p->lock held.
696  */
697 static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
698 {
699     bool page_already_protected;
700 
701     assert_page_locked(p);
702 
703     tb->page_next[n] = p->first_tb;
704     page_already_protected = p->first_tb != 0;
705     p->first_tb = (uintptr_t)tb | n;
706 
707     /*
708      * If some code is already present, then the pages are already
709      * protected. So we handle the case where only the first TB is
710      * allocated in a physical page.
711      */
712     if (!page_already_protected) {
713         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
714     }
715 }
716 
717 static void tb_record(TranslationBlock *tb)
718 {
719     tb_page_addr_t paddr0 = tb_page_addr0(tb);
720     tb_page_addr_t paddr1 = tb_page_addr1(tb);
721     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
722     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
723 
724     assert(paddr0 != -1);
725     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
726         tb_page_add(page_find_alloc(pindex1, false), tb, 1);
727     }
728     tb_page_add(page_find_alloc(pindex0, false), tb, 0);
729 }
730 
731 static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
732 {
733     TranslationBlock *tb1;
734     uintptr_t *pprev;
735     PageForEachNext n1;
736 
737     assert_page_locked(pd);
738     pprev = &pd->first_tb;
739     PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
740         if (tb1 == tb) {
741             *pprev = tb1->page_next[n1];
742             return;
743         }
744         pprev = &tb1->page_next[n1];
745     }
746     g_assert_not_reached();
747 }
748 
749 static void tb_remove(TranslationBlock *tb)
750 {
751     tb_page_addr_t paddr0 = tb_page_addr0(tb);
752     tb_page_addr_t paddr1 = tb_page_addr1(tb);
753     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
754     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
755 
756     assert(paddr0 != -1);
757     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
758         tb_page_remove(page_find_alloc(pindex1, false), tb);
759     }
760     tb_page_remove(page_find_alloc(pindex0, false), tb);
761 }
762 #endif /* CONFIG_USER_ONLY */
763 
764 /* flush all the translation blocks */
765 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
766 {
767     bool did_flush = false;
768 
769     mmap_lock();
770     /* If it is already been done on request of another CPU, just retry. */
771     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
772         goto done;
773     }
774     did_flush = true;
775 
776     CPU_FOREACH(cpu) {
777         tcg_flush_jmp_cache(cpu);
778     }
779 
780     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
781     tb_remove_all();
782 
783     tcg_region_reset_all();
784     /* XXX: flush processor icache at this point if cache flush is expensive */
785     qatomic_inc(&tb_ctx.tb_flush_count);
786 
787 done:
788     mmap_unlock();
789     if (did_flush) {
790         qemu_plugin_flush_cb();
791     }
792 }
793 
794 void tb_flush(CPUState *cpu)
795 {
796     if (tcg_enabled()) {
797         unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
798 
799         if (cpu_in_serial_context(cpu)) {
800             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
801         } else {
802             async_safe_run_on_cpu(cpu, do_tb_flush,
803                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
804         }
805     }
806 }
807 
808 /* remove @orig from its @n_orig-th jump list */
809 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
810 {
811     uintptr_t ptr, ptr_locked;
812     TranslationBlock *dest;
813     TranslationBlock *tb;
814     uintptr_t *pprev;
815     int n;
816 
817     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
818     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
819     dest = (TranslationBlock *)(ptr & ~1);
820     if (dest == NULL) {
821         return;
822     }
823 
824     qemu_spin_lock(&dest->jmp_lock);
825     /*
826      * While acquiring the lock, the jump might have been removed if the
827      * destination TB was invalidated; check again.
828      */
829     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
830     if (ptr_locked != ptr) {
831         qemu_spin_unlock(&dest->jmp_lock);
832         /*
833          * The only possibility is that the jump was unlinked via
834          * tb_jump_unlink(dest). Seeing here another destination would be a bug,
835          * because we set the LSB above.
836          */
837         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
838         return;
839     }
840     /*
841      * We first acquired the lock, and since the destination pointer matches,
842      * we know for sure that @orig is in the jmp list.
843      */
844     pprev = &dest->jmp_list_head;
845     TB_FOR_EACH_JMP(dest, tb, n) {
846         if (tb == orig && n == n_orig) {
847             *pprev = tb->jmp_list_next[n];
848             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
849             qemu_spin_unlock(&dest->jmp_lock);
850             return;
851         }
852         pprev = &tb->jmp_list_next[n];
853     }
854     g_assert_not_reached();
855 }
856 
857 /*
858  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
859  */
860 void tb_reset_jump(TranslationBlock *tb, int n)
861 {
862     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
863     tb_set_jmp_target(tb, n, addr);
864 }
865 
866 /* remove any jumps to the TB */
867 static inline void tb_jmp_unlink(TranslationBlock *dest)
868 {
869     TranslationBlock *tb;
870     int n;
871 
872     qemu_spin_lock(&dest->jmp_lock);
873 
874     TB_FOR_EACH_JMP(dest, tb, n) {
875         tb_reset_jump(tb, n);
876         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
877         /* No need to clear the list entry; setting the dest ptr is enough */
878     }
879     dest->jmp_list_head = (uintptr_t)NULL;
880 
881     qemu_spin_unlock(&dest->jmp_lock);
882 }
883 
884 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
885 {
886     CPUState *cpu;
887 
888     if (tb_cflags(tb) & CF_PCREL) {
889         /* A TB may be at any virtual address */
890         CPU_FOREACH(cpu) {
891             tcg_flush_jmp_cache(cpu);
892         }
893     } else {
894         uint32_t h = tb_jmp_cache_hash_func(tb->pc);
895 
896         CPU_FOREACH(cpu) {
897             CPUJumpCache *jc = cpu->tb_jmp_cache;
898 
899             if (qatomic_read(&jc->array[h].tb) == tb) {
900                 qatomic_set(&jc->array[h].tb, NULL);
901             }
902         }
903     }
904 }
905 
906 /*
907  * In user-mode, call with mmap_lock held.
908  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
909  * locks held.
910  */
911 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
912 {
913     uint32_t h;
914     tb_page_addr_t phys_pc;
915     uint32_t orig_cflags = tb_cflags(tb);
916 
917     assert_memory_lock();
918 
919     /* make sure no further incoming jumps will be chained to this TB */
920     qemu_spin_lock(&tb->jmp_lock);
921     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
922     qemu_spin_unlock(&tb->jmp_lock);
923 
924     /* remove the TB from the hash list */
925     phys_pc = tb_page_addr0(tb);
926     h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
927                      tb->flags, tb->cs_base, orig_cflags);
928     if (!qht_remove(&tb_ctx.htable, tb, h)) {
929         return;
930     }
931 
932     /* remove the TB from the page list */
933     if (rm_from_page_list) {
934         tb_remove(tb);
935     }
936 
937     /* remove the TB from the hash list */
938     tb_jmp_cache_inval_tb(tb);
939 
940     /* suppress this TB from the two jump lists */
941     tb_remove_from_jmp_list(tb, 0);
942     tb_remove_from_jmp_list(tb, 1);
943 
944     /* suppress any remaining jumps to this TB */
945     tb_jmp_unlink(tb);
946 
947     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
948                 tb_ctx.tb_phys_invalidate_count + 1);
949 }
950 
951 static void tb_phys_invalidate__locked(TranslationBlock *tb)
952 {
953     qemu_thread_jit_write();
954     do_tb_phys_invalidate(tb, true);
955     qemu_thread_jit_execute();
956 }
957 
958 /*
959  * Invalidate one TB.
960  * Called with mmap_lock held in user-mode.
961  */
962 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
963 {
964     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
965         tb_lock_pages(tb);
966         do_tb_phys_invalidate(tb, true);
967         tb_unlock_pages(tb);
968     } else {
969         do_tb_phys_invalidate(tb, false);
970     }
971 }
972 
973 /*
974  * Add a new TB and link it to the physical page tables.
975  * Called with mmap_lock held for user-mode emulation.
976  *
977  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
978  * Note that in !user-mode, another thread might have already added a TB
979  * for the same block of guest code that @tb corresponds to. In that case,
980  * the caller should discard the original @tb, and use instead the returned TB.
981  */
982 TranslationBlock *tb_link_page(TranslationBlock *tb)
983 {
984     void *existing_tb = NULL;
985     uint32_t h;
986 
987     assert_memory_lock();
988     tcg_debug_assert(!(tb->cflags & CF_INVALID));
989 
990     tb_record(tb);
991 
992     /* add in the hash table */
993     h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
994                      tb->flags, tb->cs_base, tb->cflags);
995     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
996 
997     /* remove TB from the page(s) if we couldn't insert it */
998     if (unlikely(existing_tb)) {
999         tb_remove(tb);
1000         tb_unlock_pages(tb);
1001         return existing_tb;
1002     }
1003 
1004     tb_unlock_pages(tb);
1005     return tb;
1006 }
1007 
1008 #ifdef CONFIG_USER_ONLY
1009 /*
1010  * Invalidate all TBs which intersect with the target address range.
1011  * Called with mmap_lock held for user-mode emulation.
1012  * NOTE: this function must not be called while a TB is running.
1013  */
1014 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1015 {
1016     TranslationBlock *tb;
1017     PageForEachNext n;
1018 
1019     assert_memory_lock();
1020 
1021     PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1022         tb_phys_invalidate__locked(tb);
1023     }
1024 }
1025 
1026 /*
1027  * Invalidate all TBs which intersect with the target address page @addr.
1028  * Called with mmap_lock held for user-mode emulation
1029  * NOTE: this function must not be called while a TB is running.
1030  */
1031 static void tb_invalidate_phys_page(tb_page_addr_t addr)
1032 {
1033     tb_page_addr_t start, last;
1034 
1035     start = addr & TARGET_PAGE_MASK;
1036     last = addr | ~TARGET_PAGE_MASK;
1037     tb_invalidate_phys_range(start, last);
1038 }
1039 
1040 /*
1041  * Called with mmap_lock held. If pc is not 0 then it indicates the
1042  * host PC of the faulting store instruction that caused this invalidate.
1043  * Returns true if the caller needs to abort execution of the current
1044  * TB (because it was modified by this store and the guest CPU has
1045  * precise-SMC semantics).
1046  */
1047 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1048 {
1049     TranslationBlock *current_tb;
1050     bool current_tb_modified;
1051     TranslationBlock *tb;
1052     PageForEachNext n;
1053     tb_page_addr_t last;
1054 
1055     /*
1056      * Without precise smc semantics, or when outside of a TB,
1057      * we can skip to invalidate.
1058      */
1059 #ifndef TARGET_HAS_PRECISE_SMC
1060     pc = 0;
1061 #endif
1062     if (!pc) {
1063         tb_invalidate_phys_page(addr);
1064         return false;
1065     }
1066 
1067     assert_memory_lock();
1068     current_tb = tcg_tb_lookup(pc);
1069 
1070     last = addr | ~TARGET_PAGE_MASK;
1071     addr &= TARGET_PAGE_MASK;
1072     current_tb_modified = false;
1073 
1074     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1075         if (current_tb == tb &&
1076             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1077             /*
1078              * If we are modifying the current TB, we must stop its
1079              * execution. We could be more precise by checking that
1080              * the modification is after the current PC, but it would
1081              * require a specialized function to partially restore
1082              * the CPU state.
1083              */
1084             current_tb_modified = true;
1085             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1086         }
1087         tb_phys_invalidate__locked(tb);
1088     }
1089 
1090     if (current_tb_modified) {
1091         /* Force execution of one insn next time.  */
1092         CPUState *cpu = current_cpu;
1093         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1094         return true;
1095     }
1096     return false;
1097 }
1098 #else
1099 /*
1100  * @p must be non-NULL.
1101  * Call with all @pages locked.
1102  */
1103 static void
1104 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1105                                       PageDesc *p, tb_page_addr_t start,
1106                                       tb_page_addr_t last,
1107                                       uintptr_t retaddr)
1108 {
1109     TranslationBlock *tb;
1110     PageForEachNext n;
1111 #ifdef TARGET_HAS_PRECISE_SMC
1112     bool current_tb_modified = false;
1113     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1114 #endif /* TARGET_HAS_PRECISE_SMC */
1115 
1116     /* Range may not cross a page. */
1117     tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1118 
1119     /*
1120      * We remove all the TBs in the range [start, last].
1121      * XXX: see if in some cases it could be faster to invalidate all the code
1122      */
1123     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1124         tb_page_addr_t tb_start, tb_last;
1125 
1126         /* NOTE: this is subtle as a TB may span two physical pages */
1127         tb_start = tb_page_addr0(tb);
1128         tb_last = tb_start + tb->size - 1;
1129         if (n == 0) {
1130             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1131         } else {
1132             tb_start = tb_page_addr1(tb);
1133             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1134         }
1135         if (!(tb_last < start || tb_start > last)) {
1136 #ifdef TARGET_HAS_PRECISE_SMC
1137             if (current_tb == tb &&
1138                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1139                 /*
1140                  * If we are modifying the current TB, we must stop
1141                  * its execution. We could be more precise by checking
1142                  * that the modification is after the current PC, but it
1143                  * would require a specialized function to partially
1144                  * restore the CPU state.
1145                  */
1146                 current_tb_modified = true;
1147                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1148             }
1149 #endif /* TARGET_HAS_PRECISE_SMC */
1150             tb_phys_invalidate__locked(tb);
1151         }
1152     }
1153 
1154     /* if no code remaining, no need to continue to use slow writes */
1155     if (!p->first_tb) {
1156         tlb_unprotect_code(start);
1157     }
1158 
1159 #ifdef TARGET_HAS_PRECISE_SMC
1160     if (current_tb_modified) {
1161         page_collection_unlock(pages);
1162         /* Force execution of one insn next time.  */
1163         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1164         mmap_unlock();
1165         cpu_loop_exit_noexc(current_cpu);
1166     }
1167 #endif
1168 }
1169 
1170 /*
1171  * Invalidate all TBs which intersect with the target physical address range
1172  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1173  * 'is_cpu_write_access' should be true if called from a real cpu write
1174  * access: the virtual CPU will exit the current TB if code is modified inside
1175  * this TB.
1176  */
1177 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1178 {
1179     struct page_collection *pages;
1180     tb_page_addr_t index, index_last;
1181 
1182     pages = page_collection_lock(start, last);
1183 
1184     index_last = last >> TARGET_PAGE_BITS;
1185     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1186         PageDesc *pd = page_find(index);
1187         tb_page_addr_t page_start, page_last;
1188 
1189         if (pd == NULL) {
1190             continue;
1191         }
1192         assert_page_locked(pd);
1193         page_start = index << TARGET_PAGE_BITS;
1194         page_last = page_start | ~TARGET_PAGE_MASK;
1195         page_last = MIN(page_last, last);
1196         tb_invalidate_phys_page_range__locked(pages, pd,
1197                                               page_start, page_last, 0);
1198     }
1199     page_collection_unlock(pages);
1200 }
1201 
1202 /*
1203  * Call with all @pages in the range [@start, @start + len[ locked.
1204  */
1205 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1206                                                  tb_page_addr_t start,
1207                                                  unsigned len, uintptr_t ra)
1208 {
1209     PageDesc *p;
1210 
1211     p = page_find(start >> TARGET_PAGE_BITS);
1212     if (!p) {
1213         return;
1214     }
1215 
1216     assert_page_locked(p);
1217     tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1218 }
1219 
1220 /*
1221  * len must be <= 8 and start must be a multiple of len.
1222  * Called via softmmu_template.h when code areas are written to with
1223  * iothread mutex not held.
1224  */
1225 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1226                                    unsigned size,
1227                                    uintptr_t retaddr)
1228 {
1229     struct page_collection *pages;
1230 
1231     pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1232     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1233     page_collection_unlock(pages);
1234 }
1235 
1236 #endif /* CONFIG_USER_ONLY */
1237