xref: /qemu/accel/tcg/tb-maint.c (revision 9c2ff9cdc9b33472333e9431cbf4417f5f228883)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "cpu.h"
24 #include "exec/cputlb.h"
25 #include "exec/log.h"
26 #include "exec/exec-all.h"
27 #include "exec/page-protection.h"
28 #include "exec/mmap-lock.h"
29 #include "exec/tb-flush.h"
30 #include "exec/target_page.h"
31 #include "tb-internal.h"
32 #include "system/tcg.h"
33 #include "tcg/tcg.h"
34 #include "tb-hash.h"
35 #include "tb-context.h"
36 #include "tb-internal.h"
37 #include "internal-common.h"
38 #include "internal-target.h"
39 #ifdef CONFIG_USER_ONLY
40 #include "user/page-protection.h"
41 #endif
42 
43 
/*
 * Iterate over a list of tagged TranslationBlock pointers.
 *
 * Each link is a uintptr_t whose low bit is extracted into @n and whose
 * remaining bits are the TranslationBlock pointer stored into @tb.  The
 * next link is read from @tb->@field[@n].  Iteration stops when the
 * untagged pointer is NULL.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1,                                                \
         tb = (TranslationBlock *)((head) & ~1);                        \
         tb;                                                            \
         tb = (TranslationBlock *)tb->field[n],                         \
         n = (uintptr_t)tb & 1,                                         \
         tb = (TranslationBlock *)((uintptr_t)tb & ~1))
49 
/* Iterate over the incoming-jump list rooted at @head_tb->jmp_list_head. */
#define TB_FOR_EACH_JMP(head_tb, tb, n) \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
52 
53 static bool tb_cmp(const void *ap, const void *bp)
54 {
55     const TranslationBlock *a = ap;
56     const TranslationBlock *b = bp;
57 
58     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
59             a->cs_base == b->cs_base &&
60             a->flags == b->flags &&
61             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
62             tb_page_addr0(a) == tb_page_addr0(b) &&
63             tb_page_addr1(a) == tb_page_addr1(b));
64 }
65 
66 void tb_htable_init(void)
67 {
68     unsigned int mode = QHT_MODE_AUTO_RESIZE;
69 
70     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
71 }
72 
/* Forward declaration; the structure itself is only defined for system mode. */
typedef struct PageDesc PageDesc;
74 
75 #ifdef CONFIG_USER_ONLY
76 
/*
 * User mode has no per-page locks: holding mmap_lock is sufficient, so
 * "page locked" is checked as "mmap_lock held".
 */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
81 
82 static inline void tb_lock_pages(const TranslationBlock *tb) { }
83 
84 /*
85  * For user-only, since we are protecting all of memory with a single lock,
86  * and because the two pages of a TranslationBlock are always contiguous,
87  * use a single data structure to record all TranslationBlocks.
88  */
89 static IntervalTreeRoot tb_root;
90 
91 static void tb_remove_all(void)
92 {
93     assert_memory_lock();
94     memset(&tb_root, 0, sizeof(tb_root));
95 }
96 
97 /* Call with mmap_lock held. */
98 static void tb_record(TranslationBlock *tb)
99 {
100     vaddr addr;
101     int flags;
102 
103     assert_memory_lock();
104     tb->itree.last = tb->itree.start + tb->size - 1;
105 
106     /* translator_loop() must have made all TB pages non-writable */
107     addr = tb_page_addr0(tb);
108     flags = page_get_flags(addr);
109     assert(!(flags & PAGE_WRITE));
110 
111     addr = tb_page_addr1(tb);
112     if (addr != -1) {
113         flags = page_get_flags(addr);
114         assert(!(flags & PAGE_WRITE));
115     }
116 
117     interval_tree_insert(&tb->itree, &tb_root);
118 }
119 
120 /* Call with mmap_lock held. */
121 static void tb_remove(TranslationBlock *tb)
122 {
123     assert_memory_lock();
124     interval_tree_remove(&tb->itree, &tb_root);
125 }
126 
127 /* TODO: For now, still shared with translate-all.c for system mode. */
128 #define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
129     for (T = foreach_tb_first(start, last),             \
130          N = foreach_tb_next(T, start, last);           \
131          T != NULL;                                     \
132          T = N, N = foreach_tb_next(N, start, last))
133 
134 typedef TranslationBlock *PageForEachNext;
135 
136 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
137                                         tb_page_addr_t last)
138 {
139     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
140     return n ? container_of(n, TranslationBlock, itree) : NULL;
141 }
142 
143 static PageForEachNext foreach_tb_next(PageForEachNext tb,
144                                        tb_page_addr_t start,
145                                        tb_page_addr_t last)
146 {
147     IntervalTreeNode *n;
148 
149     if (tb) {
150         n = interval_tree_iter_next(&tb->itree, start, last);
151         if (n) {
152             return container_of(n, TranslationBlock, itree);
153         }
154     }
155     return NULL;
156 }
157 
158 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets, so its width
 * is the smaller of the host long width and the target physical address
 * space width.
 */
#if TARGET_PHYS_ADDR_SPACE_BITS > HOST_LONG_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
167 
/* Index width and entry count of every page table below the L1 level. */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)
171 
/*
 * L1 mapping properties, computed once by page_table_config_init():
 * number of L1 entries, shift applied to a page index to obtain the L1
 * slot, and number of intermediate levels below L1.
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;
178 
/*
 * Bounds on the number of index bits used by the L1 table.
 * page_table_config_init() picks a width between V_L1_MIN_BITS and
 * V_L1_MAX_BITS (i.e. V_L2_BITS + 3), depending on target page size;
 * V_L1_MAX_SIZE is the corresponding worst-case number of L1 entries.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
186 
187 static void *l1_map[V_L1_MAX_SIZE];
188 
189 struct PageDesc {
190     QemuSpin lock;
191     /* list of TBs intersecting this ram page */
192     uintptr_t first_tb;
193 };
194 
195 void page_table_config_init(void)
196 {
197     uint32_t v_l1_bits;
198 
199     assert(TARGET_PAGE_BITS);
200     /* The bits remaining after N lower levels of page tables.  */
201     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
202     if (v_l1_bits < V_L1_MIN_BITS) {
203         v_l1_bits += V_L2_BITS;
204     }
205 
206     v_l1_size = 1 << v_l1_bits;
207     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
208     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
209 
210     assert(v_l1_bits <= V_L1_MAX_BITS);
211     assert(v_l1_shift % V_L2_BITS == 0);
212     assert(v_l2_levels >= 0);
213 }
214 
215 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
216 {
217     PageDesc *pd;
218     void **lp;
219 
220     /* Level 1.  Always allocated.  */
221     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
222 
223     /* Level 2..N-1.  */
224     for (int i = v_l2_levels; i > 0; i--) {
225         void **p = qatomic_rcu_read(lp);
226 
227         if (p == NULL) {
228             void *existing;
229 
230             if (!alloc) {
231                 return NULL;
232             }
233             p = g_new0(void *, V_L2_SIZE);
234             existing = qatomic_cmpxchg(lp, NULL, p);
235             if (unlikely(existing)) {
236                 g_free(p);
237                 p = existing;
238             }
239         }
240 
241         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
242     }
243 
244     pd = qatomic_rcu_read(lp);
245     if (pd == NULL) {
246         void *existing;
247 
248         if (!alloc) {
249             return NULL;
250         }
251 
252         pd = g_new0(PageDesc, V_L2_SIZE);
253         for (int i = 0; i < V_L2_SIZE; i++) {
254             qemu_spin_init(&pd[i].lock);
255         }
256 
257         existing = qatomic_cmpxchg(lp, NULL, pd);
258         if (unlikely(existing)) {
259             for (int i = 0; i < V_L2_SIZE; i++) {
260                 qemu_spin_destroy(&pd[i].lock);
261             }
262             g_free(pd);
263             pd = existing;
264         }
265     }
266 
267     return pd + (index & (V_L2_SIZE - 1));
268 }
269 
270 static inline PageDesc *page_find(tb_page_addr_t index)
271 {
272     return page_find_alloc(index, false);
273 }
274 
275 /**
276  * struct page_entry - page descriptor entry
277  * @pd:     pointer to the &struct PageDesc of the page this entry represents
278  * @index:  page index of the page
279  * @locked: whether the page is locked
280  *
281  * This struct helps us keep track of the locked state of a page, without
282  * bloating &struct PageDesc.
283  *
284  * A page lock protects accesses to all fields of &struct PageDesc.
285  *
286  * See also: &struct page_collection.
287  */
288 struct page_entry {
289     PageDesc *pd;
290     tb_page_addr_t index;
291     bool locked;
292 };
293 
294 /**
295  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
296  * @tree:   Binary search tree (BST) of the pages, with key == page index
297  * @max:    Pointer to the page in @tree with the highest page index
298  *
299  * To avoid deadlock we lock pages in ascending order of page index.
300  * When operating on a set of pages, we need to keep track of them so that
301  * we can lock them in order and also unlock them later. For this we collect
302  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
303  * @tree implementation we use does not provide an O(1) operation to obtain the
304  * highest-ranked element, we use @max to keep track of the inserted page
305  * with the highest index. This is valuable because if a page is not in
306  * the tree and its index is higher than @max's, then we can lock it
307  * without breaking the locking order rule.
308  *
309  * Note on naming: 'struct page_set' would be shorter, but we already have a few
310  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
311  *
312  * See also: page_collection_lock().
313  */
314 struct page_collection {
315     QTree *tree;
316     struct page_entry *max;
317 };
318 
/* Iteration cursor type used by PAGE_FOR_EACH_TB in system mode. */
typedef int PageForEachNext;

/*
 * Iterate over the TBs linked on @pagedesc->first_tb.  The @start and
 * @last arguments are ignored in this variant.
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
322 
323 #ifdef CONFIG_DEBUG_TCG
324 
325 static __thread GHashTable *ht_pages_locked_debug;
326 
327 static void ht_pages_locked_debug_init(void)
328 {
329     if (ht_pages_locked_debug) {
330         return;
331     }
332     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
333 }
334 
335 static bool page_is_locked(const PageDesc *pd)
336 {
337     PageDesc *found;
338 
339     ht_pages_locked_debug_init();
340     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
341     return !!found;
342 }
343 
344 static void page_lock__debug(PageDesc *pd)
345 {
346     ht_pages_locked_debug_init();
347     g_assert(!page_is_locked(pd));
348     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
349 }
350 
351 static void page_unlock__debug(const PageDesc *pd)
352 {
353     bool removed;
354 
355     ht_pages_locked_debug_init();
356     g_assert(page_is_locked(pd));
357     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
358     g_assert(removed);
359 }
360 
361 static void do_assert_page_locked(const PageDesc *pd,
362                                   const char *file, int line)
363 {
364     if (unlikely(!page_is_locked(pd))) {
365         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
366                      pd, file, line);
367         abort();
368     }
369 }
370 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
371 
372 void assert_no_pages_locked(void)
373 {
374     ht_pages_locked_debug_init();
375     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
376 }
377 
378 #else /* !CONFIG_DEBUG_TCG */
379 
380 static inline void page_lock__debug(const PageDesc *pd) { }
381 static inline void page_unlock__debug(const PageDesc *pd) { }
382 static inline void assert_page_locked(const PageDesc *pd) { }
383 
384 #endif /* CONFIG_DEBUG_TCG */
385 
386 static void page_lock(PageDesc *pd)
387 {
388     page_lock__debug(pd);
389     qemu_spin_lock(&pd->lock);
390 }
391 
392 /* Like qemu_spin_trylock, returns false on success */
393 static bool page_trylock(PageDesc *pd)
394 {
395     bool busy = qemu_spin_trylock(&pd->lock);
396     if (!busy) {
397         page_lock__debug(pd);
398     }
399     return busy;
400 }
401 
402 static void page_unlock(PageDesc *pd)
403 {
404     qemu_spin_unlock(&pd->lock);
405     page_unlock__debug(pd);
406 }
407 
408 void tb_lock_page0(tb_page_addr_t paddr)
409 {
410     page_lock(page_find_alloc(paddr >> TARGET_PAGE_BITS, true));
411 }
412 
413 void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
414 {
415     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
416     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
417     PageDesc *pd0, *pd1;
418 
419     if (pindex0 == pindex1) {
420         /* Identical pages, and the first page is already locked. */
421         return;
422     }
423 
424     pd1 = page_find_alloc(pindex1, true);
425     if (pindex0 < pindex1) {
426         /* Correct locking order, we may block. */
427         page_lock(pd1);
428         return;
429     }
430 
431     /* Incorrect locking order, we cannot block lest we deadlock. */
432     if (!page_trylock(pd1)) {
433         return;
434     }
435 
436     /*
437      * Drop the lock on page0 and get both page locks in the right order.
438      * Restart translation via longjmp.
439      */
440     pd0 = page_find_alloc(pindex0, false);
441     page_unlock(pd0);
442     page_lock(pd1);
443     page_lock(pd0);
444     siglongjmp(tcg_ctx->jmp_trans, -3);
445 }
446 
447 void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
448 {
449     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
450     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
451 
452     if (pindex0 != pindex1) {
453         page_unlock(page_find_alloc(pindex1, false));
454     }
455 }
456 
457 static void tb_lock_pages(TranslationBlock *tb)
458 {
459     tb_page_addr_t paddr0 = tb_page_addr0(tb);
460     tb_page_addr_t paddr1 = tb_page_addr1(tb);
461     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
462     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
463 
464     if (unlikely(paddr0 == -1)) {
465         return;
466     }
467     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
468         if (pindex0 < pindex1) {
469             page_lock(page_find_alloc(pindex0, true));
470             page_lock(page_find_alloc(pindex1, true));
471             return;
472         }
473         page_lock(page_find_alloc(pindex1, true));
474     }
475     page_lock(page_find_alloc(pindex0, true));
476 }
477 
478 void tb_unlock_pages(TranslationBlock *tb)
479 {
480     tb_page_addr_t paddr0 = tb_page_addr0(tb);
481     tb_page_addr_t paddr1 = tb_page_addr1(tb);
482     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
483     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
484 
485     if (unlikely(paddr0 == -1)) {
486         return;
487     }
488     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
489         page_unlock(page_find_alloc(pindex1, false));
490     }
491     page_unlock(page_find_alloc(pindex0, false));
492 }
493 
494 static inline struct page_entry *
495 page_entry_new(PageDesc *pd, tb_page_addr_t index)
496 {
497     struct page_entry *pe = g_malloc(sizeof(*pe));
498 
499     pe->index = index;
500     pe->pd = pd;
501     pe->locked = false;
502     return pe;
503 }
504 
505 static void page_entry_destroy(gpointer p)
506 {
507     struct page_entry *pe = p;
508 
509     g_assert(pe->locked);
510     page_unlock(pe->pd);
511     g_free(pe);
512 }
513 
514 /* returns false on success */
515 static bool page_entry_trylock(struct page_entry *pe)
516 {
517     bool busy = page_trylock(pe->pd);
518     if (!busy) {
519         g_assert(!pe->locked);
520         pe->locked = true;
521     }
522     return busy;
523 }
524 
525 static void do_page_entry_lock(struct page_entry *pe)
526 {
527     page_lock(pe->pd);
528     g_assert(!pe->locked);
529     pe->locked = true;
530 }
531 
532 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
533 {
534     struct page_entry *pe = value;
535 
536     do_page_entry_lock(pe);
537     return FALSE;
538 }
539 
540 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
541 {
542     struct page_entry *pe = value;
543 
544     if (pe->locked) {
545         pe->locked = false;
546         page_unlock(pe->pd);
547     }
548     return FALSE;
549 }
550 
551 /*
552  * Trylock a page, and if successful, add the page to a collection.
553  * Returns true ("busy") if the page could not be locked; false otherwise.
554  */
555 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
556 {
557     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
558     struct page_entry *pe;
559     PageDesc *pd;
560 
561     pe = q_tree_lookup(set->tree, &index);
562     if (pe) {
563         return false;
564     }
565 
566     pd = page_find(index);
567     if (pd == NULL) {
568         return false;
569     }
570 
571     pe = page_entry_new(pd, index);
572     q_tree_insert(set->tree, &pe->index, pe);
573 
574     /*
575      * If this is either (1) the first insertion or (2) a page whose index
576      * is higher than any other so far, just lock the page and move on.
577      */
578     if (set->max == NULL || pe->index > set->max->index) {
579         set->max = pe;
580         do_page_entry_lock(pe);
581         return false;
582     }
583     /*
584      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
585      * locks in order.
586      */
587     return page_entry_trylock(pe);
588 }
589 
590 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
591 {
592     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
593     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
594 
595     if (a == b) {
596         return 0;
597     } else if (a < b) {
598         return -1;
599     }
600     return 1;
601 }
602 
603 /*
604  * Lock a range of pages ([@start,@last]) as well as the pages of all
605  * intersecting TBs.
606  * Locking order: acquire locks in ascending order of page index.
607  */
608 static struct page_collection *page_collection_lock(tb_page_addr_t start,
609                                                     tb_page_addr_t last)
610 {
611     struct page_collection *set = g_malloc(sizeof(*set));
612     tb_page_addr_t index;
613     PageDesc *pd;
614 
615     start >>= TARGET_PAGE_BITS;
616     last >>= TARGET_PAGE_BITS;
617     g_assert(start <= last);
618 
619     set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
620                                 page_entry_destroy);
621     set->max = NULL;
622     assert_no_pages_locked();
623 
624  retry:
625     q_tree_foreach(set->tree, page_entry_lock, NULL);
626 
627     for (index = start; index <= last; index++) {
628         TranslationBlock *tb;
629         PageForEachNext n;
630 
631         pd = page_find(index);
632         if (pd == NULL) {
633             continue;
634         }
635         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
636             q_tree_foreach(set->tree, page_entry_unlock, NULL);
637             goto retry;
638         }
639         assert_page_locked(pd);
640         PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
641             if (page_trylock_add(set, tb_page_addr0(tb)) ||
642                 (tb_page_addr1(tb) != -1 &&
643                  page_trylock_add(set, tb_page_addr1(tb)))) {
644                 /* drop all locks, and reacquire in order */
645                 q_tree_foreach(set->tree, page_entry_unlock, NULL);
646                 goto retry;
647             }
648         }
649     }
650     return set;
651 }
652 
653 static void page_collection_unlock(struct page_collection *set)
654 {
655     /* entries are unlocked and freed via page_entry_destroy */
656     q_tree_destroy(set->tree);
657     g_free(set);
658 }
659 
660 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
661 static void tb_remove_all_1(int level, void **lp)
662 {
663     int i;
664 
665     if (*lp == NULL) {
666         return;
667     }
668     if (level == 0) {
669         PageDesc *pd = *lp;
670 
671         for (i = 0; i < V_L2_SIZE; ++i) {
672             page_lock(&pd[i]);
673             pd[i].first_tb = (uintptr_t)NULL;
674             page_unlock(&pd[i]);
675         }
676     } else {
677         void **pp = *lp;
678 
679         for (i = 0; i < V_L2_SIZE; ++i) {
680             tb_remove_all_1(level - 1, pp + i);
681         }
682     }
683 }
684 
685 static void tb_remove_all(void)
686 {
687     int i, l1_sz = v_l1_size;
688 
689     for (i = 0; i < l1_sz; i++) {
690         tb_remove_all_1(v_l2_levels, l1_map + i);
691     }
692 }
693 
694 /*
695  * Add the tb in the target page and protect it if necessary.
696  * Called with @p->lock held.
697  */
698 static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
699 {
700     bool page_already_protected;
701 
702     assert_page_locked(p);
703 
704     tb->page_next[n] = p->first_tb;
705     page_already_protected = p->first_tb != 0;
706     p->first_tb = (uintptr_t)tb | n;
707 
708     /*
709      * If some code is already present, then the pages are already
710      * protected. So we handle the case where only the first TB is
711      * allocated in a physical page.
712      */
713     if (!page_already_protected) {
714         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
715     }
716 }
717 
718 static void tb_record(TranslationBlock *tb)
719 {
720     tb_page_addr_t paddr0 = tb_page_addr0(tb);
721     tb_page_addr_t paddr1 = tb_page_addr1(tb);
722     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
723     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
724 
725     assert(paddr0 != -1);
726     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
727         tb_page_add(page_find_alloc(pindex1, false), tb, 1);
728     }
729     tb_page_add(page_find_alloc(pindex0, false), tb, 0);
730 }
731 
732 static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
733 {
734     TranslationBlock *tb1;
735     uintptr_t *pprev;
736     PageForEachNext n1;
737 
738     assert_page_locked(pd);
739     pprev = &pd->first_tb;
740     PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
741         if (tb1 == tb) {
742             *pprev = tb1->page_next[n1];
743             return;
744         }
745         pprev = &tb1->page_next[n1];
746     }
747     g_assert_not_reached();
748 }
749 
750 static void tb_remove(TranslationBlock *tb)
751 {
752     tb_page_addr_t paddr0 = tb_page_addr0(tb);
753     tb_page_addr_t paddr1 = tb_page_addr1(tb);
754     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
755     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
756 
757     assert(paddr0 != -1);
758     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
759         tb_page_remove(page_find_alloc(pindex1, false), tb);
760     }
761     tb_page_remove(page_find_alloc(pindex0, false), tb);
762 }
763 #endif /* CONFIG_USER_ONLY */
764 
765 /* flush all the translation blocks */
766 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
767 {
768     bool did_flush = false;
769 
770     mmap_lock();
771     /* If it is already been done on request of another CPU, just retry. */
772     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
773         goto done;
774     }
775     did_flush = true;
776 
777     CPU_FOREACH(cpu) {
778         tcg_flush_jmp_cache(cpu);
779     }
780 
781     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
782     tb_remove_all();
783 
784     tcg_region_reset_all();
785     /* XXX: flush processor icache at this point if cache flush is expensive */
786     qatomic_inc(&tb_ctx.tb_flush_count);
787 
788 done:
789     mmap_unlock();
790     if (did_flush) {
791         qemu_plugin_flush_cb();
792     }
793 }
794 
795 void tb_flush(CPUState *cpu)
796 {
797     if (tcg_enabled()) {
798         unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
799 
800         if (cpu_in_serial_context(cpu)) {
801             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
802         } else {
803             async_safe_run_on_cpu(cpu, do_tb_flush,
804                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
805         }
806     }
807 }
808 
809 /* remove @orig from its @n_orig-th jump list */
810 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
811 {
812     uintptr_t ptr, ptr_locked;
813     TranslationBlock *dest;
814     TranslationBlock *tb;
815     uintptr_t *pprev;
816     int n;
817 
818     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
819     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
820     dest = (TranslationBlock *)(ptr & ~1);
821     if (dest == NULL) {
822         return;
823     }
824 
825     qemu_spin_lock(&dest->jmp_lock);
826     /*
827      * While acquiring the lock, the jump might have been removed if the
828      * destination TB was invalidated; check again.
829      */
830     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
831     if (ptr_locked != ptr) {
832         qemu_spin_unlock(&dest->jmp_lock);
833         /*
834          * The only possibility is that the jump was unlinked via
835          * tb_jump_unlink(dest). Seeing here another destination would be a bug,
836          * because we set the LSB above.
837          */
838         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
839         return;
840     }
841     /*
842      * We first acquired the lock, and since the destination pointer matches,
843      * we know for sure that @orig is in the jmp list.
844      */
845     pprev = &dest->jmp_list_head;
846     TB_FOR_EACH_JMP(dest, tb, n) {
847         if (tb == orig && n == n_orig) {
848             *pprev = tb->jmp_list_next[n];
849             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
850             qemu_spin_unlock(&dest->jmp_lock);
851             return;
852         }
853         pprev = &tb->jmp_list_next[n];
854     }
855     g_assert_not_reached();
856 }
857 
858 /*
859  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
860  */
861 void tb_reset_jump(TranslationBlock *tb, int n)
862 {
863     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
864     tb_set_jmp_target(tb, n, addr);
865 }
866 
867 /* remove any jumps to the TB */
868 static inline void tb_jmp_unlink(TranslationBlock *dest)
869 {
870     TranslationBlock *tb;
871     int n;
872 
873     qemu_spin_lock(&dest->jmp_lock);
874 
875     TB_FOR_EACH_JMP(dest, tb, n) {
876         tb_reset_jump(tb, n);
877         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
878         /* No need to clear the list entry; setting the dest ptr is enough */
879     }
880     dest->jmp_list_head = (uintptr_t)NULL;
881 
882     qemu_spin_unlock(&dest->jmp_lock);
883 }
884 
885 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
886 {
887     CPUState *cpu;
888 
889     if (tb_cflags(tb) & CF_PCREL) {
890         /* A TB may be at any virtual address */
891         CPU_FOREACH(cpu) {
892             tcg_flush_jmp_cache(cpu);
893         }
894     } else {
895         uint32_t h = tb_jmp_cache_hash_func(tb->pc);
896 
897         CPU_FOREACH(cpu) {
898             CPUJumpCache *jc = cpu->tb_jmp_cache;
899 
900             if (qatomic_read(&jc->array[h].tb) == tb) {
901                 qatomic_set(&jc->array[h].tb, NULL);
902             }
903         }
904     }
905 }
906 
907 /*
908  * In user-mode, call with mmap_lock held.
909  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
910  * locks held.
911  */
912 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
913 {
914     uint32_t h;
915     tb_page_addr_t phys_pc;
916     uint32_t orig_cflags = tb_cflags(tb);
917 
918     assert_memory_lock();
919 
920     /* make sure no further incoming jumps will be chained to this TB */
921     qemu_spin_lock(&tb->jmp_lock);
922     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
923     qemu_spin_unlock(&tb->jmp_lock);
924 
925     /* remove the TB from the hash list */
926     phys_pc = tb_page_addr0(tb);
927     h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
928                      tb->flags, tb->cs_base, orig_cflags);
929     if (!qht_remove(&tb_ctx.htable, tb, h)) {
930         return;
931     }
932 
933     /* remove the TB from the page list */
934     if (rm_from_page_list) {
935         tb_remove(tb);
936     }
937 
938     /* remove the TB from the hash list */
939     tb_jmp_cache_inval_tb(tb);
940 
941     /* suppress this TB from the two jump lists */
942     tb_remove_from_jmp_list(tb, 0);
943     tb_remove_from_jmp_list(tb, 1);
944 
945     /* suppress any remaining jumps to this TB */
946     tb_jmp_unlink(tb);
947 
948     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
949                 tb_ctx.tb_phys_invalidate_count + 1);
950 }
951 
952 static void tb_phys_invalidate__locked(TranslationBlock *tb)
953 {
954     qemu_thread_jit_write();
955     do_tb_phys_invalidate(tb, true);
956     qemu_thread_jit_execute();
957 }
958 
959 /*
960  * Invalidate one TB.
961  * Called with mmap_lock held in user-mode.
962  */
963 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
964 {
965     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
966         tb_lock_pages(tb);
967         do_tb_phys_invalidate(tb, true);
968         tb_unlock_pages(tb);
969     } else {
970         do_tb_phys_invalidate(tb, false);
971     }
972 }
973 
974 /*
975  * Add a new TB and link it to the physical page tables.
976  * Called with mmap_lock held for user-mode emulation.
977  *
978  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
979  * Note that in !user-mode, another thread might have already added a TB
980  * for the same block of guest code that @tb corresponds to. In that case,
981  * the caller should discard the original @tb, and use instead the returned TB.
982  */
983 TranslationBlock *tb_link_page(TranslationBlock *tb)
984 {
985     void *existing_tb = NULL;
986     uint32_t h;
987 
988     assert_memory_lock();
989     tcg_debug_assert(!(tb->cflags & CF_INVALID));
990 
991     tb_record(tb);
992 
993     /* add in the hash table */
994     h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
995                      tb->flags, tb->cs_base, tb->cflags);
996     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
997 
998     /* remove TB from the page(s) if we couldn't insert it */
999     if (unlikely(existing_tb)) {
1000         tb_remove(tb);
1001         tb_unlock_pages(tb);
1002         return existing_tb;
1003     }
1004 
1005     tb_unlock_pages(tb);
1006     return tb;
1007 }
1008 
1009 #ifdef CONFIG_USER_ONLY
1010 /*
1011  * Invalidate all TBs which intersect with the target address range.
1012  * Called with mmap_lock held for user-mode emulation.
1013  * NOTE: this function must not be called while a TB is running.
1014  */
1015 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1016 {
1017     TranslationBlock *tb;
1018     PageForEachNext n;
1019 
1020     assert_memory_lock();
1021 
1022     PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1023         tb_phys_invalidate__locked(tb);
1024     }
1025 }
1026 
1027 /*
1028  * Invalidate all TBs which intersect with the target address page @addr.
1029  * Called with mmap_lock held for user-mode emulation
1030  * NOTE: this function must not be called while a TB is running.
1031  */
1032 static void tb_invalidate_phys_page(tb_page_addr_t addr)
1033 {
1034     tb_page_addr_t start, last;
1035 
1036     start = addr & TARGET_PAGE_MASK;
1037     last = addr | ~TARGET_PAGE_MASK;
1038     tb_invalidate_phys_range(start, last);
1039 }
1040 
1041 /*
1042  * Called with mmap_lock held. If pc is not 0 then it indicates the
1043  * host PC of the faulting store instruction that caused this invalidate.
1044  * Returns true if the caller needs to abort execution of the current
1045  * TB (because it was modified by this store and the guest CPU has
1046  * precise-SMC semantics).
1047  */
1048 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1049 {
1050     TranslationBlock *current_tb;
1051     bool current_tb_modified;
1052     TranslationBlock *tb;
1053     PageForEachNext n;
1054     tb_page_addr_t last;
1055 
1056     /*
1057      * Without precise smc semantics, or when outside of a TB,
1058      * we can skip to invalidate.
1059      */
1060 #ifndef TARGET_HAS_PRECISE_SMC
1061     pc = 0;
1062 #endif
1063     if (!pc) {
1064         tb_invalidate_phys_page(addr);
1065         return false;
1066     }
1067 
1068     assert_memory_lock();
1069     current_tb = tcg_tb_lookup(pc);
1070 
1071     last = addr | ~TARGET_PAGE_MASK;
1072     addr &= TARGET_PAGE_MASK;
1073     current_tb_modified = false;
1074 
1075     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1076         if (current_tb == tb &&
1077             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1078             /*
1079              * If we are modifying the current TB, we must stop its
1080              * execution. We could be more precise by checking that
1081              * the modification is after the current PC, but it would
1082              * require a specialized function to partially restore
1083              * the CPU state.
1084              */
1085             current_tb_modified = true;
1086             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1087         }
1088         tb_phys_invalidate__locked(tb);
1089     }
1090 
1091     if (current_tb_modified) {
1092         /* Force execution of one insn next time.  */
1093         CPUState *cpu = current_cpu;
1094         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1095         return true;
1096     }
1097     return false;
1098 }
1099 #else
1100 /*
1101  * @p must be non-NULL.
1102  * Call with all @pages locked.
1103  */
1104 static void
1105 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1106                                       PageDesc *p, tb_page_addr_t start,
1107                                       tb_page_addr_t last,
1108                                       uintptr_t retaddr)
1109 {
1110     TranslationBlock *tb;
1111     PageForEachNext n;
1112 #ifdef TARGET_HAS_PRECISE_SMC
1113     bool current_tb_modified = false;
1114     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1115 #endif /* TARGET_HAS_PRECISE_SMC */
1116 
1117     /* Range may not cross a page. */
1118     tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1119 
1120     /*
1121      * We remove all the TBs in the range [start, last].
1122      * XXX: see if in some cases it could be faster to invalidate all the code
1123      */
1124     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1125         tb_page_addr_t tb_start, tb_last;
1126 
1127         /* NOTE: this is subtle as a TB may span two physical pages */
1128         tb_start = tb_page_addr0(tb);
1129         tb_last = tb_start + tb->size - 1;
1130         if (n == 0) {
1131             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1132         } else {
1133             tb_start = tb_page_addr1(tb);
1134             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1135         }
1136         if (!(tb_last < start || tb_start > last)) {
1137 #ifdef TARGET_HAS_PRECISE_SMC
1138             if (current_tb == tb &&
1139                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1140                 /*
1141                  * If we are modifying the current TB, we must stop
1142                  * its execution. We could be more precise by checking
1143                  * that the modification is after the current PC, but it
1144                  * would require a specialized function to partially
1145                  * restore the CPU state.
1146                  */
1147                 current_tb_modified = true;
1148                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1149             }
1150 #endif /* TARGET_HAS_PRECISE_SMC */
1151             tb_phys_invalidate__locked(tb);
1152         }
1153     }
1154 
1155     /* if no code remaining, no need to continue to use slow writes */
1156     if (!p->first_tb) {
1157         tlb_unprotect_code(start);
1158     }
1159 
1160 #ifdef TARGET_HAS_PRECISE_SMC
1161     if (current_tb_modified) {
1162         page_collection_unlock(pages);
1163         /* Force execution of one insn next time.  */
1164         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1165         mmap_unlock();
1166         cpu_loop_exit_noexc(current_cpu);
1167     }
1168 #endif
1169 }
1170 
1171 /*
1172  * Invalidate all TBs which intersect with the target physical address range
1173  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1174  * 'is_cpu_write_access' should be true if called from a real cpu write
1175  * access: the virtual CPU will exit the current TB if code is modified inside
1176  * this TB.
1177  */
1178 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1179 {
1180     struct page_collection *pages;
1181     tb_page_addr_t index, index_last;
1182 
1183     pages = page_collection_lock(start, last);
1184 
1185     index_last = last >> TARGET_PAGE_BITS;
1186     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1187         PageDesc *pd = page_find(index);
1188         tb_page_addr_t page_start, page_last;
1189 
1190         if (pd == NULL) {
1191             continue;
1192         }
1193         assert_page_locked(pd);
1194         page_start = index << TARGET_PAGE_BITS;
1195         page_last = page_start | ~TARGET_PAGE_MASK;
1196         page_last = MIN(page_last, last);
1197         tb_invalidate_phys_page_range__locked(pages, pd,
1198                                               page_start, page_last, 0);
1199     }
1200     page_collection_unlock(pages);
1201 }
1202 
1203 /*
1204  * Call with all @pages in the range [@start, @start + len[ locked.
1205  */
1206 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1207                                                  tb_page_addr_t start,
1208                                                  unsigned len, uintptr_t ra)
1209 {
1210     PageDesc *p;
1211 
1212     p = page_find(start >> TARGET_PAGE_BITS);
1213     if (!p) {
1214         return;
1215     }
1216 
1217     assert_page_locked(p);
1218     tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1219 }
1220 
1221 /*
1222  * len must be <= 8 and start must be a multiple of len.
1223  * Called via softmmu_template.h when code areas are written to with
1224  * iothread mutex not held.
1225  */
1226 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1227                                    unsigned size,
1228                                    uintptr_t retaddr)
1229 {
1230     struct page_collection *pages;
1231 
1232     pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1233     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1234     page_collection_unlock(pages);
1235 }
1236 
1237 #endif /* CONFIG_USER_ONLY */
1238