xref: /qemu/accel/tcg/tb-maint.c (revision b103cc6e74ac92f070a0e004bd84334e845c20b5)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "exec/cputlb.h"
24 #include "exec/log.h"
25 #include "exec/exec-all.h"
26 #include "exec/page-protection.h"
27 #include "exec/mmap-lock.h"
28 #include "exec/tb-flush.h"
29 #include "tb-internal.h"
30 #include "system/tcg.h"
31 #include "tcg/tcg.h"
32 #include "tb-hash.h"
33 #include "tb-context.h"
34 #include "tb-internal.h"
35 #include "internal-common.h"
36 #include "internal-target.h"
37 #ifdef CONFIG_USER_ONLY
38 #include "user/page-protection.h"
39 #endif
40 
41 
/*
 * Iterators over the intrusive lists threaded through TranslationBlock.
 *
 * Every link is a tagged pointer: bit 0 selects which slot (0 or 1) of the
 * pointed-to TB's @field array holds the following link, and the remaining
 * bits are the TranslationBlock address.  A zero link ends the list.
 * @tb and @n are lvalues that receive the current TB and slot.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb;                                                            \
         tb = (TranslationBlock *)tb->field[n],                         \
         n = (uintptr_t)tb & 1,                                         \
         tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Visit each (tb, n) pair linked on @head_tb's jmp_list_head list. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
50 
51 static bool tb_cmp(const void *ap, const void *bp)
52 {
53     const TranslationBlock *a = ap;
54     const TranslationBlock *b = bp;
55 
56     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
57             a->cs_base == b->cs_base &&
58             a->flags == b->flags &&
59             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
60             tb_page_addr0(a) == tb_page_addr0(b) &&
61             tb_page_addr1(a) == tb_page_addr1(b));
62 }
63 
64 void tb_htable_init(void)
65 {
66     unsigned int mode = QHT_MODE_AUTO_RESIZE;
67 
68     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
69 }
70 
/* Per-page descriptor; the struct body is only defined for system mode. */
typedef struct PageDesc PageDesc;
72 
73 #ifdef CONFIG_USER_ONLY
74 
75 /*
76  * In user-mode page locks aren't used; mmap_lock is enough.
77  */
78 #define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
79 
80 static inline void tb_lock_pages(const TranslationBlock *tb) { }
81 
82 /*
83  * For user-only, since we are protecting all of memory with a single lock,
84  * and because the two pages of a TranslationBlock are always contiguous,
85  * use a single data structure to record all TranslationBlocks.
86  */
87 static IntervalTreeRoot tb_root;
88 
89 static void tb_remove_all(void)
90 {
91     assert_memory_lock();
92     memset(&tb_root, 0, sizeof(tb_root));
93 }
94 
95 /* Call with mmap_lock held. */
96 static void tb_record(TranslationBlock *tb)
97 {
98     vaddr addr;
99     int flags;
100 
101     assert_memory_lock();
102     tb->itree.last = tb->itree.start + tb->size - 1;
103 
104     /* translator_loop() must have made all TB pages non-writable */
105     addr = tb_page_addr0(tb);
106     flags = page_get_flags(addr);
107     assert(!(flags & PAGE_WRITE));
108 
109     addr = tb_page_addr1(tb);
110     if (addr != -1) {
111         flags = page_get_flags(addr);
112         assert(!(flags & PAGE_WRITE));
113     }
114 
115     interval_tree_insert(&tb->itree, &tb_root);
116 }
117 
118 /* Call with mmap_lock held. */
119 static void tb_remove(TranslationBlock *tb)
120 {
121     assert_memory_lock();
122     interval_tree_remove(&tb->itree, &tb_root);
123 }
124 
125 /* TODO: For now, still shared with translate-all.c for system mode. */
126 #define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
127     for (T = foreach_tb_first(start, last),             \
128          N = foreach_tb_next(T, start, last);           \
129          T != NULL;                                     \
130          T = N, N = foreach_tb_next(N, start, last))
131 
132 typedef TranslationBlock *PageForEachNext;
133 
134 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
135                                         tb_page_addr_t last)
136 {
137     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
138     return n ? container_of(n, TranslationBlock, itree) : NULL;
139 }
140 
141 static PageForEachNext foreach_tb_next(PageForEachNext tb,
142                                        tb_page_addr_t start,
143                                        tb_page_addr_t last)
144 {
145     IntervalTreeNode *n;
146 
147     if (tb) {
148         n = interval_tree_iter_next(&tb->itree, start, last);
149         if (n) {
150             return container_of(n, TranslationBlock, itree);
151         }
152     }
153     return NULL;
154 }
155 
156 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets, so the
 * covered address width is capped at the host long width.
 */
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif

/* Index width and entry count of every table below level 1. */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/*
 * Level-1 geometry, computed by page_table_config_init():
 * number of entries in use, shift applied to a page index to obtain the
 * level-1 slot, and the count of intermediate levels.
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/*
 * Bounds on the number of index bits consumed by level 1: anything from
 * 4 to (V_L2_BITS + 3) bits, depending on target page size.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

/* Level-1 table, statically sized for the widest permitted index. */
static void *l1_map[V_L1_MAX_SIZE];
186 
187 struct PageDesc {
188     QemuSpin lock;
189     /* list of TBs intersecting this ram page */
190     uintptr_t first_tb;
191 };
192 
193 void page_table_config_init(void)
194 {
195     uint32_t v_l1_bits;
196 
197     assert(TARGET_PAGE_BITS);
198     /* The bits remaining after N lower levels of page tables.  */
199     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
200     if (v_l1_bits < V_L1_MIN_BITS) {
201         v_l1_bits += V_L2_BITS;
202     }
203 
204     v_l1_size = 1 << v_l1_bits;
205     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
206     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
207 
208     assert(v_l1_bits <= V_L1_MAX_BITS);
209     assert(v_l1_shift % V_L2_BITS == 0);
210     assert(v_l2_levels >= 0);
211 }
212 
213 static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
214 {
215     PageDesc *pd;
216     void **lp;
217 
218     /* Level 1.  Always allocated.  */
219     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
220 
221     /* Level 2..N-1.  */
222     for (int i = v_l2_levels; i > 0; i--) {
223         void **p = qatomic_rcu_read(lp);
224 
225         if (p == NULL) {
226             void *existing;
227 
228             if (!alloc) {
229                 return NULL;
230             }
231             p = g_new0(void *, V_L2_SIZE);
232             existing = qatomic_cmpxchg(lp, NULL, p);
233             if (unlikely(existing)) {
234                 g_free(p);
235                 p = existing;
236             }
237         }
238 
239         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
240     }
241 
242     pd = qatomic_rcu_read(lp);
243     if (pd == NULL) {
244         void *existing;
245 
246         if (!alloc) {
247             return NULL;
248         }
249 
250         pd = g_new0(PageDesc, V_L2_SIZE);
251         for (int i = 0; i < V_L2_SIZE; i++) {
252             qemu_spin_init(&pd[i].lock);
253         }
254 
255         existing = qatomic_cmpxchg(lp, NULL, pd);
256         if (unlikely(existing)) {
257             for (int i = 0; i < V_L2_SIZE; i++) {
258                 qemu_spin_destroy(&pd[i].lock);
259             }
260             g_free(pd);
261             pd = existing;
262         }
263     }
264 
265     return pd + (index & (V_L2_SIZE - 1));
266 }
267 
268 static inline PageDesc *page_find(tb_page_addr_t index)
269 {
270     return page_find_alloc(index, false);
271 }
272 
273 /**
274  * struct page_entry - page descriptor entry
275  * @pd:     pointer to the &struct PageDesc of the page this entry represents
276  * @index:  page index of the page
277  * @locked: whether the page is locked
278  *
279  * This struct helps us keep track of the locked state of a page, without
280  * bloating &struct PageDesc.
281  *
282  * A page lock protects accesses to all fields of &struct PageDesc.
283  *
284  * See also: &struct page_collection.
285  */
286 struct page_entry {
287     PageDesc *pd;
288     tb_page_addr_t index;
289     bool locked;
290 };
291 
292 /**
293  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
294  * @tree:   Binary search tree (BST) of the pages, with key == page index
295  * @max:    Pointer to the page in @tree with the highest page index
296  *
297  * To avoid deadlock we lock pages in ascending order of page index.
298  * When operating on a set of pages, we need to keep track of them so that
299  * we can lock them in order and also unlock them later. For this we collect
300  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
301  * @tree implementation we use does not provide an O(1) operation to obtain the
302  * highest-ranked element, we use @max to keep track of the inserted page
303  * with the highest index. This is valuable because if a page is not in
304  * the tree and its index is higher than @max's, then we can lock it
305  * without breaking the locking order rule.
306  *
307  * Note on naming: 'struct page_set' would be shorter, but we already have a few
308  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
309  *
310  * See also: page_collection_lock().
311  */
312 struct page_collection {
313     QTree *tree;
314     struct page_entry *max;
315 };
316 
/* Iteration cursor for PAGE_FOR_EACH_TB: the page_next slot (0 or 1). */
typedef int PageForEachNext;

/*
 * Visit every TB on @pagedesc's first_tb list.  The @start and @last
 * arguments are ignored in this variant.
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
320 
321 #ifdef CONFIG_DEBUG_TCG
322 
323 static __thread GHashTable *ht_pages_locked_debug;
324 
325 static void ht_pages_locked_debug_init(void)
326 {
327     if (ht_pages_locked_debug) {
328         return;
329     }
330     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
331 }
332 
333 static bool page_is_locked(const PageDesc *pd)
334 {
335     PageDesc *found;
336 
337     ht_pages_locked_debug_init();
338     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
339     return !!found;
340 }
341 
342 static void page_lock__debug(PageDesc *pd)
343 {
344     ht_pages_locked_debug_init();
345     g_assert(!page_is_locked(pd));
346     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
347 }
348 
349 static void page_unlock__debug(const PageDesc *pd)
350 {
351     bool removed;
352 
353     ht_pages_locked_debug_init();
354     g_assert(page_is_locked(pd));
355     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
356     g_assert(removed);
357 }
358 
359 static void do_assert_page_locked(const PageDesc *pd,
360                                   const char *file, int line)
361 {
362     if (unlikely(!page_is_locked(pd))) {
363         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
364                      pd, file, line);
365         abort();
366     }
367 }
368 #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
369 
370 void assert_no_pages_locked(void)
371 {
372     ht_pages_locked_debug_init();
373     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
374 }
375 
376 #else /* !CONFIG_DEBUG_TCG */
377 
378 static inline void page_lock__debug(const PageDesc *pd) { }
379 static inline void page_unlock__debug(const PageDesc *pd) { }
380 static inline void assert_page_locked(const PageDesc *pd) { }
381 
382 #endif /* CONFIG_DEBUG_TCG */
383 
384 static void page_lock(PageDesc *pd)
385 {
386     page_lock__debug(pd);
387     qemu_spin_lock(&pd->lock);
388 }
389 
390 /* Like qemu_spin_trylock, returns false on success */
391 static bool page_trylock(PageDesc *pd)
392 {
393     bool busy = qemu_spin_trylock(&pd->lock);
394     if (!busy) {
395         page_lock__debug(pd);
396     }
397     return busy;
398 }
399 
400 static void page_unlock(PageDesc *pd)
401 {
402     qemu_spin_unlock(&pd->lock);
403     page_unlock__debug(pd);
404 }
405 
406 void tb_lock_page0(tb_page_addr_t paddr)
407 {
408     page_lock(page_find_alloc(paddr >> TARGET_PAGE_BITS, true));
409 }
410 
411 void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
412 {
413     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
414     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
415     PageDesc *pd0, *pd1;
416 
417     if (pindex0 == pindex1) {
418         /* Identical pages, and the first page is already locked. */
419         return;
420     }
421 
422     pd1 = page_find_alloc(pindex1, true);
423     if (pindex0 < pindex1) {
424         /* Correct locking order, we may block. */
425         page_lock(pd1);
426         return;
427     }
428 
429     /* Incorrect locking order, we cannot block lest we deadlock. */
430     if (!page_trylock(pd1)) {
431         return;
432     }
433 
434     /*
435      * Drop the lock on page0 and get both page locks in the right order.
436      * Restart translation via longjmp.
437      */
438     pd0 = page_find_alloc(pindex0, false);
439     page_unlock(pd0);
440     page_lock(pd1);
441     page_lock(pd0);
442     siglongjmp(tcg_ctx->jmp_trans, -3);
443 }
444 
445 void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
446 {
447     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
448     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
449 
450     if (pindex0 != pindex1) {
451         page_unlock(page_find_alloc(pindex1, false));
452     }
453 }
454 
455 static void tb_lock_pages(TranslationBlock *tb)
456 {
457     tb_page_addr_t paddr0 = tb_page_addr0(tb);
458     tb_page_addr_t paddr1 = tb_page_addr1(tb);
459     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
460     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
461 
462     if (unlikely(paddr0 == -1)) {
463         return;
464     }
465     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
466         if (pindex0 < pindex1) {
467             page_lock(page_find_alloc(pindex0, true));
468             page_lock(page_find_alloc(pindex1, true));
469             return;
470         }
471         page_lock(page_find_alloc(pindex1, true));
472     }
473     page_lock(page_find_alloc(pindex0, true));
474 }
475 
476 void tb_unlock_pages(TranslationBlock *tb)
477 {
478     tb_page_addr_t paddr0 = tb_page_addr0(tb);
479     tb_page_addr_t paddr1 = tb_page_addr1(tb);
480     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
481     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
482 
483     if (unlikely(paddr0 == -1)) {
484         return;
485     }
486     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
487         page_unlock(page_find_alloc(pindex1, false));
488     }
489     page_unlock(page_find_alloc(pindex0, false));
490 }
491 
492 static inline struct page_entry *
493 page_entry_new(PageDesc *pd, tb_page_addr_t index)
494 {
495     struct page_entry *pe = g_malloc(sizeof(*pe));
496 
497     pe->index = index;
498     pe->pd = pd;
499     pe->locked = false;
500     return pe;
501 }
502 
503 static void page_entry_destroy(gpointer p)
504 {
505     struct page_entry *pe = p;
506 
507     g_assert(pe->locked);
508     page_unlock(pe->pd);
509     g_free(pe);
510 }
511 
512 /* returns false on success */
513 static bool page_entry_trylock(struct page_entry *pe)
514 {
515     bool busy = page_trylock(pe->pd);
516     if (!busy) {
517         g_assert(!pe->locked);
518         pe->locked = true;
519     }
520     return busy;
521 }
522 
523 static void do_page_entry_lock(struct page_entry *pe)
524 {
525     page_lock(pe->pd);
526     g_assert(!pe->locked);
527     pe->locked = true;
528 }
529 
530 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
531 {
532     struct page_entry *pe = value;
533 
534     do_page_entry_lock(pe);
535     return FALSE;
536 }
537 
538 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
539 {
540     struct page_entry *pe = value;
541 
542     if (pe->locked) {
543         pe->locked = false;
544         page_unlock(pe->pd);
545     }
546     return FALSE;
547 }
548 
549 /*
550  * Trylock a page, and if successful, add the page to a collection.
551  * Returns true ("busy") if the page could not be locked; false otherwise.
552  */
553 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
554 {
555     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
556     struct page_entry *pe;
557     PageDesc *pd;
558 
559     pe = q_tree_lookup(set->tree, &index);
560     if (pe) {
561         return false;
562     }
563 
564     pd = page_find(index);
565     if (pd == NULL) {
566         return false;
567     }
568 
569     pe = page_entry_new(pd, index);
570     q_tree_insert(set->tree, &pe->index, pe);
571 
572     /*
573      * If this is either (1) the first insertion or (2) a page whose index
574      * is higher than any other so far, just lock the page and move on.
575      */
576     if (set->max == NULL || pe->index > set->max->index) {
577         set->max = pe;
578         do_page_entry_lock(pe);
579         return false;
580     }
581     /*
582      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
583      * locks in order.
584      */
585     return page_entry_trylock(pe);
586 }
587 
588 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
589 {
590     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
591     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
592 
593     if (a == b) {
594         return 0;
595     } else if (a < b) {
596         return -1;
597     }
598     return 1;
599 }
600 
601 /*
602  * Lock a range of pages ([@start,@last]) as well as the pages of all
603  * intersecting TBs.
604  * Locking order: acquire locks in ascending order of page index.
605  */
606 static struct page_collection *page_collection_lock(tb_page_addr_t start,
607                                                     tb_page_addr_t last)
608 {
609     struct page_collection *set = g_malloc(sizeof(*set));
610     tb_page_addr_t index;
611     PageDesc *pd;
612 
613     start >>= TARGET_PAGE_BITS;
614     last >>= TARGET_PAGE_BITS;
615     g_assert(start <= last);
616 
617     set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
618                                 page_entry_destroy);
619     set->max = NULL;
620     assert_no_pages_locked();
621 
622  retry:
623     q_tree_foreach(set->tree, page_entry_lock, NULL);
624 
625     for (index = start; index <= last; index++) {
626         TranslationBlock *tb;
627         PageForEachNext n;
628 
629         pd = page_find(index);
630         if (pd == NULL) {
631             continue;
632         }
633         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
634             q_tree_foreach(set->tree, page_entry_unlock, NULL);
635             goto retry;
636         }
637         assert_page_locked(pd);
638         PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
639             if (page_trylock_add(set, tb_page_addr0(tb)) ||
640                 (tb_page_addr1(tb) != -1 &&
641                  page_trylock_add(set, tb_page_addr1(tb)))) {
642                 /* drop all locks, and reacquire in order */
643                 q_tree_foreach(set->tree, page_entry_unlock, NULL);
644                 goto retry;
645             }
646         }
647     }
648     return set;
649 }
650 
651 static void page_collection_unlock(struct page_collection *set)
652 {
653     /* entries are unlocked and freed via page_entry_destroy */
654     q_tree_destroy(set->tree);
655     g_free(set);
656 }
657 
658 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
659 static void tb_remove_all_1(int level, void **lp)
660 {
661     int i;
662 
663     if (*lp == NULL) {
664         return;
665     }
666     if (level == 0) {
667         PageDesc *pd = *lp;
668 
669         for (i = 0; i < V_L2_SIZE; ++i) {
670             page_lock(&pd[i]);
671             pd[i].first_tb = (uintptr_t)NULL;
672             page_unlock(&pd[i]);
673         }
674     } else {
675         void **pp = *lp;
676 
677         for (i = 0; i < V_L2_SIZE; ++i) {
678             tb_remove_all_1(level - 1, pp + i);
679         }
680     }
681 }
682 
683 static void tb_remove_all(void)
684 {
685     int i, l1_sz = v_l1_size;
686 
687     for (i = 0; i < l1_sz; i++) {
688         tb_remove_all_1(v_l2_levels, l1_map + i);
689     }
690 }
691 
692 /*
693  * Add the tb in the target page and protect it if necessary.
694  * Called with @p->lock held.
695  */
696 static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
697 {
698     bool page_already_protected;
699 
700     assert_page_locked(p);
701 
702     tb->page_next[n] = p->first_tb;
703     page_already_protected = p->first_tb != 0;
704     p->first_tb = (uintptr_t)tb | n;
705 
706     /*
707      * If some code is already present, then the pages are already
708      * protected. So we handle the case where only the first TB is
709      * allocated in a physical page.
710      */
711     if (!page_already_protected) {
712         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
713     }
714 }
715 
716 static void tb_record(TranslationBlock *tb)
717 {
718     tb_page_addr_t paddr0 = tb_page_addr0(tb);
719     tb_page_addr_t paddr1 = tb_page_addr1(tb);
720     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
721     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
722 
723     assert(paddr0 != -1);
724     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
725         tb_page_add(page_find_alloc(pindex1, false), tb, 1);
726     }
727     tb_page_add(page_find_alloc(pindex0, false), tb, 0);
728 }
729 
730 static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
731 {
732     TranslationBlock *tb1;
733     uintptr_t *pprev;
734     PageForEachNext n1;
735 
736     assert_page_locked(pd);
737     pprev = &pd->first_tb;
738     PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
739         if (tb1 == tb) {
740             *pprev = tb1->page_next[n1];
741             return;
742         }
743         pprev = &tb1->page_next[n1];
744     }
745     g_assert_not_reached();
746 }
747 
748 static void tb_remove(TranslationBlock *tb)
749 {
750     tb_page_addr_t paddr0 = tb_page_addr0(tb);
751     tb_page_addr_t paddr1 = tb_page_addr1(tb);
752     tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
753     tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
754 
755     assert(paddr0 != -1);
756     if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
757         tb_page_remove(page_find_alloc(pindex1, false), tb);
758     }
759     tb_page_remove(page_find_alloc(pindex0, false), tb);
760 }
761 #endif /* CONFIG_USER_ONLY */
762 
763 /* flush all the translation blocks */
764 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
765 {
766     bool did_flush = false;
767 
768     mmap_lock();
769     /* If it is already been done on request of another CPU, just retry. */
770     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
771         goto done;
772     }
773     did_flush = true;
774 
775     CPU_FOREACH(cpu) {
776         tcg_flush_jmp_cache(cpu);
777     }
778 
779     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
780     tb_remove_all();
781 
782     tcg_region_reset_all();
783     /* XXX: flush processor icache at this point if cache flush is expensive */
784     qatomic_inc(&tb_ctx.tb_flush_count);
785 
786 done:
787     mmap_unlock();
788     if (did_flush) {
789         qemu_plugin_flush_cb();
790     }
791 }
792 
793 void tb_flush(CPUState *cpu)
794 {
795     if (tcg_enabled()) {
796         unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
797 
798         if (cpu_in_serial_context(cpu)) {
799             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
800         } else {
801             async_safe_run_on_cpu(cpu, do_tb_flush,
802                                   RUN_ON_CPU_HOST_INT(tb_flush_count));
803         }
804     }
805 }
806 
807 /* remove @orig from its @n_orig-th jump list */
808 static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
809 {
810     uintptr_t ptr, ptr_locked;
811     TranslationBlock *dest;
812     TranslationBlock *tb;
813     uintptr_t *pprev;
814     int n;
815 
816     /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
817     ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
818     dest = (TranslationBlock *)(ptr & ~1);
819     if (dest == NULL) {
820         return;
821     }
822 
823     qemu_spin_lock(&dest->jmp_lock);
824     /*
825      * While acquiring the lock, the jump might have been removed if the
826      * destination TB was invalidated; check again.
827      */
828     ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
829     if (ptr_locked != ptr) {
830         qemu_spin_unlock(&dest->jmp_lock);
831         /*
832          * The only possibility is that the jump was unlinked via
833          * tb_jump_unlink(dest). Seeing here another destination would be a bug,
834          * because we set the LSB above.
835          */
836         g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
837         return;
838     }
839     /*
840      * We first acquired the lock, and since the destination pointer matches,
841      * we know for sure that @orig is in the jmp list.
842      */
843     pprev = &dest->jmp_list_head;
844     TB_FOR_EACH_JMP(dest, tb, n) {
845         if (tb == orig && n == n_orig) {
846             *pprev = tb->jmp_list_next[n];
847             /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
848             qemu_spin_unlock(&dest->jmp_lock);
849             return;
850         }
851         pprev = &tb->jmp_list_next[n];
852     }
853     g_assert_not_reached();
854 }
855 
856 /*
857  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
858  */
859 void tb_reset_jump(TranslationBlock *tb, int n)
860 {
861     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
862     tb_set_jmp_target(tb, n, addr);
863 }
864 
865 /* remove any jumps to the TB */
866 static inline void tb_jmp_unlink(TranslationBlock *dest)
867 {
868     TranslationBlock *tb;
869     int n;
870 
871     qemu_spin_lock(&dest->jmp_lock);
872 
873     TB_FOR_EACH_JMP(dest, tb, n) {
874         tb_reset_jump(tb, n);
875         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
876         /* No need to clear the list entry; setting the dest ptr is enough */
877     }
878     dest->jmp_list_head = (uintptr_t)NULL;
879 
880     qemu_spin_unlock(&dest->jmp_lock);
881 }
882 
883 static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
884 {
885     CPUState *cpu;
886 
887     if (tb_cflags(tb) & CF_PCREL) {
888         /* A TB may be at any virtual address */
889         CPU_FOREACH(cpu) {
890             tcg_flush_jmp_cache(cpu);
891         }
892     } else {
893         uint32_t h = tb_jmp_cache_hash_func(tb->pc);
894 
895         CPU_FOREACH(cpu) {
896             CPUJumpCache *jc = cpu->tb_jmp_cache;
897 
898             if (qatomic_read(&jc->array[h].tb) == tb) {
899                 qatomic_set(&jc->array[h].tb, NULL);
900             }
901         }
902     }
903 }
904 
905 /*
906  * In user-mode, call with mmap_lock held.
907  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
908  * locks held.
909  */
910 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
911 {
912     uint32_t h;
913     tb_page_addr_t phys_pc;
914     uint32_t orig_cflags = tb_cflags(tb);
915 
916     assert_memory_lock();
917 
918     /* make sure no further incoming jumps will be chained to this TB */
919     qemu_spin_lock(&tb->jmp_lock);
920     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
921     qemu_spin_unlock(&tb->jmp_lock);
922 
923     /* remove the TB from the hash list */
924     phys_pc = tb_page_addr0(tb);
925     h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
926                      tb->flags, tb->cs_base, orig_cflags);
927     if (!qht_remove(&tb_ctx.htable, tb, h)) {
928         return;
929     }
930 
931     /* remove the TB from the page list */
932     if (rm_from_page_list) {
933         tb_remove(tb);
934     }
935 
936     /* remove the TB from the hash list */
937     tb_jmp_cache_inval_tb(tb);
938 
939     /* suppress this TB from the two jump lists */
940     tb_remove_from_jmp_list(tb, 0);
941     tb_remove_from_jmp_list(tb, 1);
942 
943     /* suppress any remaining jumps to this TB */
944     tb_jmp_unlink(tb);
945 
946     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
947                 tb_ctx.tb_phys_invalidate_count + 1);
948 }
949 
950 static void tb_phys_invalidate__locked(TranslationBlock *tb)
951 {
952     qemu_thread_jit_write();
953     do_tb_phys_invalidate(tb, true);
954     qemu_thread_jit_execute();
955 }
956 
957 /*
958  * Invalidate one TB.
959  * Called with mmap_lock held in user-mode.
960  */
961 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
962 {
963     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
964         tb_lock_pages(tb);
965         do_tb_phys_invalidate(tb, true);
966         tb_unlock_pages(tb);
967     } else {
968         do_tb_phys_invalidate(tb, false);
969     }
970 }
971 
972 /*
973  * Add a new TB and link it to the physical page tables.
974  * Called with mmap_lock held for user-mode emulation.
975  *
976  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
977  * Note that in !user-mode, another thread might have already added a TB
978  * for the same block of guest code that @tb corresponds to. In that case,
979  * the caller should discard the original @tb, and use instead the returned TB.
980  */
981 TranslationBlock *tb_link_page(TranslationBlock *tb)
982 {
983     void *existing_tb = NULL;
984     uint32_t h;
985 
986     assert_memory_lock();
987     tcg_debug_assert(!(tb->cflags & CF_INVALID));
988 
989     tb_record(tb);
990 
991     /* add in the hash table */
992     h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
993                      tb->flags, tb->cs_base, tb->cflags);
994     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
995 
996     /* remove TB from the page(s) if we couldn't insert it */
997     if (unlikely(existing_tb)) {
998         tb_remove(tb);
999         tb_unlock_pages(tb);
1000         return existing_tb;
1001     }
1002 
1003     tb_unlock_pages(tb);
1004     return tb;
1005 }
1006 
1007 #ifdef CONFIG_USER_ONLY
1008 /*
1009  * Invalidate all TBs which intersect with the target address range.
1010  * Called with mmap_lock held for user-mode emulation.
1011  * NOTE: this function must not be called while a TB is running.
1012  */
1013 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1014 {
1015     TranslationBlock *tb;
1016     PageForEachNext n;
1017 
1018     assert_memory_lock();
1019 
1020     PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1021         tb_phys_invalidate__locked(tb);
1022     }
1023 }
1024 
1025 /*
1026  * Invalidate all TBs which intersect with the target address page @addr.
1027  * Called with mmap_lock held for user-mode emulation
1028  * NOTE: this function must not be called while a TB is running.
1029  */
1030 static void tb_invalidate_phys_page(tb_page_addr_t addr)
1031 {
1032     tb_page_addr_t start, last;
1033 
1034     start = addr & TARGET_PAGE_MASK;
1035     last = addr | ~TARGET_PAGE_MASK;
1036     tb_invalidate_phys_range(start, last);
1037 }
1038 
1039 /*
1040  * Called with mmap_lock held. If pc is not 0 then it indicates the
1041  * host PC of the faulting store instruction that caused this invalidate.
1042  * Returns true if the caller needs to abort execution of the current
1043  * TB (because it was modified by this store and the guest CPU has
1044  * precise-SMC semantics).
1045  */
1046 bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1047 {
1048     TranslationBlock *current_tb;
1049     bool current_tb_modified;
1050     TranslationBlock *tb;
1051     PageForEachNext n;
1052     tb_page_addr_t last;
1053 
1054     /*
1055      * Without precise smc semantics, or when outside of a TB,
1056      * we can skip to invalidate.
1057      */
1058 #ifndef TARGET_HAS_PRECISE_SMC
1059     pc = 0;
1060 #endif
1061     if (!pc) {
1062         tb_invalidate_phys_page(addr);
1063         return false;
1064     }
1065 
1066     assert_memory_lock();
1067     current_tb = tcg_tb_lookup(pc);
1068 
1069     last = addr | ~TARGET_PAGE_MASK;
1070     addr &= TARGET_PAGE_MASK;
1071     current_tb_modified = false;
1072 
1073     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1074         if (current_tb == tb &&
1075             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1076             /*
1077              * If we are modifying the current TB, we must stop its
1078              * execution. We could be more precise by checking that
1079              * the modification is after the current PC, but it would
1080              * require a specialized function to partially restore
1081              * the CPU state.
1082              */
1083             current_tb_modified = true;
1084             cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1085         }
1086         tb_phys_invalidate__locked(tb);
1087     }
1088 
1089     if (current_tb_modified) {
1090         /* Force execution of one insn next time.  */
1091         CPUState *cpu = current_cpu;
1092         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1093         return true;
1094     }
1095     return false;
1096 }
1097 #else
1098 /*
1099  * @p must be non-NULL.
1100  * Call with all @pages locked.
1101  */
1102 static void
1103 tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1104                                       PageDesc *p, tb_page_addr_t start,
1105                                       tb_page_addr_t last,
1106                                       uintptr_t retaddr)
1107 {
1108     TranslationBlock *tb;
1109     PageForEachNext n;
1110 #ifdef TARGET_HAS_PRECISE_SMC
1111     bool current_tb_modified = false;
1112     TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1113 #endif /* TARGET_HAS_PRECISE_SMC */
1114 
1115     /* Range may not cross a page. */
1116     tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1117 
1118     /*
1119      * We remove all the TBs in the range [start, last].
1120      * XXX: see if in some cases it could be faster to invalidate all the code
1121      */
1122     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1123         tb_page_addr_t tb_start, tb_last;
1124 
1125         /* NOTE: this is subtle as a TB may span two physical pages */
1126         tb_start = tb_page_addr0(tb);
1127         tb_last = tb_start + tb->size - 1;
1128         if (n == 0) {
1129             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1130         } else {
1131             tb_start = tb_page_addr1(tb);
1132             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1133         }
1134         if (!(tb_last < start || tb_start > last)) {
1135 #ifdef TARGET_HAS_PRECISE_SMC
1136             if (current_tb == tb &&
1137                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1138                 /*
1139                  * If we are modifying the current TB, we must stop
1140                  * its execution. We could be more precise by checking
1141                  * that the modification is after the current PC, but it
1142                  * would require a specialized function to partially
1143                  * restore the CPU state.
1144                  */
1145                 current_tb_modified = true;
1146                 cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1147             }
1148 #endif /* TARGET_HAS_PRECISE_SMC */
1149             tb_phys_invalidate__locked(tb);
1150         }
1151     }
1152 
1153     /* if no code remaining, no need to continue to use slow writes */
1154     if (!p->first_tb) {
1155         tlb_unprotect_code(start);
1156     }
1157 
1158 #ifdef TARGET_HAS_PRECISE_SMC
1159     if (current_tb_modified) {
1160         page_collection_unlock(pages);
1161         /* Force execution of one insn next time.  */
1162         current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1163         mmap_unlock();
1164         cpu_loop_exit_noexc(current_cpu);
1165     }
1166 #endif
1167 }
1168 
1169 /*
1170  * Invalidate all TBs which intersect with the target physical address range
1171  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1172  * 'is_cpu_write_access' should be true if called from a real cpu write
1173  * access: the virtual CPU will exit the current TB if code is modified inside
1174  * this TB.
1175  */
1176 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1177 {
1178     struct page_collection *pages;
1179     tb_page_addr_t index, index_last;
1180 
1181     pages = page_collection_lock(start, last);
1182 
1183     index_last = last >> TARGET_PAGE_BITS;
1184     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1185         PageDesc *pd = page_find(index);
1186         tb_page_addr_t page_start, page_last;
1187 
1188         if (pd == NULL) {
1189             continue;
1190         }
1191         assert_page_locked(pd);
1192         page_start = index << TARGET_PAGE_BITS;
1193         page_last = page_start | ~TARGET_PAGE_MASK;
1194         page_last = MIN(page_last, last);
1195         tb_invalidate_phys_page_range__locked(pages, pd,
1196                                               page_start, page_last, 0);
1197     }
1198     page_collection_unlock(pages);
1199 }
1200 
1201 /*
1202  * Call with all @pages in the range [@start, @start + len[ locked.
1203  */
1204 static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1205                                                  tb_page_addr_t start,
1206                                                  unsigned len, uintptr_t ra)
1207 {
1208     PageDesc *p;
1209 
1210     p = page_find(start >> TARGET_PAGE_BITS);
1211     if (!p) {
1212         return;
1213     }
1214 
1215     assert_page_locked(p);
1216     tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1217 }
1218 
1219 /*
1220  * len must be <= 8 and start must be a multiple of len.
1221  * Called via softmmu_template.h when code areas are written to with
1222  * iothread mutex not held.
1223  */
1224 void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1225                                    unsigned size,
1226                                    uintptr_t retaddr)
1227 {
1228     struct page_collection *pages;
1229 
1230     pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1231     tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1232     page_collection_unlock(pages);
1233 }
1234 
1235 #endif /* CONFIG_USER_ONLY */
1236