xref: /qemu/accel/tcg/tb-maint.c (revision 7cef6d686309e2792186504ae17cf4f3eb57ef68)
1 /*
2  * Translation Block Maintenance
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu/interval-tree.h"
22 #include "qemu/qtree.h"
23 #include "exec/cputlb.h"
24 #include "exec/log.h"
25 #include "exec/page-protection.h"
26 #include "exec/mmap-lock.h"
27 #include "exec/tb-flush.h"
28 #include "exec/target_page.h"
29 #include "accel/tcg/cpu-ops.h"
30 #include "tb-internal.h"
31 #include "system/tcg.h"
32 #include "tcg/tcg.h"
33 #include "tb-hash.h"
34 #include "tb-context.h"
35 #include "tb-internal.h"
36 #include "internal-common.h"
37 #ifdef CONFIG_USER_ONLY
38 #include "user/page-protection.h"
39 #endif
40 
41 
/*
 * List iterators for lists of tagged pointers in TranslationBlock.
 *
 * Each link is an integer holding a TranslationBlock pointer whose low bit
 * is a tag.  On every step the tag is stored in @n and the untagged pointer
 * in @tb; the next link is then read from @tb->@field[@n].  Iteration stops
 * when the untagged pointer is NULL.
 *
 * @head is expanded twice and @tb/@n are assigned on every step, so pass
 * plain lvalues and a side-effect-free @head.
 */
#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
             tb = (TranslationBlock *)((uintptr_t)tb & ~1))

/* Walk the (tb, n) pairs linked from @head_tb's jmp_list_head. */
#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
50 
tb_cmp(const void * ap,const void * bp)51 static bool tb_cmp(const void *ap, const void *bp)
52 {
53     const TranslationBlock *a = ap;
54     const TranslationBlock *b = bp;
55 
56     return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
57             a->cs_base == b->cs_base &&
58             a->flags == b->flags &&
59             (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
60             tb_page_addr0(a) == tb_page_addr0(b) &&
61             tb_page_addr1(a) == tb_page_addr1(b));
62 }
63 
tb_htable_init(void)64 void tb_htable_init(void)
65 {
66     unsigned int mode = QHT_MODE_AUTO_RESIZE;
67 
68     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
69 }
70 
/*
 * Forward declaration, visible to both build variants below; the structure
 * body is only defined in the system-mode (!CONFIG_USER_ONLY) branch.
 */
typedef struct PageDesc PageDesc;
72 
73 #ifdef CONFIG_USER_ONLY
74 
/*
 * In user-mode page locks aren't used; mmap_lock is enough.
 * The @pd argument is therefore ignored: the check is only that the
 * mmap_lock is held.
 */
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
79 
/* User-mode counterpart of the page-locking helper: intentionally empty. */
static inline void tb_lock_pages(const TranslationBlock *tb)
{
}
81 
/*
 * For user-only, since we are protecting all of memory with a single lock,
 * and because the two pages of a TranslationBlock are always contiguous,
 * use a single data structure to record all TranslationBlocks.
 *
 * Each TB is linked in through its 'itree' node (see tb_record/tb_remove).
 */
static IntervalTreeRoot tb_root;
88 
tb_remove_all(void)89 static void tb_remove_all(void)
90 {
91     assert_memory_lock();
92     memset(&tb_root, 0, sizeof(tb_root));
93 }
94 
95 /* Call with mmap_lock held. */
tb_record(TranslationBlock * tb)96 static void tb_record(TranslationBlock *tb)
97 {
98     vaddr addr;
99     int flags;
100 
101     assert_memory_lock();
102     tb->itree.last = tb->itree.start + tb->size - 1;
103 
104     /* translator_loop() must have made all TB pages non-writable */
105     addr = tb_page_addr0(tb);
106     flags = page_get_flags(addr);
107     assert(!(flags & PAGE_WRITE));
108 
109     addr = tb_page_addr1(tb);
110     if (addr != -1) {
111         flags = page_get_flags(addr);
112         assert(!(flags & PAGE_WRITE));
113     }
114 
115     interval_tree_insert(&tb->itree, &tb_root);
116 }
117 
118 /* Call with mmap_lock held. */
tb_remove(TranslationBlock * tb)119 static void tb_remove(TranslationBlock *tb)
120 {
121     assert_memory_lock();
122     interval_tree_remove(&tb->itree, &tb_root);
123 }
124 
/*
 * TODO: For now, still shared with translate-all.c for system mode.
 *
 * Iterate @T over every recorded TB that overlaps [@start, @last].
 * @pagedesc is not used by this variant.  @N always holds the successor
 * of @T and is fetched before the loop body runs (presumably so that the
 * body may remove @T from the tree -- confirm against callers).
 */
#define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
    for (T = foreach_tb_first(start, last),             \
         N = foreach_tb_next(T, start, last);           \
         T != NULL;                                     \
         T = N, N = foreach_tb_next(N, start, last))

/* Iterator state for PAGE_FOR_EACH_TB: the next TB to visit, or NULL. */
typedef TranslationBlock *PageForEachNext;
133 
foreach_tb_first(tb_page_addr_t start,tb_page_addr_t last)134 static PageForEachNext foreach_tb_first(tb_page_addr_t start,
135                                         tb_page_addr_t last)
136 {
137     IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
138     return n ? container_of(n, TranslationBlock, itree) : NULL;
139 }
140 
/*
 * Return the TB following @tb among those overlapping [@start, @last].
 * Yields NULL when @tb is NULL or when there is no further match.
 */
static PageForEachNext foreach_tb_next(PageForEachNext tb,
                                       tb_page_addr_t start,
                                       tb_page_addr_t last)
{
    IntervalTreeNode *node;

    if (tb == NULL) {
        return NULL;
    }

    node = interval_tree_iter_next(&tb->itree, start, last);
    return node ? container_of(node, TranslationBlock, itree) : NULL;
}
155 
156 #else
/*
 * In system mode we want L1_MAP to be based on ram offsets.
 */
#define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS

/* Size of the L2 (and L3, etc) page tables.  */
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

/*
 * L1 Mapping properties, computed once by page_table_config_init():
 *   v_l1_size   - number of l1_map entries actually used
 *   v_l1_shift  - right shift applied to a page index to get its L1 slot
 *   v_l2_levels - number of intermediate table levels walked between the
 *                 L1 slot and the bottom-level PageDesc array
 */
static int v_l1_size;
static int v_l1_shift;
static int v_l2_levels;

/*
 * The bottom level has pointers to PageDesc, and is indexed by
 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 * NOTE(review): these bounds are applied to the L1 index width in
 * page_table_config_init(); the wording above may be stale -- confirm.
 */
#define V_L1_MIN_BITS 4
#define V_L1_MAX_BITS (V_L2_BITS + 3)
#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)

/* Top-level table, sized for the widest permitted L1 index. */
static void *l1_map[V_L1_MAX_SIZE];
182 
/* Per-page bookkeeping for the TBs that intersect one ram page. */
struct PageDesc {
    /* Page lock; taken and released through page_lock()/page_unlock(). */
    QemuSpin lock;
    /*
     * list of TBs intersecting this ram page
     * (tagged pointer: TranslationBlock address with the page_next[] slot
     * number in the low bit; 0 when the list is empty)
     */
    uintptr_t first_tb;
};
188 
page_table_config_init(void)189 void page_table_config_init(void)
190 {
191     uint32_t v_l1_bits;
192 
193     assert(TARGET_PAGE_BITS);
194     /* The bits remaining after N lower levels of page tables.  */
195     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
196     if (v_l1_bits < V_L1_MIN_BITS) {
197         v_l1_bits += V_L2_BITS;
198     }
199 
200     v_l1_size = 1 << v_l1_bits;
201     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
202     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
203 
204     assert(v_l1_bits <= V_L1_MAX_BITS);
205     assert(v_l1_shift % V_L2_BITS == 0);
206     assert(v_l2_levels >= 0);
207 }
208 
/*
 * Walk the multi-level page map down to the PageDesc for page @index.
 *
 * @index: page index (callers in this file pass addr >> TARGET_PAGE_BITS)
 * @alloc: when true, create any missing intermediate table and the
 *         bottom-level PageDesc array; when false, give up at the first
 *         level that is not populated
 *
 * Returns the PageDesc for @index, or NULL if @alloc is false and some
 * level of the map is missing.
 *
 * No lock is taken here.  A freshly allocated level is published with
 * qatomic_cmpxchg(); if another thread got there first, the local
 * allocation is thrown away and the already-installed one is used.
 */
static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
    PageDesc *pd;
    void **lp;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (int i = v_l2_levels; i > 0; i--) {
        void **p = qatomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            existing = qatomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                /* Lost the race: use the table installed by the winner. */
                g_free(p);
                p = existing;
            }
        }

        /* Select this level's slot from the matching V_L2_BITS of @index. */
        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    /* Bottom level: *lp points at an array of V_L2_SIZE PageDescs. */
    pd = qatomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }

        /* Fully initialise the array before it can become visible. */
        pd = g_new0(PageDesc, V_L2_SIZE);
        for (int i = 0; i < V_L2_SIZE; i++) {
            qemu_spin_init(&pd[i].lock);
        }

        existing = qatomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
            /* Lost the race: tear down our copy and use the winner's. */
            for (int i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_destroy(&pd[i].lock);
            }
            g_free(pd);
            pd = existing;
        }
    }

    /* The low V_L2_BITS of @index pick the entry within the array. */
    return pd + (index & (V_L2_SIZE - 1));
}
263 
page_find(tb_page_addr_t index)264 static inline PageDesc *page_find(tb_page_addr_t index)
265 {
266     return page_find_alloc(index, false);
267 }
268 
/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page; its address is also the key under which
 *          the entry is inserted in the collection's tree
 * @locked: whether the page lock is currently held through this entry
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};
287 
/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index,
 *          or NULL while the collection is empty
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    QTree *tree;
    struct page_entry *max;
};
312 
/*
 * System-mode iteration over the TBs linked from a PageDesc.
 * The first two macro arguments are ignored; @n receives the tag bit of
 * each link, i.e. which page_next[] slot of @tb continues the list.
 */
typedef int PageForEachNext;
#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
316 
317 #ifdef CONFIG_DEBUG_TCG
318 
319 static __thread GHashTable *ht_pages_locked_debug;
320 
ht_pages_locked_debug_init(void)321 static void ht_pages_locked_debug_init(void)
322 {
323     if (ht_pages_locked_debug) {
324         return;
325     }
326     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
327 }
328 
page_is_locked(const PageDesc * pd)329 static bool page_is_locked(const PageDesc *pd)
330 {
331     PageDesc *found;
332 
333     ht_pages_locked_debug_init();
334     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
335     return !!found;
336 }
337 
/*
 * Debug bookkeeping for a lock acquisition: @pd must not already be
 * recorded for this thread, and is then added to the table.
 */
static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();

    g_assert(!page_is_locked(pd));
    /* The descriptor serves as both key and value. */
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}
344 
page_unlock__debug(const PageDesc * pd)345 static void page_unlock__debug(const PageDesc *pd)
346 {
347     bool removed;
348 
349     ht_pages_locked_debug_init();
350     g_assert(page_is_locked(pd));
351     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
352     g_assert(removed);
353 }
354 
/*
 * Abort with a diagnostic naming @file:@line unless @pd is recorded as
 * locked by the current thread.
 */
static void do_assert_page_locked(const PageDesc *pd,
                                  const char *file, int line)
{
    if (likely(page_is_locked(pd))) {
        return;
    }

    error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                 pd, file, line);
    abort();
}
#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
365 
assert_no_pages_locked(void)366 void assert_no_pages_locked(void)
367 {
368     ht_pages_locked_debug_init();
369     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
370 }
371 
372 #else /* !CONFIG_DEBUG_TCG */
373 
/* Without CONFIG_DEBUG_TCG the lock-tracking hooks are empty. */
static inline void page_lock__debug(const PageDesc *pd)
{
}

static inline void page_unlock__debug(const PageDesc *pd)
{
}

static inline void assert_page_locked(const PageDesc *pd)
{
}
377 
378 #endif /* CONFIG_DEBUG_TCG */
379 
/*
 * Take @pd's spinlock.  The debug hook runs first, before the lock is
 * actually acquired.
 */
static void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);

    qemu_spin_lock(&pd->lock);
}
385 
386 /* Like qemu_spin_trylock, returns false on success */
page_trylock(PageDesc * pd)387 static bool page_trylock(PageDesc *pd)
388 {
389     bool busy = qemu_spin_trylock(&pd->lock);
390     if (!busy) {
391         page_lock__debug(pd);
392     }
393     return busy;
394 }
395 
/* Release @pd's spinlock, then run the debug hook. */
static void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);

    page_unlock__debug(pd);
}
401 
/*
 * Lock the page containing @paddr, creating its PageDesc if needed.
 * May block.
 */
void tb_lock_page0(tb_page_addr_t paddr)
{
    PageDesc *pd = page_find_alloc(paddr >> TARGET_PAGE_BITS, true);

    page_lock(pd);
}
406 
/*
 * Lock the page containing @paddr1, given that the page containing
 * @paddr0 is already locked by the caller.
 *
 * Returns normally when the second page is locked (or is the same page).
 * If the second page has the lower index and its lock is busy, both locks
 * are re-taken in ascending order and control leaves via
 * siglongjmp(tcg_ctx->jmp_trans, -3) instead of returning.
 */
void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
{
    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
    PageDesc *pd0, *pd1;

    if (pindex0 == pindex1) {
        /* Identical pages, and the first page is already locked. */
        return;
    }

    pd1 = page_find_alloc(pindex1, true);
    if (pindex0 < pindex1) {
        /* Correct locking order, we may block. */
        page_lock(pd1);
        return;
    }

    /* Incorrect locking order, we cannot block lest we deadlock. */
    if (!page_trylock(pd1)) {
        /* page_trylock() returns false on success: both pages are held. */
        return;
    }

    /*
     * Drop the lock on page0 and get both page locks in the right order.
     * Restart translation via longjmp.
     */
    pd0 = page_find_alloc(pindex0, false);
    page_unlock(pd0);
    page_lock(pd1);
    page_lock(pd0);
    siglongjmp(tcg_ctx->jmp_trans, -3);
}
440 
/*
 * Release the lock on the page containing @paddr1, unless it is the same
 * page as the one containing @paddr0 (which is left locked).
 */
void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
{
    tb_page_addr_t first = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t second = paddr1 >> TARGET_PAGE_BITS;

    if (first == second) {
        return;
    }
    page_unlock(page_find_alloc(second, false));
}
450 
/*
 * Lock the page(s) covered by @tb, creating PageDescs as needed.
 * Nothing is locked when the TB has no first page (address -1).  When the
 * TB spans two distinct pages, the lower page index is locked first.
 */
static void tb_lock_pages(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;
    bool two_pages;

    if (unlikely(paddr0 == -1)) {
        return;
    }

    two_pages = unlikely(paddr1 != -1) && idx0 != idx1;
    if (!two_pages) {
        page_lock(page_find_alloc(idx0, true));
        return;
    }

    /* Ascending page-index order, whichever of the two pages that is. */
    page_lock(page_find_alloc(idx0 < idx1 ? idx0 : idx1, true));
    page_lock(page_find_alloc(idx0 < idx1 ? idx1 : idx0, true));
}
471 
/*
 * Release the page lock(s) covering @tb.  Does nothing when the TB has no
 * first page (address -1).  A distinct second page is released before the
 * first one.
 */
void tb_unlock_pages(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;
    bool two_pages;

    if (unlikely(paddr0 == -1)) {
        return;
    }

    two_pages = unlikely(paddr1 != -1) && idx0 != idx1;
    if (two_pages) {
        page_unlock(page_find_alloc(idx1, false));
    }
    page_unlock(page_find_alloc(idx0, false));
}
487 
488 static inline struct page_entry *
page_entry_new(PageDesc * pd,tb_page_addr_t index)489 page_entry_new(PageDesc *pd, tb_page_addr_t index)
490 {
491     struct page_entry *pe = g_malloc(sizeof(*pe));
492 
493     pe->index = index;
494     pe->pd = pd;
495     pe->locked = false;
496     return pe;
497 }
498 
/*
 * Value destructor registered with the collection's tree: the entry must
 * be locked; its page is unlocked and the entry is freed.
 */
static void page_entry_destroy(gpointer p)
{
    struct page_entry *entry = p;

    g_assert(entry->locked);
    page_unlock(entry->pd);

    g_free(entry);
}
507 
508 /* returns false on success */
page_entry_trylock(struct page_entry * pe)509 static bool page_entry_trylock(struct page_entry *pe)
510 {
511     bool busy = page_trylock(pe->pd);
512     if (!busy) {
513         g_assert(!pe->locked);
514         pe->locked = true;
515     }
516     return busy;
517 }
518 
do_page_entry_lock(struct page_entry * pe)519 static void do_page_entry_lock(struct page_entry *pe)
520 {
521     page_lock(pe->pd);
522     g_assert(!pe->locked);
523     pe->locked = true;
524 }
525 
page_entry_lock(gpointer key,gpointer value,gpointer data)526 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
527 {
528     struct page_entry *pe = value;
529 
530     do_page_entry_lock(pe);
531     return FALSE;
532 }
533 
page_entry_unlock(gpointer key,gpointer value,gpointer data)534 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
535 {
536     struct page_entry *pe = value;
537 
538     if (pe->locked) {
539         pe->locked = false;
540         page_unlock(pe->pd);
541     }
542     return FALSE;
543 }
544 
545 /*
546  * Trylock a page, and if successful, add the page to a collection.
547  * Returns true ("busy") if the page could not be locked; false otherwise.
548  */
page_trylock_add(struct page_collection * set,tb_page_addr_t addr)549 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
550 {
551     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
552     struct page_entry *pe;
553     PageDesc *pd;
554 
555     pe = q_tree_lookup(set->tree, &index);
556     if (pe) {
557         return false;
558     }
559 
560     pd = page_find(index);
561     if (pd == NULL) {
562         return false;
563     }
564 
565     pe = page_entry_new(pd, index);
566     q_tree_insert(set->tree, &pe->index, pe);
567 
568     /*
569      * If this is either (1) the first insertion or (2) a page whose index
570      * is higher than any other so far, just lock the page and move on.
571      */
572     if (set->max == NULL || pe->index > set->max->index) {
573         set->max = pe;
574         do_page_entry_lock(pe);
575         return false;
576     }
577     /*
578      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
579      * locks in order.
580      */
581     return page_entry_trylock(pe);
582 }
583 
tb_page_addr_cmp(gconstpointer ap,gconstpointer bp,gpointer udata)584 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
585 {
586     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
587     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
588 
589     if (a == b) {
590         return 0;
591     } else if (a < b) {
592         return -1;
593     }
594     return 1;
595 }
596 
/*
 * Lock a range of pages ([@start,@last]) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 *
 * @start and @last are addresses; they are converted to page indexes
 * below.  Pages in the range that have no PageDesc are skipped.
 *
 * Returns a newly allocated collection holding every locked page; release
 * it with page_collection_unlock().
 */
static struct page_collection *page_collection_lock(tb_page_addr_t start,
                                                    tb_page_addr_t last)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    start >>= TARGET_PAGE_BITS;
    last >>= TARGET_PAGE_BITS;
    g_assert(start <= last);

    /* Keys point into the entries; entries are freed by the destructor. */
    set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    /*
     * (Re)lock everything collected so far via an in-order tree walk.
     * On the first pass the tree is empty and this does nothing.
     */
    q_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= last; index++) {
        TranslationBlock *tb;
        PageForEachNext n;

        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            /* Busy: the page is now in the tree; drop all and start over. */
            q_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        /* Also collect both pages of every TB on this page. */
        PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                q_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}
646 
page_collection_unlock(struct page_collection * set)647 static void page_collection_unlock(struct page_collection *set)
648 {
649     /* entries are unlocked and freed via page_entry_destroy */
650     q_tree_destroy(set->tree);
651     g_free(set);
652 }
653 
654 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
tb_remove_all_1(int level,void ** lp)655 static void tb_remove_all_1(int level, void **lp)
656 {
657     int i;
658 
659     if (*lp == NULL) {
660         return;
661     }
662     if (level == 0) {
663         PageDesc *pd = *lp;
664 
665         for (i = 0; i < V_L2_SIZE; ++i) {
666             page_lock(&pd[i]);
667             pd[i].first_tb = (uintptr_t)NULL;
668             page_unlock(&pd[i]);
669         }
670     } else {
671         void **pp = *lp;
672 
673         for (i = 0; i < V_L2_SIZE; ++i) {
674             tb_remove_all_1(level - 1, pp + i);
675         }
676     }
677 }
678 
tb_remove_all(void)679 static void tb_remove_all(void)
680 {
681     int i, l1_sz = v_l1_size;
682 
683     for (i = 0; i < l1_sz; i++) {
684         tb_remove_all_1(v_l2_levels, l1_map + i);
685     }
686 }
687 
688 /*
689  * Add the tb in the target page and protect it if necessary.
690  * Called with @p->lock held.
691  */
tb_page_add(PageDesc * p,TranslationBlock * tb,unsigned int n)692 static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
693 {
694     bool page_already_protected;
695 
696     assert_page_locked(p);
697 
698     tb->page_next[n] = p->first_tb;
699     page_already_protected = p->first_tb != 0;
700     p->first_tb = (uintptr_t)tb | n;
701 
702     /*
703      * If some code is already present, then the pages are already
704      * protected. So we handle the case where only the first TB is
705      * allocated in a physical page.
706      */
707     if (!page_already_protected) {
708         tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
709     }
710 }
711 
/*
 * Link @tb into the TB list of each page it covers.  A distinct second
 * page is linked (as slot 1) before the first page (slot 0).
 * The PageDescs are looked up without allocating, and tb_page_add()
 * asserts that each page is locked.
 */
static void tb_record(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;
    PageDesc *pd;

    assert(paddr0 != -1);

    if (unlikely(paddr1 != -1) && idx0 != idx1) {
        pd = page_find_alloc(idx1, false);
        tb_page_add(pd, tb, 1);
    }

    pd = page_find_alloc(idx0, false);
    tb_page_add(pd, tb, 0);
}
725 
/*
 * Unlink @tb from @pd's TB list.  The page must be locked, and @tb must
 * be on the list: reaching the end without finding it is a fatal error.
 */
static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
{
    TranslationBlock *cur;
    PageForEachNext slot;
    uintptr_t *link;

    assert_page_locked(pd);

    /* 'link' trails the iteration, pointing at the field that led to 'cur'. */
    link = &pd->first_tb;
    PAGE_FOR_EACH_TB(unused, unused, pd, cur, slot) {
        if (cur != tb) {
            link = &cur->page_next[slot];
            continue;
        }
        *link = cur->page_next[slot];
        return;
    }
    g_assert_not_reached();
}
743 
/*
 * Unlink @tb from the TB list of each page it covers: a distinct second
 * page first, then the first page.  tb_page_remove() asserts that each
 * page is locked.
 */
static void tb_remove(TranslationBlock *tb)
{
    tb_page_addr_t paddr0 = tb_page_addr0(tb);
    tb_page_addr_t paddr1 = tb_page_addr1(tb);
    tb_page_addr_t idx0 = paddr0 >> TARGET_PAGE_BITS;
    tb_page_addr_t idx1 = paddr1 >> TARGET_PAGE_BITS;
    PageDesc *pd;

    assert(paddr0 != -1);

    if (unlikely(paddr1 != -1) && idx0 != idx1) {
        pd = page_find_alloc(idx1, false);
        tb_page_remove(pd, tb);
    }

    pd = page_find_alloc(idx0, false);
    tb_page_remove(pd, tb);
}
757 #endif /* CONFIG_USER_ONLY */
758 
759 /* flush all the translation blocks */
do_tb_flush(CPUState * cpu,run_on_cpu_data tb_flush_count)760 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
761 {
762     bool did_flush = false;
763 
764     mmap_lock();
765     /* If it is already been done on request of another CPU, just retry. */
766     if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
767         goto done;
768     }
769     did_flush = true;
770 
771     CPU_FOREACH(cpu) {
772         tcg_flush_jmp_cache(cpu);
773     }
774 
775     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
776     tb_remove_all();
777 
778     tcg_region_reset_all();
779     /* XXX: flush processor icache at this point if cache flush is expensive */
780     qatomic_inc(&tb_ctx.tb_flush_count);
781 
782 done:
783     mmap_unlock();
784     if (did_flush) {
785         qemu_plugin_flush_cb();
786     }
787 }
788 
/*
 * Request a flush of all translation blocks.  No-op unless TCG is
 * enabled.  The flush runs immediately when @cpu is in a serial context,
 * otherwise it is queued with async_safe_run_on_cpu().  The current flush
 * counter is passed along with the request.
 */
void tb_flush(CPUState *cpu)
{
    unsigned flush_seq;
    run_on_cpu_data arg;

    if (!tcg_enabled()) {
        return;
    }

    flush_seq = qatomic_read(&tb_ctx.tb_flush_count);
    arg = RUN_ON_CPU_HOST_INT(flush_seq);

    if (cpu_in_serial_context(cpu)) {
        do_tb_flush(cpu, arg);
        return;
    }
    async_safe_run_on_cpu(cpu, do_tb_flush, arg);
}
802 
/*
 * remove @orig from its @n_orig-th jump list
 *
 * That is: if @orig's outgoing jump slot @n_orig points at a destination
 * TB, unlink the (@orig, @n_orig) pair from that destination's list of
 * incoming jumps.  The slot's low bit is left set afterwards so that it
 * cannot be chained again.
 */
static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
{
    uintptr_t ptr, ptr_locked;
    TranslationBlock *dest;
    TranslationBlock *tb;
    uintptr_t *pprev;
    int n;

    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
    dest = (TranslationBlock *)(ptr & ~1);
    if (dest == NULL) {
        /* The slot was not chained to anything. */
        return;
    }

    qemu_spin_lock(&dest->jmp_lock);
    /*
     * While acquiring the lock, the jump might have been removed if the
     * destination TB was invalidated; check again.
     */
    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
    if (ptr_locked != ptr) {
        qemu_spin_unlock(&dest->jmp_lock);
        /*
         * The only possibility is that the jump was unlinked via
         * tb_jmp_unlink(dest). Seeing here another destination would be a bug,
         * because we set the LSB above.
         */
        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
        return;
    }
    /*
     * We first acquired the lock, and since the destination pointer matches,
     * we know for sure that @orig is in the jmp list.
     */
    /* pprev trails the walk, pointing at the link that led to 'tb'. */
    pprev = &dest->jmp_list_head;
    TB_FOR_EACH_JMP(dest, tb, n) {
        if (tb == orig && n == n_orig) {
            *pprev = tb->jmp_list_next[n];
            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
            qemu_spin_unlock(&dest->jmp_lock);
            return;
        }
        pprev = &tb->jmp_list_next[n];
    }
    g_assert_not_reached();
}
851 
852 /*
853  * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
854  */
tb_reset_jump(TranslationBlock * tb,int n)855 void tb_reset_jump(TranslationBlock *tb, int n)
856 {
857     uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
858     tb_set_jmp_target(tb, n, addr);
859 }
860 
861 /* remove any jumps to the TB */
tb_jmp_unlink(TranslationBlock * dest)862 static inline void tb_jmp_unlink(TranslationBlock *dest)
863 {
864     TranslationBlock *tb;
865     int n;
866 
867     qemu_spin_lock(&dest->jmp_lock);
868 
869     TB_FOR_EACH_JMP(dest, tb, n) {
870         tb_reset_jump(tb, n);
871         qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
872         /* No need to clear the list entry; setting the dest ptr is enough */
873     }
874     dest->jmp_list_head = (uintptr_t)NULL;
875 
876     qemu_spin_unlock(&dest->jmp_lock);
877 }
878 
/*
 * Make sure no CPU's jump cache still refers to @tb.
 *
 * For CF_PCREL TBs every CPU's jump cache is flushed entirely; otherwise
 * only the slot hashed from tb->pc is cleared, and only if it holds @tb.
 */
static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
{
    CPUState *cpu;
    uint32_t slot;

    if (tb_cflags(tb) & CF_PCREL) {
        /* A TB may be at any virtual address */
        CPU_FOREACH(cpu) {
            tcg_flush_jmp_cache(cpu);
        }
        return;
    }

    slot = tb_jmp_cache_hash_func(tb->pc);
    CPU_FOREACH(cpu) {
        CPUJumpCache *jc = cpu->tb_jmp_cache;

        if (qatomic_read(&jc->array[slot].tb) != tb) {
            continue;
        }
        qatomic_set(&jc->array[slot].tb, NULL);
    }
}
900 
901 /*
902  * In user-mode, call with mmap_lock held.
903  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
904  * locks held.
905  */
do_tb_phys_invalidate(TranslationBlock * tb,bool rm_from_page_list)906 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
907 {
908     uint32_t h;
909     tb_page_addr_t phys_pc;
910     uint32_t orig_cflags = tb_cflags(tb);
911 
912     assert_memory_lock();
913 
914     /* make sure no further incoming jumps will be chained to this TB */
915     qemu_spin_lock(&tb->jmp_lock);
916     qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
917     qemu_spin_unlock(&tb->jmp_lock);
918 
919     /* remove the TB from the hash list */
920     phys_pc = tb_page_addr0(tb);
921     h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
922                      tb->flags, tb->cs_base, orig_cflags);
923     if (!qht_remove(&tb_ctx.htable, tb, h)) {
924         return;
925     }
926 
927     /* remove the TB from the page list */
928     if (rm_from_page_list) {
929         tb_remove(tb);
930     }
931 
932     /* remove the TB from the hash list */
933     tb_jmp_cache_inval_tb(tb);
934 
935     /* suppress this TB from the two jump lists */
936     tb_remove_from_jmp_list(tb, 0);
937     tb_remove_from_jmp_list(tb, 1);
938 
939     /* suppress any remaining jumps to this TB */
940     tb_jmp_unlink(tb);
941 
942     qatomic_set(&tb_ctx.tb_phys_invalidate_count,
943                 tb_ctx.tb_phys_invalidate_count + 1);
944 }
945 
/*
 * Invalidate @tb, including removal from its page list, bracketed by
 * qemu_thread_jit_write()/qemu_thread_jit_execute().
 */
static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
    qemu_thread_jit_write();

    do_tb_phys_invalidate(tb, true);

    qemu_thread_jit_execute();
}
952 
953 /*
954  * Invalidate one TB.
955  * Called with mmap_lock held in user-mode.
956  */
tb_phys_invalidate(TranslationBlock * tb,tb_page_addr_t page_addr)957 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
958 {
959     if (page_addr == -1 && tb_page_addr0(tb) != -1) {
960         tb_lock_pages(tb);
961         do_tb_phys_invalidate(tb, true);
962         tb_unlock_pages(tb);
963     } else {
964         do_tb_phys_invalidate(tb, false);
965     }
966 }
967 
968 /*
969  * Add a new TB and link it to the physical page tables.
970  * Called with mmap_lock held for user-mode emulation.
971  *
972  * Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
973  * Note that in !user-mode, another thread might have already added a TB
974  * for the same block of guest code that @tb corresponds to. In that case,
975  * the caller should discard the original @tb, and use instead the returned TB.
976  */
tb_link_page(TranslationBlock * tb)977 TranslationBlock *tb_link_page(TranslationBlock *tb)
978 {
979     void *existing_tb = NULL;
980     uint32_t h;
981 
982     assert_memory_lock();
983     tcg_debug_assert(!(tb->cflags & CF_INVALID));
984 
985     tb_record(tb);
986 
987     /* add in the hash table */
988     h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
989                      tb->flags, tb->cs_base, tb->cflags);
990     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
991 
992     /* remove TB from the page(s) if we couldn't insert it */
993     if (unlikely(existing_tb)) {
994         tb_remove(tb);
995         tb_unlock_pages(tb);
996         return existing_tb;
997     }
998 
999     tb_unlock_pages(tb);
1000     return tb;
1001 }
1002 
1003 #ifdef CONFIG_USER_ONLY
1004 /*
1005  * Invalidate all TBs which intersect with the target address range.
1006  * Called with mmap_lock held for user-mode emulation.
1007  * NOTE: this function must not be called while a TB is running.
1008  */
tb_invalidate_phys_range(CPUState * cpu,tb_page_addr_t start,tb_page_addr_t last)1009 void tb_invalidate_phys_range(CPUState *cpu, tb_page_addr_t start,
1010                               tb_page_addr_t last)
1011 {
1012     TranslationBlock *tb;
1013     PageForEachNext n;
1014 
1015     assert_memory_lock();
1016 
1017     PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1018         tb_phys_invalidate__locked(tb);
1019     }
1020 }
1021 
1022 /*
1023  * Invalidate all TBs which intersect with the target address page @addr.
1024  * Called with mmap_lock held for user-mode emulation
1025  * NOTE: this function must not be called while a TB is running.
1026  */
tb_invalidate_phys_page(tb_page_addr_t addr)1027 static void tb_invalidate_phys_page(tb_page_addr_t addr)
1028 {
1029     tb_page_addr_t start, last;
1030 
1031     start = addr & TARGET_PAGE_MASK;
1032     last = addr | ~TARGET_PAGE_MASK;
1033     tb_invalidate_phys_range(NULL, start, last);
1034 }
1035 
1036 /*
1037  * Called with mmap_lock held. If pc is not 0 then it indicates the
1038  * host PC of the faulting store instruction that caused this invalidate.
1039  * Returns true if the caller needs to abort execution of the current TB.
1040  */
tb_invalidate_phys_page_unwind(CPUState * cpu,tb_page_addr_t addr,uintptr_t pc)1041 bool tb_invalidate_phys_page_unwind(CPUState *cpu, tb_page_addr_t addr,
1042                                     uintptr_t pc)
1043 {
1044     TranslationBlock *current_tb;
1045     bool current_tb_modified;
1046     TranslationBlock *tb;
1047     PageForEachNext n;
1048     tb_page_addr_t last;
1049 
1050     /*
1051      * Without precise smc semantics, or when outside of a TB,
1052      * we can skip to invalidate.
1053      */
1054     if (!pc || !cpu || !cpu->cc->tcg_ops->precise_smc) {
1055         tb_invalidate_phys_page(addr);
1056         return false;
1057     }
1058 
1059     assert_memory_lock();
1060     current_tb = tcg_tb_lookup(pc);
1061 
1062     last = addr | ~TARGET_PAGE_MASK;
1063     addr &= TARGET_PAGE_MASK;
1064     current_tb_modified = false;
1065 
1066     PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1067         if (current_tb == tb &&
1068             (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1069             /*
1070              * If we are modifying the current TB, we must stop its
1071              * execution. We could be more precise by checking that
1072              * the modification is after the current PC, but it would
1073              * require a specialized function to partially restore
1074              * the CPU state.
1075              */
1076             current_tb_modified = true;
1077             cpu_restore_state_from_tb(cpu, current_tb, pc);
1078         }
1079         tb_phys_invalidate__locked(tb);
1080     }
1081 
1082     if (current_tb_modified) {
1083         /* Force execution of one insn next time.  */
1084         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1085         return true;
1086     }
1087     return false;
1088 }
1089 #else
1090 /*
1091  * @p must be non-NULL.
1092  * Call with all @pages locked.
1093  * (@cpu, @retaddr) may be (NULL, 0) outside of a cpu context,
1094  * in which case precise_smc need not be detected.
1095  */
1096 static void
tb_invalidate_phys_page_range__locked(CPUState * cpu,struct page_collection * pages,PageDesc * p,tb_page_addr_t start,tb_page_addr_t last,uintptr_t retaddr)1097 tb_invalidate_phys_page_range__locked(CPUState *cpu,
1098                                       struct page_collection *pages,
1099                                       PageDesc *p, tb_page_addr_t start,
1100                                       tb_page_addr_t last,
1101                                       uintptr_t retaddr)
1102 {
1103     TranslationBlock *tb;
1104     PageForEachNext n;
1105     bool current_tb_modified = false;
1106     TranslationBlock *current_tb = NULL;
1107 
1108     /* Range may not cross a page. */
1109     tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1110 
1111     if (retaddr && cpu && cpu->cc->tcg_ops->precise_smc) {
1112         current_tb = tcg_tb_lookup(retaddr);
1113     }
1114 
1115     /*
1116      * We remove all the TBs in the range [start, last].
1117      * XXX: see if in some cases it could be faster to invalidate all the code
1118      */
1119     PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1120         tb_page_addr_t tb_start, tb_last;
1121 
1122         /* NOTE: this is subtle as a TB may span two physical pages */
1123         tb_start = tb_page_addr0(tb);
1124         tb_last = tb_start + tb->size - 1;
1125         if (n == 0) {
1126             tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1127         } else {
1128             tb_start = tb_page_addr1(tb);
1129             tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1130         }
1131         if (!(tb_last < start || tb_start > last)) {
1132             if (unlikely(current_tb == tb) &&
1133                 (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1134                 /*
1135                  * If we are modifying the current TB, we must stop
1136                  * its execution. We could be more precise by checking
1137                  * that the modification is after the current PC, but it
1138                  * would require a specialized function to partially
1139                  * restore the CPU state.
1140                  */
1141                 current_tb_modified = true;
1142                 cpu_restore_state_from_tb(cpu, current_tb, retaddr);
1143             }
1144             tb_phys_invalidate__locked(tb);
1145         }
1146     }
1147 
1148     /* if no code remaining, no need to continue to use slow writes */
1149     if (!p->first_tb) {
1150         tlb_unprotect_code(start);
1151     }
1152 
1153     if (unlikely(current_tb_modified)) {
1154         page_collection_unlock(pages);
1155         /* Force execution of one insn next time.  */
1156         cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
1157         mmap_unlock();
1158         cpu_loop_exit_noexc(cpu);
1159     }
1160 }
1161 
1162 /*
1163  * Invalidate all TBs which intersect with the target physical address range
1164  * [start;last]. NOTE: start and end may refer to *different* physical pages.
1165  * 'is_cpu_write_access' should be true if called from a real cpu write
1166  * access: the virtual CPU will exit the current TB if code is modified inside
1167  * this TB.
1168  */
tb_invalidate_phys_range(CPUState * cpu,tb_page_addr_t start,tb_page_addr_t last)1169 void tb_invalidate_phys_range(CPUState *cpu, tb_page_addr_t start,
1170                               tb_page_addr_t last)
1171 {
1172     struct page_collection *pages;
1173     tb_page_addr_t index, index_last;
1174 
1175     pages = page_collection_lock(start, last);
1176 
1177     index_last = last >> TARGET_PAGE_BITS;
1178     for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1179         PageDesc *pd = page_find(index);
1180         tb_page_addr_t page_start, page_last;
1181 
1182         if (pd == NULL) {
1183             continue;
1184         }
1185         assert_page_locked(pd);
1186         page_start = index << TARGET_PAGE_BITS;
1187         page_last = page_start | ~TARGET_PAGE_MASK;
1188         page_last = MIN(page_last, last);
1189         tb_invalidate_phys_page_range__locked(cpu, pages, pd,
1190                                               page_start, page_last, 0);
1191     }
1192     page_collection_unlock(pages);
1193 }
1194 
1195 /*
1196  * len must be <= 8 and start must be a multiple of len.
1197  * Called via softmmu_template.h when code areas are written to with
1198  * iothread mutex not held.
1199  */
tb_invalidate_phys_range_fast(CPUState * cpu,ram_addr_t start,unsigned len,uintptr_t ra)1200 void tb_invalidate_phys_range_fast(CPUState *cpu, ram_addr_t start,
1201                                    unsigned len, uintptr_t ra)
1202 {
1203     PageDesc *p = page_find(start >> TARGET_PAGE_BITS);
1204 
1205     if (p) {
1206         ram_addr_t last = start + len - 1;
1207         struct page_collection *pages = page_collection_lock(start, last);
1208 
1209         tb_invalidate_phys_page_range__locked(cpu, pages, p,
1210                                               start, last, ra);
1211         page_collection_unlock(pages);
1212     }
1213 }
1214 
1215 #endif /* CONFIG_USER_ONLY */
1216