/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "tcg-internal.h"


struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * A lookup key has its .size field set to 0.
     * From the glib sources we see that @ap is always the lookup key. However,
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
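
/*
 * Illustrative example of the comparator above (addresses are made up):
 * suppose a TB was inserted with tc.ptr = 0x1000 and tc.size = 0x80. A
 * lookup key { .ptr = 0x1040, .size = 0 } compares equal to that node,
 * because ptr_cmp_tb_tc() treats the stored entry as the half-open
 * interval [0x1000, 0x1080) and 0x1040 falls inside it. A key with
 * .ptr = 0x1080 compares greater, and one with .ptr = 0xfff compares less.
 */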

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert. The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
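
/*
 * Worked example for the index computation above (numbers are made up):
 * with region.stride = 4 MiB and region.n = 8, a pointer 9 MiB past
 * region.start_aligned gives offset / stride = 2, i.e. the tree of
 * region 2. Pointers below start_aligned (the unaligned head of the
 * buffer, which belongs to region 0) map to tree 0, and anything beyond
 * stride * (n - 1) is clamped to the last tree, which also owns the extra
 * tail pages of the buffer.
 */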

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}
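
/*
 * Illustrative sequence for the allocator below (a sketch, not a real
 * caller in this file): with region.n = 4, successive allocations hand out
 * regions 0..3 and bump region.current; a fifth request finds
 * region.current == region.n and returns true, meaning every region is
 * full. A hypothetical caller that hits its code_gen_highwater mark would
 * then need to flush all translated code before retrying, roughly:
 *
 *     if (tcg_region_alloc(tcg_ctx)) {
 *         // all regions exhausted: only a full TB flush frees space
 *     }
 */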

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
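
/*
 * Worked example for tcg_n_regions() above (illustrative numbers): with a
 * 1 GiB code_gen_buffer and max_cpus = 8, the first iteration checks
 * 1 GiB / (8 * 8) = 16 MiB per region, which is >= 2 MiB, so 64 regions
 * are used. With a 16 MiB buffer and max_cpus = 16, even one region per
 * vCPU would only be 1 MiB, so the loop falls through and we settle for
 * max_cpus = 16 regions.
 */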

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (e.g. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur. Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     */
    for (i = 0; i < region.n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);

        /*
         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
         * rejects a permission change from RWX -> NONE. Guard pages are
         * nice for bug detection but are not essential; ignore any failure.
         */
        (void)qemu_mprotect_none(end, page_size);
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}
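
/*
 * Layout sketch for tcg_region_init() above, with made-up numbers: assume
 * a page-aligned 32 MiB buffer, 4 KiB pages and n_regions = 8. Then
 * region_size = 32 MiB / 8 = 4 MiB, the last page of which is a guard
 * page, so region.size = 4 MiB - 4 KiB and region.stride = 4 MiB.
 * region.end = buf + 32 MiB - 4 KiB, i.e. the final region also stops one
 * guard page short of the buffer's end; any extra tail pages left over by
 * the division are folded into that last region by tcg_region_bounds().
 */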

void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region. */
    g_assert(region.start == s->code_gen_buffer);
    region.start = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.start),
                     region.end - region.start);
}

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}