1 /* 2 * plugin-gen.c - TCG-related bits of plugin infrastructure 3 * 4 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org> 5 * License: GNU GPL, version 2 or later. 6 * See the COPYING file in the top-level directory. 7 * 8 * We support instrumentation at an instruction granularity. That is, 9 * if a plugin wants to instrument the memory accesses performed by a 10 * particular instruction, it can just do that instead of instrumenting 11 * all memory accesses. Thus, in order to do this we first have to 12 * translate a TB, so that plugins can decide what/where to instrument. 13 * 14 * Injecting the desired instrumentation could be done with a second 15 * translation pass that combined the instrumentation requests, but that 16 * would be ugly and inefficient since we would decode the guest code twice. 17 * Instead, during TB translation we add "empty" instrumentation calls for all 18 * possible instrumentation events, and then once we collect the instrumentation 19 * requests from plugins, we either "fill in" those empty events or remove them 20 * if they have no requests. 21 * 22 * When "filling in" an event we first copy the empty callback's TCG ops. This 23 * might seem unnecessary, but it is done to support an arbitrary number 24 * of callbacks per event. Take for example a regular instruction callback. 25 * We first generate a callback to an empty helper function. Then, if two 26 * plugins register one callback each for this instruction, we make two copies 27 * of the TCG ops generated for the empty callback, substituting the function 28 * pointer that points to the empty helper function with the plugins' desired 29 * callback functions. After that we remove the empty callback's ops. 30 * 31 * Note that the location in TCGOp.args[] of the pointer to a helper function 32 * varies across different guest and host architectures. Instead of duplicating 33 * the logic that figures this out, we rely on the fact that the empty 34 * callbacks point to empty functions that are unique pointers in the program. 35 * Thus, to find the right location we just have to look for a match in 36 * TCGOp.args[]. This is the main reason why we first copy an empty callback's 37 * TCG ops and then fill them in; regardless of whether we have one or many 38 * callbacks for that event, the logic to add all of them is the same. 39 * 40 * When generating more than one callback per event, we make a small 41 * optimization to avoid generating redundant operations. For instance, for the 42 * second and all subsequent callbacks of an event, we do not need to reload the 43 * CPU's index into a TCG temp, since the first callback did it already. 44 */ 45 #include "qemu/osdep.h" 46 #include "qemu/plugin.h" 47 #include "qemu/log.h" 48 #include "cpu.h" 49 #include "tcg/tcg.h" 50 #include "tcg/tcg-temp-internal.h" 51 #include "tcg/tcg-op.h" 52 #include "exec/exec-all.h" 53 #include "exec/plugin-gen.h" 54 #include "exec/translator.h" 55 56 enum plugin_gen_from { 57 PLUGIN_GEN_FROM_TB, 58 PLUGIN_GEN_FROM_INSN, 59 PLUGIN_GEN_AFTER_INSN, 60 PLUGIN_GEN_AFTER_TB, 61 }; 62 63 static void plugin_gen_empty_callback(enum plugin_gen_from from) 64 { 65 switch (from) { 66 case PLUGIN_GEN_AFTER_INSN: 67 case PLUGIN_GEN_FROM_TB: 68 case PLUGIN_GEN_FROM_INSN: 69 tcg_gen_plugin_cb(from); 70 break; 71 default: 72 g_assert_not_reached(); 73 } 74 } 75 76 /* called before finishing a TB with exit_tb, goto_tb or goto_ptr */ 77 void plugin_gen_disable_mem_helpers(void) 78 { 79 if (tcg_ctx->plugin_insn) { 80 tcg_gen_plugin_cb(PLUGIN_GEN_AFTER_TB); 81 } 82 } 83 84 static void gen_enable_mem_helper(struct qemu_plugin_tb *ptb, 85 struct qemu_plugin_insn *insn) 86 { 87 GArray *arr; 88 size_t len; 89 90 /* 91 * Tracking memory accesses performed from helpers requires extra work. 92 * If an instruction is emulated with helpers, we do two things: 93 * (1) copy the CB descriptors, and keep track of it so that they can be 94 * freed later on, and (2) point CPUState.plugin_mem_cbs to the 95 * descriptors, so that we can read them at run-time 96 * (i.e. when the helper executes). 97 * This run-time access is performed from qemu_plugin_vcpu_mem_cb. 98 * 99 * Note that plugin_gen_disable_mem_helpers undoes (2). Since it 100 * is possible that the code we generate after the instruction is 101 * dead, we also add checks before generating tb_exit etc. 102 */ 103 if (!insn->calls_helpers) { 104 return; 105 } 106 107 if (!insn->mem_cbs || !insn->mem_cbs->len) { 108 insn->mem_helper = false; 109 return; 110 } 111 insn->mem_helper = true; 112 ptb->mem_helper = true; 113 114 /* 115 * TODO: It seems like we should be able to use ref/unref 116 * to avoid needing to actually copy this array. 117 * Alternately, perhaps we could allocate new memory adjacent 118 * to the TranslationBlock itself, so that we do not have to 119 * actively manage the lifetime after this. 120 */ 121 len = insn->mem_cbs->len; 122 arr = g_array_sized_new(false, false, 123 sizeof(struct qemu_plugin_dyn_cb), len); 124 memcpy(arr->data, insn->mem_cbs->data, 125 len * sizeof(struct qemu_plugin_dyn_cb)); 126 qemu_plugin_add_dyn_cb_arr(arr); 127 128 tcg_gen_st_ptr(tcg_constant_ptr((intptr_t)arr), tcg_env, 129 offsetof(CPUState, plugin_mem_cbs) - 130 offsetof(ArchCPU, env)); 131 } 132 133 static void gen_disable_mem_helper(void) 134 { 135 tcg_gen_st_ptr(tcg_constant_ptr(0), tcg_env, 136 offsetof(CPUState, plugin_mem_cbs) - 137 offsetof(ArchCPU, env)); 138 } 139 140 static void gen_udata_cb(struct qemu_plugin_dyn_cb *cb) 141 { 142 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); 143 144 tcg_gen_ld_i32(cpu_index, tcg_env, 145 -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); 146 tcg_gen_call2(cb->regular.f.vcpu_udata, cb->regular.info, NULL, 147 tcgv_i32_temp(cpu_index), 148 tcgv_ptr_temp(tcg_constant_ptr(cb->userp))); 149 tcg_temp_free_i32(cpu_index); 150 } 151 152 static void gen_inline_cb(struct qemu_plugin_dyn_cb *cb) 153 { 154 GArray *arr = cb->inline_insn.entry.score->data; 155 size_t offset = cb->inline_insn.entry.offset; 156 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); 157 TCGv_i64 val = tcg_temp_ebb_new_i64(); 158 TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); 159 160 tcg_gen_ld_i32(cpu_index, tcg_env, 161 -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); 162 tcg_gen_muli_i32(cpu_index, cpu_index, g_array_get_element_size(arr)); 163 tcg_gen_ext_i32_ptr(ptr, cpu_index); 164 tcg_temp_free_i32(cpu_index); 165 166 tcg_gen_addi_ptr(ptr, ptr, (intptr_t)arr->data); 167 tcg_gen_ld_i64(val, ptr, offset); 168 tcg_gen_addi_i64(val, val, cb->inline_insn.imm); 169 tcg_gen_st_i64(val, ptr, offset); 170 171 tcg_temp_free_i64(val); 172 tcg_temp_free_ptr(ptr); 173 } 174 175 static void gen_mem_cb(struct qemu_plugin_dyn_cb *cb, 176 qemu_plugin_meminfo_t meminfo, TCGv_i64 addr) 177 { 178 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); 179 180 tcg_gen_ld_i32(cpu_index, tcg_env, 181 -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); 182 tcg_gen_call4(cb->regular.f.vcpu_mem, cb->regular.info, NULL, 183 tcgv_i32_temp(cpu_index), 184 tcgv_i32_temp(tcg_constant_i32(meminfo)), 185 tcgv_i64_temp(addr), 186 tcgv_ptr_temp(tcg_constant_ptr(cb->userp))); 187 tcg_temp_free_i32(cpu_index); 188 } 189 190 static void inject_cb(struct qemu_plugin_dyn_cb *cb) 191 192 { 193 switch (cb->type) { 194 case PLUGIN_CB_REGULAR: 195 gen_udata_cb(cb); 196 break; 197 case PLUGIN_CB_INLINE: 198 gen_inline_cb(cb); 199 break; 200 default: 201 g_assert_not_reached(); 202 } 203 } 204 205 static void inject_mem_cb(struct qemu_plugin_dyn_cb *cb, 206 enum qemu_plugin_mem_rw rw, 207 qemu_plugin_meminfo_t meminfo, TCGv_i64 addr) 208 { 209 if (cb->rw & rw) { 210 switch (cb->type) { 211 case PLUGIN_CB_MEM_REGULAR: 212 gen_mem_cb(cb, meminfo, addr); 213 break; 214 default: 215 inject_cb(cb); 216 break; 217 } 218 } 219 } 220 221 static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb) 222 { 223 TCGOp *op, *next; 224 int insn_idx = -1; 225 226 if (unlikely(qemu_loglevel_mask(LOG_TB_OP_PLUGIN) 227 && qemu_log_in_addr_range(plugin_tb->vaddr))) { 228 FILE *logfile = qemu_log_trylock(); 229 if (logfile) { 230 fprintf(logfile, "OP before plugin injection:\n"); 231 tcg_dump_ops(tcg_ctx, logfile, false); 232 fprintf(logfile, "\n"); 233 qemu_log_unlock(logfile); 234 } 235 } 236 237 /* 238 * While injecting code, we cannot afford to reuse any ebb temps 239 * that might be live within the existing opcode stream. 240 * The simplest solution is to release them all and create new. 241 */ 242 memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps)); 243 244 QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) { 245 switch (op->opc) { 246 case INDEX_op_insn_start: 247 insn_idx++; 248 break; 249 250 case INDEX_op_plugin_cb: 251 { 252 enum plugin_gen_from from = op->args[0]; 253 struct qemu_plugin_insn *insn = NULL; 254 const GArray *cbs; 255 int i, n; 256 257 if (insn_idx >= 0) { 258 insn = g_ptr_array_index(plugin_tb->insns, insn_idx); 259 } 260 261 tcg_ctx->emit_before_op = op; 262 263 switch (from) { 264 case PLUGIN_GEN_AFTER_TB: 265 if (plugin_tb->mem_helper) { 266 gen_disable_mem_helper(); 267 } 268 break; 269 270 case PLUGIN_GEN_AFTER_INSN: 271 assert(insn != NULL); 272 if (insn->mem_helper) { 273 gen_disable_mem_helper(); 274 } 275 break; 276 277 case PLUGIN_GEN_FROM_TB: 278 assert(insn == NULL); 279 280 cbs = plugin_tb->cbs; 281 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) { 282 inject_cb( 283 &g_array_index(cbs, struct qemu_plugin_dyn_cb, i)); 284 } 285 break; 286 287 case PLUGIN_GEN_FROM_INSN: 288 assert(insn != NULL); 289 290 gen_enable_mem_helper(plugin_tb, insn); 291 292 cbs = insn->insn_cbs; 293 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) { 294 inject_cb( 295 &g_array_index(cbs, struct qemu_plugin_dyn_cb, i)); 296 } 297 break; 298 299 default: 300 g_assert_not_reached(); 301 } 302 303 tcg_ctx->emit_before_op = NULL; 304 tcg_op_remove(tcg_ctx, op); 305 break; 306 } 307 308 case INDEX_op_plugin_mem_cb: 309 { 310 TCGv_i64 addr = temp_tcgv_i64(arg_temp(op->args[0])); 311 qemu_plugin_meminfo_t meminfo = op->args[1]; 312 enum qemu_plugin_mem_rw rw = 313 (qemu_plugin_mem_is_store(meminfo) 314 ? QEMU_PLUGIN_MEM_W : QEMU_PLUGIN_MEM_R); 315 struct qemu_plugin_insn *insn; 316 const GArray *cbs; 317 int i, n; 318 319 assert(insn_idx >= 0); 320 insn = g_ptr_array_index(plugin_tb->insns, insn_idx); 321 322 tcg_ctx->emit_before_op = op; 323 324 cbs = insn->mem_cbs; 325 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) { 326 inject_mem_cb(&g_array_index(cbs, struct qemu_plugin_dyn_cb, i), 327 rw, meminfo, addr); 328 } 329 330 tcg_ctx->emit_before_op = NULL; 331 tcg_op_remove(tcg_ctx, op); 332 break; 333 } 334 335 default: 336 /* plugins don't care about any other ops */ 337 break; 338 } 339 } 340 } 341 342 bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db, 343 bool mem_only) 344 { 345 bool ret = false; 346 347 if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_state->event_mask)) { 348 struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; 349 350 /* reset callbacks */ 351 if (ptb->cbs) { 352 g_array_set_size(ptb->cbs, 0); 353 } 354 ptb->n = 0; 355 356 ret = true; 357 358 ptb->vaddr = db->pc_first; 359 ptb->vaddr2 = -1; 360 ptb->haddr1 = db->host_addr[0]; 361 ptb->haddr2 = NULL; 362 ptb->mem_only = mem_only; 363 ptb->mem_helper = false; 364 365 plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB); 366 } 367 368 tcg_ctx->plugin_insn = NULL; 369 370 return ret; 371 } 372 373 void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db) 374 { 375 struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; 376 struct qemu_plugin_insn *pinsn; 377 378 pinsn = qemu_plugin_tb_insn_get(ptb, db->pc_next); 379 tcg_ctx->plugin_insn = pinsn; 380 plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN); 381 382 /* 383 * Detect page crossing to get the new host address. 384 * Note that we skip this when haddr1 == NULL, e.g. when we're 385 * fetching instructions from a region not backed by RAM. 386 */ 387 if (ptb->haddr1 == NULL) { 388 pinsn->haddr = NULL; 389 } else if (is_same_page(db, db->pc_next)) { 390 pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr; 391 } else { 392 if (ptb->vaddr2 == -1) { 393 ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first); 394 get_page_addr_code_hostp(cpu_env(cpu), ptb->vaddr2, &ptb->haddr2); 395 } 396 pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2; 397 } 398 } 399 400 void plugin_gen_insn_end(void) 401 { 402 plugin_gen_empty_callback(PLUGIN_GEN_AFTER_INSN); 403 } 404 405 /* 406 * There are cases where we never get to finalise a translation - for 407 * example a page fault during translation. As a result we shouldn't 408 * do any clean-up here and make sure things are reset in 409 * plugin_gen_tb_start. 410 */ 411 void plugin_gen_tb_end(CPUState *cpu, size_t num_insns) 412 { 413 struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; 414 415 /* translator may have removed instructions, update final count */ 416 g_assert(num_insns <= ptb->n); 417 ptb->n = num_insns; 418 419 /* collect instrumentation requests */ 420 qemu_plugin_tb_trans_cb(cpu, ptb); 421 422 /* inject the instrumentation at the appropriate places */ 423 plugin_gen_inject(ptb); 424 } 425