/*
 * plugin-gen.c - TCG-related bits of plugin infrastructure
 *
 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 *
 * We support instrumentation at an instruction granularity. That is,
 * if a plugin wants to instrument the memory accesses performed by a
 * particular instruction, it can just do that instead of instrumenting
 * all memory accesses. Thus, in order to do this we first have to
 * translate a TB, so that plugins can decide what/where to instrument.
 *
 * Injecting the desired instrumentation could be done with a second
 * translation pass that combined the instrumentation requests, but that
 * would be ugly and inefficient since we would decode the guest code twice.
 * Instead, during TB translation we add "plugin_cb" marker opcodes
 * for all possible instrumentation events, and then once we collect the
 * instrumentation requests from plugins, plugin_gen_inject() walks the
 * opcode stream and expands each marker into calls to the subscribed
 * callbacks, or removes it if there are no requests.
 */
#include "qemu/osdep.h"
#include "qemu/plugin.h"
#include "cpu.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "exec/exec-all.h"
#include "exec/plugin-gen.h"
#include "exec/translator.h"
#include "exec/helper-proto-common.h"

#define HELPER_H "accel/tcg/plugin-helpers.h"
#include "exec/helper-info.c.inc"
#undef HELPER_H

/*
 * plugin_cb_start TCG op args[] (used only by the pr_ops() debug dump):
 * 0: enum plugin_gen_from
 * 1: enum plugin_gen_cb
 * 2: set to 1 for mem callback that is a write, 0 otherwise.
 */
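/*
 * For orientation only, a sketch (not exact TCG output) of the op stream
 * for a single-insn TB as it reaches plugin_gen_inject():
 *
 *     plugin_cb      PLUGIN_GEN_FROM_TB
 *     insn_start
 *     plugin_cb      PLUGIN_GEN_FROM_INSN
 *     ... ops for the guest instruction, each guest memory access
 *         accompanied by a plugin_mem_cb marker ...
 *     plugin_cb      PLUGIN_GEN_AFTER_INSN
 *     plugin_cb      PLUGIN_GEN_AFTER_TB
 *     exit_tb
 *
 * plugin_gen_inject() then expands each marker into the callbacks that
 * plugins actually subscribed to, or into nothing at all.
 */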

enum plugin_gen_from {
    PLUGIN_GEN_FROM_TB,
    PLUGIN_GEN_FROM_INSN,
    PLUGIN_GEN_AFTER_INSN,
    PLUGIN_GEN_AFTER_TB,
    PLUGIN_GEN_N_FROMS,
};

enum plugin_gen_cb {
    PLUGIN_GEN_CB_UDATA,
    PLUGIN_GEN_CB_UDATA_R,
    PLUGIN_GEN_CB_INLINE,
    PLUGIN_GEN_CB_MEM,
    PLUGIN_GEN_ENABLE_MEM_HELPER,
    PLUGIN_GEN_DISABLE_MEM_HELPER,
    PLUGIN_GEN_N_CBS,
};

/*
 * These helpers are stubs that get dynamically switched out for calls
 * direct to the plugin if they are subscribed to.
 */
void HELPER(plugin_vcpu_udata_cb_no_wg)(uint32_t cpu_index, void *udata)
{ }

void HELPER(plugin_vcpu_udata_cb_no_rwg)(uint32_t cpu_index, void *udata)
{ }

void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
                                qemu_plugin_meminfo_t info, uint64_t vaddr,
                                void *userdata)
{ }

static void plugin_gen_empty_callback(enum plugin_gen_from from)
{
    switch (from) {
    case PLUGIN_GEN_AFTER_INSN:
    case PLUGIN_GEN_FROM_TB:
    case PLUGIN_GEN_FROM_INSN:
        tcg_gen_plugin_cb(from);
        break;
    default:
        g_assert_not_reached();
    }
}

/* called before finishing a TB with exit_tb, goto_tb or goto_ptr */
void plugin_gen_disable_mem_helpers(void)
{
    if (tcg_ctx->plugin_insn) {
        tcg_gen_plugin_cb(PLUGIN_GEN_AFTER_TB);
    }
}
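/*
 * A note on addressing, since the pattern recurs in the generators below:
 * tcg_env points at ArchCPU.env, not at the start of the vCPU structure,
 * so fields of the enclosing CPUState are accessed at negative offsets,
 * e.g.:
 *
 *     tcg_gen_ld_i32(t, tcg_env,
 *                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
 */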
static void gen_enable_mem_helper(struct qemu_plugin_tb *ptb,
                                  struct qemu_plugin_insn *insn)
{
    GArray *cbs[2];
    GArray *arr;
    size_t n_cbs;

    /*
     * Tracking memory accesses performed from helpers requires extra work.
     * If an instruction is emulated with helpers, we do two things:
     * (1) copy the CB descriptors, and keep track of them so that they
     * can be freed later on, and (2) point CPUState.plugin_mem_cbs to the
     * descriptors, so that we can read them at run-time
     * (i.e. when the helper executes).
     * This run-time access is performed from qemu_plugin_vcpu_mem_cb.
     *
     * Note that plugin_gen_disable_mem_helpers undoes (2). Since it
     * is possible that the code we generate after the instruction is
     * dead, we also add checks before generating tb_exit etc.
     */
    if (!insn->calls_helpers) {
        return;
    }

    cbs[0] = insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR];
    cbs[1] = insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE];
    n_cbs = cbs[0]->len + cbs[1]->len;

    if (n_cbs == 0) {
        insn->mem_helper = false;
        return;
    }
    insn->mem_helper = true;
    ptb->mem_helper = true;

    arr = g_array_sized_new(false, false,
                            sizeof(struct qemu_plugin_dyn_cb), n_cbs);
    g_array_append_vals(arr, cbs[0]->data, cbs[0]->len);
    g_array_append_vals(arr, cbs[1]->data, cbs[1]->len);

    qemu_plugin_add_dyn_cb_arr(arr);

    tcg_gen_st_ptr(tcg_constant_ptr((intptr_t)arr), tcg_env,
                   offsetof(CPUState, plugin_mem_cbs) -
                   offsetof(ArchCPU, env));
}

static void gen_disable_mem_helper(void)
{
    tcg_gen_st_ptr(tcg_constant_ptr(0), tcg_env,
                   offsetof(CPUState, plugin_mem_cbs) -
                   offsetof(ArchCPU, env));
}

static void gen_udata_cb(struct qemu_plugin_dyn_cb *cb)
{
    TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();

    tcg_gen_ld_i32(cpu_index, tcg_env,
                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
    tcg_gen_call2(cb->regular.f.vcpu_udata, cb->regular.info, NULL,
                  tcgv_i32_temp(cpu_index),
                  tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
    tcg_temp_free_i32(cpu_index);
}

static void gen_inline_cb(struct qemu_plugin_dyn_cb *cb)
{
    GArray *arr = cb->inline_insn.entry.score->data;
    size_t offset = cb->inline_insn.entry.offset;
    TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
    TCGv_i64 val = tcg_temp_ebb_new_i64();
    TCGv_ptr ptr = tcg_temp_ebb_new_ptr();

    tcg_gen_ld_i32(cpu_index, tcg_env,
                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
    tcg_gen_muli_i32(cpu_index, cpu_index, g_array_get_element_size(arr));
    tcg_gen_ext_i32_ptr(ptr, cpu_index);
    tcg_temp_free_i32(cpu_index);

    tcg_gen_addi_ptr(ptr, ptr, (intptr_t)arr->data);
    tcg_gen_ld_i64(val, ptr, offset);
    tcg_gen_addi_i64(val, val, cb->inline_insn.imm);
    tcg_gen_st_i64(val, ptr, offset);

    tcg_temp_free_i64(val);
    tcg_temp_free_ptr(ptr);
}

static void gen_mem_cb(struct qemu_plugin_dyn_cb *cb,
                       qemu_plugin_meminfo_t meminfo, TCGv_i64 addr)
{
    TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();

    tcg_gen_ld_i32(cpu_index, tcg_env,
                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
    tcg_gen_call4(cb->regular.f.vcpu_mem, cb->regular.info, NULL,
                  tcgv_i32_temp(cpu_index),
                  tcgv_i32_temp(tcg_constant_i32(meminfo)),
                  tcgv_i64_temp(addr),
                  tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
    tcg_temp_free_i32(cpu_index);
}
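/*
 * To make the indexing in gen_inline_cb() above concrete: the emitted ops
 * compute, in effect (illustrative C, not generated code):
 *
 *     uint64_t *entry =
 *         (uint64_t *)(arr->data
 *                      + cpu_index * g_array_get_element_size(arr)
 *                      + offset);
 *     *entry += cb->inline_insn.imm;
 *
 * i.e. each vCPU updates its own scoreboard slot without a helper call.
 */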
/* #define DEBUG_PLUGIN_GEN_OPS */
static void pr_ops(void)
{
#ifdef DEBUG_PLUGIN_GEN_OPS
    TCGOp *op;
    int i = 0;

    QTAILQ_FOREACH(op, &tcg_ctx->ops, link) {
        const char *name = "";
        const char *type = "";

        if (op->opc == INDEX_op_plugin_cb_start) {
            switch (op->args[0]) {
            case PLUGIN_GEN_FROM_TB:
                name = "tb";
                break;
            case PLUGIN_GEN_FROM_INSN:
                name = "insn";
                break;
            case PLUGIN_GEN_AFTER_INSN:
                name = "after insn";
                break;
            case PLUGIN_GEN_AFTER_TB:
                name = "after tb";
                break;
            default:
                break;
            }
            switch (op->args[1]) {
            case PLUGIN_GEN_CB_UDATA:
                type = "udata";
                break;
            case PLUGIN_GEN_CB_INLINE:
                type = "inline";
                break;
            case PLUGIN_GEN_CB_MEM:
                type = "mem";
                break;
            case PLUGIN_GEN_ENABLE_MEM_HELPER:
                type = "enable mem helper";
                break;
            case PLUGIN_GEN_DISABLE_MEM_HELPER:
                type = "disable mem helper";
                break;
            default:
                break;
            }
        }
        printf("op[%2i]: %s %s %s\n", i, tcg_op_defs[op->opc].name, name, type);
        i++;
    }
#endif
}

static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
{
    TCGOp *op, *next;
    int insn_idx = -1;

    pr_ops();

    /*
     * While injecting code, we cannot afford to reuse any ebb temps
     * that might be live within the existing opcode stream.
     * The simplest solution is to release them all and create new.
     */
    memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));

    QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
        switch (op->opc) {
        case INDEX_op_insn_start:
            insn_idx++;
            break;

        case INDEX_op_plugin_cb:
        {
            enum plugin_gen_from from = op->args[0];
            struct qemu_plugin_insn *insn = NULL;
            const GArray *cbs;
            int i, n;

            if (insn_idx >= 0) {
                insn = g_ptr_array_index(plugin_tb->insns, insn_idx);
            }

            /* Route subsequent tcg_gen_* output to just before the marker. */
            tcg_ctx->emit_before_op = op;

            switch (from) {
            case PLUGIN_GEN_AFTER_TB:
                if (plugin_tb->mem_helper) {
                    gen_disable_mem_helper();
                }
                break;

            case PLUGIN_GEN_AFTER_INSN:
                assert(insn != NULL);
                if (insn->mem_helper) {
                    gen_disable_mem_helper();
                }
                break;

            case PLUGIN_GEN_FROM_TB:
                assert(insn == NULL);

                cbs = plugin_tb->cbs[PLUGIN_CB_REGULAR];
                for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
                    struct qemu_plugin_dyn_cb *cb =
                        &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
                    gen_udata_cb(cb);
                }

                cbs = plugin_tb->cbs[PLUGIN_CB_INLINE];
                for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
                    struct qemu_plugin_dyn_cb *cb =
                        &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
                    gen_inline_cb(cb);
                }
                break;

            case PLUGIN_GEN_FROM_INSN:
                assert(insn != NULL);

                gen_enable_mem_helper(plugin_tb, insn);

                cbs = insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR];
                for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
                    struct qemu_plugin_dyn_cb *cb =
                        &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
                    gen_udata_cb(cb);
                }

                cbs = insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE];
                for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
                    struct qemu_plugin_dyn_cb *cb =
                        &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
                    gen_inline_cb(cb);
                }
                break;

            default:
                g_assert_not_reached();
            }

            tcg_ctx->emit_before_op = NULL;
            tcg_op_remove(tcg_ctx, op);
            break;
        }

        case INDEX_op_plugin_mem_cb:
        {
            TCGv_i64 addr = temp_tcgv_i64(arg_temp(op->args[0]));
            qemu_plugin_meminfo_t meminfo = op->args[1];
            struct qemu_plugin_insn *insn;
            const GArray *cbs;
            int i, n, rw;

            assert(insn_idx >= 0);
            insn = g_ptr_array_index(plugin_tb->insns, insn_idx);
            rw = qemu_plugin_mem_is_store(meminfo) ? 2 : 1;

            tcg_ctx->emit_before_op = op;

            cbs = insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR];
            for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
                struct qemu_plugin_dyn_cb *cb =
                    &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
                if (cb->rw & rw) {
                    gen_mem_cb(cb, meminfo, addr);
                }
            }

            cbs = insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE];
            for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
                struct qemu_plugin_dyn_cb *cb =
                    &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
                if (cb->rw & rw) {
                    gen_inline_cb(cb);
                }
            }

            tcg_ctx->emit_before_op = NULL;
            tcg_op_remove(tcg_ctx, op);
            break;
        }

        default:
            /* plugins don't care about any other ops */
            break;
        }
    }
    pr_ops();
}
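/*
 * The entry points below are called from the generic translator loop:
 * plugin_gen_tb_start() once per TB, plugin_gen_insn_start() and
 * plugin_gen_insn_end() around each instruction, and plugin_gen_tb_end()
 * once translation of the TB has finished.
 */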
bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
                         bool mem_only)
{
    bool ret = false;

    if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_state->event_mask)) {
        struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
        int i;

        /* reset callbacks */
        for (i = 0; i < PLUGIN_N_CB_SUBTYPES; i++) {
            if (ptb->cbs[i]) {
                g_array_set_size(ptb->cbs[i], 0);
            }
        }
        ptb->n = 0;

        ret = true;

        ptb->vaddr = db->pc_first;
        ptb->vaddr2 = -1;
        ptb->haddr1 = db->host_addr[0];
        ptb->haddr2 = NULL;
        ptb->mem_only = mem_only;
        ptb->mem_helper = false;

        plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
    }

    tcg_ctx->plugin_insn = NULL;

    return ret;
}

void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
{
    struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
    struct qemu_plugin_insn *pinsn;

    pinsn = qemu_plugin_tb_insn_get(ptb, db->pc_next);
    tcg_ctx->plugin_insn = pinsn;
    plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN);

    /*
     * Detect page crossing to get the new host address.
     * Note that we skip this when haddr1 == NULL, e.g. when we're
     * fetching instructions from a region not backed by RAM.
     */
    if (ptb->haddr1 == NULL) {
        pinsn->haddr = NULL;
    } else if (is_same_page(db, db->pc_next)) {
        pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr;
    } else {
        if (ptb->vaddr2 == -1) {
            ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first);
            get_page_addr_code_hostp(cpu_env(cpu), ptb->vaddr2, &ptb->haddr2);
        }
        pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2;
    }
}

void plugin_gen_insn_end(void)
{
    plugin_gen_empty_callback(PLUGIN_GEN_AFTER_INSN);
}

/*
 * There are cases where we never get to finalise a translation - for
 * example a page fault during translation. As a result we shouldn't
 * do any clean-up here; instead we make sure everything is reset in
 * plugin_gen_tb_start.
 */
void plugin_gen_tb_end(CPUState *cpu, size_t num_insns)
{
    struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;

    /* translator may have removed instructions, update final count */
    g_assert(num_insns <= ptb->n);
    ptb->n = num_insns;

    /* collect instrumentation requests */
    qemu_plugin_tb_trans_cb(cpu, ptb);

    /* inject the instrumentation at the appropriate places */
    plugin_gen_inject(ptb);
}
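/*
 * For reference, the callbacks injected above originate from a plugin's
 * TB-translation hook. A minimal, hypothetical subscriber (the names
 * tb_trans_cb and insn_exec_cb are placeholders) would look like:
 *
 *     static void tb_trans_cb(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 *     {
 *         for (size_t i = 0; i < qemu_plugin_tb_n_insns(tb); i++) {
 *             struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
 *             qemu_plugin_register_vcpu_insn_exec_cb(insn, insn_exec_cb,
 *                                                    QEMU_PLUGIN_CB_NO_REGS,
 *                                                    NULL);
 *         }
 *     }
 *
 * registered with qemu_plugin_register_vcpu_tb_trans_cb(id, tb_trans_cb).
 * The requests recorded this way are what plugin_gen_inject() later turns
 * into the TCG ops emitted by gen_udata_cb() and friends.
 */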