1 /* 2 * plugin-gen.c - TCG-related bits of plugin infrastructure 3 * 4 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org> 5 * License: GNU GPL, version 2 or later. 6 * See the COPYING file in the top-level directory. 7 * 8 * We support instrumentation at an instruction granularity. That is, 9 * if a plugin wants to instrument the memory accesses performed by a 10 * particular instruction, it can just do that instead of instrumenting 11 * all memory accesses. Thus, in order to do this we first have to 12 * translate a TB, so that plugins can decide what/where to instrument. 13 * 14 * Injecting the desired instrumentation could be done with a second 15 * translation pass that combined the instrumentation requests, but that 16 * would be ugly and inefficient since we would decode the guest code twice. 17 * Instead, during TB translation we add "empty" instrumentation calls for all 18 * possible instrumentation events, and then once we collect the instrumentation 19 * requests from plugins, we either "fill in" those empty events or remove them 20 * if they have no requests. 21 * 22 * When "filling in" an event we first copy the empty callback's TCG ops. This 23 * might seem unnecessary, but it is done to support an arbitrary number 24 * of callbacks per event. Take for example a regular instruction callback. 25 * We first generate a callback to an empty helper function. Then, if two 26 * plugins register one callback each for this instruction, we make two copies 27 * of the TCG ops generated for the empty callback, substituting the function 28 * pointer that points to the empty helper function with the plugins' desired 29 * callback functions. After that we remove the empty callback's ops. 30 * 31 * Note that the location in TCGOp.args[] of the pointer to a helper function 32 * varies across different guest and host architectures. Instead of duplicating 33 * the logic that figures this out, we rely on the fact that the empty 34 * callbacks point to empty functions that are unique pointers in the program. 35 * Thus, to find the right location we just have to look for a match in 36 * TCGOp.args[]. This is the main reason why we first copy an empty callback's 37 * TCG ops and then fill them in; regardless of whether we have one or many 38 * callbacks for that event, the logic to add all of them is the same. 39 * 40 * When generating more than one callback per event, we make a small 41 * optimization to avoid generating redundant operations. For instance, for the 42 * second and all subsequent callbacks of an event, we do not need to reload the 43 * CPU's index into a TCG temp, since the first callback did it already. 44 */ 45 #include "qemu/osdep.h" 46 #include "qemu/plugin.h" 47 #include "cpu.h" 48 #include "tcg/tcg.h" 49 #include "tcg/tcg-temp-internal.h" 50 #include "tcg/tcg-op.h" 51 #include "exec/exec-all.h" 52 #include "exec/plugin-gen.h" 53 #include "exec/translator.h" 54 55 enum plugin_gen_from { 56 PLUGIN_GEN_FROM_TB, 57 PLUGIN_GEN_FROM_INSN, 58 PLUGIN_GEN_AFTER_INSN, 59 PLUGIN_GEN_AFTER_TB, 60 }; 61 62 static void plugin_gen_empty_callback(enum plugin_gen_from from) 63 { 64 switch (from) { 65 case PLUGIN_GEN_AFTER_INSN: 66 case PLUGIN_GEN_FROM_TB: 67 case PLUGIN_GEN_FROM_INSN: 68 tcg_gen_plugin_cb(from); 69 break; 70 default: 71 g_assert_not_reached(); 72 } 73 } 74 75 /* called before finishing a TB with exit_tb, goto_tb or goto_ptr */ 76 void plugin_gen_disable_mem_helpers(void) 77 { 78 if (tcg_ctx->plugin_insn) { 79 tcg_gen_plugin_cb(PLUGIN_GEN_AFTER_TB); 80 } 81 } 82 83 static void gen_enable_mem_helper(struct qemu_plugin_tb *ptb, 84 struct qemu_plugin_insn *insn) 85 { 86 GArray *arr; 87 size_t len; 88 89 /* 90 * Tracking memory accesses performed from helpers requires extra work. 91 * If an instruction is emulated with helpers, we do two things: 92 * (1) copy the CB descriptors, and keep track of it so that they can be 93 * freed later on, and (2) point CPUState.plugin_mem_cbs to the 94 * descriptors, so that we can read them at run-time 95 * (i.e. when the helper executes). 96 * This run-time access is performed from qemu_plugin_vcpu_mem_cb. 97 * 98 * Note that plugin_gen_disable_mem_helpers undoes (2). Since it 99 * is possible that the code we generate after the instruction is 100 * dead, we also add checks before generating tb_exit etc. 101 */ 102 if (!insn->calls_helpers) { 103 return; 104 } 105 106 if (!insn->mem_cbs || !insn->mem_cbs->len) { 107 insn->mem_helper = false; 108 return; 109 } 110 insn->mem_helper = true; 111 ptb->mem_helper = true; 112 113 /* 114 * TODO: It seems like we should be able to use ref/unref 115 * to avoid needing to actually copy this array. 116 * Alternately, perhaps we could allocate new memory adjacent 117 * to the TranslationBlock itself, so that we do not have to 118 * actively manage the lifetime after this. 119 */ 120 len = insn->mem_cbs->len; 121 arr = g_array_sized_new(false, false, 122 sizeof(struct qemu_plugin_dyn_cb), len); 123 memcpy(arr->data, insn->mem_cbs->data, 124 len * sizeof(struct qemu_plugin_dyn_cb)); 125 qemu_plugin_add_dyn_cb_arr(arr); 126 127 tcg_gen_st_ptr(tcg_constant_ptr((intptr_t)arr), tcg_env, 128 offsetof(CPUState, plugin_mem_cbs) - 129 offsetof(ArchCPU, env)); 130 } 131 132 static void gen_disable_mem_helper(void) 133 { 134 tcg_gen_st_ptr(tcg_constant_ptr(0), tcg_env, 135 offsetof(CPUState, plugin_mem_cbs) - 136 offsetof(ArchCPU, env)); 137 } 138 139 static void gen_udata_cb(struct qemu_plugin_dyn_cb *cb) 140 { 141 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); 142 143 tcg_gen_ld_i32(cpu_index, tcg_env, 144 -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); 145 tcg_gen_call2(cb->regular.f.vcpu_udata, cb->regular.info, NULL, 146 tcgv_i32_temp(cpu_index), 147 tcgv_ptr_temp(tcg_constant_ptr(cb->userp))); 148 tcg_temp_free_i32(cpu_index); 149 } 150 151 static void gen_inline_cb(struct qemu_plugin_dyn_cb *cb) 152 { 153 GArray *arr = cb->inline_insn.entry.score->data; 154 size_t offset = cb->inline_insn.entry.offset; 155 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); 156 TCGv_i64 val = tcg_temp_ebb_new_i64(); 157 TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); 158 159 tcg_gen_ld_i32(cpu_index, tcg_env, 160 -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); 161 tcg_gen_muli_i32(cpu_index, cpu_index, g_array_get_element_size(arr)); 162 tcg_gen_ext_i32_ptr(ptr, cpu_index); 163 tcg_temp_free_i32(cpu_index); 164 165 tcg_gen_addi_ptr(ptr, ptr, (intptr_t)arr->data); 166 tcg_gen_ld_i64(val, ptr, offset); 167 tcg_gen_addi_i64(val, val, cb->inline_insn.imm); 168 tcg_gen_st_i64(val, ptr, offset); 169 170 tcg_temp_free_i64(val); 171 tcg_temp_free_ptr(ptr); 172 } 173 174 static void gen_mem_cb(struct qemu_plugin_dyn_cb *cb, 175 qemu_plugin_meminfo_t meminfo, TCGv_i64 addr) 176 { 177 TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); 178 179 tcg_gen_ld_i32(cpu_index, tcg_env, 180 -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); 181 tcg_gen_call4(cb->regular.f.vcpu_mem, cb->regular.info, NULL, 182 tcgv_i32_temp(cpu_index), 183 tcgv_i32_temp(tcg_constant_i32(meminfo)), 184 tcgv_i64_temp(addr), 185 tcgv_ptr_temp(tcg_constant_ptr(cb->userp))); 186 tcg_temp_free_i32(cpu_index); 187 } 188 189 /* #define DEBUG_PLUGIN_GEN_OPS */ 190 static void pr_ops(void) 191 { 192 #ifdef DEBUG_PLUGIN_GEN_OPS 193 TCGOp *op; 194 int i = 0; 195 196 QTAILQ_FOREACH(op, &tcg_ctx->ops, link) { 197 const char *name = ""; 198 const char *type = ""; 199 200 if (op->opc == INDEX_op_plugin_cb_start) { 201 switch (op->args[0]) { 202 case PLUGIN_GEN_FROM_TB: 203 name = "tb"; 204 break; 205 case PLUGIN_GEN_FROM_INSN: 206 name = "insn"; 207 break; 208 case PLUGIN_GEN_FROM_MEM: 209 name = "mem"; 210 break; 211 case PLUGIN_GEN_AFTER_INSN: 212 name = "after insn"; 213 break; 214 default: 215 break; 216 } 217 switch (op->args[1]) { 218 case PLUGIN_GEN_CB_UDATA: 219 type = "udata"; 220 break; 221 case PLUGIN_GEN_CB_INLINE: 222 type = "inline"; 223 break; 224 case PLUGIN_GEN_CB_MEM: 225 type = "mem"; 226 break; 227 case PLUGIN_GEN_ENABLE_MEM_HELPER: 228 type = "enable mem helper"; 229 break; 230 case PLUGIN_GEN_DISABLE_MEM_HELPER: 231 type = "disable mem helper"; 232 break; 233 default: 234 break; 235 } 236 } 237 printf("op[%2i]: %s %s %s\n", i, tcg_op_defs[op->opc].name, name, type); 238 i++; 239 } 240 #endif 241 } 242 243 static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb) 244 { 245 TCGOp *op, *next; 246 int insn_idx = -1; 247 248 pr_ops(); 249 250 /* 251 * While injecting code, we cannot afford to reuse any ebb temps 252 * that might be live within the existing opcode stream. 253 * The simplest solution is to release them all and create new. 254 */ 255 memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps)); 256 257 QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) { 258 switch (op->opc) { 259 case INDEX_op_insn_start: 260 insn_idx++; 261 break; 262 263 case INDEX_op_plugin_cb: 264 { 265 enum plugin_gen_from from = op->args[0]; 266 struct qemu_plugin_insn *insn = NULL; 267 const GArray *cbs; 268 int i, n; 269 270 if (insn_idx >= 0) { 271 insn = g_ptr_array_index(plugin_tb->insns, insn_idx); 272 } 273 274 tcg_ctx->emit_before_op = op; 275 276 switch (from) { 277 case PLUGIN_GEN_AFTER_TB: 278 if (plugin_tb->mem_helper) { 279 gen_disable_mem_helper(); 280 } 281 break; 282 283 case PLUGIN_GEN_AFTER_INSN: 284 assert(insn != NULL); 285 if (insn->mem_helper) { 286 gen_disable_mem_helper(); 287 } 288 break; 289 290 case PLUGIN_GEN_FROM_TB: 291 assert(insn == NULL); 292 293 cbs = plugin_tb->cbs; 294 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) { 295 struct qemu_plugin_dyn_cb *cb = 296 &g_array_index(cbs, struct qemu_plugin_dyn_cb, i); 297 298 switch (cb->type) { 299 case PLUGIN_CB_REGULAR: 300 gen_udata_cb(cb); 301 break; 302 case PLUGIN_CB_INLINE: 303 gen_inline_cb(cb); 304 break; 305 default: 306 g_assert_not_reached(); 307 } 308 } 309 break; 310 311 case PLUGIN_GEN_FROM_INSN: 312 assert(insn != NULL); 313 314 gen_enable_mem_helper(plugin_tb, insn); 315 316 cbs = insn->insn_cbs; 317 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) { 318 struct qemu_plugin_dyn_cb *cb = 319 &g_array_index(cbs, struct qemu_plugin_dyn_cb, i); 320 321 switch (cb->type) { 322 case PLUGIN_CB_REGULAR: 323 gen_udata_cb(cb); 324 break; 325 case PLUGIN_CB_INLINE: 326 gen_inline_cb(cb); 327 break; 328 default: 329 g_assert_not_reached(); 330 } 331 } 332 break; 333 334 default: 335 g_assert_not_reached(); 336 } 337 338 tcg_ctx->emit_before_op = NULL; 339 tcg_op_remove(tcg_ctx, op); 340 break; 341 } 342 343 case INDEX_op_plugin_mem_cb: 344 { 345 TCGv_i64 addr = temp_tcgv_i64(arg_temp(op->args[0])); 346 qemu_plugin_meminfo_t meminfo = op->args[1]; 347 struct qemu_plugin_insn *insn; 348 const GArray *cbs; 349 int i, n, rw; 350 351 assert(insn_idx >= 0); 352 insn = g_ptr_array_index(plugin_tb->insns, insn_idx); 353 rw = qemu_plugin_mem_is_store(meminfo) ? 2 : 1; 354 355 tcg_ctx->emit_before_op = op; 356 357 cbs = insn->mem_cbs; 358 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) { 359 struct qemu_plugin_dyn_cb *cb = 360 &g_array_index(cbs, struct qemu_plugin_dyn_cb, i); 361 362 if (cb->rw & rw) { 363 switch (cb->type) { 364 case PLUGIN_CB_REGULAR: 365 gen_mem_cb(cb, meminfo, addr); 366 break; 367 case PLUGIN_CB_INLINE: 368 gen_inline_cb(cb); 369 break; 370 default: 371 g_assert_not_reached(); 372 } 373 } 374 } 375 376 tcg_ctx->emit_before_op = NULL; 377 tcg_op_remove(tcg_ctx, op); 378 break; 379 } 380 381 default: 382 /* plugins don't care about any other ops */ 383 break; 384 } 385 } 386 pr_ops(); 387 } 388 389 bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db, 390 bool mem_only) 391 { 392 bool ret = false; 393 394 if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_state->event_mask)) { 395 struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; 396 397 /* reset callbacks */ 398 if (ptb->cbs) { 399 g_array_set_size(ptb->cbs, 0); 400 } 401 ptb->n = 0; 402 403 ret = true; 404 405 ptb->vaddr = db->pc_first; 406 ptb->vaddr2 = -1; 407 ptb->haddr1 = db->host_addr[0]; 408 ptb->haddr2 = NULL; 409 ptb->mem_only = mem_only; 410 ptb->mem_helper = false; 411 412 plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB); 413 } 414 415 tcg_ctx->plugin_insn = NULL; 416 417 return ret; 418 } 419 420 void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db) 421 { 422 struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; 423 struct qemu_plugin_insn *pinsn; 424 425 pinsn = qemu_plugin_tb_insn_get(ptb, db->pc_next); 426 tcg_ctx->plugin_insn = pinsn; 427 plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN); 428 429 /* 430 * Detect page crossing to get the new host address. 431 * Note that we skip this when haddr1 == NULL, e.g. when we're 432 * fetching instructions from a region not backed by RAM. 433 */ 434 if (ptb->haddr1 == NULL) { 435 pinsn->haddr = NULL; 436 } else if (is_same_page(db, db->pc_next)) { 437 pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr; 438 } else { 439 if (ptb->vaddr2 == -1) { 440 ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first); 441 get_page_addr_code_hostp(cpu_env(cpu), ptb->vaddr2, &ptb->haddr2); 442 } 443 pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2; 444 } 445 } 446 447 void plugin_gen_insn_end(void) 448 { 449 plugin_gen_empty_callback(PLUGIN_GEN_AFTER_INSN); 450 } 451 452 /* 453 * There are cases where we never get to finalise a translation - for 454 * example a page fault during translation. As a result we shouldn't 455 * do any clean-up here and make sure things are reset in 456 * plugin_gen_tb_start. 457 */ 458 void plugin_gen_tb_end(CPUState *cpu, size_t num_insns) 459 { 460 struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; 461 462 /* translator may have removed instructions, update final count */ 463 g_assert(num_insns <= ptb->n); 464 ptb->n = num_insns; 465 466 /* collect instrumentation requests */ 467 qemu_plugin_tb_trans_cb(cpu, ptb); 468 469 /* inject the instrumentation at the appropriate places */ 470 plugin_gen_inject(ptb); 471 } 472