xref: /qemu/accel/tcg/plugin-gen.c (revision ccd8f17e02d70c4e9e1d7f3b8ee0f80dd58dc979)
1 /*
2  * plugin-gen.c - TCG-related bits of plugin infrastructure
3  *
4  * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
5  * License: GNU GPL, version 2 or later.
6  *   See the COPYING file in the top-level directory.
7  *
8  * We support instrumentation at an instruction granularity. That is,
9  * if a plugin wants to instrument the memory accesses performed by a
10  * particular instruction, it can just do that instead of instrumenting
11  * all memory accesses. Thus, in order to do this we first have to
12  * translate a TB, so that plugins can decide what/where to instrument.
13  *
14  * Injecting the desired instrumentation could be done with a second
15  * translation pass that combined the instrumentation requests, but that
16  * would be ugly and inefficient since we would decode the guest code twice.
17  * Instead, during TB translation we add "empty" instrumentation calls for all
18  * possible instrumentation events, and then once we collect the instrumentation
19  * requests from plugins, we either "fill in" those empty events or remove them
20  * if they have no requests.
21  *
22  * When "filling in" an event we first copy the empty callback's TCG ops. This
23  * might seem unnecessary, but it is done to support an arbitrary number
24  * of callbacks per event. Take for example a regular instruction callback.
25  * We first generate a callback to an empty helper function. Then, if two
26  * plugins register one callback each for this instruction, we make two copies
27  * of the TCG ops generated for the empty callback, substituting the function
28  * pointer that points to the empty helper function with the plugins' desired
29  * callback functions. After that we remove the empty callback's ops.
30  *
31  * Note that the location in TCGOp.args[] of the pointer to a helper function
32  * varies across different guest and host architectures. Instead of duplicating
33  * the logic that figures this out, we rely on the fact that the empty
34  * callbacks point to empty functions that are unique pointers in the program.
35  * Thus, to find the right location we just have to look for a match in
36  * TCGOp.args[]. This is the main reason why we first copy an empty callback's
37  * TCG ops and then fill them in; regardless of whether we have one or many
38  * callbacks for that event, the logic to add all of them is the same.
39  *
40  * When generating more than one callback per event, we make a small
41  * optimization to avoid generating redundant operations. For instance, for the
42  * second and all subsequent callbacks of an event, we do not need to reload the
43  * CPU's index into a TCG temp, since the first callback did it already.
44  */
45 #include "qemu/osdep.h"
46 #include "qemu/plugin.h"
47 #include "cpu.h"
48 #include "tcg/tcg.h"
49 #include "tcg/tcg-temp-internal.h"
50 #include "tcg/tcg-op.h"
51 #include "exec/exec-all.h"
52 #include "exec/plugin-gen.h"
53 #include "exec/translator.h"
54 
/*
 * Origin of a plugin callback marker. The value is carried in args[0]
 * of the plugin_cb op and decoded again in plugin_gen_inject().
 */
enum plugin_gen_from {
    PLUGIN_GEN_FROM_TB    = 0,  /* start of a translation block */
    PLUGIN_GEN_FROM_INSN  = 1,  /* start of an instruction */
    PLUGIN_GEN_AFTER_INSN = 2,  /* end of an instruction */
    PLUGIN_GEN_AFTER_TB   = 3,  /* just before the TB is finished */
};
61 
62 static void plugin_gen_empty_callback(enum plugin_gen_from from)
63 {
64     switch (from) {
65     case PLUGIN_GEN_AFTER_INSN:
66     case PLUGIN_GEN_FROM_TB:
67     case PLUGIN_GEN_FROM_INSN:
68         tcg_gen_plugin_cb(from);
69         break;
70     default:
71         g_assert_not_reached();
72     }
73 }
74 
75 /* called before finishing a TB with exit_tb, goto_tb or goto_ptr */
76 void plugin_gen_disable_mem_helpers(void)
77 {
78     if (tcg_ctx->plugin_insn) {
79         tcg_gen_plugin_cb(PLUGIN_GEN_AFTER_TB);
80     }
81 }
82 
83 static void gen_enable_mem_helper(struct qemu_plugin_tb *ptb,
84                                   struct qemu_plugin_insn *insn)
85 {
86     GArray *arr;
87     size_t len;
88 
89     /*
90      * Tracking memory accesses performed from helpers requires extra work.
91      * If an instruction is emulated with helpers, we do two things:
92      * (1) copy the CB descriptors, and keep track of it so that they can be
93      * freed later on, and (2) point CPUState.plugin_mem_cbs to the
94      * descriptors, so that we can read them at run-time
95      * (i.e. when the helper executes).
96      * This run-time access is performed from qemu_plugin_vcpu_mem_cb.
97      *
98      * Note that plugin_gen_disable_mem_helpers undoes (2). Since it
99      * is possible that the code we generate after the instruction is
100      * dead, we also add checks before generating tb_exit etc.
101      */
102     if (!insn->calls_helpers) {
103         return;
104     }
105 
106     if (!insn->mem_cbs || !insn->mem_cbs->len) {
107         insn->mem_helper = false;
108         return;
109     }
110     insn->mem_helper = true;
111     ptb->mem_helper = true;
112 
113     /*
114      * TODO: It seems like we should be able to use ref/unref
115      * to avoid needing to actually copy this array.
116      * Alternately, perhaps we could allocate new memory adjacent
117      * to the TranslationBlock itself, so that we do not have to
118      * actively manage the lifetime after this.
119      */
120     len = insn->mem_cbs->len;
121     arr = g_array_sized_new(false, false,
122                             sizeof(struct qemu_plugin_dyn_cb), len);
123     memcpy(arr->data, insn->mem_cbs->data,
124            len * sizeof(struct qemu_plugin_dyn_cb));
125     qemu_plugin_add_dyn_cb_arr(arr);
126 
127     tcg_gen_st_ptr(tcg_constant_ptr((intptr_t)arr), tcg_env,
128                    offsetof(CPUState, plugin_mem_cbs) -
129                    offsetof(ArchCPU, env));
130 }
131 
132 static void gen_disable_mem_helper(void)
133 {
134     tcg_gen_st_ptr(tcg_constant_ptr(0), tcg_env,
135                    offsetof(CPUState, plugin_mem_cbs) -
136                    offsetof(ArchCPU, env));
137 }
138 
139 static void gen_udata_cb(struct qemu_plugin_dyn_cb *cb)
140 {
141     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
142 
143     tcg_gen_ld_i32(cpu_index, tcg_env,
144                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
145     tcg_gen_call2(cb->regular.f.vcpu_udata, cb->regular.info, NULL,
146                   tcgv_i32_temp(cpu_index),
147                   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
148     tcg_temp_free_i32(cpu_index);
149 }
150 
151 static void gen_inline_cb(struct qemu_plugin_dyn_cb *cb)
152 {
153     GArray *arr = cb->inline_insn.entry.score->data;
154     size_t offset = cb->inline_insn.entry.offset;
155     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
156     TCGv_i64 val = tcg_temp_ebb_new_i64();
157     TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
158 
159     tcg_gen_ld_i32(cpu_index, tcg_env,
160                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
161     tcg_gen_muli_i32(cpu_index, cpu_index, g_array_get_element_size(arr));
162     tcg_gen_ext_i32_ptr(ptr, cpu_index);
163     tcg_temp_free_i32(cpu_index);
164 
165     tcg_gen_addi_ptr(ptr, ptr, (intptr_t)arr->data);
166     tcg_gen_ld_i64(val, ptr, offset);
167     tcg_gen_addi_i64(val, val, cb->inline_insn.imm);
168     tcg_gen_st_i64(val, ptr, offset);
169 
170     tcg_temp_free_i64(val);
171     tcg_temp_free_ptr(ptr);
172 }
173 
174 static void gen_mem_cb(struct qemu_plugin_dyn_cb *cb,
175                        qemu_plugin_meminfo_t meminfo, TCGv_i64 addr)
176 {
177     TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
178 
179     tcg_gen_ld_i32(cpu_index, tcg_env,
180                    -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
181     tcg_gen_call4(cb->regular.f.vcpu_mem, cb->regular.info, NULL,
182                   tcgv_i32_temp(cpu_index),
183                   tcgv_i32_temp(tcg_constant_i32(meminfo)),
184                   tcgv_i64_temp(addr),
185                   tcgv_ptr_temp(tcg_constant_ptr(cb->userp)));
186     tcg_temp_free_i32(cpu_index);
187 }
188 
/* #define DEBUG_PLUGIN_GEN_OPS */

/*
 * Debug aid: print the current TCG op stream, one op per line.
 *
 * Compiled to a no-op unless DEBUG_PLUGIN_GEN_OPS is defined. Plugin ops
 * are annotated using the same encoding that plugin_gen_inject() decodes:
 * for INDEX_op_plugin_cb, args[0] is an enum plugin_gen_from; for
 * INDEX_op_plugin_mem_cb, args[1] is the meminfo word.
 */
static void pr_ops(void)
{
#ifdef DEBUG_PLUGIN_GEN_OPS
    TCGOp *op;
    int i = 0;

    QTAILQ_FOREACH(op, &tcg_ctx->ops, link) {
        const char *name = "";
        const char *type = "";

        if (op->opc == INDEX_op_plugin_cb) {
            switch (op->args[0]) {
            case PLUGIN_GEN_FROM_TB:
                name = "tb";
                break;
            case PLUGIN_GEN_FROM_INSN:
                name = "insn";
                break;
            case PLUGIN_GEN_AFTER_INSN:
                name = "after insn";
                break;
            case PLUGIN_GEN_AFTER_TB:
                name = "after tb";
                break;
            default:
                break;
            }
        } else if (op->opc == INDEX_op_plugin_mem_cb) {
            name = "mem";
            type = qemu_plugin_mem_is_store(op->args[1]) ? "store" : "load";
        }
        printf("op[%2i]: %s %s %s\n", i, tcg_op_defs[op->opc].name, name, type);
        i++;
    }
#endif
}
242 
243 static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
244 {
245     TCGOp *op, *next;
246     int insn_idx = -1;
247 
248     pr_ops();
249 
250     /*
251      * While injecting code, we cannot afford to reuse any ebb temps
252      * that might be live within the existing opcode stream.
253      * The simplest solution is to release them all and create new.
254      */
255     memset(tcg_ctx->free_temps, 0, sizeof(tcg_ctx->free_temps));
256 
257     QTAILQ_FOREACH_SAFE(op, &tcg_ctx->ops, link, next) {
258         switch (op->opc) {
259         case INDEX_op_insn_start:
260             insn_idx++;
261             break;
262 
263         case INDEX_op_plugin_cb:
264         {
265             enum plugin_gen_from from = op->args[0];
266             struct qemu_plugin_insn *insn = NULL;
267             const GArray *cbs;
268             int i, n;
269 
270             if (insn_idx >= 0) {
271                 insn = g_ptr_array_index(plugin_tb->insns, insn_idx);
272             }
273 
274             tcg_ctx->emit_before_op = op;
275 
276             switch (from) {
277             case PLUGIN_GEN_AFTER_TB:
278                 if (plugin_tb->mem_helper) {
279                     gen_disable_mem_helper();
280                 }
281                 break;
282 
283             case PLUGIN_GEN_AFTER_INSN:
284                 assert(insn != NULL);
285                 if (insn->mem_helper) {
286                     gen_disable_mem_helper();
287                 }
288                 break;
289 
290             case PLUGIN_GEN_FROM_TB:
291                 assert(insn == NULL);
292 
293                 cbs = plugin_tb->cbs;
294                 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
295                     struct qemu_plugin_dyn_cb *cb =
296                         &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
297 
298                     switch (cb->type) {
299                     case PLUGIN_CB_REGULAR:
300                         gen_udata_cb(cb);
301                         break;
302                     case PLUGIN_CB_INLINE:
303                         gen_inline_cb(cb);
304                         break;
305                     default:
306                         g_assert_not_reached();
307                     }
308                 }
309                 break;
310 
311             case PLUGIN_GEN_FROM_INSN:
312                 assert(insn != NULL);
313 
314                 gen_enable_mem_helper(plugin_tb, insn);
315 
316                 cbs = insn->insn_cbs;
317                 for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
318                     struct qemu_plugin_dyn_cb *cb =
319                         &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
320 
321                     switch (cb->type) {
322                     case PLUGIN_CB_REGULAR:
323                         gen_udata_cb(cb);
324                         break;
325                     case PLUGIN_CB_INLINE:
326                         gen_inline_cb(cb);
327                         break;
328                     default:
329                         g_assert_not_reached();
330                     }
331                 }
332                 break;
333 
334             default:
335                 g_assert_not_reached();
336             }
337 
338             tcg_ctx->emit_before_op = NULL;
339             tcg_op_remove(tcg_ctx, op);
340             break;
341         }
342 
343         case INDEX_op_plugin_mem_cb:
344         {
345             TCGv_i64 addr = temp_tcgv_i64(arg_temp(op->args[0]));
346             qemu_plugin_meminfo_t meminfo = op->args[1];
347             struct qemu_plugin_insn *insn;
348             const GArray *cbs;
349             int i, n, rw;
350 
351             assert(insn_idx >= 0);
352             insn = g_ptr_array_index(plugin_tb->insns, insn_idx);
353             rw = qemu_plugin_mem_is_store(meminfo) ? 2 : 1;
354 
355             tcg_ctx->emit_before_op = op;
356 
357             cbs = insn->mem_cbs;
358             for (i = 0, n = (cbs ? cbs->len : 0); i < n; i++) {
359                 struct qemu_plugin_dyn_cb *cb =
360                     &g_array_index(cbs, struct qemu_plugin_dyn_cb, i);
361 
362                 if (cb->rw & rw) {
363                     switch (cb->type) {
364                     case PLUGIN_CB_MEM_REGULAR:
365                         gen_mem_cb(cb, meminfo, addr);
366                         break;
367                     case PLUGIN_CB_INLINE:
368                         gen_inline_cb(cb);
369                         break;
370                     default:
371                         g_assert_not_reached();
372                     }
373                 }
374             }
375 
376             tcg_ctx->emit_before_op = NULL;
377             tcg_op_remove(tcg_ctx, op);
378             break;
379         }
380 
381         default:
382             /* plugins don't care about any other ops */
383             break;
384         }
385     }
386     pr_ops();
387 }
388 
389 bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
390                          bool mem_only)
391 {
392     bool ret = false;
393 
394     if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_state->event_mask)) {
395         struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
396 
397         /* reset callbacks */
398         if (ptb->cbs) {
399             g_array_set_size(ptb->cbs, 0);
400         }
401         ptb->n = 0;
402 
403         ret = true;
404 
405         ptb->vaddr = db->pc_first;
406         ptb->vaddr2 = -1;
407         ptb->haddr1 = db->host_addr[0];
408         ptb->haddr2 = NULL;
409         ptb->mem_only = mem_only;
410         ptb->mem_helper = false;
411 
412         plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
413     }
414 
415     tcg_ctx->plugin_insn = NULL;
416 
417     return ret;
418 }
419 
420 void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
421 {
422     struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
423     struct qemu_plugin_insn *pinsn;
424 
425     pinsn = qemu_plugin_tb_insn_get(ptb, db->pc_next);
426     tcg_ctx->plugin_insn = pinsn;
427     plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN);
428 
429     /*
430      * Detect page crossing to get the new host address.
431      * Note that we skip this when haddr1 == NULL, e.g. when we're
432      * fetching instructions from a region not backed by RAM.
433      */
434     if (ptb->haddr1 == NULL) {
435         pinsn->haddr = NULL;
436     } else if (is_same_page(db, db->pc_next)) {
437         pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr;
438     } else {
439         if (ptb->vaddr2 == -1) {
440             ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first);
441             get_page_addr_code_hostp(cpu_env(cpu), ptb->vaddr2, &ptb->haddr2);
442         }
443         pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2;
444     }
445 }
446 
447 void plugin_gen_insn_end(void)
448 {
449     plugin_gen_empty_callback(PLUGIN_GEN_AFTER_INSN);
450 }
451 
452 /*
453  * There are cases where we never get to finalise a translation - for
454  * example a page fault during translation. As a result we shouldn't
455  * do any clean-up here and make sure things are reset in
456  * plugin_gen_tb_start.
457  */
458 void plugin_gen_tb_end(CPUState *cpu, size_t num_insns)
459 {
460     struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
461 
462     /* translator may have removed instructions, update final count */
463     g_assert(num_insns <= ptb->n);
464     ptb->n = num_insns;
465 
466     /* collect instrumentation requests */
467     qemu_plugin_tb_trans_cb(cpu, ptb);
468 
469     /* inject the instrumentation at the appropriate places */
470     plugin_gen_inject(ptb);
471 }
472