xref: /qemu/contrib/plugins/howvec.c (revision 08916fd4b6b308941568ecd7305455121ce7c267)
1  /*
 * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
3   *
4   * How vectorised is this code?
5   *
6   * Attempt to measure the amount of vectorisation that has been done
7   * on some code by counting classes of instruction.
8   *
9   * License: GNU GPL, version 2 or later.
10   *   See the COPYING file in the top-level directory.
11   */
12  #include <inttypes.h>
13  #include <assert.h>
14  #include <stdlib.h>
15  #include <inttypes.h>
16  #include <string.h>
17  #include <unistd.h>
18  #include <stdio.h>
19  #include <glib.h>
20  
21  #include <qemu-plugin.h>
22  
/*
 * Exported symbol carrying the plugin API version this file was built
 * against (presumably checked by QEMU when the plugin is loaded).
 */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

/* Element count of a real array; do not use on a pointer. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
26  
/* How executions of instructions matching a class entry are accounted. */
typedef enum {
    /* all matching instructions share the class entry's counter */
    COUNT_CLASS,
    /* each distinct opcode gets its own record in the insns hash table */
    COUNT_INDIVIDUAL,
    /* matching instructions are not instrumented at all */
    COUNT_NONE
} CountType;
32  
/* Maximum number of individual instruction lines printed in the report. */
static int limit = 50;
/* "inline" option: use inline per-vCPU adds instead of an exec callback. */
static bool do_inline;
/* "verbose" option: also print classes that recorded zero hits. */
static bool verbose;

/* Serialises lookups and insertions on the insns hash table. */
static GMutex lock;
/* Opcode (used directly as the key) -> InsnExecCount record. */
static GHashTable *insns;
39  
/*
 * One row of an instruction class table. An opcode belongs to the class
 * when (opcode & mask) == pattern.
 */
typedef struct {
    /* human readable name printed in the report */
    const char *class;
    /* short name matched against the "count=" plugin option */
    const char *opt;
    /* bits of the opcode that take part in the match */
    uint32_t mask;
    /* expected value of the masked bits */
    uint32_t pattern;
    /* how matches are accounted, may be overridden by "count=" */
    CountType what;
    /* per-vCPU hit counter used when what == COUNT_CLASS */
    qemu_plugin_u64 count;
} InsnClassExecCount;
48  
/* Record for a single opcode that is being counted individually. */
typedef struct {
    /* disassembly text from qemu_plugin_insn_disas(), released with g_free() */
    char *insn;
    /* the (up to) first 32 bits of the instruction, also the hash key */
    uint32_t opcode;
    /* per-vCPU execution counter for this opcode */
    qemu_plugin_u64 count;
    /* class table entry that selected this opcode */
    InsnClassExecCount *class;
} InsnExecCount;
55  
56  /*
57   * Matchers for classes of instructions, order is important.
58   *
59   * Your most precise match must be before looser matches. If no match
60   * is found in the table we can create an individual entry.
61   *
62   * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
63   */
64  static InsnClassExecCount aarch64_insn_classes[] = {
65      /* "Reserved"" */
66      { "  UDEF",              "udef",   0xffff0000, 0x00000000, COUNT_NONE},
67      { "  SVE",               "sve",    0x1e000000, 0x04000000, COUNT_CLASS},
68      { "Reserved",            "res",    0x1e000000, 0x00000000, COUNT_CLASS},
69      /* Data Processing Immediate */
70      { "  PCrel addr",        "pcrel",  0x1f000000, 0x10000000, COUNT_CLASS},
71      { "  Add/Sub (imm,tags)", "asit",   0x1f800000, 0x11800000, COUNT_CLASS},
72      { "  Add/Sub (imm)",     "asi",    0x1f000000, 0x11000000, COUNT_CLASS},
73      { "  Logical (imm)",     "logi",   0x1f800000, 0x12000000, COUNT_CLASS},
74      { "  Move Wide (imm)",   "movwi",  0x1f800000, 0x12800000, COUNT_CLASS},
75      { "  Bitfield",          "bitf",   0x1f800000, 0x13000000, COUNT_CLASS},
76      { "  Extract",           "extr",   0x1f800000, 0x13800000, COUNT_CLASS},
77      { "Data Proc Imm",       "dpri",   0x1c000000, 0x10000000, COUNT_CLASS},
78      /* Branches */
79      { "  Cond Branch (imm)", "cndb",   0xfe000000, 0x54000000, COUNT_CLASS},
80      { "  Exception Gen",     "excp",   0xff000000, 0xd4000000, COUNT_CLASS},
81      { "    NOP",             "nop",    0xffffffff, 0xd503201f, COUNT_NONE},
82      { "  Hints",             "hint",   0xfffff000, 0xd5032000, COUNT_CLASS},
83      { "  Barriers",          "barr",   0xfffff000, 0xd5033000, COUNT_CLASS},
84      { "  PSTATE",            "psta",   0xfff8f000, 0xd5004000, COUNT_CLASS},
85      { "  System Insn",       "sins",   0xffd80000, 0xd5080000, COUNT_CLASS},
86      { "  System Reg",        "sreg",   0xffd00000, 0xd5100000, COUNT_CLASS},
87      { "  Branch (reg)",      "breg",   0xfe000000, 0xd6000000, COUNT_CLASS},
88      { "  Branch (imm)",      "bimm",   0x7c000000, 0x14000000, COUNT_CLASS},
89      { "  Cmp & Branch",      "cmpb",   0x7e000000, 0x34000000, COUNT_CLASS},
90      { "  Tst & Branch",      "tstb",   0x7e000000, 0x36000000, COUNT_CLASS},
91      { "Branches",            "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
92      /* Loads and Stores */
93      { "  AdvSimd ldstmult",  "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
94      { "  AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS},
95      { "  AdvSimd ldst",      "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
96      { "  AdvSimd ldst++",    "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS},
97      { "  ldst excl",         "ldstx",  0x3f000000, 0x08000000, COUNT_CLASS},
98      { "    Prefetch",        "prfm",   0xff000000, 0xd8000000, COUNT_CLASS},
99      { "  Load Reg (lit)",    "ldlit",  0x1b000000, 0x18000000, COUNT_CLASS},
100      { "  ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS},
101      { "  ldst pair",         "ldstp",  0x38000000, 0x28000000, COUNT_CLASS},
102      { "  ldst reg",          "ldstr",  0x3b200000, 0x38000000, COUNT_CLASS},
103      { "  Atomic ldst",       "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
104      { "  ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
105      { "  ldst reg (pac)",    "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
106      { "  ldst reg (imm)",    "ldsti",  0x3b000000, 0x39000000, COUNT_CLASS},
107      { "Loads & Stores",      "ldst",   0x0a000000, 0x08000000, COUNT_CLASS},
108      /* Data Processing Register */
109      { "Data Proc Reg",       "dprr",   0x0e000000, 0x0a000000, COUNT_CLASS},
110      /* Scalar FP */
111      { "Scalar FP ",          "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
112      /* Unclassified */
113      { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
114  };
115  
/*
 * Matchers for 32-bit SPARC. Entries are tried in order, so the
 * narrower masks are listed before the wider ones that would also
 * match the same opcode.
 */
static InsnClassExecCount sparc32_insn_classes[] = {
    { "Call",                "call",   0xc0000000, 0x40000000, COUNT_CLASS},
    { "Branch ICond",        "bcc",    0xc1c00000, 0x00800000, COUNT_CLASS},
    { "Branch Fcond",        "fbcc",   0xc1c00000, 0x01800000, COUNT_CLASS},
    { "SetHi",               "sethi",  0xc1c00000, 0x01000000, COUNT_CLASS},
    { "FPU ALU",             "fpu",    0xc1f00000, 0x81a00000, COUNT_CLASS},
    { "ALU",                 "alu",    0xc0000000, 0x80000000, COUNT_CLASS},
    { "Load/Store",          "ldst",   0xc0000000, 0xc0000000, COUNT_CLASS},
    /* Unclassified: mask 0 matches anything not caught above */
    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
};
127  
128  static InsnClassExecCount sparc64_insn_classes[] = {
129      { "SetHi & Branches",     "op0",   0xc0000000, 0x00000000, COUNT_CLASS},
130      { "Call",                 "op1",   0xc0000000, 0x40000000, COUNT_CLASS},
131      { "Arith/Logical/Move",   "op2",   0xc0000000, 0x80000000, COUNT_CLASS},
132      { "Arith/Logical/Move",   "op3",   0xc0000000, 0xc0000000, COUNT_CLASS},
133      /* Unclassified */
134      { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
135  };
136  
/*
 * Default matcher for currently unclassified architectures: the single
 * zero-mask entry matches every opcode and counts each one individually.
 */
static InsnClassExecCount default_insn_classes[] = {
    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
};
141  
/* Associates a guest target name with the class table used for it. */
typedef struct {
    /* target name compared against info->target_name; NULL matches any */
    const char *qemu_target;
    /* first entry of the class table */
    InsnClassExecCount *table;
    /* number of entries in table */
    int table_sz;
} ClassSelector;
147  
/*
 * All known class tables. The list is searched in order at install
 * time; the NULL-named entry matches any target and so must stay last.
 */
static ClassSelector class_tables[] = {
    { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
    { "sparc",   sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
    { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
    { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
};
154  
/* Class table selected for the current guest in qemu_plugin_install(). */
static InsnClassExecCount *class_table;
/* Number of entries in class_table. */
static int class_table_sz;
157  
158  static gint cmp_exec_count(gconstpointer a, gconstpointer b)
159  {
160      InsnExecCount *ea = (InsnExecCount *) a;
161      InsnExecCount *eb = (InsnExecCount *) b;
162      uint64_t count_a = qemu_plugin_u64_sum(ea->count);
163      uint64_t count_b = qemu_plugin_u64_sum(eb->count);
164      return count_a > count_b ? -1 : 1;
165  }
166  
167  static void free_record(gpointer data)
168  {
169      InsnExecCount *rec = (InsnExecCount *) data;
170      qemu_plugin_scoreboard_free(rec->count.score);
171      g_free(rec->insn);
172      g_free(rec);
173  }
174  
175  static void plugin_exit(qemu_plugin_id_t id, void *p)
176  {
177      g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
178      int i;
179      uint64_t total_count;
180      GList *counts;
181      InsnClassExecCount *class = NULL;
182  
183      for (i = 0; i < class_table_sz; i++) {
184          class = &class_table[i];
185          switch (class->what) {
186          case COUNT_CLASS:
187              total_count = qemu_plugin_u64_sum(class->count);
188              if (total_count || verbose) {
189                  g_string_append_printf(report,
190                                         "Class: %-24s\t(%" PRId64 " hits)\n",
191                                         class->class,
192                                         total_count);
193              }
194              break;
195          case COUNT_INDIVIDUAL:
196              g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
197                                     class->class);
198              break;
199          case COUNT_NONE:
200              g_string_append_printf(report, "Class: %-24s\tnot counted\n",
201                                     class->class);
202              break;
203          default:
204              break;
205          }
206      }
207  
208      counts = g_hash_table_get_values(insns);
209      if (counts && g_list_next(counts)) {
210          g_string_append_printf(report, "Individual Instructions:\n");
211          counts = g_list_sort(counts, cmp_exec_count);
212  
213          for (i = 0; i < limit && g_list_next(counts);
214               i++, counts = g_list_next(counts)) {
215              InsnExecCount *rec = (InsnExecCount *) counts->data;
216              g_string_append_printf(report,
217                                     "Instr: %-24s\t(%" PRId64 " hits)"
218                                     "\t(op=0x%08x/%s)\n",
219                                     rec->insn,
220                                     qemu_plugin_u64_sum(rec->count),
221                                     rec->opcode,
222                                     rec->class ?
223                                     rec->class->class : "un-categorised");
224          }
225          g_list_free(counts);
226      }
227  
228      g_hash_table_destroy(insns);
229      for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
230          for (int j = 0; j < class_tables[i].table_sz; ++j) {
231              qemu_plugin_scoreboard_free(class_tables[i].table[j].count.score);
232          }
233      }
234  
235  
236      qemu_plugin_outs(report->str);
237  }
238  
239  static void plugin_init(void)
240  {
241      insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
242  }
243  
244  static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
245  {
246      struct qemu_plugin_scoreboard *score = udata;
247      qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(score), cpu_index, 1);
248  }
249  
250  static struct qemu_plugin_scoreboard *find_counter(
251      struct qemu_plugin_insn *insn)
252  {
253      int i;
254      uint64_t *cnt = NULL;
255      uint32_t opcode = 0;
256      /* if opcode is greater than 32 bits, we should refactor insn hash table. */
257      G_STATIC_ASSERT(sizeof(opcode) == sizeof(uint32_t));
258      InsnClassExecCount *class = NULL;
259  
260      /*
261       * We only match the first 32 bits of the instruction which is
262       * fine for most RISCs but a bit limiting for CISC architectures.
263       * They would probably benefit from a more tailored plugin.
264       * However we can fall back to individual instruction counting.
265       */
266      qemu_plugin_insn_data(insn, &opcode, sizeof(opcode));
267  
268      for (i = 0; !cnt && i < class_table_sz; i++) {
269          class = &class_table[i];
270          uint32_t masked_bits = opcode & class->mask;
271          if (masked_bits == class->pattern) {
272              break;
273          }
274      }
275  
276      g_assert(class);
277  
278      switch (class->what) {
279      case COUNT_NONE:
280          return NULL;
281      case COUNT_CLASS:
282          return class->count.score;
283      case COUNT_INDIVIDUAL:
284      {
285          InsnExecCount *icount;
286  
287          g_mutex_lock(&lock);
288          icount = (InsnExecCount *) g_hash_table_lookup(insns,
289                                                         (gpointer)(intptr_t) opcode);
290  
291          if (!icount) {
292              icount = g_new0(InsnExecCount, 1);
293              icount->opcode = opcode;
294              icount->insn = qemu_plugin_insn_disas(insn);
295              icount->class = class;
296              struct qemu_plugin_scoreboard *score =
297                  qemu_plugin_scoreboard_new(sizeof(uint64_t));
298              icount->count = qemu_plugin_scoreboard_u64(score);
299  
300              g_hash_table_insert(insns, (gpointer)(intptr_t) opcode, icount);
301          }
302          g_mutex_unlock(&lock);
303  
304          return icount->count.score;
305      }
306      default:
307          g_assert_not_reached();
308      }
309  
310      return NULL;
311  }
312  
313  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
314  {
315      size_t n = qemu_plugin_tb_n_insns(tb);
316      size_t i;
317  
318      for (i = 0; i < n; i++) {
319          struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
320          struct qemu_plugin_scoreboard *cnt = find_counter(insn);
321  
322          if (cnt) {
323              if (do_inline) {
324                  qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
325                      insn, QEMU_PLUGIN_INLINE_ADD_U64,
326                      qemu_plugin_scoreboard_u64(cnt), 1);
327              } else {
328                  qemu_plugin_register_vcpu_insn_exec_cb(
329                      insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
330              }
331          }
332      }
333  }
334  
335  QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
336                                             const qemu_info_t *info,
337                                             int argc, char **argv)
338  {
339      int i;
340  
341      for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
342          for (int j = 0; j < class_tables[i].table_sz; ++j) {
343              struct qemu_plugin_scoreboard *score =
344                  qemu_plugin_scoreboard_new(sizeof(uint64_t));
345              class_tables[i].table[j].count = qemu_plugin_scoreboard_u64(score);
346          }
347      }
348  
349      /* Select a class table appropriate to the guest architecture */
350      for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
351          ClassSelector *entry = &class_tables[i];
352          if (!entry->qemu_target ||
353              strcmp(entry->qemu_target, info->target_name) == 0) {
354              class_table = entry->table;
355              class_table_sz = entry->table_sz;
356              break;
357          }
358      }
359  
360      for (i = 0; i < argc; i++) {
361          char *p = argv[i];
362          g_auto(GStrv) tokens = g_strsplit(p, "=", -1);
363          if (g_strcmp0(tokens[0], "inline") == 0) {
364              if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
365                  fprintf(stderr, "boolean argument parsing failed: %s\n", p);
366                  return -1;
367              }
368          } else if (g_strcmp0(tokens[0], "verbose") == 0) {
369              if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
370                  fprintf(stderr, "boolean argument parsing failed: %s\n", p);
371                  return -1;
372              }
373          } else if (g_strcmp0(tokens[0], "count") == 0) {
374              char *value = tokens[1];
375              int j;
376              CountType type = COUNT_INDIVIDUAL;
377              if (*value == '!') {
378                  type = COUNT_NONE;
379                  value++;
380              }
381              for (j = 0; j < class_table_sz; j++) {
382                  if (strcmp(value, class_table[j].opt) == 0) {
383                      class_table[j].what = type;
384                      break;
385                  }
386              }
387          } else {
388              fprintf(stderr, "option parsing failed: %s\n", p);
389              return -1;
390          }
391      }
392  
393      plugin_init();
394  
395      qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
396      qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
397      return 0;
398  }
399