1 /* 2 * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org> 3 * 4 * How vectorised is this code? 5 * 6 * Attempt to measure the amount of vectorisation that has been done 7 * on some code by counting classes of instruction. 8 * 9 * License: GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 #include <inttypes.h> 13 #include <assert.h> 14 #include <stdlib.h> 15 #include <inttypes.h> 16 #include <string.h> 17 #include <unistd.h> 18 #include <stdio.h> 19 #include <glib.h> 20 21 #include <qemu-plugin.h> 22 23 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; 24 25 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 26 27 typedef enum { 28 COUNT_CLASS, 29 COUNT_INDIVIDUAL, 30 COUNT_NONE 31 } CountType; 32 33 static int limit = 50; 34 static bool do_inline; 35 static bool verbose; 36 37 static GMutex lock; 38 static GHashTable *insns; 39 40 typedef struct { 41 const char *class; 42 const char *opt; 43 uint32_t mask; 44 uint32_t pattern; 45 CountType what; 46 qemu_plugin_u64 count; 47 } InsnClassExecCount; 48 49 typedef struct { 50 char *insn; 51 uint32_t opcode; 52 qemu_plugin_u64 count; 53 InsnClassExecCount *class; 54 } InsnExecCount; 55 56 /* 57 * Matchers for classes of instructions, order is important. 58 * 59 * Your most precise match must be before looser matches. If no match 60 * is found in the table we can create an individual entry. 61 * 62 * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0 63 */ 64 static InsnClassExecCount aarch64_insn_classes[] = { 65 /* "Reserved"" */ 66 { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE}, 67 { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS}, 68 { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS}, 69 /* Data Processing Immediate */ 70 { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS}, 71 { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS}, 72 { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS}, 73 { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS}, 74 { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS}, 75 { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS}, 76 { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS}, 77 { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS}, 78 /* Branches */ 79 { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS}, 80 { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS}, 81 { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE}, 82 { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS}, 83 { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS}, 84 { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS}, 85 { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS}, 86 { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS}, 87 { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS}, 88 { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS}, 89 { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS}, 90 { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS}, 91 { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS}, 92 /* Loads and Stores */ 93 { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS}, 94 { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS}, 95 { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS}, 96 { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS}, 97 { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS}, 98 { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS}, 99 { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS}, 100 { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS}, 101 { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS}, 102 { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS}, 103 { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS}, 104 { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS}, 105 { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS}, 106 { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS}, 107 { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS}, 108 /* Data Processing Register */ 109 { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS}, 110 /* Scalar FP */ 111 { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS}, 112 /* Unclassified */ 113 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS}, 114 }; 115 116 static InsnClassExecCount sparc32_insn_classes[] = { 117 { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS}, 118 { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS}, 119 { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS}, 120 { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS}, 121 { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS}, 122 { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS}, 123 { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS}, 124 /* Unclassified */ 125 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, 126 }; 127 128 static InsnClassExecCount sparc64_insn_classes[] = { 129 { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS}, 130 { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS}, 131 { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS}, 132 { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS}, 133 /* Unclassified */ 134 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, 135 }; 136 137 /* Default matcher for currently unclassified architectures */ 138 static InsnClassExecCount default_insn_classes[] = { 139 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL}, 140 }; 141 142 typedef struct { 143 const char *qemu_target; 144 InsnClassExecCount *table; 145 int table_sz; 146 } ClassSelector; 147 148 static ClassSelector class_tables[] = { 149 { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) }, 150 { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) }, 151 { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) }, 152 { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) }, 153 }; 154 155 static InsnClassExecCount *class_table; 156 static int class_table_sz; 157 158 static gint cmp_exec_count(gconstpointer a, gconstpointer b, gpointer d) 159 { 160 InsnExecCount *ea = (InsnExecCount *) a; 161 InsnExecCount *eb = (InsnExecCount *) b; 162 uint64_t count_a = qemu_plugin_u64_sum(ea->count); 163 uint64_t count_b = qemu_plugin_u64_sum(eb->count); 164 return count_a > count_b ? -1 : 1; 165 } 166 167 static void free_record(gpointer data) 168 { 169 InsnExecCount *rec = (InsnExecCount *) data; 170 qemu_plugin_scoreboard_free(rec->count.score); 171 g_free(rec->insn); 172 g_free(rec); 173 } 174 175 static void plugin_exit(qemu_plugin_id_t id, void *p) 176 { 177 g_autoptr(GString) report = g_string_new("Instruction Classes:\n"); 178 int i; 179 uint64_t total_count; 180 GList *counts; 181 InsnClassExecCount *class = NULL; 182 183 for (i = 0; i < class_table_sz; i++) { 184 class = &class_table[i]; 185 switch (class->what) { 186 case COUNT_CLASS: 187 total_count = qemu_plugin_u64_sum(class->count); 188 if (total_count || verbose) { 189 g_string_append_printf(report, 190 "Class: %-24s\t(%" PRId64 " hits)\n", 191 class->class, 192 total_count); 193 } 194 break; 195 case COUNT_INDIVIDUAL: 196 g_string_append_printf(report, "Class: %-24s\tcounted individually\n", 197 class->class); 198 break; 199 case COUNT_NONE: 200 g_string_append_printf(report, "Class: %-24s\tnot counted\n", 201 class->class); 202 break; 203 default: 204 break; 205 } 206 } 207 208 counts = g_hash_table_get_values(insns); 209 if (counts && g_list_next(counts)) { 210 g_string_append_printf(report, "Individual Instructions:\n"); 211 counts = g_list_sort_with_data(counts, cmp_exec_count, NULL); 212 213 for (i = 0; i < limit && g_list_next(counts); 214 i++, counts = g_list_next(counts)) { 215 InsnExecCount *rec = (InsnExecCount *) counts->data; 216 g_string_append_printf(report, 217 "Instr: %-24s\t(%" PRId64 " hits)" 218 "\t(op=0x%08x/%s)\n", 219 rec->insn, 220 qemu_plugin_u64_sum(rec->count), 221 rec->opcode, 222 rec->class ? 223 rec->class->class : "un-categorised"); 224 } 225 g_list_free(counts); 226 } 227 228 g_hash_table_destroy(insns); 229 for (i = 0; i < ARRAY_SIZE(class_tables); i++) { 230 for (int j = 0; j < class_tables[i].table_sz; ++j) { 231 qemu_plugin_scoreboard_free(class_tables[i].table[j].count.score); 232 } 233 } 234 235 236 qemu_plugin_outs(report->str); 237 } 238 239 static void plugin_init(void) 240 { 241 insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record); 242 } 243 244 static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) 245 { 246 struct qemu_plugin_scoreboard *score = udata; 247 qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(score), cpu_index, 1); 248 } 249 250 static struct qemu_plugin_scoreboard *find_counter( 251 struct qemu_plugin_insn *insn) 252 { 253 int i; 254 uint64_t *cnt = NULL; 255 uint32_t opcode = 0; 256 /* if opcode is greater than 32 bits, we should refactor insn hash table. */ 257 G_STATIC_ASSERT(sizeof(opcode) == sizeof(uint32_t)); 258 InsnClassExecCount *class = NULL; 259 260 /* 261 * We only match the first 32 bits of the instruction which is 262 * fine for most RISCs but a bit limiting for CISC architectures. 263 * They would probably benefit from a more tailored plugin. 264 * However we can fall back to individual instruction counting. 265 */ 266 qemu_plugin_insn_data(insn, &opcode, sizeof(opcode)); 267 268 for (i = 0; !cnt && i < class_table_sz; i++) { 269 class = &class_table[i]; 270 uint32_t masked_bits = opcode & class->mask; 271 if (masked_bits == class->pattern) { 272 break; 273 } 274 } 275 276 g_assert(class); 277 278 switch (class->what) { 279 case COUNT_NONE: 280 return NULL; 281 case COUNT_CLASS: 282 return class->count.score; 283 case COUNT_INDIVIDUAL: 284 { 285 InsnExecCount *icount; 286 287 g_mutex_lock(&lock); 288 icount = (InsnExecCount *) g_hash_table_lookup(insns, 289 (gpointer)(intptr_t) opcode); 290 291 if (!icount) { 292 icount = g_new0(InsnExecCount, 1); 293 icount->opcode = opcode; 294 icount->insn = qemu_plugin_insn_disas(insn); 295 icount->class = class; 296 struct qemu_plugin_scoreboard *score = 297 qemu_plugin_scoreboard_new(sizeof(uint64_t)); 298 icount->count = qemu_plugin_scoreboard_u64(score); 299 300 g_hash_table_insert(insns, (gpointer)(intptr_t) opcode, icount); 301 } 302 g_mutex_unlock(&lock); 303 304 return icount->count.score; 305 } 306 default: 307 g_assert_not_reached(); 308 } 309 310 return NULL; 311 } 312 313 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) 314 { 315 size_t n = qemu_plugin_tb_n_insns(tb); 316 size_t i; 317 318 for (i = 0; i < n; i++) { 319 struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); 320 struct qemu_plugin_scoreboard *cnt = find_counter(insn); 321 322 if (cnt) { 323 if (do_inline) { 324 qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu( 325 insn, QEMU_PLUGIN_INLINE_ADD_U64, 326 qemu_plugin_scoreboard_u64(cnt), 1); 327 } else { 328 qemu_plugin_register_vcpu_insn_exec_cb( 329 insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt); 330 } 331 } 332 } 333 } 334 335 QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, 336 const qemu_info_t *info, 337 int argc, char **argv) 338 { 339 int i; 340 341 for (i = 0; i < ARRAY_SIZE(class_tables); i++) { 342 for (int j = 0; j < class_tables[i].table_sz; ++j) { 343 struct qemu_plugin_scoreboard *score = 344 qemu_plugin_scoreboard_new(sizeof(uint64_t)); 345 class_tables[i].table[j].count = qemu_plugin_scoreboard_u64(score); 346 } 347 } 348 349 /* Select a class table appropriate to the guest architecture */ 350 for (i = 0; i < ARRAY_SIZE(class_tables); i++) { 351 ClassSelector *entry = &class_tables[i]; 352 if (!entry->qemu_target || 353 strcmp(entry->qemu_target, info->target_name) == 0) { 354 class_table = entry->table; 355 class_table_sz = entry->table_sz; 356 break; 357 } 358 } 359 360 for (i = 0; i < argc; i++) { 361 char *p = argv[i]; 362 g_auto(GStrv) tokens = g_strsplit(p, "=", -1); 363 if (g_strcmp0(tokens[0], "inline") == 0) { 364 if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { 365 fprintf(stderr, "boolean argument parsing failed: %s\n", p); 366 return -1; 367 } 368 } else if (g_strcmp0(tokens[0], "verbose") == 0) { 369 if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { 370 fprintf(stderr, "boolean argument parsing failed: %s\n", p); 371 return -1; 372 } 373 } else if (g_strcmp0(tokens[0], "count") == 0) { 374 char *value = tokens[1]; 375 int j; 376 CountType type = COUNT_INDIVIDUAL; 377 if (*value == '!') { 378 type = COUNT_NONE; 379 value++; 380 } 381 for (j = 0; j < class_table_sz; j++) { 382 if (strcmp(value, class_table[j].opt) == 0) { 383 class_table[j].what = type; 384 break; 385 } 386 } 387 } else { 388 fprintf(stderr, "option parsing failed: %s\n", p); 389 return -1; 390 } 391 } 392 393 plugin_init(); 394 395 qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); 396 qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); 397 return 0; 398 } 399