1 /*
2 * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
3 *
4 * License: GNU GPL, version 2 or later.
5 * See the COPYING file in the top-level directory.
6 */
7
8 #include <inttypes.h>
9 #include <stdio.h>
10 #include <glib.h>
11
12 #include <qemu-plugin.h>
13
/* Parse a base-10 integer from the string @x using GLib's locale-independent parser. */
#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10)

/* Plugin API version exported to QEMU. */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

/* Kind of memory accesses the memory callback is registered for. */
static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

/* Maps an instruction address (key: &InsnData.addr) to its InsnData record. */
static GHashTable *miss_ht;

/* Serializes lookups/insertions on miss_ht during translation. */
static GMutex hashtable_lock;
/* Random source used by the RAND eviction policy (created in policy_init). */
static GRand *rng;

/* Maximum number of instructions listed per table by log_top_insns(). */
static int limit;
/* True when running under system emulation (copied from qemu_info_t). */
static bool sys;
27
/* Replacement strategies supported by the simulated caches. */
enum EvictionPolicy {
    LRU,   /* evict the least recently used block */
    FIFO,  /* evict the block that was cached first */
    RAND,  /* evict a randomly chosen block */
};

/*
 * Policy selected through the "evict" plugin argument. Kept file-local like
 * the rest of the plugin state so the generic name cannot clash with symbols
 * from other objects.
 */
static enum EvictionPolicy policy;
35
/*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can be
 * put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
 *
 * Each block contains the stored tag and a valid bit. Since this is not
 * a functional simulator, the data itself is not stored. We only identify
 * whether a block is in the cache or not by searching for its tag.
 *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
 *
 * An address is logically divided into three portions: The block offset,
 * the set number, and the tag.
 *
 * The set number is used to identify the set in which the block may exist.
 * The tag is compared against all the tags of a set to search for a match. If a
 * match is found, then the access is a hit.
 *
 * The CacheSet also contains bookkeeping information about eviction details.
 */
58
/* One cache line: only the tag and validity are tracked, never the data. */
typedef struct {
    uint64_t tag;   /* tag bits of the cached address (addr & tag_mask) */
    bool valid;     /* false until the block is first filled */
} CacheBlock;
63
/* One associative set plus the per-set eviction bookkeeping. */
typedef struct {
    CacheBlock *blocks;         /* array of `assoc` blocks */
    uint64_t *lru_priorities;   /* LRU only: last-use generation per block */
    uint64_t lru_gen_counter;   /* LRU only: generation counter of the set */
    GQueue *fifo_queue;         /* FIFO only: block indices in fill order */
} CacheSet;
70
/* A single simulated cache and its hit/miss statistics. */
typedef struct {
    CacheSet *sets;       /* array of num_sets sets */
    int num_sets;         /* cachesize / (blksize * assoc) */
    int cachesize;        /* total size as configured */
    int assoc;            /* blocks per set */
    int blksize_shift;    /* log2 of the block size */
    uint64_t set_mask;    /* address bits selecting the set */
    uint64_t tag_mask;    /* address bits forming the tag */
    uint64_t accesses;    /* number of simulated accesses */
    uint64_t misses;      /* number of simulated misses */
} Cache;
82
/* Per-instruction record stored in miss_ht and passed to the callbacks. */
typedef struct {
    char *disas_str;        /* disassembly text; released in insn_free() */
    const char *symbol;     /* symbol reported by QEMU, may be NULL */
    uint64_t addr;          /* instruction address, also the hash key */
    uint64_t l1_dmisses;    /* L1 data-cache misses caused by this insn */
    uint64_t l1_imisses;    /* L1 instruction-cache misses of this insn */
    uint64_t l2_misses;     /* L2 misses attributed to this insn */
} InsnData;
91
92 void (*update_hit)(Cache *cache, int set, int blk);
93 void (*update_miss)(Cache *cache, int set, int blk);
94
95 void (*metadata_init)(Cache *cache);
96 void (*metadata_destroy)(Cache *cache);
97
/* Number of simulated cores; every per-core array below has this length. */
static int cores;
static Cache **l1_dcaches, **l1_icaches;

/* Whether the unified L2 caches are simulated at all. */
static bool use_l2;
static Cache **l2_ucaches;

/* One lock per core for each cache level, indexed like the cache arrays. */
static GMutex *l1_dcache_locks;
static GMutex *l1_icache_locks;
static GMutex *l2_ucache_locks;

/* Totals over all cores, filled in by sum_stats(). */
static uint64_t l1_dmem_accesses;
static uint64_t l1_imem_accesses;
static uint64_t l1_imisses;
static uint64_t l1_dmisses;

static uint64_t l2_mem_accesses;
static uint64_t l2_misses;
115
/*
 * Return log2(num).
 *
 * @num must be a strictly positive power of two. Zero and negative values
 * used to slip through the bit test (0 & -1 == 0); they are now rejected by
 * the assertion instead of silently yielding a bogus shift amount.
 */
static int pow_of_two(int num)
{
    int ret = 0;

    g_assert(num > 0 && (num & (num - 1)) == 0);
    while (num /= 2) {
        ret++;
    }
    return ret;
}
125
/*
 * LRU eviction policy: For each set, a generation counter is maintained
 * alongside a priority array.
 *
 * On each set access, the generation counter is incremented.
 *
 * On a cache hit: The hit-block is assigned the current generation counter,
 * indicating that it is the most recently used block.
 *
 * On a cache miss: The block with the least priority is searched and replaced
 * with the newly-cached block, of which the priority is set to the current
 * generation number.
 */
139
/* Give every set of @cache a zeroed priority array and a fresh generation counter. */
static void lru_priorities_init(Cache *cache)
{
    for (int idx = 0; idx < cache->num_sets; idx++) {
        CacheSet *set = &cache->sets[idx];

        set->lru_gen_counter = 0;
        set->lru_priorities = g_new0(uint64_t, cache->assoc);
    }
}
149
/*
 * Mark block @blk_idx of set @set_idx as the most recently used one: it takes
 * the set's current generation, and the generation then advances.
 */
static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
{
    CacheSet *s = cache->sets + set_idx;

    s->lru_priorities[blk_idx] = s->lru_gen_counter++;
}
156
/*
 * Return the index of the least recently used block of set @set_idx, i.e. the
 * block with the smallest stored generation. Ties resolve to the lowest index.
 *
 * The running minimum is kept as uint64_t, matching the priority array; an
 * int would truncate generations once they exceed INT_MAX and pick the wrong
 * victim.
 */
static int lru_get_lru_block(Cache *cache, int set_idx)
{
    const uint64_t *priorities = cache->sets[set_idx].lru_priorities;
    uint64_t min_priority = priorities[0];
    int min_idx = 0;

    for (int i = 1; i < cache->assoc; i++) {
        if (priorities[i] < min_priority) {
            min_priority = priorities[i];
            min_idx = i;
        }
    }
    return min_idx;
}
172
/* Release the LRU priority array of every set in @cache. */
static void lru_priorities_destroy(Cache *cache)
{
    int idx = 0;

    while (idx < cache->num_sets) {
        g_free(cache->sets[idx].lru_priorities);
        idx++;
    }
}
181
182 /*
183 * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
184 * stores accesses to the cache.
185 *
186 * On a compulsory miss: The block index is enqueued to the fifo_queue to
187 * indicate that it's the latest cached block.
188 *
189 * On a conflict miss: The first-in block is removed from the cache and the new
190 * block is put in its place and enqueued to the FIFO queue.
191 */
192
/* Create an empty FIFO queue for every set of @cache. */
static void fifo_init(Cache *cache)
{
    for (int idx = 0; idx < cache->num_sets; idx++) {
        CacheSet *set = &cache->sets[idx];

        set->fifo_queue = g_queue_new();
    }
}
201
/*
 * Remove and return the oldest block index of set @set. New entries are
 * pushed at the head, so the oldest one sits at the tail.
 */
static int fifo_get_first_block(Cache *cache, int set)
{
    gpointer oldest = g_queue_pop_tail(cache->sets[set].fifo_queue);

    return GPOINTER_TO_INT(oldest);
}
207
/* Record that block @blk_idx of set @set has just been filled. */
static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
{
    g_queue_push_head(cache->sets[set].fifo_queue, GINT_TO_POINTER(blk_idx));
}
213
/* Free the FIFO queue of every set in @cache. */
static void fifo_destroy(Cache *cache)
{
    int idx = 0;

    while (idx < cache->num_sets) {
        g_queue_free(cache->sets[idx].fifo_queue);
        idx++;
    }
}
222
/* Return the tag portion of @addr, left in place (not shifted down). */
static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
{
    const uint64_t mask = cache->tag_mask;

    return mask & addr;
}
227
/* Return the index of the set that @addr maps to. */
static inline uint64_t extract_set(Cache *cache, uint64_t addr)
{
    uint64_t set_bits = addr & cache->set_mask;

    return set_bits >> cache->blksize_shift;
}
232
/*
 * Explain why a cache geometry is unusable.
 *
 * Returns a static, human-readable message, or NULL when the parameters are
 * acceptable. Besides the divisibility rules, non-positive values are
 * rejected up front (they would divide by zero or yield an empty cache), and
 * the block size must be a power of two because cache_init() derives the
 * block-offset shift from it.
 */
static const char *cache_config_error(int blksize, int assoc, int cachesize)
{
    if (blksize <= 0 || assoc <= 0 || cachesize <= 0) {
        return "cache size, block size and associativity must be positive";
    } else if (cachesize % blksize != 0) {
        return "cache size must be divisible by block size";
    } else if (cachesize % ((int64_t) blksize * assoc) != 0) {
        /* 64-bit product: blksize * assoc may not fit in an int */
        return "cache size must be divisible by set size (assoc * block size)";
    } else if ((blksize & (blksize - 1)) != 0) {
        return "block size must be a power of two";
    } else {
        return NULL;
    }
}

/*
 * Return true when the geometry cannot be used to build a cache. Defined in
 * terms of cache_config_error() so that the predicate and the message shown
 * to the user can never disagree.
 */
static bool bad_cache_params(int blksize, int assoc, int cachesize)
{
    return cache_config_error(blksize, assoc, cachesize) != NULL;
}
248
cache_init(int blksize,int assoc,int cachesize)249 static Cache *cache_init(int blksize, int assoc, int cachesize)
250 {
251 Cache *cache;
252 int i;
253 uint64_t blk_mask;
254
255 /*
256 * This function shall not be called directly, and hence expects suitable
257 * parameters.
258 */
259 g_assert(!bad_cache_params(blksize, assoc, cachesize));
260
261 cache = g_new(Cache, 1);
262 cache->assoc = assoc;
263 cache->cachesize = cachesize;
264 cache->num_sets = cachesize / (blksize * assoc);
265 cache->sets = g_new(CacheSet, cache->num_sets);
266 cache->blksize_shift = pow_of_two(blksize);
267 cache->accesses = 0;
268 cache->misses = 0;
269
270 for (i = 0; i < cache->num_sets; i++) {
271 cache->sets[i].blocks = g_new0(CacheBlock, assoc);
272 }
273
274 blk_mask = blksize - 1;
275 cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
276 cache->tag_mask = ~(cache->set_mask | blk_mask);
277
278 if (metadata_init) {
279 metadata_init(cache);
280 }
281
282 return cache;
283 }
284
caches_init(int blksize,int assoc,int cachesize)285 static Cache **caches_init(int blksize, int assoc, int cachesize)
286 {
287 Cache **caches;
288 int i;
289
290 if (bad_cache_params(blksize, assoc, cachesize)) {
291 return NULL;
292 }
293
294 caches = g_new(Cache *, cores);
295
296 for (i = 0; i < cores; i++) {
297 caches[i] = cache_init(blksize, assoc, cachesize);
298 }
299
300 return caches;
301 }
302
/* Return the index of the first not-yet-filled block in @set, or -1 if the set is full. */
static int get_invalid_block(Cache *cache, uint64_t set)
{
    const CacheBlock *blocks = cache->sets[set].blocks;
    int found = -1;

    for (int idx = 0; idx < cache->assoc && found == -1; idx++) {
        if (!blocks[idx].valid) {
            found = idx;
        }
    }

    return found;
}
315
/* Pick the block of @set to evict, according to the configured policy. */
static int get_replaced_block(Cache *cache, int set)
{
    if (policy == RAND) {
        return g_rand_int_range(rng, 0, cache->assoc);
    }
    if (policy == LRU) {
        return lru_get_lru_block(cache, set);
    }
    if (policy == FIFO) {
        return fifo_get_first_block(cache, set);
    }
    g_assert_not_reached();
}
329
/*
 * Look @addr up in @cache.
 *
 * Returns the index, within the address's set, of the valid block whose tag
 * matches, or -1 when there is none.
 */
static int in_cache(Cache *cache, uint64_t addr)
{
    const uint64_t wanted = extract_tag(cache, addr);
    const CacheBlock *blocks = cache->sets[extract_set(cache, addr)].blocks;
    int idx = 0;

    while (idx < cache->assoc) {
        if (blocks[idx].valid && blocks[idx].tag == wanted) {
            return idx;
        }
        idx++;
    }

    return -1;
}
347
348 /**
349 * access_cache(): Simulate a cache access
350 * @cache: The cache under simulation
351 * @addr: The address of the requested memory location
352 *
353 * Returns true if the requested data is hit in the cache and false when missed.
354 * The cache is updated on miss for the next access.
355 */
access_cache(Cache * cache,uint64_t addr)356 static bool access_cache(Cache *cache, uint64_t addr)
357 {
358 int hit_blk, replaced_blk;
359 uint64_t tag, set;
360
361 tag = extract_tag(cache, addr);
362 set = extract_set(cache, addr);
363
364 hit_blk = in_cache(cache, addr);
365 if (hit_blk != -1) {
366 if (update_hit) {
367 update_hit(cache, set, hit_blk);
368 }
369 return true;
370 }
371
372 replaced_blk = get_invalid_block(cache, set);
373
374 if (replaced_blk == -1) {
375 replaced_blk = get_replaced_block(cache, set);
376 }
377
378 if (update_miss) {
379 update_miss(cache, set, replaced_blk);
380 }
381
382 cache->sets[set].blocks[replaced_blk].tag = tag;
383 cache->sets[set].blocks[replaced_blk].valid = true;
384
385 return false;
386 }
387
/*
 * Memory-access callback: run the access through the vCPU's L1 data cache
 * and, on an L1 miss with L2 enabled, through its L2 cache.
 *
 * I/O accesses are ignored. The physical address is used when QEMU provides
 * hardware address information, the virtual address otherwise. @userdata is
 * the InsnData of the instruction performing the access; its miss counters
 * are bumped atomically because several vCPUs may share the record.
 */
static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
                            uint64_t vaddr, void *userdata)
{
    InsnData *insn = userdata;
    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
    uint64_t effective_addr;
    int cache_idx;
    bool hit_in_l1;

    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
        return;
    }

    if (hwaddr) {
        effective_addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
    } else {
        effective_addr = vaddr;
    }
    cache_idx = vcpu_index % cores;

    g_mutex_lock(&l1_dcache_locks[cache_idx]);
    hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr);
    if (!hit_in_l1) {
        __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST);
        l1_dcaches[cache_idx]->misses++;
    }
    l1_dcaches[cache_idx]->accesses++;
    g_mutex_unlock(&l1_dcache_locks[cache_idx]);

    /* L2 is only consulted for L1 misses, and only when it is simulated. */
    if (use_l2 && !hit_in_l1) {
        g_mutex_lock(&l2_ucache_locks[cache_idx]);
        if (!access_cache(l2_ucaches[cache_idx], effective_addr)) {
            __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
            l2_ucaches[cache_idx]->misses++;
        }
        l2_ucaches[cache_idx]->accesses++;
        g_mutex_unlock(&l2_ucache_locks[cache_idx]);
    }
}
429
vcpu_insn_exec(unsigned int vcpu_index,void * userdata)430 static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
431 {
432 uint64_t insn_addr;
433 InsnData *insn;
434 int cache_idx;
435 bool hit_in_l1;
436
437 insn_addr = ((InsnData *) userdata)->addr;
438
439 cache_idx = vcpu_index % cores;
440 g_mutex_lock(&l1_icache_locks[cache_idx]);
441 hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr);
442 if (!hit_in_l1) {
443 insn = userdata;
444 __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST);
445 l1_icaches[cache_idx]->misses++;
446 }
447 l1_icaches[cache_idx]->accesses++;
448 g_mutex_unlock(&l1_icache_locks[cache_idx]);
449
450 if (hit_in_l1 || !use_l2) {
451 /* No need to access L2 */
452 return;
453 }
454
455 g_mutex_lock(&l2_ucache_locks[cache_idx]);
456 if (!access_cache(l2_ucaches[cache_idx], insn_addr)) {
457 insn = userdata;
458 __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
459 l2_ucaches[cache_idx]->misses++;
460 }
461 l2_ucaches[cache_idx]->accesses++;
462 g_mutex_unlock(&l2_ucache_locks[cache_idx]);
463 }
464
/*
 * Translation callback: attach the memory and execution callbacks to every
 * instruction of the translated block.
 *
 * Each instruction is identified by the value of qemu_plugin_insn_haddr() in
 * system emulation and of qemu_plugin_insn_vaddr() otherwise.
 */
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    const size_t count = qemu_plugin_tb_n_insns(tb);

    for (size_t idx = 0; idx < count; idx++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
        InsnData *entry;
        uint64_t key;

        if (sys) {
            key = (uintptr_t) qemu_plugin_insn_haddr(insn);
        } else {
            key = qemu_plugin_insn_vaddr(insn);
        }

        /*
         * The same instruction can be translated more than once. Reuse its
         * existing record so the counters keep accumulating, and only create
         * a new one the first time the address is seen.
         */
        g_mutex_lock(&hashtable_lock);
        entry = g_hash_table_lookup(miss_ht, &key);
        if (!entry) {
            entry = g_new0(InsnData, 1);
            entry->addr = key;
            entry->disas_str = qemu_plugin_insn_disas(insn);
            entry->symbol = qemu_plugin_insn_symbol(insn);
            g_hash_table_insert(miss_ht, &entry->addr, entry);
        }
        g_mutex_unlock(&hashtable_lock);

        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
                                         QEMU_PLUGIN_CB_NO_REGS,
                                         rw, entry);

        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
                                               QEMU_PLUGIN_CB_NO_REGS, entry);
    }
}
501
/* Hash-table value destructor: free an InsnData and the disassembly string it owns. */
static void insn_free(gpointer data)
{
    InsnData *record = data;

    g_free(record->disas_str);
    g_free(record);
}
508
/* Release a cache: its blocks, its policy metadata (if any), its sets, and the cache itself. */
static void cache_free(Cache *cache)
{
    int idx = 0;

    while (idx < cache->num_sets) {
        g_free(cache->sets[idx].blocks);
        idx++;
    }

    /* Must run before the sets go away: the metadata lives inside them. */
    if (metadata_destroy) {
        metadata_destroy(cache);
    }

    g_free(cache->sets);
    g_free(cache);
}
522
/*
 * Release a per-core cache array created by caches_init(): every cache in it
 * and then the array itself, which was previously leaked.
 *
 * @caches must not be used after this call.
 */
static void caches_free(Cache **caches)
{
    int i;

    for (i = 0; i < cores; i++) {
        cache_free(caches[i]);
    }

    g_free(caches);
}
531
/*
 * Append one row of statistics (without the leading row label) to @line.
 *
 * Prints L1 data and instruction accesses, misses and miss rates. The L2
 * columns are printed whenever L2 simulation is enabled, so every row lines
 * up with the header; previously they disappeared for rows whose L2 access
 * or miss count happened to be zero. Rates are reported as 0 when there were
 * no accesses, which also avoids dividing by zero.
 */
static void append_stats_line(GString *line,
                              uint64_t l1_daccess, uint64_t l1_dmisses,
                              uint64_t l1_iaccess, uint64_t l1_imisses,
                              uint64_t l2_access, uint64_t l2_misses)
{
    double l1_dmiss_rate = l1_daccess ?
        ((double) l1_dmisses) / (l1_daccess) * 100.0 : 0.0;
    double l1_imiss_rate = l1_iaccess ?
        ((double) l1_imisses) / (l1_iaccess) * 100.0 : 0.0;

    g_string_append_printf(line, "%-14" PRIu64 " %-12" PRIu64 " %9.4lf%%"
                           " %-14" PRIu64 " %-12" PRIu64 " %9.4lf%%",
                           l1_daccess,
                           l1_dmisses,
                           l1_dmiss_rate,
                           l1_iaccess,
                           l1_imisses,
                           l1_imiss_rate);

    if (use_l2) {
        double l2_miss_rate = l2_access ?
            ((double) l2_misses) / (l2_access) * 100.0 : 0.0;

        g_string_append_printf(line,
                               " %-12" PRIu64 " %-11" PRIu64 " %10.4lf%%",
                               l2_access,
                               l2_misses,
                               l2_miss_rate);
    }

    g_string_append(line, "\n");
}
560
sum_stats(void)561 static void sum_stats(void)
562 {
563 int i;
564
565 g_assert(cores > 1);
566 for (i = 0; i < cores; i++) {
567 l1_imisses += l1_icaches[i]->misses;
568 l1_dmisses += l1_dcaches[i]->misses;
569 l1_imem_accesses += l1_icaches[i]->accesses;
570 l1_dmem_accesses += l1_dcaches[i]->accesses;
571
572 if (use_l2) {
573 l2_misses += l2_ucaches[i]->misses;
574 l2_mem_accesses += l2_ucaches[i]->accesses;
575 }
576 }
577 }
578
/*
 * GCompareDataFunc ordering InsnData records by descending L1 data misses.
 *
 * Returns 0 for equal counts so the comparison is consistent in both
 * directions, as the comparator contract requires. @d is unused.
 */
static int dcmp(gconstpointer a, gconstpointer b, gpointer d)
{
    const InsnData *insn_a = a;
    const InsnData *insn_b = b;

    if (insn_a->l1_dmisses == insn_b->l1_dmisses) {
        return 0;
    }
    return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1;
}
586
/*
 * GCompareDataFunc ordering InsnData records by descending L1 instruction
 * misses.
 *
 * Returns 0 for equal counts so the comparison is consistent in both
 * directions, as the comparator contract requires. @d is unused.
 */
static int icmp(gconstpointer a, gconstpointer b, gpointer d)
{
    const InsnData *insn_a = a;
    const InsnData *insn_b = b;

    if (insn_a->l1_imisses == insn_b->l1_imisses) {
        return 0;
    }
    return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1;
}
594
/*
 * GCompareDataFunc ordering InsnData records by descending L2 misses.
 *
 * Returns 0 for equal counts so the comparison is consistent in both
 * directions, as the comparator contract requires. @d is unused.
 */
static int l2_cmp(gconstpointer a, gconstpointer b, gpointer d)
{
    const InsnData *insn_a = a;
    const InsnData *insn_b = b;

    if (insn_a->l2_misses == insn_b->l2_misses) {
        return 0;
    }
    return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1;
}
602
log_stats(void)603 static void log_stats(void)
604 {
605 int i;
606 Cache *icache, *dcache, *l2_cache = NULL;
607
608 g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
609 " dmiss rate, insn accesses,"
610 " insn misses, imiss rate");
611
612 if (use_l2) {
613 g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate");
614 }
615
616 g_string_append(rep, "\n");
617
618 for (i = 0; i < cores; i++) {
619 g_string_append_printf(rep, "%-8d", i);
620 dcache = l1_dcaches[i];
621 icache = l1_icaches[i];
622 l2_cache = use_l2 ? l2_ucaches[i] : NULL;
623 append_stats_line(rep, dcache->accesses, dcache->misses,
624 icache->accesses, icache->misses,
625 l2_cache ? l2_cache->accesses : 0,
626 l2_cache ? l2_cache->misses : 0);
627 }
628
629 if (cores > 1) {
630 sum_stats();
631 g_string_append_printf(rep, "%-8s", "sum");
632 append_stats_line(rep, l1_dmem_accesses, l1_dmisses,
633 l1_imem_accesses, l1_imisses,
634 l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0);
635 }
636
637 g_string_append(rep, "\n");
638 qemu_plugin_outs(rep->str);
639 }
640
log_top_insns(void)641 static void log_top_insns(void)
642 {
643 int i;
644 GList *curr, *miss_insns;
645 InsnData *insn;
646
647 miss_insns = g_hash_table_get_values(miss_ht);
648 miss_insns = g_list_sort_with_data(miss_insns, dcmp, NULL);
649 g_autoptr(GString) rep = g_string_new("");
650 g_string_append_printf(rep, "%s", "address, data misses, instruction\n");
651
652 for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
653 insn = (InsnData *) curr->data;
654 g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
655 if (insn->symbol) {
656 g_string_append_printf(rep, " (%s)", insn->symbol);
657 }
658 g_string_append_printf(rep, ", %" PRId64 ", %s\n",
659 insn->l1_dmisses, insn->disas_str);
660 }
661
662 miss_insns = g_list_sort_with_data(miss_insns, icmp, NULL);
663 g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");
664
665 for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
666 insn = (InsnData *) curr->data;
667 g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
668 if (insn->symbol) {
669 g_string_append_printf(rep, " (%s)", insn->symbol);
670 }
671 g_string_append_printf(rep, ", %" PRId64 ", %s\n",
672 insn->l1_imisses, insn->disas_str);
673 }
674
675 if (!use_l2) {
676 goto finish;
677 }
678
679 miss_insns = g_list_sort_with_data(miss_insns, l2_cmp, NULL);
680 g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n");
681
682 for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
683 insn = (InsnData *) curr->data;
684 g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
685 if (insn->symbol) {
686 g_string_append_printf(rep, " (%s)", insn->symbol);
687 }
688 g_string_append_printf(rep, ", %" PRId64 ", %s\n",
689 insn->l2_misses, insn->disas_str);
690 }
691
692 finish:
693 qemu_plugin_outs(rep->str);
694 g_list_free(miss_insns);
695 }
696
/*
 * Exit callback: print the statistics, then release everything the plugin
 * allocated. @id and @p are unused.
 */
static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    log_stats();
    log_top_insns();

    caches_free(l1_dcaches);
    caches_free(l1_icaches);

    g_free(l1_dcache_locks);
    g_free(l1_icache_locks);

    if (use_l2) {
        caches_free(l2_ucaches);
        g_free(l2_ucache_locks);
    }

    /* Only the RAND policy creates a generator; it was never released. */
    if (rng) {
        g_rand_free(rng);
        rng = NULL;
    }

    g_hash_table_destroy(miss_ht);
}
715
policy_init(void)716 static void policy_init(void)
717 {
718 switch (policy) {
719 case LRU:
720 update_hit = lru_update_blk;
721 update_miss = lru_update_blk;
722 metadata_init = lru_priorities_init;
723 metadata_destroy = lru_priorities_destroy;
724 break;
725 case FIFO:
726 update_miss = fifo_update_on_miss;
727 metadata_init = fifo_init;
728 metadata_destroy = fifo_destroy;
729 break;
730 case RAND:
731 rng = g_rand_new();
732 break;
733 default:
734 g_assert_not_reached();
735 }
736 }
737
738 QEMU_PLUGIN_EXPORT
qemu_plugin_install(qemu_plugin_id_t id,const qemu_info_t * info,int argc,char ** argv)739 int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
740 int argc, char **argv)
741 {
742 int i;
743 int l1_iassoc, l1_iblksize, l1_icachesize;
744 int l1_dassoc, l1_dblksize, l1_dcachesize;
745 int l2_assoc, l2_blksize, l2_cachesize;
746
747 limit = 32;
748 sys = info->system_emulation;
749
750 l1_dassoc = 8;
751 l1_dblksize = 64;
752 l1_dcachesize = l1_dblksize * l1_dassoc * 32;
753
754 l1_iassoc = 8;
755 l1_iblksize = 64;
756 l1_icachesize = l1_iblksize * l1_iassoc * 32;
757
758 l2_assoc = 16;
759 l2_blksize = 64;
760 l2_cachesize = l2_assoc * l2_blksize * 2048;
761
762 policy = LRU;
763
764 cores = sys ? info->system.smp_vcpus : 1;
765
766 for (i = 0; i < argc; i++) {
767 char *opt = argv[i];
768 g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
769
770 if (g_strcmp0(tokens[0], "iblksize") == 0) {
771 l1_iblksize = STRTOLL(tokens[1]);
772 } else if (g_strcmp0(tokens[0], "iassoc") == 0) {
773 l1_iassoc = STRTOLL(tokens[1]);
774 } else if (g_strcmp0(tokens[0], "icachesize") == 0) {
775 l1_icachesize = STRTOLL(tokens[1]);
776 } else if (g_strcmp0(tokens[0], "dblksize") == 0) {
777 l1_dblksize = STRTOLL(tokens[1]);
778 } else if (g_strcmp0(tokens[0], "dassoc") == 0) {
779 l1_dassoc = STRTOLL(tokens[1]);
780 } else if (g_strcmp0(tokens[0], "dcachesize") == 0) {
781 l1_dcachesize = STRTOLL(tokens[1]);
782 } else if (g_strcmp0(tokens[0], "limit") == 0) {
783 limit = STRTOLL(tokens[1]);
784 } else if (g_strcmp0(tokens[0], "cores") == 0) {
785 cores = STRTOLL(tokens[1]);
786 } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) {
787 use_l2 = true;
788 l2_cachesize = STRTOLL(tokens[1]);
789 } else if (g_strcmp0(tokens[0], "l2blksize") == 0) {
790 use_l2 = true;
791 l2_blksize = STRTOLL(tokens[1]);
792 } else if (g_strcmp0(tokens[0], "l2assoc") == 0) {
793 use_l2 = true;
794 l2_assoc = STRTOLL(tokens[1]);
795 } else if (g_strcmp0(tokens[0], "l2") == 0) {
796 if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) {
797 fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
798 return -1;
799 }
800 } else if (g_strcmp0(tokens[0], "evict") == 0) {
801 if (g_strcmp0(tokens[1], "rand") == 0) {
802 policy = RAND;
803 } else if (g_strcmp0(tokens[1], "lru") == 0) {
804 policy = LRU;
805 } else if (g_strcmp0(tokens[1], "fifo") == 0) {
806 policy = FIFO;
807 } else {
808 fprintf(stderr, "invalid eviction policy: %s\n", opt);
809 return -1;
810 }
811 } else {
812 fprintf(stderr, "option parsing failed: %s\n", opt);
813 return -1;
814 }
815 }
816
817 policy_init();
818
819 l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize);
820 if (!l1_dcaches) {
821 const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize);
822 fprintf(stderr, "dcache cannot be constructed from given parameters\n");
823 fprintf(stderr, "%s\n", err);
824 return -1;
825 }
826
827 l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize);
828 if (!l1_icaches) {
829 const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize);
830 fprintf(stderr, "icache cannot be constructed from given parameters\n");
831 fprintf(stderr, "%s\n", err);
832 return -1;
833 }
834
835 l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL;
836 if (!l2_ucaches && use_l2) {
837 const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize);
838 fprintf(stderr, "L2 cache cannot be constructed from given parameters\n");
839 fprintf(stderr, "%s\n", err);
840 return -1;
841 }
842
843 l1_dcache_locks = g_new0(GMutex, cores);
844 l1_icache_locks = g_new0(GMutex, cores);
845 l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL;
846
847 qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
848 qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
849
850 miss_ht = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, insn_free);
851
852 return 0;
853 }
854