/*
 * Test the ARM Performance Monitors Unit (PMU).
 *
 * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
 * Copyright (C) 2016, Red Hat Inc, Wei Huang <wei@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License version 2.1 and
 * only version 2.1 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 */
#include "libcflat.h"
#include "errata.h"
#include "asm/barrier.h"
#include "asm/sysreg.h"
#include "asm/processor.h"

#define PMU_PMCR_E         (1 << 0)
#define PMU_PMCR_C         (1 << 2)
#define PMU_PMCR_LC        (1 << 6)
#define PMU_PMCR_N_SHIFT   11
#define PMU_PMCR_N_MASK    0x1f
#define PMU_PMCR_ID_SHIFT  16
#define PMU_PMCR_ID_MASK   0xff
#define PMU_PMCR_IMP_SHIFT 24
#define PMU_PMCR_IMP_MASK  0xff

#define PMU_CYCLE_IDX      31

#define NR_SAMPLES 10

static unsigned int pmu_version;
#if defined(__arm__)
#define ID_DFR0_PERFMON_SHIFT 24
#define ID_DFR0_PERFMON_MASK  0xf

#define PMCR       __ACCESS_CP15(c9, 0, c12, 0)
#define ID_DFR0    __ACCESS_CP15(c0, 0, c1, 2)
#define PMSELR     __ACCESS_CP15(c9, 0, c12, 5)
#define PMXEVTYPER __ACCESS_CP15(c9, 0, c13, 1)
#define PMCNTENSET __ACCESS_CP15(c9, 0, c12, 1)
#define PMCCNTR32  __ACCESS_CP15(c9, 0, c13, 0)
#define PMCCNTR64  __ACCESS_CP15_64(0, c9)

static inline uint32_t get_id_dfr0(void) { return read_sysreg(ID_DFR0); }
static inline uint32_t get_pmcr(void) { return read_sysreg(PMCR); }
static inline void set_pmcr(uint32_t v) { write_sysreg(v, PMCR); }
static inline void set_pmcntenset(uint32_t v) { write_sysreg(v, PMCNTENSET); }

static inline uint8_t get_pmu_version(void)
{
	return (get_id_dfr0() >> ID_DFR0_PERFMON_SHIFT) & ID_DFR0_PERFMON_MASK;
}

static inline uint64_t get_pmccntr(void)
{
	return read_sysreg(PMCCNTR32);
}

static inline void set_pmccntr(uint64_t value)
{
	write_sysreg(value & 0xffffffff, PMCCNTR32);
}

/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
static inline void set_pmccfiltr(uint32_t value)
{
	write_sysreg(PMU_CYCLE_IDX, PMSELR);
	write_sysreg(value, PMXEVTYPER);
	isb();
}
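/*
 * Illustrative sketch, not used by the tests below: programming a generic
 * event counter on ARMv7 goes through the same PMSELR/PMXEVTYPER
 * indirection as set_pmccfiltr() above -- select the counter, then write
 * the event type. The helper name is ours, not an existing API; as an
 * example, event 0x08 is the architectural "instruction architecturally
 * executed" event.
 */
static inline void set_pmevtyper(uint32_t counter, uint32_t event)
{
	write_sysreg(counter, PMSELR);	/* select counter <counter> */
	write_sysreg(event, PMXEVTYPER);	/* program its event type */
	isb();
}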
/*
 * Extra instructions inserted by the compiler would be difficult to compensate
 * for, so hand assemble everything between, and including, the PMCR accesses
 * to start and stop counting. isb instructions are inserted to make sure
 * pmccntr read after this function returns the exact instructions executed in
 * the controlled block. Total instrs = isb + mcr + 2*loop = 2 + 2*loop.
 */
static inline void precise_instrs_loop(int loop, uint32_t pmcr)
{
	asm volatile(
	"	mcr	p15, 0, %[pmcr], c9, c12, 0\n"
	"	isb\n"
	"1:	subs	%[loop], %[loop], #1\n"
	"	bgt	1b\n"
	"	mcr	p15, 0, %[z], c9, c12, 0\n"
	"	isb\n"
	: [loop] "+r" (loop)
	: [pmcr] "r" (pmcr), [z] "r" (0)
	: "cc");
}
#elif defined(__aarch64__)
#define ID_AA64DFR0_PERFMON_SHIFT 8
#define ID_AA64DFR0_PERFMON_MASK  0xf

static inline uint32_t get_id_aa64dfr0(void) { return read_sysreg(id_aa64dfr0_el1); }
static inline uint32_t get_pmcr(void) { return read_sysreg(pmcr_el0); }
static inline void set_pmcr(uint32_t v) { write_sysreg(v, pmcr_el0); }
static inline uint64_t get_pmccntr(void) { return read_sysreg(pmccntr_el0); }
static inline void set_pmccntr(uint64_t v) { write_sysreg(v, pmccntr_el0); }
static inline void set_pmcntenset(uint32_t v) { write_sysreg(v, pmcntenset_el0); }
static inline void set_pmccfiltr(uint32_t v) { write_sysreg(v, pmccfiltr_el0); }

static inline uint8_t get_pmu_version(void)
{
	uint8_t ver = (get_id_aa64dfr0() >> ID_AA64DFR0_PERFMON_SHIFT) & ID_AA64DFR0_PERFMON_MASK;

	/* ID_AA64DFR0_EL1.PMUVer == 0b0001 means a PMUv3 implementation */
	return ver == 1 ? 3 : ver;
}

/*
 * Extra instructions inserted by the compiler would be difficult to compensate
 * for, so hand assemble everything between, and including, the PMCR accesses
 * to start and stop counting. isb instructions are inserted to make sure
 * pmccntr read after this function returns the exact instructions executed
 * in the controlled block. Total instrs = isb + msr + 2*loop = 2 + 2*loop.
 */
static inline void precise_instrs_loop(int loop, uint32_t pmcr)
{
	asm volatile(
	"	msr	pmcr_el0, %[pmcr]\n"
	"	isb\n"
	"1:	subs	%[loop], %[loop], #1\n"
	"	b.gt	1b\n"
	"	msr	pmcr_el0, xzr\n"
	"	isb\n"
	: [loop] "+r" (loop)
	: [pmcr] "r" (pmcr)
	: "cc");
}
#endif

/*
 * As a simple sanity check on PMCR_EL0, ensure the implementer field isn't
 * null. Also print out a couple of other interesting fields for diagnostic
 * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
 * event counters and therefore reports zero event counters, but hopefully
 * support for at least the instructions event will be added in the future and
 * the reported number of event counters will become nonzero.
 */
static bool check_pmcr(void)
{
	uint32_t pmcr;

	pmcr = get_pmcr();

	report_info("PMU implementer/ID code/counters: %#x(\"%c\")/%#x/%d",
		    (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK,
		    ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) ? : ' ',
		    (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK,
		    (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);

	return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
}
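/*
 * Example of the output above (illustrative; the ID code and counter count
 * vary by implementation): on an ARM Ltd. design the implementer field reads
 * as 0x41, which is ASCII 'A', so report_info() prints something like
 * "PMU implementer/ID code/counters: 0x41("A")/0x0/6".
 */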
/*
 * Ensure that the cycle counter progresses between back-to-back reads.
 */
static bool check_cycles_increase(void)
{
	bool success = true;

	/* init before event access, this test only cares about cycle count */
	set_pmcntenset(1 << PMU_CYCLE_IDX);
	set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */

	set_pmcr(get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);

	for (int i = 0; i < NR_SAMPLES; i++) {
		uint64_t a, b;

		a = get_pmccntr();
		b = get_pmccntr();

		if (a >= b) {
			printf("Read %"PRId64" then %"PRId64".\n", a, b);
			success = false;
			break;
		}
	}

	set_pmcr(get_pmcr() & ~PMU_PMCR_E);

	return success;
}

/*
 * Execute a known number of guest instructions. Only even instruction counts
 * greater than or equal to 4 are supported by the in-line assembly code. The
 * control register (PMCR_EL0) is initialized with the provided value (allowing
 * for example for the cycle counter or event counters to be reset). At the end
 * of the exact instruction loop, zero is written to PMCR_EL0 to disable
 * counting, allowing the cycle counter or event counters to be read at the
 * leisure of the calling code.
 */
static void measure_instrs(int num, uint32_t pmcr)
{
	int loop = (num - 2) / 2;

	assert(num >= 4 && ((num - 2) % 2 == 0));
	precise_instrs_loop(loop, pmcr);
}
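/*
 * Worked example: measure_instrs(100, pmcr) computes loop = (100 - 2) / 2
 * = 49, so precise_instrs_loop() executes the mcr/msr plus isb pair and
 * 49 subs/branch iterations, i.e. exactly 2 + 2*49 = 100 instructions
 * while counting is enabled.
 */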
/*
 * Measure cycle counts for various known instruction counts. Ensure that the
 * cycle counter progresses (similar to check_cycles_increase() but with more
 * instructions and using reset and stop controls). If supplied a positive,
 * nonzero CPI parameter, it also strictly checks that every measurement
 * matches it. Strict CPI checking is used to test -icount mode.
 */
static bool check_cpi(int cpi)
{
	uint32_t pmcr = get_pmcr() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;

	/* init before event access, this test only cares about cycle count */
	set_pmcntenset(1 << PMU_CYCLE_IDX);
	set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */

	if (cpi > 0)
		printf("Checking for CPI=%d.\n", cpi);
	printf("instrs : cycles0 cycles1 ...\n");

	for (unsigned int i = 4; i < 300; i += 32) {
		uint64_t avg, sum = 0;

		printf("%4u:", i);
		for (int j = 0; j < NR_SAMPLES; j++) {
			uint64_t cycles;

			set_pmccntr(0);
			measure_instrs(i, pmcr);
			cycles = get_pmccntr();
			printf(" %4"PRId64, cycles);

			if (!cycles) {
				printf("\ncycles not incrementing!\n");
				return false;
			} else if (cpi > 0 && cycles != i * cpi) {
				printf("\nunexpected cycle count received!\n");
				return false;
			} else if ((cycles >> 32) != 0) {
				/*
				 * The cycles taken by the loop above should
				 * fit in 32 bits easily. We check the upper
				 * 32 bits of the cycle counter to make sure
				 * there is no surprise.
				 */
				printf("\ncycle count bigger than 32bit!\n");
				return false;
			}

			sum += cycles;
		}
		avg = sum / NR_SAMPLES;
		printf(" avg=%-4"PRId64" %s=%-3"PRId64"\n", avg,
		       (avg >= i) ? "cpi" : "ipc",
		       (avg >= i) ? avg / i : i / avg);
	}

	return true;
}

static void pmccntr64_test(void)
{
#ifdef __arm__
	if (pmu_version == 0x3) {
		if (ERRATA(9e3f7a296940)) {
			write_sysreg(0xdead, PMCCNTR64);
			report(read_sysreg(PMCCNTR64) == 0xdead, "pmccntr64");
		} else
			report_skip("Skipping unsafe pmccntr64 test. "
				    "Set ERRATA_9e3f7a296940=y to enable.");
	}
#endif
}

/* Return false if no PMU found, otherwise return true */
static bool pmu_probe(void)
{
	pmu_version = get_pmu_version();
	report_info("PMU version: %d", pmu_version);
	return pmu_version != 0 && pmu_version != 0xf;
}

int main(int argc, char *argv[])
{
	int cpi = 0;

	if (argc > 1)
		cpi = atol(argv[1]);

	if (!pmu_probe()) {
		printf("No PMU found, test skipped...\n");
		return report_summary();
	}

	report_prefix_push("pmu");

	report(check_pmcr(), "Control register");
	report(check_cycles_increase(),
	       "Monotonically increasing cycle count");
	report(check_cpi(cpi), "Cycle/instruction ratio");

	pmccntr64_test();

	return report_summary();
}
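/*
 * Usage sketch (assuming the standard kvm-unit-tests harness; the exact
 * runner invocation and any unittests.cfg entry may differ):
 *
 *   arm/run arm/pmu.flat              # PMCR and cycle-counter sanity checks
 *   arm/run arm/pmu.flat -append '1'  # additionally assert CPI == 1,
 *                                     # meaningful under TCG with -icount
 *
 * argv[1], when present, is parsed by atol() into the expected
 * cycles-per-instruction value passed to check_cpi(); QEMU's -icount mode
 * makes the cycle counter advance deterministically, so an exact CPI can
 * be asserted there.
 */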