#include "x86/msr.h"
#include "x86/processor.h"
#include "x86/pmu.h"
#include "x86/apic-defs.h"
#include "x86/apic.h"
#include "x86/desc.h"
#include "x86/isr.h"
#include "alloc.h"

#include "libcflat.h"
#include <stdint.h>

#define N 1000000

// These values match the number of instructions and branches in the
// assembly block in check_emulated_instr().
#define EXPECTED_INSTR 17
#define EXPECTED_BRNCH 5

typedef struct {
	uint32_t ctr;
	uint32_t config;
	uint64_t count;
	int idx;
} pmu_counter_t;

struct pmu_event {
	const char *name;
	uint32_t unit_sel;
	int min;
	int max;
} intel_gp_events[] = {
	{"core cycles", 0x003c, 1*N, 50*N},
	{"instructions", 0x00c0, 10*N, 10.2*N},
	{"ref cycles", 0x013c, 1*N, 30*N},
	{"llc references", 0x4f2e, 1, 2*N},
	{"llc misses", 0x412e, 1, 1*N},
	{"branches", 0x00c4, 1*N, 1.1*N},
	{"branch misses", 0x00c5, 0, 0.1*N},
}, fixed_events[] = {
	{"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
	{"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
	{"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
};

char *buf;

static struct pmu_event *gp_events;
static unsigned int gp_events_size;

static inline void loop(void)
{
	unsigned long tmp, tmp2, tmp3;

	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}

volatile uint64_t irq_received;

static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}

static bool check_irq(void)
{
	int i;

	irq_received = 0;
	irq_enable();
	for (i = 0; i < 100000 && !irq_received; i++)
		asm volatile("pause");
	irq_disable();
	return irq_received;
}

static bool is_gp(pmu_counter_t *evt)
{
	return evt->ctr < MSR_CORE_PERF_FIXED_CTR0 ||
	       evt->ctr >= MSR_IA32_PMC0;
}

static int event_to_global_idx(pmu_counter_t *cnt)
{
	return cnt->ctr - (is_gp(cnt) ? pmu.msr_gp_counter_base :
		(MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
}

static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
{
	if (is_gp(cnt)) {
		int i;

		for (i = 0; i < gp_events_size; i++)
			if (gp_events[i].unit_sel == (cnt->config & 0xffff))
				return &gp_events[i];
	} else
		return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];

	return (void*)0;
}

static void global_enable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	cnt->idx = event_to_global_idx(cnt);
	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) | BIT_ULL(cnt->idx));
}

static void global_disable(pmu_counter_t *cnt)
{
	if (!this_cpu_has_perf_global_ctrl())
		return;

	wrmsr(pmu.msr_global_ctl, rdmsr(pmu.msr_global_ctl) & ~BIT_ULL(cnt->idx));
}

static void __start_event(pmu_counter_t *evt, uint64_t count)
{
	evt->count = count;
	wrmsr(evt->ctr, evt->count);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config | EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
		uint32_t usrospmi = 0;

		if (evt->config & EVNTSEL_OS)
			usrospmi |= (1 << 0);
		if (evt->config & EVNTSEL_USR)
			usrospmi |= (1 << 1);
		if (evt->config & EVNTSEL_INT)
			usrospmi |= (1 << 3); // PMI on overflow
		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
	}
	global_enable(evt);
	apic_write(APIC_LVTPC, PMI_VECTOR);
}

static void start_event(pmu_counter_t *evt)
{
	__start_event(evt, 0);
}

static void stop_event(pmu_counter_t *evt)
{
	global_disable(evt);
	if (is_gp(evt)) {
		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
		      evt->config & ~EVNTSEL_EN);
	} else {
		uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
		int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;

		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
	}
	evt->count = rdmsr(evt->ctr);
}

static noinline void measure_many(pmu_counter_t *evt, int count)
{
	int i;

	for (i = 0; i < count; i++)
		start_event(&evt[i]);
	loop();
	for (i = 0; i < count; i++)
		stop_event(&evt[i]);
}

static void measure_one(pmu_counter_t *evt)
{
	measure_many(evt, 1);
}

static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
	__start_event(evt, count);
	loop();
	stop_event(evt);
}

static bool verify_event(uint64_t count, struct pmu_event *e)
{
	// printf("%d <= %ld <= %d\n", e->min, count, e->max);
	return count >= e->min && count <= e->max;
}

static bool verify_counter(pmu_counter_t *cnt)
{
	return verify_event(cnt->count, get_counter_event(cnt));
}

static void check_gp_counter(struct pmu_event *evt)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
	};
	int i;

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		cnt.ctr = MSR_GP_COUNTERx(i);
		measure_one(&cnt);
		report(verify_event(cnt.count, evt), "%s-%d", evt->name, i);
	}
}

static void check_gp_counters(void)
{
	int i;

	for (i = 0; i < gp_events_size; i++)
		if (pmu_gp_counter_is_available(i))
			check_gp_counter(&gp_events[i]);
		else
			printf("GP event '%s' is disabled\n",
			       gp_events[i].name);
}

static void check_fixed_counters(void)
{
	pmu_counter_t cnt = {
		.config = EVNTSEL_OS | EVNTSEL_USR,
	};
	int i;

	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt.ctr = fixed_events[i].unit_sel;
		measure_one(&cnt);
		report(verify_event(cnt.count, &fixed_events[i]), "fixed-%d", i);
	}
}

static void check_counters_many(void)
{
	pmu_counter_t cnt[10];
	int i, n;

	for (i = 0, n = 0; n < pmu.nr_gp_counters; i++) {
		if (!pmu_gp_counter_is_available(i))
			continue;

		cnt[n].ctr = MSR_GP_COUNTERx(n);
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR |
			gp_events[i % gp_events_size].unit_sel;
		n++;
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		cnt[n].ctr = fixed_events[i].unit_sel;
		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
		n++;
	}

	measure_many(cnt, n);

	for (i = 0; i < n; i++)
		if (!verify_counter(&cnt[i]))
			break;

	report(i == n, "all counters");
}

static uint64_t measure_for_overflow(pmu_counter_t *cnt)
{
	__measure(cnt, 0);
	/*
	 * To generate overflow, i.e. roll over to '0', the initial count just
	 * needs to be preset to the negative expected count.  However, as per
	 * Intel's SDM, the preset count needs to be incremented by 1 to ensure
	 * the overflow interrupt is generated immediately instead of possibly
	 * waiting for the overflow to propagate through the counter.
	 */
	assert(cnt->count > 1);
	return 1 - cnt->count;
}

static void check_counter_overflow(void)
{
	uint64_t overflow_preset;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};

	overflow_preset = measure_for_overflow(&cnt);

	/* clear status before test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	report_prefix_push("overflow");

	for (i = 0; i < pmu.nr_gp_counters + 1; i++) {
		uint64_t status;
		int idx;

		cnt.count = overflow_preset;
		if (pmu_use_full_writes())
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;

		if (i == pmu.nr_gp_counters) {
			cnt.ctr = fixed_events[0].unit_sel;
			cnt.count = measure_for_overflow(&cnt);
			cnt.count &= (1ull << pmu.gp_counter_width) - 1;
		} else {
			cnt.ctr = MSR_GP_COUNTERx(i);
		}

		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		__measure(&cnt, cnt.count);
		report(cnt.count == 1, "cntr-%d", i);

		if (!this_cpu_has_perf_global_status())
			continue;

		status = rdmsr(pmu.msr_global_status);
		report(status & (1ull << idx), "status-%d", i);
		wrmsr(pmu.msr_global_status_clr, status);
		status = rdmsr(pmu.msr_global_status);
		report(!(status & (1ull << idx)), "status clear-%d", i);
		report(check_irq() == (i % 2), "irq-%d", i);
	}

	report_prefix_pop();
}

static void check_gp_counter_cmask(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
	};

	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
	measure_one(&cnt);
	report(cnt.count < gp_events[1].min, "cmask");
}

static void do_rdpmc_fast(void *ptr)
{
	pmu_counter_t *cnt = ptr;
	uint32_t idx = (uint32_t)cnt->idx | (1u << 31);

	if (!is_gp(cnt))
		idx |= 1 << 30;

	cnt->count = rdpmc(idx);
}

static void check_rdpmc(void)
{
	uint64_t val = 0xff0123456789ull;
	bool exc;
	int i;

	report_prefix_push("rdpmc");

	for (i = 0; i < pmu.nr_gp_counters; i++) {
		uint64_t x;
		pmu_counter_t cnt = {
			.ctr = MSR_GP_COUNTERx(i),
			.idx = i
		};

		/*
		 * Without full-width writes, only the low 32 bits are writable,
		 * and the value is sign-extended.
		 */
		if (pmu.msr_gp_counter_base == MSR_IA32_PERFCTR0)
			x = (uint64_t)(int64_t)(int32_t)val;
		else
			x = (uint64_t)(int64_t)val;

		/* Mask according to the number of supported bits */
		x &= (1ull << pmu.gp_counter_width) - 1;

		wrmsr(MSR_GP_COUNTERx(i), val);
		report(rdpmc(i) == x, "cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fast-%d", i);
		else
			report(cnt.count == (u32)val, "fast-%d", i);
	}
	for (i = 0; i < pmu.nr_fixed_counters; i++) {
		uint64_t x = val & ((1ull << pmu.fixed_counter_width) - 1);
		pmu_counter_t cnt = {
			.ctr = MSR_CORE_PERF_FIXED_CTR0 + i,
			.idx = i
		};

		wrmsr(MSR_PERF_FIXED_CTRx(i), x);
		report(rdpmc(i | (1 << 30)) == x, "fixed cntr-%d", i);

		exc = test_for_exception(GP_VECTOR, do_rdpmc_fast, &cnt);
		if (exc)
			report_skip("fixed fast-%d", i);
		else
			report(cnt.count == (u32)x, "fixed fast-%d", i);
	}

	report_prefix_pop();
}

static void check_running_counter_wrmsr(void)
{
	uint64_t status;
	uint64_t count;
	pmu_counter_t evt = {
		.ctr = MSR_GP_COUNTERx(0),
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("running counter wrmsr");

	start_event(&evt);
	loop();
	wrmsr(MSR_GP_COUNTERx(0), 0);
	stop_event(&evt);
	report(evt.count < gp_events[1].min, "cntr");

	/* clear status before overflow test */
	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&evt);

	count = -1;
	if (pmu_use_full_writes())
		count &= (1ull << pmu.gp_counter_width) - 1;

	wrmsr(MSR_GP_COUNTERx(0), count);

	loop();
	stop_event(&evt);

	if (this_cpu_has_perf_global_status()) {
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "status msr bit");
	}

	report_prefix_pop();
}

static void check_emulated_instr(void)
{
	uint64_t status, instr_start, brnch_start;
	pmu_counter_t brnch_cnt = {
		.ctr = MSR_GP_COUNTERx(0),
		/* branch instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[5].unit_sel,
	};
	pmu_counter_t instr_cnt = {
		.ctr = MSR_GP_COUNTERx(1),
		/* instructions */
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
	};

	report_prefix_push("emulated instruction");

	if (this_cpu_has_perf_global_status())
		pmu_clear_global_status();

	start_event(&brnch_cnt);
	start_event(&instr_cnt);

	brnch_start = -EXPECTED_BRNCH;
	instr_start = -EXPECTED_INSTR;
	wrmsr(MSR_GP_COUNTERx(0), brnch_start);
	wrmsr(MSR_GP_COUNTERx(1), instr_start);
	// KVM_FEP is a magic prefix that forces emulation so
	// 'KVM_FEP "jne label\n"' just counts as a single instruction.
	asm volatile(
		"mov $0x0, %%eax\n"
		"cmp $0x0, %%eax\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		KVM_FEP "jne label\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"mov $0xa, %%eax\n"
		"cpuid\n"
		"label:\n"
		:
		:
		: "eax", "ebx", "ecx", "edx");

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	stop_event(&brnch_cnt);
	stop_event(&instr_cnt);

	// Check that the end count - start count is at least the expected
	// number of instructions and branches.
	report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
	       "instruction count");
	report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
	       "branch count");
	if (this_cpu_has_perf_global_status()) {
		// Additionally check that those counters overflowed properly.
		status = rdmsr(pmu.msr_global_status);
		report(status & 1, "branch counter overflow");
		report(status & 2, "instruction counter overflow");
	}

	report_prefix_pop();
}

static void check_counters(void)
{
	if (is_fep_available())
		check_emulated_instr();

	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	check_running_counter_wrmsr();
}

static void do_unsupported_width_counter_write(void *index)
{
	wrmsr(MSR_IA32_PMC0 + *((int *) index), 0xffffff0123456789ull);
}

static void check_gp_counters_write_width(void)
{
	u64 val_64 = 0xffffff0123456789ull;
	u64 val_32 = val_64 & ((1ull << 32) - 1);
	u64 val_max_width = val_64 & ((1ull << pmu.gp_counter_width) - 1);
	int i;

	/*
	 * MSR_IA32_PERFCTRn supports 64-bit writes,
	 * but only the lowest 32 bits are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PERFCTR0 + i, val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);

		wrmsr(MSR_IA32_PERFCTR0 + i, val_64);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
	}

	/*
	 * MSR_IA32_PMCn supports writing values up to GP counter width,
	 * and only the lowest bits of GP counter width are valid.
	 */
	for (i = 0; i < pmu.nr_gp_counters; i++) {
		wrmsr(MSR_IA32_PMC0 + i, val_32);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_32);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_32);

		wrmsr(MSR_IA32_PMC0 + i, val_max_width);
		assert(rdmsr(MSR_IA32_PMC0 + i) == val_max_width);
		assert(rdmsr(MSR_IA32_PERFCTR0 + i) == val_max_width);

		report(test_for_exception(GP_VECTOR,
			do_unsupported_width_counter_write, &i),
		       "writing unsupported width to MSR_IA32_PMC%d raises #GP", i);
	}
}

/*
 * Per the SDM, reference cycles are currently implemented using the
 * core crystal clock, TSC, or bus clock. Calibrate to the TSC
 * frequency to set reasonable expectations.
 */
static void set_ref_cycle_expectations(void)
{
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | intel_gp_events[2].unit_sel,
	};
	uint64_t tsc_delta;
	uint64_t t0, t1, t2, t3;

	/* Bit 2 enumerates the availability of reference cycles events. */
	if (!pmu.nr_gp_counters || !pmu_gp_counter_is_available(2))
		return;

	if (this_cpu_has_perf_global_ctrl())
		wrmsr(pmu.msr_global_ctl, 0);

	t0 = fenced_rdtsc();
	start_event(&cnt);
	t1 = fenced_rdtsc();

	/*
	 * This loop has to run long enough to dominate the VM-exit
	 * costs for playing with the PMU MSRs on start and stop.
	 *
	 * On a 2.6GHz Ice Lake, with the TSC frequency at 104 times
	 * the core crystal clock, this function calculated a guest
	 * TSC : ref cycles ratio of around 105 with ECX initialized
	 * to one billion.
	 */
	asm volatile("loop ." : "+c"((int){1000000000ull}));

	t2 = fenced_rdtsc();
	stop_event(&cnt);
	t3 = fenced_rdtsc();

	tsc_delta = ((t2 - t1) + (t3 - t0)) / 2;

	if (!tsc_delta)
		return;

	intel_gp_events[2].min = (intel_gp_events[2].min * cnt.count) / tsc_delta;
	intel_gp_events[2].max = (intel_gp_events[2].max * cnt.count) / tsc_delta;
}

static void check_invalid_rdpmc_gp(void)
{
	uint64_t val;

	report(rdpmc_safe(64, &val) == GP_VECTOR,
	       "Expected #GP on RDPMC(64)");
}

int main(int ac, char **av)
{
	setup_vm();
	handle_irq(PMI_VECTOR, cnt_overflow);
	buf = malloc(N*64);

	check_invalid_rdpmc_gp();

	if (!pmu.version) {
		report_skip("No Intel Arch PMU is detected!");
		return report_summary();
	}

	gp_events = (struct pmu_event *)intel_gp_events;
	gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
	set_ref_cycle_expectations();

	printf("PMU version: %d\n", pmu.version);
	printf("GP counters: %d\n", pmu.nr_gp_counters);
	printf("GP counter width: %d\n", pmu.gp_counter_width);
	printf("Mask length: %d\n", pmu.gp_counter_mask_length);
	printf("Fixed counters: %d\n", pmu.nr_fixed_counters);
	printf("Fixed counter width: %d\n", pmu.fixed_counter_width);

	apic_write(APIC_LVTPC, PMI_VECTOR);

	check_counters();

	if (pmu_has_full_writes()) {
		pmu.msr_gp_counter_base = MSR_IA32_PMC0;

		report_prefix_push("full-width writes");
		check_counters();
		check_gp_counters_write_width();
		report_prefix_pop();
	}

	return report_summary();
}