1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Linux performance counter support for LoongArch. 4 * 5 * Copyright (C) 2022 Loongson Technology Corporation Limited 6 * 7 * Derived from MIPS: 8 * Copyright (C) 2010 MIPS Technologies, Inc. 9 * Copyright (C) 2011 Cavium Networks, Inc. 10 * Author: Deng-Cheng Zhu 11 */ 12 13 #include <linux/cpumask.h> 14 #include <linux/interrupt.h> 15 #include <linux/smp.h> 16 #include <linux/kernel.h> 17 #include <linux/perf_event.h> 18 #include <linux/uaccess.h> 19 #include <linux/sched/task_stack.h> 20 21 #include <asm/irq.h> 22 #include <asm/irq_regs.h> 23 #include <asm/stacktrace.h> 24 #include <asm/unwind.h> 25 26 /* 27 * Get the return address for a single stackframe and return a pointer to the 28 * next frame tail. 29 */ 30 static unsigned long 31 user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) 32 { 33 unsigned long err; 34 unsigned long __user *user_frame_tail; 35 struct stack_frame buftail; 36 37 user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame)); 38 39 /* Also check accessibility of one struct frame_tail beyond */ 40 if (!access_ok(user_frame_tail, sizeof(buftail))) 41 return 0; 42 43 pagefault_disable(); 44 err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail)); 45 pagefault_enable(); 46 47 if (err || (unsigned long)user_frame_tail >= buftail.fp) 48 return 0; 49 50 perf_callchain_store(entry, buftail.ra); 51 52 return buftail.fp; 53 } 54 55 void perf_callchain_user(struct perf_callchain_entry_ctx *entry, 56 struct pt_regs *regs) 57 { 58 unsigned long fp; 59 60 if (perf_guest_state()) { 61 /* We don't support guest os callchain now */ 62 return; 63 } 64 65 perf_callchain_store(entry, regs->csr_era); 66 67 fp = regs->regs[22]; 68 69 while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf)) 70 fp = user_backtrace(entry, fp); 71 } 72 73 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, 74 struct pt_regs *regs) 75 { 76 struct unwind_state state; 77 unsigned long addr; 78 79 for (unwind_start(&state, current, regs); 80 !unwind_done(&state); unwind_next_frame(&state)) { 81 addr = unwind_get_return_address(&state); 82 if (!addr || perf_callchain_store(entry, addr)) 83 return; 84 } 85 } 86 87 #define LOONGARCH_MAX_HWEVENTS 32 88 89 struct cpu_hw_events { 90 /* Array of events on this cpu. */ 91 struct perf_event *events[LOONGARCH_MAX_HWEVENTS]; 92 93 /* 94 * Set the bit (indexed by the counter number) when the counter 95 * is used for an event. 96 */ 97 unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)]; 98 99 /* 100 * Software copy of the control register for each performance counter. 101 */ 102 unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS]; 103 }; 104 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 105 .saved_ctrl = {0}, 106 }; 107 108 /* The description of LoongArch performance events. */ 109 struct loongarch_perf_event { 110 unsigned int event_id; 111 }; 112 113 static struct loongarch_perf_event raw_event; 114 static DEFINE_MUTEX(raw_event_mutex); 115 116 #define C(x) PERF_COUNT_HW_CACHE_##x 117 #define HW_OP_UNSUPPORTED 0xffffffff 118 #define CACHE_OP_UNSUPPORTED 0xffffffff 119 120 #define PERF_MAP_ALL_UNSUPPORTED \ 121 [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED} 122 123 #define PERF_CACHE_MAP_ALL_UNSUPPORTED \ 124 [0 ... C(MAX) - 1] = { \ 125 [0 ... C(OP_MAX) - 1] = { \ 126 [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED}, \ 127 }, \ 128 } 129 130 struct loongarch_pmu { 131 u64 max_period; 132 u64 valid_count; 133 u64 overflow; 134 const char *name; 135 unsigned int num_counters; 136 u64 (*read_counter)(unsigned int idx); 137 void (*write_counter)(unsigned int idx, u64 val); 138 const struct loongarch_perf_event *(*map_raw_event)(u64 config); 139 const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX]; 140 const struct loongarch_perf_event (*cache_event_map) 141 [PERF_COUNT_HW_CACHE_MAX] 142 [PERF_COUNT_HW_CACHE_OP_MAX] 143 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 144 }; 145 146 static struct loongarch_pmu loongarch_pmu; 147 148 #define M_PERFCTL_EVENT(event) (event & CSR_PERFCTRL_EVENT) 149 150 #define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 | \ 151 CSR_PERFCTRL_PLV1 | \ 152 CSR_PERFCTRL_PLV2 | \ 153 CSR_PERFCTRL_PLV3 | \ 154 CSR_PERFCTRL_IE) 155 156 #define M_PERFCTL_CONFIG_MASK 0x1f0000 157 158 static void pause_local_counters(void); 159 static void resume_local_counters(void); 160 161 static u64 loongarch_pmu_read_counter(unsigned int idx) 162 { 163 u64 val = -1; 164 165 switch (idx) { 166 case 0: 167 val = read_csr_perfcntr0(); 168 break; 169 case 1: 170 val = read_csr_perfcntr1(); 171 break; 172 case 2: 173 val = read_csr_perfcntr2(); 174 break; 175 case 3: 176 val = read_csr_perfcntr3(); 177 break; 178 default: 179 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 180 return 0; 181 } 182 183 return val; 184 } 185 186 static void loongarch_pmu_write_counter(unsigned int idx, u64 val) 187 { 188 switch (idx) { 189 case 0: 190 write_csr_perfcntr0(val); 191 return; 192 case 1: 193 write_csr_perfcntr1(val); 194 return; 195 case 2: 196 write_csr_perfcntr2(val); 197 return; 198 case 3: 199 write_csr_perfcntr3(val); 200 return; 201 default: 202 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 203 return; 204 } 205 } 206 207 static unsigned int loongarch_pmu_read_control(unsigned int idx) 208 { 209 unsigned int val = -1; 210 211 switch (idx) { 212 case 0: 213 val = read_csr_perfctrl0(); 214 break; 215 case 1: 216 val = read_csr_perfctrl1(); 217 break; 218 case 2: 219 val = read_csr_perfctrl2(); 220 break; 221 case 3: 222 val = read_csr_perfctrl3(); 223 break; 224 default: 225 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 226 return 0; 227 } 228 229 return val; 230 } 231 232 static void loongarch_pmu_write_control(unsigned int idx, unsigned int val) 233 { 234 switch (idx) { 235 case 0: 236 write_csr_perfctrl0(val); 237 return; 238 case 1: 239 write_csr_perfctrl1(val); 240 return; 241 case 2: 242 write_csr_perfctrl2(val); 243 return; 244 case 3: 245 write_csr_perfctrl3(val); 246 return; 247 default: 248 WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); 249 return; 250 } 251 } 252 253 static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) 254 { 255 int i; 256 257 for (i = 0; i < loongarch_pmu.num_counters; i++) { 258 if (!test_and_set_bit(i, cpuc->used_mask)) 259 return i; 260 } 261 262 return -EAGAIN; 263 } 264 265 static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) 266 { 267 unsigned int cpu; 268 struct perf_event *event = container_of(evt, struct perf_event, hw); 269 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 270 271 WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 272 273 /* Make sure interrupt enabled. */ 274 cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) | 275 (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; 276 277 cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); 278 279 /* 280 * We do not actually let the counter run. Leave it until start(). 281 */ 282 pr_debug("Enabling perf counter for CPU%d\n", cpu); 283 } 284 285 static void loongarch_pmu_disable_event(int idx) 286 { 287 unsigned long flags; 288 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 289 290 WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 291 292 local_irq_save(flags); 293 cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) & 294 ~M_PERFCTL_COUNT_EVENT_WHENEVER; 295 loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]); 296 local_irq_restore(flags); 297 } 298 299 static int loongarch_pmu_event_set_period(struct perf_event *event, 300 struct hw_perf_event *hwc, 301 int idx) 302 { 303 int ret = 0; 304 u64 left = local64_read(&hwc->period_left); 305 u64 period = hwc->sample_period; 306 307 if (unlikely((left + period) & (1ULL << 63))) { 308 /* left underflowed by more than period. */ 309 left = period; 310 local64_set(&hwc->period_left, left); 311 hwc->last_period = period; 312 ret = 1; 313 } else if (unlikely((left + period) <= period)) { 314 /* left underflowed by less than period. */ 315 left += period; 316 local64_set(&hwc->period_left, left); 317 hwc->last_period = period; 318 ret = 1; 319 } 320 321 if (left > loongarch_pmu.max_period) { 322 left = loongarch_pmu.max_period; 323 local64_set(&hwc->period_left, left); 324 } 325 326 local64_set(&hwc->prev_count, loongarch_pmu.overflow - left); 327 328 loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left); 329 330 perf_event_update_userpage(event); 331 332 return ret; 333 } 334 335 static void loongarch_pmu_event_update(struct perf_event *event, 336 struct hw_perf_event *hwc, 337 int idx) 338 { 339 u64 delta; 340 u64 prev_raw_count, new_raw_count; 341 342 again: 343 prev_raw_count = local64_read(&hwc->prev_count); 344 new_raw_count = loongarch_pmu.read_counter(idx); 345 346 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 347 new_raw_count) != prev_raw_count) 348 goto again; 349 350 delta = new_raw_count - prev_raw_count; 351 352 local64_add(delta, &event->count); 353 local64_sub(delta, &hwc->period_left); 354 } 355 356 static void loongarch_pmu_start(struct perf_event *event, int flags) 357 { 358 struct hw_perf_event *hwc = &event->hw; 359 360 if (flags & PERF_EF_RELOAD) 361 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 362 363 hwc->state = 0; 364 365 /* Set the period for the event. */ 366 loongarch_pmu_event_set_period(event, hwc, hwc->idx); 367 368 /* Enable the event. */ 369 loongarch_pmu_enable_event(hwc, hwc->idx); 370 } 371 372 static void loongarch_pmu_stop(struct perf_event *event, int flags) 373 { 374 struct hw_perf_event *hwc = &event->hw; 375 376 if (!(hwc->state & PERF_HES_STOPPED)) { 377 /* We are working on a local event. */ 378 loongarch_pmu_disable_event(hwc->idx); 379 barrier(); 380 loongarch_pmu_event_update(event, hwc, hwc->idx); 381 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 382 } 383 } 384 385 static int loongarch_pmu_add(struct perf_event *event, int flags) 386 { 387 int idx, err = 0; 388 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 389 struct hw_perf_event *hwc = &event->hw; 390 391 perf_pmu_disable(event->pmu); 392 393 /* To look for a free counter for this event. */ 394 idx = loongarch_pmu_alloc_counter(cpuc, hwc); 395 if (idx < 0) { 396 err = idx; 397 goto out; 398 } 399 400 /* 401 * If there is an event in the counter we are going to use then 402 * make sure it is disabled. 403 */ 404 event->hw.idx = idx; 405 loongarch_pmu_disable_event(idx); 406 cpuc->events[idx] = event; 407 408 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; 409 if (flags & PERF_EF_START) 410 loongarch_pmu_start(event, PERF_EF_RELOAD); 411 412 /* Propagate our changes to the userspace mapping. */ 413 perf_event_update_userpage(event); 414 415 out: 416 perf_pmu_enable(event->pmu); 417 return err; 418 } 419 420 static void loongarch_pmu_del(struct perf_event *event, int flags) 421 { 422 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 423 struct hw_perf_event *hwc = &event->hw; 424 int idx = hwc->idx; 425 426 WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); 427 428 loongarch_pmu_stop(event, PERF_EF_UPDATE); 429 cpuc->events[idx] = NULL; 430 clear_bit(idx, cpuc->used_mask); 431 432 perf_event_update_userpage(event); 433 } 434 435 static void loongarch_pmu_read(struct perf_event *event) 436 { 437 struct hw_perf_event *hwc = &event->hw; 438 439 /* Don't read disabled counters! */ 440 if (hwc->idx < 0) 441 return; 442 443 loongarch_pmu_event_update(event, hwc, hwc->idx); 444 } 445 446 static void loongarch_pmu_enable(struct pmu *pmu) 447 { 448 resume_local_counters(); 449 } 450 451 static void loongarch_pmu_disable(struct pmu *pmu) 452 { 453 pause_local_counters(); 454 } 455 456 static DEFINE_MUTEX(pmu_reserve_mutex); 457 static atomic_t active_events = ATOMIC_INIT(0); 458 459 static void reset_counters(void *arg); 460 static int __hw_perf_event_init(struct perf_event *event); 461 462 static void hw_perf_event_destroy(struct perf_event *event) 463 { 464 if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { 465 on_each_cpu(reset_counters, NULL, 1); 466 free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu); 467 mutex_unlock(&pmu_reserve_mutex); 468 } 469 } 470 471 static void handle_associated_event(struct cpu_hw_events *cpuc, int idx, 472 struct perf_sample_data *data, struct pt_regs *regs) 473 { 474 struct perf_event *event = cpuc->events[idx]; 475 struct hw_perf_event *hwc = &event->hw; 476 477 loongarch_pmu_event_update(event, hwc, idx); 478 data->period = event->hw.last_period; 479 if (!loongarch_pmu_event_set_period(event, hwc, idx)) 480 return; 481 482 perf_event_overflow(event, data, regs); 483 } 484 485 static irqreturn_t pmu_handle_irq(int irq, void *dev) 486 { 487 int n; 488 int handled = IRQ_NONE; 489 uint64_t counter; 490 struct pt_regs *regs; 491 struct perf_sample_data data; 492 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 493 494 /* 495 * First we pause the local counters, so that when we are locked 496 * here, the counters are all paused. When it gets locked due to 497 * perf_disable(), the timer interrupt handler will be delayed. 498 * 499 * See also loongarch_pmu_start(). 500 */ 501 pause_local_counters(); 502 503 regs = get_irq_regs(); 504 505 perf_sample_data_init(&data, 0, 0); 506 507 for (n = 0; n < loongarch_pmu.num_counters; n++) { 508 if (test_bit(n, cpuc->used_mask)) { 509 counter = loongarch_pmu.read_counter(n); 510 if (counter & loongarch_pmu.overflow) { 511 handle_associated_event(cpuc, n, &data, regs); 512 handled = IRQ_HANDLED; 513 } 514 } 515 } 516 517 resume_local_counters(); 518 519 /* 520 * Do all the work for the pending perf events. We can do this 521 * in here because the performance counter interrupt is a regular 522 * interrupt, not NMI. 523 */ 524 if (handled == IRQ_HANDLED) 525 irq_work_run(); 526 527 return handled; 528 } 529 530 static int loongarch_pmu_event_init(struct perf_event *event) 531 { 532 int r, irq; 533 unsigned long flags; 534 535 /* does not support taken branch sampling */ 536 if (has_branch_stack(event)) 537 return -EOPNOTSUPP; 538 539 switch (event->attr.type) { 540 case PERF_TYPE_RAW: 541 case PERF_TYPE_HARDWARE: 542 case PERF_TYPE_HW_CACHE: 543 break; 544 545 default: 546 /* Init it to avoid false validate_group */ 547 event->hw.event_base = 0xffffffff; 548 return -ENOENT; 549 } 550 551 if (event->cpu >= 0 && !cpu_online(event->cpu)) 552 return -ENODEV; 553 554 irq = get_percpu_irq(INT_PCOV); 555 flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; 556 if (!atomic_inc_not_zero(&active_events)) { 557 mutex_lock(&pmu_reserve_mutex); 558 if (atomic_read(&active_events) == 0) { 559 r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu); 560 if (r < 0) { 561 mutex_unlock(&pmu_reserve_mutex); 562 pr_warn("PMU IRQ request failed\n"); 563 return -ENODEV; 564 } 565 } 566 atomic_inc(&active_events); 567 mutex_unlock(&pmu_reserve_mutex); 568 } 569 570 return __hw_perf_event_init(event); 571 } 572 573 static struct pmu pmu = { 574 .pmu_enable = loongarch_pmu_enable, 575 .pmu_disable = loongarch_pmu_disable, 576 .event_init = loongarch_pmu_event_init, 577 .add = loongarch_pmu_add, 578 .del = loongarch_pmu_del, 579 .start = loongarch_pmu_start, 580 .stop = loongarch_pmu_stop, 581 .read = loongarch_pmu_read, 582 }; 583 584 static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) 585 { 586 return M_PERFCTL_EVENT(pev->event_id); 587 } 588 589 static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) 590 { 591 const struct loongarch_perf_event *pev; 592 593 pev = &(*loongarch_pmu.general_event_map)[idx]; 594 595 if (pev->event_id == HW_OP_UNSUPPORTED) 596 return ERR_PTR(-ENOENT); 597 598 return pev; 599 } 600 601 static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config) 602 { 603 unsigned int cache_type, cache_op, cache_result; 604 const struct loongarch_perf_event *pev; 605 606 cache_type = (config >> 0) & 0xff; 607 if (cache_type >= PERF_COUNT_HW_CACHE_MAX) 608 return ERR_PTR(-EINVAL); 609 610 cache_op = (config >> 8) & 0xff; 611 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) 612 return ERR_PTR(-EINVAL); 613 614 cache_result = (config >> 16) & 0xff; 615 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 616 return ERR_PTR(-EINVAL); 617 618 pev = &((*loongarch_pmu.cache_event_map) 619 [cache_type] 620 [cache_op] 621 [cache_result]); 622 623 if (pev->event_id == CACHE_OP_UNSUPPORTED) 624 return ERR_PTR(-ENOENT); 625 626 return pev; 627 } 628 629 static int validate_group(struct perf_event *event) 630 { 631 struct cpu_hw_events fake_cpuc; 632 struct perf_event *sibling, *leader = event->group_leader; 633 634 memset(&fake_cpuc, 0, sizeof(fake_cpuc)); 635 636 if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) 637 return -EINVAL; 638 639 for_each_sibling_event(sibling, leader) { 640 if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) 641 return -EINVAL; 642 } 643 644 if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) 645 return -EINVAL; 646 647 return 0; 648 } 649 650 static void reset_counters(void *arg) 651 { 652 int n; 653 int counters = loongarch_pmu.num_counters; 654 655 for (n = 0; n < counters; n++) { 656 loongarch_pmu_write_control(n, 0); 657 loongarch_pmu.write_counter(n, 0); 658 } 659 } 660 661 static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = { 662 PERF_MAP_ALL_UNSUPPORTED, 663 [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 }, 664 [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 }, 665 [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 }, 666 [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 }, 667 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 }, 668 [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 }, 669 }; 670 671 static const struct loongarch_perf_event loongson_cache_map 672 [PERF_COUNT_HW_CACHE_MAX] 673 [PERF_COUNT_HW_CACHE_OP_MAX] 674 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 675 PERF_CACHE_MAP_ALL_UNSUPPORTED, 676 [C(L1D)] = { 677 /* 678 * Like some other architectures (e.g. ARM), the performance 679 * counters don't differentiate between read and write 680 * accesses/misses, so this isn't strictly correct, but it's the 681 * best we can do. Writes and reads get combined. 682 */ 683 [C(OP_READ)] = { 684 [C(RESULT_ACCESS)] = { 0x8 }, 685 [C(RESULT_MISS)] = { 0x9 }, 686 }, 687 [C(OP_WRITE)] = { 688 [C(RESULT_ACCESS)] = { 0x8 }, 689 [C(RESULT_MISS)] = { 0x9 }, 690 }, 691 [C(OP_PREFETCH)] = { 692 [C(RESULT_ACCESS)] = { 0xaa }, 693 [C(RESULT_MISS)] = { 0xa9 }, 694 }, 695 }, 696 [C(L1I)] = { 697 [C(OP_READ)] = { 698 [C(RESULT_ACCESS)] = { 0x6 }, 699 [C(RESULT_MISS)] = { 0x7 }, 700 }, 701 }, 702 [C(LL)] = { 703 [C(OP_READ)] = { 704 [C(RESULT_ACCESS)] = { 0xc }, 705 [C(RESULT_MISS)] = { 0xd }, 706 }, 707 [C(OP_WRITE)] = { 708 [C(RESULT_ACCESS)] = { 0xc }, 709 [C(RESULT_MISS)] = { 0xd }, 710 }, 711 }, 712 [C(ITLB)] = { 713 [C(OP_READ)] = { 714 [C(RESULT_MISS)] = { 0x3b }, 715 }, 716 }, 717 [C(DTLB)] = { 718 [C(OP_READ)] = { 719 [C(RESULT_ACCESS)] = { 0x4 }, 720 [C(RESULT_MISS)] = { 0x3c }, 721 }, 722 [C(OP_WRITE)] = { 723 [C(RESULT_ACCESS)] = { 0x4 }, 724 [C(RESULT_MISS)] = { 0x3c }, 725 }, 726 }, 727 [C(BPU)] = { 728 /* Using the same code for *HW_BRANCH* */ 729 [C(OP_READ)] = { 730 [C(RESULT_ACCESS)] = { 0x02 }, 731 [C(RESULT_MISS)] = { 0x03 }, 732 }, 733 }, 734 }; 735 736 static int __hw_perf_event_init(struct perf_event *event) 737 { 738 int err; 739 struct hw_perf_event *hwc = &event->hw; 740 struct perf_event_attr *attr = &event->attr; 741 const struct loongarch_perf_event *pev; 742 743 /* Returning LoongArch event descriptor for generic perf event. */ 744 if (PERF_TYPE_HARDWARE == event->attr.type) { 745 if (event->attr.config >= PERF_COUNT_HW_MAX) 746 return -EINVAL; 747 pev = loongarch_pmu_map_general_event(event->attr.config); 748 } else if (PERF_TYPE_HW_CACHE == event->attr.type) { 749 pev = loongarch_pmu_map_cache_event(event->attr.config); 750 } else if (PERF_TYPE_RAW == event->attr.type) { 751 /* We are working on the global raw event. */ 752 mutex_lock(&raw_event_mutex); 753 pev = loongarch_pmu.map_raw_event(event->attr.config); 754 } else { 755 /* The event type is not (yet) supported. */ 756 return -EOPNOTSUPP; 757 } 758 759 if (IS_ERR(pev)) { 760 if (PERF_TYPE_RAW == event->attr.type) 761 mutex_unlock(&raw_event_mutex); 762 return PTR_ERR(pev); 763 } 764 765 /* 766 * We allow max flexibility on how each individual counter shared 767 * by the single CPU operates (the mode exclusion and the range). 768 */ 769 hwc->config_base = CSR_PERFCTRL_IE; 770 771 hwc->event_base = loongarch_pmu_perf_event_encode(pev); 772 if (PERF_TYPE_RAW == event->attr.type) 773 mutex_unlock(&raw_event_mutex); 774 775 if (!attr->exclude_user) { 776 hwc->config_base |= CSR_PERFCTRL_PLV3; 777 hwc->config_base |= CSR_PERFCTRL_PLV2; 778 } 779 if (!attr->exclude_kernel) { 780 hwc->config_base |= CSR_PERFCTRL_PLV0; 781 } 782 if (!attr->exclude_hv) { 783 hwc->config_base |= CSR_PERFCTRL_PLV1; 784 } 785 786 hwc->config_base &= M_PERFCTL_CONFIG_MASK; 787 /* 788 * The event can belong to another cpu. We do not assign a local 789 * counter for it for now. 790 */ 791 hwc->idx = -1; 792 hwc->config = 0; 793 794 if (!hwc->sample_period) { 795 hwc->sample_period = loongarch_pmu.max_period; 796 hwc->last_period = hwc->sample_period; 797 local64_set(&hwc->period_left, hwc->sample_period); 798 } 799 800 err = 0; 801 if (event->group_leader != event) 802 err = validate_group(event); 803 804 event->destroy = hw_perf_event_destroy; 805 806 if (err) 807 event->destroy(event); 808 809 return err; 810 } 811 812 static void pause_local_counters(void) 813 { 814 unsigned long flags; 815 int ctr = loongarch_pmu.num_counters; 816 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 817 818 local_irq_save(flags); 819 do { 820 ctr--; 821 cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr); 822 loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] & 823 ~M_PERFCTL_COUNT_EVENT_WHENEVER); 824 } while (ctr > 0); 825 local_irq_restore(flags); 826 } 827 828 static void resume_local_counters(void) 829 { 830 int ctr = loongarch_pmu.num_counters; 831 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 832 833 do { 834 ctr--; 835 loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]); 836 } while (ctr > 0); 837 } 838 839 static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) 840 { 841 raw_event.event_id = M_PERFCTL_EVENT(config); 842 843 return &raw_event; 844 } 845 846 static int __init init_hw_perf_events(void) 847 { 848 int counters; 849 850 if (!cpu_has_pmp) 851 return -ENODEV; 852 853 pr_info("Performance counters: "); 854 counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; 855 856 loongarch_pmu.num_counters = counters; 857 loongarch_pmu.max_period = (1ULL << 63) - 1; 858 loongarch_pmu.valid_count = (1ULL << 63) - 1; 859 loongarch_pmu.overflow = 1ULL << 63; 860 loongarch_pmu.name = "loongarch/loongson64"; 861 loongarch_pmu.read_counter = loongarch_pmu_read_counter; 862 loongarch_pmu.write_counter = loongarch_pmu_write_counter; 863 loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event; 864 loongarch_pmu.general_event_map = &loongson_event_map; 865 loongarch_pmu.cache_event_map = &loongson_cache_map; 866 867 on_each_cpu(reset_counters, NULL, 1); 868 869 pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", 870 loongarch_pmu.name, counters, 64); 871 872 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); 873 874 return 0; 875 } 876 pure_initcall(init_hw_perf_events); 877