// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/cpu_device_id.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#include "../perf_event.h"

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT		2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
#define LBR_RETURN_BIT		5 /* do not capture near returns */
#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
#define LBR_FAR_BIT		8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT	9 /* enable call stack */

/*
 * Following bit only exists in Linux; we mask it out before writing it to
 * the actual MSR. But it helps the constraint perf code to understand
 * that this is a separate configuration.
 */
#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */

#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
#define LBR_USER	(1 << LBR_USER_BIT)
#define LBR_JCC		(1 << LBR_JCC_BIT)
#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
#define LBR_RETURN	(1 << LBR_RETURN_BIT)
#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
#define LBR_FAR		(1 << LBR_FAR_BIT)
#define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
#define LBR_IGN		0	/* ignored */

#define LBR_ANY		 \
	(LBR_JCC	|\
	 LBR_REL_CALL	|\
	 LBR_IND_CALL	|\
	 LBR_RETURN	|\
	 LBR_REL_JMP	|\
	 LBR_IND_JMP	|\
	 LBR_FAR)

#define LBR_FROM_FLAG_MISPRED	BIT_ULL(63)
#define LBR_FROM_FLAG_IN_TX	BIT_ULL(62)
#define LBR_FROM_FLAG_ABORT	BIT_ULL(61)

#define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))

/*
 * Intel LBR_CTL bits
 *
 * Hardware branch filter for Arch LBR
 */
#define ARCH_LBR_KERNEL_BIT		1  /* capture at ring0 */
#define ARCH_LBR_USER_BIT		2  /* capture at ring > 0 */
#define ARCH_LBR_CALL_STACK_BIT		3  /* enable call stack */
#define ARCH_LBR_JCC_BIT		16 /* capture conditional branches */
#define ARCH_LBR_REL_JMP_BIT		17 /* capture relative jumps */
#define ARCH_LBR_IND_JMP_BIT		18 /* capture indirect jumps */
#define ARCH_LBR_REL_CALL_BIT		19 /* capture relative calls */
#define ARCH_LBR_IND_CALL_BIT		20 /* capture indirect calls */
#define ARCH_LBR_RETURN_BIT		21 /* capture near returns */
#define ARCH_LBR_OTHER_BRANCH_BIT	22 /* capture other branches */

#define ARCH_LBR_KERNEL			(1ULL << ARCH_LBR_KERNEL_BIT)
#define ARCH_LBR_USER			(1ULL << ARCH_LBR_USER_BIT)
#define ARCH_LBR_CALL_STACK		(1ULL << ARCH_LBR_CALL_STACK_BIT)
#define ARCH_LBR_JCC			(1ULL << ARCH_LBR_JCC_BIT)
#define ARCH_LBR_REL_JMP		(1ULL << ARCH_LBR_REL_JMP_BIT)
#define ARCH_LBR_IND_JMP		(1ULL << ARCH_LBR_IND_JMP_BIT)
#define ARCH_LBR_REL_CALL		(1ULL << ARCH_LBR_REL_CALL_BIT)
#define ARCH_LBR_IND_CALL		(1ULL << ARCH_LBR_IND_CALL_BIT)
#define ARCH_LBR_RETURN			(1ULL << ARCH_LBR_RETURN_BIT)
#define ARCH_LBR_OTHER_BRANCH		(1ULL << ARCH_LBR_OTHER_BRANCH_BIT)

#define ARCH_LBR_ANY			 \
	(ARCH_LBR_JCC			|\
	 ARCH_LBR_REL_JMP		|\
	 ARCH_LBR_IND_JMP		|\
	 ARCH_LBR_REL_CALL		|\
	 ARCH_LBR_IND_CALL		|\
	 ARCH_LBR_RETURN		|\
	 ARCH_LBR_OTHER_BRANCH)

#define ARCH_LBR_CTL_MASK		0x7f000e
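
/*
 * Illustrative sketch (not from the original source): the legacy LBR_SELECT
 * bits above are "suppress" bits, i.e. a set bit tells the hardware *not* to
 * record that branch class. A hypothetical raw value that keeps only
 * user-space conditional branches would therefore suppress ring 0 and every
 * non-JCC class:
 *
 *	u64 sel = LBR_KERNEL | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |
 *		  LBR_REL_JMP | LBR_IND_JMP | LBR_FAR;
 *
 * Arch LBR inverts this convention: the ARCH_LBR_* bits in MSR_ARCH_LBR_CTL
 * are "capture" bits, which is why the two bit sets are kept separate here.
 */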

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return !!(config & ARCH_LBR_CALL_STACK);

	return !!(config & LBR_CALL_STACK);
}

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
 * otherwise it becomes near impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 debugctl, lbr_select = 0, orig_debugctl;

	/*
	 * No need to unfreeze manually, as v4 can do that as part
	 * of the GLOBAL_STATUS ack.
	 */
	if (pmi && x86_pmu.version >= 4)
		return;

	/*
	 * No need to reprogram LBR_SELECT in a PMI, as it
	 * did not change.
	 */
	if (cpuc->lbr_sel)
		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
		wrmsrl(MSR_LBR_SELECT, lbr_select);

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	orig_debugctl = debugctl;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		debugctl |= DEBUGCTLMSR_LBR;
	/*
	 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
	 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
	 * may cause superfluous increase/decrease of LBR_TOS.
	 */
	if (is_lbr_call_stack_bit_set(lbr_select))
		debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
	else
		debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;

	if (orig_debugctl != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);

	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}

void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
		if (x86_pmu.lbr_has_info)
			wrmsrl(x86_pmu.lbr_info + i, 0);
	}
}

static void intel_pmu_arch_lbr_reset(void)
{
	/* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
	wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
}

void intel_pmu_lbr_reset(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	x86_pmu.lbr_reset();

	cpuc->last_task_ctx = NULL;
	cpuc->last_log_id = 0;
	if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
		wrmsrl(MSR_LBR_SELECT, 0);
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);
	return tos;
}

enum {
	LBR_NONE,
	LBR_VALID,
};
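
/*
 * Context-switch bookkeeping, summarized here as an illustrative sketch (the
 * exact flow lives in __intel_pmu_lbr_save()/__intel_pmu_lbr_restore() below):
 *
 *	sched out: __intel_pmu_lbr_save()    -> lbr_stack_state = LBR_VALID
 *	sched in : __intel_pmu_lbr_restore() -> consume the saved entries,
 *						lbr_stack_state = LBR_NONE
 *
 * If nothing valid was saved (LBR_NONE), the restore path simply wipes the
 * LBRs instead, since stale entries from another address space cannot be
 * resolved.
 */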

/*
 * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
 * are the TSX flags when TSX is supported, but when TSX is not supported
 * they have no consistent behavior:
 *
 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
 *     part of the sign extension.
 *
 * Therefore, if:
 *
 *   1) the LBR format is LBR_FORMAT_EIP_FLAGS2
 *   2) the CPU has no TSX support enabled
 *
 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
 * value from rdmsr() must be converted to have a 61-bit sign extension,
 * ignoring the TSX flags.
 */
static inline bool lbr_from_signext_quirk_needed(void)
{
	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
			   boot_cpu_has(X86_FEATURE_RTM);

	return !tsx_support;
}

static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);

/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Sign extend into bits 61:62 while preserving bit 63.
		 *
		 * Quirk is enabled when TSX is disabled. Therefore TSX bits
		 * in val are always OFF and must be changed to be sign
		 * extension bits. Since bits 59:60 are guaranteed to be
		 * part of the sign extension bits, we can just copy them
		 * to 61:62.
		 */
		val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
	}
	return val;
}

/*
 * If quirk is needed, ensure sign extension is 61 bits:
 */
static u64 lbr_from_signext_quirk_rd(u64 val)
{
	if (static_branch_unlikely(&lbr_from_quirk_key)) {
		/*
		 * Quirk is on when TSX is not enabled. Therefore TSX
		 * flags must be read as OFF.
		 */
		val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
	}
	return val;
}

static __always_inline void wrlbr_from(unsigned int idx, u64 val)
{
	val = lbr_from_signext_quirk_wr(val);
	wrmsrl(x86_pmu.lbr_from + idx, val);
}

static __always_inline void wrlbr_to(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_to + idx, val);
}

static __always_inline void wrlbr_info(unsigned int idx, u64 val)
{
	wrmsrl(x86_pmu.lbr_info + idx, val);
}

static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->from;

	rdmsrl(x86_pmu.lbr_from + idx, val);

	return lbr_from_signext_quirk_rd(val);
}

static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->to;

	rdmsrl(x86_pmu.lbr_to + idx, val);

	return val;
}

static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
{
	u64 val;

	if (lbr)
		return lbr->info;

	rdmsrl(x86_pmu.lbr_info + idx, val);

	return val;
}

static inline void
wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	wrlbr_from(idx, lbr->from);
	wrlbr_to(idx, lbr->to);
	if (need_info)
		wrlbr_info(idx, lbr->info);
}

static inline bool
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
	u64 from = rdlbr_from(idx, NULL);

	/* Don't read invalid entry */
	if (!from)
		return false;

	lbr->from = from;
	lbr->to = rdlbr_to(idx, NULL);
	if (need_info)
		lbr->info = rdlbr_info(idx, NULL);

	return true;
}
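
/*
 * Illustrative note (not from the original source): the save/restore and
 * read paths below treat the legacy LBR MSRs as a ring buffer indexed from
 * the TOS downwards, with x86_pmu.lbr_nr assumed to be a power of two so the
 * wrap can be done with a mask:
 *
 *	mask = x86_pmu.lbr_nr - 1;
 *	for (i = 0; i < x86_pmu.lbr_nr; i++)
 *		idx = (tos - i) & mask;		(i == 0 is the newest record)
 *
 * With e.g. lbr_nr == 32 and tos == 3, the walk visits 3, 2, 1, 0, 31, 30, ...
 */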

void intel_pmu_lbr_restore(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	bool need_info = x86_pmu.lbr_has_info;
	u64 tos = task_ctx->tos;
	unsigned lbr_idx, mask;
	int i;

	mask = x86_pmu.lbr_nr - 1;
	for (i = 0; i < task_ctx->valid_lbrs; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
	}

	for (; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		wrlbr_from(lbr_idx, 0);
		wrlbr_to(lbr_idx, 0);
		if (need_info)
			wrlbr_info(lbr_idx, 0);
	}

	wrmsrl(x86_pmu.lbr_tos, tos);

	if (cpuc->lbr_select)
		wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_restore(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	/* Fast reset the LBRs before restore if the call stack is not full. */
	if (!entries[x86_pmu.lbr_nr - 1].from)
		intel_pmu_arch_lbr_reset();

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!entries[i].from)
			break;
		wrlbr_all(&entries[i], i, true);
	}
}

/*
 * Restore the Architecture LBR state from the xsave area in the perf
 * context data for the task via the XRSTORS instruction.
 */
static void intel_pmu_arch_lbr_xrstors(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
{
	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
		return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);

	return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
}

static inline bool has_lbr_callstack_users(void *ctx)
{
	return task_context_opt(ctx)->lbr_callstack_users ||
	       x86_pmu.lbr_callstack_users;
}

static void __intel_pmu_lbr_restore(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!has_lbr_callstack_users(ctx) ||
	    task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
		intel_pmu_lbr_reset();
		return;
	}

	/*
	 * Do not restore the LBR registers if:
	 * - no one else touched them, and
	 * - they were not cleared in a C-state
	 */
	if ((ctx == cpuc->last_task_ctx) &&
	    (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
	    !lbr_is_reset_in_cstate(ctx)) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_restore(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
}

void intel_pmu_lbr_save(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct x86_perf_task_context *task_ctx = ctx;
	bool need_info = x86_pmu.lbr_has_info;
	unsigned lbr_idx, mask;
	u64 tos;
	int i;

	mask = x86_pmu.lbr_nr - 1;
	tos = intel_pmu_lbr_tos();
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr_idx = (tos - i) & mask;
		if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
			break;
	}
	task_ctx->valid_lbrs = i;
	task_ctx->tos = tos;

	if (cpuc->lbr_select)
		rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_save(void *ctx)
{
	struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
	struct lbr_entry *entries = task_ctx->entries;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!rdlbr_all(&entries[i], i, true))
			break;
	}

	/* LBR call stack is not full. Reset is required in restore. */
	if (i < x86_pmu.lbr_nr)
		entries[x86_pmu.lbr_nr - 1].from = 0;
}

/*
 * Save the Architecture LBR state to the xsave area in the perf
 * context data for the task via the XSAVES instruction.
 */
static void intel_pmu_arch_lbr_xsaves(void *ctx)
{
	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

	xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static void __intel_pmu_lbr_save(void *ctx)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!has_lbr_callstack_users(ctx)) {
		task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
		return;
	}

	x86_pmu.lbr_save(ctx);

	task_context_opt(ctx)->lbr_stack_state = LBR_VALID;

	cpuc->last_task_ctx = ctx;
	cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}

void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
			      struct task_struct *task, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_ctx_data *ctx_data;
	void *task_ctx;

	if (!cpuc->lbr_users)
		return;

	/*
	 * If LBR callstack feature is enabled and the stack was saved when
	 * the task was scheduled out, restore the stack. Otherwise flush
	 * the LBR stack.
	 */
	rcu_read_lock();
	ctx_data = rcu_dereference(task->perf_ctx_data);
	task_ctx = ctx_data ? ctx_data->data : NULL;
	if (task_ctx) {
		if (sched_in)
			__intel_pmu_lbr_restore(task_ctx);
		else
			__intel_pmu_lbr_save(task_ctx);
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	/*
	 * Since a context switch can flip the address space and LBR entries
	 * are not tagged with an identifier, we need to wipe the LBR, even for
	 * per-cpu events. You simply cannot resolve the branches from the old
	 * address space.
	 */
	if (sched_in)
		intel_pmu_lbr_reset();
}

static inline bool branch_user_callstack(unsigned br_sel)
{
	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 1;

	cpuc->br_sel = event->hw.branch_reg.reg;

	if (branch_user_callstack(cpuc->br_sel)) {
		if (event->attach_state & PERF_ATTACH_TASK) {
			struct task_struct *task = event->hw.target;
			struct perf_ctx_data *ctx_data;

			rcu_read_lock();
			ctx_data = rcu_dereference(task->perf_ctx_data);
			if (ctx_data)
				task_context_opt(ctx_data->data)->lbr_callstack_users++;
			rcu_read_unlock();
		} else
			x86_pmu.lbr_callstack_users++;
	}
	/*
	 * Request pmu::sched_task() callback, which will fire inside the
	 * regular perf event scheduling, so that call will:
	 *
	 *  - restore or wipe; when LBR-callstack,
	 *  - wipe; otherwise,
	 *
	 * when this is from __perf_event_task_sched_in().
	 *
	 * However, if this is from perf_install_in_context(), no such callback
	 * will follow and we'll need to reset the LBR here if this is the
	 * first LBR event.
	 *
	 * The problem is, we cannot tell these cases apart... but we can
	 * exclude the biggest chunk of cases by looking at
	 * event->total_time_running. An event that has accrued runtime cannot
	 * be 'new'. Conversely, a new event can get installed through the
	 * context switch path for the first time.
	 */
	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users++;
	perf_sched_cb_inc(event->pmu);
	if (!cpuc->lbr_users++ && !event->total_time_running)
		intel_pmu_lbr_reset();
}

void release_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (kmem_cache && cpuc->lbr_xsave) {
			kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
			cpuc->lbr_xsave = NULL;
		}
	}
}

void reserve_lbr_buffers(void)
{
	struct kmem_cache *kmem_cache;
	struct cpu_hw_events *cpuc;
	int cpu;

	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
		return;

	for_each_possible_cpu(cpu) {
		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
		if (!kmem_cache || cpuc->lbr_xsave)
			continue;

		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
							GFP_KERNEL | __GFP_ZERO,
							cpu_to_node(cpu));
	}
}

void intel_pmu_lbr_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	if (branch_user_callstack(cpuc->br_sel)) {
		if (event->attach_state & PERF_ATTACH_TASK) {
			struct task_struct *task = event->hw.target;
			struct perf_ctx_data *ctx_data;

			rcu_read_lock();
			ctx_data = rcu_dereference(task->perf_ctx_data);
			if (ctx_data)
				task_context_opt(ctx_data->data)->lbr_callstack_users--;
			rcu_read_unlock();
		} else
			x86_pmu.lbr_callstack_users--;
	}

	if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
		cpuc->lbr_select = 0;

	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users--;
	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
	perf_sched_cb_dec(event->pmu);

	/*
	 * The logged occurrences information is only valid for the
	 * current LBR group. If another LBR group is scheduled in
	 * later, the information from the stale LBRs will be wrongly
	 * interpreted. Reset the LBRs here.
	 *
	 * Only clear once for a branch counter group with the leader
	 * event. Because
	 * - Cannot simply reset the LBRs with the !cpuc->lbr_users.
	 *   Because it's possible that the last LBR user is not in a
	 *   branch counter group, e.g., a branch_counters group +
	 *   several normal LBR events.
	 * - The LBR reset can be done with any one of the events in a
	 *   branch counter group, since they are always scheduled together.
	 *   It's easy to force the leader event to be an LBR event.
	 */
	if (is_branch_counters_group(event) && event == event->group_leader)
		intel_pmu_lbr_reset();
}

static inline bool vlbr_exclude_host(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
			(unsigned long *)&cpuc->intel_ctrl_guest_mask);
}

void intel_pmu_lbr_enable_all(bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host())
		__intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users && !vlbr_exclude_host()) {
		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
			return __intel_pmu_arch_lbr_disable();

		__intel_pmu_lbr_disable();
	}
}

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	struct perf_branch_entry *br = cpuc->lbr_entries;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		perf_clear_branch_entry_bitfields(br);

		br->from	= msr_lastbranch.from;
		br->to		= msr_lastbranch.to;
		br++;
	}
	cpuc->lbr_stack.nr = i;
	cpuc->lbr_stack.hw_idx = tos;
}
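
/*
 * Illustrative layout note (not from the original source) for the non-INFO
 * 64-bit formats decoded below: when x86_pmu.lbr_from_flags is set, the
 * MISPRED (and, with lbr_has_tsx, IN_TX/ABORT) flags live in the top bits of
 * the FROM MSR and must be stripped by sign-extending the address; when
 * x86_pmu.lbr_to_cycles is set, TO[63:48] carries the cycle count:
 *
 *	mis    = !!(from & LBR_FROM_FLAG_MISPRED);
 *	from   = (u64)((((s64)from) << skip) >> skip);	(skip = 1 or 3 bits)
 *	cycles = (to >> 48) & LBR_INFO_CYCLES;
 */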

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	bool need_info = false, call_stack = false;
	unsigned long mask = x86_pmu.lbr_nr - 1;
	struct perf_branch_entry *br = cpuc->lbr_entries;
	u64 tos = intel_pmu_lbr_tos();
	int i;
	int out = 0;
	int num = x86_pmu.lbr_nr;

	if (cpuc->lbr_sel) {
		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
			call_stack = true;
	}

	for (i = 0; i < num; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
		u16 cycles = 0;

		from = rdlbr_from(lbr_idx, NULL);
		to   = rdlbr_to(lbr_idx, NULL);

		/*
		 * Read LBR call stack entries
		 * until invalid entry (0s) is detected.
		 */
		if (call_stack && !from)
			break;

		if (x86_pmu.lbr_has_info) {
			if (need_info) {
				u64 info;

				info = rdlbr_info(lbr_idx, NULL);
				mis = !!(info & LBR_INFO_MISPRED);
				pred = !mis;
				cycles = (info & LBR_INFO_CYCLES);
				if (x86_pmu.lbr_has_tsx) {
					in_tx = !!(info & LBR_INFO_IN_TX);
					abort = !!(info & LBR_INFO_ABORT);
				}
			}
		} else {
			int skip = 0;

			if (x86_pmu.lbr_from_flags) {
				mis = !!(from & LBR_FROM_FLAG_MISPRED);
				pred = !mis;
				skip = 1;
			}
			if (x86_pmu.lbr_has_tsx) {
				in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
				abort = !!(from & LBR_FROM_FLAG_ABORT);
				skip = 3;
			}
			from = (u64)((((s64)from) << skip) >> skip);

			if (x86_pmu.lbr_to_cycles) {
				cycles = ((to >> 48) & LBR_INFO_CYCLES);
				to = (u64)((((s64)to) << 16) >> 16);
			}
		}

		/*
		 * Some CPUs report duplicated abort records,
		 * with the second entry not having an abort bit set.
		 * Skip them here. This loop runs backwards,
		 * so we need to undo the previous record.
		 * If the abort just happened outside the window
		 * the extra entry cannot be removed.
		 */
		if (abort && x86_pmu.lbr_double_abort && out > 0)
			out--;

		perf_clear_branch_entry_bitfields(br+out);
		br[out].from	 = from;
		br[out].to	 = to;
		br[out].mispred	 = mis;
		br[out].predicted = pred;
		br[out].in_tx	 = in_tx;
		br[out].abort	 = abort;
		br[out].cycles	 = cycles;
		out++;
	}
	cpuc->lbr_stack.nr = out;
	cpuc->lbr_stack.hw_idx = tos;
}

static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);

static __always_inline int get_lbr_br_type(u64 info)
{
	int type = 0;

	if (static_branch_likely(&x86_lbr_type))
		type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;

	return type;
}

static __always_inline bool get_lbr_mispred(u64 info)
{
	bool mispred = 0;

	if (static_branch_likely(&x86_lbr_mispred))
		mispred = !!(info & LBR_INFO_MISPRED);

	return mispred;
}

static __always_inline u16 get_lbr_cycles(u64 info)
{
	u16 cycles = info & LBR_INFO_CYCLES;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
	    (!static_branch_likely(&x86_lbr_cycles) ||
	     !(info & LBR_INFO_CYC_CNT_VALID)))
		cycles = 0;

	return cycles;
}

static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);

static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
				struct lbr_entry *entries)
{
	struct perf_branch_entry *e;
	struct lbr_entry *lbr;
	u64 from, to, info;
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		lbr = entries ? &entries[i] : NULL;
		e = &cpuc->lbr_entries[i];

		from = rdlbr_from(i, lbr);
		/*
		 * Read LBR entries until invalid entry (0s) is detected.
		 */
		if (!from)
			break;

		to = rdlbr_to(i, lbr);
		info = rdlbr_info(i, lbr);

		perf_clear_branch_entry_bitfields(e);

		e->from		= from;
		e->to		= to;
		e->mispred	= get_lbr_mispred(info);
		e->predicted	= !e->mispred;
		e->in_tx	= !!(info & LBR_INFO_IN_TX);
		e->abort	= !!(info & LBR_INFO_ABORT);
		e->cycles	= get_lbr_cycles(info);
		e->type		= get_lbr_br_type(info);

		/*
		 * Leverage the reserved field of cpuc->lbr_entries[i] to
		 * temporarily store the branch counters information.
		 * The later code will decide what content can be disclosed
		 * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
		 */
		e->reserved	= (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
	}

	cpuc->lbr_stack.nr = i;
}
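
/*
 * Illustrative sketch (not from the original source) of the counter bits
 * stashed in e->reserved above: LBR_INFO packs LBR_INFO_BR_CNTR_NUM logged
 * counters of LBR_INFO_BR_CNTR_BITS bits each, starting at
 * LBR_INFO_BR_CNTR_OFFSET. Extracting counter n from the raw value would
 * look roughly like:
 *
 *	cnt_n = (info >> (LBR_INFO_BR_CNTR_OFFSET +
 *			  n * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
 *
 * intel_pmu_lbr_counters_reorder() below re-packs these per-counter values
 * into the order in which the events were enabled.
 */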

/*
 * The enabled order may be different from the counter order.
 * Update the lbr_counters with the enabled order.
 */
static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
					   struct perf_event *event)
{
	int i, j, pos = 0, order[X86_PMC_IDX_MAX];
	struct perf_event *leader, *sibling;
	u64 src, dst, cnt;

	leader = event->group_leader;
	if (branch_sample_counters(leader))
		order[pos++] = leader->hw.idx;

	for_each_sibling_event(sibling, leader) {
		if (!branch_sample_counters(sibling))
			continue;
		order[pos++] = sibling->hw.idx;
	}

	WARN_ON_ONCE(!pos);

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
		src = cpuc->lbr_entries[i].reserved;
		dst = 0;
		for (j = 0; j < pos; j++) {
			cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
			dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
		}
		cpuc->lbr_counters[i] = dst;
		cpuc->lbr_entries[i].reserved = 0;
	}
}

void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
				struct cpu_hw_events *cpuc,
				struct perf_event *event)
{
	if (is_branch_counters_group(event)) {
		intel_pmu_lbr_counters_reorder(cpuc, event);
		perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
		return;
	}

	perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}

static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
	intel_pmu_store_lbr(cpuc, NULL);
}

static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
{
	struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;

	if (!xsave) {
		intel_pmu_store_lbr(cpuc, NULL);
		return;
	}
	xsaves(&xsave->xsave, XFEATURE_MASK_LBR);

	intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
}

void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * Don't read when all LBR users are using adaptive PEBS.
	 *
	 * This could be smarter and actually check the event,
	 * but this simple approach seems to work for now.
	 */
	if (!cpuc->lbr_users || vlbr_exclude_host() ||
	    cpuc->lbr_users == cpuc->lbr_pebs_users)
		return;

	x86_pmu.lbr_read(cpuc);

	intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
	u64 br_type = event->attr.branch_sample_type;
	int mask = 0;

	if (br_type & PERF_SAMPLE_BRANCH_USER)
		mask |= X86_BR_USER;

	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
		mask |= X86_BR_KERNEL;

	/* we ignore BRANCH_HV here */

	if (br_type & PERF_SAMPLE_BRANCH_ANY)
		mask |= X86_BR_ANY;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
		mask |= X86_BR_ANY_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
		mask |= X86_BR_IND_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
		mask |= X86_BR_ABORT;

	if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
		mask |= X86_BR_IN_TX;

	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
		mask |= X86_BR_NO_TX;

	if (br_type & PERF_SAMPLE_BRANCH_COND)
		mask |= X86_BR_JCC;

	if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
		if (!x86_pmu_has_lbr_callstack())
			return -EOPNOTSUPP;
		if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
			return -EINVAL;
		mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
			X86_BR_CALL_STACK;
	}

	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
		mask |= X86_BR_IND_JMP;

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
	 */
	event->hw.branch_reg.reg = mask;
	return 0;
}
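
/*
 * Worked example (illustrative only): a user request of
 * PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_CALL ends up as
 *
 *	event->hw.branch_reg.reg = X86_BR_USER | X86_BR_ANY_CALL;
 *
 * in the SW filter above, while the HW filter below translates the same
 * request into LBR_SELECT/LBR_CTL bits. Branches that the hardware captures
 * but the SW mask rejects are dropped later in intel_pmu_lbr_filter().
 */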

/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
	struct hw_perf_event_extra *reg;
	u64 br_type = event->attr.branch_sample_type;
	u64 mask = 0, v;
	int i;

	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
		if (!(br_type & (1ULL << i)))
			continue;

		v = x86_pmu.lbr_sel_map[i];
		if (v == LBR_NOT_SUPP)
			return -EOPNOTSUPP;

		if (v != LBR_IGN)
			mask |= v;
	}

	reg = &event->hw.branch_reg;
	reg->idx = EXTRA_REG_LBR;

	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
		reg->config = mask;

		/*
		 * The Arch LBR HW can retrieve the common branch types
		 * from the LBR_INFO. It doesn't require the high overhead
		 * SW disassemble.
		 * Enable the branch type by default for the Arch LBR.
		 */
		reg->reg |= X86_BR_TYPE_SAVE;
		return 0;
	}

	/*
	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
	 * in suppress mode. So LBR_SELECT should be set to
	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
	 * But the 10th bit LBR_CALL_STACK does not operate
	 * in suppress mode.
	 */
	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
	    x86_pmu.lbr_has_info)
		reg->config |= LBR_NO_INFO;

	return 0;
}

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
	int ret = 0;

	/*
	 * no LBR on this PMU
	 */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/*
	 * setup SW LBR filter
	 */
	ret = intel_pmu_setup_sw_lbr_filter(event);
	if (ret)
		return ret;

	/*
	 * setup HW LBR filter, if any
	 */
	if (x86_pmu.lbr_sel_map)
		ret = intel_pmu_setup_hw_lbr_filter(event);

	return ret;
}

enum {
	ARCH_LBR_BR_TYPE_JCC			= 0,
	ARCH_LBR_BR_TYPE_NEAR_IND_JMP		= 1,
	ARCH_LBR_BR_TYPE_NEAR_REL_JMP		= 2,
	ARCH_LBR_BR_TYPE_NEAR_IND_CALL		= 3,
	ARCH_LBR_BR_TYPE_NEAR_REL_CALL		= 4,
	ARCH_LBR_BR_TYPE_NEAR_RET		= 5,
	ARCH_LBR_BR_TYPE_KNOWN_MAX		= ARCH_LBR_BR_TYPE_NEAR_RET,

	ARCH_LBR_BR_TYPE_MAP_MAX		= 16,
};

static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
	[ARCH_LBR_BR_TYPE_JCC]			= X86_BR_JCC,
	[ARCH_LBR_BR_TYPE_NEAR_IND_JMP]		= X86_BR_IND_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_REL_JMP]		= X86_BR_JMP,
	[ARCH_LBR_BR_TYPE_NEAR_IND_CALL]	= X86_BR_IND_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_REL_CALL]	= X86_BR_CALL,
	[ARCH_LBR_BR_TYPE_NEAR_RET]		= X86_BR_RET,
};

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
	u64 from, to;
	int br_sel = cpuc->br_sel;
	int i, j, type, to_plm;
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {

		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		type = cpuc->lbr_entries[i].type;

		/*
		 * Parse the branch type recorded in LBR_x_INFO MSR.
		 * Doesn't support OTHER_BRANCH decoding for now.
		 * The OTHER_BRANCH branch type still relies on software decoding.
		 */
		if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
		    type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
			to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
			type = arch_lbr_br_type_map[type] | to_plm;
		} else
			type = branch_type(from, to, cpuc->lbr_entries[i].abort);
		if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
			if (cpuc->lbr_entries[i].in_tx)
				type |= X86_BR_IN_TX;
			else
				type |= X86_BR_NO_TX;
		}

		/* if type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
		return;

	/* remove all entries with from=0 */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			j = i;
			while (++j < cpuc->lbr_stack.nr) {
				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
				cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
			}
			cpuc->lbr_stack.nr--;
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}

void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* Cannot get TOS for large PEBS and Arch LBR */
	if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
	    (cpuc->n_pebs == cpuc->n_large_pebs))
		cpuc->lbr_stack.hw_idx = -1ULL;
	else
		cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();

	intel_pmu_store_lbr(cpuc, lbr);
	intel_pmu_lbr_filter(cpuc);
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
						| LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
	 */
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
	/*
	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
	 */
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
						| LBR_RETURN | LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
};

static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= ARCH_LBR_ANY,
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= ARCH_LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= ARCH_LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= ARCH_LBR_RETURN |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_OTHER_BRANCH,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= ARCH_LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= ARCH_LBR_JCC,
	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= ARCH_LBR_REL_CALL |
						  ARCH_LBR_IND_CALL |
						  ARCH_LBR_RETURN |
						  ARCH_LBR_CALL_STACK,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= ARCH_LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= ARCH_LBR_REL_CALL,
};

/* core */
void __init intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr	   = 4;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - workaround LBR_SEL errata (see above)
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
}

static inline struct kmem_cache *
create_lbr_kmem_cache(size_t size, size_t align)
{
	return kmem_cache_create("x86_lbr", size, align, 0, NULL);
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr	 = 16;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
	size_t size = sizeof(struct x86_perf_task_context);

	x86_pmu.lbr_nr	 = 32;
	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
	x86_pmu.lbr_info = MSR_LBR_INFO_0;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

	x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);

	/*
	 * SW branch filter usage:
	 * - support syscall, sysret capture.
	 *   That requires LBR_FAR but that means far
	 *   jmps need to be filtered out
	 */
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
	/*
	 * only models starting at stepping 10 seem
	 * to have an operational LBR which can freeze
	 * on PMU interrupt
	 */
	if (boot_cpu_data.x86_vfm == INTEL_ATOM_BONNELL
	    && boot_cpu_data.x86_stepping < 10) {
		pr_cont("LBR disabled due to erratum");
		return;
	}

	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
}

/* slm */
void __init intel_pmu_lbr_init_slm(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

	/*
	 * SW branch filter usage:
	 * - compensate for lack of HW filter
	 */
	pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = MSR_LBR_TOS;
	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

	/* Knights Landing does have MISPREDICT bit */
	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
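
/*
 * Summary of the format handling below (descriptive only): the legacy
 * lbr_format reported in PERF_CAPABILITIES decides which software decoding
 * aids are enabled:
 *
 *	EIP_FLAGS	-> lbr_from_flags
 *	EIP_FLAGS2	-> lbr_from_flags + lbr_has_tsx (+ signext quirk if no TSX)
 *	TIME		-> lbr_from_flags + lbr_to_cycles
 *	INFO/INFO2	-> lbr_has_info (INFO additionally sets lbr_has_tsx)
 */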

void intel_pmu_lbr_init(void)
{
	switch (x86_pmu.intel_cap.lbr_format) {
	case LBR_FORMAT_EIP_FLAGS2:
		x86_pmu.lbr_has_tsx = 1;
		x86_pmu.lbr_from_flags = 1;
		if (lbr_from_signext_quirk_needed())
			static_branch_enable(&lbr_from_quirk_key);
		break;

	case LBR_FORMAT_EIP_FLAGS:
		x86_pmu.lbr_from_flags = 1;
		break;

	case LBR_FORMAT_INFO:
		x86_pmu.lbr_has_tsx = 1;
		fallthrough;
	case LBR_FORMAT_INFO2:
		x86_pmu.lbr_has_info = 1;
		break;

	case LBR_FORMAT_TIME:
		x86_pmu.lbr_from_flags = 1;
		x86_pmu.lbr_to_cycles = 1;
		break;
	}

	if (x86_pmu.lbr_has_info) {
		/*
		 * Only used in combination with baseline pebs.
		 */
		static_branch_enable(&x86_lbr_mispred);
		static_branch_enable(&x86_lbr_cycles);
	}
}

/*
 * LBR state size is variable based on the max number of registers.
 * This calculates the expected state size, which should match
 * what the hardware enumerates for the size of XFEATURE_LBR.
 */
static inline unsigned int get_lbr_state_size(void)
{
	return sizeof(struct arch_lbr_state) +
	       x86_pmu.lbr_nr * sizeof(struct lbr_entry);
}

static bool is_arch_lbr_xsave_available(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVES))
		return false;

	/*
	 * Check the LBR state with the corresponding software structure.
	 * Disable LBR XSAVES support if the size doesn't match.
	 */
	if (xfeature_size(XFEATURE_LBR) == 0)
		return false;

	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
		return false;

	return true;
}
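
/*
 * Worked example (illustrative only) for the CPUID leaf 0x1C decoding done in
 * intel_pmu_arch_lbr_init() below: bit n of lbr_depth_mask means a depth of
 * 8 * (n + 1) entries is supported, so the deepest supported configuration is
 *
 *	lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
 *
 * e.g. a depth mask of 0x7 (depths 8, 16 and 24 supported) yields lbr_nr = 24,
 * which is then programmed into MSR_ARCH_LBR_DEPTH.
 */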

void __init intel_pmu_arch_lbr_init(void)
{
	struct pmu *pmu = x86_get_pmu(smp_processor_id());
	union cpuid28_eax eax;
	union cpuid28_ebx ebx;
	union cpuid28_ecx ecx;
	unsigned int unused_edx;
	bool arch_lbr_xsave;
	size_t size;
	u64 lbr_nr;

	/* Arch LBR Capabilities */
	cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);

	lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
	if (!lbr_nr)
		goto clear_arch_lbr;

	/* Apply the max depth of Arch LBR */
	if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
		goto clear_arch_lbr;

	x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
	x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
	x86_pmu.lbr_lip = eax.split.lbr_lip;
	x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
	x86_pmu.lbr_filter = ebx.split.lbr_filter;
	x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
	x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
	x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
	x86_pmu.lbr_counters = ecx.split.lbr_counters;
	x86_pmu.lbr_nr = lbr_nr;

	if (!!x86_pmu.lbr_counters)
		x86_pmu.flags |= PMU_FL_BR_CNTR;

	if (x86_pmu.lbr_mispred)
		static_branch_enable(&x86_lbr_mispred);
	if (x86_pmu.lbr_timed_lbr)
		static_branch_enable(&x86_lbr_cycles);
	if (x86_pmu.lbr_br_type)
		static_branch_enable(&x86_lbr_type);

	arch_lbr_xsave = is_arch_lbr_xsave_available();
	if (arch_lbr_xsave) {
		size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
		       get_lbr_state_size();
		pmu->task_ctx_cache = create_lbr_kmem_cache(size,
							    XSAVE_ALIGNMENT);
	}

	if (!pmu->task_ctx_cache) {
		arch_lbr_xsave = false;

		size = sizeof(struct x86_perf_task_context_arch_lbr) +
		       lbr_nr * sizeof(struct lbr_entry);
		pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
	}

	x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
	x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
	x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;

	/* LBR callstack requires both CPL and Branch Filtering support */
	if (!x86_pmu.lbr_cpl ||
	    !x86_pmu.lbr_filter ||
	    !x86_pmu.lbr_call_stack)
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;

	if (!x86_pmu.lbr_cpl) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
	} else if (!x86_pmu.lbr_filter) {
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
		arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
	}

	x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
	x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;

	if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
		x86_pmu.lbr_ctl_map = NULL;

	x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
	if (arch_lbr_xsave) {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
		pr_cont("XSAVE ");
	} else {
		x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
		x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
		x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
	}

	pr_cont("Architectural LBR, ");

	return;

clear_arch_lbr:
	setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
}

/**
 * x86_perf_get_lbr - get the LBR records information
 *
 * @lbr: the caller's memory to store the LBR records information
 */
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
	lbr->nr = x86_pmu.lbr_nr;
	lbr->from = x86_pmu.lbr_from;
	lbr->to = x86_pmu.lbr_to;
	lbr->info = x86_pmu.lbr_info;
	lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);

struct event_constraint vlbr_constraint =
	__EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
			  FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);
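
/*
 * Usage sketch (illustrative only, not part of this file): a hypothetical
 * in-kernel consumer, e.g. a hypervisor module that wants to know which MSRs
 * to pass through, could query the LBR geometry like this:
 *
 *	struct x86_pmu_lbr records;
 *
 *	x86_perf_get_lbr(&records);
 *	if (records.nr)
 *		pr_info("%u LBRs, FROM base MSR 0x%x\n",
 *			records.nr, records.from);
 */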