// SPDX-License-Identifier: GPL-2.0
#include "util/cgroup.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/hashmap.h"
#include "util/machine.h"
#include "util/map.h"
#include "util/symbol.h"
#include "util/target.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/lock-contention.h"
#include <linux/zalloc.h>
#include <linux/string.h>
#include <api/fs/fs.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <inttypes.h>

#include "bpf_skel/lock_contention.skel.h"
#include "bpf_skel/lock_data.h"

static struct lock_contention_bpf *skel;
static bool has_slab_iter;
static struct hashmap slab_hash;

static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
{
	return key;
}

static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
{
	return key1 == key2;
}

static void check_slab_cache_iter(struct lock_contention *con)
{
	s32 ret;

	hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);

	con->btf = btf__load_vmlinux_btf();
	if (con->btf == NULL) {
		pr_debug("BTF loading failed: %s\n", strerror(errno));
		return;
	}

	ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
	if (ret < 0) {
		bpf_program__set_autoload(skel->progs.slab_cache_iter, false);
		pr_debug("slab cache iterator is not available: %d\n", ret);
		return;
	}

	has_slab_iter = true;

	bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries);
}

static void run_slab_cache_iter(void)
{
	int fd;
	char buf[256];
	long key, *prev_key;

	if (!has_slab_iter)
		return;

	fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter));
	if (fd < 0) {
		pr_debug("cannot create slab cache iter: %d\n", fd);
		return;
	}

	/* This will run the bpf program */
	while (read(fd, buf, sizeof(buf)) > 0)
		continue;

	close(fd);

	/* Read the slab cache map and build a hash with IDs */
	fd = bpf_map__fd(skel->maps.slab_caches);
	prev_key = NULL;
	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
		struct slab_cache_data *data;

		data = malloc(sizeof(*data));
		if (data == NULL)
			break;

		if (bpf_map_lookup_elem(fd, &key, data) < 0) {
			/* don't leak the entry on a failed lookup */
			free(data);
			break;
		}

		hashmap__add(&slab_hash, data->id, data);
		prev_key = &key;
	}
}

static void exit_slab_cache_iter(void)
{
	struct hashmap_entry *cur;
	unsigned bkt;

	hashmap__for_each_entry(&slab_hash, cur, bkt)
		free(cur->pvalue);

	hashmap__clear(&slab_hash);
}

static void init_numa_data(struct lock_contention *con)
{
	struct symbol *sym;
	struct map *kmap;
	char *buf = NULL, *p;
	size_t len;
	long last = -1;
	int ret;

	/*
	 * 'struct zone' is embedded in 'struct pglist_data' as an array.
	 * As we may not have full information of the struct zone in the
	 * (fake) vmlinux.h, let's get the actual size from BTF.
	 */
	ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT);
	if (ret < 0) {
		pr_debug("cannot get type of struct zone: %d\n", ret);
		return;
	}

	ret = btf__resolve_size(con->btf, ret);
	if (ret < 0) {
		pr_debug("cannot get size of struct zone: %d\n", ret);
		return;
	}
	skel->rodata->sizeof_zone = ret;

	/* UMA system doesn't have 'node_data[]' - just use contig_page_data. */
	sym = machine__find_kernel_symbol_by_name(con->machine,
						  "contig_page_data",
						  &kmap);
	if (sym) {
		skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start);
		map__put(kmap);
		return;
	}

	/*
	 * The 'node_data' is an array of pointers to struct pglist_data.
	 * It needs to follow the pointer for each node in BPF to get the
	 * address of struct pglist_data and its zones.
	 */
	sym = machine__find_kernel_symbol_by_name(con->machine,
						  "node_data",
						  &kmap);
	if (sym == NULL)
		return;

	skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start);
	map__put(kmap);

	/* get the number of online nodes using the last node number + 1 */
	ret = sysfs__read_str("devices/system/node/online", &buf, &len);
	if (ret < 0) {
		pr_debug("failed to read online node: %d\n", ret);
		return;
	}

	p = buf;
	while (p && *p) {
		last = strtol(p, &p, 0);

		if (p && (*p == ',' || *p == '-' || *p == '\n'))
			p++;
	}
	skel->rodata->nr_nodes = last + 1;
	free(buf);
}

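/*
 * Prepare the lock contention BPF skeleton: open it, size the maps from the
 * command line options, resolve symbol-based filters to kernel addresses,
 * load and attach the programs, and fill in the filter maps.
 *
 * A rough sketch of the expected call sequence from the perf lock contention
 * command (the exact call sites live in builtin-lock.c and may differ):
 *
 *	lock_contention_prepare(&con);
 *	lock_contention_start();
 *	...  run or wait for the workload  ...
 *	lock_contention_stop();
 *	lock_contention_read(&con);
 *	lock_contention_finish(&con);
 */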
int lock_contention_prepare(struct lock_contention *con)
{
	int i, fd;
	int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1;
	struct evlist *evlist = con->evlist;
	struct target *target = con->target;

	skel = lock_contention_bpf__open();
	if (!skel) {
		pr_err("Failed to open lock-contention BPF skeleton\n");
		return -1;
	}

	bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
	bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
	bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);

	if (con->aggr_mode == LOCK_AGGR_TASK)
		bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
	else
		bpf_map__set_max_entries(skel->maps.task_data, 1);

	if (con->save_callstack) {
		bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
		if (con->owner) {
			bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64));
			bpf_map__set_key_size(skel->maps.owner_stacks,
					      con->max_stack * sizeof(u64));
			bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries);
			bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries);
			bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries);
			skel->rodata->max_stack = con->max_stack;
		}
	} else {
		bpf_map__set_max_entries(skel->maps.stacks, 1);
	}

	if (target__has_cpu(target)) {
		skel->rodata->has_cpu = 1;
		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
	}
	if (target__has_task(target)) {
		skel->rodata->has_task = 1;
		ntasks = perf_thread_map__nr(evlist->core.threads);
	}
	if (con->filters->nr_types) {
		skel->rodata->has_type = 1;
		ntypes = con->filters->nr_types;
	}
	if (con->filters->nr_cgrps) {
		skel->rodata->has_cgroup = 1;
		ncgrps = con->filters->nr_cgrps;
	}

	/* resolve lock name filters to addr */
	if (con->filters->nr_syms) {
		struct symbol *sym;
		struct map *kmap;
		unsigned long *addrs;

		for (i = 0; i < con->filters->nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(con->machine,
								  con->filters->syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   con->filters->syms[i]);
				continue;
			}

			addrs = realloc(con->filters->addrs,
					(con->filters->nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				continue;
			}

			addrs[con->filters->nr_addrs++] = map__unmap_ip(kmap, sym->start);
			con->filters->addrs = addrs;
		}
		naddrs = con->filters->nr_addrs;
		skel->rodata->has_addr = 1;
	}

	/* resolve lock name in delays */
	if (con->nr_delays) {
		struct symbol *sym;
		struct map *kmap;

		for (i = 0; i < con->nr_delays; i++) {
			sym = machine__find_kernel_symbol_by_name(con->machine,
								  con->delays[i].sym,
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   con->delays[i].sym);
				continue;
			}

			con->delays[i].addr = map__unmap_ip(kmap, sym->start);
		}
		skel->rodata->lock_delay = 1;
		bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays);
	}

	bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
	bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
	bpf_map__set_max_entries(skel->maps.type_filter, ntypes);
	bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
	bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);

	skel->rodata->stack_skip = con->stack_skip;
	skel->rodata->aggr_mode = con->aggr_mode;
	skel->rodata->needs_callstack = con->save_callstack;
	skel->rodata->lock_owner = con->owner;

	if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
		if (cgroup_is_v2("perf_event"))
			skel->rodata->use_cgroup_v2 = 1;
	}

	check_slab_cache_iter(con);

	if (con->filters->nr_slabs && has_slab_iter) {
		skel->rodata->has_slab = 1;
		nslabs = con->filters->nr_slabs;
	}

	bpf_map__set_max_entries(skel->maps.slab_filter, nslabs);

	init_numa_data(con);

	if (lock_contention_bpf__load(skel) < 0) {
		pr_err("Failed to load lock-contention BPF skeleton\n");
		return -1;
	}

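	/*
	 * The filter maps are only usable via bpf_map__fd() once the skeleton
	 * is loaded.  Each of them acts as a set: the key is an allowed CPU,
	 * task, lock type, lock address or cgroup, and the value is simply 1.
	 */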
	if (target__has_cpu(target)) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

	if (target__has_task(target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

	if (target__none(target) && evlist->workload.pid > 0) {
		u32 pid = evlist->workload.pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);
		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
	}

	if (con->filters->nr_types) {
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.type_filter);

		for (i = 0; i < con->filters->nr_types; i++)
			bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY);
	}

	if (con->filters->nr_addrs) {
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.addr_filter);

		for (i = 0; i < con->filters->nr_addrs; i++)
			bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY);
	}

	if (con->filters->nr_cgrps) {
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cgroup_filter);

		for (i = 0; i < con->filters->nr_cgrps; i++)
			bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
	}

	if (con->nr_delays) {
		fd = bpf_map__fd(skel->maps.lock_delays);

		for (i = 0; i < con->nr_delays; i++)
			bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY);
	}

	if (con->aggr_mode == LOCK_AGGR_CGROUP)
		read_all_cgroups(&con->cgroups);

	bpf_program__set_autoload(skel->progs.collect_lock_syms, false);

	lock_contention_bpf__attach(skel);

	/* run the slab iterator after attaching */
	run_slab_cache_iter();

	if (con->filters->nr_slabs) {
		u8 val = 1;
		int cache_fd;
		long key, *prev_key;

		fd = bpf_map__fd(skel->maps.slab_filter);

		/* Scan the slab cache map and add matching cache addresses to the filter */
		cache_fd = bpf_map__fd(skel->maps.slab_caches);
		prev_key = NULL;
		while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) {
			struct slab_cache_data data;

			if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0)
				break;

			for (i = 0; i < con->filters->nr_slabs; i++) {
				if (!strcmp(con->filters->slabs[i], data.name)) {
					bpf_map_update_elem(fd, &key, &val, BPF_ANY);
					break;
				}
			}
			prev_key = &key;
		}
	}

	return 0;
}

/*
 * Run the BPF program directly using BPF_PROG_TEST_RUN to update the end
 * timestamp in ktime so that it can calculate delta easily.
 */
static void mark_end_timestamp(void)
{
	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
			    .flags = BPF_F_TEST_RUN_ON_CPU,
	);
	int prog_fd = bpf_program__fd(skel->progs.end_timestamp);

	bpf_prog_test_run_opts(prog_fd, &opts);
}

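/*
 * Fold a lock wait that is still pending (i.e. an entry left in a tstamp
 * map without a matching contention_end event) into the lock_stat map
 * entry selected by the aggregation mode, using end_ts as the end of the
 * wait.
 */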
static void update_lock_stat(int map_fd, int pid, u64 end_ts,
			     enum lock_aggr_mode aggr_mode,
			     struct tstamp_data *ts_data)
{
	u64 delta;
	struct contention_key stat_key = {};
	struct contention_data stat_data;

	if (ts_data->timestamp >= end_ts)
		return;

	delta = end_ts - ts_data->timestamp;

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		stat_key.stack_id = ts_data->stack_id;
		break;
	case LOCK_AGGR_TASK:
		stat_key.pid = pid;
		break;
	case LOCK_AGGR_ADDR:
		stat_key.lock_addr_or_cgroup = ts_data->lock;
		break;
	case LOCK_AGGR_CGROUP:
		/* TODO */
		return;
	default:
		return;
	}

	if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0)
		return;

	stat_data.total_time += delta;
	stat_data.count++;

	if (delta > stat_data.max_time)
		stat_data.max_time = delta;
	if (delta < stat_data.min_time)
		stat_data.min_time = delta;

	bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST);
}

/*
 * Account entries in the tstamp map (which didn't see the corresponding
 * lock:contention_end tracepoint) using end_ts.
 */
static void account_end_timestamp(struct lock_contention *con)
{
	int ts_fd, stat_fd;
	int *prev_key, key;
	u64 end_ts = skel->bss->end_ts;
	int total_cpus;
	enum lock_aggr_mode aggr_mode = con->aggr_mode;
	struct tstamp_data ts_data, *cpu_data;

	/* Iterate per-task tstamp map (key = TID) */
	ts_fd = bpf_map__fd(skel->maps.tstamp);
	stat_fd = bpf_map__fd(skel->maps.lock_stat);

	prev_key = NULL;
	while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
		if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) {
			int pid = key;

			if (aggr_mode == LOCK_AGGR_TASK && con->owner)
				pid = ts_data.flags;

			update_lock_stat(stat_fd, pid, end_ts, aggr_mode,
					 &ts_data);
		}

		prev_key = &key;
	}

	/* Now check the per-cpu tstamp map, which doesn't have a TID. */
	if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP)
		return;

	total_cpus = cpu__max_cpu().cpu;
	ts_fd = bpf_map__fd(skel->maps.tstamp_cpu);

	cpu_data = calloc(total_cpus, sizeof(*cpu_data));
	if (cpu_data == NULL)
		return;

	prev_key = NULL;
	while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
		if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0)
			goto next;

		for (int i = 0; i < total_cpus; i++) {
			if (cpu_data[i].lock == 0)
				continue;

			update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
					 &cpu_data[i]);
		}

next:
		prev_key = &key;
	}
	free(cpu_data);
}

int lock_contention_start(void)
{
	skel->bss->enabled = 1;
	return 0;
}

int lock_contention_stop(void)
{
	skel->bss->enabled = 0;
	mark_end_timestamp();
	return 0;
}

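/*
 * Pick a display name for a contention entry depending on the aggregation
 * mode: the task comm for LOCK_AGGR_TASK, a lock/slab cache name for
 * LOCK_AGGR_ADDR, the cgroup name for LOCK_AGGR_CGROUP, and the first
 * caller outside of the lock internals for LOCK_AGGR_CALLER.  The result
 * may point to a static buffer, so it's only valid until the next call.
 */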
static const char *lock_contention_get_name(struct lock_contention *con,
					    struct contention_key *key,
					    u64 *stack_trace, u32 flags)
{
	int idx = 0;
	u64 addr;
	static char name_buf[KSYM_NAME_LEN];
	struct symbol *sym;
	struct map *kmap;
	struct machine *machine = con->machine;

	if (con->aggr_mode == LOCK_AGGR_TASK) {
		struct contention_task_data task;
		int pid = key->pid;
		int task_fd = bpf_map__fd(skel->maps.task_data);

		/* do not update idle comm which contains CPU number */
		if (pid) {
			struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);

			if (t != NULL &&
			    !bpf_map_lookup_elem(task_fd, &pid, &task) &&
			    thread__set_comm(t, task.comm, /*timestamp=*/0)) {
				snprintf(name_buf, sizeof(name_buf), "%s", task.comm);
				return name_buf;
			}
		}
		return "";
	}

	if (con->aggr_mode == LOCK_AGGR_ADDR) {
		int lock_fd = bpf_map__fd(skel->maps.lock_syms);
		struct slab_cache_data *slab_data;

		/* per-process locks set upper bits of the flags */
		if (flags & LCD_F_MMAP_LOCK)
			return "mmap_lock";
		if (flags & LCD_F_SIGHAND_LOCK)
			return "siglock";

		/* global locks with symbols */
		sym = machine__find_kernel_symbol(machine, key->lock_addr_or_cgroup, &kmap);
		if (sym)
			return sym->name;

		/* try semi-global locks collected separately */
		if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) {
			if (flags == LOCK_CLASS_RQLOCK)
				return "rq_lock";
			if (flags == LOCK_CLASS_ZONE_LOCK)
				return "zone_lock";
		}

		/* look up slab_hash for dynamic locks in a slab object */
		if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
			snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
			return name_buf;
		}

		return "";
	}

	if (con->aggr_mode == LOCK_AGGR_CGROUP) {
		u64 cgrp_id = key->lock_addr_or_cgroup;
		struct cgroup *cgrp = __cgroup__find(&con->cgroups, cgrp_id);

		if (cgrp)
			return cgrp->name;

		snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id);
		return name_buf;
	}

	/* LOCK_AGGR_CALLER: skip lock internal functions */
	while (machine__is_lock_function(machine, stack_trace[idx]) &&
	       idx < con->max_stack - 1)
		idx++;

	addr = stack_trace[idx];
	sym = machine__find_kernel_symbol(machine, addr, &kmap);

	if (sym) {
		unsigned long offset;

		offset = map__map_ip(kmap, addr) - sym->start;

		if (offset == 0)
			return sym->name;

		snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset);
	} else {
		snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr);
	}

	return name_buf;
}

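/*
 * Pop one owner callstack and its stats from the owner_stacks/owner_stat
 * maps and turn them into a freshly allocated lock_stat.  Ownership of the
 * struct and its callstack buffer moves to the caller; presumably this is
 * called repeatedly until it returns NULL to drain the owner maps.
 */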
struct lock_stat *pop_owner_stack_trace(struct lock_contention *con)
{
	int stacks_fd, stat_fd;
	u64 *stack_trace = NULL;
	s32 stack_id;
	struct contention_key ckey = {};
	struct contention_data cdata = {};
	size_t stack_size = con->max_stack * sizeof(*stack_trace);
	struct lock_stat *st = NULL;

	stacks_fd = bpf_map__fd(skel->maps.owner_stacks);
	stat_fd = bpf_map__fd(skel->maps.owner_stat);
	if (!stacks_fd || !stat_fd)
		goto out_err;

	stack_trace = zalloc(stack_size);
	if (stack_trace == NULL)
		goto out_err;

	if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace))
		goto out_err;

	bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id);
	ckey.stack_id = stack_id;
	bpf_map_lookup_elem(stat_fd, &ckey, &cdata);

	st = zalloc(sizeof(struct lock_stat));
	if (!st)
		goto out_err;

	st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) :
			  "unknown");
	if (!st->name)
		goto out_err;

	st->flags = cdata.flags;
	st->nr_contended = cdata.count;
	st->wait_time_total = cdata.total_time;
	st->wait_time_max = cdata.max_time;
	st->wait_time_min = cdata.min_time;
	st->callstack = stack_trace;

	if (cdata.count)
		st->avg_wait_time = cdata.total_time / cdata.count;

	bpf_map_delete_elem(stacks_fd, stack_trace);
	bpf_map_delete_elem(stat_fd, &ckey);

	return st;

out_err:
	free(stack_trace);
	free(st);

	return NULL;
}

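/*
 * Drain the BPF-side lock_stat map into the tool's lock_stat objects.
 * Entries that already exist (per lock_stat_find()) are accumulated,
 * new ones are created with a name from lock_contention_get_name().
 */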
int lock_contention_read(struct lock_contention *con)
{
	int fd, stack, err = 0;
	struct contention_key *prev_key, key = {};
	struct contention_data data = {};
	struct lock_stat *st = NULL;
	struct machine *machine = con->machine;
	u64 *stack_trace;
	size_t stack_size = con->max_stack * sizeof(*stack_trace);

	fd = bpf_map__fd(skel->maps.lock_stat);
	stack = bpf_map__fd(skel->maps.stacks);

	con->fails.task = skel->bss->task_fail;
	con->fails.stack = skel->bss->stack_fail;
	con->fails.time = skel->bss->time_fail;
	con->fails.data = skel->bss->data_fail;

	stack_trace = zalloc(stack_size);
	if (stack_trace == NULL)
		return -1;

	account_end_timestamp(con);

	if (con->aggr_mode == LOCK_AGGR_TASK) {
		struct thread *idle = machine__findnew_thread(machine,
							      /*pid=*/0,
							      /*tid=*/0);
		thread__set_comm(idle, "swapper", /*timestamp=*/0);
	}

	if (con->aggr_mode == LOCK_AGGR_ADDR) {
		DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
				    .flags = BPF_F_TEST_RUN_ON_CPU,
		);
		int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);

		bpf_prog_test_run_opts(prog_fd, &opts);
	}

	/* make sure it loads the kernel map */
	maps__load_first(machine->kmaps);

	prev_key = NULL;
	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
		s64 ls_key;
		const char *name;

		/* to handle errors in the loop body */
		err = -1;

		bpf_map_lookup_elem(fd, &key, &data);
		if (con->save_callstack) {
			bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);

			if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
				con->nr_filtered += data.count;
				goto next;
			}
		}

		switch (con->aggr_mode) {
		case LOCK_AGGR_CALLER:
			ls_key = key.stack_id;
			break;
		case LOCK_AGGR_TASK:
			ls_key = key.pid;
			break;
		case LOCK_AGGR_ADDR:
		case LOCK_AGGR_CGROUP:
			ls_key = key.lock_addr_or_cgroup;
			break;
		default:
			goto next;
		}

		st = lock_stat_find(ls_key);
		if (st != NULL) {
			st->wait_time_total += data.total_time;
			if (st->wait_time_max < data.max_time)
				st->wait_time_max = data.max_time;
			if (st->wait_time_min > data.min_time)
				st->wait_time_min = data.min_time;

			st->nr_contended += data.count;
			if (st->nr_contended)
				st->avg_wait_time = st->wait_time_total / st->nr_contended;
			goto next;
		}

		name = lock_contention_get_name(con, &key, stack_trace, data.flags);
		st = lock_stat_findnew(ls_key, name, data.flags);
		if (st == NULL)
			break;

		st->nr_contended = data.count;
		st->wait_time_total = data.total_time;
		st->wait_time_max = data.max_time;
		st->wait_time_min = data.min_time;

		if (data.count)
			st->avg_wait_time = data.total_time / data.count;

		if (con->aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
			st->callstack = memdup(stack_trace, stack_size);
			if (st->callstack == NULL)
				break;
		}

next:
		prev_key = &key;

		/* we're fine now, reset the error */
		err = 0;
	}

	free(stack_trace);

	return err;
}

int lock_contention_finish(struct lock_contention *con)
{
	if (skel) {
		skel->bss->enabled = 0;
		lock_contention_bpf__destroy(skel);
	}

	while (!RB_EMPTY_ROOT(&con->cgroups)) {
		struct rb_node *node = rb_first(&con->cgroups);
		struct cgroup *cgrp = rb_entry(node, struct cgroup, node);

		rb_erase(node, &con->cgroups);
		cgroup__put(cgrp);
	}

	exit_slab_cache_iter();
	btf__free(con->btf);

	return 0;
}