// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/cgroup.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
#include "util/util.h"

#include <stdio.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <math.h>
#include <limits.h>
#include <ctype.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

static struct rb_root thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool show_lock_cgroups;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static const char *output_name = NULL;
static FILE *lock_output;

static struct lock_filter filters;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

static struct thread_stat *thread_stat_find(u32 tid)
{
	struct rb_node *node;
	struct thread_stat *st;

	node = thread_stats.rb_node;
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		if (st->tid == tid)
			return st;
		else if (tid < st->tid)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}
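
/*
 * Per-thread stats live in the global thread_stats rb-tree, keyed by tid.
 * A minimal lookup from an event handler (sketch only; "sample" stands in
 * for whatever struct perf_sample the caller has) looks like:
 *
 *	struct thread_stat *ts = thread_stat_find(sample->tid);
 *
 *	if (ts != NULL)
 *		... walk ts->seq_list ...
 *
 * thread_stat_find() returns NULL for unseen tids; handlers that must
 * create missing entries go through thread_stat_findnew() below instead.
 */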

static void thread_stat_insert(struct thread_stat *new)
{
	struct rb_node **rb = &thread_stats.rb_node;
	struct rb_node *parent = NULL;
	struct thread_stat *p;

	while (*rb) {
		p = container_of(*rb, struct thread_stat, rb);
		parent = *rb;

		if (new->tid < p->tid)
			rb = &(*rb)->rb_left;
		else if (new->tid > p->tid)
			rb = &(*rb)->rb_right;
		else
			BUG_ON("inserting invalid thread_stat\n");
	}

	rb_link_node(&new->rb, parent, rb);
	rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
	struct thread_stat *st;

	st = thread_stat_find(tid);
	if (st)
		return st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}

	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	thread_stat_insert(st);

	return st;
}

static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}
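
/*
 * thread_stat_findnew starts out pointing at thread_stat_findnew_first(),
 * which links the first node straight at the (known empty) root and then
 * retargets the pointer to the regular find-or-create path, so only the
 * very first call pays for the special case.  The same self-replacing
 * function pointer idiom, reduced to a sketch (illustrative only):
 *
 *	static int op_first(int x);
 *	static int op_later(int x);
 *	static int (*op)(int) = op_first;
 *	static int op_first(int x) { op = op_later; return x; }
 *	static int op_later(int x) { return x + 1; }
 *
 * After the first call through op(), every call dispatches to op_later().
 */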

/* build a simple key function: returns whether one is bigger than two */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					    struct lock_stat *two)	\
	{								\
		return one->member > two->member;			\
	}
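
/*
 * For example, SINGLE_KEY(nr_acquired) expands to:
 *
 *	static int lock_stat_key_nr_acquired(struct lock_stat *one,
 *					     struct lock_stat *two)
 *	{
 *		return one->nr_acquired > two->nr_acquired;
 *	}
 *
 * i.e. a "one is bigger than two" predicate usable as the comparator of
 * insert_to() below.
 */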

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

static int lock_stat_key_wait_time_min(struct lock_stat *one,
				       struct lock_stat *two)
{
	u64 s1 = one->wait_time_min;
	u64 s2 = two->wait_time_min;
	if (s1 == ULLONG_MAX)
		s1 = 0;
	if (s2 == ULLONG_MAX)
		s2 = 0;
	return s1 > s2;
}

struct lock_key {
	/*
	 * name: the value specified by the user
	 * this should be simpler than the raw name of the member
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char *name;
	/* header: the string printed on the header line */
	const char *header;
	/* len: the printing width of the field */
	int len;
	/* key: a pointer to a function to compare two lock stats for sorting */
	int (*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to a function to print a given lock stat */
	void (*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head list;
};

static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
	static const struct {
		float base;
		const char *unit;
	} table[] = {
		{ 1e9 * 3600, "h " },
		{ 1e9 * 60, "m " },
		{ 1e9, "s " },
		{ 1e6, "ms" },
		{ 1e3, "us" },
		{ 0, NULL },
	};

	/* for CSV output */
	if (len == 0) {
		fprintf(lock_output, "%llu", nsec);
		return;
	}

	for (int i = 0; table[i].unit; i++) {
		if (nsec < table[i].base)
			continue;

		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
		return;
	}

	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
}
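
/*
 * The table is scanned from the largest base down, so a value is printed
 * with the first unit it reaches.  Examples for len == 12 (the width used
 * by the wait-time columns):
 *
 *	lock_stat_key_print_time(1500000, 12)  ->  "     1.50 ms"
 *	lock_stat_key_print_time(500, 12)      ->  "      500 ns"
 *	lock_stat_key_print_time(1500000, 0)   ->  "1500000"      (CSV, raw ns)
 */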

#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
					      struct lock_stat *ls)
{
	u64 wait_time = ls->wait_time_min;

	if (wait_time == ULLONG_MAX)
		wait_time = 0;

	lock_stat_key_print_time(wait_time, key->len);
}


static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root sorted; /* place to store intermediate data */
static struct rb_root result; /* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char *output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* more complicated comparisons should go here */
	{ }
};
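
/*
 * DEF_KEY_LOCK() only stitches identifiers together; the first entry
 * above, for instance, expands to:
 *
 *	{ "acquired", "acquired", 10,
 *	  lock_stat_key_nr_acquired, lock_stat_key_print_nr_acquired, {} }
 *
 * so the sort key and output-field options can both refer to the short
 * "acquired" name.
 */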

static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* more complicated comparisons should go here */
	{ }
};

static int select_key(bool contention)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;

			/* selected key should be in the output fields */
			if (list_empty(&keys[i].list))
				list_add_tail(&keys[i].list, &lock_keys);

			return 0;
		}
	}

	pr_err("Unknown compare key: %s\n", sort_key);
	return -1;
}

static int add_output_field(bool contention, char *name)
{
	int i;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	for (i = 0; keys[i].name; i++) {
		if (strcmp(keys[i].name, name))
			continue;

		/* prevent double link */
		if (list_empty(&keys[i].list))
			list_add_tail(&keys[i].list, &lock_keys);

		return 0;
	}

	pr_err("Unknown output field: %s\n", name);
	return -1;
}

static int setup_output_field(bool contention, const char *str)
{
	char *tok, *tmp, *orig;
	int i, ret = 0;
	struct lock_key *keys = report_keys;

	if (contention)
		keys = contention_keys;

	/* no output field given: use all of them */
	if (str == NULL) {
		for (i = 0; keys[i].name; i++)
			list_add_tail(&keys[i].list, &lock_keys);
		return 0;
	}

	for (i = 0; keys[i].name; i++)
		INIT_LIST_HEAD(&keys[i].list);

	orig = tmp = strdup(str);
	if (orig == NULL)
		return -ENOMEM;

	while ((tok = strsep(&tmp, ",")) != NULL) {
		ret = add_output_field(contention, tok);
		if (ret < 0)
			break;
	}
	free(orig);

	return ret;
}
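
/*
 * setup_output_field() takes a comma-separated field list.  For example,
 * called with contention=true and str = "contended,wait_total", it links
 * exactly those two contention_keys entries onto lock_keys, in that
 * order; a NULL str keeps the full default column set.
 */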

static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}

static void insert_to(struct rb_root *rr, struct lock_stat *st,
		      int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &rr->rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, rr);
}
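
/*
 * Note the comparator answers "is st bigger than p?" and bigger entries
 * go to the left, so the leftmost node holds the largest value and
 * pop_from() below drains the tree in descending order of the sort key.
 * A sketch of draining (illustrative only):
 *
 *	while ((st = pop_from(&result)) != NULL)
 *		... print st, largest first ...
 */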

static inline void insert_to_result(struct lock_stat *st,
				    int (*bigger)(struct lock_stat *,
						  struct lock_stat *))
{
	if (combine_locks && st->combined)
		return;
	insert_to(&result, st, bigger);
}

static inline struct lock_stat *pop_from(struct rb_root *rr)
{
	struct rb_node *node = rr->rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, rr);
	return container_of(node, struct lock_stat, rb);
}

/* return the leftmost element of result and erase it */
static struct lock_stat *pop_from_result(void)
{
	return pop_from(&result);
}

struct trace_lock_handler {
	/* it's used on CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};

static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
	struct lock_seq_stat *seq;

	list_for_each_entry(seq, &ts->seq_list, list) {
		if (seq->addr == addr)
			return seq;
	}

	seq = zalloc(sizeof(struct lock_seq_stat));
	if (!seq) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	seq->state = SEQ_STATE_UNINITIALIZED;
	seq->addr = addr;

	list_add(&seq->list, &ts->seq_list);
	return seq;
}
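
/*
 * Each (thread, lock address) pair carries a small state machine so that
 * broken event sequences (lost events, a trace that starts mid-sequence)
 * can be detected.  Roughly:
 *
 *	UNINITIALIZED/RELEASED --acquire-->   ACQUIRING (or READ_ACQUIRED)
 *	ACQUIRING              --contended--> CONTENDED
 *	ACQUIRING/CONTENDED    --acquired-->  ACQUIRED
 *	ACQUIRED/READ_ACQUIRED --release-->   (seq freed)
 *
 * Any other transition marks the lock_stat as broken and bumps the
 * matching bad_hist[] counter in the handlers below.
 */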

enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};

static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		*key = addr;
		break;
	case LOCK_AGGR_TASK:
		*key = tid;
		break;
	case LOCK_AGGR_CALLER:
	case LOCK_AGGR_CGROUP:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}
	return 0;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				struct perf_sample *sample)
{
	if (aggr_mode == LOCK_AGGR_CALLER) {
		*key = callchain_id(evsel, sample);
		return 0;
	}
	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}
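
/*
 * The aggregation key decides what one lock_stat row represents:
 * LOCK_AGGR_ADDR keys on the lock address, LOCK_AGGR_TASK on the tid and
 * LOCK_AGGR_CALLER on a hash of the contending callchain computed by
 * callchain_id() below.  LOCK_AGGR_CGROUP is only usable with the BPF
 * path, so it is rejected here like any other unknown mode.
 */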

static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}

static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}

static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
				  char *buf, int size)
{
	u64 offset;

	if (map == NULL || sym == NULL) {
		buf[0] = '\0';
		return 0;
	}

	offset = map__map_ip(map, ip) - sym->start;

	if (offset)
		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
	else
		return strlcpy(buf, sym->name, size);
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced with the task name later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();
	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}
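
/*
 * callchain_id() folds the interesting part of the callchain into a
 * single u64 by XOR-ing hash_long() of each ip, after dropping the first
 * stack_skip entries and known lock functions.  Distinct callchains can
 * in principle collide on one id; it is used only as an aggregation key,
 * and the displayed caller name is resolved separately by
 * lock_contention_caller().
 */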

static u64 *get_callstack(struct perf_sample *sample, int max_stack)
{
	u64 *callstack;
	u64 i;
	int c;

	callstack = calloc(max_stack, sizeof(*callstack));
	if (callstack == NULL)
		return NULL;

	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
		u64 ip = sample->callchain->ips[i];

		if (ip >= PERF_CONTEXT_MAX)
			continue;

		callstack[c++] = ip;
	}
	return callstack;
}
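
/*
 * Entries at or above PERF_CONTEXT_MAX are not return addresses but
 * context markers (PERF_CONTEXT_KERNEL, PERF_CONTEXT_USER, ...) that the
 * kernel interleaves into the callchain, so they are skipped and at most
 * max_stack real entries are kept.  Unused tail slots stay zero thanks
 * to calloc(), which is what the callstack printers rely on to stop.
 */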

static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_CGROUP:
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
			free(callstack);
			return 0;
		}

		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * A nested contention begin can occur with mutex spinning;
		 * keep the original contention begin event and ignore the
		 * second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}

static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}

/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops = {
	.acquire_event = report_lock_acquire_event,
	.acquired_event = report_lock_acquired_event,
	.contended_event = report_lock_contended_event,
	.release_event = report_lock_release_event,
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event = report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops = {
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event = report_lock_contention_end_event,
};


static struct trace_lock_handler *trace_handler;

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquire_event)
		return trace_handler->acquire_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->acquired_event)
		return trace_handler->acquired_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contended_event)
		return trace_handler->contended_event(evsel, sample);
	return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->release_event)
		return trace_handler->release_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_begin_event)
		return trace_handler->contention_begin_event(evsel, sample);
	return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
	if (trace_handler->contention_end_event)
		return trace_handler->contention_end_event(evsel, sample);
	return 0;
}

static void print_bad_events(int bad, int total)
{
	/* output for debug; this has to be removed */
	int i;
	int broken = 0;
	const char *name[4] =
		{ "acquire", "acquired", "contended", "release" };

	for (i = 0; i < BROKEN_MAX; i++)
		broken += bad_hist[i];

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
	fprintf(lock_output, "histogram of events that caused bad sequences\n");
	for (i = 0; i < BROKEN_MAX; i++)
		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total, printed;

	if (!quiet) {
		fprintf(lock_output, "%20s ", "Name");
		list_for_each_entry(key, &lock_keys, list)
			fprintf(lock_output, "%*s ", key->len, key->header);
		fprintf(lock_output, "\n\n");
	}

	bad = total = printed = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains tid of thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
			}

			fprintf(lock_output, "%20s ", name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* truncate the name to preserve the output layout */
			fprintf(lock_output, "%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			fprintf(lock_output, " ");
		}
		fprintf(lock_output, "\n");

		if (++printed >= print_nr_entries)
			break;
	}

	print_bad_events(bad, total);
}

static bool info_threads, info_map;

static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	fprintf(lock_output, "%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		thread__put(t);
	}
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
	int ret;

	if (a->name && b->name)
		ret = strcmp(a->name, b->name);
	else
		ret = !!a->name - !!b->name;

	if (!ret)
		return a->addr < b->addr;
	else
		return ret < 0;
}

static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	fprintf(lock_output, "Address of instance: name of class\n");
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static void dump_info(void)
{
	if (info_threads)
		dump_threads();

	if (info_map) {
		if (info_threads)
			fputc('\n', lock_output);
		dump_map();
	}
}

static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end", evsel__process_contention_end, },
};

static int process_event_update(const struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

static int process_sample_event(const struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	thread__put(thread);

	return err;
}

static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}

static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}

static const struct {
	unsigned int flags;
	/*
	 * Name of the lock flags (access), with delimiter ':'.
	 * For example, rwsem:R or rwsem:W.
	 */
	const char *flags_name;
	/* Name of the lock (type), for example, rwlock or rwsem. */
	const char *lock_name;
} lock_type_table[] = {
	{ 0, "semaphore", "semaphore" },
	{ LCB_F_SPIN, "spinlock", "spinlock" },
	{ LCB_F_SPIN | LCB_F_READ, "rwlock:R", "rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE, "rwlock:W", "rwlock" },
	{ LCB_F_READ, "rwsem:R", "rwsem" },
	{ LCB_F_WRITE, "rwsem:W", "rwsem" },
	{ LCB_F_RT, "rt-mutex", "rt-mutex" },
	{ LCB_F_RT | LCB_F_READ, "rwlock-rt:R", "rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE, "rwlock-rt:W", "rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ, "pcpu-sem:R", "percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W", "percpu-rwsem" },
	{ LCB_F_MUTEX, "mutex", "mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN, "mutex", "mutex" },
	/* alias for optimistic spinning only */
	{ LCB_F_MUTEX | LCB_F_SPIN, "mutex:spin", "mutex-spin" },
};
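
/*
 * The getters below mask flags with LCB_F_TYPE_MASK and return the first
 * matching row; (LCB_F_SPIN | LCB_F_READ), for example, resolves to
 * "rwlock:R" / "rwlock".  Because of the duplicated LCB_F_MUTEX |
 * LCB_F_SPIN rows, a spinning mutex is reported simply as "mutex" here;
 * the trailing "mutex:spin" alias exists so that a type filter can name
 * optimistic spinning explicitly.
 */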

static const char *get_type_flags_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].flags_name;
	}
	return "unknown";
}

static const char *get_type_lock_name(unsigned int flags)
{
	flags &= LCB_F_TYPE_MASK;

	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
		if (lock_type_table[i].flags == flags)
			return lock_type_table[i].lock_name;
	}
	return "unknown";
}

static void lock_filter_finish(void)
{
	zfree(&filters.types);
	filters.nr_types = 0;

	zfree(&filters.addrs);
	filters.nr_addrs = 0;

	for (int i = 0; i < filters.nr_syms; i++)
		free(filters.syms[i]);

	zfree(&filters.syms);
	filters.nr_syms = 0;

	zfree(&filters.cgrps);
	filters.nr_cgrps = 0;

	for (int i = 0; i < filters.nr_slabs; i++)
		free(filters.slabs[i]);

	zfree(&filters.slabs);
	filters.nr_slabs = 0;
}

static void sort_contention_result(void)
{
	sort_result();
}

static void print_header_stdio(void)
{
	struct lock_key *key;

	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%*s ", key->len, key->header);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, " %10s %s\n\n", "pid",
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, " %10s %s\n\n", "type", "caller");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, " %s\n\n", "cgroup");
		break;
	default:
		break;
	}
}

static void print_header_csv(const char *sep)
{
	struct lock_key *key;

	fprintf(lock_output, "# output: ");
	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%s%s ", key->header, sep);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, "%s%s %s\n", "pid", sep,
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
		if (verbose > 0)
			fprintf(lock_output, "%s %s", sep, "stacktrace");
		fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, "%s\n", "cgroup");
		break;
	default:
		break;
	}
}

static void print_header(void)
{
	if (!quiet) {
		if (symbol_conf.field_sep)
			print_header_csv(symbol_conf.field_sep);
		else
			print_header_stdio();
	}
}

static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, " ");
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, " %10s %s\n", get_type_flags_name(st->flags), st->name);
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, " %10d %s\n",
			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr,
			st->name, get_type_lock_name(st->flags));
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, " %s\n", st->name);
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf);
		}
	}
}

static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
				const char *sep)
{
	struct lock_key *key;
	struct thread *t;
	int pid;

	list_for_each_entry(key, &lock_keys, list) {
		key->print(key, st);
		fprintf(lock_output, "%s ", sep);
	}

	switch (aggr_mode) {
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
		if (verbose <= 0)
			fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_TASK:
		pid = st->addr;
		t = perf_session__findnew(session, pid);
		fprintf(lock_output, "%d%s %s\n", pid, sep,
			pid == -1 ? "Unknown" : thread__comm_str(t));
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
			st->name, sep, get_type_lock_name(st->flags));
		break;
	case LOCK_AGGR_CGROUP:
		fprintf(lock_output, "%s\n", st->name);
		break;
	default:
		break;
	}

	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
		struct map *kmap;
		struct symbol *sym;
		char buf[128];
		u64 ip;

		for (int i = 0; i < max_stack_depth; i++) {
			if (!st->callstack || !st->callstack[i])
				break;

			ip = st->callstack[i];
			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long)ip, buf);
		}
		fprintf(lock_output, "\n");
	}
}

static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
{
	if (symbol_conf.field_sep)
		print_lock_stat_csv(con, st, symbol_conf.field_sep);
	else
		print_lock_stat_stdio(con, st);
}

static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
{
	/* output for debug; this has to be removed */
	int broken = fails->task + fails->stack + fails->time + fails->data;

	if (!use_bpf)
		print_bad_events(bad, total);

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	total += broken;
	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);

	fprintf(lock_output, "histogram of failure reasons\n");
	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
}

static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
			     const char *sep)
{
	/* output for debug; this has to be removed */
	if (use_bpf)
		bad = fails->task + fails->stack + fails->time + fails->data;

	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
		return;

	total += bad;
	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);

	if (use_bpf) {
		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
	} else {
		int i;
		const char *name[4] = { "acquire", "acquired", "contended", "release" };

		for (i = 0; i < BROKEN_MAX; i++)
			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
	}
	fprintf(lock_output, "\n");
}

static void print_footer(int total, int bad, struct lock_contention_fails *fails)
{
	if (symbol_conf.field_sep)
		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
	else
		print_footer_stdio(total, bad, fails);
}

static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	int bad, total, printed;

	if (!quiet)
		print_header();

	bad = total = printed = 0;

	while ((st = pop_from_result())) {
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		if (!st->wait_time_total)
			continue;

		print_lock_stat(con, st);

		if (++printed >= print_nr_entries)
			break;
	}

	if (con->owner && con->save_callstack && verbose > 0) {
		struct rb_root root = RB_ROOT;

		if (symbol_conf.field_sep)
			fprintf(lock_output, "# owner stack trace:\n");
		else
			fprintf(lock_output, "\n=== owner stack trace ===\n\n");
		while ((st = pop_owner_stack_trace(con)))
			insert_to(&root, st, compare);

		while ((st = pop_from(&root))) {
			print_lock_stat(con, st);
			free(st);
		}
	}

	if (print_nr_entries) {
		/* update the total/bad stats */
		while ((st = pop_from_result())) {
			total += use_bpf ? st->nr_contended : 1;
			if (st->broken)
				bad++;
		}
	}
	/* some entries are collected but hidden by the callstack filter */
	total += con->nr_filtered;

	print_footer(total, bad, &con->fails);
}

static bool force;

static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = force,
	};

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr = perf_event__process_attr;
	eops.event_update = process_event_update;
	eops.sample = process_sample_event;
	eops.comm = perf_event__process_comm;
	eops.mmap = perf_event__process_mmap;
	eops.namespaces = perf_event__process_namespaces;
	eops.tracing_data = perf_event__process_tracing_data;
	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}

static void sighandler(int sig __maybe_unused)
{
}

static int check_lock_contention_options(const struct option *options,
					 const char * const *usage)
{
	if (show_thread_stats && show_lock_addrs) {
		pr_err("Cannot use thread and addr mode together\n");
		parse_options_usage(usage, options, "threads", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_owner && !use_bpf) {
		pr_err("Lock owners are available only with BPF\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "use-bpf", 0);
		return -1;
	}

	if (show_lock_owner && show_lock_addrs) {
		pr_err("Cannot use owner and addr mode together\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_cgroups && !use_bpf) {
		pr_err("Cgroups are available only with BPF\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "use-bpf", 0);
		return -1;
	}

	if (show_lock_cgroups && show_lock_addrs) {
		pr_err("Cannot use cgroup and addr mode together\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_cgroups && show_thread_stats) {
		pr_err("Cannot use cgroup and thread mode together\n");
		parse_options_usage(usage, options, "lock-cgroup", 0);
		parse_options_usage(NULL, options, "threads", 0);
		return -1;
	}

	if (symbol_conf.field_sep) {
		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
			pr_err("Cannot use the separator that is already used\n");
			parse_options_usage(usage, options, "x", 1);
			return -1;
		}
	}

	if (show_lock_owner && !show_thread_stats) {
		pr_warning("Now -o tries to show the owner's callstack instead of pid and comm.\n");
		pr_warning("Please use the -t option too to keep the old behavior.\n");
	}

	return 0;
}

static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.filters = &filters,
		.save_callstack = needs_callstack(),
		.owner = show_lock_owner,
		.cgroups = RB_ROOT,
	};

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	con.result = &lockhash_table[0];

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr = perf_event__process_attr;
	eops.event_update = process_event_update;
	eops.sample = process_sample_event;
	eops.comm = perf_event__process_comm;
	eops.mmap = perf_event__process_mmap;
	eops.tracing_data = perf_event__process_tracing_data;

	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		err = PTR_ERR(session);
		session = NULL;
		goto out_delete;
	}

	con.machine = &session->machines.host;

	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
		show_lock_addrs ? LOCK_AGGR_ADDR :
		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;

	if (con.aggr_mode == LOCK_AGGR_CALLER)
		con.save_callstack = true;

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		err = lock_contention_prepare(&con);
		if (err < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
							   contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	err = setup_output_field(true, output_fields);
	if (err) {
		pr_err("Failed to setup output field\n");
		goto out_delete;
	}

	err = select_key(true);
	if (err)
		goto out_delete;

	if (symbol_conf.field_sep) {
		int i;
		struct lock_key *keys = contention_keys;

		/* do not align output in CSV format */
		for (i = 0; keys[i].name; i++)
			keys[i].len = 0;
	}

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish(&con);
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}
2144
2145
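/*
 * Build an argv for 'perf record', preferring the lockdep tracepoints and
 * falling back to the contention tracepoints (plus --call-graph options)
 * when CONFIG_LOCK_STAT is not available.  Illustratively, the fallback
 * path ends up running something like:
 *
 *   perf record -R -m 1024 -c 1 --synth task \
 *       -e lock:contention_begin -e lock:contention_end \
 *       --call-graph fp,<CONTENTION_STACK_DEPTH> [user args]
 */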
static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;
	int ret;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < nr_tracepoints; j++) {
		rec_argv[i++] = "-e";
		rec_argv[i++] = has_lock_stat
			? lock_tracepoints[j].name
			: contention_tracepoints[j].name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}

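/* -M/--map-nr-entries callback: parse the BPF map size, e.g. "-M 16384". */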
static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

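/*
 * --max-stack callback: opt->value points at the int max_stack_depth,
 * so parse, range-check against sysctl kernel.perf_event_max_stack,
 * and store through an int pointer.
 */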
static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	int *len = (int *)opt->value;
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}

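/* Append one entry with the given flags to the type filter array. */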
static bool add_lock_type(unsigned int flags)
{
	unsigned int *tmp;

	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
	if (tmp == NULL)
		return false;

	tmp[filters.nr_types++] = flags;
	filters.types = tmp;
	return true;
}

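/*
 * -Y/--type-filter callback.  Each comma/space separated token is either a
 * flags name (contains ':', e.g. "rwlock:R") or a lock name (e.g.
 * "spinlock"); a single lock name may match several flag entries.
 */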
static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		bool found = false;

		/* `tok` is a flags name if it contains ':'. */
		if (strchr(tok, ':')) {
			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
				if (!strcmp(lock_type_table[i].flags_name, tok) &&
				    add_lock_type(lock_type_table[i].flags)) {
					found = true;
					break;
				}
			}

			if (!found) {
				pr_err("Unknown lock flags name: %s\n", tok);
				free(s);
				return -1;
			}

			continue;
		}

		/*
		 * Otherwise `tok` is a lock name.
		 * A single lock name can map to multiple flags.
		 * Replace the alias `pcpu-sem` with the actual name `percpu-rwsem`.
		 */
		if (!strcmp(tok, "pcpu-sem"))
			tok = (char *)"percpu-rwsem";
		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
			if (!strcmp(lock_type_table[i].lock_name, tok)) {
				if (add_lock_type(lock_type_table[i].flags)) {
					found = true;
				} else {
					free(s);
					return -1;
				}
			}
		}

		if (!found) {
			pr_err("Unknown lock name: %s\n", tok);
			free(s);
			return -1;
		}
	}

	free(s);
	return 0;
}

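/* Append one entry with the given address to the address filter array. */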
static bool add_lock_addr(unsigned long addr)
{
	unsigned long *tmp;

	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp[filters.nr_addrs++] = addr;
	filters.addrs = tmp;
	return true;
}

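/* Duplicate the symbol name and append it to the symbol filter array. */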
static bool add_lock_sym(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_syms++] = sym;
	filters.syms = tmp;
	return true;
}

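/* Duplicate the slab cache name and append it to the slab filter array. */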
static bool add_lock_slab(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_slabs++] = sym;
	filters.slabs = tmp;
	return true;
}

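/*
 * -L/--lock-filter callback.  Each token is a hex address, a slab cache
 * name prefixed with '&' (e.g. "&task_struct"), or a symbol name that is
 * resolved to an address later.
 */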
static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;
	u64 addr;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		char *end;

		addr = strtoull(tok, &end, 16);
		if (*end == '\0') {
			if (!add_lock_addr(addr)) {
				ret = -1;
				break;
			}
			continue;
		}

		if (*tok == '&') {
			if (!add_lock_slab(tok + 1)) {
				ret = -1;
				break;
			}
			continue;
		}

		/*
		 * At this moment, we don't have kernel symbols. Save the symbols
		 * in a separate list and resolve them to addresses later.
		 */
		if (!add_lock_sym(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

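/* --output callback: redirect the report from the default stderr to a file. */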
static int parse_output(const struct option *opt, const char *str,
			int unset __maybe_unused)
{
	const char **name = (const char **)opt->value;

	if (str == NULL)
		return -1;

	lock_output = fopen(str, "w");
	if (lock_output == NULL) {
		pr_err("Cannot open %s\n", str);
		return -1;
	}

	*name = str;
	return 0;
}

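/* Look up the cgroup by name, read its id and append it to the cgroup filter. */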
static bool add_lock_cgroup(char *name)
{
	u64 *tmp;
	struct cgroup *cgrp;

	cgrp = cgroup__new(name, /*do_open=*/false);
	if (cgrp == NULL) {
		pr_err("Failed to create cgroup: %s\n", name);
		return false;
	}

	if (read_cgroup_id(cgrp) < 0) {
		pr_err("Failed to read cgroup id for %s\n", name);
		cgroup__put(cgrp);
		return false;
	}

	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		cgroup__put(cgrp);
		return false;
	}

	tmp[filters.nr_cgrps++] = cgrp->id;
	filters.cgrps = tmp;
	cgroup__put(cgrp);
	return true;
}

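/* -G/--cgroup-filter callback: comma/space separated list of cgroup names. */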
static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
			       int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (!add_lock_cgroup(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

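/*
 * Entry point for 'perf lock'.  Illustrative usage:
 *
 *   perf lock record -- <workload>       # record lock events to perf.data
 *   perf lock report -k wait_total       # per-lock statistics
 *   perf lock contention -ab -- sleep 1  # live BPF contention profile
 *
 * Subcommand names may be abbreviated to a unique prefix of at least
 * three characters (e.g. 'con' for 'contention').
 */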
int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump the thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "dump the map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		   "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		   "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		   "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		   "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
		     "Set the maximum stack depth when collecting lock contention, "
		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
	OPT_INTEGER(0, "stack-skip", &stack_skip,
		    "Set the number of stack depth to skip when finding a lock caller, "
		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
		     "Filter specific type of locks", parse_lock_type),
	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
		     "Filter specific address/symbol of locks", parse_lock_addr),
	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
		     "Filter specific function in the callstack", parse_call_stack),
	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
			   "print result in CSV format with custom separator"),
	OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
	OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
		     "Filter specific cgroups", parse_cgroup_filter),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	lock_output = stderr;
	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		rc = cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}

		/* If neither threads nor map requested, display both */
		if (!info_threads && !info_map) {
			info_threads = true;
			info_map = true;
		}

		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}

		if (check_lock_contention_options(contention_options,
						  contention_usage) < 0)
			return -1;

		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	/* free usage string allocated by parse_options_subcommand */
	free((void *)lock_usage[0]);

	zfree(&lockhash_table);
	return rc;
}