#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>

#include <linux/err.h>

#include "util/ftrace.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/bpf_counter.h"
#include "util/stat.h"

#include "util/bpf_skel/func_latency.skel.h"

static struct func_latency_bpf *skel;

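/* Set up the func_latency BPF skeleton: configure, load and attach it to the target function */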
int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
	int fd, err;
	int i, ncpus = 1, ntasks = 1;
	struct filter_entry *func;

	if (!list_is_singular(&ftrace->filters)) {
		pr_err("ERROR: %s target function(s).\n",
		       list_empty(&ftrace->filters) ? "No" : "Too many");
		return -1;
	}

	func = list_first_entry(&ftrace->filters, struct filter_entry, list);

	skel = func_latency_bpf__open();
	if (!skel) {
		pr_err("Failed to open func latency skeleton\n");
		return -1;
	}

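	/* pass histogram configuration to the BPF program as read-only data */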
	skel->rodata->bucket_range = ftrace->bucket_range;
	skel->rodata->min_latency = ftrace->min_latency;
	skel->rodata->bucket_num = ftrace->bucket_num;
	if (ftrace->bucket_range && ftrace->bucket_num) {
		bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
	}

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
		skel->rodata->has_cpu = 1;
	}

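	/* size the task filter for task targets and the default (forked workload) case */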
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		ntasks = perf_thread_map__nr(ftrace->evlist->core.threads);
		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
		skel->rodata->has_task = 1;
	}

	skel->rodata->use_nsec = ftrace->use_nsec;

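	/* raise RLIMIT_MEMLOCK so BPF map and program creation does not hit the lock limit */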
	set_max_rlimit();

	err = func_latency_bpf__load(skel);
	if (err) {
		pr_err("Failed to load func latency skeleton\n");
		goto out;
	}

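	/* mark each requested CPU in the cpu_filter map */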
	if (ftrace->target.cpu_list) {
		u32 cpu;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.cpu_filter);

		for (i = 0; i < ncpus; i++) {
			cpu = perf_cpu_map__cpu(ftrace->evlist->core.user_requested_cpus, i).cpu;
			bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
		}
	}

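	/* mark each target thread (by pid/tid) in the task_filter map */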
	if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) {
		u32 pid;
		u8 val = 1;

		fd = bpf_map__fd(skel->maps.task_filter);

		for (i = 0; i < ntasks; i++) {
			pid = perf_thread_map__pid(ftrace->evlist->core.threads, i);
			bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
		}
	}

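	/* start the running minimum at INT64_MAX so the first sample always updates it */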
	skel->bss->min = INT64_MAX;

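	/* attach func_begin as a kprobe at function entry and func_end as a kretprobe at return */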
	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
							    false, func->name);
	if (IS_ERR(skel->links.func_begin)) {
		pr_err("Failed to attach fentry program\n");
		err = PTR_ERR(skel->links.func_begin);
		goto out;
	}

	skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
							  true, func->name);
	if (IS_ERR(skel->links.func_end)) {
		pr_err("Failed to attach fexit program\n");
		err = PTR_ERR(skel->links.func_end);
		goto out;
	}

	/* XXX: we don't actually use this fd - just for poll() */
	return open("/dev/null", O_RDONLY);

out:
	return err;
}

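/* Flip the shared 'enabled' flag so the attached BPF programs start recording */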
int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 1;
	return 0;
}

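/* Clear the flag so the attached BPF programs stop recording */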
int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	skel->bss->enabled = 0;
	return 0;
}

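/* Collect the latency histogram and summary stats filled in by the BPF programs */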
int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
				  int buckets[], struct stats *stats)
{
	int i, fd, err;
	u32 idx;
	u64 *hist;
	int ncpus = cpu__max_cpu().cpu;

	fd = bpf_map__fd(skel->maps.latency);

	hist = calloc(ncpus, sizeof(*hist));
	if (hist == NULL)
		return -ENOMEM;

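	/* the latency map is per-CPU: sum each bucket's counts across all CPUs */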
	for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
		err = bpf_map_lookup_elem(fd, &idx, hist);
		if (err) {
			buckets[idx] = 0;
			continue;
		}

		for (i = 0; i < ncpus; i++)
			buckets[idx] += hist[i];
	}

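	/* derive mean/max/min from the BPF-side counters; values are in nsec, scaled to usec unless use_nsec is set */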
	if (skel->bss->count) {
		stats->mean = skel->bss->total / skel->bss->count;
		stats->n = skel->bss->count;
		stats->max = skel->bss->max;
		stats->min = skel->bss->min;

		if (!ftrace->use_nsec) {
			stats->mean /= 1000;
			stats->max /= 1000;
			stats->min /= 1000;
		}
	}

	free(hist);
	return 0;
}

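/* Detach the BPF programs and release all skeleton resources */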
int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused)
{
	func_latency_bpf__destroy(skel);
	return 0;
}