/* SPDX-License-Identifier: GPL-2.0 */
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dwarf-regs.h" /* for EM_HOST */
#include "syscalltbl.h"
#include "util/cgroup.h"
#include "util/hashmap.h"
#include "util/trace.h"
#include "util/util.h"
#include <bpf/bpf.h>
#include <linux/rbtree.h>
#include <linux/time64.h>
#include <tools/libc_compat.h> /* reallocarray */

#include "bpf_skel/syscall_summary.h"
#include "bpf_skel/syscall_summary.skel.h"

static struct syscall_summary_bpf *skel;
static struct rb_root cgroups = RB_ROOT;

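/*
 * Open, configure, load and attach the syscall_summary BPF skeleton.
 * The aggregation mode is fixed at load time via rodata, and for the
 * per-cgroup mode the current cgroup hierarchy is cached in 'cgroups'
 * so that cgroup IDs can be resolved to names when printing.
 *
 * Expected call sequence from the caller (e.g. perf trace):
 *
 *   trace_prepare_bpf_summary(mode);
 *   trace_start_bpf_summary();
 *   ... run the workload ...
 *   trace_end_bpf_summary();
 *   trace_print_bpf_summary(fp);
 *   trace_cleanup_bpf_summary();
 */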
int trace_prepare_bpf_summary(enum trace_summary_mode mode)
{
	skel = syscall_summary_bpf__open();
	if (skel == NULL) {
		fprintf(stderr, "failed to open syscall summary bpf skeleton\n");
		return -1;
	}

	if (mode == SUMMARY__BY_THREAD)
		skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD;
	else if (mode == SUMMARY__BY_CGROUP)
		skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP;
	else
		skel->rodata->aggr_mode = SYSCALL_AGGR_CPU;

	if (cgroup_is_v2("perf_event") > 0)
		skel->rodata->use_cgroup_v2 = 1;

	if (syscall_summary_bpf__load(skel) < 0) {
		fprintf(stderr, "failed to load syscall summary bpf skeleton\n");
		return -1;
	}

	if (syscall_summary_bpf__attach(skel) < 0) {
		fprintf(stderr, "failed to attach syscall summary bpf skeleton\n");
		return -1;
	}

	if (mode == SUMMARY__BY_CGROUP)
		read_all_cgroups(&cgroups);

	return 0;
}

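/*
 * The BPF programs check 'enabled' before recording anything, so
 * flipping it in BSS starts and stops the collection without
 * detaching the programs.
 */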
void trace_start_bpf_summary(void)
{
	skel->bss->enabled = 1;
}

void trace_end_bpf_summary(void)
{
	skel->bss->enabled = 0;
}

struct syscall_node {
	int syscall_nr;
	struct syscall_stats stats;
};

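/*
 * Relative standard deviation of the mean, in percent, for the
 * "stddev" column.  The sample variance is recovered from the running
 * sums kept by the BPF program:
 *
 *   variance   = (sum(x^2) - sum(x)^2 / n) / (n - 1)
 *   rel_stddev = 100 * sqrt(variance / n) / mean
 */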
static double rel_stddev(struct syscall_stats *stat)
{
	double variance, average;

	if (stat->count < 2)
		return 0;

	average = (double)stat->total_time / stat->count;

	/* compute in double to avoid overflowing u64 when squaring total_time */
	variance = stat->squared_sum;
	variance -= (double)stat->total_time * stat->total_time / stat->count;
	variance /= stat->count - 1;

	return 100 * sqrt(variance / stat->count) / average;
}

/*
 * The syscall_data maintains syscall stats ordered by total time.
 * It supports different summary modes like per-thread or global.
 *
 * For per-thread stats, it uses a two-level data structure -
 * syscall_data is keyed by TID and has an array of nodes, one for
 * each syscall made by the thread.
 *
 * For global stats, it's still two-level technically, but we don't
 * need per-cpu analysis, so it's keyed by the syscall number to
 * combine stats from different CPUs.  And syscall_data always has a
 * single syscall_node, so it effectively works as a flat hierarchy.
 *
 * For per-cgroup stats, it uses a two-level data structure like the
 * per-thread case - syscall_data is keyed by cgroup ID and has an
 * array of nodes, one for each syscall made by the cgroup.
 */
struct syscall_data {
	u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
	int nr_events;
	int nr_nodes;
	u64 total_time;
	struct syscall_node *nodes;
};

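/*
 * qsort() comparators: sort syscall_data entries and the per-entry
 * syscall nodes in descending order of total time, so the most
 * expensive syscalls are printed first.
 */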
static int datacmp(const void *a, const void *b)
{
	const struct syscall_data * const *sa = a;
	const struct syscall_data * const *sb = b;

	return (*sa)->total_time > (*sb)->total_time ? -1 : 1;
}

static int nodecmp(const void *a, const void *b)
{
	const struct syscall_node *na = a;
	const struct syscall_node *nb = b;

	return na->stats.total_time > nb->stats.total_time ? -1 : 1;
}

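/*
 * hashmap callbacks: the keys are small integers (TID, syscall number
 * or cgroup ID), so the key itself serves as the hash value.
 */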
static size_t sc_node_hash(long key, void *ctx __maybe_unused)
{
	return key;
}

static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
{
	return key1 == key2;
}

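/*
 * Print one row per syscall node: name (or raw number), call and error
 * counts, total/min/avg/max time in msec and the relative stddev.
 */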
static int print_common_stats(struct syscall_data *data, FILE *fp)
{
	int printed = 0;

	for (int i = 0; i < data->nr_nodes; i++) {
		struct syscall_node *node = &data->nodes[i];
		struct syscall_stats *stat = &node->stats;
		double total = (double)(stat->total_time) / NSEC_PER_MSEC;
		double min = (double)(stat->min_time) / NSEC_PER_MSEC;
		double max = (double)(stat->max_time) / NSEC_PER_MSEC;
		double avg = total / stat->count;
		const char *name;

		/* TODO: support other ABIs */
		name = syscalltbl__name(EM_HOST, node->syscall_nr);
		if (name)
			printed += fprintf(fp, "   %-15s", name);
		else
			printed += fprintf(fp, "   syscall:%-7d", node->syscall_nr);

		printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
				   stat->count, stat->error, total, min, avg, max,
				   rel_stddev(stat));
	}
	return printed;
}

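/*
 * Per-thread mode: look up (or create) the syscall_data for this TID
 * and append a new node holding the per-syscall stats as-is, since the
 * BPF map already keeps one entry per (thread, syscall) pair.
 */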
static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key,
			       struct syscall_stats *map_data)
{
	struct syscall_data *data;
	struct syscall_node *nodes;

	if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) {
		data = zalloc(sizeof(*data));
		if (data == NULL)
			return -ENOMEM;

		data->key = map_key->cpu_or_tid;
		if (hashmap__add(hash, data->key, data) < 0) {
			free(data);
			return -ENOMEM;
		}
	}

	/* update thread total stats */
	data->nr_events += map_data->count;
	data->total_time += map_data->total_time;

	nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
	if (nodes == NULL)
		return -ENOMEM;

	data->nodes = nodes;
	nodes = &data->nodes[data->nr_nodes++];
	nodes->syscall_nr = map_key->nr;

	/* each thread has an entry for each syscall, just use the stat */
	memcpy(&nodes->stats, map_data, sizeof(*map_data));
	return 0;
}

static int print_thread_stat(struct syscall_data *data, FILE *fp)
{
	int printed = 0;

	qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);

	printed += fprintf(fp, " thread (%d), ", (int)data->key);
	printed += fprintf(fp, "%d events\n\n", data->nr_events);

	printed += fprintf(fp, "   syscall            calls errors     total       min       avg       max    stddev\n");
	printed += fprintf(fp, "                                          (msec)    (msec)    (msec)    (msec)       (%%)\n");
	printed += fprintf(fp, "   --------------- -------- ------  -------- --------- --------- ---------    ------\n");

	printed += print_common_stats(data, fp);
	printed += fprintf(fp, "\n\n");

	return printed;
}

static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp)
{
	int printed = 0;

	for (int i = 0; i < nr_data; i++)
		printed += print_thread_stat(data[i], fp);

	return printed;
}

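/*
 * Global (per-CPU aggregation) mode: one syscall_data per syscall
 * number with a single embedded node; entries coming from different
 * CPUs are summed up, and min/max are merged.
 */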
static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
			      struct syscall_stats *map_data)
{
	struct syscall_data *data;
	struct syscall_stats *stat;

	if (!hashmap__find(hash, map_key->nr, &data)) {
		data = zalloc(sizeof(*data));
		if (data == NULL)
			return -ENOMEM;

		data->nodes = zalloc(sizeof(*data->nodes));
		if (data->nodes == NULL) {
			free(data);
			return -ENOMEM;
		}

		data->nr_nodes = 1;
		data->key = map_key->nr;
		data->nodes->syscall_nr = data->key;

		if (hashmap__add(hash, data->key, data) < 0) {
			free(data->nodes);
			free(data);
			return -ENOMEM;
		}
	}

	/* update total stats for this syscall */
	data->nr_events += map_data->count;
	data->total_time += map_data->total_time;

	/* This is sum of the same syscall from different CPUs */
	stat = &data->nodes->stats;

	stat->total_time += map_data->total_time;
	stat->squared_sum += map_data->squared_sum;
	stat->count += map_data->count;
	stat->error += map_data->error;

	if (stat->max_time < map_data->max_time)
		stat->max_time = map_data->max_time;
	if (stat->min_time > map_data->min_time || stat->min_time == 0)
		stat->min_time = map_data->min_time;

	return 0;
}

static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp)
{
	int printed = 0;
	int nr_events = 0;

	for (int i = 0; i < nr_data; i++)
		nr_events += data[i]->nr_events;

	printed += fprintf(fp, " total, %d events\n\n", nr_events);

	printed += fprintf(fp, "   syscall            calls errors     total       min       avg       max    stddev\n");
	printed += fprintf(fp, "                                          (msec)    (msec)    (msec)    (msec)       (%%)\n");
	printed += fprintf(fp, "   --------------- -------- ------  -------- --------- --------- ---------    ------\n");

	for (int i = 0; i < nr_data; i++)
		printed += print_common_stats(data[i], fp);

	printed += fprintf(fp, "\n\n");
	return printed;
}

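/*
 * Per-cgroup mode: same two-level scheme as the per-thread case, but
 * keyed by cgroup ID instead of TID.
 */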
static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key,
			       struct syscall_stats *map_data)
{
	struct syscall_data *data;
	struct syscall_node *nodes;

	if (!hashmap__find(hash, map_key->cgroup, &data)) {
		data = zalloc(sizeof(*data));
		if (data == NULL)
			return -ENOMEM;

		data->key = map_key->cgroup;
		if (hashmap__add(hash, data->key, data) < 0) {
			free(data);
			return -ENOMEM;
		}
	}

	/* update cgroup total stats */
	data->nr_events += map_data->count;
	data->total_time += map_data->total_time;

	nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
	if (nodes == NULL)
		return -ENOMEM;

	data->nodes = nodes;
	nodes = &data->nodes[data->nr_nodes++];
	nodes->syscall_nr = map_key->nr;

	/* each cgroup has an entry for each syscall, just use the stat */
	memcpy(&nodes->stats, map_data, sizeof(*map_data));
	return 0;
}

static int print_cgroup_stat(struct syscall_data *data, FILE *fp)
{
	int printed = 0;
	struct cgroup *cgrp = __cgroup__find(&cgroups, data->key);

	qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);

	if (cgrp)
		printed += fprintf(fp, " cgroup %s,", cgrp->name);
	else
		printed += fprintf(fp, " cgroup id:%" PRIu64 ",", data->key);

	printed += fprintf(fp, " %d events\n\n", data->nr_events);

	printed += fprintf(fp, "   syscall            calls errors     total       min       avg       max    stddev\n");
	printed += fprintf(fp, "                                          (msec)    (msec)    (msec)    (msec)       (%%)\n");
	printed += fprintf(fp, "   --------------- -------- ------  -------- --------- --------- ---------    ------\n");

	printed += print_common_stats(data, fp);
	printed += fprintf(fp, "\n\n");

	return printed;
}

static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp)
{
	int printed = 0;

	for (int i = 0; i < nr_data; i++)
		printed += print_cgroup_stat(data[i], fp);

	return printed;
}

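/*
 * Walk the BPF hash map with bpf_map__get_next_key()/lookup_elem(),
 * fold the entries into the local hashmap according to the aggregation
 * mode, then sort by total time and print.  The syscall_data entries
 * are owned by the local hashmap and freed once printed.
 */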
int trace_print_bpf_summary(FILE *fp)
{
	struct bpf_map *map = skel->maps.syscall_stats_map;
	struct syscall_key *prev_key, key;
	struct syscall_data **data = NULL;
	struct hashmap schash;
	struct hashmap_entry *entry;
	int nr_data = 0;
	int printed = 0;
	int i;
	size_t bkt;

	hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL);

	printed = fprintf(fp, "\n Summary of events:\n\n");

	/* get stats from the bpf map */
	prev_key = NULL;
	while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) {
		struct syscall_stats stat;

		if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) {
			switch (skel->rodata->aggr_mode) {
			case SYSCALL_AGGR_THREAD:
				update_thread_stats(&schash, &key, &stat);
				break;
			case SYSCALL_AGGR_CPU:
				update_total_stats(&schash, &key, &stat);
				break;
			case SYSCALL_AGGR_CGROUP:
				update_cgroup_stats(&schash, &key, &stat);
				break;
			default:
				break;
			}
		}

		prev_key = &key;
	}

	nr_data = hashmap__size(&schash);
	data = calloc(nr_data, sizeof(*data));
	if (data == NULL)
		goto out;

	i = 0;
	hashmap__for_each_entry(&schash, entry, bkt)
		data[i++] = entry->pvalue;

	qsort(data, nr_data, sizeof(*data), datacmp);

	switch (skel->rodata->aggr_mode) {
	case SYSCALL_AGGR_THREAD:
		printed += print_thread_stats(data, nr_data, fp);
		break;
	case SYSCALL_AGGR_CPU:
		printed += print_total_stats(data, nr_data, fp);
		break;
	case SYSCALL_AGGR_CGROUP:
		printed += print_cgroup_stats(data, nr_data, fp);
		break;
	default:
		break;
	}

	for (i = 0; i < nr_data && data; i++) {
		free(data[i]->nodes);
		free(data[i]);
	}
	free(data);

out:
	hashmap__clear(&schash);
	return printed;
}

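/*
 * Release the cached cgroup references and tear down the BPF skeleton.
 */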
void trace_cleanup_bpf_summary(void)
{
	if (!RB_EMPTY_ROOT(&cgroups)) {
		struct cgroup *cgrp, *tmp;

		rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node)
			cgroup__put(cgrp);

		cgroups = RB_ROOT;
	}

	syscall_summary_bpf__destroy(skel);
}