1 // SPDX-License-Identifier: GPL-2.0-only
2 #include "cgroup.h"
3 #include "counts.h"
4 #include "cputopo.h"
5 #include "evsel.h"
6 #include "pmu.h"
7 #include "print-events.h"
8 #include "smt.h"
9 #include "time-utils.h"
10 #include "tool_pmu.h"
11 #include "tsc.h"
12 #include <api/fs/fs.h>
13 #include <api/io.h>
14 #include <internal/threadmap.h>
15 #include <perf/threadmap.h>
16 #include <fcntl.h>
17 #include <strings.h>
18
/*
 * Names of the tool PMU events, indexed by enum tool_pmu_event (see
 * tool_pmu__event_to_str(), which uses the enum value as the array index).
 * The first slot is NULL; presumably it corresponds to TOOL_PMU__EVENT_NONE,
 * which the lookup helpers never index -- TODO confirm against the enum
 * definition. The entry order must stay in step with the enum.
 */
static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
	NULL,
	"duration_time",
	"user_time",
	"system_time",
	"has_pmem",
	"num_cores",
	"num_cpus",
	"num_cpus_online",
	"num_dies",
	"num_packages",
	"slots",
	"smt_on",
	"system_tsc_freq",
};
34
/*
 * Decide whether a tool event name must be hidden on the architecture this
 * file is compiled for. Names are compared case-insensitively.
 *
 * @name: event name to test.
 *
 * Returns true for "slots" on non-arm64 builds and for "system_tsc_freq" on
 * non-x86 builds; false for every other name.
 */
bool tool_pmu__skip_event(const char *name __maybe_unused)
{
	bool skip = false;

#if !defined(__aarch64__)
	/* The slots event should only appear on arm64. */
	if (!strcasecmp(name, "slots"))
		skip = true;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	/* The system_tsc_freq event should only appear on x86. */
	if (!skip && !strcasecmp(name, "system_tsc_freq"))
		skip = true;
#endif
	return skip;
}
49
/*
 * Count how many tool events tool_pmu__skip_event() hides on this build.
 *
 * One event is counted when not compiling for arm64 and one more when not
 * compiling for x86, mirroring the two preprocessor conditions used by
 * tool_pmu__skip_event().
 */
int tool_pmu__num_skip_events(void)
{
	int skipped = 0;

#if !defined(__aarch64__)
	skipped += 1;
#endif
#if !defined(__i386__) && !defined(__x86_64__)
	skipped += 1;
#endif
	return skipped;
}
62
tool_pmu__event_to_str(enum tool_pmu_event ev)63 const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
64 {
65 if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
66 !tool_pmu__skip_event(tool_pmu__event_names[ev]))
67 return tool_pmu__event_names[ev];
68
69 return NULL;
70 }
71
tool_pmu__str_to_event(const char * str)72 enum tool_pmu_event tool_pmu__str_to_event(const char *str)
73 {
74 int i;
75
76 if (tool_pmu__skip_event(str))
77 return TOOL_PMU__EVENT_NONE;
78
79 tool_pmu__for_each_event(i) {
80 if (!strcasecmp(str, tool_pmu__event_names[i]))
81 return i;
82 }
83 return TOOL_PMU__EVENT_NONE;
84 }
85
perf_pmu__is_tool(const struct perf_pmu * pmu)86 bool perf_pmu__is_tool(const struct perf_pmu *pmu)
87 {
88 return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
89 }
90
evsel__is_tool(const struct evsel * evsel)91 bool evsel__is_tool(const struct evsel *evsel)
92 {
93 return perf_pmu__is_tool(evsel->pmu);
94 }
95
evsel__tool_event(const struct evsel * evsel)96 enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
97 {
98 if (!evsel__is_tool(evsel))
99 return TOOL_PMU__EVENT_NONE;
100
101 return (enum tool_pmu_event)evsel->core.attr.config;
102 }
103
evsel__tool_pmu_event_name(const struct evsel * evsel)104 const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
105 {
106 return tool_pmu__event_to_str(evsel->core.attr.config);
107 }
108
/*
 * Consume characters from @io up to and including the first occurrence of @e.
 *
 * Returns true once @e has been consumed, false if io__get_char() reports -1
 * before @e is seen.
 */
static bool read_until_char(struct io *io, char e)
{
	for (;;) {
		int ch = io__get_char(io);

		if (ch == -1)
			return false;
		if (ch == e)
			return true;
	}
}
120
/*
 * Read one numeric column from a "cpu..." line of the stat file open on @fd.
 *
 * @fd:    descriptor positioned at the start of the file.
 * @cpu:   CPU whose line is wanted; cpu.cpu + 1 lines are skipped first, so
 *         -1 selects the very first line.
 * @field: 1-based index of the numeric column to return.
 * @val:   receives the parsed value on success.
 *
 * Returns 0 on success, -EINVAL if the line cannot be reached, does not start
 * with "cpu", or has fewer than @field space-terminated numbers.
 *
 * NOTE(review): selecting the line purely by skipping cpu.cpu + 1 lines
 * assumes the per-CPU lines are contiguous and ordered from CPU 0 -- confirm
 * the behaviour when some CPUs are absent from the file.
 */
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int line, cur;

	io__init(&io, fd, buf, sizeof(buf));

	/* Skip lines to relevant CPU. */
	for (line = -1; line < cpu.cpu; line++) {
		if (!read_until_char(&io, '\n'))
			return -EINVAL;
	}

	/* The line must start with "cpu". */
	if (io__get_char(&io) != 'c' ||
	    io__get_char(&io) != 'p' ||
	    io__get_char(&io) != 'u')
		return -EINVAL;

	/* Skip N of cpuN. */
	if (!read_until_char(&io, ' '))
		return -EINVAL;

	/* Walk the space-terminated numbers until the requested one. */
	for (cur = 1; io__get_dec(&io, val) == ' '; cur++) {
		if (cur == field)
			return 0;
	}
	return -EINVAL;
}
153
/*
 * Read one numeric field from a per-process stat file open on @fd.
 *
 * The expected layout is: a number, a space, a parenthesised string, a space,
 * a single character, a space, then space-terminated numbers.
 *
 * @fd:    descriptor positioned at the start of the file.
 * @field: 1-based field index. Fields 2 and 3 are strings and cannot be
 *         returned.
 * @val:   receives the parsed value on success.
 *
 * Returns 0 on success and -EINVAL on a parse failure, on end of input, or
 * when a string field was requested.
 */
static int read_pid_stat_field(int fd, int field, __u64 *val)
{
	char buf[256];
	struct io io;
	int cur;

	io__init(&io, fd, buf, sizeof(buf));

	/* Field 1 is a plain number. */
	if (io__get_dec(&io, val) != ' ')
		return -EINVAL;
	if (field == 1)
		return 0;

	/* Skip comm. */
	if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
		return -EINVAL;
	if (field == 2)
		return -EINVAL; /* String can't be returned. */

	/* Skip state */
	if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
		return -EINVAL;
	if (field == 3)
		return -EINVAL; /* String can't be returned. */

	/* Loop over numeric fields */
	if (io__get_char(&io) != ' ')
		return -EINVAL;

	for (cur = 4; ; cur++) {
		int sep = io__get_dec(&io, val);

		if (sep == -1)
			return -EINVAL;
		if (sep == -2) {
			/*
			 * Assume a -ve was read: parse the digits that follow
			 * and negate the result.
			 */
			sep = io__get_dec(&io, val);
			*val *= -1;
		}
		if (sep != ' ')
			return -EINVAL;
		if (cur == field)
			return 0;
	}
}
200
evsel__tool_pmu_prepare_open(struct evsel * evsel,struct perf_cpu_map * cpus,int nthreads)201 int evsel__tool_pmu_prepare_open(struct evsel *evsel,
202 struct perf_cpu_map *cpus,
203 int nthreads)
204 {
205 if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
206 evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
207 !evsel->start_times) {
208 evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
209 nthreads,
210 sizeof(__u64));
211 if (!evsel->start_times)
212 return -ENOMEM;
213 }
214 return 0;
215 }
216
/*
 * Access the file descriptor slot for CPU map index @x and thread @y of evsel
 * @e as an int lvalue. @e is parenthesised so that any pointer-valued
 * expression can be passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->core.fd, x, y))
218
evsel__tool_pmu_open(struct evsel * evsel,struct perf_thread_map * threads,int start_cpu_map_idx,int end_cpu_map_idx)219 int evsel__tool_pmu_open(struct evsel *evsel,
220 struct perf_thread_map *threads,
221 int start_cpu_map_idx, int end_cpu_map_idx)
222 {
223 enum tool_pmu_event ev = evsel__tool_event(evsel);
224 int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;
225
226 if (ev == TOOL_PMU__EVENT_NUM_CPUS)
227 return 0;
228
229 if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
230 if (evsel->core.attr.sample_period) /* no sampling */
231 return -EINVAL;
232 evsel->start_time = rdclock();
233 return 0;
234 }
235
236 if (evsel->cgrp)
237 pid = evsel->cgrp->fd;
238
239 nthreads = perf_thread_map__nr(threads);
240 for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
241 for (thread = 0; thread < nthreads; thread++) {
242 if (thread >= nthreads)
243 break;
244
245 if (!evsel->cgrp && !evsel->core.system_wide)
246 pid = perf_thread_map__pid(threads, thread);
247
248 if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
249 bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
250 __u64 *start_time = NULL;
251 int fd;
252
253 if (evsel->core.attr.sample_period) {
254 /* no sampling */
255 err = -EINVAL;
256 goto out_close;
257 }
258 if (pid > -1) {
259 char buf[64];
260
261 snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
262 fd = open(buf, O_RDONLY);
263 evsel->pid_stat = true;
264 } else {
265 fd = open("/proc/stat", O_RDONLY);
266 }
267 FD(evsel, idx, thread) = fd;
268 if (fd < 0) {
269 err = -errno;
270 goto out_close;
271 }
272 start_time = xyarray__entry(evsel->start_times, idx, thread);
273 if (pid > -1) {
274 err = read_pid_stat_field(fd, system ? 15 : 14,
275 start_time);
276 } else {
277 struct perf_cpu cpu;
278
279 cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
280 err = read_stat_field(fd, cpu, system ? 3 : 1,
281 start_time);
282 }
283 if (err)
284 goto out_close;
285 }
286
287 }
288 }
289 return 0;
290 out_close:
291 if (err)
292 threads->err_thread = thread;
293
294 old_errno = errno;
295 do {
296 while (--thread >= 0) {
297 if (FD(evsel, idx, thread) >= 0)
298 close(FD(evsel, idx, thread));
299 FD(evsel, idx, thread) = -1;
300 }
301 thread = nthreads;
302 } while (--idx >= 0);
303 errno = old_errno;
304 return err;
305 }
306
#if !defined(__i386__) && !defined(__x86_64__)
/*
 * Fallback compiled only for non-x86 builds: always reports a TSC frequency
 * of 0. Presumably x86 builds supply their own definition elsewhere -- TODO
 * confirm.
 */
u64 arch_get_tsc_freq(void)
{
	return 0;
}
#endif
313
#if !defined(__aarch64__)
/*
 * Fallback compiled only for non-arm64 builds: always reports 0 slots per
 * cycle, which tool_pmu__read_event() treats as "event not available".
 * Presumably arm64 builds supply their own definition elsewhere -- TODO
 * confirm.
 */
u64 tool_pmu__cpu_slots_per_cycle(void)
{
	return 0;
}
#endif
320
has_pmem(void)321 static bool has_pmem(void)
322 {
323 static bool has_pmem, cached;
324 const char *sysfs = sysfs__mountpoint();
325 char path[PATH_MAX];
326
327 if (!cached) {
328 snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
329 has_pmem = access(path, F_OK) == 0;
330 cached = true;
331 }
332 return has_pmem;
333 }
334
/*
 * Read the current value of a tool event that is a plain system property
 * (as opposed to the time based events, which are handled by
 * evsel__tool_pmu_read()).
 *
 * @ev:     event to read.
 * @result: receives the value.
 *
 * Returns true when @result holds a valid value. Returns false for the time
 * based events, for NONE/MAX/unknown values, when the online CPU map cannot
 * be obtained, and for the slots event when the reported value is 0.
 */
bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
{
	switch (ev) {
	case TOOL_PMU__EVENT_HAS_PMEM:
		*result = has_pmem() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_NUM_CORES:
		*result = online_topology()->core_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS:
		*result = cpu__max_present_cpu().cpu;
		return true;

	case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
		struct perf_cpu_map *online_cpus = cpu_map__online();

		if (!online_cpus)
			return false;

		*result = perf_cpu_map__nr(online_cpus);
		/* Drop the reference taken by cpu_map__online(). */
		perf_cpu_map__put(online_cpus);
		return true;
	}
	case TOOL_PMU__EVENT_NUM_DIES:
		*result = online_topology()->die_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_NUM_PACKAGES:
		*result = online_topology()->package_cpus_lists;
		return true;

	case TOOL_PMU__EVENT_SLOTS:
		/* A value of 0 means the event is not available. */
		*result = tool_pmu__cpu_slots_per_cycle();
		return *result != 0;

	case TOOL_PMU__EVENT_SMT_ON:
		*result = smt_on() ? 1 : 0;
		return true;

	case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
		*result = arch_get_tsc_freq();
		return true;

	case TOOL_PMU__EVENT_NONE:
	case TOOL_PMU__EVENT_DURATION_TIME:
	case TOOL_PMU__EVENT_USER_TIME:
	case TOOL_PMU__EVENT_SYSTEM_TIME:
	case TOOL_PMU__EVENT_MAX:
	default:
		return false;
	}
}
394
evsel__tool_pmu_read(struct evsel * evsel,int cpu_map_idx,int thread)395 int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
396 {
397 __u64 *start_time, cur_time, delta_start;
398 u64 val;
399 int fd, err = 0;
400 struct perf_counts_values *count, *old_count = NULL;
401 bool adjust = false;
402 enum tool_pmu_event ev = evsel__tool_event(evsel);
403
404 count = perf_counts(evsel->counts, cpu_map_idx, thread);
405
406 switch (ev) {
407 case TOOL_PMU__EVENT_HAS_PMEM:
408 case TOOL_PMU__EVENT_NUM_CORES:
409 case TOOL_PMU__EVENT_NUM_CPUS:
410 case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
411 case TOOL_PMU__EVENT_NUM_DIES:
412 case TOOL_PMU__EVENT_NUM_PACKAGES:
413 case TOOL_PMU__EVENT_SLOTS:
414 case TOOL_PMU__EVENT_SMT_ON:
415 case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
416 if (evsel->prev_raw_counts)
417 old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
418 val = 0;
419 if (cpu_map_idx == 0 && thread == 0) {
420 if (!tool_pmu__read_event(ev, &val)) {
421 count->lost++;
422 val = 0;
423 }
424 }
425 if (old_count) {
426 count->val = old_count->val + val;
427 count->run = old_count->run + 1;
428 count->ena = old_count->ena + 1;
429 } else {
430 count->val = val;
431 count->run++;
432 count->ena++;
433 }
434 return 0;
435 case TOOL_PMU__EVENT_DURATION_TIME:
436 /*
437 * Pretend duration_time is only on the first CPU and thread, or
438 * else aggregation will scale duration_time by the number of
439 * CPUs/threads.
440 */
441 start_time = &evsel->start_time;
442 if (cpu_map_idx == 0 && thread == 0)
443 cur_time = rdclock();
444 else
445 cur_time = *start_time;
446 break;
447 case TOOL_PMU__EVENT_USER_TIME:
448 case TOOL_PMU__EVENT_SYSTEM_TIME: {
449 bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
450
451 start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
452 fd = FD(evsel, cpu_map_idx, thread);
453 lseek(fd, SEEK_SET, 0);
454 if (evsel->pid_stat) {
455 /* The event exists solely on 1 CPU. */
456 if (cpu_map_idx == 0)
457 err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
458 else
459 cur_time = 0;
460 } else {
461 /* The event is for all threads. */
462 if (thread == 0) {
463 struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
464 cpu_map_idx);
465
466 err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
467 } else {
468 cur_time = 0;
469 }
470 }
471 adjust = true;
472 break;
473 }
474 case TOOL_PMU__EVENT_NONE:
475 case TOOL_PMU__EVENT_MAX:
476 default:
477 err = -EINVAL;
478 }
479 if (err)
480 return err;
481
482 delta_start = cur_time - *start_time;
483 if (adjust) {
484 __u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
485
486 delta_start *= 1000000000 / ticks_per_sec;
487 }
488 count->val = delta_start;
489 count->ena = count->run = delta_start;
490 count->lost = 0;
491 return 0;
492 }
493
tool_pmu__new(void)494 struct perf_pmu *tool_pmu__new(void)
495 {
496 struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
497
498 if (!tool)
499 goto out;
500 tool->name = strdup("tool");
501 if (!tool->name) {
502 zfree(&tool);
503 goto out;
504 }
505
506 tool->type = PERF_PMU_TYPE_TOOL;
507 INIT_LIST_HEAD(&tool->aliases);
508 INIT_LIST_HEAD(&tool->caps);
509 INIT_LIST_HEAD(&tool->format);
510 tool->events_table = find_core_events_table("common", "common");
511
512 out:
513 return tool;
514 }
515