1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
8 */
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/record.h"
31 #include "util/cpumap.h"
32 #include "util/thread_map.h"
33 #include "util/data.h"
34 #include "util/perf_regs.h"
35 #include "util/auxtrace.h"
36 #include "util/tsc.h"
37 #include "util/parse-branch-options.h"
38 #include "util/parse-regs-options.h"
39 #include "util/perf_api_probe.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/pmu.h"
50 #include "util/pmus.h"
51 #include "util/clockid.h"
52 #include "util/off_cpu.h"
53 #include "util/bpf-filter.h"
54 #include "asm/bug.h"
55 #include "perf.h"
56 #include "cputopo.h"
57
58 #include <errno.h>
59 #include <inttypes.h>
60 #include <locale.h>
61 #include <poll.h>
62 #include <pthread.h>
63 #include <unistd.h>
64 #ifndef HAVE_GETTID
65 #include <syscall.h>
66 #endif
67 #include <sched.h>
68 #include <signal.h>
69 #ifdef HAVE_EVENTFD_SUPPORT
70 #include <sys/eventfd.h>
71 #endif
72 #include <sys/mman.h>
73 #include <sys/wait.h>
74 #include <sys/types.h>
75 #include <sys/stat.h>
76 #include <fcntl.h>
77 #include <linux/err.h>
78 #include <linux/string.h>
79 #include <linux/time64.h>
80 #include <linux/zalloc.h>
81 #include <linux/bitmap.h>
82 #include <sys/time.h>
83
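/*
 * State for --switch-output: how switching of the output file is
 * triggered (signal, size or time) and a bounded ring of generated
 * file names so the oldest data files can be removed when the
 * configured file limit is reached.
 */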
84 struct switch_output {
85 bool enabled;
86 bool signal;
87 unsigned long size;
88 unsigned long time;
89 const char *str;
90 bool set;
91 char **filenames;
92 int num_files;
93 int cur_file;
94 };
95
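/*
 * Per-thread CPU masks: 'maps' selects which mmaps a recording thread
 * services, 'affinity' is the CPU set the thread may be migrated to.
 */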
96 struct thread_mask {
97 struct mmap_cpu_mask maps;
98 struct mmap_cpu_mask affinity;
99 };
100
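/*
 * Per recording-thread state: the thread's tid, the message/ack pipes
 * used to communicate with the main thread, the pollfd set and mmaps
 * it services, and its sample/byte counters.
 */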
101 struct record_thread {
102 pid_t tid;
103 struct thread_mask *mask;
104 struct {
105 int msg[2];
106 int ack[2];
107 } pipes;
108 struct fdarray pollfd;
109 int ctlfd_pos;
110 int nr_mmaps;
111 struct mmap **maps;
112 struct mmap **overwrite_maps;
113 struct record *rec;
114 unsigned long long samples;
115 unsigned long waking;
116 u64 bytes_written;
117 u64 bytes_transferred;
118 u64 bytes_compressed;
119 };
120
121 static __thread struct record_thread *thread;
122
123 enum thread_msg {
124 THREAD_MSG__UNDEFINED = 0,
125 THREAD_MSG__READY,
126 THREAD_MSG__MAX,
127 };
128
129 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
130 "UNDEFINED", "READY"
131 };
132
133 enum thread_spec {
134 THREAD_SPEC__UNDEFINED = 0,
135 THREAD_SPEC__CPU,
136 THREAD_SPEC__CORE,
137 THREAD_SPEC__PACKAGE,
138 THREAD_SPEC__NUMA,
139 THREAD_SPEC__USER,
140 THREAD_SPEC__MAX,
141 };
142
143 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
144 "undefined", "cpu", "core", "package", "numa", "user"
145 };
146
147 struct pollfd_index_map {
148 int evlist_pollfd_index;
149 int thread_pollfd_index;
150 };
151
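/*
 * Top-level state of a 'perf record' session: the tool callbacks,
 * record options, output data handling and, in parallel streaming
 * mode, the per-thread masks and thread data.
 */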
152 struct record {
153 struct perf_tool tool;
154 struct record_opts opts;
155 u64 bytes_written;
156 u64 thread_bytes_written;
157 struct perf_data data;
158 struct auxtrace_record *itr;
159 struct evlist *evlist;
160 struct perf_session *session;
161 struct evlist *sb_evlist;
162 pthread_t thread_id;
163 int realtime_prio;
164 bool latency;
165 bool switch_output_event_set;
166 bool no_buildid;
167 bool no_buildid_set;
168 bool no_buildid_cache;
169 bool no_buildid_cache_set;
170 bool buildid_all;
171 bool buildid_mmap;
172 bool timestamp_filename;
173 bool timestamp_boundary;
174 bool off_cpu;
175 const char *filter_action;
176 struct switch_output switch_output;
177 unsigned long long samples;
178 unsigned long output_max_size; /* = 0: unlimited */
179 struct perf_debuginfod debuginfod;
180 int nr_threads;
181 struct thread_mask *thread_masks;
182 struct record_thread *thread_data;
183 struct pollfd_index_map *index_map;
184 size_t index_map_sz;
185 size_t index_map_cnt;
186 };
187
188 static volatile int done;
189
190 static volatile int auxtrace_record__snapshot_started;
191 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
192 static DEFINE_TRIGGER(switch_output_trigger);
193
194 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
195 "SYS", "NODE", "CPU"
196 };
197
198 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
199 struct perf_sample *sample, struct machine *machine);
200 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
201 struct perf_sample *sample, struct machine *machine);
202 static int process_timestamp_boundary(const struct perf_tool *tool,
203 union perf_event *event,
204 struct perf_sample *sample,
205 struct machine *machine);
206
207 #ifndef HAVE_GETTID
208 static inline pid_t gettid(void)
209 {
210 return (pid_t)syscall(__NR_gettid);
211 }
212 #endif
213
214 static int record__threads_enabled(struct record *rec)
215 {
216 return rec->opts.threads_spec;
217 }
218
219 static bool switch_output_signal(struct record *rec)
220 {
221 return rec->switch_output.signal &&
222 trigger_is_ready(&switch_output_trigger);
223 }
224
225 static bool switch_output_size(struct record *rec)
226 {
227 return rec->switch_output.size &&
228 trigger_is_ready(&switch_output_trigger) &&
229 (rec->bytes_written >= rec->switch_output.size);
230 }
231
232 static bool switch_output_time(struct record *rec)
233 {
234 return rec->switch_output.time &&
235 trigger_is_ready(&switch_output_trigger);
236 }
237
238 static u64 record__bytes_written(struct record *rec)
239 {
240 return rec->bytes_written + rec->thread_bytes_written;
241 }
242
243 static bool record__output_max_size_exceeded(struct record *rec)
244 {
245 return rec->output_max_size &&
246 (record__bytes_written(rec) >= rec->output_max_size);
247 }
248
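/*
 * Write a block of data either to the per-thread data file (when map->file
 * is set in parallel streaming mode) or to the main perf.data file, update
 * the byte counters, and honour the output_max_size limit and the
 * switch-output size trigger.
 */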
249 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
250 void *bf, size_t size)
251 {
252 struct perf_data_file *file = &rec->session->data->file;
253
254 if (map && map->file)
255 file = map->file;
256
257 if (perf_data_file__write(file, bf, size) < 0) {
258 pr_err("failed to write perf data, error: %m\n");
259 return -1;
260 }
261
262 if (map && map->file) {
263 thread->bytes_written += size;
264 rec->thread_bytes_written += size;
265 } else {
266 rec->bytes_written += size;
267 }
268
269 if (record__output_max_size_exceeded(rec) && !done) {
270 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
271 " stopping session ]\n",
272 record__bytes_written(rec) >> 10);
273 done = 1;
274 }
275
276 if (switch_output_size(rec))
277 trigger_hit(&switch_output_trigger);
278
279 return 0;
280 }
281
282 static int record__aio_enabled(struct record *rec);
283 static int record__comp_enabled(struct record *rec);
284 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
285 void *dst, size_t dst_size, void *src, size_t src_size);
286
287 #ifdef HAVE_AIO_SUPPORT
288 static int record__aio_write(struct aiocb *cblock, int trace_fd,
289 void *buf, size_t size, off_t off)
290 {
291 int rc;
292
293 cblock->aio_fildes = trace_fd;
294 cblock->aio_buf = buf;
295 cblock->aio_nbytes = size;
296 cblock->aio_offset = off;
297 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
298
299 do {
300 rc = aio_write(cblock);
301 if (rc == 0) {
302 break;
303 } else if (errno != EAGAIN) {
304 cblock->aio_fildes = -1;
305 pr_err("failed to queue perf data, error: %m\n");
306 break;
307 }
308 } while (1);
309
310 return rc;
311 }
312
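/*
 * Check the state of an in-flight aio write: if it fully completed, drop
 * the mmap reference taken in record__aio_pushfn() and report the control
 * block as free (return 1); if only part of the data was written, restart
 * the write for the remainder and report it as still busy (return 0).
 */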
313 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
314 {
315 void *rem_buf;
316 off_t rem_off;
317 size_t rem_size;
318 int rc, aio_errno;
319 ssize_t aio_ret, written;
320
321 aio_errno = aio_error(cblock);
322 if (aio_errno == EINPROGRESS)
323 return 0;
324
325 written = aio_ret = aio_return(cblock);
326 if (aio_ret < 0) {
327 if (aio_errno != EINTR)
328 pr_err("failed to write perf data, error: %m\n");
329 written = 0;
330 }
331
332 rem_size = cblock->aio_nbytes - written;
333
334 if (rem_size == 0) {
335 cblock->aio_fildes = -1;
336 /*
337 * md->refcount is incremented in record__aio_pushfn() for
338 * every aio write request started in record__aio_push() so
339 * decrement it because the request is now complete.
340 */
341 perf_mmap__put(&md->core);
342 rc = 1;
343 } else {
344 /*
345 * aio write request may require restart with the
346 * remainder if the kernel didn't write whole
347 * chunk at once.
348 */
349 rem_off = cblock->aio_offset + written;
350 rem_buf = (void *)(cblock->aio_buf + written);
351 record__aio_write(cblock, cblock->aio_fildes,
352 rem_buf, rem_size, rem_off);
353 rc = 0;
354 }
355
356 return rc;
357 }
358
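/*
 * Wait for aio writes on this mmap: without sync_all, return the index of
 * the first free control block; with sync_all, wait until every outstanding
 * write has completed and return -1.
 */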
359 static int record__aio_sync(struct mmap *md, bool sync_all)
360 {
361 struct aiocb **aiocb = md->aio.aiocb;
362 struct aiocb *cblocks = md->aio.cblocks;
363 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
364 int i, do_suspend;
365
366 do {
367 do_suspend = 0;
368 for (i = 0; i < md->aio.nr_cblocks; ++i) {
369 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
370 if (sync_all)
371 aiocb[i] = NULL;
372 else
373 return i;
374 } else {
375 /*
376 * The started aio write is not complete yet,
377 * so it has to be waited on before the
378 * next allocation.
379 */
380 aiocb[i] = &cblocks[i];
381 do_suspend = 1;
382 }
383 }
384 if (!do_suspend)
385 return -1;
386
387 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
388 if (!(errno == EAGAIN || errno == EINTR))
389 pr_err("failed to sync perf data, error: %m\n");
390 }
391 } while (1);
392 }
393
394 struct record_aio {
395 struct record *rec;
396 void *data;
397 size_t size;
398 };
399
400 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
401 {
402 struct record_aio *aio = to;
403
404 /*
405 * The map->core.base data pointed to by buf is copied into a free map->aio.data[] buffer
406 * to release space in the kernel buffer as fast as possible, by calling
407 * perf_mmap__consume() from the perf_mmap__push() function.
408 *
409 * That lets the kernel proceed with storing more profiling data into
410 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
411 *
412 * Copying can be done in two steps in case the chunk of profiling data
413 * crosses the upper bound of the kernel buffer. In this case we first move
414 * part of data from map->start till the upper bound and then the remainder
415 * from the beginning of the kernel buffer till the end of the data chunk.
416 */
417
418 if (record__comp_enabled(aio->rec)) {
419 ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
420 mmap__mmap_len(map) - aio->size,
421 buf, size);
422 if (compressed < 0)
423 return (int)compressed;
424
425 size = compressed;
426 } else {
427 memcpy(aio->data + aio->size, buf, size);
428 }
429
430 if (!aio->size) {
431 /*
432 * Increment map->refcount to guard map->aio.data[] buffer
433 * from premature deallocation, because the map object can be
434 * released before the aio write request started on the
435 * map->aio.data[] buffer is complete.
436 *
437 * perf_mmap__put() is done at record__aio_complete()
438 * after started aio request completion or at record__aio_push()
439 * if the request failed to start.
440 */
441 perf_mmap__get(&map->core);
442 }
443
444 aio->size += size;
445
446 return size;
447 }
448
449 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
450 {
451 int ret, idx;
452 int trace_fd = rec->session->data->file.fd;
453 struct record_aio aio = { .rec = rec, .size = 0 };
454
455 /*
456 * Call record__aio_sync() to wait till map->aio.data[] buffer
457 * becomes available after previous aio write operation.
458 */
459
460 idx = record__aio_sync(map, false);
461 aio.data = map->aio.data[idx];
462 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
463 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
464 return ret;
465
466 rec->samples++;
467 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
468 if (!ret) {
469 *off += aio.size;
470 rec->bytes_written += aio.size;
471 if (switch_output_size(rec))
472 trigger_hit(&switch_output_trigger);
473 } else {
474 /*
475 * Decrement map->refcount incremented in record__aio_pushfn()
476 * back if record__aio_write() operation failed to start, otherwise
477 * map->refcount is decremented in record__aio_complete() after
478 * aio write operation finishes successfully.
479 */
480 perf_mmap__put(&map->core);
481 }
482
483 return ret;
484 }
485
486 static off_t record__aio_get_pos(int trace_fd)
487 {
488 return lseek(trace_fd, 0, SEEK_CUR);
489 }
490
491 static void record__aio_set_pos(int trace_fd, off_t pos)
492 {
493 lseek(trace_fd, pos, SEEK_SET);
494 }
495
496 static void record__aio_mmap_read_sync(struct record *rec)
497 {
498 int i;
499 struct evlist *evlist = rec->evlist;
500 struct mmap *maps = evlist->mmap;
501
502 if (!record__aio_enabled(rec))
503 return;
504
505 for (i = 0; i < evlist->core.nr_mmaps; i++) {
506 struct mmap *map = &maps[i];
507
508 if (map->core.base)
509 record__aio_sync(map, true);
510 }
511 }
512
513 static int nr_cblocks_default = 1;
514 static int nr_cblocks_max = 4;
515
516 static int record__aio_parse(const struct option *opt,
517 const char *str,
518 int unset)
519 {
520 struct record_opts *opts = (struct record_opts *)opt->value;
521
522 if (unset) {
523 opts->nr_cblocks = 0;
524 } else {
525 if (str)
526 opts->nr_cblocks = strtol(str, NULL, 0);
527 if (!opts->nr_cblocks)
528 opts->nr_cblocks = nr_cblocks_default;
529 }
530
531 return 0;
532 }
533 #else /* HAVE_AIO_SUPPORT */
534 static int nr_cblocks_max = 0;
535
536 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
537 off_t *off __maybe_unused)
538 {
539 return -1;
540 }
541
542 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
543 {
544 return -1;
545 }
546
547 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
548 {
549 }
550
551 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
552 {
553 }
554 #endif
555
556 static int record__aio_enabled(struct record *rec)
557 {
558 return rec->opts.nr_cblocks > 0;
559 }
560
561 #define MMAP_FLUSH_DEFAULT 1
562 static int record__mmap_flush_parse(const struct option *opt,
563 const char *str,
564 int unset)
565 {
566 int flush_max;
567 struct record_opts *opts = (struct record_opts *)opt->value;
568 static struct parse_tag tags[] = {
569 { .tag = 'B', .mult = 1 },
570 { .tag = 'K', .mult = 1 << 10 },
571 { .tag = 'M', .mult = 1 << 20 },
572 { .tag = 'G', .mult = 1 << 30 },
573 { .tag = 0 },
574 };
575
576 if (unset)
577 return 0;
578
579 if (str) {
580 opts->mmap_flush = parse_tag_value(str, tags);
581 if (opts->mmap_flush == (int)-1)
582 opts->mmap_flush = strtol(str, NULL, 0);
583 }
584
585 if (!opts->mmap_flush)
586 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
587
588 flush_max = evlist__mmap_size(opts->mmap_pages);
589 flush_max /= 4;
590 if (opts->mmap_flush > flush_max)
591 opts->mmap_flush = flush_max;
592
593 return 0;
594 }
595
596 #ifdef HAVE_ZSTD_SUPPORT
597 static unsigned int comp_level_default = 1;
598
599 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
600 {
601 struct record_opts *opts = opt->value;
602
603 if (unset) {
604 opts->comp_level = 0;
605 } else {
606 if (str)
607 opts->comp_level = strtol(str, NULL, 0);
608 if (!opts->comp_level)
609 opts->comp_level = comp_level_default;
610 }
611
612 return 0;
613 }
614 #endif
615 static unsigned int comp_level_max = 22;
616
617 static int record__comp_enabled(struct record *rec)
618 {
619 return rec->opts.comp_level > 0;
620 }
621
622 static int process_synthesized_event(const struct perf_tool *tool,
623 union perf_event *event,
624 struct perf_sample *sample __maybe_unused,
625 struct machine *machine __maybe_unused)
626 {
627 struct record *rec = container_of(tool, struct record, tool);
628 return record__write(rec, NULL, event, event->header.size);
629 }
630
631 static struct mutex synth_lock;
632
633 static int process_locked_synthesized_event(const struct perf_tool *tool,
634 union perf_event *event,
635 struct perf_sample *sample __maybe_unused,
636 struct machine *machine __maybe_unused)
637 {
638 int ret;
639
640 mutex_lock(&synth_lock);
641 ret = process_synthesized_event(tool, event, sample, machine);
642 mutex_unlock(&synth_lock);
643 return ret;
644 }
645
646 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
647 {
648 struct record *rec = to;
649
650 if (record__comp_enabled(rec)) {
651 ssize_t compressed = zstd_compress(rec->session, map, map->data,
652 mmap__mmap_len(map), bf, size);
653
654 if (compressed < 0)
655 return (int)compressed;
656
657 size = compressed;
658 bf = map->data;
659 }
660
661 thread->samples++;
662 return record__write(rec, map, bf, size);
663 }
664
665 static volatile sig_atomic_t signr = -1;
666 static volatile sig_atomic_t child_finished;
667 #ifdef HAVE_EVENTFD_SUPPORT
668 static volatile sig_atomic_t done_fd = -1;
669 #endif
670
671 static void sig_handler(int sig)
672 {
673 if (sig == SIGCHLD)
674 child_finished = 1;
675 else
676 signr = sig;
677
678 done = 1;
679 #ifdef HAVE_EVENTFD_SUPPORT
680 if (done_fd >= 0) {
681 u64 tmp = 1;
682 int orig_errno = errno;
683
684 /*
685 * It is possible for this signal handler to run after done is
686 * checked in the main loop, but before the perf counter fds are
687 * polled. If this happens, the poll() will continue to wait
688 * even though done is set, and will only break out if either
689 * another signal is received, or the counters are ready for
690 * read. To ensure the poll() doesn't sleep when done is set,
691 * use an eventfd (done_fd) to wake up the poll().
692 */
693 if (write(done_fd, &tmp, sizeof(tmp)) < 0)
694 pr_err("failed to signal wakeup fd, error: %m\n");
695
696 errno = orig_errno;
697 }
698 #endif // HAVE_EVENTFD_SUPPORT
699 }
700
701 static void sigsegv_handler(int sig)
702 {
703 perf_hooks__recover();
704 sighandler_dump_stack(sig);
705 }
706
707 static void record__sig_exit(void)
708 {
709 if (signr == -1)
710 return;
711
712 signal(signr, SIG_DFL);
713 raise(signr);
714 }
715
716 #ifdef HAVE_AUXTRACE_SUPPORT
717
718 static int record__process_auxtrace(const struct perf_tool *tool,
719 struct mmap *map,
720 union perf_event *event, void *data1,
721 size_t len1, void *data2, size_t len2)
722 {
723 struct record *rec = container_of(tool, struct record, tool);
724 struct perf_data *data = &rec->data;
725 size_t padding;
726 u8 pad[8] = {0};
727
728 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
729 off_t file_offset;
730 int fd = perf_data__fd(data);
731 int err;
732
733 file_offset = lseek(fd, 0, SEEK_CUR);
734 if (file_offset == -1)
735 return -1;
736 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
737 event, file_offset);
738 if (err)
739 return err;
740 }
741
742 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
743 padding = (len1 + len2) & 7;
744 if (padding)
745 padding = 8 - padding;
746
747 record__write(rec, map, event, event->header.size);
748 record__write(rec, map, data1, len1);
749 if (len2)
750 record__write(rec, map, data2, len2);
751 record__write(rec, map, &pad, padding);
752
753 return 0;
754 }
755
756 static int record__auxtrace_mmap_read(struct record *rec,
757 struct mmap *map)
758 {
759 int ret;
760
761 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
762 record__process_auxtrace);
763 if (ret < 0)
764 return ret;
765
766 if (ret)
767 rec->samples++;
768
769 return 0;
770 }
771
772 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
773 struct mmap *map)
774 {
775 int ret;
776
777 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
778 record__process_auxtrace,
779 rec->opts.auxtrace_snapshot_size);
780 if (ret < 0)
781 return ret;
782
783 if (ret)
784 rec->samples++;
785
786 return 0;
787 }
788
789 static int record__auxtrace_read_snapshot_all(struct record *rec)
790 {
791 int i;
792 int rc = 0;
793
794 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
795 struct mmap *map = &rec->evlist->mmap[i];
796
797 if (!map->auxtrace_mmap.base)
798 continue;
799
800 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
801 rc = -1;
802 goto out;
803 }
804 }
805 out:
806 return rc;
807 }
808
809 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
810 {
811 pr_debug("Recording AUX area tracing snapshot\n");
812 if (record__auxtrace_read_snapshot_all(rec) < 0) {
813 trigger_error(&auxtrace_snapshot_trigger);
814 } else {
815 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
816 trigger_error(&auxtrace_snapshot_trigger);
817 else
818 trigger_ready(&auxtrace_snapshot_trigger);
819 }
820 }
821
822 static int record__auxtrace_snapshot_exit(struct record *rec)
823 {
824 if (trigger_is_error(&auxtrace_snapshot_trigger))
825 return 0;
826
827 if (!auxtrace_record__snapshot_started &&
828 auxtrace_record__snapshot_start(rec->itr))
829 return -1;
830
831 record__read_auxtrace_snapshot(rec, true);
832 if (trigger_is_error(&auxtrace_snapshot_trigger))
833 return -1;
834
835 return 0;
836 }
837
838 static int record__auxtrace_init(struct record *rec)
839 {
840 int err;
841
842 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
843 && record__threads_enabled(rec)) {
844 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
845 return -EINVAL;
846 }
847
848 if (!rec->itr) {
849 rec->itr = auxtrace_record__init(rec->evlist, &err);
850 if (err)
851 return err;
852 }
853
854 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
855 rec->opts.auxtrace_snapshot_opts);
856 if (err)
857 return err;
858
859 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
860 rec->opts.auxtrace_sample_opts);
861 if (err)
862 return err;
863
864 err = auxtrace_parse_aux_action(rec->evlist);
865 if (err)
866 return err;
867
868 return auxtrace_parse_filters(rec->evlist);
869 }
870
871 #else
872
873 static inline
874 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
875 struct mmap *map __maybe_unused)
876 {
877 return 0;
878 }
879
880 static inline
881 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
882 bool on_exit __maybe_unused)
883 {
884 }
885
886 static inline
887 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
888 {
889 return 0;
890 }
891
892 static inline
893 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
894 {
895 return 0;
896 }
897
898 static int record__auxtrace_init(struct record *rec __maybe_unused)
899 {
900 return 0;
901 }
902
903 #endif
904
905 static int record__config_text_poke(struct evlist *evlist)
906 {
907 struct evsel *evsel;
908
909 /* Nothing to do if text poke is already configured */
910 evlist__for_each_entry(evlist, evsel) {
911 if (evsel->core.attr.text_poke)
912 return 0;
913 }
914
915 evsel = evlist__add_dummy_on_all_cpus(evlist);
916 if (!evsel)
917 return -ENOMEM;
918
919 evsel->core.attr.text_poke = 1;
920 evsel->core.attr.ksymbol = 1;
921 evsel->immediate = true;
922 evsel__set_sample_bit(evsel, TIME);
923
924 return 0;
925 }
926
927 static int record__config_off_cpu(struct record *rec)
928 {
929 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
930 }
931
932 static bool record__tracking_system_wide(struct record *rec)
933 {
934 struct evlist *evlist = rec->evlist;
935 struct evsel *evsel;
936
937 /*
938 * If a non-dummy evsel exists, system_wide sideband is needed to
939 * help parse sample information.
940 * For example, PERF_EVENT_MMAP event to help parse symbol,
941 * and PERF_EVENT_COMM event to help parse task executable name.
942 */
943 evlist__for_each_entry(evlist, evsel) {
944 if (!evsel__is_dummy_event(evsel))
945 return true;
946 }
947
948 return false;
949 }
950
951 static int record__config_tracking_events(struct record *rec)
952 {
953 struct record_opts *opts = &rec->opts;
954 struct evlist *evlist = rec->evlist;
955 bool system_wide = false;
956 struct evsel *evsel;
957
958 /*
959 * For initial_delay, system wide or a hybrid system, we need to add a
960 * tracking event so that we can track PERF_RECORD_MMAP to cover the
961 * delay of waiting or event synthesis.
962 */
963 if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
964 perf_pmus__num_core_pmus() > 1) {
965
966 /*
967 * User space tasks can migrate between CPUs, so when tracing
968 * selected CPUs, sideband for all CPUs is still needed.
969 */
970 if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
971 system_wide = true;
972
973 evsel = evlist__findnew_tracking_event(evlist, system_wide);
974 if (!evsel)
975 return -ENOMEM;
976
977 /*
978 * Enable the tracking event when the process is forked for
979 * initial_delay, immediately for system wide.
980 */
981 if (opts->target.initial_delay && !evsel->immediate &&
982 !target__has_cpu(&opts->target))
983 evsel->core.attr.enable_on_exec = 1;
984 else
985 evsel->immediate = 1;
986 }
987
988 return 0;
989 }
990
991 static bool record__kcore_readable(struct machine *machine)
992 {
993 char kcore[PATH_MAX];
994 int fd;
995
996 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
997
998 fd = open(kcore, O_RDONLY);
999 if (fd < 0)
1000 return false;
1001
1002 close(fd);
1003
1004 return true;
1005 }
1006
1007 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
1008 {
1009 char from_dir[PATH_MAX];
1010 char kcore_dir[PATH_MAX];
1011 int ret;
1012
1013 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
1014
1015 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1016 if (ret)
1017 return ret;
1018
1019 return kcore_copy(from_dir, kcore_dir);
1020 }
1021
1022 static void record__thread_data_init_pipes(struct record_thread *thread_data)
1023 {
1024 thread_data->pipes.msg[0] = -1;
1025 thread_data->pipes.msg[1] = -1;
1026 thread_data->pipes.ack[0] = -1;
1027 thread_data->pipes.ack[1] = -1;
1028 }
1029
1030 static int record__thread_data_open_pipes(struct record_thread *thread_data)
1031 {
1032 if (pipe(thread_data->pipes.msg))
1033 return -EINVAL;
1034
1035 if (pipe(thread_data->pipes.ack)) {
1036 close(thread_data->pipes.msg[0]);
1037 thread_data->pipes.msg[0] = -1;
1038 close(thread_data->pipes.msg[1]);
1039 thread_data->pipes.msg[1] = -1;
1040 return -EINVAL;
1041 }
1042
1043 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1044 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1045 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1046
1047 return 0;
1048 }
1049
1050 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1051 {
1052 if (thread_data->pipes.msg[0] != -1) {
1053 close(thread_data->pipes.msg[0]);
1054 thread_data->pipes.msg[0] = -1;
1055 }
1056 if (thread_data->pipes.msg[1] != -1) {
1057 close(thread_data->pipes.msg[1]);
1058 thread_data->pipes.msg[1] = -1;
1059 }
1060 if (thread_data->pipes.ack[0] != -1) {
1061 close(thread_data->pipes.ack[0]);
1062 thread_data->pipes.ack[0] = -1;
1063 }
1064 if (thread_data->pipes.ack[1] != -1) {
1065 close(thread_data->pipes.ack[1]);
1066 thread_data->pipes.ack[1] = -1;
1067 }
1068 }
1069
1070 static bool evlist__per_thread(struct evlist *evlist)
1071 {
1072 return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1073 }
1074
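/*
 * Distribute the evlist mmaps to this recording thread: in per-thread mode
 * the thread gets all of them, otherwise only the mmaps whose CPU is set
 * in the thread's 'maps' mask.
 */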
1075 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1076 {
1077 int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1078 struct mmap *mmap = evlist->mmap;
1079 struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1080 struct perf_cpu_map *cpus = evlist->core.all_cpus;
1081 bool per_thread = evlist__per_thread(evlist);
1082
1083 if (per_thread)
1084 thread_data->nr_mmaps = nr_mmaps;
1085 else
1086 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1087 thread_data->mask->maps.nbits);
1088 if (mmap) {
1089 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1090 if (!thread_data->maps)
1091 return -ENOMEM;
1092 }
1093 if (overwrite_mmap) {
1094 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1095 if (!thread_data->overwrite_maps) {
1096 zfree(&thread_data->maps);
1097 return -ENOMEM;
1098 }
1099 }
1100 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1101 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1102
1103 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1104 if (per_thread ||
1105 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1106 if (thread_data->maps) {
1107 thread_data->maps[tm] = &mmap[m];
1108 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1109 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1110 }
1111 if (thread_data->overwrite_maps) {
1112 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1113 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1114 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1115 }
1116 tm++;
1117 }
1118 }
1119
1120 return 0;
1121 }
1122
1123 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1124 {
1125 int f, tm, pos;
1126 struct mmap *map, *overwrite_map;
1127
1128 fdarray__init(&thread_data->pollfd, 64);
1129
1130 for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1131 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1132 overwrite_map = thread_data->overwrite_maps ?
1133 thread_data->overwrite_maps[tm] : NULL;
1134
1135 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1136 void *ptr = evlist->core.pollfd.priv[f].ptr;
1137
1138 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1139 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1140 &evlist->core.pollfd);
1141 if (pos < 0)
1142 return pos;
1143 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1144 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1145 }
1146 }
1147 }
1148
1149 return 0;
1150 }
1151
1152 static void record__free_thread_data(struct record *rec)
1153 {
1154 int t;
1155 struct record_thread *thread_data = rec->thread_data;
1156
1157 if (thread_data == NULL)
1158 return;
1159
1160 for (t = 0; t < rec->nr_threads; t++) {
1161 record__thread_data_close_pipes(&thread_data[t]);
1162 zfree(&thread_data[t].maps);
1163 zfree(&thread_data[t].overwrite_maps);
1164 fdarray__exit(&thread_data[t].pollfd);
1165 }
1166
1167 zfree(&rec->thread_data);
1168 }
1169
1170 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1171 int evlist_pollfd_index,
1172 int thread_pollfd_index)
1173 {
1174 size_t x = rec->index_map_cnt;
1175
1176 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1177 return -ENOMEM;
1178 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1179 rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1180 rec->index_map_cnt += 1;
1181 return 0;
1182 }
1183
1184 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1185 struct evlist *evlist,
1186 struct record_thread *thread_data)
1187 {
1188 struct pollfd *e_entries = evlist->core.pollfd.entries;
1189 struct pollfd *t_entries = thread_data->pollfd.entries;
1190 int err = 0;
1191 size_t i;
1192
1193 for (i = 0; i < rec->index_map_cnt; i++) {
1194 int e_pos = rec->index_map[i].evlist_pollfd_index;
1195 int t_pos = rec->index_map[i].thread_pollfd_index;
1196
1197 if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1198 e_entries[e_pos].events != t_entries[t_pos].events) {
1199 pr_err("Thread and evlist pollfd index mismatch\n");
1200 err = -EINVAL;
1201 continue;
1202 }
1203 e_entries[e_pos].revents = t_entries[t_pos].revents;
1204 }
1205 return err;
1206 }
1207
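/*
 * Duplicate the non-perf-event descriptors (e.g. control fds) from the
 * evlist pollfd array into the main thread's pollfd array, remembering the
 * index mapping so revents can later be copied back by
 * record__update_evlist_pollfd_from_thread().
 */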
1208 static int record__dup_non_perf_events(struct record *rec,
1209 struct evlist *evlist,
1210 struct record_thread *thread_data)
1211 {
1212 struct fdarray *fda = &evlist->core.pollfd;
1213 int i, ret;
1214
1215 for (i = 0; i < fda->nr; i++) {
1216 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1217 continue;
1218 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1219 if (ret < 0) {
1220 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1221 return ret;
1222 }
1223 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1224 thread_data, ret, fda->entries[i].fd);
1225 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1226 if (ret < 0) {
1227 pr_err("Failed to map thread and evlist pollfd indexes\n");
1228 return ret;
1229 }
1230 }
1231 return 0;
1232 }
1233
1234 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1235 {
1236 int t, ret;
1237 struct record_thread *thread_data;
1238
1239 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1240 if (!rec->thread_data) {
1241 pr_err("Failed to allocate thread data\n");
1242 return -ENOMEM;
1243 }
1244 thread_data = rec->thread_data;
1245
1246 for (t = 0; t < rec->nr_threads; t++)
1247 record__thread_data_init_pipes(&thread_data[t]);
1248
1249 for (t = 0; t < rec->nr_threads; t++) {
1250 thread_data[t].rec = rec;
1251 thread_data[t].mask = &rec->thread_masks[t];
1252 ret = record__thread_data_init_maps(&thread_data[t], evlist);
1253 if (ret) {
1254 pr_err("Failed to initialize thread[%d] maps\n", t);
1255 goto out_free;
1256 }
1257 ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1258 if (ret) {
1259 pr_err("Failed to initialize thread[%d] pollfd\n", t);
1260 goto out_free;
1261 }
1262 if (t) {
1263 thread_data[t].tid = -1;
1264 ret = record__thread_data_open_pipes(&thread_data[t]);
1265 if (ret) {
1266 pr_err("Failed to open thread[%d] communication pipes\n", t);
1267 goto out_free;
1268 }
1269 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1270 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1271 if (ret < 0) {
1272 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1273 goto out_free;
1274 }
1275 thread_data[t].ctlfd_pos = ret;
1276 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1277 thread_data, thread_data[t].ctlfd_pos,
1278 thread_data[t].pipes.msg[0]);
1279 } else {
1280 thread_data[t].tid = gettid();
1281
1282 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1283 if (ret < 0)
1284 goto out_free;
1285
1286 thread_data[t].ctlfd_pos = -1; /* Not used */
1287 }
1288 }
1289
1290 return 0;
1291
1292 out_free:
1293 record__free_thread_data(rec);
1294
1295 return ret;
1296 }
1297
1298 static int record__mmap_evlist(struct record *rec,
1299 struct evlist *evlist)
1300 {
1301 int i, ret;
1302 struct record_opts *opts = &rec->opts;
1303 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1304 opts->auxtrace_sample_mode;
1305 char msg[512];
1306
1307 if (opts->affinity != PERF_AFFINITY_SYS)
1308 cpu__setup_cpunode_map();
1309
1310 if (evlist__mmap_ex(evlist, opts->mmap_pages,
1311 opts->auxtrace_mmap_pages,
1312 auxtrace_overwrite,
1313 opts->nr_cblocks, opts->affinity,
1314 opts->mmap_flush, opts->comp_level) < 0) {
1315 if (errno == EPERM) {
1316 pr_err("Permission error mapping pages.\n"
1317 "Consider increasing "
1318 "/proc/sys/kernel/perf_event_mlock_kb,\n"
1319 "or try again with a smaller value of -m/--mmap_pages.\n"
1320 "(current value: %u,%u)\n",
1321 opts->mmap_pages, opts->auxtrace_mmap_pages);
1322 return -errno;
1323 } else {
1324 pr_err("failed to mmap with %d (%s)\n", errno,
1325 str_error_r(errno, msg, sizeof(msg)));
1326 if (errno)
1327 return -errno;
1328 else
1329 return -EINVAL;
1330 }
1331 }
1332
1333 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1334 return -1;
1335
1336 ret = record__alloc_thread_data(rec, evlist);
1337 if (ret)
1338 return ret;
1339
1340 if (record__threads_enabled(rec)) {
1341 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1342 if (ret) {
1343 pr_err("Failed to create data directory: %s\n", strerror(-ret));
1344 return ret;
1345 }
1346 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1347 if (evlist->mmap)
1348 evlist->mmap[i].file = &rec->data.dir.files[i];
1349 if (evlist->overwrite_mmap)
1350 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1351 }
1352 }
1353
1354 return 0;
1355 }
1356
1357 static int record__mmap(struct record *rec)
1358 {
1359 return record__mmap_evlist(rec, rec->evlist);
1360 }
1361
1362 static int record__open(struct record *rec)
1363 {
1364 char msg[BUFSIZ];
1365 struct evsel *pos;
1366 struct evlist *evlist = rec->evlist;
1367 struct perf_session *session = rec->session;
1368 struct record_opts *opts = &rec->opts;
1369 int rc = 0;
1370
1371 evlist__for_each_entry(evlist, pos) {
1372 try_again:
1373 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1374 if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1375 if (verbose > 0)
1376 ui__warning("%s\n", msg);
1377 goto try_again;
1378 }
1379 if ((errno == EINVAL || errno == EBADF) &&
1380 pos->core.leader != &pos->core &&
1381 pos->weak_group) {
1382 pos = evlist__reset_weak_group(evlist, pos, true);
1383 goto try_again;
1384 }
1385 rc = -errno;
1386 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1387 ui__error("%s\n", msg);
1388 goto out;
1389 }
1390
1391 pos->supported = true;
1392 }
1393
1394 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1395 pr_warning(
1396 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1397 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1398 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1399 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1400 "Samples in kernel modules won't be resolved at all.\n\n"
1401 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1402 "even with a suitable vmlinux or kallsyms file.\n\n");
1403 }
1404
1405 if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1406 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1407 pos->filter ?: "BPF", evsel__name(pos), errno,
1408 str_error_r(errno, msg, sizeof(msg)));
1409 rc = -1;
1410 goto out;
1411 }
1412
1413 rc = record__mmap(rec);
1414 if (rc)
1415 goto out;
1416
1417 session->evlist = evlist;
1418 perf_session__set_id_hdr_size(session);
1419 out:
1420 return rc;
1421 }
1422
1423 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1424 {
1425 if (rec->evlist->first_sample_time == 0)
1426 rec->evlist->first_sample_time = sample_time;
1427
1428 if (sample_time)
1429 rec->evlist->last_sample_time = sample_time;
1430 }
1431
1432 static int process_sample_event(const struct perf_tool *tool,
1433 union perf_event *event,
1434 struct perf_sample *sample,
1435 struct evsel *evsel,
1436 struct machine *machine)
1437 {
1438 struct record *rec = container_of(tool, struct record, tool);
1439
1440 set_timestamp_boundary(rec, sample->time);
1441
1442 if (rec->buildid_all)
1443 return 0;
1444
1445 rec->samples++;
1446 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1447 }
1448
1449 static int process_buildids(struct record *rec)
1450 {
1451 struct perf_session *session = rec->session;
1452
1453 if (perf_data__size(&rec->data) == 0)
1454 return 0;
1455
1456 /*
1457 * During this process, it'll load the kernel map and replace the
1458 * dso->long_name with the real pathname it found. In this case
1459 * we prefer the vmlinux path like
1460 * /lib/modules/3.16.4/build/vmlinux
1461 *
1462 * rather than build-id path (in debug directory).
1463 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1464 */
1465 symbol_conf.ignore_vmlinux_buildid = true;
1466
1467 /*
1468 * If --buildid-all is given, it marks all DSO regardless of hits,
1469 * so no need to process samples. But if timestamp_boundary is enabled,
1470 * it still needs to walk on all samples to get the timestamps of
1471 * first/last samples.
1472 */
1473 if (rec->buildid_all && !rec->timestamp_boundary)
1474 rec->tool.sample = process_event_sample_stub;
1475
1476 return perf_session__process_events(session);
1477 }
1478
1479 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1480 {
1481 int err;
1482 struct perf_tool *tool = data;
1483 /*
1484 * As for the guest kernel, when processing the record & report subcommands,
1485 * we arrange the module mmaps prior to the guest kernel mmap and trigger
1486 * a dso preload, because default guest module symbols are loaded
1487 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
1488 * method is used to avoid missing symbols when the first address is
1489 * in a module instead of in the guest kernel.
1490 */
1491 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1492 machine);
1493 if (err < 0)
1494 pr_err("Couldn't record guest kernel [%d]'s reference"
1495 " relocation symbol.\n", machine->pid);
1496
1497 /*
1498 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1499 * have no _text sometimes.
1500 */
1501 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1502 machine);
1503 if (err < 0)
1504 pr_err("Couldn't record guest kernel [%d]'s reference"
1505 " relocation symbol.\n", machine->pid);
1506 }
1507
1508 static struct perf_event_header finished_round_event = {
1509 .size = sizeof(struct perf_event_header),
1510 .type = PERF_RECORD_FINISHED_ROUND,
1511 };
1512
1513 static struct perf_event_header finished_init_event = {
1514 .size = sizeof(struct perf_event_header),
1515 .type = PERF_RECORD_FINISHED_INIT,
1516 };
1517
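/*
 * If an affinity mode other than the default system one is selected, migrate
 * the current thread to the CPUs backing the mmap before reading its data.
 */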
1518 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1519 {
1520 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1521 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1522 thread->mask->affinity.nbits)) {
1523 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1524 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1525 map->affinity_mask.bits, thread->mask->affinity.nbits);
1526 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1527 (cpu_set_t *)thread->mask->affinity.bits);
1528 if (verbose == 2) {
1529 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1530 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1531 }
1532 }
1533 }
1534
1535 static size_t process_comp_header(void *record, size_t increment)
1536 {
1537 struct perf_record_compressed *event = record;
1538 size_t size = sizeof(*event);
1539
1540 if (increment) {
1541 event->header.size += increment;
1542 return increment;
1543 }
1544
1545 event->header.type = PERF_RECORD_COMPRESSED;
1546 event->header.size = size;
1547
1548 return size;
1549 }
1550
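/*
 * Compress src into dst as PERF_RECORD_COMPRESSED records, using the
 * per-mmap zstd state when writing per-thread data files, and account
 * the transferred/compressed byte counters accordingly.
 */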
1551 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1552 void *dst, size_t dst_size, void *src, size_t src_size)
1553 {
1554 ssize_t compressed;
1555 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1556 struct zstd_data *zstd_data = &session->zstd_data;
1557
1558 if (map && map->file)
1559 zstd_data = &map->zstd_data;
1560
1561 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1562 max_record_size, process_comp_header);
1563 if (compressed < 0)
1564 return compressed;
1565
1566 if (map && map->file) {
1567 thread->bytes_transferred += src_size;
1568 thread->bytes_compressed += compressed;
1569 } else {
1570 session->bytes_transferred += src_size;
1571 session->bytes_compressed += compressed;
1572 }
1573
1574 return compressed;
1575 }
1576
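/*
 * Drain the (overwrite or regular) mmaps serviced by the current thread,
 * pushing their data either directly or via aio, reading AUX area data
 * where needed, and, in non-threaded mode, emitting a finished-round
 * event when new data was written.
 */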
1577 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1578 bool overwrite, bool synch)
1579 {
1580 u64 bytes_written = rec->bytes_written;
1581 int i;
1582 int rc = 0;
1583 int nr_mmaps;
1584 struct mmap **maps;
1585 int trace_fd = rec->data.file.fd;
1586 off_t off = 0;
1587
1588 if (!evlist)
1589 return 0;
1590
1591 nr_mmaps = thread->nr_mmaps;
1592 maps = overwrite ? thread->overwrite_maps : thread->maps;
1593
1594 if (!maps)
1595 return 0;
1596
1597 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1598 return 0;
1599
1600 if (record__aio_enabled(rec))
1601 off = record__aio_get_pos(trace_fd);
1602
1603 for (i = 0; i < nr_mmaps; i++) {
1604 u64 flush = 0;
1605 struct mmap *map = maps[i];
1606
1607 if (map->core.base) {
1608 record__adjust_affinity(rec, map);
1609 if (synch) {
1610 flush = map->core.flush;
1611 map->core.flush = 1;
1612 }
1613 if (!record__aio_enabled(rec)) {
1614 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1615 if (synch)
1616 map->core.flush = flush;
1617 rc = -1;
1618 goto out;
1619 }
1620 } else {
1621 if (record__aio_push(rec, map, &off) < 0) {
1622 record__aio_set_pos(trace_fd, off);
1623 if (synch)
1624 map->core.flush = flush;
1625 rc = -1;
1626 goto out;
1627 }
1628 }
1629 if (synch)
1630 map->core.flush = flush;
1631 }
1632
1633 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1634 !rec->opts.auxtrace_sample_mode &&
1635 record__auxtrace_mmap_read(rec, map) != 0) {
1636 rc = -1;
1637 goto out;
1638 }
1639 }
1640
1641 if (record__aio_enabled(rec))
1642 record__aio_set_pos(trace_fd, off);
1643
1644 /*
1645 * Mark the round finished in case we wrote
1646 * at least one event.
1647 *
1648 * No need for round events in directory mode,
1649 * because per-cpu maps and files have data
1650 * sorted by the kernel.
1651 */
1652 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1653 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1654
1655 if (overwrite)
1656 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1657 out:
1658 return rc;
1659 }
1660
1661 static int record__mmap_read_all(struct record *rec, bool synch)
1662 {
1663 int err;
1664
1665 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1666 if (err)
1667 return err;
1668
1669 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1670 }
1671
1672 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1673 void *arg __maybe_unused)
1674 {
1675 struct perf_mmap *map = fda->priv[fd].ptr;
1676
1677 if (map)
1678 perf_mmap__put(map);
1679 }
1680
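/*
 * Body of each parallel recording thread: acknowledge startup over the ack
 * pipe, then loop reading the thread's mmaps and polling its descriptors
 * until the main thread closes the message pipe (POLLHUP), do a final
 * synchronized flush and acknowledge termination.
 */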
1681 static void *record__thread(void *arg)
1682 {
1683 enum thread_msg msg = THREAD_MSG__READY;
1684 bool terminate = false;
1685 struct fdarray *pollfd;
1686 int err, ctlfd_pos;
1687
1688 thread = arg;
1689 thread->tid = gettid();
1690
1691 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1692 if (err == -1)
1693 pr_warning("threads[%d]: failed to notify on start: %s\n",
1694 thread->tid, strerror(errno));
1695
1696 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1697
1698 pollfd = &thread->pollfd;
1699 ctlfd_pos = thread->ctlfd_pos;
1700
1701 for (;;) {
1702 unsigned long long hits = thread->samples;
1703
1704 if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1705 break;
1706
1707 if (hits == thread->samples) {
1708
1709 err = fdarray__poll(pollfd, -1);
1710 /*
1711 * Propagate error, only if there's any. Ignore positive
1712 * number of returned events and interrupt error.
1713 */
1714 if (err > 0 || (err < 0 && errno == EINTR))
1715 err = 0;
1716 thread->waking++;
1717
1718 if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1719 record__thread_munmap_filtered, NULL) == 0)
1720 break;
1721 }
1722
1723 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1724 terminate = true;
1725 close(thread->pipes.msg[0]);
1726 thread->pipes.msg[0] = -1;
1727 pollfd->entries[ctlfd_pos].fd = -1;
1728 pollfd->entries[ctlfd_pos].events = 0;
1729 }
1730
1731 pollfd->entries[ctlfd_pos].revents = 0;
1732 }
1733 record__mmap_read_all(thread->rec, true);
1734
1735 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1736 if (err == -1)
1737 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1738 thread->tid, strerror(errno));
1739
1740 return NULL;
1741 }
1742
1743 static void record__init_features(struct record *rec)
1744 {
1745 struct perf_session *session = rec->session;
1746 int feat;
1747
1748 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1749 perf_header__set_feat(&session->header, feat);
1750
1751 if (rec->no_buildid)
1752 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1753
1754 if (!have_tracepoints(&rec->evlist->core.entries))
1755 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1756
1757 if (!rec->opts.branch_stack)
1758 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1759
1760 if (!rec->opts.full_auxtrace)
1761 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1762
1763 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1764 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1765
1766 if (!rec->opts.use_clockid)
1767 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1768
1769 if (!record__threads_enabled(rec))
1770 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1771
1772 if (!record__comp_enabled(rec))
1773 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1774
1775 perf_header__clear_feat(&session->header, HEADER_STAT);
1776 }
1777
1778 static void
1779 record__finish_output(struct record *rec)
1780 {
1781 int i;
1782 struct perf_data *data = &rec->data;
1783 int fd = perf_data__fd(data);
1784
1785 if (data->is_pipe) {
1786 /* Just to display approx. size */
1787 data->file.size = rec->bytes_written;
1788 return;
1789 }
1790
1791 rec->session->header.data_size += rec->bytes_written;
1792 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1793 if (record__threads_enabled(rec)) {
1794 for (i = 0; i < data->dir.nr; i++)
1795 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1796 }
1797
1798 if (!rec->no_buildid) {
1799 process_buildids(rec);
1800
1801 if (rec->buildid_all)
1802 perf_session__dsos_hit_all(rec->session);
1803 }
1804 perf_session__write_header(rec->session, rec->evlist, fd, true);
1805
1806 return;
1807 }
1808
1809 static int record__synthesize_workload(struct record *rec, bool tail)
1810 {
1811 int err;
1812 struct perf_thread_map *thread_map;
1813 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1814
1815 if (rec->opts.tail_synthesize != tail)
1816 return 0;
1817
1818 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1819 if (thread_map == NULL)
1820 return -1;
1821
1822 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1823 process_synthesized_event,
1824 &rec->session->machines.host,
1825 needs_mmap,
1826 rec->opts.sample_address);
1827 perf_thread_map__put(thread_map);
1828 return err;
1829 }
1830
1831 static int write_finished_init(struct record *rec, bool tail)
1832 {
1833 if (rec->opts.tail_synthesize != tail)
1834 return 0;
1835
1836 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1837 }
1838
1839 static int record__synthesize(struct record *rec, bool tail);
1840
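/*
 * Finish the current output file (tail synthesis, headers) and switch to a
 * new timestamp-suffixed file; when a maximum number of files is configured,
 * the oldest generated file is removed.
 */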
1841 static int
1842 record__switch_output(struct record *rec, bool at_exit)
1843 {
1844 struct perf_data *data = &rec->data;
1845 char *new_filename = NULL;
1846 int fd, err;
1847
1848 /* Same size as a real timestamp, e.g. "2015122520103046" */
1849 char timestamp[] = "InvalidTimestamp";
1850
1851 record__aio_mmap_read_sync(rec);
1852
1853 write_finished_init(rec, true);
1854
1855 record__synthesize(rec, true);
1856 if (target__none(&rec->opts.target))
1857 record__synthesize_workload(rec, true);
1858
1859 rec->samples = 0;
1860 record__finish_output(rec);
1861 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1862 if (err) {
1863 pr_err("Failed to get current timestamp\n");
1864 return -EINVAL;
1865 }
1866
1867 fd = perf_data__switch(data, timestamp,
1868 rec->session->header.data_offset,
1869 at_exit, &new_filename);
1870 if (fd >= 0 && !at_exit) {
1871 rec->bytes_written = 0;
1872 rec->session->header.data_size = 0;
1873 }
1874
1875 if (!quiet) {
1876 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1877 data->path, timestamp);
1878 }
1879
1880 if (rec->switch_output.num_files) {
1881 int n = rec->switch_output.cur_file + 1;
1882
1883 if (n >= rec->switch_output.num_files)
1884 n = 0;
1885 rec->switch_output.cur_file = n;
1886 if (rec->switch_output.filenames[n]) {
1887 remove(rec->switch_output.filenames[n]);
1888 zfree(&rec->switch_output.filenames[n]);
1889 }
1890 rec->switch_output.filenames[n] = new_filename;
1891 } else {
1892 free(new_filename);
1893 }
1894
1895 /* Output tracking events */
1896 if (!at_exit) {
1897 record__synthesize(rec, false);
1898
1899 /*
1900 * In 'perf record --switch-output' without -a,
1901 * record__synthesize() in record__switch_output() won't
1902 * generate tracking events because there's no thread_map
1903 * in evlist, which causes the newly created perf.data to
1904 * lack map and comm information.
1905 * Create a fake thread_map and directly call
1906 * perf_event__synthesize_thread_map() for those events.
1907 */
1908 if (target__none(&rec->opts.target))
1909 record__synthesize_workload(rec, false);
1910 write_finished_init(rec, false);
1911 }
1912 return fd;
1913 }
1914
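/*
 * Emit a synthesized PERF_RECORD_LOST_SAMPLES event for one (cpu, thread)
 * counter instance, attaching the sample id so that later processing can
 * attribute the lost count to the right event.
 */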
1915 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1916 struct perf_record_lost_samples *lost,
1917 int cpu_idx, int thread_idx, u64 lost_count,
1918 u16 misc_flag)
1919 {
1920 struct perf_sample_id *sid;
1921 struct perf_sample sample;
1922 int id_hdr_size;
1923
1924 perf_sample__init(&sample, /*all=*/true);
1925 lost->lost = lost_count;
1926 if (evsel->core.ids) {
1927 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1928 sample.id = sid->id;
1929 }
1930
1931 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1932 evsel->core.attr.sample_type, &sample);
1933 lost->header.size = sizeof(*lost) + id_hdr_size;
1934 lost->header.misc = misc_flag;
1935 record__write(rec, NULL, lost, lost->header.size);
1936 perf_sample__exit(&sample);
1937 }
1938
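/*
 * Read the LOST counts from every opened counter (plus any events dropped
 * by the BPF sample filter) and write them into the data file as
 * PERF_RECORD_LOST_SAMPLES records.
 */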
1939 static void record__read_lost_samples(struct record *rec)
1940 {
1941 struct perf_session *session = rec->session;
1942 struct perf_record_lost_samples_and_ids lost;
1943 struct evsel *evsel;
1944
1945 /* there was an error during record__open */
1946 if (session->evlist == NULL)
1947 return;
1948
1949 evlist__for_each_entry(session->evlist, evsel) {
1950 struct xyarray *xy = evsel->core.sample_id;
1951 u64 lost_count;
1952
1953 if (xy == NULL || evsel->core.fd == NULL)
1954 continue;
1955 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1956 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1957 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1958 continue;
1959 }
1960
1961 for (int x = 0; x < xyarray__max_x(xy); x++) {
1962 for (int y = 0; y < xyarray__max_y(xy); y++) {
1963 struct perf_counts_values count;
1964
1965 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1966 pr_debug("read LOST count failed\n");
1967 return;
1968 }
1969
1970 if (count.lost) {
1971 memset(&lost, 0, sizeof(lost));
1972 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
1973 __record__save_lost_samples(rec, evsel, &lost.lost,
1974 x, y, count.lost, 0);
1975 }
1976 }
1977 }
1978
1979 lost_count = perf_bpf_filter__lost_count(evsel);
1980 if (lost_count) {
1981 memset(&lost, 0, sizeof(lost));
1982 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
1983 __record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
1984 PERF_RECORD_MISC_LOST_SAMPLES_BPF);
1985 }
1986 }
1987 }
1988
1989 static volatile sig_atomic_t workload_exec_errno;
1990
1991 /*
1992 * evlist__prepare_workload will send a SIGUSR1
1993 * if the fork fails, since we asked by setting its
1994 * want_signal to true.
1995 */
1996 static void workload_exec_failed_signal(int signo __maybe_unused,
1997 siginfo_t *info,
1998 void *ucontext __maybe_unused)
1999 {
2000 workload_exec_errno = info->si_value.sival_int;
2001 done = 1;
2002 child_finished = 1;
2003 }
2004
2005 static void snapshot_sig_handler(int sig);
2006 static void alarm_sig_handler(int sig);
2007
2008 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2009 {
2010 if (evlist) {
2011 if (evlist->mmap && evlist->mmap[0].core.base)
2012 return evlist->mmap[0].core.base;
2013 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2014 return evlist->overwrite_mmap[0].core.base;
2015 }
2016 return NULL;
2017 }
2018
2019 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2020 {
2021 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2022 if (pc)
2023 return pc;
2024 return NULL;
2025 }
2026
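/*
 * Synthesize all non-sample metadata for the session: pipe header events,
 * time conversion, id_index, auxtrace info, kernel/module mmaps, guest
 * machines, extra attrs, thread/cpu maps, BPF and cgroup events, and
 * finally the existing tasks (optionally in parallel, see
 * --num-thread-synthesize).
 */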
2027 static int record__synthesize(struct record *rec, bool tail)
2028 {
2029 struct perf_session *session = rec->session;
2030 struct machine *machine = &session->machines.host;
2031 struct perf_data *data = &rec->data;
2032 struct record_opts *opts = &rec->opts;
2033 struct perf_tool *tool = &rec->tool;
2034 int err = 0;
2035 event_op f = process_synthesized_event;
2036
2037 if (rec->opts.tail_synthesize != tail)
2038 return 0;
2039
2040 if (data->is_pipe) {
2041 err = perf_event__synthesize_for_pipe(tool, session, data,
2042 process_synthesized_event);
2043 if (err < 0)
2044 goto out;
2045
2046 rec->bytes_written += err;
2047 }
2048
2049 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2050 process_synthesized_event, machine);
2051 if (err)
2052 goto out;
2053
2054 /* Synthesize id_index before auxtrace_info */
2055 err = perf_event__synthesize_id_index(tool,
2056 process_synthesized_event,
2057 session->evlist, machine);
2058 if (err)
2059 goto out;
2060
2061 if (rec->opts.full_auxtrace) {
2062 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2063 session, process_synthesized_event);
2064 if (err)
2065 goto out;
2066 }
2067
2068 if (!evlist__exclude_kernel(rec->evlist)) {
2069 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2070 machine);
2071 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2072 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2073 "Check /proc/kallsyms permission or run as root.\n");
2074
2075 err = perf_event__synthesize_modules(tool, process_synthesized_event,
2076 machine);
2077 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2078 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2079 "Check /proc/modules permission or run as root.\n");
2080 }
2081
2082 if (perf_guest) {
2083 machines__process_guests(&session->machines,
2084 perf_event__synthesize_guest_os, tool);
2085 }
2086
2087 err = perf_event__synthesize_extra_attr(&rec->tool,
2088 rec->evlist,
2089 process_synthesized_event,
2090 data->is_pipe);
2091 if (err)
2092 goto out;
2093
2094 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2095 process_synthesized_event,
2096 NULL);
2097 if (err < 0) {
2098 pr_err("Couldn't synthesize thread map.\n");
2099 return err;
2100 }
2101
2102 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2103 process_synthesized_event, NULL);
2104 if (err < 0) {
2105 pr_err("Couldn't synthesize cpu map.\n");
2106 return err;
2107 }
2108
2109 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2110 machine, opts);
2111 if (err < 0) {
2112 pr_warning("Couldn't synthesize bpf events.\n");
2113 err = 0;
2114 }
2115
2116 if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2117 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2118 machine);
2119 if (err < 0) {
2120 pr_warning("Couldn't synthesize cgroup events.\n");
2121 err = 0;
2122 }
2123 }
2124
2125 if (rec->opts.nr_threads_synthesize > 1) {
2126 mutex_init(&synth_lock);
2127 perf_set_multithreaded();
2128 f = process_locked_synthesized_event;
2129 }
2130
2131 if (rec->opts.synth & PERF_SYNTH_TASK) {
2132 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2133
2134 err = __machine__synthesize_threads(machine, tool, &opts->target,
2135 rec->evlist->core.threads,
2136 f, needs_mmap, opts->sample_address,
2137 rec->opts.nr_threads_synthesize);
2138 }
2139
2140 if (rec->opts.nr_threads_synthesize > 1) {
2141 perf_set_singlethreaded();
2142 mutex_destroy(&synth_lock);
2143 }
2144
2145 out:
2146 return err;
2147 }
2148
2149 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2150 {
2151 struct record *rec = data;
2152 pthread_kill(rec->thread_id, SIGUSR2);
2153 return 0;
2154 }
2155
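/*
 * Set up the side-band evlist: hook --switch-output-event entries to a
 * SIGUSR2-sending callback and, with libbpf support, add the
 * PERF_RECORD_BPF_EVENT side-band event before starting the side-band
 * thread.
 */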
2156 static int record__setup_sb_evlist(struct record *rec)
2157 {
2158 struct record_opts *opts = &rec->opts;
2159
2160 if (rec->sb_evlist != NULL) {
2161 /*
2162 * We get here if --switch-output-event populated the
2163 * sb_evlist, so associate a callback that will send a SIGUSR2
2164 * to the main thread.
2165 */
2166 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2167 rec->thread_id = pthread_self();
2168 }
2169 #ifdef HAVE_LIBBPF_SUPPORT
2170 if (!opts->no_bpf_event) {
2171 if (rec->sb_evlist == NULL) {
2172 rec->sb_evlist = evlist__new();
2173
2174 if (rec->sb_evlist == NULL) {
2175 pr_err("Couldn't create side band evlist.\n");
2176 return -1;
2177 }
2178 }
2179
2180 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
2181 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
2182 return -1;
2183 }
2184 }
2185 #endif
2186 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2187 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2188 opts->no_bpf_event = true;
2189 }
2190
2191 return 0;
2192 }
2193
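/*
 * Record reference timestamps for --clockid: one from gettimeofday() (TOD)
 * and one from the selected clockid, both converted to nanoseconds.  A
 * sketch of the conversion done below:
 *
 *   tod_ns     = tv_sec * NSEC_PER_SEC + tv_usec * NSEC_PER_USEC;
 *   clockid_ns = tv_sec * NSEC_PER_SEC + tv_nsec;
 *
 * The pair is stored in the header env so later tooling can relate sample
 * times to wall-clock time.
 */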
2194 static int record__init_clock(struct record *rec)
2195 {
2196 struct perf_session *session = rec->session;
2197 struct timespec ref_clockid;
2198 struct timeval ref_tod;
2199 u64 ref;
2200
2201 if (!rec->opts.use_clockid)
2202 return 0;
2203
2204 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2205 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2206
2207 session->header.env.clock.clockid = rec->opts.clockid;
2208
2209 if (gettimeofday(&ref_tod, NULL) != 0) {
2210 pr_err("gettimeofday failed, cannot set reference time.\n");
2211 return -1;
2212 }
2213
2214 if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2215 pr_err("clock_gettime failed, cannot set reference time.\n");
2216 return -1;
2217 }
2218
2219 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2220 (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2221
2222 session->header.env.clock.tod_ns = ref;
2223
2224 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2225 (u64) ref_clockid.tv_nsec;
2226
2227 session->header.env.clock.clockid_ns = ref;
2228 return 0;
2229 }
2230
2231 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2232 {
2233 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2234 trigger_hit(&auxtrace_snapshot_trigger);
2235 auxtrace_record__snapshot_started = 1;
2236 if (auxtrace_record__snapshot_start(rec->itr))
2237 trigger_error(&auxtrace_snapshot_trigger);
2238 }
2239 }
2240
2241 static int record__terminate_thread(struct record_thread *thread_data)
2242 {
2243 int err;
2244 enum thread_msg ack = THREAD_MSG__UNDEFINED;
2245 pid_t tid = thread_data->tid;
2246
2247 close(thread_data->pipes.msg[1]);
2248 thread_data->pipes.msg[1] = -1;
2249 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2250 if (err > 0)
2251 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2252 else
2253 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2254 thread->tid, tid);
2255
2256 return 0;
2257 }
2258
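/*
 * Start the per-buffer reader threads for --threads mode: block signals
 * while spawning detached pthreads (pinned to their affinity masks when
 * pthread_attr_setaffinity_np() is available), wait for each to ack over
 * its pipe, then restore the main thread's affinity and signal mask.
 */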
2259 static int record__start_threads(struct record *rec)
2260 {
2261 int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2262 struct record_thread *thread_data = rec->thread_data;
2263 sigset_t full, mask;
2264 pthread_t handle;
2265 pthread_attr_t attrs;
2266
2267 thread = &thread_data[0];
2268
2269 if (!record__threads_enabled(rec))
2270 return 0;
2271
2272 sigfillset(&full);
2273 if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2274 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2275 return -1;
2276 }
2277
2278 pthread_attr_init(&attrs);
2279 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2280
2281 for (t = 1; t < nr_threads; t++) {
2282 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2283
2284 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2285 pthread_attr_setaffinity_np(&attrs,
2286 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2287 (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2288 #endif
2289 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2290 for (tt = 1; tt < t; tt++)
2291 record__terminate_thread(&thread_data[tt]);
2292 pr_err("Failed to start threads: %s\n", strerror(errno));
2293 ret = -1;
2294 goto out_err;
2295 }
2296
2297 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2298 if (err > 0)
2299 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2300 thread_msg_tags[msg]);
2301 else
2302 pr_warning("threads[%d]: failed to receive start notification from %d\n",
2303 thread->tid, rec->thread_data[t].tid);
2304 }
2305
2306 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2307 (cpu_set_t *)thread->mask->affinity.bits);
2308
2309 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2310
2311 out_err:
2312 pthread_attr_destroy(&attrs);
2313
2314 if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2315 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2316 ret = -1;
2317 }
2318
2319 return ret;
2320 }
2321
2322 static int record__stop_threads(struct record *rec)
2323 {
2324 int t;
2325 struct record_thread *thread_data = rec->thread_data;
2326
2327 for (t = 1; t < rec->nr_threads; t++)
2328 record__terminate_thread(&thread_data[t]);
2329
2330 for (t = 0; t < rec->nr_threads; t++) {
2331 rec->samples += thread_data[t].samples;
2332 if (!record__threads_enabled(rec))
2333 continue;
2334 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2335 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2336 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2337 thread_data[t].samples, thread_data[t].waking);
2338 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2339 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2340 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2341 else
2342 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2343 }
2344
2345 return 0;
2346 }
2347
2348 static unsigned long record__waking(struct record *rec)
2349 {
2350 int t;
2351 unsigned long waking = 0;
2352 struct record_thread *thread_data = rec->thread_data;
2353
2354 for (t = 0; t < rec->nr_threads; t++)
2355 waking += thread_data[t].waking;
2356
2357 return waking;
2358 }
2359
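/*
 * Main record loop: set up the session, synthesize metadata, fork/enable
 * the workload, then poll and drain the mmap buffers until done, handling
 * auxtrace snapshots, switch-output rotation and control-fd commands along
 * the way; finally stop threads, flush, and finish or rotate the output.
 */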
2360 static int __cmd_record(struct record *rec, int argc, const char **argv)
2361 {
2362 int err;
2363 int status = 0;
2364 const bool forks = argc > 0;
2365 struct perf_tool *tool = &rec->tool;
2366 struct record_opts *opts = &rec->opts;
2367 struct perf_data *data = &rec->data;
2368 struct perf_session *session;
2369 bool disabled = false, draining = false;
2370 int fd;
2371 float ratio = 0;
2372 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2373
2374 atexit(record__sig_exit);
2375 signal(SIGCHLD, sig_handler);
2376 signal(SIGINT, sig_handler);
2377 signal(SIGTERM, sig_handler);
2378 signal(SIGSEGV, sigsegv_handler);
2379
2380 if (rec->opts.record_cgroup) {
2381 #ifndef HAVE_FILE_HANDLE
2382 pr_err("cgroup tracking is not supported\n");
2383 return -1;
2384 #endif
2385 }
2386
2387 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2388 signal(SIGUSR2, snapshot_sig_handler);
2389 if (rec->opts.auxtrace_snapshot_mode)
2390 trigger_on(&auxtrace_snapshot_trigger);
2391 if (rec->switch_output.enabled)
2392 trigger_on(&switch_output_trigger);
2393 } else {
2394 signal(SIGUSR2, SIG_IGN);
2395 }
2396
2397 perf_tool__init(tool, /*ordered_events=*/true);
2398 tool->sample = process_sample_event;
2399 tool->fork = perf_event__process_fork;
2400 tool->exit = perf_event__process_exit;
2401 tool->comm = perf_event__process_comm;
2402 tool->namespaces = perf_event__process_namespaces;
2403 tool->mmap = build_id__process_mmap;
2404 tool->mmap2 = build_id__process_mmap2;
2405 tool->itrace_start = process_timestamp_boundary;
2406 tool->aux = process_timestamp_boundary;
2407 tool->namespace_events = rec->opts.record_namespaces;
2408 tool->cgroup_events = rec->opts.record_cgroup;
2409 session = perf_session__new(data, tool);
2410 if (IS_ERR(session)) {
2411 pr_err("Perf session creation failed.\n");
2412 return PTR_ERR(session);
2413 }
2414
2415 if (record__threads_enabled(rec)) {
2416 if (perf_data__is_pipe(&rec->data)) {
2417 pr_err("Parallel trace streaming is not available in pipe mode.\n");
2418 return -1;
2419 }
2420 if (rec->opts.full_auxtrace) {
2421 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2422 return -1;
2423 }
2424 }
2425
2426 fd = perf_data__fd(data);
2427 rec->session = session;
2428
2429 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2430 pr_err("Compression initialization failed.\n");
2431 return -1;
2432 }
2433 #ifdef HAVE_EVENTFD_SUPPORT
2434 done_fd = eventfd(0, EFD_NONBLOCK);
2435 if (done_fd < 0) {
2436 pr_err("Failed to create wakeup eventfd, error: %m\n");
2437 status = -1;
2438 goto out_delete_session;
2439 }
2440 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2441 if (err < 0) {
2442 pr_err("Failed to add wakeup eventfd to poll list\n");
2443 status = err;
2444 goto out_delete_session;
2445 }
2446 #endif // HAVE_EVENTFD_SUPPORT
2447
2448 session->header.env.comp_type = PERF_COMP_ZSTD;
2449 session->header.env.comp_level = rec->opts.comp_level;
2450
2451 if (rec->opts.kcore &&
2452 !record__kcore_readable(&session->machines.host)) {
2453 pr_err("ERROR: kcore is not readable.\n");
2454 return -1;
2455 }
2456
2457 if (record__init_clock(rec))
2458 return -1;
2459
2460 record__init_features(rec);
2461
2462 if (forks) {
2463 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2464 workload_exec_failed_signal);
2465 if (err < 0) {
2466 pr_err("Couldn't run the workload!\n");
2467 status = err;
2468 goto out_delete_session;
2469 }
2470 }
2471
2472 /*
2473 * If we have just single event and are sending data
2474 * through pipe, we need to force the ids allocation,
2475 * because we synthesize event name through the pipe
2476 * and need the id for that.
2477 */
2478 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2479 rec->opts.sample_id = true;
2480
2481 if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2482 rec->timestamp_filename = false;
2483 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2484 }
2485
2486 evlist__uniquify_name(rec->evlist);
2487
2488 evlist__config(rec->evlist, opts, &callchain_param);
2489
2490 /* Debug message used by test scripts */
2491 pr_debug3("perf record opening and mmapping events\n");
2492 if (record__open(rec) != 0) {
2493 err = -1;
2494 goto out_free_threads;
2495 }
2496 /* Debug message used by test scripts */
2497 pr_debug3("perf record done opening and mmapping events\n");
2498 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2499
2500 if (rec->opts.kcore) {
2501 err = record__kcore_copy(&session->machines.host, data);
2502 if (err) {
2503 pr_err("ERROR: Failed to copy kcore\n");
2504 goto out_free_threads;
2505 }
2506 }
2507
2508 /*
2509 * Normally perf_session__new would do this, but it doesn't have the
2510 * evlist.
2511 */
2512 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2513 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2514 rec->tool.ordered_events = false;
2515 }
2516
2517 if (evlist__nr_groups(rec->evlist) == 0)
2518 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2519
2520 if (data->is_pipe) {
2521 err = perf_header__write_pipe(fd);
2522 if (err < 0)
2523 goto out_free_threads;
2524 } else {
2525 err = perf_session__write_header(session, rec->evlist, fd, false);
2526 if (err < 0)
2527 goto out_free_threads;
2528 }
2529
2530 err = -1;
2531 if (!rec->no_buildid
2532 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2533 pr_err("Couldn't generate buildids. "
2534 "Use --no-buildid to profile anyway.\n");
2535 goto out_free_threads;
2536 }
2537
2538 if (!evlist__needs_bpf_sb_event(rec->evlist))
2539 opts->no_bpf_event = true;
2540
2541 err = record__setup_sb_evlist(rec);
2542 if (err)
2543 goto out_free_threads;
2544
2545 err = record__synthesize(rec, false);
2546 if (err < 0)
2547 goto out_free_threads;
2548
2549 if (rec->realtime_prio) {
2550 struct sched_param param;
2551
2552 param.sched_priority = rec->realtime_prio;
2553 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2554 pr_err("Could not set realtime priority.\n");
2555 err = -1;
2556 goto out_free_threads;
2557 }
2558 }
2559
2560 if (record__start_threads(rec))
2561 goto out_free_threads;
2562
2563 /*
2564 * When perf is starting the traced process, all the events
2565 * (apart from group members) have enable_on_exec=1 set,
2566 * so don't spoil it by prematurely enabling them.
2567 */
2568 if (!target__none(&opts->target) && !opts->target.initial_delay)
2569 evlist__enable(rec->evlist);
2570
2571 /*
2572 * Let the child rip
2573 */
2574 if (forks) {
2575 struct machine *machine = &session->machines.host;
2576 union perf_event *event;
2577 pid_t tgid;
2578
2579 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2580 if (event == NULL) {
2581 err = -ENOMEM;
2582 goto out_child;
2583 }
2584
2585 /*
2586 * Some H/W events are generated before the COMM event,
2587 * which is emitted during exec(), so perf script
2588 * cannot see a correct process name for those events.
2589 * Synthesize a COMM event to prevent it.
2590 */
2591 tgid = perf_event__synthesize_comm(tool, event,
2592 rec->evlist->workload.pid,
2593 process_synthesized_event,
2594 machine);
2595 free(event);
2596
2597 if (tgid == -1)
2598 goto out_child;
2599
2600 event = malloc(sizeof(event->namespaces) +
2601 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2602 machine->id_hdr_size);
2603 if (event == NULL) {
2604 err = -ENOMEM;
2605 goto out_child;
2606 }
2607
2608 /*
2609 * Synthesize NAMESPACES event for the command specified.
2610 */
2611 perf_event__synthesize_namespaces(tool, event,
2612 rec->evlist->workload.pid,
2613 tgid, process_synthesized_event,
2614 machine);
2615 free(event);
2616
2617 evlist__start_workload(rec->evlist);
2618 }
2619
2620 if (opts->target.initial_delay) {
2621 pr_info(EVLIST_DISABLED_MSG);
2622 if (opts->target.initial_delay > 0) {
2623 usleep(opts->target.initial_delay * USEC_PER_MSEC);
2624 evlist__enable(rec->evlist);
2625 pr_info(EVLIST_ENABLED_MSG);
2626 }
2627 }
2628
2629 err = event_enable_timer__start(rec->evlist->eet);
2630 if (err)
2631 goto out_child;
2632
2633 /* Debug message used by test scripts */
2634 pr_debug3("perf record has started\n");
2635 fflush(stderr);
2636
2637 trigger_ready(&auxtrace_snapshot_trigger);
2638 trigger_ready(&switch_output_trigger);
2639 perf_hooks__invoke_record_start();
2640
2641 /*
2642 * Must write FINISHED_INIT so it will be seen after all other
2643 * synthesized user events, but before any regular events.
2644 */
2645 err = write_finished_init(rec, false);
2646 if (err < 0)
2647 goto out_child;
2648
2649 for (;;) {
2650 unsigned long long hits = thread->samples;
2651
2652 /*
2653 * rec->evlist->bkw_mmap_state is possible to be
2654 * BKW_MMAP_EMPTY here: when done == true and
2655 * hits != rec->samples in previous round.
2656 *
2657 * evlist__toggle_bkw_mmap ensures we never
2658 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2659 */
2660 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2661 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2662
2663 if (record__mmap_read_all(rec, false) < 0) {
2664 trigger_error(&auxtrace_snapshot_trigger);
2665 trigger_error(&switch_output_trigger);
2666 err = -1;
2667 goto out_child;
2668 }
2669
2670 if (auxtrace_record__snapshot_started) {
2671 auxtrace_record__snapshot_started = 0;
2672 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2673 record__read_auxtrace_snapshot(rec, false);
2674 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2675 pr_err("AUX area tracing snapshot failed\n");
2676 err = -1;
2677 goto out_child;
2678 }
2679 }
2680
2681 if (trigger_is_hit(&switch_output_trigger)) {
2682 /*
2683 * If switch_output_trigger is hit, the data in
2684 * overwritable ring buffer should have been collected,
2685 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2686 *
2687 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2688 * record__mmap_read_all() didn't collect data from
2689 * overwritable ring buffer. Read again.
2690 */
2691 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2692 continue;
2693 trigger_ready(&switch_output_trigger);
2694
2695 /*
2696 * Reenable events in overwrite ring buffer after
2697 * record__mmap_read_all(): we should have collected
2698 * data from it.
2699 */
2700 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2701
2702 if (!quiet)
2703 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2704 record__waking(rec));
2705 thread->waking = 0;
2706 fd = record__switch_output(rec, false);
2707 if (fd < 0) {
2708 pr_err("Failed to switch to new file\n");
2709 trigger_error(&switch_output_trigger);
2710 err = fd;
2711 goto out_child;
2712 }
2713
2714 /* re-arm the alarm */
2715 if (rec->switch_output.time)
2716 alarm(rec->switch_output.time);
2717 }
2718
2719 if (hits == thread->samples) {
2720 if (done || draining)
2721 break;
2722 err = fdarray__poll(&thread->pollfd, -1);
2723 /*
2724 * Propagate the error only if there is one; ignore a positive
2725 * number of returned events and interrupted polls (EINTR).
2726 */
2727 if (err > 0 || (err < 0 && errno == EINTR))
2728 err = 0;
2729 thread->waking++;
2730
2731 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2732 record__thread_munmap_filtered, NULL) == 0)
2733 draining = true;
2734
2735 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2736 if (err)
2737 goto out_child;
2738 }
2739
2740 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2741 switch (cmd) {
2742 case EVLIST_CTL_CMD_SNAPSHOT:
2743 hit_auxtrace_snapshot_trigger(rec);
2744 evlist__ctlfd_ack(rec->evlist);
2745 break;
2746 case EVLIST_CTL_CMD_STOP:
2747 done = 1;
2748 break;
2749 case EVLIST_CTL_CMD_ACK:
2750 case EVLIST_CTL_CMD_UNSUPPORTED:
2751 case EVLIST_CTL_CMD_ENABLE:
2752 case EVLIST_CTL_CMD_DISABLE:
2753 case EVLIST_CTL_CMD_EVLIST:
2754 case EVLIST_CTL_CMD_PING:
2755 default:
2756 break;
2757 }
2758 }
2759
2760 err = event_enable_timer__process(rec->evlist->eet);
2761 if (err < 0)
2762 goto out_child;
2763 if (err) {
2764 err = 0;
2765 done = 1;
2766 }
2767
2768 /*
2769 * When perf is starting the traced process, at the end events
2770 * die with the process and we wait for that. Thus no need to
2771 * disable events in this case.
2772 */
2773 if (done && !disabled && !target__none(&opts->target)) {
2774 trigger_off(&auxtrace_snapshot_trigger);
2775 evlist__disable(rec->evlist);
2776 disabled = true;
2777 }
2778 }
2779
2780 trigger_off(&auxtrace_snapshot_trigger);
2781 trigger_off(&switch_output_trigger);
2782
2783 if (opts->auxtrace_snapshot_on_exit)
2784 record__auxtrace_snapshot_exit(rec);
2785
2786 if (forks && workload_exec_errno) {
2787 char msg[STRERR_BUFSIZE], strevsels[2048];
2788 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2789
2790 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2791
2792 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2793 strevsels, argv[0], emsg);
2794 err = -1;
2795 goto out_child;
2796 }
2797
2798 if (!quiet)
2799 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2800 record__waking(rec));
2801
2802 write_finished_init(rec, true);
2803
2804 if (target__none(&rec->opts.target))
2805 record__synthesize_workload(rec, true);
2806
2807 out_child:
2808 record__stop_threads(rec);
2809 record__mmap_read_all(rec, true);
2810 out_free_threads:
2811 record__free_thread_data(rec);
2812 evlist__finalize_ctlfd(rec->evlist);
2813 record__aio_mmap_read_sync(rec);
2814
2815 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2816 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2817 session->header.env.comp_ratio = ratio + 0.5;
2818 }
2819
2820 if (forks) {
2821 int exit_status;
2822
2823 if (!child_finished)
2824 kill(rec->evlist->workload.pid, SIGTERM);
2825
2826 wait(&exit_status);
2827
2828 if (err < 0)
2829 status = err;
2830 else if (WIFEXITED(exit_status))
2831 status = WEXITSTATUS(exit_status);
2832 else if (WIFSIGNALED(exit_status))
2833 signr = WTERMSIG(exit_status);
2834 } else
2835 status = err;
2836
2837 if (rec->off_cpu)
2838 rec->bytes_written += off_cpu_write(rec->session);
2839
2840 record__read_lost_samples(rec);
2841 record__synthesize(rec, true);
2842 /* this will be recalculated during process_buildids() */
2843 rec->samples = 0;
2844
2845 if (!err) {
2846 if (!rec->timestamp_filename) {
2847 record__finish_output(rec);
2848 } else {
2849 fd = record__switch_output(rec, true);
2850 if (fd < 0) {
2851 status = fd;
2852 goto out_delete_session;
2853 }
2854 }
2855 }
2856
2857 perf_hooks__invoke_record_end();
2858
2859 if (!err && !quiet) {
2860 char samples[128];
2861 const char *postfix = rec->timestamp_filename ?
2862 ".<timestamp>" : "";
2863
2864 if (rec->samples && !rec->opts.full_auxtrace)
2865 scnprintf(samples, sizeof(samples),
2866 " (%" PRIu64 " samples)", rec->samples);
2867 else
2868 samples[0] = '\0';
2869
2870 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2871 perf_data__size(data) / 1024.0 / 1024.0,
2872 data->path, postfix, samples);
2873 if (ratio) {
2874 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2875 rec->session->bytes_transferred / 1024.0 / 1024.0,
2876 ratio);
2877 }
2878 fprintf(stderr, " ]\n");
2879 }
2880
2881 out_delete_session:
2882 #ifdef HAVE_EVENTFD_SUPPORT
2883 if (done_fd >= 0) {
2884 fd = done_fd;
2885 done_fd = -1;
2886
2887 close(fd);
2888 }
2889 #endif
2890 zstd_fini(&session->zstd_data);
2891 if (!opts->no_bpf_event)
2892 evlist__stop_sb_thread(rec->sb_evlist);
2893
2894 perf_session__delete(session);
2895 return status;
2896 }
2897
2898 static void callchain_debug(struct callchain_param *callchain)
2899 {
2900 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2901
2902 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2903
2904 if (callchain->record_mode == CALLCHAIN_DWARF)
2905 pr_debug("callchain: stack dump size %d\n",
2906 callchain->dump_size);
2907 }
2908
2909 int record_opts__parse_callchain(struct record_opts *record,
2910 struct callchain_param *callchain,
2911 const char *arg, bool unset)
2912 {
2913 int ret;
2914 callchain->enabled = !unset;
2915
2916 /* --no-call-graph */
2917 if (unset) {
2918 callchain->record_mode = CALLCHAIN_NONE;
2919 pr_debug("callchain: disabled\n");
2920 return 0;
2921 }
2922
2923 ret = parse_callchain_record_opt(arg, callchain);
2924 if (!ret) {
2925 /* Enable data address sampling for DWARF unwind. */
2926 if (callchain->record_mode == CALLCHAIN_DWARF)
2927 record->sample_address = true;
2928 callchain_debug(callchain);
2929 }
2930
2931 return ret;
2932 }
2933
2934 int record_parse_callchain_opt(const struct option *opt,
2935 const char *arg,
2936 int unset)
2937 {
2938 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2939 }
2940
2941 int record_callchain_opt(const struct option *opt,
2942 const char *arg __maybe_unused,
2943 int unset __maybe_unused)
2944 {
2945 struct callchain_param *callchain = opt->value;
2946
2947 callchain->enabled = true;
2948
2949 if (callchain->record_mode == CALLCHAIN_NONE)
2950 callchain->record_mode = CALLCHAIN_FP;
2951
2952 callchain_debug(callchain);
2953 return 0;
2954 }
2955
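/*
 * Handle 'perf config' keys for record.  An illustrative config fragment
 * (values as handled below; the call-graph modes shown are the usual
 * record modes, the key itself is just forwarded to perf_default_config()):
 *
 *   [record]
 *       build-id   = cache | no-cache | skip | mmap
 *       call-graph = fp | dwarf | lbr
 *       aio        = <n>        # with HAVE_AIO_SUPPORT
 *       debuginfod = <urls>
 */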
2956 static int perf_record_config(const char *var, const char *value, void *cb)
2957 {
2958 struct record *rec = cb;
2959
2960 if (!strcmp(var, "record.build-id")) {
2961 if (!strcmp(value, "cache"))
2962 rec->no_buildid_cache = false;
2963 else if (!strcmp(value, "no-cache"))
2964 rec->no_buildid_cache = true;
2965 else if (!strcmp(value, "skip"))
2966 rec->no_buildid = true;
2967 else if (!strcmp(value, "mmap"))
2968 rec->buildid_mmap = true;
2969 else
2970 return -1;
2971 return 0;
2972 }
2973 if (!strcmp(var, "record.call-graph")) {
2974 var = "call-graph.record-mode";
2975 return perf_default_config(var, value, cb);
2976 }
2977 #ifdef HAVE_AIO_SUPPORT
2978 if (!strcmp(var, "record.aio")) {
2979 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2980 if (!rec->opts.nr_cblocks)
2981 rec->opts.nr_cblocks = nr_cblocks_default;
2982 }
2983 #endif
2984 if (!strcmp(var, "record.debuginfod")) {
2985 rec->debuginfod.urls = strdup(value);
2986 if (!rec->debuginfod.urls)
2987 return -ENOMEM;
2988 rec->debuginfod.set = true;
2989 }
2990
2991 return 0;
2992 }
2993
2994 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
2995 {
2996 struct record *rec = (struct record *)opt->value;
2997
2998 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
2999 }
3000
3001 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3002 {
3003 struct record_opts *opts = (struct record_opts *)opt->value;
3004
3005 if (unset || !str)
3006 return 0;
3007
3008 if (!strcasecmp(str, "node"))
3009 opts->affinity = PERF_AFFINITY_NODE;
3010 else if (!strcasecmp(str, "cpu"))
3011 opts->affinity = PERF_AFFINITY_CPU;
3012
3013 return 0;
3014 }
3015
3016 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3017 {
3018 mask->nbits = nr_bits;
3019 mask->bits = bitmap_zalloc(mask->nbits);
3020 if (!mask->bits)
3021 return -ENOMEM;
3022
3023 return 0;
3024 }
3025
3026 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3027 {
3028 bitmap_free(mask->bits);
3029 mask->nbits = 0;
3030 }
3031
3032 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3033 {
3034 int ret;
3035
3036 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3037 if (ret) {
3038 mask->affinity.bits = NULL;
3039 return ret;
3040 }
3041
3042 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3043 if (ret) {
3044 record__mmap_cpu_mask_free(&mask->maps);
3045 mask->maps.bits = NULL;
3046 }
3047
3048 return ret;
3049 }
3050
3051 static void record__thread_mask_free(struct thread_mask *mask)
3052 {
3053 record__mmap_cpu_mask_free(&mask->maps);
3054 record__mmap_cpu_mask_free(&mask->affinity);
3055 }
3056
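/*
 * Parse the --threads argument: no value defaults to one thread per CPU
 * (THREAD_SPEC__CPU), a known tag from thread_spec_tags (cpu, core,
 * package, numa) selects that grouping, and anything else is kept verbatim
 * as a user-provided spec (THREAD_SPEC__USER).
 */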
3057 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3058 {
3059 int s;
3060 struct record_opts *opts = opt->value;
3061
3062 if (unset || !str || !strlen(str)) {
3063 opts->threads_spec = THREAD_SPEC__CPU;
3064 } else {
3065 for (s = 1; s < THREAD_SPEC__MAX; s++) {
3066 if (s == THREAD_SPEC__USER) {
3067 opts->threads_user_spec = strdup(str);
3068 if (!opts->threads_user_spec)
3069 return -ENOMEM;
3070 opts->threads_spec = THREAD_SPEC__USER;
3071 break;
3072 }
3073 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3074 opts->threads_spec = s;
3075 break;
3076 }
3077 }
3078 }
3079
3080 if (opts->threads_spec == THREAD_SPEC__USER)
3081 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3082 else
3083 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3084
3085 return 0;
3086 }
3087
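/*
 * Parse --max-size; accepts a plain byte count or a B/K/M/G suffix,
 * e.g. "--max-size=200M" (illustrative value, size tags as in tags_size[]
 * below).
 */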
3088 static int parse_output_max_size(const struct option *opt,
3089 const char *str, int unset)
3090 {
3091 unsigned long *s = (unsigned long *)opt->value;
3092 static struct parse_tag tags_size[] = {
3093 { .tag = 'B', .mult = 1 },
3094 { .tag = 'K', .mult = 1 << 10 },
3095 { .tag = 'M', .mult = 1 << 20 },
3096 { .tag = 'G', .mult = 1 << 30 },
3097 { .tag = 0 },
3098 };
3099 unsigned long val;
3100
3101 if (unset) {
3102 *s = 0;
3103 return 0;
3104 }
3105
3106 val = parse_tag_value(str, tags_size);
3107 if (val != (unsigned long) -1) {
3108 *s = val;
3109 return 0;
3110 }
3111
3112 return -1;
3113 }
3114
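/*
 * Parse -m/--mmap-pages as "pages[,pages]": the first value sizes the data
 * mmaps, the optional second one the AUX area tracing mmaps, e.g.
 * "-m 512,128" (illustrative values; each part is handed to
 * __evlist__parse_mmap_pages()).
 */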
3115 static int record__parse_mmap_pages(const struct option *opt,
3116 const char *str,
3117 int unset __maybe_unused)
3118 {
3119 struct record_opts *opts = opt->value;
3120 char *s, *p;
3121 unsigned int mmap_pages;
3122 int ret;
3123
3124 if (!str)
3125 return -EINVAL;
3126
3127 s = strdup(str);
3128 if (!s)
3129 return -ENOMEM;
3130
3131 p = strchr(s, ',');
3132 if (p)
3133 *p = '\0';
3134
3135 if (*s) {
3136 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3137 if (ret)
3138 goto out_free;
3139 opts->mmap_pages = mmap_pages;
3140 }
3141
3142 if (!p) {
3143 ret = 0;
3144 goto out_free;
3145 }
3146
3147 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3148 if (ret)
3149 goto out_free;
3150
3151 opts->auxtrace_mmap_pages = mmap_pages;
3152
3153 out_free:
3154 free(s);
3155 return ret;
3156 }
3157
3158 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3159 {
3160 }
3161
3162 static int parse_control_option(const struct option *opt,
3163 const char *str,
3164 int unset __maybe_unused)
3165 {
3166 struct record_opts *opts = opt->value;
3167
3168 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3169 }
3170
3171 static void switch_output_size_warn(struct record *rec)
3172 {
3173 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3174 struct switch_output *s = &rec->switch_output;
3175
3176 wakeup_size /= 2;
3177
3178 if (s->size < wakeup_size) {
3179 char buf[100];
3180
3181 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3182 pr_warning("WARNING: switch-output data size lower than "
3183 "wakeup kernel buffer size (%s) "
3184 "expect bigger perf.data sizes\n", buf);
3185 }
3186 }
3187
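/*
 * Configure --switch-output: "signal" rotates on SIGUSR2, a size tag
 * (B/K/M/G) rotates after that much data has been written, and a time tag
 * (s/m/h/d) rotates periodically, e.g. "--switch-output=100M" or
 * "--switch-output=30s" (illustrative values matching the tag tables
 * below).  Any of these implies --timestamp-filename.
 */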
3188 static int switch_output_setup(struct record *rec)
3189 {
3190 struct switch_output *s = &rec->switch_output;
3191 static struct parse_tag tags_size[] = {
3192 { .tag = 'B', .mult = 1 },
3193 { .tag = 'K', .mult = 1 << 10 },
3194 { .tag = 'M', .mult = 1 << 20 },
3195 { .tag = 'G', .mult = 1 << 30 },
3196 { .tag = 0 },
3197 };
3198 static struct parse_tag tags_time[] = {
3199 { .tag = 's', .mult = 1 },
3200 { .tag = 'm', .mult = 60 },
3201 { .tag = 'h', .mult = 60*60 },
3202 { .tag = 'd', .mult = 60*60*24 },
3203 { .tag = 0 },
3204 };
3205 unsigned long val;
3206
3207 /*
3208 * If we're using --switch-output-events, then we imply its
3209 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3210 * thread to its parent.
3211 */
3212 if (rec->switch_output_event_set) {
3213 if (record__threads_enabled(rec)) {
3214 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3215 return 0;
3216 }
3217 goto do_signal;
3218 }
3219
3220 if (!s->set)
3221 return 0;
3222
3223 if (record__threads_enabled(rec)) {
3224 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3225 return 0;
3226 }
3227
3228 if (!strcmp(s->str, "signal")) {
3229 do_signal:
3230 s->signal = true;
3231 pr_debug("switch-output with SIGUSR2 signal\n");
3232 goto enabled;
3233 }
3234
3235 val = parse_tag_value(s->str, tags_size);
3236 if (val != (unsigned long) -1) {
3237 s->size = val;
3238 pr_debug("switch-output with %s size threshold\n", s->str);
3239 goto enabled;
3240 }
3241
3242 val = parse_tag_value(s->str, tags_time);
3243 if (val != (unsigned long) -1) {
3244 s->time = val;
3245 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3246 s->str, s->time);
3247 goto enabled;
3248 }
3249
3250 return -1;
3251
3252 enabled:
3253 rec->timestamp_filename = true;
3254 s->enabled = true;
3255
3256 if (s->size && !rec->opts.no_buffering)
3257 switch_output_size_warn(rec);
3258
3259 return 0;
3260 }
3261
3262 static const char * const __record_usage[] = {
3263 "perf record [<options>] [<command>]",
3264 "perf record [<options>] -- <command> [<options>]",
3265 NULL
3266 };
3267 const char * const *record_usage = __record_usage;
3268
3269 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3270 struct perf_sample *sample, struct machine *machine)
3271 {
3272 /*
3273 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3274 * so there is no need to add them twice.
3275 */
3276 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3277 return 0;
3278 return perf_event__process_mmap(tool, event, sample, machine);
3279 }
3280
3281 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3282 struct perf_sample *sample, struct machine *machine)
3283 {
3284 /*
3285 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3286 * so there is no need to add them twice.
3287 */
3288 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3289 return 0;
3290
3291 return perf_event__process_mmap2(tool, event, sample, machine);
3292 }
3293
3294 static int process_timestamp_boundary(const struct perf_tool *tool,
3295 union perf_event *event __maybe_unused,
3296 struct perf_sample *sample,
3297 struct machine *machine __maybe_unused)
3298 {
3299 struct record *rec = container_of(tool, struct record, tool);
3300
3301 set_timestamp_boundary(rec, sample->time);
3302 return 0;
3303 }
3304
3305 static int parse_record_synth_option(const struct option *opt,
3306 const char *str,
3307 int unset __maybe_unused)
3308 {
3309 struct record_opts *opts = opt->value;
3310 char *p = strdup(str);
3311
3312 if (p == NULL)
3313 return -1;
3314
3315 opts->synth = parse_synth_opt(p);
3316 free(p);
3317
3318 if (opts->synth < 0) {
3319 pr_err("Invalid synth option: %s\n", str);
3320 return -1;
3321 }
3322 return 0;
3323 }
3324
3325 /*
3326 * XXX Ideally would be local to cmd_record() and passed to a record__new
3327 * because we need to have access to it in record__exit, that is called
3328 * after cmd_record() exits, but since record_options need to be accessible to
3329 * builtin-script, leave it here.
3330 *
3331 * At least we don't touch it in all the other functions here directly.
3332 *
3333 * Just say no to tons of global variables, sigh.
3334 */
3335 static struct record record = {
3336 .opts = {
3337 .sample_time = true,
3338 .mmap_pages = UINT_MAX,
3339 .user_freq = UINT_MAX,
3340 .user_interval = ULLONG_MAX,
3341 .freq = 4000,
3342 .target = {
3343 .uses_mmap = true,
3344 .default_per_cpu = true,
3345 },
3346 .mmap_flush = MMAP_FLUSH_DEFAULT,
3347 .nr_threads_synthesize = 1,
3348 .ctl_fd = -1,
3349 .ctl_fd_ack = -1,
3350 .synth = PERF_SYNTH_ALL,
3351 },
3352 };
3353
3354 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3355 "\n\t\t\t\tDefault: fp";
3356
3357 static bool dry_run;
3358
3359 static struct parse_events_option_args parse_events_option_args = {
3360 .evlistp = &record.evlist,
3361 };
3362
3363 static struct parse_events_option_args switch_output_parse_events_option_args = {
3364 .evlistp = &record.sb_evlist,
3365 };
3366
3367 /*
3368 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3369 * with it and switch to use the library functions in perf_evlist that came
3370 * from builtin-record.c, i.e. use record_opts,
3371 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3372 * using pipes, etc.
3373 */
3374 static struct option __record_options[] = {
3375 OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3376 "event selector. use 'perf list' to list available events",
3377 parse_events_option),
3378 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3379 "event filter", parse_filter),
3380 OPT_BOOLEAN(0, "latency", &record.latency,
3381 "Enable data collection for latency profiling.\n"
3382 "\t\t\t Use perf report --latency for latency-centric profile."),
3383 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3384 NULL, "don't record events from perf itself",
3385 exclude_perf),
3386 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3387 "record events on existing process id"),
3388 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3389 "record events on existing thread id"),
3390 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3391 "collect data with this RT SCHED_FIFO priority"),
3392 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3393 "collect data without buffering"),
3394 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3395 "collect raw sample records from all opened counters"),
3396 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3397 "system-wide collection from all CPUs"),
3398 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3399 "list of cpus to monitor"),
3400 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3401 OPT_STRING('o', "output", &record.data.path, "file",
3402 "output file name"),
3403 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3404 &record.opts.no_inherit_set,
3405 "child tasks do not inherit counters"),
3406 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3407 "synthesize non-sample events at the end of output"),
3408 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3409 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3410 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3411 "Fail if the specified frequency can't be used"),
3412 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3413 "profile at this frequency",
3414 record__parse_freq),
3415 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3416 "number of mmap data pages and AUX area tracing mmap pages",
3417 record__parse_mmap_pages),
3418 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3419 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3420 record__mmap_flush_parse),
3421 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3422 NULL, "enables call-graph recording" ,
3423 &record_callchain_opt),
3424 OPT_CALLBACK(0, "call-graph", &record.opts,
3425 "record_mode[,record_size]", record_callchain_help,
3426 &record_parse_callchain_opt),
3427 OPT_INCR('v', "verbose", &verbose,
3428 "be more verbose (show counter open errors, etc)"),
3429 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3430 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3431 "per thread counts"),
3432 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3433 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3434 "Record the sample physical addresses"),
3435 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3436 "Record the sampled data address data page size"),
3437 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3438 "Record the sampled code address (ip) page size"),
3439 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3440 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3441 "Record the sample identifier"),
3442 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3443 &record.opts.sample_time_set,
3444 "Record the sample timestamps"),
3445 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3446 "Record the sample period"),
3447 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3448 "don't sample"),
3449 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3450 &record.no_buildid_cache_set,
3451 "do not update the buildid cache"),
3452 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3453 &record.no_buildid_set,
3454 "do not collect buildids in perf.data"),
3455 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3456 "monitor event in cgroup name only",
3457 parse_cgroups),
3458 OPT_CALLBACK('D', "delay", &record, "ms",
3459 "ms to wait before starting measurement after program start (-1: start with events disabled), "
3460 "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3461 record__parse_event_enable_time),
3462 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3463 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3464 "user to profile"),
3465
3466 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3467 "branch any", "sample any taken branches",
3468 parse_branch_stack),
3469
3470 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3471 "branch filter mask", "branch stack filter modes",
3472 parse_branch_stack),
3473 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3474 "sample by weight (on special events only)"),
3475 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3476 "sample transaction flags (special events only)"),
3477 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3478 "use per-thread mmaps"),
3479 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3480 "sample selected machine registers on interrupt,"
3481 " use '-I?' to list register names", parse_intr_regs),
3482 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3483 "sample selected machine registers on interrupt,"
3484 " use '--user-regs=?' to list register names", parse_user_regs),
3485 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3486 "Record running/enabled time of read (:S) events"),
3487 OPT_CALLBACK('k', "clockid", &record.opts,
3488 "clockid", "clockid to use for events, see clock_gettime()",
3489 parse_clockid),
3490 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3491 "opts", "AUX area tracing Snapshot Mode", ""),
3492 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3493 "opts", "sample AUX area", ""),
3494 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3495 "per thread proc mmap processing timeout in ms"),
3496 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3497 "Record namespaces events"),
3498 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3499 "Record cgroup events"),
3500 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3501 &record.opts.record_switch_events_set,
3502 "Record context switch events"),
3503 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3504 "Configure all used events to run in kernel space.",
3505 PARSE_OPT_EXCLUSIVE),
3506 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3507 "Configure all used events to run in user space.",
3508 PARSE_OPT_EXCLUSIVE),
3509 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3510 "collect kernel callchains"),
3511 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3512 "collect user callchains"),
3513 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3514 "file", "vmlinux pathname"),
3515 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3516 "Record build-id of all DSOs regardless of hits"),
3517 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3518 "Record build-id in map events"),
3519 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3520 "append timestamp to output filename"),
3521 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3522 "Record timestamp boundary (time of first/last samples)"),
3523 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3524 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3525 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3526 "signal"),
3527 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3528 &record.switch_output_event_set, "switch output event",
3529 "switch output event selector. use 'perf list' to list available events",
3530 parse_events_option_new_evlist),
3531 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3532 "Limit number of switch output generated files"),
3533 OPT_BOOLEAN(0, "dry-run", &dry_run,
3534 "Parse options then exit"),
3535 #ifdef HAVE_AIO_SUPPORT
3536 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3537 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3538 record__aio_parse),
3539 #endif
3540 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3541 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3542 record__parse_affinity),
3543 #ifdef HAVE_ZSTD_SUPPORT
3544 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3545 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3546 record__parse_comp_level),
3547 #endif
3548 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3549 "size", "Limit the maximum size of the output file", parse_output_max_size),
3550 OPT_UINTEGER(0, "num-thread-synthesize",
3551 &record.opts.nr_threads_synthesize,
3552 "number of threads to run for event synthesis"),
3553 #ifdef HAVE_LIBPFM
3554 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3555 "libpfm4 event selector. use 'perf list' to list available events",
3556 parse_libpfm_events_option),
3557 #endif
3558 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3559 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3560 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3561 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3562 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3563 parse_control_option),
3564 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3565 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3566 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3567 &record.debuginfod.set, "debuginfod urls",
3568 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3569 "system"),
3570 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3571 "write collected trace data into several data files using parallel threads",
3572 record__parse_threads),
3573 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3574 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3575 "BPF filter action"),
3576 OPT_END()
3577 };
3578
3579 struct option *record_options = __record_options;
3580
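/*
 * Set a bit in @mask for every CPU in @cpus. A dummy CPU map (for example
 * when monitoring per-thread) leaves the mask untouched, and a CPU number
 * larger than the mask capacity fails with -ENODEV.
 */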
3581 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3582 {
3583 struct perf_cpu cpu;
3584 int idx;
3585
3586 if (cpu_map__is_dummy(cpus))
3587 return 0;
3588
3589 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3590 		/* Return -ENODEV if the input cpu is greater than max cpu */
3591 if ((unsigned long)cpu.cpu > mask->nbits)
3592 return -ENODEV;
3593 __set_bit(cpu.cpu, mask->bits);
3594 }
3595
3596 return 0;
3597 }
3598
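/*
 * Initialize @mask from a CPU list string (anything perf_cpu_map__new()
 * accepts, e.g. "0-3" or "0,2-4").
 */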
3599 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3600 {
3601 	int ret;
3602 	struct perf_cpu_map *cpus;
3603 
3604 	cpus = perf_cpu_map__new(mask_spec);
3605 	if (!cpus)
3606 		return -ENOMEM;
3607 
3608 	bitmap_zero(mask->bits, mask->nbits);
3609 	ret = record__mmap_cpu_mask_init(mask, cpus);
3610 
3611 	perf_cpu_map__put(cpus);
3612 
3613 	return ret;
3614 }
3615
3616 static void record__free_thread_masks(struct record *rec, int nr_threads)
3617 {
3618 int t;
3619
3620 if (rec->thread_masks)
3621 for (t = 0; t < nr_threads; t++)
3622 record__thread_mask_free(&rec->thread_masks[t]);
3623
3624 zfree(&rec->thread_masks);
3625 }
3626
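/*
 * Allocate @nr_threads thread_mask entries with @nr_bits wide maps and
 * affinity bitmaps; on failure, everything allocated so far is freed.
 */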
3627 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3628 {
3629 int t, ret;
3630
3631 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3632 if (!rec->thread_masks) {
3633 pr_err("Failed to allocate thread masks\n");
3634 return -ENOMEM;
3635 }
3636
3637 for (t = 0; t < nr_threads; t++) {
3638 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3639 if (ret) {
3640 pr_err("Failed to allocate thread masks[%d]\n", t);
3641 goto out_free;
3642 }
3643 }
3644
3645 return 0;
3646
3647 out_free:
3648 record__free_thread_masks(rec, nr_threads);
3649
3650 return ret;
3651 }
3652
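/*
 * --threads=cpu: one data streaming thread per recorded CPU, with both
 * the maps and the affinity mask reduced to that single CPU.
 */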
3653 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3654 {
3655 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3656
3657 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3658 if (ret)
3659 return ret;
3660
3661 rec->nr_threads = nr_cpus;
3662 pr_debug("nr_threads: %d\n", rec->nr_threads);
3663
3664 for (t = 0; t < rec->nr_threads; t++) {
3665 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3666 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3667 if (verbose > 0) {
3668 pr_debug("thread_masks[%d]: ", t);
3669 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3670 pr_debug("thread_masks[%d]: ", t);
3671 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3672 }
3673 }
3674
3675 return 0;
3676 }
3677
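/*
 * Build one thread_mask per maps/affinity spec pair. Each spec is
 * intersected with the set of recorded CPUs; an empty result or an
 * overlap with a previously accepted spec is rejected with -EINVAL.
 */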
3678 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3679 const char **maps_spec, const char **affinity_spec,
3680 u32 nr_spec)
3681 {
3682 u32 s;
3683 int ret = 0, t = 0;
3684 struct mmap_cpu_mask cpus_mask;
3685 struct thread_mask thread_mask, full_mask, *thread_masks;
3686
3687 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3688 if (ret) {
3689 pr_err("Failed to allocate CPUs mask\n");
3690 return ret;
3691 }
3692
3693 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3694 if (ret) {
3695 pr_err("Failed to init cpu mask\n");
3696 goto out_free_cpu_mask;
3697 }
3698
3699 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3700 if (ret) {
3701 pr_err("Failed to allocate full mask\n");
3702 goto out_free_cpu_mask;
3703 }
3704
3705 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3706 if (ret) {
3707 pr_err("Failed to allocate thread mask\n");
3708 goto out_free_full_and_cpu_masks;
3709 }
3710
3711 for (s = 0; s < nr_spec; s++) {
3712 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3713 if (ret) {
3714 pr_err("Failed to initialize maps thread mask\n");
3715 goto out_free;
3716 }
3717 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3718 if (ret) {
3719 pr_err("Failed to initialize affinity thread mask\n");
3720 goto out_free;
3721 }
3722
3723 /* ignore invalid CPUs but do not allow empty masks */
3724 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3725 cpus_mask.bits, thread_mask.maps.nbits)) {
3726 pr_err("Empty maps mask: %s\n", maps_spec[s]);
3727 ret = -EINVAL;
3728 goto out_free;
3729 }
3730 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3731 cpus_mask.bits, thread_mask.affinity.nbits)) {
3732 pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3733 ret = -EINVAL;
3734 goto out_free;
3735 }
3736
3737 /* do not allow intersection with other masks (full_mask) */
3738 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3739 thread_mask.maps.nbits)) {
3740 pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3741 ret = -EINVAL;
3742 goto out_free;
3743 }
3744 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3745 thread_mask.affinity.nbits)) {
3746 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3747 ret = -EINVAL;
3748 goto out_free;
3749 }
3750
3751 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3752 thread_mask.maps.bits, full_mask.maps.nbits);
3753 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3754 			  thread_mask.affinity.bits, full_mask.affinity.nbits);
3755
3756 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3757 if (!thread_masks) {
3758 pr_err("Failed to reallocate thread masks\n");
3759 ret = -ENOMEM;
3760 goto out_free;
3761 }
3762 rec->thread_masks = thread_masks;
3763 rec->thread_masks[t] = thread_mask;
3764 if (verbose > 0) {
3765 pr_debug("thread_masks[%d]: ", t);
3766 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3767 pr_debug("thread_masks[%d]: ", t);
3768 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3769 }
3770 t++;
3771 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3772 if (ret) {
3773 pr_err("Failed to allocate thread mask\n");
3774 goto out_free_full_and_cpu_masks;
3775 }
3776 }
3777 rec->nr_threads = t;
3778 pr_debug("nr_threads: %d\n", rec->nr_threads);
3779 if (!rec->nr_threads)
3780 ret = -EINVAL;
3781
3782 out_free:
3783 record__thread_mask_free(&thread_mask);
3784 out_free_full_and_cpu_masks:
3785 record__thread_mask_free(&full_mask);
3786 out_free_cpu_mask:
3787 record__mmap_cpu_mask_free(&cpus_mask);
3788
3789 return ret;
3790 }
3791
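/*
 * --threads=core: derive the maps/affinity specs from the per-core CPU
 * lists of the CPU topology, i.e. one data streaming thread per core.
 */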
3792 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3793 {
3794 int ret;
3795 struct cpu_topology *topo;
3796
3797 topo = cpu_topology__new();
3798 if (!topo) {
3799 pr_err("Failed to allocate CPU topology\n");
3800 return -ENOMEM;
3801 }
3802
3803 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3804 topo->core_cpus_list, topo->core_cpus_lists);
3805 cpu_topology__delete(topo);
3806
3807 return ret;
3808 }
3809
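/* --threads=package: one data streaming thread per processor package. */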
3810 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3811 {
3812 int ret;
3813 struct cpu_topology *topo;
3814
3815 topo = cpu_topology__new();
3816 if (!topo) {
3817 pr_err("Failed to allocate CPU topology\n");
3818 return -ENOMEM;
3819 }
3820
3821 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3822 topo->package_cpus_list, topo->package_cpus_lists);
3823 cpu_topology__delete(topo);
3824
3825 return ret;
3826 }
3827
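/*
 * --threads=numa: one data streaming thread per NUMA node, using each
 * node's CPU list as both the maps and the affinity spec.
 */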
3828 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3829 {
3830 u32 s;
3831 int ret;
3832 const char **spec;
3833 struct numa_topology *topo;
3834
3835 topo = numa_topology__new();
3836 if (!topo) {
3837 pr_err("Failed to allocate NUMA topology\n");
3838 return -ENOMEM;
3839 }
3840
3841 spec = zalloc(topo->nr * sizeof(char *));
3842 if (!spec) {
3843 pr_err("Failed to allocate NUMA spec\n");
3844 ret = -ENOMEM;
3845 goto out_delete_topo;
3846 }
3847 for (s = 0; s < topo->nr; s++)
3848 spec[s] = topo->nodes[s].cpus;
3849
3850 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3851
3852 zfree(&spec);
3853
3854 out_delete_topo:
3855 numa_topology__delete(topo);
3856
3857 return ret;
3858 }
3859
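/*
 * --threads=<spec> with a user supplied spec: colon separated entries of
 * the form <maps cpus>/<affinity cpus>. For example (illustrative only):
 *
 *   perf record --threads=0-3/0:4-7/4 ...
 *
 * would request two streaming threads, the first reading the mmaps of
 * CPUs 0-3 with its affinity set to CPU 0, the second reading CPUs 4-7
 * with its affinity set to CPU 4.
 */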
3860 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3861 {
3862 int t, ret;
3863 u32 s, nr_spec = 0;
3864 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3865 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3866
3867 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3868 spec = strtok_r(user_spec, ":", &spec_ptr);
3869 if (spec == NULL)
3870 break;
3871 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3872 mask = strtok_r(spec, "/", &mask_ptr);
3873 if (mask == NULL)
3874 break;
3875 pr_debug2(" maps mask: %s\n", mask);
3876 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3877 if (!tmp_spec) {
3878 pr_err("Failed to reallocate maps spec\n");
3879 ret = -ENOMEM;
3880 goto out_free;
3881 }
3882 maps_spec = tmp_spec;
3883 maps_spec[nr_spec] = dup_mask = strdup(mask);
3884 if (!maps_spec[nr_spec]) {
3885 pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3886 ret = -ENOMEM;
3887 goto out_free;
3888 }
3889 mask = strtok_r(NULL, "/", &mask_ptr);
3890 if (mask == NULL) {
3891 pr_err("Invalid thread maps or affinity specs\n");
3892 ret = -EINVAL;
3893 goto out_free;
3894 }
3895 pr_debug2(" affinity mask: %s\n", mask);
3896 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3897 if (!tmp_spec) {
3898 pr_err("Failed to reallocate affinity spec\n");
3899 ret = -ENOMEM;
3900 goto out_free;
3901 }
3902 affinity_spec = tmp_spec;
3903 affinity_spec[nr_spec] = strdup(mask);
3904 if (!affinity_spec[nr_spec]) {
3905 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3906 ret = -ENOMEM;
3907 goto out_free;
3908 }
3909 dup_mask = NULL;
3910 nr_spec++;
3911 }
3912
3913 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3914 (const char **)affinity_spec, nr_spec);
3915
3916 out_free:
3917 free(dup_mask);
3918 for (s = 0; s < nr_spec; s++) {
3919 if (maps_spec)
3920 free(maps_spec[s]);
3921 if (affinity_spec)
3922 free(affinity_spec[s]);
3923 }
3924 free(affinity_spec);
3925 free(maps_spec);
3926
3927 return ret;
3928 }
3929
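/*
 * Default (no --threads): a single mask covering all recorded CPUs; only
 * the maps mask is initialized, the affinity mask is left empty.
 */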
3930 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3931 {
3932 int ret;
3933
3934 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3935 if (ret)
3936 return ret;
3937
3938 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3939 return -ENODEV;
3940
3941 rec->nr_threads = 1;
3942
3943 return 0;
3944 }
3945
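/*
 * Dispatch on the --threads spec (cpu, core, package, numa or a user
 * supplied list). Parallel streaming is mutually exclusive with
 * --per-thread event maps.
 */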
3946 static int record__init_thread_masks(struct record *rec)
3947 {
3948 int ret = 0;
3949 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3950
3951 if (!record__threads_enabled(rec))
3952 return record__init_thread_default_masks(rec, cpus);
3953
3954 if (evlist__per_thread(rec->evlist)) {
3955 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3956 return -EINVAL;
3957 }
3958
3959 switch (rec->opts.threads_spec) {
3960 case THREAD_SPEC__CPU:
3961 ret = record__init_thread_cpu_masks(rec, cpus);
3962 break;
3963 case THREAD_SPEC__CORE:
3964 ret = record__init_thread_core_masks(rec, cpus);
3965 break;
3966 case THREAD_SPEC__PACKAGE:
3967 ret = record__init_thread_package_masks(rec, cpus);
3968 break;
3969 case THREAD_SPEC__NUMA:
3970 ret = record__init_thread_numa_masks(rec, cpus);
3971 break;
3972 case THREAD_SPEC__USER:
3973 ret = record__init_thread_user_masks(rec, cpus);
3974 break;
3975 default:
3976 break;
3977 }
3978
3979 return ret;
3980 }
3981
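/*
 * cmd_record - entry point of 'perf record': parse and validate options,
 * set up the event list, thread masks and auxtrace, then hand off to
 * __cmd_record() for the actual recording session.
 */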
3982 int cmd_record(int argc, const char **argv)
3983 {
3984 int err;
3985 struct record *rec = &record;
3986 char errbuf[BUFSIZ];
3987
3988 setlocale(LC_ALL, "");
3989
3990 #ifndef HAVE_BPF_SKEL
3991 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3992 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3993 # undef set_nobuild
3994 #endif
3995
3996 /* Disable eager loading of kernel symbols that adds overhead to perf record. */
3997 symbol_conf.lazy_load_kernel_maps = true;
3998 rec->opts.affinity = PERF_AFFINITY_SYS;
3999
4000 rec->evlist = evlist__new();
4001 if (rec->evlist == NULL)
4002 return -ENOMEM;
4003
4004 err = perf_config(perf_record_config, rec);
4005 if (err)
4006 return err;
4007
4008 argc = parse_options(argc, argv, record_options, record_usage,
4009 PARSE_OPT_STOP_AT_NON_OPTION);
4010 if (quiet)
4011 perf_quiet_option();
4012
4013 err = symbol__validate_sym_arguments();
4014 if (err)
4015 return err;
4016
4017 perf_debuginfod_setup(&record.debuginfod);
4018
4019 /* Make system wide (-a) the default target. */
4020 if (!argc && target__none(&rec->opts.target))
4021 rec->opts.target.system_wide = true;
4022
4023 if (nr_cgroups && !rec->opts.target.system_wide) {
4024 usage_with_options_msg(record_usage, record_options,
4025 "cgroup monitoring only available in system-wide mode");
4026
4027 }
4028
4029 if (record.latency) {
4030 /*
4031 * There is no fundamental reason why latency profiling
4032 * can't work for system-wide mode, but exact semantics
4033 * and details are to be defined.
4034 * See the following thread for details:
4035 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4036 */
4037 if (record.opts.target.system_wide) {
4038 pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4039 err = -EINVAL;
4040 goto out_opts;
4041 }
4042 record.opts.record_switch_events = true;
4043 }
4044
4045 if (rec->buildid_mmap) {
4046 if (!perf_can_record_build_id()) {
4047 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
4048 err = -EINVAL;
4049 goto out_opts;
4050 }
4051 pr_debug("Enabling build id in mmap2 events.\n");
4052 /* Enable mmap build id synthesizing. */
4053 symbol_conf.buildid_mmap2 = true;
4054 /* Enable perf_event_attr::build_id bit. */
4055 rec->opts.build_id = true;
4056 /* Disable build id cache. */
4057 rec->no_buildid = true;
4058 }
4059
4060 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4061 pr_err("Kernel has no cgroup sampling support.\n");
4062 err = -EINVAL;
4063 goto out_opts;
4064 }
4065
4066 if (rec->opts.kcore)
4067 rec->opts.text_poke = true;
4068
4069 if (rec->opts.kcore || record__threads_enabled(rec))
4070 rec->data.is_dir = true;
4071
4072 if (record__threads_enabled(rec)) {
4073 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4074 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4075 goto out_opts;
4076 }
4077 if (record__aio_enabled(rec)) {
4078 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4079 goto out_opts;
4080 }
4081 }
4082
4083 if (rec->opts.comp_level != 0) {
4084 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4085 rec->no_buildid = true;
4086 }
4087
4088 if (rec->opts.record_switch_events &&
4089 !perf_can_record_switch_events()) {
4090 ui__error("kernel does not support recording context switch events\n");
4091 parse_options_usage(record_usage, record_options, "switch-events", 0);
4092 err = -EINVAL;
4093 goto out_opts;
4094 }
4095
4096 if (switch_output_setup(rec)) {
4097 parse_options_usage(record_usage, record_options, "switch-output", 0);
4098 err = -EINVAL;
4099 goto out_opts;
4100 }
4101
4102 if (rec->switch_output.time) {
4103 signal(SIGALRM, alarm_sig_handler);
4104 alarm(rec->switch_output.time);
4105 }
4106
4107 if (rec->switch_output.num_files) {
4108 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4109 sizeof(char *));
4110 if (!rec->switch_output.filenames) {
4111 err = -EINVAL;
4112 goto out_opts;
4113 }
4114 }
4115
4116 if (rec->timestamp_filename && record__threads_enabled(rec)) {
4117 rec->timestamp_filename = false;
4118 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4119 }
4120
4121 if (rec->filter_action) {
4122 if (!strcmp(rec->filter_action, "pin"))
4123 err = perf_bpf_filter__pin();
4124 else if (!strcmp(rec->filter_action, "unpin"))
4125 err = perf_bpf_filter__unpin();
4126 else {
4127 pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4128 err = -EINVAL;
4129 }
4130 goto out_opts;
4131 }
4132
4133 /*
4134 * Allow aliases to facilitate the lookup of symbols for address
4135 * filters. Refer to auxtrace_parse_filters().
4136 */
4137 symbol_conf.allow_aliases = true;
4138
4139 symbol__init(NULL);
4140
4141 err = record__auxtrace_init(rec);
4142 if (err)
4143 goto out;
4144
4145 if (dry_run)
4146 goto out;
4147
4148 err = -ENOMEM;
4149
4150 if (rec->no_buildid_cache || rec->no_buildid) {
4151 disable_buildid_cache();
4152 } else if (rec->switch_output.enabled) {
4153 		/*
4154 		 * In 'perf record --switch-output', disable buildid
4155 		 * generation by default to reduce the data file switching
4156 		 * overhead. Still generate buildids if they are explicitly
4157 		 * required, using:
4158 		 *
4159 		 *  perf record --switch-output --no-no-buildid \
4160 		 *              --no-no-buildid-cache
4161 		 *
4162 		 * The following code is equivalent to:
4163 		 *
4164 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4165 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4166 		 *	disable_buildid_cache();
4167 		 */
4168 bool disable = true;
4169
4170 if (rec->no_buildid_set && !rec->no_buildid)
4171 disable = false;
4172 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4173 disable = false;
4174 if (disable) {
4175 rec->no_buildid = true;
4176 rec->no_buildid_cache = true;
4177 disable_buildid_cache();
4178 }
4179 }
4180
4181 if (record.opts.overwrite)
4182 record.opts.tail_synthesize = true;
4183
4184 if (rec->evlist->core.nr_entries == 0) {
4185 err = parse_event(rec->evlist, "cycles:P");
4186 if (err)
4187 goto out;
4188 }
4189
4190 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4191 rec->opts.no_inherit = true;
4192
4193 err = target__validate(&rec->opts.target);
4194 if (err) {
4195 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4196 ui__warning("%s\n", errbuf);
4197 }
4198
4199 err = target__parse_uid(&rec->opts.target);
4200 if (err) {
4201 int saved_errno = errno;
4202
4203 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4204 ui__error("%s", errbuf);
4205
4206 err = -saved_errno;
4207 goto out;
4208 }
4209
4210 /* Enable ignoring missing threads when -u/-p option is defined. */
4211 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4212
4213 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4214
4215 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4216 arch__add_leaf_frame_record_opts(&rec->opts);
4217
4218 err = -ENOMEM;
4219 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4220 if (rec->opts.target.pid != NULL) {
4221 pr_err("Couldn't create thread/CPU maps: %s\n",
4222 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4223 goto out;
4224 		} else {
4225 			usage_with_options(record_usage, record_options);
4226 		}
4227 }
4228
4229 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4230 if (err)
4231 goto out;
4232
4233 	/*
4234 	 * Take all buildids when the file contains AUX area tracing data,
4235 	 * because we do not decode the trace: decoding it would take too
4236 	 * long.
4237 	 */
4238 if (rec->opts.full_auxtrace)
4239 rec->buildid_all = true;
4240
4241 if (rec->opts.text_poke) {
4242 err = record__config_text_poke(rec->evlist);
4243 if (err) {
4244 pr_err("record__config_text_poke failed, error %d\n", err);
4245 goto out;
4246 }
4247 }
4248
4249 if (rec->off_cpu) {
4250 err = record__config_off_cpu(rec);
4251 if (err) {
4252 pr_err("record__config_off_cpu failed, error %d\n", err);
4253 goto out;
4254 }
4255 }
4256
4257 if (record_opts__config(&rec->opts)) {
4258 err = -EINVAL;
4259 goto out;
4260 }
4261
4262 err = record__config_tracking_events(rec);
4263 if (err) {
4264 pr_err("record__config_tracking_events failed, error %d\n", err);
4265 goto out;
4266 }
4267
4268 err = record__init_thread_masks(rec);
4269 if (err) {
4270 pr_err("Failed to initialize parallel data streaming masks\n");
4271 goto out;
4272 }
4273
4274 if (rec->opts.nr_cblocks > nr_cblocks_max)
4275 rec->opts.nr_cblocks = nr_cblocks_max;
4276 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4277
4278 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4279 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4280
4281 if (rec->opts.comp_level > comp_level_max)
4282 rec->opts.comp_level = comp_level_max;
4283 pr_debug("comp level: %d\n", rec->opts.comp_level);
4284
4285 err = __cmd_record(&record, argc, argv);
4286 out:
4287 record__free_thread_masks(rec, rec->nr_threads);
4288 rec->nr_threads = 0;
4289 symbol__exit();
4290 auxtrace_record__free(rec->itr);
4291 out_opts:
4292 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4293 evlist__delete(rec->evlist);
4294 return err;
4295 }
4296
4297 static void snapshot_sig_handler(int sig __maybe_unused)
4298 {
4299 struct record *rec = &record;
4300
4301 hit_auxtrace_snapshot_trigger(rec);
4302
4303 if (switch_output_signal(rec))
4304 trigger_hit(&switch_output_trigger);
4305 }
4306
4307 static void alarm_sig_handler(int sig __maybe_unused)
4308 {
4309 struct record *rec = &record;
4310
4311 if (switch_output_time(rec))
4312 trigger_hit(&switch_output_trigger);
4313 }
4314