1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16 
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/record.h"
31 #include "util/cpumap.h"
32 #include "util/thread_map.h"
33 #include "util/data.h"
34 #include "util/perf_regs.h"
35 #include "util/auxtrace.h"
36 #include "util/tsc.h"
37 #include "util/parse-branch-options.h"
38 #include "util/parse-regs-options.h"
39 #include "util/perf_api_probe.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/pmu.h"
50 #include "util/pmus.h"
51 #include "util/clockid.h"
52 #include "util/off_cpu.h"
53 #include "util/bpf-filter.h"
54 #include "asm/bug.h"
55 #include "perf.h"
56 #include "cputopo.h"
57 
58 #include <errno.h>
59 #include <inttypes.h>
60 #include <locale.h>
61 #include <poll.h>
62 #include <pthread.h>
63 #include <unistd.h>
64 #ifndef HAVE_GETTID
65 #include <syscall.h>
66 #endif
67 #include <sched.h>
68 #include <signal.h>
69 #ifdef HAVE_EVENTFD_SUPPORT
70 #include <sys/eventfd.h>
71 #endif
72 #include <sys/mman.h>
73 #include <sys/wait.h>
74 #include <sys/types.h>
75 #include <sys/stat.h>
76 #include <fcntl.h>
77 #include <linux/err.h>
78 #include <linux/string.h>
79 #include <linux/time64.h>
80 #include <linux/zalloc.h>
81 #include <linux/bitmap.h>
82 #include <sys/time.h>
83 
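/*
 * Control state for rotating the output file while recording (the
 * switch-output machinery): rotation can be requested by signal, after a
 * given amount of written data, or on a time interval, and num_files /
 * cur_file bound how many rotated files are kept. A typical (assumed)
 * invocation would be something like "perf record --switch-output=1G" or
 * "--switch-output=30s".
 */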
84 struct switch_output {
85 	bool		 enabled;
86 	bool		 signal;
87 	unsigned long	 size;
88 	unsigned long	 time;
89 	const char	*str;
90 	bool		 set;
91 	char		 **filenames;
92 	int		 num_files;
93 	int		 cur_file;
94 };
95 
96 struct thread_mask {
97 	struct mmap_cpu_mask	maps;
98 	struct mmap_cpu_mask	affinity;
99 };
100 
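/*
 * Per-thread state used for parallel trace streaming (--threads): the
 * msg/ack pipes connect a worker to the main thread, pollfd holds the
 * descriptors the worker waits on, maps/overwrite_maps point at the subset
 * of mmaps it services, and the counters below feed the final statistics.
 */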
101 struct record_thread {
102 	pid_t			tid;
103 	struct thread_mask	*mask;
104 	struct {
105 		int		msg[2];
106 		int		ack[2];
107 	} pipes;
108 	struct fdarray		pollfd;
109 	int			ctlfd_pos;
110 	int			nr_mmaps;
111 	struct mmap		**maps;
112 	struct mmap		**overwrite_maps;
113 	struct record		*rec;
114 	unsigned long long	samples;
115 	unsigned long		waking;
116 	u64			bytes_written;
117 	u64			bytes_transferred;
118 	u64			bytes_compressed;
119 };
120 
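/*
 * Each recording thread (including the main one) points this TLS variable
 * at its own record_thread, so helpers such as record__write() and
 * record__pushfn() can account bytes and samples per thread.
 */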
121 static __thread struct record_thread *thread;
122 
123 enum thread_msg {
124 	THREAD_MSG__UNDEFINED = 0,
125 	THREAD_MSG__READY,
126 	THREAD_MSG__MAX,
127 };
128 
129 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
130 	"UNDEFINED", "READY"
131 };
132 
133 enum thread_spec {
134 	THREAD_SPEC__UNDEFINED = 0,
135 	THREAD_SPEC__CPU,
136 	THREAD_SPEC__CORE,
137 	THREAD_SPEC__PACKAGE,
138 	THREAD_SPEC__NUMA,
139 	THREAD_SPEC__USER,
140 	THREAD_SPEC__MAX,
141 };
142 
143 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
144 	"undefined", "cpu", "core", "package", "numa", "user"
145 };
146 
147 struct pollfd_index_map {
148 	int evlist_pollfd_index;
149 	int thread_pollfd_index;
150 };
151 
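/*
 * Top-level state of a single "perf record" session: the output data
 * handle, the event list, session/tool glue, build-id and switch-output
 * configuration and, in --threads mode, the per-thread masks and data.
 */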
152 struct record {
153 	struct perf_tool	tool;
154 	struct record_opts	opts;
155 	u64			bytes_written;
156 	u64			thread_bytes_written;
157 	struct perf_data	data;
158 	struct auxtrace_record	*itr;
159 	struct evlist	*evlist;
160 	struct perf_session	*session;
161 	struct evlist		*sb_evlist;
162 	pthread_t		thread_id;
163 	int			realtime_prio;
164 	bool			latency;
165 	bool			switch_output_event_set;
166 	bool			no_buildid;
167 	bool			no_buildid_set;
168 	bool			no_buildid_cache;
169 	bool			no_buildid_cache_set;
170 	bool			buildid_all;
171 	bool			buildid_mmap;
172 	bool			timestamp_filename;
173 	bool			timestamp_boundary;
174 	bool			off_cpu;
175 	const char		*filter_action;
176 	struct switch_output	switch_output;
177 	unsigned long long	samples;
178 	unsigned long		output_max_size;	/* = 0: unlimited */
179 	struct perf_debuginfod	debuginfod;
180 	int			nr_threads;
181 	struct thread_mask	*thread_masks;
182 	struct record_thread	*thread_data;
183 	struct pollfd_index_map	*index_map;
184 	size_t			index_map_sz;
185 	size_t			index_map_cnt;
186 };
187 
188 static volatile int done;
189 
190 static volatile int auxtrace_record__snapshot_started;
191 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
192 static DEFINE_TRIGGER(switch_output_trigger);
193 
194 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
195 	"SYS", "NODE", "CPU"
196 };
197 
198 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
199 				  struct perf_sample *sample, struct machine *machine);
200 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
201 				   struct perf_sample *sample, struct machine *machine);
202 static int process_timestamp_boundary(const struct perf_tool *tool,
203 				      union perf_event *event,
204 				      struct perf_sample *sample,
205 				      struct machine *machine);
206 
207 #ifndef HAVE_GETTID
static inline pid_t gettid(void)
209 {
210 	return (pid_t)syscall(__NR_gettid);
211 }
212 #endif
213 
static int record__threads_enabled(struct record *rec)
215 {
216 	return rec->opts.threads_spec;
217 }
218 
static bool switch_output_signal(struct record *rec)
220 {
221 	return rec->switch_output.signal &&
222 	       trigger_is_ready(&switch_output_trigger);
223 }
224 
static bool switch_output_size(struct record *rec)
226 {
227 	return rec->switch_output.size &&
228 	       trigger_is_ready(&switch_output_trigger) &&
229 	       (rec->bytes_written >= rec->switch_output.size);
230 }
231 
static bool switch_output_time(struct record *rec)
233 {
234 	return rec->switch_output.time &&
235 	       trigger_is_ready(&switch_output_trigger);
236 }
237 
static u64 record__bytes_written(struct record *rec)
239 {
240 	return rec->bytes_written + rec->thread_bytes_written;
241 }
242 
static bool record__output_max_size_exceeded(struct record *rec)
244 {
245 	return rec->output_max_size &&
246 	       (record__bytes_written(rec) >= rec->output_max_size);
247 }
248 
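/*
 * Write a block of bytes to the output: to the mmap's own file when
 * recording in threaded (directory) mode, otherwise to the main perf.data
 * file. Byte counters are updated accordingly, the session is stopped once
 * the configured output size limit (output_max_size) is exceeded, and the
 * switch-output trigger fires when the size threshold is reached.
 */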
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
250 			 void *bf, size_t size)
251 {
252 	struct perf_data_file *file = &rec->session->data->file;
253 
254 	if (map && map->file)
255 		file = map->file;
256 
257 	if (perf_data_file__write(file, bf, size) < 0) {
258 		pr_err("failed to write perf data, error: %m\n");
259 		return -1;
260 	}
261 
262 	if (map && map->file) {
263 		thread->bytes_written += size;
264 		rec->thread_bytes_written += size;
265 	} else {
266 		rec->bytes_written += size;
267 	}
268 
269 	if (record__output_max_size_exceeded(rec) && !done) {
270 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
271 				" stopping session ]\n",
272 				record__bytes_written(rec) >> 10);
273 		done = 1;
274 	}
275 
276 	if (switch_output_size(rec))
277 		trigger_hit(&switch_output_trigger);
278 
279 	return 0;
280 }
281 
282 static int record__aio_enabled(struct record *rec);
283 static int record__comp_enabled(struct record *rec);
284 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
285 			    void *dst, size_t dst_size, void *src, size_t src_size);
286 
287 #ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
289 		void *buf, size_t size, off_t off)
290 {
291 	int rc;
292 
293 	cblock->aio_fildes = trace_fd;
294 	cblock->aio_buf    = buf;
295 	cblock->aio_nbytes = size;
296 	cblock->aio_offset = off;
297 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
298 
299 	do {
300 		rc = aio_write(cblock);
301 		if (rc == 0) {
302 			break;
303 		} else if (errno != EAGAIN) {
304 			cblock->aio_fildes = -1;
305 			pr_err("failed to queue perf data, error: %m\n");
306 			break;
307 		}
308 	} while (1);
309 
310 	return rc;
311 }
312 
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
314 {
315 	void *rem_buf;
316 	off_t rem_off;
317 	size_t rem_size;
318 	int rc, aio_errno;
319 	ssize_t aio_ret, written;
320 
321 	aio_errno = aio_error(cblock);
322 	if (aio_errno == EINPROGRESS)
323 		return 0;
324 
325 	written = aio_ret = aio_return(cblock);
326 	if (aio_ret < 0) {
327 		if (aio_errno != EINTR)
328 			pr_err("failed to write perf data, error: %m\n");
329 		written = 0;
330 	}
331 
332 	rem_size = cblock->aio_nbytes - written;
333 
334 	if (rem_size == 0) {
335 		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it here because the request is now complete.
		 */
341 		perf_mmap__put(&md->core);
342 		rc = 1;
343 	} else {
		/*
		 * The aio write request may need to be restarted with the
		 * remainder if the kernel didn't write the whole chunk at
		 * once.
		 */
349 		rem_off = cblock->aio_offset + written;
350 		rem_buf = (void *)(cblock->aio_buf + written);
351 		record__aio_write(cblock, cblock->aio_fildes,
352 				rem_buf, rem_size, rem_off);
353 		rc = 0;
354 	}
355 
356 	return rc;
357 }
358 
static int record__aio_sync(struct mmap *md, bool sync_all)
360 {
361 	struct aiocb **aiocb = md->aio.aiocb;
362 	struct aiocb *cblocks = md->aio.cblocks;
363 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
364 	int i, do_suspend;
365 
366 	do {
367 		do_suspend = 0;
368 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
369 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
370 				if (sync_all)
371 					aiocb[i] = NULL;
372 				else
373 					return i;
374 			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited on before the next
				 * allocation.
				 */
380 				aiocb[i] = &cblocks[i];
381 				do_suspend = 1;
382 			}
383 		}
384 		if (!do_suspend)
385 			return -1;
386 
387 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
388 			if (!(errno == EAGAIN || errno == EINTR))
389 				pr_err("failed to sync perf data, error: %m\n");
390 		}
391 	} while (1);
392 }
393 
394 struct record_aio {
395 	struct record	*rec;
396 	void		*data;
397 	size_t		size;
398 };
399 
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
401 {
402 	struct record_aio *aio = to;
403 
	/*
	 * The map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as fast
	 * as possible, calling perf_mmap__consume() from perf_mmap__push().
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * the part of the data from map->start to the upper bound and then the
	 * remainder from the beginning of the kernel buffer to the end of the
	 * data chunk.
	 */
417 
418 	if (record__comp_enabled(aio->rec)) {
419 		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
420 						   mmap__mmap_len(map) - aio->size,
421 						   buf, size);
422 		if (compressed < 0)
423 			return (int)compressed;
424 
425 		size = compressed;
426 	} else {
427 		memcpy(aio->data + aio->size, buf, size);
428 	}
429 
430 	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released before the aio write request started on the
		 * map->aio.data[] buffer completes.
		 *
		 * perf_mmap__put() is done in record__aio_complete() after the
		 * started aio request completes, or in record__aio_push() if
		 * the request failed to start.
		 */
441 		perf_mmap__get(&map->core);
442 	}
443 
444 	aio->size += size;
445 
446 	return size;
447 }
448 
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
450 {
451 	int ret, idx;
452 	int trace_fd = rec->session->data->file.fd;
453 	struct record_aio aio = { .rec = rec, .size = 0 };
454 
	/*
	 * Call record__aio_sync() to wait until a map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */
459 
460 	idx = record__aio_sync(map, false);
461 	aio.data = map->aio.data[idx];
462 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
463 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
464 		return ret;
465 
466 	rec->samples++;
467 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
468 	if (!ret) {
469 		*off += aio.size;
470 		rec->bytes_written += aio.size;
471 		if (switch_output_size(rec))
472 			trigger_hit(&switch_output_trigger);
473 	} else {
474 		/*
475 		 * Decrement map->refcount incremented in record__aio_pushfn()
476 		 * back if record__aio_write() operation failed to start, otherwise
477 		 * map->refcount is decremented in record__aio_complete() after
478 		 * aio write operation finishes successfully.
479 		 */
480 		perf_mmap__put(&map->core);
481 	}
482 
483 	return ret;
484 }
485 
static off_t record__aio_get_pos(int trace_fd)
487 {
488 	return lseek(trace_fd, 0, SEEK_CUR);
489 }
490 
static void record__aio_set_pos(int trace_fd, off_t pos)
492 {
493 	lseek(trace_fd, pos, SEEK_SET);
494 }
495 
static void record__aio_mmap_read_sync(struct record *rec)
497 {
498 	int i;
499 	struct evlist *evlist = rec->evlist;
500 	struct mmap *maps = evlist->mmap;
501 
502 	if (!record__aio_enabled(rec))
503 		return;
504 
505 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
506 		struct mmap *map = &maps[i];
507 
508 		if (map->core.base)
509 			record__aio_sync(map, true);
510 	}
511 }
512 
513 static int nr_cblocks_default = 1;
514 static int nr_cblocks_max = 4;
515 
static int record__aio_parse(const struct option *opt,
517 			     const char *str,
518 			     int unset)
519 {
520 	struct record_opts *opts = (struct record_opts *)opt->value;
521 
522 	if (unset) {
523 		opts->nr_cblocks = 0;
524 	} else {
525 		if (str)
526 			opts->nr_cblocks = strtol(str, NULL, 0);
527 		if (!opts->nr_cblocks)
528 			opts->nr_cblocks = nr_cblocks_default;
529 	}
530 
531 	return 0;
532 }
533 #else /* HAVE_AIO_SUPPORT */
534 static int nr_cblocks_max = 0;
535 
static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
537 			    off_t *off __maybe_unused)
538 {
539 	return -1;
540 }
541 
static off_t record__aio_get_pos(int trace_fd __maybe_unused)
543 {
544 	return -1;
545 }
546 
static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
548 {
549 }
550 
static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
552 {
553 }
554 #endif
555 
static int record__aio_enabled(struct record *rec)
557 {
558 	return rec->opts.nr_cblocks > 0;
559 }
560 
561 #define MMAP_FLUSH_DEFAULT 1
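/*
 * Parse the mmap flush threshold: the value accepts B/K/M/G suffixes or a
 * plain number, falls back to MMAP_FLUSH_DEFAULT, and is capped at one
 * quarter of the mmap buffer size. A typical invocation might look like
 * "perf record --mmap-flush=2M ..." (example usage, not taken from this
 * file).
 */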
static int record__mmap_flush_parse(const struct option *opt,
563 				    const char *str,
564 				    int unset)
565 {
566 	int flush_max;
567 	struct record_opts *opts = (struct record_opts *)opt->value;
568 	static struct parse_tag tags[] = {
569 			{ .tag  = 'B', .mult = 1       },
570 			{ .tag  = 'K', .mult = 1 << 10 },
571 			{ .tag  = 'M', .mult = 1 << 20 },
572 			{ .tag  = 'G', .mult = 1 << 30 },
573 			{ .tag  = 0 },
574 	};
575 
576 	if (unset)
577 		return 0;
578 
579 	if (str) {
580 		opts->mmap_flush = parse_tag_value(str, tags);
581 		if (opts->mmap_flush == (int)-1)
582 			opts->mmap_flush = strtol(str, NULL, 0);
583 	}
584 
585 	if (!opts->mmap_flush)
586 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
587 
588 	flush_max = evlist__mmap_size(opts->mmap_pages);
589 	flush_max /= 4;
590 	if (opts->mmap_flush > flush_max)
591 		opts->mmap_flush = flush_max;
592 
593 	return 0;
594 }
595 
596 #ifdef HAVE_ZSTD_SUPPORT
597 static unsigned int comp_level_default = 1;
598 
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
600 {
601 	struct record_opts *opts = opt->value;
602 
603 	if (unset) {
604 		opts->comp_level = 0;
605 	} else {
606 		if (str)
607 			opts->comp_level = strtol(str, NULL, 0);
608 		if (!opts->comp_level)
609 			opts->comp_level = comp_level_default;
610 	}
611 
612 	return 0;
613 }
614 #endif
615 static unsigned int comp_level_max = 22;
616 
static int record__comp_enabled(struct record *rec)
618 {
619 	return rec->opts.comp_level > 0;
620 }
621 
static int process_synthesized_event(const struct perf_tool *tool,
623 				     union perf_event *event,
624 				     struct perf_sample *sample __maybe_unused,
625 				     struct machine *machine __maybe_unused)
626 {
627 	struct record *rec = container_of(tool, struct record, tool);
628 	return record__write(rec, NULL, event, event->header.size);
629 }
630 
631 static struct mutex synth_lock;
632 
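/*
 * Variant of process_synthesized_event() used when event synthesis runs on
 * multiple threads: synth_lock serializes the writes so synthesized events
 * don't interleave in the output.
 */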
static int process_locked_synthesized_event(const struct perf_tool *tool,
634 				     union perf_event *event,
635 				     struct perf_sample *sample __maybe_unused,
636 				     struct machine *machine __maybe_unused)
637 {
638 	int ret;
639 
640 	mutex_lock(&synth_lock);
641 	ret = process_synthesized_event(tool, event, sample, machine);
642 	mutex_unlock(&synth_lock);
643 	return ret;
644 }
645 
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
647 {
648 	struct record *rec = to;
649 
650 	if (record__comp_enabled(rec)) {
651 		ssize_t compressed = zstd_compress(rec->session, map, map->data,
652 						   mmap__mmap_len(map), bf, size);
653 
654 		if (compressed < 0)
655 			return (int)compressed;
656 
657 		size = compressed;
658 		bf   = map->data;
659 	}
660 
661 	thread->samples++;
662 	return record__write(rec, map, bf, size);
663 }
664 
665 static volatile sig_atomic_t signr = -1;
666 static volatile sig_atomic_t child_finished;
667 #ifdef HAVE_EVENTFD_SUPPORT
668 static volatile sig_atomic_t done_fd = -1;
669 #endif
670 
static void sig_handler(int sig)
672 {
673 	if (sig == SIGCHLD)
674 		child_finished = 1;
675 	else
676 		signr = sig;
677 
678 	done = 1;
679 #ifdef HAVE_EVENTFD_SUPPORT
680 	if (done_fd >= 0) {
681 		u64 tmp = 1;
682 		int orig_errno = errno;
683 
684 		/*
685 		 * It is possible for this signal handler to run after done is
686 		 * checked in the main loop, but before the perf counter fds are
687 		 * polled. If this happens, the poll() will continue to wait
688 		 * even though done is set, and will only break out if either
689 		 * another signal is received, or the counters are ready for
690 		 * read. To ensure the poll() doesn't sleep when done is set,
691 		 * use an eventfd (done_fd) to wake up the poll().
692 		 */
693 		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
694 			pr_err("failed to signal wakeup fd, error: %m\n");
695 
696 		errno = orig_errno;
697 	}
698 #endif // HAVE_EVENTFD_SUPPORT
699 }
700 
static void sigsegv_handler(int sig)
702 {
703 	perf_hooks__recover();
704 	sighandler_dump_stack(sig);
705 }
706 
static void record__sig_exit(void)
708 {
709 	if (signr == -1)
710 		return;
711 
712 	signal(signr, SIG_DFL);
713 	raise(signr);
714 }
715 
716 #ifdef HAVE_AUXTRACE_SUPPORT
717 
static int record__process_auxtrace(const struct perf_tool *tool,
719 				    struct mmap *map,
720 				    union perf_event *event, void *data1,
721 				    size_t len1, void *data2, size_t len2)
722 {
723 	struct record *rec = container_of(tool, struct record, tool);
724 	struct perf_data *data = &rec->data;
725 	size_t padding;
726 	u8 pad[8] = {0};
727 
728 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
729 		off_t file_offset;
730 		int fd = perf_data__fd(data);
731 		int err;
732 
733 		file_offset = lseek(fd, 0, SEEK_CUR);
734 		if (file_offset == -1)
735 			return -1;
736 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
737 						     event, file_offset);
738 		if (err)
739 			return err;
740 	}
741 
742 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
743 	padding = (len1 + len2) & 7;
744 	if (padding)
745 		padding = 8 - padding;
746 
747 	record__write(rec, map, event, event->header.size);
748 	record__write(rec, map, data1, len1);
749 	if (len2)
750 		record__write(rec, map, data2, len2);
751 	record__write(rec, map, &pad, padding);
752 
753 	return 0;
754 }
755 
static int record__auxtrace_mmap_read(struct record *rec,
757 				      struct mmap *map)
758 {
759 	int ret;
760 
761 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
762 				  record__process_auxtrace);
763 	if (ret < 0)
764 		return ret;
765 
766 	if (ret)
767 		rec->samples++;
768 
769 	return 0;
770 }
771 
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
773 					       struct mmap *map)
774 {
775 	int ret;
776 
777 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
778 					   record__process_auxtrace,
779 					   rec->opts.auxtrace_snapshot_size);
780 	if (ret < 0)
781 		return ret;
782 
783 	if (ret)
784 		rec->samples++;
785 
786 	return 0;
787 }
788 
static int record__auxtrace_read_snapshot_all(struct record *rec)
790 {
791 	int i;
792 	int rc = 0;
793 
794 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
795 		struct mmap *map = &rec->evlist->mmap[i];
796 
797 		if (!map->auxtrace_mmap.base)
798 			continue;
799 
800 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
801 			rc = -1;
802 			goto out;
803 		}
804 	}
805 out:
806 	return rc;
807 }
808 
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
810 {
811 	pr_debug("Recording AUX area tracing snapshot\n");
812 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
813 		trigger_error(&auxtrace_snapshot_trigger);
814 	} else {
815 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
816 			trigger_error(&auxtrace_snapshot_trigger);
817 		else
818 			trigger_ready(&auxtrace_snapshot_trigger);
819 	}
820 }
821 
static int record__auxtrace_snapshot_exit(struct record *rec)
823 {
824 	if (trigger_is_error(&auxtrace_snapshot_trigger))
825 		return 0;
826 
827 	if (!auxtrace_record__snapshot_started &&
828 	    auxtrace_record__snapshot_start(rec->itr))
829 		return -1;
830 
831 	record__read_auxtrace_snapshot(rec, true);
832 	if (trigger_is_error(&auxtrace_snapshot_trigger))
833 		return -1;
834 
835 	return 0;
836 }
837 
static int record__auxtrace_init(struct record *rec)
839 {
840 	int err;
841 
842 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
843 	    && record__threads_enabled(rec)) {
844 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
845 		return -EINVAL;
846 	}
847 
848 	if (!rec->itr) {
849 		rec->itr = auxtrace_record__init(rec->evlist, &err);
850 		if (err)
851 			return err;
852 	}
853 
854 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
855 					      rec->opts.auxtrace_snapshot_opts);
856 	if (err)
857 		return err;
858 
859 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
860 					    rec->opts.auxtrace_sample_opts);
861 	if (err)
862 		return err;
863 
864 	err = auxtrace_parse_aux_action(rec->evlist);
865 	if (err)
866 		return err;
867 
868 	return auxtrace_parse_filters(rec->evlist);
869 }
870 
871 #else
872 
873 static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
875 			       struct mmap *map __maybe_unused)
876 {
877 	return 0;
878 }
879 
880 static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
882 				    bool on_exit __maybe_unused)
883 {
884 }
885 
886 static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
888 {
889 	return 0;
890 }
891 
892 static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
894 {
895 	return 0;
896 }
897 
static int record__auxtrace_init(struct record *rec __maybe_unused)
899 {
900 	return 0;
901 }
902 
903 #endif
904 
static int record__config_text_poke(struct evlist *evlist)
906 {
907 	struct evsel *evsel;
908 
909 	/* Nothing to do if text poke is already configured */
910 	evlist__for_each_entry(evlist, evsel) {
911 		if (evsel->core.attr.text_poke)
912 			return 0;
913 	}
914 
915 	evsel = evlist__add_dummy_on_all_cpus(evlist);
916 	if (!evsel)
917 		return -ENOMEM;
918 
919 	evsel->core.attr.text_poke = 1;
920 	evsel->core.attr.ksymbol = 1;
921 	evsel->immediate = true;
922 	evsel__set_sample_bit(evsel, TIME);
923 
924 	return 0;
925 }
926 
static int record__config_off_cpu(struct record *rec)
928 {
929 	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
930 }
931 
static bool record__tracking_system_wide(struct record *rec)
933 {
934 	struct evlist *evlist = rec->evlist;
935 	struct evsel *evsel;
936 
	/*
	 * If a non-dummy evsel exists, system-wide sideband is needed to
	 * help parse sample information.
	 * For example, the PERF_RECORD_MMAP event helps resolve symbols,
	 * and the PERF_RECORD_COMM event provides the task's executable name.
	 */
943 	evlist__for_each_entry(evlist, evsel) {
944 		if (!evsel__is_dummy_event(evsel))
945 			return true;
946 	}
947 
948 	return false;
949 }
950 
static int record__config_tracking_events(struct record *rec)
952 {
953 	struct record_opts *opts = &rec->opts;
954 	struct evlist *evlist = rec->evlist;
955 	bool system_wide = false;
956 	struct evsel *evsel;
957 
	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * a tracking event so that we can track PERF_RECORD_MMAP events to
	 * cover the delay of waiting or of event synthesis.
	 */
963 	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
964 	    perf_pmus__num_core_pmus() > 1) {
965 
966 		/*
967 		 * User space tasks can migrate between CPUs, so when tracing
968 		 * selected CPUs, sideband for all CPUs is still needed.
969 		 */
970 		if (!!opts->target.cpu_list && record__tracking_system_wide(rec))
971 			system_wide = true;
972 
973 		evsel = evlist__findnew_tracking_event(evlist, system_wide);
974 		if (!evsel)
975 			return -ENOMEM;
976 
		/*
		 * For initial_delay, enable the tracking event when the
		 * process is forked; for system wide, enable it immediately.
		 */
981 		if (opts->target.initial_delay && !evsel->immediate &&
982 		    !target__has_cpu(&opts->target))
983 			evsel->core.attr.enable_on_exec = 1;
984 		else
985 			evsel->immediate = 1;
986 	}
987 
988 	return 0;
989 }
990 
static bool record__kcore_readable(struct machine *machine)
992 {
993 	char kcore[PATH_MAX];
994 	int fd;
995 
996 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
997 
998 	fd = open(kcore, O_RDONLY);
999 	if (fd < 0)
1000 		return false;
1001 
1002 	close(fd);
1003 
1004 	return true;
1005 }
1006 
static int record__kcore_copy(struct machine *machine, struct perf_data *data)
1008 {
1009 	char from_dir[PATH_MAX];
1010 	char kcore_dir[PATH_MAX];
1011 	int ret;
1012 
1013 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
1014 
1015 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
1016 	if (ret)
1017 		return ret;
1018 
1019 	return kcore_copy(from_dir, kcore_dir);
1020 }
1021 
static void record__thread_data_init_pipes(struct record_thread *thread_data)
1023 {
1024 	thread_data->pipes.msg[0] = -1;
1025 	thread_data->pipes.msg[1] = -1;
1026 	thread_data->pipes.ack[0] = -1;
1027 	thread_data->pipes.ack[1] = -1;
1028 }
1029 
static int record__thread_data_open_pipes(struct record_thread *thread_data)
1031 {
1032 	if (pipe(thread_data->pipes.msg))
1033 		return -EINVAL;
1034 
1035 	if (pipe(thread_data->pipes.ack)) {
1036 		close(thread_data->pipes.msg[0]);
1037 		thread_data->pipes.msg[0] = -1;
1038 		close(thread_data->pipes.msg[1]);
1039 		thread_data->pipes.msg[1] = -1;
1040 		return -EINVAL;
1041 	}
1042 
1043 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
1044 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
1045 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
1046 
1047 	return 0;
1048 }
1049 
static void record__thread_data_close_pipes(struct record_thread *thread_data)
1051 {
1052 	if (thread_data->pipes.msg[0] != -1) {
1053 		close(thread_data->pipes.msg[0]);
1054 		thread_data->pipes.msg[0] = -1;
1055 	}
1056 	if (thread_data->pipes.msg[1] != -1) {
1057 		close(thread_data->pipes.msg[1]);
1058 		thread_data->pipes.msg[1] = -1;
1059 	}
1060 	if (thread_data->pipes.ack[0] != -1) {
1061 		close(thread_data->pipes.ack[0]);
1062 		thread_data->pipes.ack[0] = -1;
1063 	}
1064 	if (thread_data->pipes.ack[1] != -1) {
1065 		close(thread_data->pipes.ack[1]);
1066 		thread_data->pipes.ack[1] = -1;
1067 	}
1068 }
1069 
static bool evlist__per_thread(struct evlist *evlist)
1071 {
1072 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1073 }
1074 
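/*
 * Assign mmaps to one recording thread: in per-thread mode (dummy CPU map)
 * the thread takes all mmaps, otherwise it gets only the mmaps whose CPUs
 * are set in its mask->maps bitmap.
 */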
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1076 {
1077 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1078 	struct mmap *mmap = evlist->mmap;
1079 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1080 	struct perf_cpu_map *cpus = evlist->core.all_cpus;
1081 	bool per_thread = evlist__per_thread(evlist);
1082 
1083 	if (per_thread)
1084 		thread_data->nr_mmaps = nr_mmaps;
1085 	else
1086 		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1087 						      thread_data->mask->maps.nbits);
1088 	if (mmap) {
1089 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1090 		if (!thread_data->maps)
1091 			return -ENOMEM;
1092 	}
1093 	if (overwrite_mmap) {
1094 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1095 		if (!thread_data->overwrite_maps) {
1096 			zfree(&thread_data->maps);
1097 			return -ENOMEM;
1098 		}
1099 	}
1100 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1101 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1102 
1103 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1104 		if (per_thread ||
1105 		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1106 			if (thread_data->maps) {
1107 				thread_data->maps[tm] = &mmap[m];
1108 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1109 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1110 			}
1111 			if (thread_data->overwrite_maps) {
1112 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1113 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1114 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1115 			}
1116 			tm++;
1117 		}
1118 	}
1119 
1120 	return 0;
1121 }
1122 
static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1124 {
1125 	int f, tm, pos;
1126 	struct mmap *map, *overwrite_map;
1127 
1128 	fdarray__init(&thread_data->pollfd, 64);
1129 
1130 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1131 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1132 		overwrite_map = thread_data->overwrite_maps ?
1133 				thread_data->overwrite_maps[tm] : NULL;
1134 
1135 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1136 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1137 
1138 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1139 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1140 							      &evlist->core.pollfd);
1141 				if (pos < 0)
1142 					return pos;
1143 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1144 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1145 			}
1146 		}
1147 	}
1148 
1149 	return 0;
1150 }
1151 
static void record__free_thread_data(struct record *rec)
1153 {
1154 	int t;
1155 	struct record_thread *thread_data = rec->thread_data;
1156 
1157 	if (thread_data == NULL)
1158 		return;
1159 
1160 	for (t = 0; t < rec->nr_threads; t++) {
1161 		record__thread_data_close_pipes(&thread_data[t]);
1162 		zfree(&thread_data[t].maps);
1163 		zfree(&thread_data[t].overwrite_maps);
1164 		fdarray__exit(&thread_data[t].pollfd);
1165 	}
1166 
1167 	zfree(&rec->thread_data);
1168 }
1169 
static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1171 						    int evlist_pollfd_index,
1172 						    int thread_pollfd_index)
1173 {
1174 	size_t x = rec->index_map_cnt;
1175 
1176 	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1177 		return -ENOMEM;
1178 	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1179 	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1180 	rec->index_map_cnt += 1;
1181 	return 0;
1182 }
1183 
static int record__update_evlist_pollfd_from_thread(struct record *rec,
1185 						    struct evlist *evlist,
1186 						    struct record_thread *thread_data)
1187 {
1188 	struct pollfd *e_entries = evlist->core.pollfd.entries;
1189 	struct pollfd *t_entries = thread_data->pollfd.entries;
1190 	int err = 0;
1191 	size_t i;
1192 
1193 	for (i = 0; i < rec->index_map_cnt; i++) {
1194 		int e_pos = rec->index_map[i].evlist_pollfd_index;
1195 		int t_pos = rec->index_map[i].thread_pollfd_index;
1196 
1197 		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1198 		    e_entries[e_pos].events != t_entries[t_pos].events) {
1199 			pr_err("Thread and evlist pollfd index mismatch\n");
1200 			err = -EINVAL;
1201 			continue;
1202 		}
1203 		e_entries[e_pos].revents = t_entries[t_pos].revents;
1204 	}
1205 	return err;
1206 }
1207 
static int record__dup_non_perf_events(struct record *rec,
1209 				       struct evlist *evlist,
1210 				       struct record_thread *thread_data)
1211 {
1212 	struct fdarray *fda = &evlist->core.pollfd;
1213 	int i, ret;
1214 
1215 	for (i = 0; i < fda->nr; i++) {
1216 		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1217 			continue;
1218 		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1219 		if (ret < 0) {
1220 			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1221 			return ret;
1222 		}
1223 		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1224 			  thread_data, ret, fda->entries[i].fd);
1225 		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1226 		if (ret < 0) {
1227 			pr_err("Failed to map thread and evlist pollfd indexes\n");
1228 			return ret;
1229 		}
1230 	}
1231 	return 0;
1232 }
1233 
static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1235 {
1236 	int t, ret;
1237 	struct record_thread *thread_data;
1238 
1239 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1240 	if (!rec->thread_data) {
1241 		pr_err("Failed to allocate thread data\n");
1242 		return -ENOMEM;
1243 	}
1244 	thread_data = rec->thread_data;
1245 
1246 	for (t = 0; t < rec->nr_threads; t++)
1247 		record__thread_data_init_pipes(&thread_data[t]);
1248 
1249 	for (t = 0; t < rec->nr_threads; t++) {
1250 		thread_data[t].rec = rec;
1251 		thread_data[t].mask = &rec->thread_masks[t];
1252 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1253 		if (ret) {
1254 			pr_err("Failed to initialize thread[%d] maps\n", t);
1255 			goto out_free;
1256 		}
1257 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1258 		if (ret) {
1259 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1260 			goto out_free;
1261 		}
1262 		if (t) {
1263 			thread_data[t].tid = -1;
1264 			ret = record__thread_data_open_pipes(&thread_data[t]);
1265 			if (ret) {
1266 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1267 				goto out_free;
1268 			}
1269 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1270 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1271 			if (ret < 0) {
1272 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1273 				goto out_free;
1274 			}
1275 			thread_data[t].ctlfd_pos = ret;
1276 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1277 				 thread_data, thread_data[t].ctlfd_pos,
1278 				 thread_data[t].pipes.msg[0]);
1279 		} else {
1280 			thread_data[t].tid = gettid();
1281 
1282 			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1283 			if (ret < 0)
1284 				goto out_free;
1285 
1286 			thread_data[t].ctlfd_pos = -1; /* Not used */
1287 		}
1288 	}
1289 
1290 	return 0;
1291 
1292 out_free:
1293 	record__free_thread_data(rec);
1294 
1295 	return ret;
1296 }
1297 
static int record__mmap_evlist(struct record *rec,
1299 			       struct evlist *evlist)
1300 {
1301 	int i, ret;
1302 	struct record_opts *opts = &rec->opts;
1303 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1304 				  opts->auxtrace_sample_mode;
1305 	char msg[512];
1306 
1307 	if (opts->affinity != PERF_AFFINITY_SYS)
1308 		cpu__setup_cpunode_map();
1309 
1310 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1311 				 opts->auxtrace_mmap_pages,
1312 				 auxtrace_overwrite,
1313 				 opts->nr_cblocks, opts->affinity,
1314 				 opts->mmap_flush, opts->comp_level) < 0) {
1315 		if (errno == EPERM) {
1316 			pr_err("Permission error mapping pages.\n"
1317 			       "Consider increasing "
1318 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1319 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1320 			       "(current value: %u,%u)\n",
1321 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1322 			return -errno;
1323 		} else {
1324 			pr_err("failed to mmap with %d (%s)\n", errno,
1325 				str_error_r(errno, msg, sizeof(msg)));
1326 			if (errno)
1327 				return -errno;
1328 			else
1329 				return -EINVAL;
1330 		}
1331 	}
1332 
1333 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1334 		return -1;
1335 
1336 	ret = record__alloc_thread_data(rec, evlist);
1337 	if (ret)
1338 		return ret;
1339 
1340 	if (record__threads_enabled(rec)) {
1341 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1342 		if (ret) {
1343 			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1344 			return ret;
1345 		}
1346 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1347 			if (evlist->mmap)
1348 				evlist->mmap[i].file = &rec->data.dir.files[i];
1349 			if (evlist->overwrite_mmap)
1350 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1351 		}
1352 	}
1353 
1354 	return 0;
1355 }
1356 
static int record__mmap(struct record *rec)
1358 {
1359 	return record__mmap_evlist(rec, rec->evlist);
1360 }
1361 
static int record__open(struct record *rec)
1363 {
1364 	char msg[BUFSIZ];
1365 	struct evsel *pos;
1366 	struct evlist *evlist = rec->evlist;
1367 	struct perf_session *session = rec->session;
1368 	struct record_opts *opts = &rec->opts;
1369 	int rc = 0;
1370 
1371 	evlist__for_each_entry(evlist, pos) {
1372 try_again:
1373 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1374 			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
1375 				if (verbose > 0)
1376 					ui__warning("%s\n", msg);
1377 				goto try_again;
1378 			}
1379 			if ((errno == EINVAL || errno == EBADF) &&
1380 			    pos->core.leader != &pos->core &&
1381 			    pos->weak_group) {
1382 			        pos = evlist__reset_weak_group(evlist, pos, true);
1383 				goto try_again;
1384 			}
1385 			rc = -errno;
1386 			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1387 			ui__error("%s\n", msg);
1388 			goto out;
1389 		}
1390 
1391 		pos->supported = true;
1392 	}
1393 
1394 	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1395 		pr_warning(
1396 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1397 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1398 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1399 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1400 "Samples in kernel modules won't be resolved at all.\n\n"
1401 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1402 "even with a suitable vmlinux or kallsyms file.\n\n");
1403 	}
1404 
1405 	if (evlist__apply_filters(evlist, &pos, &opts->target)) {
1406 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1407 			pos->filter ?: "BPF", evsel__name(pos), errno,
1408 			str_error_r(errno, msg, sizeof(msg)));
1409 		rc = -1;
1410 		goto out;
1411 	}
1412 
1413 	rc = record__mmap(rec);
1414 	if (rc)
1415 		goto out;
1416 
1417 	session->evlist = evlist;
1418 	perf_session__set_id_hdr_size(session);
1419 out:
1420 	return rc;
1421 }
1422 
static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1424 {
1425 	if (rec->evlist->first_sample_time == 0)
1426 		rec->evlist->first_sample_time = sample_time;
1427 
1428 	if (sample_time)
1429 		rec->evlist->last_sample_time = sample_time;
1430 }
1431 
static int process_sample_event(const struct perf_tool *tool,
1433 				union perf_event *event,
1434 				struct perf_sample *sample,
1435 				struct evsel *evsel,
1436 				struct machine *machine)
1437 {
1438 	struct record *rec = container_of(tool, struct record, tool);
1439 
1440 	set_timestamp_boundary(rec, sample->time);
1441 
1442 	if (rec->buildid_all)
1443 		return 0;
1444 
1445 	rec->samples++;
1446 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1447 }
1448 
static int process_buildids(struct record *rec)
1450 {
1451 	struct perf_session *session = rec->session;
1452 
1453 	if (perf_data__size(&rec->data) == 0)
1454 		return 0;
1455 
	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer a vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
1465 	symbol_conf.ignore_vmlinux_buildid = true;
1466 
	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
1473 	if (rec->buildid_all && !rec->timestamp_boundary)
1474 		rec->tool.sample = process_event_sample_stub;
1475 
1476 	return perf_session__process_events(session);
1477 }
1478 
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1480 {
1481 	int err;
1482 	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmap prior to the guest kernel
	 * mmap and trigger a dso preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This method avoids missing symbols when the
	 * first address is in a module instead of in the guest kernel.
	 */
1491 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1492 					     machine);
1493 	if (err < 0)
1494 		pr_err("Couldn't record guest kernel [%d]'s reference"
1495 		       " relocation symbol.\n", machine->pid);
1496 
	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
1501 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1502 						 machine);
1503 	if (err < 0)
1504 		pr_err("Couldn't record guest kernel [%d]'s reference"
1505 		       " relocation symbol.\n", machine->pid);
1506 }
1507 
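/*
 * Synthetic headers emitted into the output stream. FINISHED_ROUND marks a
 * point up to which buffered events can be ordered and flushed by the
 * consuming tools; FINISHED_INIT marks the end of the initial synthesized
 * events (see write_finished_init() below).
 */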
1508 static struct perf_event_header finished_round_event = {
1509 	.size = sizeof(struct perf_event_header),
1510 	.type = PERF_RECORD_FINISHED_ROUND,
1511 };
1512 
1513 static struct perf_event_header finished_init_event = {
1514 	.size = sizeof(struct perf_event_header),
1515 	.type = PERF_RECORD_FINISHED_INIT,
1516 };
1517 
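/*
 * With --affinity=node or --affinity=cpu, migrate the current thread to the
 * CPU set recorded in the mmap's affinity mask before reading it, so that
 * buffer processing stays close to where the data was written.
 */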
static void record__adjust_affinity(struct record *rec, struct mmap *map)
1519 {
1520 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1521 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1522 			  thread->mask->affinity.nbits)) {
1523 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1524 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1525 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1526 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1527 					(cpu_set_t *)thread->mask->affinity.bits);
1528 		if (verbose == 2) {
1529 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1530 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1531 		}
1532 	}
1533 }
1534 
static size_t process_comp_header(void *record, size_t increment)
1536 {
1537 	struct perf_record_compressed *event = record;
1538 	size_t size = sizeof(*event);
1539 
1540 	if (increment) {
1541 		event->header.size += increment;
1542 		return increment;
1543 	}
1544 
1545 	event->header.type = PERF_RECORD_COMPRESSED;
1546 	event->header.size = size;
1547 
1548 	return size;
1549 }
1550 
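/*
 * Compress a chunk of trace data into PERF_RECORD_COMPRESSED records, using
 * the per-mmap zstd stream in threaded (directory) mode and the session-wide
 * stream otherwise, and account transferred vs. compressed byte counts for
 * the final statistics.
 */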
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
1552 			    void *dst, size_t dst_size, void *src, size_t src_size)
1553 {
1554 	ssize_t compressed;
1555 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1556 	struct zstd_data *zstd_data = &session->zstd_data;
1557 
1558 	if (map && map->file)
1559 		zstd_data = &map->zstd_data;
1560 
1561 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1562 						     max_record_size, process_comp_header);
1563 	if (compressed < 0)
1564 		return compressed;
1565 
1566 	if (map && map->file) {
1567 		thread->bytes_transferred += src_size;
1568 		thread->bytes_compressed  += compressed;
1569 	} else {
1570 		session->bytes_transferred += src_size;
1571 		session->bytes_compressed  += compressed;
1572 	}
1573 
1574 	return compressed;
1575 }
1576 
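/*
 * One pass over this thread's mmaps: push pending data to the output (via
 * aio when --aio is enabled, synchronously otherwise), read AUX area data
 * when not in snapshot/sample mode, and terminate the pass with a
 * FINISHED_ROUND event unless recording in threaded (directory) mode.
 */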
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1578 				    bool overwrite, bool synch)
1579 {
1580 	u64 bytes_written = rec->bytes_written;
1581 	int i;
1582 	int rc = 0;
1583 	int nr_mmaps;
1584 	struct mmap **maps;
1585 	int trace_fd = rec->data.file.fd;
1586 	off_t off = 0;
1587 
1588 	if (!evlist)
1589 		return 0;
1590 
1591 	nr_mmaps = thread->nr_mmaps;
1592 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1593 
1594 	if (!maps)
1595 		return 0;
1596 
1597 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1598 		return 0;
1599 
1600 	if (record__aio_enabled(rec))
1601 		off = record__aio_get_pos(trace_fd);
1602 
1603 	for (i = 0; i < nr_mmaps; i++) {
1604 		u64 flush = 0;
1605 		struct mmap *map = maps[i];
1606 
1607 		if (map->core.base) {
1608 			record__adjust_affinity(rec, map);
1609 			if (synch) {
1610 				flush = map->core.flush;
1611 				map->core.flush = 1;
1612 			}
1613 			if (!record__aio_enabled(rec)) {
1614 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1615 					if (synch)
1616 						map->core.flush = flush;
1617 					rc = -1;
1618 					goto out;
1619 				}
1620 			} else {
1621 				if (record__aio_push(rec, map, &off) < 0) {
1622 					record__aio_set_pos(trace_fd, off);
1623 					if (synch)
1624 						map->core.flush = flush;
1625 					rc = -1;
1626 					goto out;
1627 				}
1628 			}
1629 			if (synch)
1630 				map->core.flush = flush;
1631 		}
1632 
1633 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1634 		    !rec->opts.auxtrace_sample_mode &&
1635 		    record__auxtrace_mmap_read(rec, map) != 0) {
1636 			rc = -1;
1637 			goto out;
1638 		}
1639 	}
1640 
1641 	if (record__aio_enabled(rec))
1642 		record__aio_set_pos(trace_fd, off);
1643 
	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have their data
	 * sorted by the kernel.
	 */
1652 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1653 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1654 
1655 	if (overwrite)
1656 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1657 out:
1658 	return rc;
1659 }
1660 
static int record__mmap_read_all(struct record *rec, bool synch)
1662 {
1663 	int err;
1664 
1665 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1666 	if (err)
1667 		return err;
1668 
1669 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1670 }
1671 
static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1673 					   void *arg __maybe_unused)
1674 {
1675 	struct perf_mmap *map = fda->priv[fd].ptr;
1676 
1677 	if (map)
1678 		perf_mmap__put(map);
1679 }
1680 
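/*
 * Body of a worker thread in --threads mode: acknowledge start-up over the
 * ack pipe, then loop reading the assigned mmaps, polling when no new
 * samples arrived, until the main thread closes the message pipe (POLLHUP);
 * finally drain the buffers and acknowledge termination.
 */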
static void *record__thread(void *arg)
1682 {
1683 	enum thread_msg msg = THREAD_MSG__READY;
1684 	bool terminate = false;
1685 	struct fdarray *pollfd;
1686 	int err, ctlfd_pos;
1687 
1688 	thread = arg;
1689 	thread->tid = gettid();
1690 
1691 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1692 	if (err == -1)
1693 		pr_warning("threads[%d]: failed to notify on start: %s\n",
1694 			   thread->tid, strerror(errno));
1695 
1696 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1697 
1698 	pollfd = &thread->pollfd;
1699 	ctlfd_pos = thread->ctlfd_pos;
1700 
1701 	for (;;) {
1702 		unsigned long long hits = thread->samples;
1703 
1704 		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1705 			break;
1706 
1707 		if (hits == thread->samples) {
1708 
1709 			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
1714 			if (err > 0 || (err < 0 && errno == EINTR))
1715 				err = 0;
1716 			thread->waking++;
1717 
1718 			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1719 					    record__thread_munmap_filtered, NULL) == 0)
1720 				break;
1721 		}
1722 
1723 		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1724 			terminate = true;
1725 			close(thread->pipes.msg[0]);
1726 			thread->pipes.msg[0] = -1;
1727 			pollfd->entries[ctlfd_pos].fd = -1;
1728 			pollfd->entries[ctlfd_pos].events = 0;
1729 		}
1730 
1731 		pollfd->entries[ctlfd_pos].revents = 0;
1732 	}
1733 	record__mmap_read_all(thread->rec, true);
1734 
1735 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1736 	if (err == -1)
1737 		pr_warning("threads[%d]: failed to notify on termination: %s\n",
1738 			   thread->tid, strerror(errno));
1739 
1740 	return NULL;
1741 }
1742 
static void record__init_features(struct record *rec)
1744 {
1745 	struct perf_session *session = rec->session;
1746 	int feat;
1747 
1748 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1749 		perf_header__set_feat(&session->header, feat);
1750 
1751 	if (rec->no_buildid)
1752 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1753 
1754 	if (!have_tracepoints(&rec->evlist->core.entries))
1755 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1756 
1757 	if (!rec->opts.branch_stack)
1758 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1759 
1760 	if (!rec->opts.full_auxtrace)
1761 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1762 
1763 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1764 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1765 
1766 	if (!rec->opts.use_clockid)
1767 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1768 
1769 	if (!record__threads_enabled(rec))
1770 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1771 
1772 	if (!record__comp_enabled(rec))
1773 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1774 
1775 	perf_header__clear_feat(&session->header, HEADER_STAT);
1776 }
1777 
1778 static void
1779 record__finish_output(struct record *rec)
1780 {
1781 	int i;
1782 	struct perf_data *data = &rec->data;
1783 	int fd = perf_data__fd(data);
1784 
1785 	if (data->is_pipe) {
1786 		/* Just to display approx. size */
1787 		data->file.size = rec->bytes_written;
1788 		return;
1789 	}
1790 
1791 	rec->session->header.data_size += rec->bytes_written;
1792 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1793 	if (record__threads_enabled(rec)) {
1794 		for (i = 0; i < data->dir.nr; i++)
1795 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1796 	}
1797 
1798 	if (!rec->no_buildid) {
1799 		process_buildids(rec);
1800 
1801 		if (rec->buildid_all)
1802 			perf_session__dsos_hit_all(rec->session);
1803 	}
1804 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1805 
1806 	return;
1807 }
1808 
1809 static int record__synthesize_workload(struct record *rec, bool tail)
1810 {
1811 	int err;
1812 	struct perf_thread_map *thread_map;
1813 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1814 
1815 	if (rec->opts.tail_synthesize != tail)
1816 		return 0;
1817 
1818 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1819 	if (thread_map == NULL)
1820 		return -1;
1821 
1822 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1823 						 process_synthesized_event,
1824 						 &rec->session->machines.host,
1825 						 needs_mmap,
1826 						 rec->opts.sample_address);
1827 	perf_thread_map__put(thread_map);
1828 	return err;
1829 }
1830 
1831 static int write_finished_init(struct record *rec, bool tail)
1832 {
1833 	if (rec->opts.tail_synthesize != tail)
1834 		return 0;
1835 
1836 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1837 }
1838 
1839 static int record__synthesize(struct record *rec, bool tail);
1840 
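/*
 * Rotate the output: finalize the current perf.data, switch to a new
 * timestamped file and re-synthesize tracking events for it. Returns the
 * new output fd or a negative error code.
 */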
1841 static int
1842 record__switch_output(struct record *rec, bool at_exit)
1843 {
1844 	struct perf_data *data = &rec->data;
1845 	char *new_filename = NULL;
1846 	int fd, err;
1847 
1848 	/* Placeholder with the same size as a timestamp: "2015122520103046" */
1849 	char timestamp[] = "InvalidTimestamp";
1850 
1851 	record__aio_mmap_read_sync(rec);
1852 
1853 	write_finished_init(rec, true);
1854 
1855 	record__synthesize(rec, true);
1856 	if (target__none(&rec->opts.target))
1857 		record__synthesize_workload(rec, true);
1858 
1859 	rec->samples = 0;
1860 	record__finish_output(rec);
1861 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1862 	if (err) {
1863 		pr_err("Failed to get current timestamp\n");
1864 		return -EINVAL;
1865 	}
1866 
1867 	fd = perf_data__switch(data, timestamp,
1868 			       rec->session->header.data_offset,
1869 			       at_exit, &new_filename);
1870 	if (fd >= 0 && !at_exit) {
1871 		rec->bytes_written = 0;
1872 		rec->session->header.data_size = 0;
1873 	}
1874 
1875 	if (!quiet) {
1876 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1877 			data->path, timestamp);
1878 	}
1879 
1880 	if (rec->switch_output.num_files) {
1881 		int n = rec->switch_output.cur_file + 1;
1882 
1883 		if (n >= rec->switch_output.num_files)
1884 			n = 0;
1885 		rec->switch_output.cur_file = n;
1886 		if (rec->switch_output.filenames[n]) {
1887 			remove(rec->switch_output.filenames[n]);
1888 			zfree(&rec->switch_output.filenames[n]);
1889 		}
1890 		rec->switch_output.filenames[n] = new_filename;
1891 	} else {
1892 		free(new_filename);
1893 	}
1894 
1895 	/* Output tracking events */
1896 	if (!at_exit) {
1897 		record__synthesize(rec, false);
1898 
1899 		/*
1900 		 * In 'perf record --switch-output' without -a,
1901 		 * record__synthesize() in record__switch_output() won't
1902 		 * generate tracking events because there's no thread_map
1903 		 * in evlist, which causes the newly created perf.data to
1904 		 * lack map and comm information.
1905 		 * Create a fake thread_map and directly call
1906 		 * perf_event__synthesize_thread_map() for those events.
1907 		 */
1908 		if (target__none(&rec->opts.target))
1909 			record__synthesize_workload(rec, false);
1910 		write_finished_init(rec, false);
1911 	}
1912 	return fd;
1913 }
1914 
1915 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1916 					struct perf_record_lost_samples *lost,
1917 					int cpu_idx, int thread_idx, u64 lost_count,
1918 					u16 misc_flag)
1919 {
1920 	struct perf_sample_id *sid;
1921 	struct perf_sample sample;
1922 	int id_hdr_size;
1923 
1924 	perf_sample__init(&sample, /*all=*/true);
1925 	lost->lost = lost_count;
1926 	if (evsel->core.ids) {
1927 		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1928 		sample.id = sid->id;
1929 	}
1930 
1931 	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1932 						       evsel->core.attr.sample_type, &sample);
1933 	lost->header.size = sizeof(*lost) + id_hdr_size;
1934 	lost->header.misc = misc_flag;
1935 	record__write(rec, NULL, lost, lost->header.size);
1936 	perf_sample__exit(&sample);
1937 }
1938 
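/*
 * Read the lost-sample counters of each event for every CPU/thread pair
 * and emit PERF_RECORD_LOST_SAMPLES records, including samples dropped by
 * BPF filters.
 */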
1939 static void record__read_lost_samples(struct record *rec)
1940 {
1941 	struct perf_session *session = rec->session;
1942 	struct perf_record_lost_samples_and_ids lost;
1943 	struct evsel *evsel;
1944 
1945 	/* there was an error during record__open */
1946 	if (session->evlist == NULL)
1947 		return;
1948 
1949 	evlist__for_each_entry(session->evlist, evsel) {
1950 		struct xyarray *xy = evsel->core.sample_id;
1951 		u64 lost_count;
1952 
1953 		if (xy == NULL || evsel->core.fd == NULL)
1954 			continue;
1955 		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1956 		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1957 			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1958 			continue;
1959 		}
1960 
1961 		for (int x = 0; x < xyarray__max_x(xy); x++) {
1962 			for (int y = 0; y < xyarray__max_y(xy); y++) {
1963 				struct perf_counts_values count;
1964 
1965 				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1966 					pr_debug("read LOST count failed\n");
1967 					return;
1968 				}
1969 
1970 				if (count.lost) {
1971 					memset(&lost, 0, sizeof(lost));
1972 					lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
1973 					__record__save_lost_samples(rec, evsel, &lost.lost,
1974 								    x, y, count.lost, 0);
1975 				}
1976 			}
1977 		}
1978 
1979 		lost_count = perf_bpf_filter__lost_count(evsel);
1980 		if (lost_count) {
1981 			memset(&lost, 0, sizeof(lost));
1982 			lost.lost.header.type = PERF_RECORD_LOST_SAMPLES;
1983 			__record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count,
1984 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
1985 		}
1986 	}
1987 }
1988 
1989 static volatile sig_atomic_t workload_exec_errno;
1990 
1991 /*
1992  * evlist__prepare_workload() will send a SIGUSR1
1993  * if the fork fails, since we asked for it by setting its
1994  * want_signal parameter to true.
1995  */
1996 static void workload_exec_failed_signal(int signo __maybe_unused,
1997 					siginfo_t *info,
1998 					void *ucontext __maybe_unused)
1999 {
2000 	workload_exec_errno = info->si_value.sival_int;
2001 	done = 1;
2002 	child_finished = 1;
2003 }
2004 
2005 static void snapshot_sig_handler(int sig);
2006 static void alarm_sig_handler(int sig);
2007 
2008 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
2009 {
2010 	if (evlist) {
2011 		if (evlist->mmap && evlist->mmap[0].core.base)
2012 			return evlist->mmap[0].core.base;
2013 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
2014 			return evlist->overwrite_mmap[0].core.base;
2015 	}
2016 	return NULL;
2017 }
2018 
2019 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
2020 {
2021 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
2022 	if (pc)
2023 		return pc;
2024 	return NULL;
2025 }
2026 
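/*
 * Synthesize the non-sample metadata events (time conversion, id index,
 * auxtrace info, kernel and module mmaps, thread and cpu maps, etc.) that
 * later processing needs to interpret the samples.
 */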
2027 static int record__synthesize(struct record *rec, bool tail)
2028 {
2029 	struct perf_session *session = rec->session;
2030 	struct machine *machine = &session->machines.host;
2031 	struct perf_data *data = &rec->data;
2032 	struct record_opts *opts = &rec->opts;
2033 	struct perf_tool *tool = &rec->tool;
2034 	int err = 0;
2035 	event_op f = process_synthesized_event;
2036 
2037 	if (rec->opts.tail_synthesize != tail)
2038 		return 0;
2039 
2040 	if (data->is_pipe) {
2041 		err = perf_event__synthesize_for_pipe(tool, session, data,
2042 						      process_synthesized_event);
2043 		if (err < 0)
2044 			goto out;
2045 
2046 		rec->bytes_written += err;
2047 	}
2048 
2049 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
2050 					  process_synthesized_event, machine);
2051 	if (err)
2052 		goto out;
2053 
2054 	/* Synthesize id_index before auxtrace_info */
2055 	err = perf_event__synthesize_id_index(tool,
2056 					      process_synthesized_event,
2057 					      session->evlist, machine);
2058 	if (err)
2059 		goto out;
2060 
2061 	if (rec->opts.full_auxtrace) {
2062 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2063 					session, process_synthesized_event);
2064 		if (err)
2065 			goto out;
2066 	}
2067 
2068 	if (!evlist__exclude_kernel(rec->evlist)) {
2069 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2070 							 machine);
2071 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2072 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2073 				   "Check /proc/kallsyms permission or run as root.\n");
2074 
2075 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2076 						     machine);
2077 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2078 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2079 				   "Check /proc/modules permission or run as root.\n");
2080 	}
2081 
2082 	if (perf_guest) {
2083 		machines__process_guests(&session->machines,
2084 					 perf_event__synthesize_guest_os, tool);
2085 	}
2086 
2087 	err = perf_event__synthesize_extra_attr(&rec->tool,
2088 						rec->evlist,
2089 						process_synthesized_event,
2090 						data->is_pipe);
2091 	if (err)
2092 		goto out;
2093 
2094 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2095 						 process_synthesized_event,
2096 						NULL);
2097 	if (err < 0) {
2098 		pr_err("Couldn't synthesize thread map.\n");
2099 		return err;
2100 	}
2101 
2102 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2103 					     process_synthesized_event, NULL);
2104 	if (err < 0) {
2105 		pr_err("Couldn't synthesize cpu map.\n");
2106 		return err;
2107 	}
2108 
2109 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2110 						machine, opts);
2111 	if (err < 0) {
2112 		pr_warning("Couldn't synthesize bpf events.\n");
2113 		err = 0;
2114 	}
2115 
2116 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2117 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2118 						     machine);
2119 		if (err < 0) {
2120 			pr_warning("Couldn't synthesize cgroup events.\n");
2121 			err = 0;
2122 		}
2123 	}
2124 
2125 	if (rec->opts.nr_threads_synthesize > 1) {
2126 		mutex_init(&synth_lock);
2127 		perf_set_multithreaded();
2128 		f = process_locked_synthesized_event;
2129 	}
2130 
2131 	if (rec->opts.synth & PERF_SYNTH_TASK) {
2132 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2133 
2134 		err = __machine__synthesize_threads(machine, tool, &opts->target,
2135 						    rec->evlist->core.threads,
2136 						    f, needs_mmap, opts->sample_address,
2137 						    rec->opts.nr_threads_synthesize);
2138 	}
2139 
2140 	if (rec->opts.nr_threads_synthesize > 1) {
2141 		perf_set_singlethreaded();
2142 		mutex_destroy(&synth_lock);
2143 	}
2144 
2145 out:
2146 	return err;
2147 }
2148 
2149 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2150 {
2151 	struct record *rec = data;
2152 	pthread_kill(rec->thread_id, SIGUSR2);
2153 	return 0;
2154 }
2155 
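/*
 * Set up the side band event list: wire up the --switch-output-event
 * callback (SIGUSR2 to the main thread) and, with libbpf support, the
 * PERF_RECORD_BPF_EVENT events, then start the side band thread.
 */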
2156 static int record__setup_sb_evlist(struct record *rec)
2157 {
2158 	struct record_opts *opts = &rec->opts;
2159 
2160 	if (rec->sb_evlist != NULL) {
2161 		/*
2162 		 * We get here if --switch-output-event populated the
2163 		 * sb_evlist, so associate a callback that will send a SIGUSR2
2164 		 * to the main thread.
2165 		 */
2166 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2167 		rec->thread_id = pthread_self();
2168 	}
2169 #ifdef HAVE_LIBBPF_SUPPORT
2170 	if (!opts->no_bpf_event) {
2171 		if (rec->sb_evlist == NULL) {
2172 			rec->sb_evlist = evlist__new();
2173 
2174 			if (rec->sb_evlist == NULL) {
2175 				pr_err("Couldn't create side band evlist.\n");
2176 				return -1;
2177 			}
2178 		}
2179 
2180 		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
2181 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
2182 			return -1;
2183 		}
2184 	}
2185 #endif
2186 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2187 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2188 		opts->no_bpf_event = true;
2189 	}
2190 
2191 	return 0;
2192 }
2193 
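/*
 * Capture reference times for the selected clockid: sample gettimeofday()
 * and clock_gettime() back to back and store both in the header so that
 * perf timestamps can be correlated with wall-clock time.
 */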
2194 static int record__init_clock(struct record *rec)
2195 {
2196 	struct perf_session *session = rec->session;
2197 	struct timespec ref_clockid;
2198 	struct timeval ref_tod;
2199 	u64 ref;
2200 
2201 	if (!rec->opts.use_clockid)
2202 		return 0;
2203 
2204 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2205 		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2206 
2207 	session->header.env.clock.clockid = rec->opts.clockid;
2208 
2209 	if (gettimeofday(&ref_tod, NULL) != 0) {
2210 		pr_err("gettimeofday failed, cannot set reference time.\n");
2211 		return -1;
2212 	}
2213 
2214 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2215 		pr_err("clock_gettime failed, cannot set reference time.\n");
2216 		return -1;
2217 	}
2218 
2219 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2220 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2221 
2222 	session->header.env.clock.tod_ns = ref;
2223 
2224 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2225 	      (u64) ref_clockid.tv_nsec;
2226 
2227 	session->header.env.clock.clockid_ns = ref;
2228 	return 0;
2229 }
2230 
2231 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2232 {
2233 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2234 		trigger_hit(&auxtrace_snapshot_trigger);
2235 		auxtrace_record__snapshot_started = 1;
2236 		if (auxtrace_record__snapshot_start(rec->itr))
2237 			trigger_error(&auxtrace_snapshot_trigger);
2238 	}
2239 }
2240 
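/*
 * Ask a reader thread to terminate by closing the write end of its message
 * pipe, then wait for the acknowledgment on its ack pipe.
 */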
2241 static int record__terminate_thread(struct record_thread *thread_data)
2242 {
2243 	int err;
2244 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2245 	pid_t tid = thread_data->tid;
2246 
2247 	close(thread_data->pipes.msg[1]);
2248 	thread_data->pipes.msg[1] = -1;
2249 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2250 	if (err > 0)
2251 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2252 	else
2253 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2254 			   thread->tid, tid);
2255 
2256 	return 0;
2257 }
2258 
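/*
 * Start the reader threads in parallel (--threads) mode: block signals
 * while creating each detached thread with its affinity mask, and wait for
 * its start notification before moving on.
 */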
2259 static int record__start_threads(struct record *rec)
2260 {
2261 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2262 	struct record_thread *thread_data = rec->thread_data;
2263 	sigset_t full, mask;
2264 	pthread_t handle;
2265 	pthread_attr_t attrs;
2266 
2267 	thread = &thread_data[0];
2268 
2269 	if (!record__threads_enabled(rec))
2270 		return 0;
2271 
2272 	sigfillset(&full);
2273 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2274 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2275 		return -1;
2276 	}
2277 
2278 	pthread_attr_init(&attrs);
2279 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2280 
2281 	for (t = 1; t < nr_threads; t++) {
2282 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2283 
2284 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2285 		pthread_attr_setaffinity_np(&attrs,
2286 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2287 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2288 #endif
2289 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2290 			for (tt = 1; tt < t; tt++)
2291 				record__terminate_thread(&thread_data[t]);
2292 			pr_err("Failed to start threads: %s\n", strerror(errno));
2293 			ret = -1;
2294 			goto out_err;
2295 		}
2296 
2297 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2298 		if (err > 0)
2299 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2300 				  thread_msg_tags[msg]);
2301 		else
2302 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2303 				   thread->tid, rec->thread_data[t].tid);
2304 	}
2305 
2306 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2307 			(cpu_set_t *)thread->mask->affinity.bits);
2308 
2309 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2310 
2311 out_err:
2312 	pthread_attr_destroy(&attrs);
2313 
2314 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2315 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2316 		ret = -1;
2317 	}
2318 
2319 	return ret;
2320 }
2321 
2322 static int record__stop_threads(struct record *rec)
2323 {
2324 	int t;
2325 	struct record_thread *thread_data = rec->thread_data;
2326 
2327 	for (t = 1; t < rec->nr_threads; t++)
2328 		record__terminate_thread(&thread_data[t]);
2329 
2330 	for (t = 0; t < rec->nr_threads; t++) {
2331 		rec->samples += thread_data[t].samples;
2332 		if (!record__threads_enabled(rec))
2333 			continue;
2334 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2335 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2336 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2337 			 thread_data[t].samples, thread_data[t].waking);
2338 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2339 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2340 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2341 		else
2342 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2343 	}
2344 
2345 	return 0;
2346 }
2347 
2348 static unsigned long record__waking(struct record *rec)
2349 {
2350 	int t;
2351 	unsigned long waking = 0;
2352 	struct record_thread *thread_data = rec->thread_data;
2353 
2354 	for (t = 0; t < rec->nr_threads; t++)
2355 		waking += thread_data[t].waking;
2356 
2357 	return waking;
2358 }
2359 
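/*
 * Main body of 'perf record': set up the session and output, synthesize
 * metadata, optionally fork the workload, then run the event loop until
 * the workload exits or recording is stopped.
 */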
2360 static int __cmd_record(struct record *rec, int argc, const char **argv)
2361 {
2362 	int err;
2363 	int status = 0;
2364 	const bool forks = argc > 0;
2365 	struct perf_tool *tool = &rec->tool;
2366 	struct record_opts *opts = &rec->opts;
2367 	struct perf_data *data = &rec->data;
2368 	struct perf_session *session;
2369 	bool disabled = false, draining = false;
2370 	int fd;
2371 	float ratio = 0;
2372 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2373 
2374 	atexit(record__sig_exit);
2375 	signal(SIGCHLD, sig_handler);
2376 	signal(SIGINT, sig_handler);
2377 	signal(SIGTERM, sig_handler);
2378 	signal(SIGSEGV, sigsegv_handler);
2379 
2380 	if (rec->opts.record_cgroup) {
2381 #ifndef HAVE_FILE_HANDLE
2382 		pr_err("cgroup tracking is not supported\n");
2383 		return -1;
2384 #endif
2385 	}
2386 
2387 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2388 		signal(SIGUSR2, snapshot_sig_handler);
2389 		if (rec->opts.auxtrace_snapshot_mode)
2390 			trigger_on(&auxtrace_snapshot_trigger);
2391 		if (rec->switch_output.enabled)
2392 			trigger_on(&switch_output_trigger);
2393 	} else {
2394 		signal(SIGUSR2, SIG_IGN);
2395 	}
2396 
2397 	perf_tool__init(tool, /*ordered_events=*/true);
2398 	tool->sample		= process_sample_event;
2399 	tool->fork		= perf_event__process_fork;
2400 	tool->exit		= perf_event__process_exit;
2401 	tool->comm		= perf_event__process_comm;
2402 	tool->namespaces	= perf_event__process_namespaces;
2403 	tool->mmap		= build_id__process_mmap;
2404 	tool->mmap2		= build_id__process_mmap2;
2405 	tool->itrace_start	= process_timestamp_boundary;
2406 	tool->aux		= process_timestamp_boundary;
2407 	tool->namespace_events	= rec->opts.record_namespaces;
2408 	tool->cgroup_events	= rec->opts.record_cgroup;
2409 	session = perf_session__new(data, tool);
2410 	if (IS_ERR(session)) {
2411 		pr_err("Perf session creation failed.\n");
2412 		return PTR_ERR(session);
2413 	}
2414 
2415 	if (record__threads_enabled(rec)) {
2416 		if (perf_data__is_pipe(&rec->data)) {
2417 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2418 			return -1;
2419 		}
2420 		if (rec->opts.full_auxtrace) {
2421 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2422 			return -1;
2423 		}
2424 	}
2425 
2426 	fd = perf_data__fd(data);
2427 	rec->session = session;
2428 
2429 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2430 		pr_err("Compression initialization failed.\n");
2431 		return -1;
2432 	}
2433 #ifdef HAVE_EVENTFD_SUPPORT
2434 	done_fd = eventfd(0, EFD_NONBLOCK);
2435 	if (done_fd < 0) {
2436 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2437 		status = -1;
2438 		goto out_delete_session;
2439 	}
2440 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2441 	if (err < 0) {
2442 		pr_err("Failed to add wakeup eventfd to poll list\n");
2443 		status = err;
2444 		goto out_delete_session;
2445 	}
2446 #endif // HAVE_EVENTFD_SUPPORT
2447 
2448 	session->header.env.comp_type  = PERF_COMP_ZSTD;
2449 	session->header.env.comp_level = rec->opts.comp_level;
2450 
2451 	if (rec->opts.kcore &&
2452 	    !record__kcore_readable(&session->machines.host)) {
2453 		pr_err("ERROR: kcore is not readable.\n");
2454 		return -1;
2455 	}
2456 
2457 	if (record__init_clock(rec))
2458 		return -1;
2459 
2460 	record__init_features(rec);
2461 
2462 	if (forks) {
2463 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2464 					       workload_exec_failed_signal);
2465 		if (err < 0) {
2466 			pr_err("Couldn't run the workload!\n");
2467 			status = err;
2468 			goto out_delete_session;
2469 		}
2470 	}
2471 
2472 	/*
2473 	 * If we have just a single event and are sending data
2474 	 * through a pipe, we need to force sample id allocation,
2475 	 * because we synthesize the event name through the pipe
2476 	 * and need the id for that.
2477 	 */
2478 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2479 		rec->opts.sample_id = true;
2480 
2481 	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2482 		rec->timestamp_filename = false;
2483 		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2484 	}
2485 
2486 	evlist__uniquify_name(rec->evlist);
2487 
2488 	evlist__config(rec->evlist, opts, &callchain_param);
2489 
2490 	/* Debug message used by test scripts */
2491 	pr_debug3("perf record opening and mmapping events\n");
2492 	if (record__open(rec) != 0) {
2493 		err = -1;
2494 		goto out_free_threads;
2495 	}
2496 	/* Debug message used by test scripts */
2497 	pr_debug3("perf record done opening and mmapping events\n");
2498 	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2499 
2500 	if (rec->opts.kcore) {
2501 		err = record__kcore_copy(&session->machines.host, data);
2502 		if (err) {
2503 			pr_err("ERROR: Failed to copy kcore\n");
2504 			goto out_free_threads;
2505 		}
2506 	}
2507 
2508 	/*
2509 	 * Normally perf_session__new would do this, but it doesn't have the
2510 	 * evlist.
2511 	 */
2512 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2513 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2514 		rec->tool.ordered_events = false;
2515 	}
2516 
2517 	if (evlist__nr_groups(rec->evlist) == 0)
2518 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2519 
2520 	if (data->is_pipe) {
2521 		err = perf_header__write_pipe(fd);
2522 		if (err < 0)
2523 			goto out_free_threads;
2524 	} else {
2525 		err = perf_session__write_header(session, rec->evlist, fd, false);
2526 		if (err < 0)
2527 			goto out_free_threads;
2528 	}
2529 
2530 	err = -1;
2531 	if (!rec->no_buildid
2532 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2533 		pr_err("Couldn't generate buildids. "
2534 		       "Use --no-buildid to profile anyway.\n");
2535 		goto out_free_threads;
2536 	}
2537 
2538 	if (!evlist__needs_bpf_sb_event(rec->evlist))
2539 		opts->no_bpf_event = true;
2540 
2541 	err = record__setup_sb_evlist(rec);
2542 	if (err)
2543 		goto out_free_threads;
2544 
2545 	err = record__synthesize(rec, false);
2546 	if (err < 0)
2547 		goto out_free_threads;
2548 
2549 	if (rec->realtime_prio) {
2550 		struct sched_param param;
2551 
2552 		param.sched_priority = rec->realtime_prio;
2553 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2554 			pr_err("Could not set realtime priority.\n");
2555 			err = -1;
2556 			goto out_free_threads;
2557 		}
2558 	}
2559 
2560 	if (record__start_threads(rec))
2561 		goto out_free_threads;
2562 
2563 	/*
2564 	 * When perf is starting the traced process, all the events
2565 	 * (apart from group members) have enable_on_exec=1 set,
2566 	 * so don't spoil it by prematurely enabling them.
2567 	 */
2568 	if (!target__none(&opts->target) && !opts->target.initial_delay)
2569 		evlist__enable(rec->evlist);
2570 
2571 	/*
2572 	 * Let the child rip
2573 	 */
2574 	if (forks) {
2575 		struct machine *machine = &session->machines.host;
2576 		union perf_event *event;
2577 		pid_t tgid;
2578 
2579 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2580 		if (event == NULL) {
2581 			err = -ENOMEM;
2582 			goto out_child;
2583 		}
2584 
2585 		/*
2586 		 * Some H/W events are generated before the COMM event,
2587 		 * which is emitted during exec(), so perf script
2588 		 * cannot see a correct process name for those events.
2589 		 * Synthesize a COMM event to prevent it.
2590 		 */
2591 		tgid = perf_event__synthesize_comm(tool, event,
2592 						   rec->evlist->workload.pid,
2593 						   process_synthesized_event,
2594 						   machine);
2595 		free(event);
2596 
2597 		if (tgid == -1)
2598 			goto out_child;
2599 
2600 		event = malloc(sizeof(event->namespaces) +
2601 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2602 			       machine->id_hdr_size);
2603 		if (event == NULL) {
2604 			err = -ENOMEM;
2605 			goto out_child;
2606 		}
2607 
2608 		/*
2609 		 * Synthesize NAMESPACES event for the command specified.
2610 		 */
2611 		perf_event__synthesize_namespaces(tool, event,
2612 						  rec->evlist->workload.pid,
2613 						  tgid, process_synthesized_event,
2614 						  machine);
2615 		free(event);
2616 
2617 		evlist__start_workload(rec->evlist);
2618 	}
2619 
2620 	if (opts->target.initial_delay) {
2621 		pr_info(EVLIST_DISABLED_MSG);
2622 		if (opts->target.initial_delay > 0) {
2623 			usleep(opts->target.initial_delay * USEC_PER_MSEC);
2624 			evlist__enable(rec->evlist);
2625 			pr_info(EVLIST_ENABLED_MSG);
2626 		}
2627 	}
2628 
2629 	err = event_enable_timer__start(rec->evlist->eet);
2630 	if (err)
2631 		goto out_child;
2632 
2633 	/* Debug message used by test scripts */
2634 	pr_debug3("perf record has started\n");
2635 	fflush(stderr);
2636 
2637 	trigger_ready(&auxtrace_snapshot_trigger);
2638 	trigger_ready(&switch_output_trigger);
2639 	perf_hooks__invoke_record_start();
2640 
2641 	/*
2642 	 * Must write FINISHED_INIT so it will be seen after all other
2643 	 * synthesized user events, but before any regular events.
2644 	 */
2645 	err = write_finished_init(rec, false);
2646 	if (err < 0)
2647 		goto out_child;
2648 
2649 	for (;;) {
2650 		unsigned long long hits = thread->samples;
2651 
2652 		/*
2653 		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
2654 		 * here: when done == true and hits != rec->samples
2655 		 * in the previous round.
2656 		 *
2657 		 * evlist__toggle_bkw_mmap() ensures we never
2658 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2659 		 */
2660 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2661 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2662 
2663 		if (record__mmap_read_all(rec, false) < 0) {
2664 			trigger_error(&auxtrace_snapshot_trigger);
2665 			trigger_error(&switch_output_trigger);
2666 			err = -1;
2667 			goto out_child;
2668 		}
2669 
2670 		if (auxtrace_record__snapshot_started) {
2671 			auxtrace_record__snapshot_started = 0;
2672 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2673 				record__read_auxtrace_snapshot(rec, false);
2674 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2675 				pr_err("AUX area tracing snapshot failed\n");
2676 				err = -1;
2677 				goto out_child;
2678 			}
2679 		}
2680 
2681 		if (trigger_is_hit(&switch_output_trigger)) {
2682 			/*
2683 			 * If switch_output_trigger is hit, the data in the
2684 			 * overwritable ring buffer should have been collected,
2685 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2686 			 *
2687 			 * If SIGUSR2 was raised after or during record__mmap_read_all(),
2688 			 * record__mmap_read_all() didn't collect data from the
2689 			 * overwritable ring buffer. Read again.
2690 			 */
2691 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2692 				continue;
2693 			trigger_ready(&switch_output_trigger);
2694 
2695 			/*
2696 			 * Reenable events in overwrite ring buffer after
2697 			 * record__mmap_read_all(): we should have collected
2698 			 * data from it.
2699 			 */
2700 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2701 
2702 			if (!quiet)
2703 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2704 					record__waking(rec));
2705 			thread->waking = 0;
2706 			fd = record__switch_output(rec, false);
2707 			if (fd < 0) {
2708 				pr_err("Failed to switch to new file\n");
2709 				trigger_error(&switch_output_trigger);
2710 				err = fd;
2711 				goto out_child;
2712 			}
2713 
2714 			/* re-arm the alarm */
2715 			if (rec->switch_output.time)
2716 				alarm(rec->switch_output.time);
2717 		}
2718 
2719 		if (hits == thread->samples) {
2720 			if (done || draining)
2721 				break;
2722 			err = fdarray__poll(&thread->pollfd, -1);
2723 			/*
2724 			 * Propagate the error only if there is one. Ignore a positive
2725 			 * number of returned events and an interrupt (EINTR) error.
2726 			 */
2727 			if (err > 0 || (err < 0 && errno == EINTR))
2728 				err = 0;
2729 			thread->waking++;
2730 
2731 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2732 					    record__thread_munmap_filtered, NULL) == 0)
2733 				draining = true;
2734 
2735 			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2736 			if (err)
2737 				goto out_child;
2738 		}
2739 
2740 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2741 			switch (cmd) {
2742 			case EVLIST_CTL_CMD_SNAPSHOT:
2743 				hit_auxtrace_snapshot_trigger(rec);
2744 				evlist__ctlfd_ack(rec->evlist);
2745 				break;
2746 			case EVLIST_CTL_CMD_STOP:
2747 				done = 1;
2748 				break;
2749 			case EVLIST_CTL_CMD_ACK:
2750 			case EVLIST_CTL_CMD_UNSUPPORTED:
2751 			case EVLIST_CTL_CMD_ENABLE:
2752 			case EVLIST_CTL_CMD_DISABLE:
2753 			case EVLIST_CTL_CMD_EVLIST:
2754 			case EVLIST_CTL_CMD_PING:
2755 			default:
2756 				break;
2757 			}
2758 		}
2759 
2760 		err = event_enable_timer__process(rec->evlist->eet);
2761 		if (err < 0)
2762 			goto out_child;
2763 		if (err) {
2764 			err = 0;
2765 			done = 1;
2766 		}
2767 
2768 		/*
2769 		 * When perf is starting the traced process, the events die with
2770 		 * the process at the end and we wait for that. Thus there is no
2771 		 * need to disable events in this case.
2772 		 */
2773 		if (done && !disabled && !target__none(&opts->target)) {
2774 			trigger_off(&auxtrace_snapshot_trigger);
2775 			evlist__disable(rec->evlist);
2776 			disabled = true;
2777 		}
2778 	}
2779 
2780 	trigger_off(&auxtrace_snapshot_trigger);
2781 	trigger_off(&switch_output_trigger);
2782 
2783 	if (opts->auxtrace_snapshot_on_exit)
2784 		record__auxtrace_snapshot_exit(rec);
2785 
2786 	if (forks && workload_exec_errno) {
2787 		char msg[STRERR_BUFSIZE], strevsels[2048];
2788 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2789 
2790 		evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2791 
2792 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2793 			strevsels, argv[0], emsg);
2794 		err = -1;
2795 		goto out_child;
2796 	}
2797 
2798 	if (!quiet)
2799 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2800 			record__waking(rec));
2801 
2802 	write_finished_init(rec, true);
2803 
2804 	if (target__none(&rec->opts.target))
2805 		record__synthesize_workload(rec, true);
2806 
2807 out_child:
2808 	record__stop_threads(rec);
2809 	record__mmap_read_all(rec, true);
2810 out_free_threads:
2811 	record__free_thread_data(rec);
2812 	evlist__finalize_ctlfd(rec->evlist);
2813 	record__aio_mmap_read_sync(rec);
2814 
2815 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2816 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2817 		session->header.env.comp_ratio = ratio + 0.5;
2818 	}
2819 
2820 	if (forks) {
2821 		int exit_status;
2822 
2823 		if (!child_finished)
2824 			kill(rec->evlist->workload.pid, SIGTERM);
2825 
2826 		wait(&exit_status);
2827 
2828 		if (err < 0)
2829 			status = err;
2830 		else if (WIFEXITED(exit_status))
2831 			status = WEXITSTATUS(exit_status);
2832 		else if (WIFSIGNALED(exit_status))
2833 			signr = WTERMSIG(exit_status);
2834 	} else
2835 		status = err;
2836 
2837 	if (rec->off_cpu)
2838 		rec->bytes_written += off_cpu_write(rec->session);
2839 
2840 	record__read_lost_samples(rec);
2841 	record__synthesize(rec, true);
2842 	/* this will be recalculated during process_buildids() */
2843 	rec->samples = 0;
2844 
2845 	if (!err) {
2846 		if (!rec->timestamp_filename) {
2847 			record__finish_output(rec);
2848 		} else {
2849 			fd = record__switch_output(rec, true);
2850 			if (fd < 0) {
2851 				status = fd;
2852 				goto out_delete_session;
2853 			}
2854 		}
2855 	}
2856 
2857 	perf_hooks__invoke_record_end();
2858 
2859 	if (!err && !quiet) {
2860 		char samples[128];
2861 		const char *postfix = rec->timestamp_filename ?
2862 					".<timestamp>" : "";
2863 
2864 		if (rec->samples && !rec->opts.full_auxtrace)
2865 			scnprintf(samples, sizeof(samples),
2866 				  " (%" PRIu64 " samples)", rec->samples);
2867 		else
2868 			samples[0] = '\0';
2869 
2870 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2871 			perf_data__size(data) / 1024.0 / 1024.0,
2872 			data->path, postfix, samples);
2873 		if (ratio) {
2874 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2875 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2876 					ratio);
2877 		}
2878 		fprintf(stderr, " ]\n");
2879 	}
2880 
2881 out_delete_session:
2882 #ifdef HAVE_EVENTFD_SUPPORT
2883 	if (done_fd >= 0) {
2884 		fd = done_fd;
2885 		done_fd = -1;
2886 
2887 		close(fd);
2888 	}
2889 #endif
2890 	zstd_fini(&session->zstd_data);
2891 	if (!opts->no_bpf_event)
2892 		evlist__stop_sb_thread(rec->sb_evlist);
2893 
2894 	perf_session__delete(session);
2895 	return status;
2896 }
2897 
2898 static void callchain_debug(struct callchain_param *callchain)
2899 {
2900 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2901 
2902 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2903 
2904 	if (callchain->record_mode == CALLCHAIN_DWARF)
2905 		pr_debug("callchain: stack dump size %d\n",
2906 			 callchain->dump_size);
2907 }
2908 
2909 int record_opts__parse_callchain(struct record_opts *record,
2910 				 struct callchain_param *callchain,
2911 				 const char *arg, bool unset)
2912 {
2913 	int ret;
2914 	callchain->enabled = !unset;
2915 
2916 	/* --no-call-graph */
2917 	if (unset) {
2918 		callchain->record_mode = CALLCHAIN_NONE;
2919 		pr_debug("callchain: disabled\n");
2920 		return 0;
2921 	}
2922 
2923 	ret = parse_callchain_record_opt(arg, callchain);
2924 	if (!ret) {
2925 		/* Enable data address sampling for DWARF unwind. */
2926 		if (callchain->record_mode == CALLCHAIN_DWARF)
2927 			record->sample_address = true;
2928 		callchain_debug(callchain);
2929 	}
2930 
2931 	return ret;
2932 }
2933 
2934 int record_parse_callchain_opt(const struct option *opt,
2935 			       const char *arg,
2936 			       int unset)
2937 {
2938 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2939 }
2940 
2941 int record_callchain_opt(const struct option *opt,
2942 			 const char *arg __maybe_unused,
2943 			 int unset __maybe_unused)
2944 {
2945 	struct callchain_param *callchain = opt->value;
2946 
2947 	callchain->enabled = true;
2948 
2949 	if (callchain->record_mode == CALLCHAIN_NONE)
2950 		callchain->record_mode = CALLCHAIN_FP;
2951 
2952 	callchain_debug(callchain);
2953 	return 0;
2954 }
2955 
2956 static int perf_record_config(const char *var, const char *value, void *cb)
2957 {
2958 	struct record *rec = cb;
2959 
2960 	if (!strcmp(var, "record.build-id")) {
2961 		if (!strcmp(value, "cache"))
2962 			rec->no_buildid_cache = false;
2963 		else if (!strcmp(value, "no-cache"))
2964 			rec->no_buildid_cache = true;
2965 		else if (!strcmp(value, "skip"))
2966 			rec->no_buildid = true;
2967 		else if (!strcmp(value, "mmap"))
2968 			rec->buildid_mmap = true;
2969 		else
2970 			return -1;
2971 		return 0;
2972 	}
2973 	if (!strcmp(var, "record.call-graph")) {
2974 		var = "call-graph.record-mode";
2975 		return perf_default_config(var, value, cb);
2976 	}
2977 #ifdef HAVE_AIO_SUPPORT
2978 	if (!strcmp(var, "record.aio")) {
2979 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
2980 		if (!rec->opts.nr_cblocks)
2981 			rec->opts.nr_cblocks = nr_cblocks_default;
2982 	}
2983 #endif
2984 	if (!strcmp(var, "record.debuginfod")) {
2985 		rec->debuginfod.urls = strdup(value);
2986 		if (!rec->debuginfod.urls)
2987 			return -ENOMEM;
2988 		rec->debuginfod.set = true;
2989 	}
2990 
2991 	return 0;
2992 }
2993 
2994 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
2995 {
2996 	struct record *rec = (struct record *)opt->value;
2997 
2998 	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
2999 }
3000 
3001 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
3002 {
3003 	struct record_opts *opts = (struct record_opts *)opt->value;
3004 
3005 	if (unset || !str)
3006 		return 0;
3007 
3008 	if (!strcasecmp(str, "node"))
3009 		opts->affinity = PERF_AFFINITY_NODE;
3010 	else if (!strcasecmp(str, "cpu"))
3011 		opts->affinity = PERF_AFFINITY_CPU;
3012 
3013 	return 0;
3014 }
3015 
3016 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
3017 {
3018 	mask->nbits = nr_bits;
3019 	mask->bits = bitmap_zalloc(mask->nbits);
3020 	if (!mask->bits)
3021 		return -ENOMEM;
3022 
3023 	return 0;
3024 }
3025 
3026 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
3027 {
3028 	bitmap_free(mask->bits);
3029 	mask->nbits = 0;
3030 }
3031 
3032 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
3033 {
3034 	int ret;
3035 
3036 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
3037 	if (ret) {
3038 		mask->affinity.bits = NULL;
3039 		return ret;
3040 	}
3041 
3042 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
3043 	if (ret) {
3044 		record__mmap_cpu_mask_free(&mask->maps);
3045 		mask->maps.bits = NULL;
3046 	}
3047 
3048 	return ret;
3049 }
3050 
3051 static void record__thread_mask_free(struct thread_mask *mask)
3052 {
3053 	record__mmap_cpu_mask_free(&mask->maps);
3054 	record__mmap_cpu_mask_free(&mask->affinity);
3055 }
3056 
3057 static int record__parse_threads(const struct option *opt, const char *str, int unset)
3058 {
3059 	int s;
3060 	struct record_opts *opts = opt->value;
3061 
3062 	if (unset || !str || !strlen(str)) {
3063 		opts->threads_spec = THREAD_SPEC__CPU;
3064 	} else {
3065 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3066 			if (s == THREAD_SPEC__USER) {
3067 				opts->threads_user_spec = strdup(str);
3068 				if (!opts->threads_user_spec)
3069 					return -ENOMEM;
3070 				opts->threads_spec = THREAD_SPEC__USER;
3071 				break;
3072 			}
3073 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3074 				opts->threads_spec = s;
3075 				break;
3076 			}
3077 		}
3078 	}
3079 
3080 	if (opts->threads_spec == THREAD_SPEC__USER)
3081 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3082 	else
3083 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3084 
3085 	return 0;
3086 }
3087 
3088 static int parse_output_max_size(const struct option *opt,
3089 				 const char *str, int unset)
3090 {
3091 	unsigned long *s = (unsigned long *)opt->value;
3092 	static struct parse_tag tags_size[] = {
3093 		{ .tag  = 'B', .mult = 1       },
3094 		{ .tag  = 'K', .mult = 1 << 10 },
3095 		{ .tag  = 'M', .mult = 1 << 20 },
3096 		{ .tag  = 'G', .mult = 1 << 30 },
3097 		{ .tag  = 0 },
3098 	};
3099 	unsigned long val;
3100 
3101 	if (unset) {
3102 		*s = 0;
3103 		return 0;
3104 	}
3105 
3106 	val = parse_tag_value(str, tags_size);
3107 	if (val != (unsigned long) -1) {
3108 		*s = val;
3109 		return 0;
3110 	}
3111 
3112 	return -1;
3113 }
3114 
3115 static int record__parse_mmap_pages(const struct option *opt,
3116 				    const char *str,
3117 				    int unset __maybe_unused)
3118 {
3119 	struct record_opts *opts = opt->value;
3120 	char *s, *p;
3121 	unsigned int mmap_pages;
3122 	int ret;
3123 
3124 	if (!str)
3125 		return -EINVAL;
3126 
3127 	s = strdup(str);
3128 	if (!s)
3129 		return -ENOMEM;
3130 
3131 	p = strchr(s, ',');
3132 	if (p)
3133 		*p = '\0';
3134 
3135 	if (*s) {
3136 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3137 		if (ret)
3138 			goto out_free;
3139 		opts->mmap_pages = mmap_pages;
3140 	}
3141 
3142 	if (!p) {
3143 		ret = 0;
3144 		goto out_free;
3145 	}
3146 
3147 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3148 	if (ret)
3149 		goto out_free;
3150 
3151 	opts->auxtrace_mmap_pages = mmap_pages;
3152 
3153 out_free:
3154 	free(s);
3155 	return ret;
3156 }
3157 
3158 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3159 {
3160 }
3161 
3162 static int parse_control_option(const struct option *opt,
3163 				const char *str,
3164 				int unset __maybe_unused)
3165 {
3166 	struct record_opts *opts = opt->value;
3167 
3168 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3169 }
3170 
3171 static void switch_output_size_warn(struct record *rec)
3172 {
3173 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3174 	struct switch_output *s = &rec->switch_output;
3175 
3176 	wakeup_size /= 2;
3177 
3178 	if (s->size < wakeup_size) {
3179 		char buf[100];
3180 
3181 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3182 		pr_warning("WARNING: switch-output data size is lower than the "
3183 			   "wakeup kernel buffer size (%s); "
3184 			   "expect bigger perf.data sizes\n", buf);
3185 	}
3186 }
3187 
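/*
 * Parse the --switch-output argument: "signal", a size threshold (B/K/M/G)
 * or a time threshold (s/m/h/d), and enable timestamped output file names
 * accordingly.
 */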
3188 static int switch_output_setup(struct record *rec)
3189 {
3190 	struct switch_output *s = &rec->switch_output;
3191 	static struct parse_tag tags_size[] = {
3192 		{ .tag  = 'B', .mult = 1       },
3193 		{ .tag  = 'K', .mult = 1 << 10 },
3194 		{ .tag  = 'M', .mult = 1 << 20 },
3195 		{ .tag  = 'G', .mult = 1 << 30 },
3196 		{ .tag  = 0 },
3197 	};
3198 	static struct parse_tag tags_time[] = {
3199 		{ .tag  = 's', .mult = 1        },
3200 		{ .tag  = 'm', .mult = 60       },
3201 		{ .tag  = 'h', .mult = 60*60    },
3202 		{ .tag  = 'd', .mult = 60*60*24 },
3203 		{ .tag  = 0 },
3204 	};
3205 	unsigned long val;
3206 
3207 	/*
3208 	 * If we're using --switch-output-events, then we imply
3209 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3210 	 * thread to its parent.
3211 	 */
3212 	if (rec->switch_output_event_set) {
3213 		if (record__threads_enabled(rec)) {
3214 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3215 			return 0;
3216 		}
3217 		goto do_signal;
3218 	}
3219 
3220 	if (!s->set)
3221 		return 0;
3222 
3223 	if (record__threads_enabled(rec)) {
3224 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3225 		return 0;
3226 	}
3227 
3228 	if (!strcmp(s->str, "signal")) {
3229 do_signal:
3230 		s->signal = true;
3231 		pr_debug("switch-output with SIGUSR2 signal\n");
3232 		goto enabled;
3233 	}
3234 
3235 	val = parse_tag_value(s->str, tags_size);
3236 	if (val != (unsigned long) -1) {
3237 		s->size = val;
3238 		pr_debug("switch-output with %s size threshold\n", s->str);
3239 		goto enabled;
3240 	}
3241 
3242 	val = parse_tag_value(s->str, tags_time);
3243 	if (val != (unsigned long) -1) {
3244 		s->time = val;
3245 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3246 			 s->str, s->time);
3247 		goto enabled;
3248 	}
3249 
3250 	return -1;
3251 
3252 enabled:
3253 	rec->timestamp_filename = true;
3254 	s->enabled              = true;
3255 
3256 	if (s->size && !rec->opts.no_buffering)
3257 		switch_output_size_warn(rec);
3258 
3259 	return 0;
3260 }
3261 
3262 static const char * const __record_usage[] = {
3263 	"perf record [<options>] [<command>]",
3264 	"perf record [<options>] -- <command> [<options>]",
3265 	NULL
3266 };
3267 const char * const *record_usage = __record_usage;
3268 
3269 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
3270 				  struct perf_sample *sample, struct machine *machine)
3271 {
3272 	/*
3273 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3274 	 * so there is no need to add them twice.
3275 	 */
3276 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3277 		return 0;
3278 	return perf_event__process_mmap(tool, event, sample, machine);
3279 }
3280 
3281 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
3282 				   struct perf_sample *sample, struct machine *machine)
3283 {
3284 	/*
3285 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3286 	 * so there is no need to add them twice.
3287 	 */
3288 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3289 		return 0;
3290 
3291 	return perf_event__process_mmap2(tool, event, sample, machine);
3292 }
3293 
3294 static int process_timestamp_boundary(const struct perf_tool *tool,
3295 				      union perf_event *event __maybe_unused,
3296 				      struct perf_sample *sample,
3297 				      struct machine *machine __maybe_unused)
3298 {
3299 	struct record *rec = container_of(tool, struct record, tool);
3300 
3301 	set_timestamp_boundary(rec, sample->time);
3302 	return 0;
3303 }
3304 
3305 static int parse_record_synth_option(const struct option *opt,
3306 				     const char *str,
3307 				     int unset __maybe_unused)
3308 {
3309 	struct record_opts *opts = opt->value;
3310 	char *p = strdup(str);
3311 
3312 	if (p == NULL)
3313 		return -1;
3314 
3315 	opts->synth = parse_synth_opt(p);
3316 	free(p);
3317 
3318 	if (opts->synth < 0) {
3319 		pr_err("Invalid synth option: %s\n", str);
3320 		return -1;
3321 	}
3322 	return 0;
3323 }
3324 
3325 /*
3326  * XXX Ideally would be local to cmd_record() and passed to a record__new
3327  * because we need to have access to it in record__exit, which is called
3328  * after cmd_record() exits, but since record_options need to be accessible to
3329  * builtin-script, leave it here.
3330  *
3331  * At least we don't touch it in all the other functions here directly.
3332  *
3333  * Just say no to tons of global variables, sigh.
3334  */
3335 static struct record record = {
3336 	.opts = {
3337 		.sample_time	     = true,
3338 		.mmap_pages	     = UINT_MAX,
3339 		.user_freq	     = UINT_MAX,
3340 		.user_interval	     = ULLONG_MAX,
3341 		.freq		     = 4000,
3342 		.target		     = {
3343 			.uses_mmap   = true,
3344 			.default_per_cpu = true,
3345 		},
3346 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3347 		.nr_threads_synthesize = 1,
3348 		.ctl_fd              = -1,
3349 		.ctl_fd_ack          = -1,
3350 		.synth               = PERF_SYNTH_ALL,
3351 	},
3352 };
3353 
3354 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3355 	"\n\t\t\t\tDefault: fp";
3356 
3357 static bool dry_run;
3358 
3359 static struct parse_events_option_args parse_events_option_args = {
3360 	.evlistp = &record.evlist,
3361 };
3362 
3363 static struct parse_events_option_args switch_output_parse_events_option_args = {
3364 	.evlistp = &record.sb_evlist,
3365 };
3366 
3367 /*
3368  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3369  * with it and switch to use the library functions in perf_evlist that came
3370  * from builtin-record.c, i.e. use record_opts,
3371  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3372  * using pipes, etc.
3373  */
3374 static struct option __record_options[] = {
3375 	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3376 		     "event selector. use 'perf list' to list available events",
3377 		     parse_events_option),
3378 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3379 		     "event filter", parse_filter),
3380 	OPT_BOOLEAN(0, "latency", &record.latency,
3381 		    "Enable data collection for latency profiling.\n"
3382 		    "\t\t\t  Use perf report --latency for latency-centric profile."),
3383 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3384 			   NULL, "don't record events from perf itself",
3385 			   exclude_perf),
3386 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3387 		    "record events on existing process id"),
3388 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3389 		    "record events on existing thread id"),
3390 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3391 		    "collect data with this RT SCHED_FIFO priority"),
3392 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3393 		    "collect data without buffering"),
3394 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3395 		    "collect raw sample records from all opened counters"),
3396 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3397 			    "system-wide collection from all CPUs"),
3398 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3399 		    "list of cpus to monitor"),
3400 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3401 	OPT_STRING('o', "output", &record.data.path, "file",
3402 		    "output file name"),
3403 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3404 			&record.opts.no_inherit_set,
3405 			"child tasks do not inherit counters"),
3406 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3407 		    "synthesize non-sample events at the end of output"),
3408 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3409 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3410 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3411 		    "Fail if the specified frequency can't be used"),
3412 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3413 		     "profile at this frequency",
3414 		      record__parse_freq),
3415 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3416 		     "number of mmap data pages and AUX area tracing mmap pages",
3417 		     record__parse_mmap_pages),
3418 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3419 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3420 		     record__mmap_flush_parse),
3421 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3422 			   NULL, "enables call-graph recording" ,
3423 			   &record_callchain_opt),
3424 	OPT_CALLBACK(0, "call-graph", &record.opts,
3425 		     "record_mode[,record_size]", record_callchain_help,
3426 		     &record_parse_callchain_opt),
3427 	OPT_INCR('v', "verbose", &verbose,
3428 		    "be more verbose (show counter open errors, etc)"),
3429 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3430 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3431 		    "per thread counts"),
3432 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3433 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3434 		    "Record the sample physical addresses"),
3435 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3436 		    "Record the sampled data address data page size"),
3437 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3438 		    "Record the sampled code address (ip) page size"),
3439 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3440 	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3441 		    "Record the sample identifier"),
3442 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3443 			&record.opts.sample_time_set,
3444 			"Record the sample timestamps"),
3445 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3446 			"Record the sample period"),
3447 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3448 		    "don't sample"),
3449 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3450 			&record.no_buildid_cache_set,
3451 			"do not update the buildid cache"),
3452 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3453 			&record.no_buildid_set,
3454 			"do not collect buildids in perf.data"),
3455 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3456 		     "monitor event in cgroup name only",
3457 		     parse_cgroups),
3458 	OPT_CALLBACK('D', "delay", &record, "ms",
3459 		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
3460 		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3461 		     record__parse_event_enable_time),
3462 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3463 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3464 		   "user to profile"),
3465 
3466 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3467 		     "branch any", "sample any taken branches",
3468 		     parse_branch_stack),
3469 
3470 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3471 		     "branch filter mask", "branch stack filter modes",
3472 		     parse_branch_stack),
3473 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3474 		    "sample by weight (on special events only)"),
3475 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3476 		    "sample transaction flags (special events only)"),
3477 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3478 		    "use per-thread mmaps"),
3479 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3480 		    "sample selected machine registers on interrupt,"
3481 		    " use '-I?' to list register names", parse_intr_regs),
3482 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3483 		    "sample selected machine registers on interrupt,"
3484 		    " use '--user-regs=?' to list register names", parse_user_regs),
3485 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3486 		    "Record running/enabled time of read (:S) events"),
3487 	OPT_CALLBACK('k', "clockid", &record.opts,
3488 	"clockid", "clockid to use for events, see clock_gettime()",
3489 	parse_clockid),
3490 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3491 			  "opts", "AUX area tracing Snapshot Mode", ""),
3492 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3493 			  "opts", "sample AUX area", ""),
3494 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3495 			"per thread proc mmap processing timeout in ms"),
3496 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3497 		    "Record namespaces events"),
3498 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3499 		    "Record cgroup events"),
3500 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3501 			&record.opts.record_switch_events_set,
3502 			"Record context switch events"),
3503 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3504 			 "Configure all used events to run in kernel space.",
3505 			 PARSE_OPT_EXCLUSIVE),
3506 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3507 			 "Configure all used events to run in user space.",
3508 			 PARSE_OPT_EXCLUSIVE),
3509 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3510 		    "collect kernel callchains"),
3511 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3512 		    "collect user callchains"),
3513 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3514 		   "file", "vmlinux pathname"),
3515 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3516 		    "Record build-id of all DSOs regardless of hits"),
3517 	OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3518 		    "Record build-id in map events"),
3519 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3520 		    "append timestamp to output filename"),
3521 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3522 		    "Record timestamp boundary (time of first/last samples)"),
3523 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3524 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3525 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3526 			  "signal"),
3527 	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3528 			 &record.switch_output_event_set, "switch output event",
3529 			 "switch output event selector. use 'perf list' to list available events",
3530 			 parse_events_option_new_evlist),
3531 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3532 		   "Limit number of switch output generated files"),
3533 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3534 		    "Parse options then exit"),
3535 #ifdef HAVE_AIO_SUPPORT
3536 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3537 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3538 		     record__aio_parse),
3539 #endif
3540 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3541 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3542 		     record__parse_affinity),
3543 #ifdef HAVE_ZSTD_SUPPORT
3544 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3545 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3546 			    record__parse_comp_level),
3547 #endif
3548 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3549 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3550 	OPT_UINTEGER(0, "num-thread-synthesize",
3551 		     &record.opts.nr_threads_synthesize,
3552 		     "number of threads to run for event synthesis"),
3553 #ifdef HAVE_LIBPFM
3554 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3555 		"libpfm4 event selector. use 'perf list' to list available events",
3556 		parse_libpfm_events_option),
3557 #endif
3558 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3559 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3560 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3561 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3562 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3563 		      parse_control_option),
3564 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3565 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3566 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3567 			  &record.debuginfod.set, "debuginfod urls",
3568 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3569 			  "system"),
3570 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3571 			    "write collected trace data into several data files using parallel threads",
3572 			    record__parse_threads),
3573 	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3574 	OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin",
3575 		   "BPF filter action"),
3576 	OPT_END()
3577 };
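
/*
 * Example invocation combining a few of the options above (illustrative
 * only, './workload' stands for any command):
 *
 *   perf record -e cycles -F 999 -g --switch-output=1G -- ./workload
 *
 * samples 'cycles' at 999 Hz with call graphs and switches to a new
 * output file each time 1G of data has been written.
 */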
3578 
3579 struct option *record_options = __record_options;
3580 
3581 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3582 {
3583 	struct perf_cpu cpu;
3584 	int idx;
3585 
3586 	if (cpu_map__is_dummy(cpus))
3587 		return 0;
3588 
3589 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
3590 		/* Return ENODEV if input cpu is greater than max cpu */
3591 		if ((unsigned long)cpu.cpu > mask->nbits)
3592 			return -ENODEV;
3593 		__set_bit(cpu.cpu, mask->bits);
3594 	}
3595 
3596 	return 0;
3597 }
3598 
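/*
 * Parse a CPU list spec string (e.g. "0-3,8") into an mmap_cpu_mask
 * bitmap; the temporary perf_cpu_map is dropped before returning.
 */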
3599 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3600 {
3601 	struct perf_cpu_map *cpus;
3602 
3603 	cpus = perf_cpu_map__new(mask_spec);
3604 	if (!cpus)
3605 		return -ENOMEM;
3606 
3607 	bitmap_zero(mask->bits, mask->nbits);
3608 	if (record__mmap_cpu_mask_init(mask, cpus)) {
		perf_cpu_map__put(cpus);
3609 		return -ENODEV;
	}
3610 
3611 	perf_cpu_map__put(cpus);
3612 
3613 	return 0;
3614 }
3615 
3616 static void record__free_thread_masks(struct record *rec, int nr_threads)
3617 {
3618 	int t;
3619 
3620 	if (rec->thread_masks)
3621 		for (t = 0; t < nr_threads; t++)
3622 			record__thread_mask_free(&rec->thread_masks[t]);
3623 
3624 	zfree(&rec->thread_masks);
3625 }
3626 
3627 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3628 {
3629 	int t, ret;
3630 
3631 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3632 	if (!rec->thread_masks) {
3633 		pr_err("Failed to allocate thread masks\n");
3634 		return -ENOMEM;
3635 	}
3636 
3637 	for (t = 0; t < nr_threads; t++) {
3638 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3639 		if (ret) {
3640 			pr_err("Failed to allocate thread masks[%d]\n", t);
3641 			goto out_free;
3642 		}
3643 	}
3644 
3645 	return 0;
3646 
3647 out_free:
3648 	record__free_thread_masks(rec, nr_threads);
3649 
3650 	return ret;
3651 }
3652 
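/*
 * --threads=cpu: one writer thread per monitored CPU, with both the maps
 * mask and the affinity mask of each thread containing just that CPU.
 */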
3653 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3654 {
3655 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3656 
3657 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3658 	if (ret)
3659 		return ret;
3660 
3661 	rec->nr_threads = nr_cpus;
3662 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3663 
3664 	for (t = 0; t < rec->nr_threads; t++) {
3665 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3666 		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3667 		if (verbose > 0) {
3668 			pr_debug("thread_masks[%d]: ", t);
3669 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3670 			pr_debug("thread_masks[%d]: ", t);
3671 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3672 		}
3673 	}
3674 
3675 	return 0;
3676 }
3677 
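/*
 * Build one thread_mask per spec entry from the maps/affinity CPU list
 * strings. CPUs outside the monitored set are silently dropped, but a
 * mask that ends up empty, or that intersects a previous entry, is
 * rejected with -EINVAL.
 */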
3678 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3679 					  const char **maps_spec, const char **affinity_spec,
3680 					  u32 nr_spec)
3681 {
3682 	u32 s;
3683 	int ret = 0, t = 0;
3684 	struct mmap_cpu_mask cpus_mask;
3685 	struct thread_mask thread_mask, full_mask, *thread_masks;
3686 
3687 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3688 	if (ret) {
3689 		pr_err("Failed to allocate CPUs mask\n");
3690 		return ret;
3691 	}
3692 
3693 	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3694 	if (ret) {
3695 		pr_err("Failed to init cpu mask\n");
3696 		goto out_free_cpu_mask;
3697 	}
3698 
3699 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3700 	if (ret) {
3701 		pr_err("Failed to allocate full mask\n");
3702 		goto out_free_cpu_mask;
3703 	}
3704 
3705 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3706 	if (ret) {
3707 		pr_err("Failed to allocate thread mask\n");
3708 		goto out_free_full_and_cpu_masks;
3709 	}
3710 
3711 	for (s = 0; s < nr_spec; s++) {
3712 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3713 		if (ret) {
3714 			pr_err("Failed to initialize maps thread mask\n");
3715 			goto out_free;
3716 		}
3717 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3718 		if (ret) {
3719 			pr_err("Failed to initialize affinity thread mask\n");
3720 			goto out_free;
3721 		}
3722 
3723 		/* ignore invalid CPUs but do not allow empty masks */
3724 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3725 				cpus_mask.bits, thread_mask.maps.nbits)) {
3726 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3727 			ret = -EINVAL;
3728 			goto out_free;
3729 		}
3730 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3731 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3732 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3733 			ret = -EINVAL;
3734 			goto out_free;
3735 		}
3736 
3737 		/* do not allow intersection with other masks (full_mask) */
3738 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3739 				      thread_mask.maps.nbits)) {
3740 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3741 			ret = -EINVAL;
3742 			goto out_free;
3743 		}
3744 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3745 				      thread_mask.affinity.nbits)) {
3746 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3747 			ret = -EINVAL;
3748 			goto out_free;
3749 		}
3750 
3751 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3752 			  thread_mask.maps.bits, full_mask.maps.nbits);
3753 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3754 			  thread_mask.affinity.bits, full_mask.affinity.nbits);
3755 
3756 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3757 		if (!thread_masks) {
3758 			pr_err("Failed to reallocate thread masks\n");
3759 			ret = -ENOMEM;
3760 			goto out_free;
3761 		}
3762 		rec->thread_masks = thread_masks;
3763 		rec->thread_masks[t] = thread_mask;
3764 		if (verbose > 0) {
3765 			pr_debug("thread_masks[%d]: ", t);
3766 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3767 			pr_debug("thread_masks[%d]: ", t);
3768 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3769 		}
3770 		t++;
3771 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3772 		if (ret) {
3773 			pr_err("Failed to allocate thread mask\n");
3774 			goto out_free_full_and_cpu_masks;
3775 		}
3776 	}
3777 	rec->nr_threads = t;
3778 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3779 	if (!rec->nr_threads)
3780 		ret = -EINVAL;
3781 
3782 out_free:
3783 	record__thread_mask_free(&thread_mask);
3784 out_free_full_and_cpu_masks:
3785 	record__thread_mask_free(&full_mask);
3786 out_free_cpu_mask:
3787 	record__mmap_cpu_mask_free(&cpus_mask);
3788 
3789 	return ret;
3790 }
3791 
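/*
 * The core, package and NUMA modes below derive their maps/affinity spec
 * strings from the system topology and delegate the actual mask setup to
 * record__init_thread_masks_spec().
 */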
3792 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3793 {
3794 	int ret;
3795 	struct cpu_topology *topo;
3796 
3797 	topo = cpu_topology__new();
3798 	if (!topo) {
3799 		pr_err("Failed to allocate CPU topology\n");
3800 		return -ENOMEM;
3801 	}
3802 
3803 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3804 					     topo->core_cpus_list, topo->core_cpus_lists);
3805 	cpu_topology__delete(topo);
3806 
3807 	return ret;
3808 }
3809 
3810 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3811 {
3812 	int ret;
3813 	struct cpu_topology *topo;
3814 
3815 	topo = cpu_topology__new();
3816 	if (!topo) {
3817 		pr_err("Failed to allocate CPU topology\n");
3818 		return -ENOMEM;
3819 	}
3820 
3821 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3822 					     topo->package_cpus_list, topo->package_cpus_lists);
3823 	cpu_topology__delete(topo);
3824 
3825 	return ret;
3826 }
3827 
3828 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3829 {
3830 	u32 s;
3831 	int ret;
3832 	const char **spec;
3833 	struct numa_topology *topo;
3834 
3835 	topo = numa_topology__new();
3836 	if (!topo) {
3837 		pr_err("Failed to allocate NUMA topology\n");
3838 		return -ENOMEM;
3839 	}
3840 
3841 	spec = zalloc(topo->nr * sizeof(char *));
3842 	if (!spec) {
3843 		pr_err("Failed to allocate NUMA spec\n");
3844 		ret = -ENOMEM;
3845 		goto out_delete_topo;
3846 	}
3847 	for (s = 0; s < topo->nr; s++)
3848 		spec[s] = topo->nodes[s].cpus;
3849 
3850 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3851 
3852 	zfree(&spec);
3853 
3854 out_delete_topo:
3855 	numa_topology__delete(topo);
3856 
3857 	return ret;
3858 }
3859 
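/*
 * --threads=<spec> with a user-defined spec: entries are separated by ':'
 * and each entry is "<maps cpus>/<affinity cpus>". A purely illustrative
 * example:
 *
 *   perf record --threads=0-3/0-3:4-7/4-7 ...
 *
 * creates two writer threads, each reading the mmaps of one four-CPU
 * group and pinned to those same CPUs.
 */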
3860 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3861 {
3862 	int t, ret;
3863 	u32 s, nr_spec = 0;
3864 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3865 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3866 
3867 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3868 		spec = strtok_r(user_spec, ":", &spec_ptr);
3869 		if (spec == NULL)
3870 			break;
3871 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3872 		mask = strtok_r(spec, "/", &mask_ptr);
3873 		if (mask == NULL)
3874 			break;
3875 		pr_debug2("  maps mask: %s\n", mask);
3876 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3877 		if (!tmp_spec) {
3878 			pr_err("Failed to reallocate maps spec\n");
3879 			ret = -ENOMEM;
3880 			goto out_free;
3881 		}
3882 		maps_spec = tmp_spec;
3883 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3884 		if (!maps_spec[nr_spec]) {
3885 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3886 			ret = -ENOMEM;
3887 			goto out_free;
3888 		}
3889 		mask = strtok_r(NULL, "/", &mask_ptr);
3890 		if (mask == NULL) {
3891 			pr_err("Invalid thread maps or affinity specs\n");
3892 			ret = -EINVAL;
3893 			goto out_free;
3894 		}
3895 		pr_debug2("  affinity mask: %s\n", mask);
3896 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3897 		if (!tmp_spec) {
3898 			pr_err("Failed to reallocate affinity spec\n");
3899 			ret = -ENOMEM;
3900 			goto out_free;
3901 		}
3902 		affinity_spec = tmp_spec;
3903 		affinity_spec[nr_spec] = strdup(mask);
3904 		if (!affinity_spec[nr_spec]) {
3905 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3906 			ret = -ENOMEM;
3907 			goto out_free;
3908 		}
3909 		dup_mask = NULL;
3910 		nr_spec++;
3911 	}
3912 
3913 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3914 					     (const char **)affinity_spec, nr_spec);
3915 
3916 out_free:
3917 	free(dup_mask);
3918 	for (s = 0; s < nr_spec; s++) {
3919 		if (maps_spec)
3920 			free(maps_spec[s]);
3921 		if (affinity_spec)
3922 			free(affinity_spec[s]);
3923 	}
3924 	free(affinity_spec);
3925 	free(maps_spec);
3926 
3927 	return ret;
3928 }
3929 
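/*
 * Default layout when --threads is not used: a single mask covering all
 * monitored CPUs (nr_threads == 1).
 */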
3930 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3931 {
3932 	int ret;
3933 
3934 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3935 	if (ret)
3936 		return ret;
3937 
3938 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3939 		return -ENODEV;
3940 
3941 	rec->nr_threads = 1;
3942 
3943 	return 0;
3944 }
3945 
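/*
 * Choose the thread mask layout according to the --threads specification;
 * parallel streaming is incompatible with --per-thread mmaps.
 */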
3946 static int record__init_thread_masks(struct record *rec)
3947 {
3948 	int ret = 0;
3949 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3950 
3951 	if (!record__threads_enabled(rec))
3952 		return record__init_thread_default_masks(rec, cpus);
3953 
3954 	if (evlist__per_thread(rec->evlist)) {
3955 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3956 		return -EINVAL;
3957 	}
3958 
3959 	switch (rec->opts.threads_spec) {
3960 	case THREAD_SPEC__CPU:
3961 		ret = record__init_thread_cpu_masks(rec, cpus);
3962 		break;
3963 	case THREAD_SPEC__CORE:
3964 		ret = record__init_thread_core_masks(rec, cpus);
3965 		break;
3966 	case THREAD_SPEC__PACKAGE:
3967 		ret = record__init_thread_package_masks(rec, cpus);
3968 		break;
3969 	case THREAD_SPEC__NUMA:
3970 		ret = record__init_thread_numa_masks(rec, cpus);
3971 		break;
3972 	case THREAD_SPEC__USER:
3973 		ret = record__init_thread_user_masks(rec, cpus);
3974 		break;
3975 	default:
3976 		break;
3977 	}
3978 
3979 	return ret;
3980 }
3981 
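/*
 * Entry point of 'perf record': parse and validate options, set up the
 * evlist, auxtrace, build-id handling and thread masks, then hand off to
 * __cmd_record() for the actual recording session.
 */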
3982 int cmd_record(int argc, const char **argv)
3983 {
3984 	int err;
3985 	struct record *rec = &record;
3986 	char errbuf[BUFSIZ];
3987 
3988 	setlocale(LC_ALL, "");
3989 
3990 #ifndef HAVE_BPF_SKEL
3991 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3992 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3993 # undef set_nobuild
3994 #endif
3995 
3996 	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
3997 	symbol_conf.lazy_load_kernel_maps = true;
3998 	rec->opts.affinity = PERF_AFFINITY_SYS;
3999 
4000 	rec->evlist = evlist__new();
4001 	if (rec->evlist == NULL)
4002 		return -ENOMEM;
4003 
4004 	err = perf_config(perf_record_config, rec);
4005 	if (err)
4006 		return err;
4007 
4008 	argc = parse_options(argc, argv, record_options, record_usage,
4009 			    PARSE_OPT_STOP_AT_NON_OPTION);
4010 	if (quiet)
4011 		perf_quiet_option();
4012 
4013 	err = symbol__validate_sym_arguments();
4014 	if (err)
4015 		return err;
4016 
4017 	perf_debuginfod_setup(&record.debuginfod);
4018 
4019 	/* Make system wide (-a) the default target. */
4020 	if (!argc && target__none(&rec->opts.target))
4021 		rec->opts.target.system_wide = true;
4022 
4023 	if (nr_cgroups && !rec->opts.target.system_wide) {
4024 		usage_with_options_msg(record_usage, record_options,
4025 			"cgroup monitoring only available in system-wide mode");
4027 	}
4028 
4029 	if (record.latency) {
4030 		/*
4031 		 * There is no fundamental reason why latency profiling
4032 		 * can't work for system-wide mode, but exact semantics
4033 		 * and details are to be defined.
4034 		 * See the following thread for details:
4035 		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
4036 		 */
4037 		if (record.opts.target.system_wide) {
4038 			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
4039 			err = -EINVAL;
4040 			goto out_opts;
4041 		}
4042 		record.opts.record_switch_events = true;
4043 	}
4044 
4045 	if (rec->buildid_mmap) {
4046 		if (!perf_can_record_build_id()) {
4047 			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
4048 			err = -EINVAL;
4049 			goto out_opts;
4050 		}
4051 		pr_debug("Enabling build id in mmap2 events.\n");
4052 		/* Enable mmap build id synthesizing. */
4053 		symbol_conf.buildid_mmap2 = true;
4054 		/* Enable perf_event_attr::build_id bit. */
4055 		rec->opts.build_id = true;
4056 		/* Disable build id cache. */
4057 		rec->no_buildid = true;
4058 	}
4059 
4060 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
4061 		pr_err("Kernel has no cgroup sampling support.\n");
4062 		err = -EINVAL;
4063 		goto out_opts;
4064 	}
4065 
4066 	if (rec->opts.kcore)
4067 		rec->opts.text_poke = true;
4068 
4069 	if (rec->opts.kcore || record__threads_enabled(rec))
4070 		rec->data.is_dir = true;
4071 
4072 	if (record__threads_enabled(rec)) {
4073 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4074 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
4075 			goto out_opts;
4076 		}
4077 		if (record__aio_enabled(rec)) {
4078 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
4079 			goto out_opts;
4080 		}
4081 	}
4082 
4083 	if (rec->opts.comp_level != 0) {
4084 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4085 		rec->no_buildid = true;
4086 	}
4087 
4088 	if (rec->opts.record_switch_events &&
4089 	    !perf_can_record_switch_events()) {
4090 		ui__error("kernel does not support recording context switch events\n");
4091 		parse_options_usage(record_usage, record_options, "switch-events", 0);
4092 		err = -EINVAL;
4093 		goto out_opts;
4094 	}
4095 
4096 	if (switch_output_setup(rec)) {
4097 		parse_options_usage(record_usage, record_options, "switch-output", 0);
4098 		err = -EINVAL;
4099 		goto out_opts;
4100 	}
4101 
4102 	if (rec->switch_output.time) {
4103 		signal(SIGALRM, alarm_sig_handler);
4104 		alarm(rec->switch_output.time);
4105 	}
4106 
4107 	if (rec->switch_output.num_files) {
4108 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4109 						      sizeof(char *));
4110 		if (!rec->switch_output.filenames) {
4111 			err = -EINVAL;
4112 			goto out_opts;
4113 		}
4114 	}
4115 
4116 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
4117 		rec->timestamp_filename = false;
4118 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4119 	}
4120 
4121 	if (rec->filter_action) {
4122 		if (!strcmp(rec->filter_action, "pin"))
4123 			err = perf_bpf_filter__pin();
4124 		else if (!strcmp(rec->filter_action, "unpin"))
4125 			err = perf_bpf_filter__unpin();
4126 		else {
4127 			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
4128 			err = -EINVAL;
4129 		}
4130 		goto out_opts;
4131 	}
4132 
4133 	/*
4134 	 * Allow aliases to facilitate the lookup of symbols for address
4135 	 * filters. Refer to auxtrace_parse_filters().
4136 	 */
4137 	symbol_conf.allow_aliases = true;
4138 
4139 	symbol__init(NULL);
4140 
4141 	err = record__auxtrace_init(rec);
4142 	if (err)
4143 		goto out;
4144 
4145 	if (dry_run)
4146 		goto out;
4147 
4148 	err = -ENOMEM;
4149 
4150 	if (rec->no_buildid_cache || rec->no_buildid) {
4151 		disable_buildid_cache();
4152 	} else if (rec->switch_output.enabled) {
4153 		/*
4154 		 * In 'perf record --switch-output', disable buildid
4155 		 * generation by default to reduce data file switching
4156 		 * overhead. Still generate buildids if they are required
4157 		 * explicitly using
4158 		 *
4159 		 *  perf record --switch-output --no-no-buildid \
4160 		 *              --no-no-buildid-cache
4161 		 *
4162 		 * Following code equals to:
4163 		 *
4164 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4165 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4166 		 *         disable_buildid_cache();
4167 		 */
4168 		bool disable = true;
4169 
4170 		if (rec->no_buildid_set && !rec->no_buildid)
4171 			disable = false;
4172 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4173 			disable = false;
4174 		if (disable) {
4175 			rec->no_buildid = true;
4176 			rec->no_buildid_cache = true;
4177 			disable_buildid_cache();
4178 		}
4179 	}
4180 
4181 	if (record.opts.overwrite)
4182 		record.opts.tail_synthesize = true;
4183 
4184 	if (rec->evlist->core.nr_entries == 0) {
4185 		err = parse_event(rec->evlist, "cycles:P");
4186 		if (err)
4187 			goto out;
4188 	}
4189 
4190 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4191 		rec->opts.no_inherit = true;
4192 
4193 	err = target__validate(&rec->opts.target);
4194 	if (err) {
4195 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4196 		ui__warning("%s\n", errbuf);
4197 	}
4198 
4199 	err = target__parse_uid(&rec->opts.target);
4200 	if (err) {
4201 		int saved_errno = errno;
4202 
4203 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4204 		ui__error("%s", errbuf);
4205 
4206 		err = -saved_errno;
4207 		goto out;
4208 	}
4209 
4210 	/* Enable ignoring missing threads when -u/-p option is defined. */
4211 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4212 
4213 	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4214 
4215 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4216 		arch__add_leaf_frame_record_opts(&rec->opts);
4217 
4218 	err = -ENOMEM;
4219 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4220 		if (rec->opts.target.pid != NULL) {
4221 			pr_err("Couldn't create thread/CPU maps: %s\n",
4222 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4223 			goto out;
4224 		} else
4226 			usage_with_options(record_usage, record_options);
4227 	}
4228 
4229 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4230 	if (err)
4231 		goto out;
4232 
4233 	/*
4234 	 * We take all buildids when the file contains
4235 	 * AUX area tracing data, because we do not decode the
4236 	 * trace: that would take too long.
4237 	 */
4238 	if (rec->opts.full_auxtrace)
4239 		rec->buildid_all = true;
4240 
4241 	if (rec->opts.text_poke) {
4242 		err = record__config_text_poke(rec->evlist);
4243 		if (err) {
4244 			pr_err("record__config_text_poke failed, error %d\n", err);
4245 			goto out;
4246 		}
4247 	}
4248 
4249 	if (rec->off_cpu) {
4250 		err = record__config_off_cpu(rec);
4251 		if (err) {
4252 			pr_err("record__config_off_cpu failed, error %d\n", err);
4253 			goto out;
4254 		}
4255 	}
4256 
4257 	if (record_opts__config(&rec->opts)) {
4258 		err = -EINVAL;
4259 		goto out;
4260 	}
4261 
4262 	err = record__config_tracking_events(rec);
4263 	if (err) {
4264 		pr_err("record__config_tracking_events failed, error %d\n", err);
4265 		goto out;
4266 	}
4267 
4268 	err = record__init_thread_masks(rec);
4269 	if (err) {
4270 		pr_err("Failed to initialize parallel data streaming masks\n");
4271 		goto out;
4272 	}
4273 
4274 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4275 		rec->opts.nr_cblocks = nr_cblocks_max;
4276 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4277 
4278 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4279 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4280 
4281 	if (rec->opts.comp_level > comp_level_max)
4282 		rec->opts.comp_level = comp_level_max;
4283 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4284 
4285 	err = __cmd_record(&record, argc, argv);
4286 out:
4287 	record__free_thread_masks(rec, rec->nr_threads);
4288 	rec->nr_threads = 0;
4289 	symbol__exit();
4290 	auxtrace_record__free(rec->itr);
4291 out_opts:
4292 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4293 	evlist__delete(rec->evlist);
4294 	return err;
4295 }
4296 
4297 static void snapshot_sig_handler(int sig __maybe_unused)
4298 {
4299 	struct record *rec = &record;
4300 
4301 	hit_auxtrace_snapshot_trigger(rec);
4302 
4303 	if (switch_output_signal(rec))
4304 		trigger_hit(&switch_output_trigger);
4305 }
4306 
4307 static void alarm_sig_handler(int sig __maybe_unused)
4308 {
4309 	struct record *rec = &record;
4310 
4311 	if (switch_output_time(rec))
4312 		trigger_hit(&switch_output_trigger);
4313 }
4314