/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};
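
/*
 * Account for data that reached the output file without going through
 * write_output(), e.g. tracing data that the synthesis code writes
 * straight to the fd; only the byte counter needs updating.
 */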
static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}
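
/*
 * Append a buffer to the output file, looping to cope with short
 * writes; dies on I/O error.
 */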
static void write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}
}
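
/*
 * Callback handed to the event synthesis routines: append each
 * synthesized event to the output file as-is.
 */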
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __used,
				     struct machine *machine __used)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	write_output(rec, event, event->header.size);
	return 0;
}
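
/*
 * Drain one mmap'ed ring buffer: copy everything between the old tail
 * and the current head to the output file, splitting the copy in two
 * when the data wraps around the end of the buffer, then advance the
 * tail so the kernel can reuse the space.
 */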
static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;
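
/*
 * SIGCHLD tells us the forked workload has finished; any handled
 * signal ends the main event loop.
 */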
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
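
/*
 * on_exit() handler: reap the forked workload, terminating it first if
 * it is still running, and for fatal signals re-raise with the default
 * disposition so our exit status reflects the signal.
 */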
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
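
/*
 * Appending to an existing perf.data file is only valid if the event
 * list matches, attr by attr, the one the file was recorded with.
 */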
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}
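
/*
 * Create the counters for every event, stepping down through fallbacks
 * when the running kernel lacks a feature (guest/host exclusion,
 * sample_id_all, the hardware cycles event), then mmap the ring
 * buffers and set any event filters.
 */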
static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos, *first;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but a short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		if (opts->group && pos != first)
			group_fd = first->fd;
fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     opts->group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && opts->cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL) {
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (opts->sample_id_all_avail) {
					/*
					 * Old kernel, no attr->sample_id_all field
					 */
					opts->sample_id_all_avail = false;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to the hrtimer-based
			 * cpu-clock-tick sw counter, which is always available
			 * even without PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM)
			die("Permission error mapping pages.\n"
			    "Consider increasing "
			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
			    "or try again with a smaller value of -m/--mmap_pages.\n"
			    "(current value: %d)\n", opts->mmap_pages);
		else if (!is_power_of_2(opts->mmap_pages))
			die("--mmap_pages/-m value must be a power of two.");

		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}
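
/*
 * Re-process the recorded events to mark the DSOs that samples
 * actually hit, so that the header only needs to carry their
 * build-ids.
 */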
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}
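
/*
 * on_exit() handler for the non-pipe case: account for the data
 * written, collect build-ids and rewrite the header now that the final
 * sizes are known.
 */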
static void perf_record__exit(int status __used, void *arg)
{
	struct perf_record *rec = arg;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * For guest kernels, when processing the record and report
	 * subcommands, we synthesize the module mmap events before the
	 * guest kernel mmap event and trigger a DSO preload, because
	 * guest module symbols are loaded from the guest's kallsyms
	 * rather than from /lib/modules/XXX/XXX. This avoids missing
	 * symbols when the first sampled address falls in a module
	 * rather than in the guest kernel itself.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
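
/*
 * Synthetic event emitted after each pass over the mmap buffers; it
 * tells the consuming side (e.g. perf report) how far it can safely
 * sort and flush queued events.
 */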
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
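
/*
 * Drain every ring buffer and, when tracepoint data is being recorded,
 * append a finished-round marker.
 */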
static void perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base)
			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
		write_output(rec, &finished_round_event, sizeof(finished_round_event));
}
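
/*
 * The record session proper: set up the output file, open the
 * counters, synthesize the initial state (attrs, kernel/module/thread
 * maps), then loop draining the ring buffers until the workload exits
 * or we are interrupted.
 */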
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	if (!rec->no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPUID);

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	perf_record__open(rec);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);
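
	/*
	 * In pipe mode the header must be streamed up front; for a new
	 * file it is written now and rewritten with the final sizes at
	 * exit time.
	 */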
	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		return -1;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(rec, err);
		}
	}
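
	/*
	 * Synthesize mmap events for the kernel image and its modules
	 * so that kernel addresses in samples can be resolved at report
	 * time.
	 */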
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->system_wide)
		perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);
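
	/*
	 * Main loop: drain all buffers; if nothing new arrived, either
	 * we are done or we poll until the kernel wakes us up again.
	 * Once 'done' is set the counters are disabled, so one last
	 * pass catches whatever is still queued.
	 */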
	for (;;) {
		int hits = rec->samples;

		perf_record__mmap_read_all(rec);

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * perf_record__new(), because we need access to it in perf_record__exit(),
 * which is called after cmd_record() exits; but since record_options needs
 * to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.target_pid	     = -1,
		.target_tid	     = -1,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 1000,
		.sample_id_all_avail = true,
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &record.opts.target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &record.opts.target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
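
/*
 * Parse the options, build the event list and the thread/cpu maps,
 * apply the period/frequency defaults and hand off to __cmd_record().
 */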
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
		!rec->opts.system_wide && !rec->opts.cpu_list)
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A.\n");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.system_wide) {
		fprintf(stderr, "cgroup monitoring is only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target_pid != -1)
		rec->opts.target_tid = rec->opts.target_pid;

	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
				     rec->opts.target_tid, rec->opts.cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * A user-specified count overrides the default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}