// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/pmu.h"
#include "util/pmus.h"
#include "util/clockid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
	char		 **filenames;
	int		 num_files;
	int		 cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};

struct record_thread {
	pid_t			tid;
	struct thread_mask	*mask;
	struct {
		int		msg[2];
		int		ack[2];
	} pipes;
	struct fdarray		pollfd;
	int			ctlfd_pos;
	int			nr_mmaps;
	struct mmap		**maps;
	struct mmap		**overwrite_maps;
	struct record		*rec;
	unsigned long long	samples;
	unsigned long		waking;
	u64			bytes_written;
	u64			bytes_transferred;
	u64			bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

struct pollfd_index_map {
	int	evlist_pollfd_index;
	int	thread_pollfd_index;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	u64			thread_bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist		*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			latency;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	const char		*filter_action;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
	struct pollfd_index_map	*index_map;
	size_t			index_map_sz;
	size_t			index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine);
static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine);
static int process_timestamp_boundary(const struct perf_tool *tool,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct machine *machine);

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
			" stopping session ]\n",
			record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
			     void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited on before the
				 * next allocation.
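				 * Keeping a pointer in aiocb[] marks this
				 * cblock as still in flight, so the
				 * aio_suspend() call below waits on it (1ms
				 * timeout, retried on EAGAIN/EINTR).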
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first
	 * move part of the data from map->start till the upper bound and then
	 * the remainder from the beginning of the kernel buffer till the end
	 * of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
						   mmap__mmap_len(map) - aio->size,
						   buf, size);
		if (compressed < 0)
			return (int)compressed;

		size = compressed;
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released before the aio write request started on the
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after the started aio request completes, or at
		 * record__aio_push() if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait until the map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if the record__aio_write() operation failed to start;
		 * otherwise map->refcount is decremented in record__aio_complete()
		 * after the aio write operation finishes successfully.
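		 * Either way, the reference taken for this aio.data[] buffer
		 * in record__aio_pushfn() is dropped exactly once.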
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(const struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return
record__write(rec, NULL, event, event->header.size); 629 } 630 631 static struct mutex synth_lock; 632 633 static int process_locked_synthesized_event(const struct perf_tool *tool, 634 union perf_event *event, 635 struct perf_sample *sample __maybe_unused, 636 struct machine *machine __maybe_unused) 637 { 638 int ret; 639 640 mutex_lock(&synth_lock); 641 ret = process_synthesized_event(tool, event, sample, machine); 642 mutex_unlock(&synth_lock); 643 return ret; 644 } 645 646 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 647 { 648 struct record *rec = to; 649 650 if (record__comp_enabled(rec)) { 651 ssize_t compressed = zstd_compress(rec->session, map, map->data, 652 mmap__mmap_len(map), bf, size); 653 654 if (compressed < 0) 655 return (int)compressed; 656 657 size = compressed; 658 bf = map->data; 659 } 660 661 thread->samples++; 662 return record__write(rec, map, bf, size); 663 } 664 665 static volatile sig_atomic_t signr = -1; 666 static volatile sig_atomic_t child_finished; 667 #ifdef HAVE_EVENTFD_SUPPORT 668 static volatile sig_atomic_t done_fd = -1; 669 #endif 670 671 static void sig_handler(int sig) 672 { 673 if (sig == SIGCHLD) 674 child_finished = 1; 675 else 676 signr = sig; 677 678 done = 1; 679 #ifdef HAVE_EVENTFD_SUPPORT 680 if (done_fd >= 0) { 681 u64 tmp = 1; 682 int orig_errno = errno; 683 684 /* 685 * It is possible for this signal handler to run after done is 686 * checked in the main loop, but before the perf counter fds are 687 * polled. If this happens, the poll() will continue to wait 688 * even though done is set, and will only break out if either 689 * another signal is received, or the counters are ready for 690 * read. To ensure the poll() doesn't sleep when done is set, 691 * use an eventfd (done_fd) to wake up the poll(). 
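		 * This handler runs in signal context, so errno is saved in
		 * orig_errno and restored around the write() below.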
692 */ 693 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 694 pr_err("failed to signal wakeup fd, error: %m\n"); 695 696 errno = orig_errno; 697 } 698 #endif // HAVE_EVENTFD_SUPPORT 699 } 700 701 static void sigsegv_handler(int sig) 702 { 703 perf_hooks__recover(); 704 sighandler_dump_stack(sig); 705 } 706 707 static void record__sig_exit(void) 708 { 709 if (signr == -1) 710 return; 711 712 signal(signr, SIG_DFL); 713 raise(signr); 714 } 715 716 #ifdef HAVE_AUXTRACE_SUPPORT 717 718 static int record__process_auxtrace(const struct perf_tool *tool, 719 struct mmap *map, 720 union perf_event *event, void *data1, 721 size_t len1, void *data2, size_t len2) 722 { 723 struct record *rec = container_of(tool, struct record, tool); 724 struct perf_data *data = &rec->data; 725 size_t padding; 726 u8 pad[8] = {0}; 727 728 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 729 off_t file_offset; 730 int fd = perf_data__fd(data); 731 int err; 732 733 file_offset = lseek(fd, 0, SEEK_CUR); 734 if (file_offset == -1) 735 return -1; 736 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 737 event, file_offset); 738 if (err) 739 return err; 740 } 741 742 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 743 padding = (len1 + len2) & 7; 744 if (padding) 745 padding = 8 - padding; 746 747 record__write(rec, map, event, event->header.size); 748 record__write(rec, map, data1, len1); 749 if (len2) 750 record__write(rec, map, data2, len2); 751 record__write(rec, map, &pad, padding); 752 753 return 0; 754 } 755 756 static int record__auxtrace_mmap_read(struct record *rec, 757 struct mmap *map) 758 { 759 int ret; 760 761 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 762 record__process_auxtrace); 763 if (ret < 0) 764 return ret; 765 766 if (ret) 767 rec->samples++; 768 769 return 0; 770 } 771 772 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 773 struct mmap *map) 774 { 775 int ret; 776 777 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 778 record__process_auxtrace, 779 rec->opts.auxtrace_snapshot_size); 780 if (ret < 0) 781 return ret; 782 783 if (ret) 784 rec->samples++; 785 786 return 0; 787 } 788 789 static int record__auxtrace_read_snapshot_all(struct record *rec) 790 { 791 int i; 792 int rc = 0; 793 794 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 795 struct mmap *map = &rec->evlist->mmap[i]; 796 797 if (!map->auxtrace_mmap.base) 798 continue; 799 800 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 801 rc = -1; 802 goto out; 803 } 804 } 805 out: 806 return rc; 807 } 808 809 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 810 { 811 pr_debug("Recording AUX area tracing snapshot\n"); 812 if (record__auxtrace_read_snapshot_all(rec) < 0) { 813 trigger_error(&auxtrace_snapshot_trigger); 814 } else { 815 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 816 trigger_error(&auxtrace_snapshot_trigger); 817 else 818 trigger_ready(&auxtrace_snapshot_trigger); 819 } 820 } 821 822 static int record__auxtrace_snapshot_exit(struct record *rec) 823 { 824 if (trigger_is_error(&auxtrace_snapshot_trigger)) 825 return 0; 826 827 if (!auxtrace_record__snapshot_started && 828 auxtrace_record__snapshot_start(rec->itr)) 829 return -1; 830 831 record__read_auxtrace_snapshot(rec, true); 832 if (trigger_is_error(&auxtrace_snapshot_trigger)) 833 return -1; 834 835 return 0; 836 } 837 838 static int record__auxtrace_init(struct record *rec) 839 { 840 int err; 841 842 if 
((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 843 && record__threads_enabled(rec)) { 844 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 845 return -EINVAL; 846 } 847 848 if (!rec->itr) { 849 rec->itr = auxtrace_record__init(rec->evlist, &err); 850 if (err) 851 return err; 852 } 853 854 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 855 rec->opts.auxtrace_snapshot_opts); 856 if (err) 857 return err; 858 859 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 860 rec->opts.auxtrace_sample_opts); 861 if (err) 862 return err; 863 864 err = auxtrace_parse_aux_action(rec->evlist); 865 if (err) 866 return err; 867 868 return auxtrace_parse_filters(rec->evlist); 869 } 870 871 #else 872 873 static inline 874 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 875 struct mmap *map __maybe_unused) 876 { 877 return 0; 878 } 879 880 static inline 881 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 882 bool on_exit __maybe_unused) 883 { 884 } 885 886 static inline 887 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 888 { 889 return 0; 890 } 891 892 static inline 893 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 894 { 895 return 0; 896 } 897 898 static int record__auxtrace_init(struct record *rec __maybe_unused) 899 { 900 return 0; 901 } 902 903 #endif 904 905 static int record__config_text_poke(struct evlist *evlist) 906 { 907 struct evsel *evsel; 908 909 /* Nothing to do if text poke is already configured */ 910 evlist__for_each_entry(evlist, evsel) { 911 if (evsel->core.attr.text_poke) 912 return 0; 913 } 914 915 evsel = evlist__add_dummy_on_all_cpus(evlist); 916 if (!evsel) 917 return -ENOMEM; 918 919 evsel->core.attr.text_poke = 1; 920 evsel->core.attr.ksymbol = 1; 921 evsel->immediate = true; 922 evsel__set_sample_bit(evsel, TIME); 923 924 return 0; 925 } 926 927 static int record__config_off_cpu(struct record *rec) 928 { 929 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 930 } 931 932 static bool record__tracking_system_wide(struct record *rec) 933 { 934 struct evlist *evlist = rec->evlist; 935 struct evsel *evsel; 936 937 /* 938 * If non-dummy evsel exists, system_wide sideband is need to 939 * help parse sample information. 940 * For example, PERF_EVENT_MMAP event to help parse symbol, 941 * and PERF_EVENT_COMM event to help parse task executable name. 942 */ 943 evlist__for_each_entry(evlist, evsel) { 944 if (!evsel__is_dummy_event(evsel)) 945 return true; 946 } 947 948 return false; 949 } 950 951 static int record__config_tracking_events(struct record *rec) 952 { 953 struct record_opts *opts = &rec->opts; 954 struct evlist *evlist = rec->evlist; 955 bool system_wide = false; 956 struct evsel *evsel; 957 958 /* 959 * For initial_delay, system wide or a hybrid system, we need to add 960 * tracking event so that we can track PERF_RECORD_MMAP to cover the 961 * delay of waiting or event synthesis. 962 */ 963 if (opts->target.initial_delay || target__has_cpu(&opts->target) || 964 perf_pmus__num_core_pmus() > 1) { 965 966 /* 967 * User space tasks can migrate between CPUs, so when tracing 968 * selected CPUs, sideband for all CPUs is still needed. 
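		 * record__tracking_system_wide() above only reports true when
		 * a non-dummy event is present, so a dummy-only evlist keeps
		 * the tracking event limited to the requested CPUs.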
969 */ 970 if (!!opts->target.cpu_list && record__tracking_system_wide(rec)) 971 system_wide = true; 972 973 evsel = evlist__findnew_tracking_event(evlist, system_wide); 974 if (!evsel) 975 return -ENOMEM; 976 977 /* 978 * Enable the tracking event when the process is forked for 979 * initial_delay, immediately for system wide. 980 */ 981 if (opts->target.initial_delay && !evsel->immediate && 982 !target__has_cpu(&opts->target)) 983 evsel->core.attr.enable_on_exec = 1; 984 else 985 evsel->immediate = 1; 986 } 987 988 return 0; 989 } 990 991 static bool record__kcore_readable(struct machine *machine) 992 { 993 char kcore[PATH_MAX]; 994 int fd; 995 996 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 997 998 fd = open(kcore, O_RDONLY); 999 if (fd < 0) 1000 return false; 1001 1002 close(fd); 1003 1004 return true; 1005 } 1006 1007 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 1008 { 1009 char from_dir[PATH_MAX]; 1010 char kcore_dir[PATH_MAX]; 1011 int ret; 1012 1013 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 1014 1015 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 1016 if (ret) 1017 return ret; 1018 1019 return kcore_copy(from_dir, kcore_dir); 1020 } 1021 1022 static void record__thread_data_init_pipes(struct record_thread *thread_data) 1023 { 1024 thread_data->pipes.msg[0] = -1; 1025 thread_data->pipes.msg[1] = -1; 1026 thread_data->pipes.ack[0] = -1; 1027 thread_data->pipes.ack[1] = -1; 1028 } 1029 1030 static int record__thread_data_open_pipes(struct record_thread *thread_data) 1031 { 1032 if (pipe(thread_data->pipes.msg)) 1033 return -EINVAL; 1034 1035 if (pipe(thread_data->pipes.ack)) { 1036 close(thread_data->pipes.msg[0]); 1037 thread_data->pipes.msg[0] = -1; 1038 close(thread_data->pipes.msg[1]); 1039 thread_data->pipes.msg[1] = -1; 1040 return -EINVAL; 1041 } 1042 1043 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 1044 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 1045 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 1046 1047 return 0; 1048 } 1049 1050 static void record__thread_data_close_pipes(struct record_thread *thread_data) 1051 { 1052 if (thread_data->pipes.msg[0] != -1) { 1053 close(thread_data->pipes.msg[0]); 1054 thread_data->pipes.msg[0] = -1; 1055 } 1056 if (thread_data->pipes.msg[1] != -1) { 1057 close(thread_data->pipes.msg[1]); 1058 thread_data->pipes.msg[1] = -1; 1059 } 1060 if (thread_data->pipes.ack[0] != -1) { 1061 close(thread_data->pipes.ack[0]); 1062 thread_data->pipes.ack[0] = -1; 1063 } 1064 if (thread_data->pipes.ack[1] != -1) { 1065 close(thread_data->pipes.ack[1]); 1066 thread_data->pipes.ack[1] = -1; 1067 } 1068 } 1069 1070 static bool evlist__per_thread(struct evlist *evlist) 1071 { 1072 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 1073 } 1074 1075 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 1076 { 1077 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 1078 struct mmap *mmap = evlist->mmap; 1079 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 1080 struct perf_cpu_map *cpus = evlist->core.all_cpus; 1081 bool per_thread = evlist__per_thread(evlist); 1082 1083 if (per_thread) 1084 thread_data->nr_mmaps = nr_mmaps; 1085 else 1086 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1087 thread_data->mask->maps.nbits); 1088 if (mmap) { 1089 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1090 if 
(!thread_data->maps) 1091 return -ENOMEM; 1092 } 1093 if (overwrite_mmap) { 1094 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1095 if (!thread_data->overwrite_maps) { 1096 zfree(&thread_data->maps); 1097 return -ENOMEM; 1098 } 1099 } 1100 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1101 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1102 1103 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1104 if (per_thread || 1105 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1106 if (thread_data->maps) { 1107 thread_data->maps[tm] = &mmap[m]; 1108 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1109 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1110 } 1111 if (thread_data->overwrite_maps) { 1112 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1113 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1114 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1115 } 1116 tm++; 1117 } 1118 } 1119 1120 return 0; 1121 } 1122 1123 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1124 { 1125 int f, tm, pos; 1126 struct mmap *map, *overwrite_map; 1127 1128 fdarray__init(&thread_data->pollfd, 64); 1129 1130 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1131 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1132 overwrite_map = thread_data->overwrite_maps ? 1133 thread_data->overwrite_maps[tm] : NULL; 1134 1135 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1136 void *ptr = evlist->core.pollfd.priv[f].ptr; 1137 1138 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1139 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1140 &evlist->core.pollfd); 1141 if (pos < 0) 1142 return pos; 1143 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1144 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1145 } 1146 } 1147 } 1148 1149 return 0; 1150 } 1151 1152 static void record__free_thread_data(struct record *rec) 1153 { 1154 int t; 1155 struct record_thread *thread_data = rec->thread_data; 1156 1157 if (thread_data == NULL) 1158 return; 1159 1160 for (t = 0; t < rec->nr_threads; t++) { 1161 record__thread_data_close_pipes(&thread_data[t]); 1162 zfree(&thread_data[t].maps); 1163 zfree(&thread_data[t].overwrite_maps); 1164 fdarray__exit(&thread_data[t].pollfd); 1165 } 1166 1167 zfree(&rec->thread_data); 1168 } 1169 1170 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1171 int evlist_pollfd_index, 1172 int thread_pollfd_index) 1173 { 1174 size_t x = rec->index_map_cnt; 1175 1176 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1177 return -ENOMEM; 1178 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1179 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1180 rec->index_map_cnt += 1; 1181 return 0; 1182 } 1183 1184 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1185 struct evlist *evlist, 1186 struct record_thread *thread_data) 1187 { 1188 struct pollfd *e_entries = evlist->core.pollfd.entries; 1189 struct pollfd *t_entries = thread_data->pollfd.entries; 1190 int err = 0; 1191 size_t i; 1192 1193 for (i = 0; i < rec->index_map_cnt; i++) { 1194 int e_pos = rec->index_map[i].evlist_pollfd_index; 1195 int t_pos = rec->index_map[i].thread_pollfd_index; 1196 1197 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1198 e_entries[e_pos].events != 
t_entries[t_pos].events) { 1199 pr_err("Thread and evlist pollfd index mismatch\n"); 1200 err = -EINVAL; 1201 continue; 1202 } 1203 e_entries[e_pos].revents = t_entries[t_pos].revents; 1204 } 1205 return err; 1206 } 1207 1208 static int record__dup_non_perf_events(struct record *rec, 1209 struct evlist *evlist, 1210 struct record_thread *thread_data) 1211 { 1212 struct fdarray *fda = &evlist->core.pollfd; 1213 int i, ret; 1214 1215 for (i = 0; i < fda->nr; i++) { 1216 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1217 continue; 1218 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1219 if (ret < 0) { 1220 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1221 return ret; 1222 } 1223 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1224 thread_data, ret, fda->entries[i].fd); 1225 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1226 if (ret < 0) { 1227 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1228 return ret; 1229 } 1230 } 1231 return 0; 1232 } 1233 1234 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1235 { 1236 int t, ret; 1237 struct record_thread *thread_data; 1238 1239 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1240 if (!rec->thread_data) { 1241 pr_err("Failed to allocate thread data\n"); 1242 return -ENOMEM; 1243 } 1244 thread_data = rec->thread_data; 1245 1246 for (t = 0; t < rec->nr_threads; t++) 1247 record__thread_data_init_pipes(&thread_data[t]); 1248 1249 for (t = 0; t < rec->nr_threads; t++) { 1250 thread_data[t].rec = rec; 1251 thread_data[t].mask = &rec->thread_masks[t]; 1252 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1253 if (ret) { 1254 pr_err("Failed to initialize thread[%d] maps\n", t); 1255 goto out_free; 1256 } 1257 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1258 if (ret) { 1259 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1260 goto out_free; 1261 } 1262 if (t) { 1263 thread_data[t].tid = -1; 1264 ret = record__thread_data_open_pipes(&thread_data[t]); 1265 if (ret) { 1266 pr_err("Failed to open thread[%d] communication pipes\n", t); 1267 goto out_free; 1268 } 1269 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1270 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1271 if (ret < 0) { 1272 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1273 goto out_free; 1274 } 1275 thread_data[t].ctlfd_pos = ret; 1276 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1277 thread_data, thread_data[t].ctlfd_pos, 1278 thread_data[t].pipes.msg[0]); 1279 } else { 1280 thread_data[t].tid = gettid(); 1281 1282 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1283 if (ret < 0) 1284 goto out_free; 1285 1286 thread_data[t].ctlfd_pos = -1; /* Not used */ 1287 } 1288 } 1289 1290 return 0; 1291 1292 out_free: 1293 record__free_thread_data(rec); 1294 1295 return ret; 1296 } 1297 1298 static int record__mmap_evlist(struct record *rec, 1299 struct evlist *evlist) 1300 { 1301 int i, ret; 1302 struct record_opts *opts = &rec->opts; 1303 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1304 opts->auxtrace_sample_mode; 1305 char msg[512]; 1306 1307 if (opts->affinity != PERF_AFFINITY_SYS) 1308 cpu__setup_cpunode_map(); 1309 1310 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1311 opts->auxtrace_mmap_pages, 1312 auxtrace_overwrite, 1313 opts->nr_cblocks, opts->affinity, 1314 opts->mmap_flush, opts->comp_level) < 0) { 1315 if (errno 
== EPERM) { 1316 pr_err("Permission error mapping pages.\n" 1317 "Consider increasing " 1318 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1319 "or try again with a smaller value of -m/--mmap_pages.\n" 1320 "(current value: %u,%u)\n", 1321 opts->mmap_pages, opts->auxtrace_mmap_pages); 1322 return -errno; 1323 } else { 1324 pr_err("failed to mmap with %d (%s)\n", errno, 1325 str_error_r(errno, msg, sizeof(msg))); 1326 if (errno) 1327 return -errno; 1328 else 1329 return -EINVAL; 1330 } 1331 } 1332 1333 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1334 return -1; 1335 1336 ret = record__alloc_thread_data(rec, evlist); 1337 if (ret) 1338 return ret; 1339 1340 if (record__threads_enabled(rec)) { 1341 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1342 if (ret) { 1343 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1344 return ret; 1345 } 1346 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1347 if (evlist->mmap) 1348 evlist->mmap[i].file = &rec->data.dir.files[i]; 1349 if (evlist->overwrite_mmap) 1350 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1351 } 1352 } 1353 1354 return 0; 1355 } 1356 1357 static int record__mmap(struct record *rec) 1358 { 1359 return record__mmap_evlist(rec, rec->evlist); 1360 } 1361 1362 static int record__open(struct record *rec) 1363 { 1364 char msg[BUFSIZ]; 1365 struct evsel *pos; 1366 struct evlist *evlist = rec->evlist; 1367 struct perf_session *session = rec->session; 1368 struct record_opts *opts = &rec->opts; 1369 int rc = 0; 1370 1371 evlist__for_each_entry(evlist, pos) { 1372 try_again: 1373 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1374 if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { 1375 if (verbose > 0) 1376 ui__warning("%s\n", msg); 1377 goto try_again; 1378 } 1379 if ((errno == EINVAL || errno == EBADF) && 1380 pos->core.leader != &pos->core && 1381 pos->weak_group) { 1382 pos = evlist__reset_weak_group(evlist, pos, true); 1383 goto try_again; 1384 } 1385 rc = -errno; 1386 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1387 ui__error("%s\n", msg); 1388 goto out; 1389 } 1390 1391 pos->supported = true; 1392 } 1393 1394 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1395 pr_warning( 1396 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1397 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1398 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1399 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1400 "Samples in kernel modules won't be resolved at all.\n\n" 1401 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 1402 "even with a suitable vmlinux or kallsyms file.\n\n"); 1403 } 1404 1405 if (evlist__apply_filters(evlist, &pos, &opts->target)) { 1406 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1407 pos->filter ?: "BPF", evsel__name(pos), errno, 1408 str_error_r(errno, msg, sizeof(msg))); 1409 rc = -1; 1410 goto out; 1411 } 1412 1413 rc = record__mmap(rec); 1414 if (rc) 1415 goto out; 1416 1417 session->evlist = evlist; 1418 perf_session__set_id_hdr_size(session); 1419 out: 1420 return rc; 1421 } 1422 1423 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1424 { 1425 if (rec->evlist->first_sample_time == 0) 1426 rec->evlist->first_sample_time = sample_time; 1427 1428 if (sample_time) 1429 rec->evlist->last_sample_time = sample_time; 1430 } 1431 1432 static int process_sample_event(const struct perf_tool *tool, 1433 union perf_event *event, 1434 struct perf_sample *sample, 1435 struct evsel *evsel, 1436 struct machine *machine) 1437 { 1438 struct record *rec = container_of(tool, struct record, tool); 1439 1440 set_timestamp_boundary(rec, sample->time); 1441 1442 if (rec->buildid_all) 1443 return 0; 1444 1445 rec->samples++; 1446 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1447 } 1448 1449 static int process_buildids(struct record *rec) 1450 { 1451 struct perf_session *session = rec->session; 1452 1453 if (perf_data__size(&rec->data) == 0) 1454 return 0; 1455 1456 /* 1457 * During this process, it'll load kernel map and replace the 1458 * dso->long_name to a real pathname it found. In this case 1459 * we prefer the vmlinux path like 1460 * /lib/modules/3.16.4/build/vmlinux 1461 * 1462 * rather than build-id path (in debug directory). 1463 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1464 */ 1465 symbol_conf.ignore_vmlinux_buildid = true; 1466 1467 /* 1468 * If --buildid-all is given, it marks all DSO regardless of hits, 1469 * so no need to process samples. But if timestamp_boundary is enabled, 1470 * it still needs to walk on all samples to get the timestamps of 1471 * first/last samples. 1472 */ 1473 if (rec->buildid_all && !rec->timestamp_boundary) 1474 rec->tool.sample = process_event_sample_stub; 1475 1476 return perf_session__process_events(session); 1477 } 1478 1479 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1480 { 1481 int err; 1482 struct perf_tool *tool = data; 1483 /* 1484 *As for guest kernel when processing subcommand record&report, 1485 *we arrange module mmap prior to guest kernel mmap and trigger 1486 *a preload dso because default guest module symbols are loaded 1487 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1488 *method is used to avoid symbol missing when the first addr is 1489 *in module instead of in guest kernel. 1490 */ 1491 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1492 machine); 1493 if (err < 0) 1494 pr_err("Couldn't record guest kernel [%d]'s reference" 1495 " relocation symbol.\n", machine->pid); 1496 1497 /* 1498 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1499 * have no _text sometimes. 
1500 */ 1501 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1502 machine); 1503 if (err < 0) 1504 pr_err("Couldn't record guest kernel [%d]'s reference" 1505 " relocation symbol.\n", machine->pid); 1506 } 1507 1508 static struct perf_event_header finished_round_event = { 1509 .size = sizeof(struct perf_event_header), 1510 .type = PERF_RECORD_FINISHED_ROUND, 1511 }; 1512 1513 static struct perf_event_header finished_init_event = { 1514 .size = sizeof(struct perf_event_header), 1515 .type = PERF_RECORD_FINISHED_INIT, 1516 }; 1517 1518 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1519 { 1520 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1521 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1522 thread->mask->affinity.nbits)) { 1523 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1524 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1525 map->affinity_mask.bits, thread->mask->affinity.nbits); 1526 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1527 (cpu_set_t *)thread->mask->affinity.bits); 1528 if (verbose == 2) { 1529 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1530 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1531 } 1532 } 1533 } 1534 1535 static size_t process_comp_header(void *record, size_t increment) 1536 { 1537 struct perf_record_compressed *event = record; 1538 size_t size = sizeof(*event); 1539 1540 if (increment) { 1541 event->header.size += increment; 1542 return increment; 1543 } 1544 1545 event->header.type = PERF_RECORD_COMPRESSED; 1546 event->header.size = size; 1547 1548 return size; 1549 } 1550 1551 static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, 1552 void *dst, size_t dst_size, void *src, size_t src_size) 1553 { 1554 ssize_t compressed; 1555 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1556 struct zstd_data *zstd_data = &session->zstd_data; 1557 1558 if (map && map->file) 1559 zstd_data = &map->zstd_data; 1560 1561 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1562 max_record_size, process_comp_header); 1563 if (compressed < 0) 1564 return compressed; 1565 1566 if (map && map->file) { 1567 thread->bytes_transferred += src_size; 1568 thread->bytes_compressed += compressed; 1569 } else { 1570 session->bytes_transferred += src_size; 1571 session->bytes_compressed += compressed; 1572 } 1573 1574 return compressed; 1575 } 1576 1577 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1578 bool overwrite, bool synch) 1579 { 1580 u64 bytes_written = rec->bytes_written; 1581 int i; 1582 int rc = 0; 1583 int nr_mmaps; 1584 struct mmap **maps; 1585 int trace_fd = rec->data.file.fd; 1586 off_t off = 0; 1587 1588 if (!evlist) 1589 return 0; 1590 1591 nr_mmaps = thread->nr_mmaps; 1592 maps = overwrite ? 
thread->overwrite_maps : thread->maps; 1593 1594 if (!maps) 1595 return 0; 1596 1597 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1598 return 0; 1599 1600 if (record__aio_enabled(rec)) 1601 off = record__aio_get_pos(trace_fd); 1602 1603 for (i = 0; i < nr_mmaps; i++) { 1604 u64 flush = 0; 1605 struct mmap *map = maps[i]; 1606 1607 if (map->core.base) { 1608 record__adjust_affinity(rec, map); 1609 if (synch) { 1610 flush = map->core.flush; 1611 map->core.flush = 1; 1612 } 1613 if (!record__aio_enabled(rec)) { 1614 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1615 if (synch) 1616 map->core.flush = flush; 1617 rc = -1; 1618 goto out; 1619 } 1620 } else { 1621 if (record__aio_push(rec, map, &off) < 0) { 1622 record__aio_set_pos(trace_fd, off); 1623 if (synch) 1624 map->core.flush = flush; 1625 rc = -1; 1626 goto out; 1627 } 1628 } 1629 if (synch) 1630 map->core.flush = flush; 1631 } 1632 1633 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1634 !rec->opts.auxtrace_sample_mode && 1635 record__auxtrace_mmap_read(rec, map) != 0) { 1636 rc = -1; 1637 goto out; 1638 } 1639 } 1640 1641 if (record__aio_enabled(rec)) 1642 record__aio_set_pos(trace_fd, off); 1643 1644 /* 1645 * Mark the round finished in case we wrote 1646 * at least one event. 1647 * 1648 * No need for round events in directory mode, 1649 * because per-cpu maps and files have data 1650 * sorted by kernel. 1651 */ 1652 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1653 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1654 1655 if (overwrite) 1656 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1657 out: 1658 return rc; 1659 } 1660 1661 static int record__mmap_read_all(struct record *rec, bool synch) 1662 { 1663 int err; 1664 1665 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1666 if (err) 1667 return err; 1668 1669 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1670 } 1671 1672 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1673 void *arg __maybe_unused) 1674 { 1675 struct perf_mmap *map = fda->priv[fd].ptr; 1676 1677 if (map) 1678 perf_mmap__put(map); 1679 } 1680 1681 static void *record__thread(void *arg) 1682 { 1683 enum thread_msg msg = THREAD_MSG__READY; 1684 bool terminate = false; 1685 struct fdarray *pollfd; 1686 int err, ctlfd_pos; 1687 1688 thread = arg; 1689 thread->tid = gettid(); 1690 1691 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1692 if (err == -1) 1693 pr_warning("threads[%d]: failed to notify on start: %s\n", 1694 thread->tid, strerror(errno)); 1695 1696 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1697 1698 pollfd = &thread->pollfd; 1699 ctlfd_pos = thread->ctlfd_pos; 1700 1701 for (;;) { 1702 unsigned long long hits = thread->samples; 1703 1704 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1705 break; 1706 1707 if (hits == thread->samples) { 1708 1709 err = fdarray__poll(pollfd, -1); 1710 /* 1711 * Propagate error, only if there's any. Ignore positive 1712 * number of returned events and interrupt error. 
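			 * A zero return from fdarray__filter() below means no
			 * descriptors are left to poll, so the worker leaves
			 * its loop.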
1713 */ 1714 if (err > 0 || (err < 0 && errno == EINTR)) 1715 err = 0; 1716 thread->waking++; 1717 1718 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1719 record__thread_munmap_filtered, NULL) == 0) 1720 break; 1721 } 1722 1723 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1724 terminate = true; 1725 close(thread->pipes.msg[0]); 1726 thread->pipes.msg[0] = -1; 1727 pollfd->entries[ctlfd_pos].fd = -1; 1728 pollfd->entries[ctlfd_pos].events = 0; 1729 } 1730 1731 pollfd->entries[ctlfd_pos].revents = 0; 1732 } 1733 record__mmap_read_all(thread->rec, true); 1734 1735 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1736 if (err == -1) 1737 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1738 thread->tid, strerror(errno)); 1739 1740 return NULL; 1741 } 1742 1743 static void record__init_features(struct record *rec) 1744 { 1745 struct perf_session *session = rec->session; 1746 int feat; 1747 1748 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1749 perf_header__set_feat(&session->header, feat); 1750 1751 if (rec->no_buildid) 1752 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1753 1754 if (!have_tracepoints(&rec->evlist->core.entries)) 1755 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1756 1757 if (!rec->opts.branch_stack) 1758 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1759 1760 if (!rec->opts.full_auxtrace) 1761 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1762 1763 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1764 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1765 1766 if (!rec->opts.use_clockid) 1767 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1768 1769 if (!record__threads_enabled(rec)) 1770 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1771 1772 if (!record__comp_enabled(rec)) 1773 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1774 1775 perf_header__clear_feat(&session->header, HEADER_STAT); 1776 } 1777 1778 static void 1779 record__finish_output(struct record *rec) 1780 { 1781 int i; 1782 struct perf_data *data = &rec->data; 1783 int fd = perf_data__fd(data); 1784 1785 if (data->is_pipe) { 1786 /* Just to display approx. 
size */ 1787 data->file.size = rec->bytes_written; 1788 return; 1789 } 1790 1791 rec->session->header.data_size += rec->bytes_written; 1792 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1793 if (record__threads_enabled(rec)) { 1794 for (i = 0; i < data->dir.nr; i++) 1795 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1796 } 1797 1798 if (!rec->no_buildid) { 1799 process_buildids(rec); 1800 1801 if (rec->buildid_all) 1802 perf_session__dsos_hit_all(rec->session); 1803 } 1804 perf_session__write_header(rec->session, rec->evlist, fd, true); 1805 1806 return; 1807 } 1808 1809 static int record__synthesize_workload(struct record *rec, bool tail) 1810 { 1811 int err; 1812 struct perf_thread_map *thread_map; 1813 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1814 1815 if (rec->opts.tail_synthesize != tail) 1816 return 0; 1817 1818 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1819 if (thread_map == NULL) 1820 return -1; 1821 1822 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1823 process_synthesized_event, 1824 &rec->session->machines.host, 1825 needs_mmap, 1826 rec->opts.sample_address); 1827 perf_thread_map__put(thread_map); 1828 return err; 1829 } 1830 1831 static int write_finished_init(struct record *rec, bool tail) 1832 { 1833 if (rec->opts.tail_synthesize != tail) 1834 return 0; 1835 1836 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event)); 1837 } 1838 1839 static int record__synthesize(struct record *rec, bool tail); 1840 1841 static int 1842 record__switch_output(struct record *rec, bool at_exit) 1843 { 1844 struct perf_data *data = &rec->data; 1845 char *new_filename = NULL; 1846 int fd, err; 1847 1848 /* Same Size: "2015122520103046"*/ 1849 char timestamp[] = "InvalidTimestamp"; 1850 1851 record__aio_mmap_read_sync(rec); 1852 1853 write_finished_init(rec, true); 1854 1855 record__synthesize(rec, true); 1856 if (target__none(&rec->opts.target)) 1857 record__synthesize_workload(rec, true); 1858 1859 rec->samples = 0; 1860 record__finish_output(rec); 1861 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1862 if (err) { 1863 pr_err("Failed to get current timestamp\n"); 1864 return -EINVAL; 1865 } 1866 1867 fd = perf_data__switch(data, timestamp, 1868 rec->session->header.data_offset, 1869 at_exit, &new_filename); 1870 if (fd >= 0 && !at_exit) { 1871 rec->bytes_written = 0; 1872 rec->session->header.data_size = 0; 1873 } 1874 1875 if (!quiet) { 1876 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1877 data->path, timestamp); 1878 } 1879 1880 if (rec->switch_output.num_files) { 1881 int n = rec->switch_output.cur_file + 1; 1882 1883 if (n >= rec->switch_output.num_files) 1884 n = 0; 1885 rec->switch_output.cur_file = n; 1886 if (rec->switch_output.filenames[n]) { 1887 remove(rec->switch_output.filenames[n]); 1888 zfree(&rec->switch_output.filenames[n]); 1889 } 1890 rec->switch_output.filenames[n] = new_filename; 1891 } else { 1892 free(new_filename); 1893 } 1894 1895 /* Output tracking events */ 1896 if (!at_exit) { 1897 record__synthesize(rec, false); 1898 1899 /* 1900 * In 'perf record --switch-output' without -a, 1901 * record__synthesize() in record__switch_output() won't 1902 * generate tracking events because there's no thread_map 1903 * in evlist. Which causes newly created perf.data doesn't 1904 * contain map and comm information. 1905 * Create a fake thread_map and directly call 1906 * perf_event__synthesize_thread_map() for those events. 
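		 * record__synthesize_workload() below does exactly that: it
		 * builds a thread_map from the workload PID and synthesizes
		 * the thread/mmap events for it.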
1907 */ 1908 if (target__none(&rec->opts.target)) 1909 record__synthesize_workload(rec, false); 1910 write_finished_init(rec, false); 1911 } 1912 return fd; 1913 } 1914 1915 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, 1916 struct perf_record_lost_samples *lost, 1917 int cpu_idx, int thread_idx, u64 lost_count, 1918 u16 misc_flag) 1919 { 1920 struct perf_sample_id *sid; 1921 struct perf_sample sample; 1922 int id_hdr_size; 1923 1924 perf_sample__init(&sample, /*all=*/true); 1925 lost->lost = lost_count; 1926 if (evsel->core.ids) { 1927 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1928 sample.id = sid->id; 1929 } 1930 1931 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1932 evsel->core.attr.sample_type, &sample); 1933 lost->header.size = sizeof(*lost) + id_hdr_size; 1934 lost->header.misc = misc_flag; 1935 record__write(rec, NULL, lost, lost->header.size); 1936 perf_sample__exit(&sample); 1937 } 1938 1939 static void record__read_lost_samples(struct record *rec) 1940 { 1941 struct perf_session *session = rec->session; 1942 struct perf_record_lost_samples_and_ids lost; 1943 struct evsel *evsel; 1944 1945 /* there was an error during record__open */ 1946 if (session->evlist == NULL) 1947 return; 1948 1949 evlist__for_each_entry(session->evlist, evsel) { 1950 struct xyarray *xy = evsel->core.sample_id; 1951 u64 lost_count; 1952 1953 if (xy == NULL || evsel->core.fd == NULL) 1954 continue; 1955 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 1956 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 1957 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n"); 1958 continue; 1959 } 1960 1961 for (int x = 0; x < xyarray__max_x(xy); x++) { 1962 for (int y = 0; y < xyarray__max_y(xy); y++) { 1963 struct perf_counts_values count; 1964 1965 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) { 1966 pr_debug("read LOST count failed\n"); 1967 return; 1968 } 1969 1970 if (count.lost) { 1971 memset(&lost, 0, sizeof(lost)); 1972 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 1973 __record__save_lost_samples(rec, evsel, &lost.lost, 1974 x, y, count.lost, 0); 1975 } 1976 } 1977 } 1978 1979 lost_count = perf_bpf_filter__lost_count(evsel); 1980 if (lost_count) { 1981 memset(&lost, 0, sizeof(lost)); 1982 lost.lost.header.type = PERF_RECORD_LOST_SAMPLES; 1983 __record__save_lost_samples(rec, evsel, &lost.lost, 0, 0, lost_count, 1984 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 1985 } 1986 } 1987 } 1988 1989 static volatile sig_atomic_t workload_exec_errno; 1990 1991 /* 1992 * evlist__prepare_workload will send a SIGUSR1 1993 * if the fork fails, since we asked by setting its 1994 * want_signal to true. 
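 * The handler below stores the child's errno (passed in si_value) in
 * workload_exec_errno and flags the session as done.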
1995 */ 1996 static void workload_exec_failed_signal(int signo __maybe_unused, 1997 siginfo_t *info, 1998 void *ucontext __maybe_unused) 1999 { 2000 workload_exec_errno = info->si_value.sival_int; 2001 done = 1; 2002 child_finished = 1; 2003 } 2004 2005 static void snapshot_sig_handler(int sig); 2006 static void alarm_sig_handler(int sig); 2007 2008 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 2009 { 2010 if (evlist) { 2011 if (evlist->mmap && evlist->mmap[0].core.base) 2012 return evlist->mmap[0].core.base; 2013 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 2014 return evlist->overwrite_mmap[0].core.base; 2015 } 2016 return NULL; 2017 } 2018 2019 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 2020 { 2021 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 2022 if (pc) 2023 return pc; 2024 return NULL; 2025 } 2026 2027 static int record__synthesize(struct record *rec, bool tail) 2028 { 2029 struct perf_session *session = rec->session; 2030 struct machine *machine = &session->machines.host; 2031 struct perf_data *data = &rec->data; 2032 struct record_opts *opts = &rec->opts; 2033 struct perf_tool *tool = &rec->tool; 2034 int err = 0; 2035 event_op f = process_synthesized_event; 2036 2037 if (rec->opts.tail_synthesize != tail) 2038 return 0; 2039 2040 if (data->is_pipe) { 2041 err = perf_event__synthesize_for_pipe(tool, session, data, 2042 process_synthesized_event); 2043 if (err < 0) 2044 goto out; 2045 2046 rec->bytes_written += err; 2047 } 2048 2049 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 2050 process_synthesized_event, machine); 2051 if (err) 2052 goto out; 2053 2054 /* Synthesize id_index before auxtrace_info */ 2055 err = perf_event__synthesize_id_index(tool, 2056 process_synthesized_event, 2057 session->evlist, machine); 2058 if (err) 2059 goto out; 2060 2061 if (rec->opts.full_auxtrace) { 2062 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2063 session, process_synthesized_event); 2064 if (err) 2065 goto out; 2066 } 2067 2068 if (!evlist__exclude_kernel(rec->evlist)) { 2069 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2070 machine); 2071 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2072 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2073 "Check /proc/kallsyms permission or run as root.\n"); 2074 2075 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2076 machine); 2077 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2078 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 2079 "Check /proc/modules permission or run as root.\n"); 2080 } 2081 2082 if (perf_guest) { 2083 machines__process_guests(&session->machines, 2084 perf_event__synthesize_guest_os, tool); 2085 } 2086 2087 err = perf_event__synthesize_extra_attr(&rec->tool, 2088 rec->evlist, 2089 process_synthesized_event, 2090 data->is_pipe); 2091 if (err) 2092 goto out; 2093 2094 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2095 process_synthesized_event, 2096 NULL); 2097 if (err < 0) { 2098 pr_err("Couldn't synthesize thread map.\n"); 2099 return err; 2100 } 2101 2102 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2103 process_synthesized_event, NULL); 2104 if (err < 0) { 2105 pr_err("Couldn't synthesize cpu map.\n"); 2106 return err; 2107 } 2108 2109 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2110 machine, opts); 2111 if (err < 0) { 2112 pr_warning("Couldn't synthesize bpf events.\n"); 2113 err = 0; 2114 } 2115 2116 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2117 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2118 machine); 2119 if (err < 0) { 2120 pr_warning("Couldn't synthesize cgroup events.\n"); 2121 err = 0; 2122 } 2123 } 2124 2125 if (rec->opts.nr_threads_synthesize > 1) { 2126 mutex_init(&synth_lock); 2127 perf_set_multithreaded(); 2128 f = process_locked_synthesized_event; 2129 } 2130 2131 if (rec->opts.synth & PERF_SYNTH_TASK) { 2132 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2133 2134 err = __machine__synthesize_threads(machine, tool, &opts->target, 2135 rec->evlist->core.threads, 2136 f, needs_mmap, opts->sample_address, 2137 rec->opts.nr_threads_synthesize); 2138 } 2139 2140 if (rec->opts.nr_threads_synthesize > 1) { 2141 perf_set_singlethreaded(); 2142 mutex_destroy(&synth_lock); 2143 } 2144 2145 out: 2146 return err; 2147 } 2148 2149 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2150 { 2151 struct record *rec = data; 2152 pthread_kill(rec->thread_id, SIGUSR2); 2153 return 0; 2154 } 2155 2156 static int record__setup_sb_evlist(struct record *rec) 2157 { 2158 struct record_opts *opts = &rec->opts; 2159 2160 if (rec->sb_evlist != NULL) { 2161 /* 2162 * We get here if --switch-output-event populated the 2163 * sb_evlist, so associate a callback that will send a SIGUSR2 2164 * to the main thread. 
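		 * The main thread then treats that SIGUSR2 as if
		 * --switch-output=signal had been given on the command line.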
2165 */ 2166 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2167 rec->thread_id = pthread_self(); 2168 } 2169 #ifdef HAVE_LIBBPF_SUPPORT 2170 if (!opts->no_bpf_event) { 2171 if (rec->sb_evlist == NULL) { 2172 rec->sb_evlist = evlist__new(); 2173 2174 if (rec->sb_evlist == NULL) { 2175 pr_err("Couldn't create side band evlist.\n."); 2176 return -1; 2177 } 2178 } 2179 2180 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2181 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2182 return -1; 2183 } 2184 } 2185 #endif 2186 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2187 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2188 opts->no_bpf_event = true; 2189 } 2190 2191 return 0; 2192 } 2193 2194 static int record__init_clock(struct record *rec) 2195 { 2196 struct perf_session *session = rec->session; 2197 struct timespec ref_clockid; 2198 struct timeval ref_tod; 2199 u64 ref; 2200 2201 if (!rec->opts.use_clockid) 2202 return 0; 2203 2204 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2205 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2206 2207 session->header.env.clock.clockid = rec->opts.clockid; 2208 2209 if (gettimeofday(&ref_tod, NULL) != 0) { 2210 pr_err("gettimeofday failed, cannot set reference time.\n"); 2211 return -1; 2212 } 2213 2214 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2215 pr_err("clock_gettime failed, cannot set reference time.\n"); 2216 return -1; 2217 } 2218 2219 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2220 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2221 2222 session->header.env.clock.tod_ns = ref; 2223 2224 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2225 (u64) ref_clockid.tv_nsec; 2226 2227 session->header.env.clock.clockid_ns = ref; 2228 return 0; 2229 } 2230 2231 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2232 { 2233 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2234 trigger_hit(&auxtrace_snapshot_trigger); 2235 auxtrace_record__snapshot_started = 1; 2236 if (auxtrace_record__snapshot_start(rec->itr)) 2237 trigger_error(&auxtrace_snapshot_trigger); 2238 } 2239 } 2240 2241 static int record__terminate_thread(struct record_thread *thread_data) 2242 { 2243 int err; 2244 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2245 pid_t tid = thread_data->tid; 2246 2247 close(thread_data->pipes.msg[1]); 2248 thread_data->pipes.msg[1] = -1; 2249 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2250 if (err > 0) 2251 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2252 else 2253 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2254 thread->tid, tid); 2255 2256 return 0; 2257 } 2258 2259 static int record__start_threads(struct record *rec) 2260 { 2261 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2262 struct record_thread *thread_data = rec->thread_data; 2263 sigset_t full, mask; 2264 pthread_t handle; 2265 pthread_attr_t attrs; 2266 2267 thread = &thread_data[0]; 2268 2269 if (!record__threads_enabled(rec)) 2270 return 0; 2271 2272 sigfillset(&full); 2273 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2274 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2275 return -1; 2276 } 2277 2278 pthread_attr_init(&attrs); 2279 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2280 2281 for (t = 1; t < nr_threads; t++) { 2282 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2283 
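		/*
		 * Pin the new worker to its affinity mask up front (when
		 * pthread_attr_setaffinity_np() is available) so it starts on
		 * the CPUs whose mmap buffers it will be reading.
		 */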
#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
		pthread_attr_setaffinity_np(&attrs,
					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
#endif
		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
			for (tt = 1; tt < t; tt++)
				record__terminate_thread(&thread_data[tt]);
			pr_err("Failed to start threads: %s\n", strerror(errno));
			ret = -1;
			goto out_err;
		}

		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
		if (err > 0)
			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
				  thread_msg_tags[msg]);
		else
			pr_warning("threads[%d]: failed to receive start notification from %d\n",
				   thread->tid, rec->thread_data[t].tid);
	}

	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
			  (cpu_set_t *)thread->mask->affinity.bits);

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

out_err:
	pthread_attr_destroy(&attrs);

	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
		ret = -1;
	}

	return ret;
}

static int record__stop_threads(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 1; t < rec->nr_threads; t++)
		record__terminate_thread(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		rec->samples += thread_data[t].samples;
		if (!record__threads_enabled(rec))
			continue;
		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
			 thread_data[t].samples, thread_data[t].waking);
		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
		else
			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
	}

	return 0;
}

static unsigned long record__waking(struct record *rec)
{
	int t;
	unsigned long waking = 0;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		waking += thread_data[t].waking;

	return waking;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_cgroup) {
#ifndef HAVE_FILE_HANDLE
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if
(rec->switch_output.enabled) 2392 trigger_on(&switch_output_trigger); 2393 } else { 2394 signal(SIGUSR2, SIG_IGN); 2395 } 2396 2397 perf_tool__init(tool, /*ordered_events=*/true); 2398 tool->sample = process_sample_event; 2399 tool->fork = perf_event__process_fork; 2400 tool->exit = perf_event__process_exit; 2401 tool->comm = perf_event__process_comm; 2402 tool->namespaces = perf_event__process_namespaces; 2403 tool->mmap = build_id__process_mmap; 2404 tool->mmap2 = build_id__process_mmap2; 2405 tool->itrace_start = process_timestamp_boundary; 2406 tool->aux = process_timestamp_boundary; 2407 tool->namespace_events = rec->opts.record_namespaces; 2408 tool->cgroup_events = rec->opts.record_cgroup; 2409 session = perf_session__new(data, tool); 2410 if (IS_ERR(session)) { 2411 pr_err("Perf session creation failed.\n"); 2412 return PTR_ERR(session); 2413 } 2414 2415 if (record__threads_enabled(rec)) { 2416 if (perf_data__is_pipe(&rec->data)) { 2417 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2418 return -1; 2419 } 2420 if (rec->opts.full_auxtrace) { 2421 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2422 return -1; 2423 } 2424 } 2425 2426 fd = perf_data__fd(data); 2427 rec->session = session; 2428 2429 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2430 pr_err("Compression initialization failed.\n"); 2431 return -1; 2432 } 2433 #ifdef HAVE_EVENTFD_SUPPORT 2434 done_fd = eventfd(0, EFD_NONBLOCK); 2435 if (done_fd < 0) { 2436 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2437 status = -1; 2438 goto out_delete_session; 2439 } 2440 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2441 if (err < 0) { 2442 pr_err("Failed to add wakeup eventfd to poll list\n"); 2443 status = err; 2444 goto out_delete_session; 2445 } 2446 #endif // HAVE_EVENTFD_SUPPORT 2447 2448 session->header.env.comp_type = PERF_COMP_ZSTD; 2449 session->header.env.comp_level = rec->opts.comp_level; 2450 2451 if (rec->opts.kcore && 2452 !record__kcore_readable(&session->machines.host)) { 2453 pr_err("ERROR: kcore is not readable.\n"); 2454 return -1; 2455 } 2456 2457 if (record__init_clock(rec)) 2458 return -1; 2459 2460 record__init_features(rec); 2461 2462 if (forks) { 2463 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2464 workload_exec_failed_signal); 2465 if (err < 0) { 2466 pr_err("Couldn't run the workload!\n"); 2467 status = err; 2468 goto out_delete_session; 2469 } 2470 } 2471 2472 /* 2473 * If we have just single event and are sending data 2474 * through pipe, we need to force the ids allocation, 2475 * because we synthesize event name through the pipe 2476 * and need the id for that. 
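	 * (With more than one event the ids get allocated anyway, so only
	 * this single-event pipe case needs the explicit opt-in.)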
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
	}

	evlist__uniquify_name(rec->evlist);

	evlist__config(rec->evlist, opts, &callchain_param);

	/* Debug message used by test scripts */
	pr_debug3("perf record opening and mmapping events\n");
	if (record__open(rec) != 0) {
		err = -1;
		goto out_free_threads;
	}
	/* Debug message used by test scripts */
	pr_debug3("perf record done opening and mmapping events\n");
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_free_threads;
		}
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (evlist__nr_groups(rec->evlist) == 0)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_free_threads;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_free_threads;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_free_threads;
	}

	if (!evlist__needs_bpf_sb_event(rec->evlist))
		opts->no_bpf_event = true;

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_free_threads;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_free_threads;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_free_threads;
		}
	}

	if (record__start_threads(rec))
		goto out_free_threads;

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->target.initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
2590 */ 2591 tgid = perf_event__synthesize_comm(tool, event, 2592 rec->evlist->workload.pid, 2593 process_synthesized_event, 2594 machine); 2595 free(event); 2596 2597 if (tgid == -1) 2598 goto out_child; 2599 2600 event = malloc(sizeof(event->namespaces) + 2601 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2602 machine->id_hdr_size); 2603 if (event == NULL) { 2604 err = -ENOMEM; 2605 goto out_child; 2606 } 2607 2608 /* 2609 * Synthesize NAMESPACES event for the command specified. 2610 */ 2611 perf_event__synthesize_namespaces(tool, event, 2612 rec->evlist->workload.pid, 2613 tgid, process_synthesized_event, 2614 machine); 2615 free(event); 2616 2617 evlist__start_workload(rec->evlist); 2618 } 2619 2620 if (opts->target.initial_delay) { 2621 pr_info(EVLIST_DISABLED_MSG); 2622 if (opts->target.initial_delay > 0) { 2623 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2624 evlist__enable(rec->evlist); 2625 pr_info(EVLIST_ENABLED_MSG); 2626 } 2627 } 2628 2629 err = event_enable_timer__start(rec->evlist->eet); 2630 if (err) 2631 goto out_child; 2632 2633 /* Debug message used by test scripts */ 2634 pr_debug3("perf record has started\n"); 2635 fflush(stderr); 2636 2637 trigger_ready(&auxtrace_snapshot_trigger); 2638 trigger_ready(&switch_output_trigger); 2639 perf_hooks__invoke_record_start(); 2640 2641 /* 2642 * Must write FINISHED_INIT so it will be seen after all other 2643 * synthesized user events, but before any regular events. 2644 */ 2645 err = write_finished_init(rec, false); 2646 if (err < 0) 2647 goto out_child; 2648 2649 for (;;) { 2650 unsigned long long hits = thread->samples; 2651 2652 /* 2653 * rec->evlist->bkw_mmap_state is possible to be 2654 * BKW_MMAP_EMPTY here: when done == true and 2655 * hits != rec->samples in previous round. 2656 * 2657 * evlist__toggle_bkw_mmap ensure we never 2658 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2659 */ 2660 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2661 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2662 2663 if (record__mmap_read_all(rec, false) < 0) { 2664 trigger_error(&auxtrace_snapshot_trigger); 2665 trigger_error(&switch_output_trigger); 2666 err = -1; 2667 goto out_child; 2668 } 2669 2670 if (auxtrace_record__snapshot_started) { 2671 auxtrace_record__snapshot_started = 0; 2672 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2673 record__read_auxtrace_snapshot(rec, false); 2674 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2675 pr_err("AUX area tracing snapshot failed\n"); 2676 err = -1; 2677 goto out_child; 2678 } 2679 } 2680 2681 if (trigger_is_hit(&switch_output_trigger)) { 2682 /* 2683 * If switch_output_trigger is hit, the data in 2684 * overwritable ring buffer should have been collected, 2685 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2686 * 2687 * If SIGUSR2 raise after or during record__mmap_read_all(), 2688 * record__mmap_read_all() didn't collect data from 2689 * overwritable ring buffer. Read again. 2690 */ 2691 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2692 continue; 2693 trigger_ready(&switch_output_trigger); 2694 2695 /* 2696 * Reenable events in overwrite ring buffer after 2697 * record__mmap_read_all(): we should have collected 2698 * data from it. 
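			 * Toggling back to BKW_MMAP_RUNNING re-arms the
			 * overwritable buffers for the next output switch.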
2699 */ 2700 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2701 2702 if (!quiet) 2703 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2704 record__waking(rec)); 2705 thread->waking = 0; 2706 fd = record__switch_output(rec, false); 2707 if (fd < 0) { 2708 pr_err("Failed to switch to new file\n"); 2709 trigger_error(&switch_output_trigger); 2710 err = fd; 2711 goto out_child; 2712 } 2713 2714 /* re-arm the alarm */ 2715 if (rec->switch_output.time) 2716 alarm(rec->switch_output.time); 2717 } 2718 2719 if (hits == thread->samples) { 2720 if (done || draining) 2721 break; 2722 err = fdarray__poll(&thread->pollfd, -1); 2723 /* 2724 * Propagate error, only if there's any. Ignore positive 2725 * number of returned events and interrupt error. 2726 */ 2727 if (err > 0 || (err < 0 && errno == EINTR)) 2728 err = 0; 2729 thread->waking++; 2730 2731 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2732 record__thread_munmap_filtered, NULL) == 0) 2733 draining = true; 2734 2735 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2736 if (err) 2737 goto out_child; 2738 } 2739 2740 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2741 switch (cmd) { 2742 case EVLIST_CTL_CMD_SNAPSHOT: 2743 hit_auxtrace_snapshot_trigger(rec); 2744 evlist__ctlfd_ack(rec->evlist); 2745 break; 2746 case EVLIST_CTL_CMD_STOP: 2747 done = 1; 2748 break; 2749 case EVLIST_CTL_CMD_ACK: 2750 case EVLIST_CTL_CMD_UNSUPPORTED: 2751 case EVLIST_CTL_CMD_ENABLE: 2752 case EVLIST_CTL_CMD_DISABLE: 2753 case EVLIST_CTL_CMD_EVLIST: 2754 case EVLIST_CTL_CMD_PING: 2755 default: 2756 break; 2757 } 2758 } 2759 2760 err = event_enable_timer__process(rec->evlist->eet); 2761 if (err < 0) 2762 goto out_child; 2763 if (err) { 2764 err = 0; 2765 done = 1; 2766 } 2767 2768 /* 2769 * When perf is starting the traced process, at the end events 2770 * die with the process and we wait for that. Thus no need to 2771 * disable events in this case. 
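		 * For an existing target, though, disable the events once 'done'
		 * is set so nothing more is appended while the buffers drain.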
2772 */ 2773 if (done && !disabled && !target__none(&opts->target)) { 2774 trigger_off(&auxtrace_snapshot_trigger); 2775 evlist__disable(rec->evlist); 2776 disabled = true; 2777 } 2778 } 2779 2780 trigger_off(&auxtrace_snapshot_trigger); 2781 trigger_off(&switch_output_trigger); 2782 2783 if (opts->auxtrace_snapshot_on_exit) 2784 record__auxtrace_snapshot_exit(rec); 2785 2786 if (forks && workload_exec_errno) { 2787 char msg[STRERR_BUFSIZE], strevsels[2048]; 2788 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2789 2790 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2791 2792 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2793 strevsels, argv[0], emsg); 2794 err = -1; 2795 goto out_child; 2796 } 2797 2798 if (!quiet) 2799 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2800 record__waking(rec)); 2801 2802 write_finished_init(rec, true); 2803 2804 if (target__none(&rec->opts.target)) 2805 record__synthesize_workload(rec, true); 2806 2807 out_child: 2808 record__stop_threads(rec); 2809 record__mmap_read_all(rec, true); 2810 out_free_threads: 2811 record__free_thread_data(rec); 2812 evlist__finalize_ctlfd(rec->evlist); 2813 record__aio_mmap_read_sync(rec); 2814 2815 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2816 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2817 session->header.env.comp_ratio = ratio + 0.5; 2818 } 2819 2820 if (forks) { 2821 int exit_status; 2822 2823 if (!child_finished) 2824 kill(rec->evlist->workload.pid, SIGTERM); 2825 2826 wait(&exit_status); 2827 2828 if (err < 0) 2829 status = err; 2830 else if (WIFEXITED(exit_status)) 2831 status = WEXITSTATUS(exit_status); 2832 else if (WIFSIGNALED(exit_status)) 2833 signr = WTERMSIG(exit_status); 2834 } else 2835 status = err; 2836 2837 if (rec->off_cpu) 2838 rec->bytes_written += off_cpu_write(rec->session); 2839 2840 record__read_lost_samples(rec); 2841 record__synthesize(rec, true); 2842 /* this will be recalculated during process_buildids() */ 2843 rec->samples = 0; 2844 2845 if (!err) { 2846 if (!rec->timestamp_filename) { 2847 record__finish_output(rec); 2848 } else { 2849 fd = record__switch_output(rec, true); 2850 if (fd < 0) { 2851 status = fd; 2852 goto out_delete_session; 2853 } 2854 } 2855 } 2856 2857 perf_hooks__invoke_record_end(); 2858 2859 if (!err && !quiet) { 2860 char samples[128]; 2861 const char *postfix = rec->timestamp_filename ? 
2862 ".<timestamp>" : ""; 2863 2864 if (rec->samples && !rec->opts.full_auxtrace) 2865 scnprintf(samples, sizeof(samples), 2866 " (%" PRIu64 " samples)", rec->samples); 2867 else 2868 samples[0] = '\0'; 2869 2870 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2871 perf_data__size(data) / 1024.0 / 1024.0, 2872 data->path, postfix, samples); 2873 if (ratio) { 2874 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2875 rec->session->bytes_transferred / 1024.0 / 1024.0, 2876 ratio); 2877 } 2878 fprintf(stderr, " ]\n"); 2879 } 2880 2881 out_delete_session: 2882 #ifdef HAVE_EVENTFD_SUPPORT 2883 if (done_fd >= 0) { 2884 fd = done_fd; 2885 done_fd = -1; 2886 2887 close(fd); 2888 } 2889 #endif 2890 zstd_fini(&session->zstd_data); 2891 if (!opts->no_bpf_event) 2892 evlist__stop_sb_thread(rec->sb_evlist); 2893 2894 perf_session__delete(session); 2895 return status; 2896 } 2897 2898 static void callchain_debug(struct callchain_param *callchain) 2899 { 2900 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2901 2902 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2903 2904 if (callchain->record_mode == CALLCHAIN_DWARF) 2905 pr_debug("callchain: stack dump size %d\n", 2906 callchain->dump_size); 2907 } 2908 2909 int record_opts__parse_callchain(struct record_opts *record, 2910 struct callchain_param *callchain, 2911 const char *arg, bool unset) 2912 { 2913 int ret; 2914 callchain->enabled = !unset; 2915 2916 /* --no-call-graph */ 2917 if (unset) { 2918 callchain->record_mode = CALLCHAIN_NONE; 2919 pr_debug("callchain: disabled\n"); 2920 return 0; 2921 } 2922 2923 ret = parse_callchain_record_opt(arg, callchain); 2924 if (!ret) { 2925 /* Enable data address sampling for DWARF unwind. */ 2926 if (callchain->record_mode == CALLCHAIN_DWARF) 2927 record->sample_address = true; 2928 callchain_debug(callchain); 2929 } 2930 2931 return ret; 2932 } 2933 2934 int record_parse_callchain_opt(const struct option *opt, 2935 const char *arg, 2936 int unset) 2937 { 2938 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2939 } 2940 2941 int record_callchain_opt(const struct option *opt, 2942 const char *arg __maybe_unused, 2943 int unset __maybe_unused) 2944 { 2945 struct callchain_param *callchain = opt->value; 2946 2947 callchain->enabled = true; 2948 2949 if (callchain->record_mode == CALLCHAIN_NONE) 2950 callchain->record_mode = CALLCHAIN_FP; 2951 2952 callchain_debug(callchain); 2953 return 0; 2954 } 2955 2956 static int perf_record_config(const char *var, const char *value, void *cb) 2957 { 2958 struct record *rec = cb; 2959 2960 if (!strcmp(var, "record.build-id")) { 2961 if (!strcmp(value, "cache")) 2962 rec->no_buildid_cache = false; 2963 else if (!strcmp(value, "no-cache")) 2964 rec->no_buildid_cache = true; 2965 else if (!strcmp(value, "skip")) 2966 rec->no_buildid = true; 2967 else if (!strcmp(value, "mmap")) 2968 rec->buildid_mmap = true; 2969 else 2970 return -1; 2971 return 0; 2972 } 2973 if (!strcmp(var, "record.call-graph")) { 2974 var = "call-graph.record-mode"; 2975 return perf_default_config(var, value, cb); 2976 } 2977 #ifdef HAVE_AIO_SUPPORT 2978 if (!strcmp(var, "record.aio")) { 2979 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2980 if (!rec->opts.nr_cblocks) 2981 rec->opts.nr_cblocks = nr_cblocks_default; 2982 } 2983 #endif 2984 if (!strcmp(var, "record.debuginfod")) { 2985 rec->debuginfod.urls = strdup(value); 2986 if (!rec->debuginfod.urls) 2987 return -ENOMEM; 2988 rec->debuginfod.set = 
true; 2989 } 2990 2991 return 0; 2992 } 2993 2994 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2995 { 2996 struct record *rec = (struct record *)opt->value; 2997 2998 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2999 } 3000 3001 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 3002 { 3003 struct record_opts *opts = (struct record_opts *)opt->value; 3004 3005 if (unset || !str) 3006 return 0; 3007 3008 if (!strcasecmp(str, "node")) 3009 opts->affinity = PERF_AFFINITY_NODE; 3010 else if (!strcasecmp(str, "cpu")) 3011 opts->affinity = PERF_AFFINITY_CPU; 3012 3013 return 0; 3014 } 3015 3016 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 3017 { 3018 mask->nbits = nr_bits; 3019 mask->bits = bitmap_zalloc(mask->nbits); 3020 if (!mask->bits) 3021 return -ENOMEM; 3022 3023 return 0; 3024 } 3025 3026 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 3027 { 3028 bitmap_free(mask->bits); 3029 mask->nbits = 0; 3030 } 3031 3032 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 3033 { 3034 int ret; 3035 3036 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 3037 if (ret) { 3038 mask->affinity.bits = NULL; 3039 return ret; 3040 } 3041 3042 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3043 if (ret) { 3044 record__mmap_cpu_mask_free(&mask->maps); 3045 mask->maps.bits = NULL; 3046 } 3047 3048 return ret; 3049 } 3050 3051 static void record__thread_mask_free(struct thread_mask *mask) 3052 { 3053 record__mmap_cpu_mask_free(&mask->maps); 3054 record__mmap_cpu_mask_free(&mask->affinity); 3055 } 3056 3057 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3058 { 3059 int s; 3060 struct record_opts *opts = opt->value; 3061 3062 if (unset || !str || !strlen(str)) { 3063 opts->threads_spec = THREAD_SPEC__CPU; 3064 } else { 3065 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3066 if (s == THREAD_SPEC__USER) { 3067 opts->threads_user_spec = strdup(str); 3068 if (!opts->threads_user_spec) 3069 return -ENOMEM; 3070 opts->threads_spec = THREAD_SPEC__USER; 3071 break; 3072 } 3073 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3074 opts->threads_spec = s; 3075 break; 3076 } 3077 } 3078 } 3079 3080 if (opts->threads_spec == THREAD_SPEC__USER) 3081 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3082 else 3083 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3084 3085 return 0; 3086 } 3087 3088 static int parse_output_max_size(const struct option *opt, 3089 const char *str, int unset) 3090 { 3091 unsigned long *s = (unsigned long *)opt->value; 3092 static struct parse_tag tags_size[] = { 3093 { .tag = 'B', .mult = 1 }, 3094 { .tag = 'K', .mult = 1 << 10 }, 3095 { .tag = 'M', .mult = 1 << 20 }, 3096 { .tag = 'G', .mult = 1 << 30 }, 3097 { .tag = 0 }, 3098 }; 3099 unsigned long val; 3100 3101 if (unset) { 3102 *s = 0; 3103 return 0; 3104 } 3105 3106 val = parse_tag_value(str, tags_size); 3107 if (val != (unsigned long) -1) { 3108 *s = val; 3109 return 0; 3110 } 3111 3112 return -1; 3113 } 3114 3115 static int record__parse_mmap_pages(const struct option *opt, 3116 const char *str, 3117 int unset __maybe_unused) 3118 { 3119 struct record_opts *opts = opt->value; 3120 char *s, *p; 3121 unsigned int mmap_pages; 3122 int ret; 3123 3124 if (!str) 3125 return -EINVAL; 3126 3127 s = strdup(str); 3128 if (!s) 3129 return -ENOMEM; 
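	/*
	 * The argument is "pages[,pages]": data mmap pages first, optionally
	 * followed by the AUX area mmap pages after a comma; both parts also
	 * accept a size with a B/K/M/G suffix. For instance, "-m 512,128"
	 * yields opts->mmap_pages = 512 and opts->auxtrace_mmap_pages = 128.
	 */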
3130 3131 p = strchr(s, ','); 3132 if (p) 3133 *p = '\0'; 3134 3135 if (*s) { 3136 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3137 if (ret) 3138 goto out_free; 3139 opts->mmap_pages = mmap_pages; 3140 } 3141 3142 if (!p) { 3143 ret = 0; 3144 goto out_free; 3145 } 3146 3147 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3148 if (ret) 3149 goto out_free; 3150 3151 opts->auxtrace_mmap_pages = mmap_pages; 3152 3153 out_free: 3154 free(s); 3155 return ret; 3156 } 3157 3158 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3159 { 3160 } 3161 3162 static int parse_control_option(const struct option *opt, 3163 const char *str, 3164 int unset __maybe_unused) 3165 { 3166 struct record_opts *opts = opt->value; 3167 3168 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3169 } 3170 3171 static void switch_output_size_warn(struct record *rec) 3172 { 3173 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3174 struct switch_output *s = &rec->switch_output; 3175 3176 wakeup_size /= 2; 3177 3178 if (s->size < wakeup_size) { 3179 char buf[100]; 3180 3181 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3182 pr_warning("WARNING: switch-output data size lower than " 3183 "wakeup kernel buffer size (%s) " 3184 "expect bigger perf.data sizes\n", buf); 3185 } 3186 } 3187 3188 static int switch_output_setup(struct record *rec) 3189 { 3190 struct switch_output *s = &rec->switch_output; 3191 static struct parse_tag tags_size[] = { 3192 { .tag = 'B', .mult = 1 }, 3193 { .tag = 'K', .mult = 1 << 10 }, 3194 { .tag = 'M', .mult = 1 << 20 }, 3195 { .tag = 'G', .mult = 1 << 30 }, 3196 { .tag = 0 }, 3197 }; 3198 static struct parse_tag tags_time[] = { 3199 { .tag = 's', .mult = 1 }, 3200 { .tag = 'm', .mult = 60 }, 3201 { .tag = 'h', .mult = 60*60 }, 3202 { .tag = 'd', .mult = 60*60*24 }, 3203 { .tag = 0 }, 3204 }; 3205 unsigned long val; 3206 3207 /* 3208 * If we're using --switch-output-events, then we imply its 3209 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3210 * thread to its parent. 
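	 * Otherwise s->str is either "signal", a size such as "10M" (B/K/M/G)
	 * or a time such as "30s" (s/m/h/d), matching the tag tables above.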
3211 */ 3212 if (rec->switch_output_event_set) { 3213 if (record__threads_enabled(rec)) { 3214 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3215 return 0; 3216 } 3217 goto do_signal; 3218 } 3219 3220 if (!s->set) 3221 return 0; 3222 3223 if (record__threads_enabled(rec)) { 3224 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3225 return 0; 3226 } 3227 3228 if (!strcmp(s->str, "signal")) { 3229 do_signal: 3230 s->signal = true; 3231 pr_debug("switch-output with SIGUSR2 signal\n"); 3232 goto enabled; 3233 } 3234 3235 val = parse_tag_value(s->str, tags_size); 3236 if (val != (unsigned long) -1) { 3237 s->size = val; 3238 pr_debug("switch-output with %s size threshold\n", s->str); 3239 goto enabled; 3240 } 3241 3242 val = parse_tag_value(s->str, tags_time); 3243 if (val != (unsigned long) -1) { 3244 s->time = val; 3245 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3246 s->str, s->time); 3247 goto enabled; 3248 } 3249 3250 return -1; 3251 3252 enabled: 3253 rec->timestamp_filename = true; 3254 s->enabled = true; 3255 3256 if (s->size && !rec->opts.no_buffering) 3257 switch_output_size_warn(rec); 3258 3259 return 0; 3260 } 3261 3262 static const char * const __record_usage[] = { 3263 "perf record [<options>] [<command>]", 3264 "perf record [<options>] -- <command> [<options>]", 3265 NULL 3266 }; 3267 const char * const *record_usage = __record_usage; 3268 3269 static int build_id__process_mmap(const struct perf_tool *tool, union perf_event *event, 3270 struct perf_sample *sample, struct machine *machine) 3271 { 3272 /* 3273 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3274 * no need to add them twice. 3275 */ 3276 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3277 return 0; 3278 return perf_event__process_mmap(tool, event, sample, machine); 3279 } 3280 3281 static int build_id__process_mmap2(const struct perf_tool *tool, union perf_event *event, 3282 struct perf_sample *sample, struct machine *machine) 3283 { 3284 /* 3285 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3286 * no need to add them twice. 3287 */ 3288 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3289 return 0; 3290 3291 return perf_event__process_mmap2(tool, event, sample, machine); 3292 } 3293 3294 static int process_timestamp_boundary(const struct perf_tool *tool, 3295 union perf_event *event __maybe_unused, 3296 struct perf_sample *sample, 3297 struct machine *machine __maybe_unused) 3298 { 3299 struct record *rec = container_of(tool, struct record, tool); 3300 3301 set_timestamp_boundary(rec, sample->time); 3302 return 0; 3303 } 3304 3305 static int parse_record_synth_option(const struct option *opt, 3306 const char *str, 3307 int unset __maybe_unused) 3308 { 3309 struct record_opts *opts = opt->value; 3310 char *p = strdup(str); 3311 3312 if (p == NULL) 3313 return -1; 3314 3315 opts->synth = parse_synth_opt(p); 3316 free(p); 3317 3318 if (opts->synth < 0) { 3319 pr_err("Invalid synth option: %s\n", str); 3320 return -1; 3321 } 3322 return 0; 3323 } 3324 3325 /* 3326 * XXX Ideally would be local to cmd_record() and passed to a record__new 3327 * because we need to have access to it in record__exit, that is called 3328 * after cmd_record() exits, but since record_options need to be accessible to 3329 * builtin-script, leave it here. 3330 * 3331 * At least we don't ouch it in all the other functions here directly. 
3332 * 3333 * Just say no to tons of global variables, sigh. 3334 */ 3335 static struct record record = { 3336 .opts = { 3337 .sample_time = true, 3338 .mmap_pages = UINT_MAX, 3339 .user_freq = UINT_MAX, 3340 .user_interval = ULLONG_MAX, 3341 .freq = 4000, 3342 .target = { 3343 .uses_mmap = true, 3344 .default_per_cpu = true, 3345 }, 3346 .mmap_flush = MMAP_FLUSH_DEFAULT, 3347 .nr_threads_synthesize = 1, 3348 .ctl_fd = -1, 3349 .ctl_fd_ack = -1, 3350 .synth = PERF_SYNTH_ALL, 3351 }, 3352 }; 3353 3354 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3355 "\n\t\t\t\tDefault: fp"; 3356 3357 static bool dry_run; 3358 3359 static struct parse_events_option_args parse_events_option_args = { 3360 .evlistp = &record.evlist, 3361 }; 3362 3363 static struct parse_events_option_args switch_output_parse_events_option_args = { 3364 .evlistp = &record.sb_evlist, 3365 }; 3366 3367 /* 3368 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3369 * with it and switch to use the library functions in perf_evlist that came 3370 * from builtin-record.c, i.e. use record_opts, 3371 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3372 * using pipes, etc. 3373 */ 3374 static struct option __record_options[] = { 3375 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3376 "event selector. use 'perf list' to list available events", 3377 parse_events_option), 3378 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3379 "event filter", parse_filter), 3380 OPT_BOOLEAN(0, "latency", &record.latency, 3381 "Enable data collection for latency profiling.\n" 3382 "\t\t\t Use perf report --latency for latency-centric profile."), 3383 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3384 NULL, "don't record events from perf itself", 3385 exclude_perf), 3386 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3387 "record events on existing process id"), 3388 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3389 "record events on existing thread id"), 3390 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3391 "collect data with this RT SCHED_FIFO priority"), 3392 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3393 "collect data without buffering"), 3394 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3395 "collect raw sample records from all opened counters"), 3396 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3397 "system-wide collection from all CPUs"), 3398 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3399 "list of cpus to monitor"), 3400 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3401 OPT_STRING('o', "output", &record.data.path, "file", 3402 "output file name"), 3403 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3404 &record.opts.no_inherit_set, 3405 "child tasks do not inherit counters"), 3406 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3407 "synthesize non-sample events at the end of output"), 3408 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3409 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3410 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3411 "Fail if the specified frequency can't be used"), 3412 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3413 "profile at this frequency", 3414 record__parse_freq), 3415 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3416 "number of mmap data pages and AUX area tracing 
mmap pages", 3417 record__parse_mmap_pages), 3418 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3419 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3420 record__mmap_flush_parse), 3421 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3422 NULL, "enables call-graph recording" , 3423 &record_callchain_opt), 3424 OPT_CALLBACK(0, "call-graph", &record.opts, 3425 "record_mode[,record_size]", record_callchain_help, 3426 &record_parse_callchain_opt), 3427 OPT_INCR('v', "verbose", &verbose, 3428 "be more verbose (show counter open errors, etc)"), 3429 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3430 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3431 "per thread counts"), 3432 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3433 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3434 "Record the sample physical addresses"), 3435 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3436 "Record the sampled data address data page size"), 3437 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3438 "Record the sampled code address (ip) page size"), 3439 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3440 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3441 "Record the sample identifier"), 3442 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3443 &record.opts.sample_time_set, 3444 "Record the sample timestamps"), 3445 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3446 "Record the sample period"), 3447 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3448 "don't sample"), 3449 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3450 &record.no_buildid_cache_set, 3451 "do not update the buildid cache"), 3452 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3453 &record.no_buildid_set, 3454 "do not collect buildids in perf.data"), 3455 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3456 "monitor event in cgroup name only", 3457 parse_cgroups), 3458 OPT_CALLBACK('D', "delay", &record, "ms", 3459 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3460 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3461 record__parse_event_enable_time), 3462 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3463 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3464 "user to profile"), 3465 3466 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3467 "branch any", "sample any taken branches", 3468 parse_branch_stack), 3469 3470 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3471 "branch filter mask", "branch stack filter modes", 3472 parse_branch_stack), 3473 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3474 "sample by weight (on special events only)"), 3475 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3476 "sample transaction flags (special events only)"), 3477 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3478 "use per-thread mmaps"), 3479 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3480 "sample selected machine registers on interrupt," 3481 " use '-I?' 
to list register names", parse_intr_regs), 3482 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3483 "sample selected machine registers on interrupt," 3484 " use '--user-regs=?' to list register names", parse_user_regs), 3485 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3486 "Record running/enabled time of read (:S) events"), 3487 OPT_CALLBACK('k', "clockid", &record.opts, 3488 "clockid", "clockid to use for events, see clock_gettime()", 3489 parse_clockid), 3490 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3491 "opts", "AUX area tracing Snapshot Mode", ""), 3492 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3493 "opts", "sample AUX area", ""), 3494 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3495 "per thread proc mmap processing timeout in ms"), 3496 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3497 "Record namespaces events"), 3498 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3499 "Record cgroup events"), 3500 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3501 &record.opts.record_switch_events_set, 3502 "Record context switch events"), 3503 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3504 "Configure all used events to run in kernel space.", 3505 PARSE_OPT_EXCLUSIVE), 3506 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3507 "Configure all used events to run in user space.", 3508 PARSE_OPT_EXCLUSIVE), 3509 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3510 "collect kernel callchains"), 3511 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3512 "collect user callchains"), 3513 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3514 "file", "vmlinux pathname"), 3515 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3516 "Record build-id of all DSOs regardless of hits"), 3517 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3518 "Record build-id in map events"), 3519 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3520 "append timestamp to output filename"), 3521 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3522 "Record timestamp boundary (time of first/last samples)"), 3523 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3524 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3525 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3526 "signal"), 3527 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3528 &record.switch_output_event_set, "switch output event", 3529 "switch output event selector. 
use 'perf list' to list available events", 3530 parse_events_option_new_evlist), 3531 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3532 "Limit number of switch output generated files"), 3533 OPT_BOOLEAN(0, "dry-run", &dry_run, 3534 "Parse options then exit"), 3535 #ifdef HAVE_AIO_SUPPORT 3536 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3537 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3538 record__aio_parse), 3539 #endif 3540 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3541 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3542 record__parse_affinity), 3543 #ifdef HAVE_ZSTD_SUPPORT 3544 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3545 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3546 record__parse_comp_level), 3547 #endif 3548 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3549 "size", "Limit the maximum size of the output file", parse_output_max_size), 3550 OPT_UINTEGER(0, "num-thread-synthesize", 3551 &record.opts.nr_threads_synthesize, 3552 "number of threads to run for event synthesis"), 3553 #ifdef HAVE_LIBPFM 3554 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3555 "libpfm4 event selector. use 'perf list' to list available events", 3556 parse_libpfm_events_option), 3557 #endif 3558 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3559 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3560 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3561 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3562 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3563 parse_control_option), 3564 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3565 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3566 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3567 &record.debuginfod.set, "debuginfod urls", 3568 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3569 "system"), 3570 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3571 "write collected trace data into several data files using parallel threads", 3572 record__parse_threads), 3573 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3574 OPT_STRING(0, "setup-filter", &record.filter_action, "pin|unpin", 3575 "BPF filter action"), 3576 OPT_END() 3577 }; 3578 3579 struct option *record_options = __record_options; 3580 3581 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3582 { 3583 struct perf_cpu cpu; 3584 int idx; 3585 3586 if (cpu_map__is_dummy(cpus)) 3587 return 0; 3588 3589 perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { 3590 /* Return ENODEV is input cpu is greater than max cpu */ 3591 if ((unsigned long)cpu.cpu > mask->nbits) 3592 return -ENODEV; 3593 __set_bit(cpu.cpu, mask->bits); 3594 } 3595 3596 return 0; 3597 } 3598 3599 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3600 { 3601 struct perf_cpu_map *cpus; 3602 3603 cpus = perf_cpu_map__new(mask_spec); 3604 if (!cpus) 3605 return -ENOMEM; 3606 3607 bitmap_zero(mask->bits, mask->nbits); 3608 if (record__mmap_cpu_mask_init(mask, cpus)) 3609 return 
-ENODEV; 3610 3611 perf_cpu_map__put(cpus); 3612 3613 return 0; 3614 } 3615 3616 static void record__free_thread_masks(struct record *rec, int nr_threads) 3617 { 3618 int t; 3619 3620 if (rec->thread_masks) 3621 for (t = 0; t < nr_threads; t++) 3622 record__thread_mask_free(&rec->thread_masks[t]); 3623 3624 zfree(&rec->thread_masks); 3625 } 3626 3627 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3628 { 3629 int t, ret; 3630 3631 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3632 if (!rec->thread_masks) { 3633 pr_err("Failed to allocate thread masks\n"); 3634 return -ENOMEM; 3635 } 3636 3637 for (t = 0; t < nr_threads; t++) { 3638 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3639 if (ret) { 3640 pr_err("Failed to allocate thread masks[%d]\n", t); 3641 goto out_free; 3642 } 3643 } 3644 3645 return 0; 3646 3647 out_free: 3648 record__free_thread_masks(rec, nr_threads); 3649 3650 return ret; 3651 } 3652 3653 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3654 { 3655 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3656 3657 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3658 if (ret) 3659 return ret; 3660 3661 rec->nr_threads = nr_cpus; 3662 pr_debug("nr_threads: %d\n", rec->nr_threads); 3663 3664 for (t = 0; t < rec->nr_threads; t++) { 3665 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3666 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3667 if (verbose > 0) { 3668 pr_debug("thread_masks[%d]: ", t); 3669 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3670 pr_debug("thread_masks[%d]: ", t); 3671 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3672 } 3673 } 3674 3675 return 0; 3676 } 3677 3678 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3679 const char **maps_spec, const char **affinity_spec, 3680 u32 nr_spec) 3681 { 3682 u32 s; 3683 int ret = 0, t = 0; 3684 struct mmap_cpu_mask cpus_mask; 3685 struct thread_mask thread_mask, full_mask, *thread_masks; 3686 3687 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3688 if (ret) { 3689 pr_err("Failed to allocate CPUs mask\n"); 3690 return ret; 3691 } 3692 3693 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3694 if (ret) { 3695 pr_err("Failed to init cpu mask\n"); 3696 goto out_free_cpu_mask; 3697 } 3698 3699 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3700 if (ret) { 3701 pr_err("Failed to allocate full mask\n"); 3702 goto out_free_cpu_mask; 3703 } 3704 3705 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3706 if (ret) { 3707 pr_err("Failed to allocate thread mask\n"); 3708 goto out_free_full_and_cpu_masks; 3709 } 3710 3711 for (s = 0; s < nr_spec; s++) { 3712 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3713 if (ret) { 3714 pr_err("Failed to initialize maps thread mask\n"); 3715 goto out_free; 3716 } 3717 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3718 if (ret) { 3719 pr_err("Failed to initialize affinity thread mask\n"); 3720 goto out_free; 3721 } 3722 3723 /* ignore invalid CPUs but do not allow empty masks */ 3724 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3725 cpus_mask.bits, thread_mask.maps.nbits)) { 3726 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3727 ret = -EINVAL; 3728 goto out_free; 3729 } 3730 if 
(!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3731 cpus_mask.bits, thread_mask.affinity.nbits)) { 3732 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3733 ret = -EINVAL; 3734 goto out_free; 3735 } 3736 3737 /* do not allow intersection with other masks (full_mask) */ 3738 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3739 thread_mask.maps.nbits)) { 3740 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3741 ret = -EINVAL; 3742 goto out_free; 3743 } 3744 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3745 thread_mask.affinity.nbits)) { 3746 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3747 ret = -EINVAL; 3748 goto out_free; 3749 } 3750 3751 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3752 thread_mask.maps.bits, full_mask.maps.nbits); 3753 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3754 thread_mask.affinity.bits, full_mask.maps.nbits); 3755 3756 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3757 if (!thread_masks) { 3758 pr_err("Failed to reallocate thread masks\n"); 3759 ret = -ENOMEM; 3760 goto out_free; 3761 } 3762 rec->thread_masks = thread_masks; 3763 rec->thread_masks[t] = thread_mask; 3764 if (verbose > 0) { 3765 pr_debug("thread_masks[%d]: ", t); 3766 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3767 pr_debug("thread_masks[%d]: ", t); 3768 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3769 } 3770 t++; 3771 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3772 if (ret) { 3773 pr_err("Failed to allocate thread mask\n"); 3774 goto out_free_full_and_cpu_masks; 3775 } 3776 } 3777 rec->nr_threads = t; 3778 pr_debug("nr_threads: %d\n", rec->nr_threads); 3779 if (!rec->nr_threads) 3780 ret = -EINVAL; 3781 3782 out_free: 3783 record__thread_mask_free(&thread_mask); 3784 out_free_full_and_cpu_masks: 3785 record__thread_mask_free(&full_mask); 3786 out_free_cpu_mask: 3787 record__mmap_cpu_mask_free(&cpus_mask); 3788 3789 return ret; 3790 } 3791 3792 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3793 { 3794 int ret; 3795 struct cpu_topology *topo; 3796 3797 topo = cpu_topology__new(); 3798 if (!topo) { 3799 pr_err("Failed to allocate CPU topology\n"); 3800 return -ENOMEM; 3801 } 3802 3803 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3804 topo->core_cpus_list, topo->core_cpus_lists); 3805 cpu_topology__delete(topo); 3806 3807 return ret; 3808 } 3809 3810 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3811 { 3812 int ret; 3813 struct cpu_topology *topo; 3814 3815 topo = cpu_topology__new(); 3816 if (!topo) { 3817 pr_err("Failed to allocate CPU topology\n"); 3818 return -ENOMEM; 3819 } 3820 3821 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3822 topo->package_cpus_list, topo->package_cpus_lists); 3823 cpu_topology__delete(topo); 3824 3825 return ret; 3826 } 3827 3828 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3829 { 3830 u32 s; 3831 int ret; 3832 const char **spec; 3833 struct numa_topology *topo; 3834 3835 topo = numa_topology__new(); 3836 if (!topo) { 3837 pr_err("Failed to allocate NUMA topology\n"); 3838 return -ENOMEM; 3839 } 3840 3841 spec = zalloc(topo->nr * sizeof(char *)); 3842 if (!spec) { 3843 pr_err("Failed to allocate NUMA spec\n"); 3844 ret = -ENOMEM; 3845 goto out_delete_topo; 3846 } 3847 
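	/* Use each NUMA node's CPU list as both the maps spec and the affinity spec. */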
for (s = 0; s < topo->nr; s++) 3848 spec[s] = topo->nodes[s].cpus; 3849 3850 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3851 3852 zfree(&spec); 3853 3854 out_delete_topo: 3855 numa_topology__delete(topo); 3856 3857 return ret; 3858 } 3859 3860 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3861 { 3862 int t, ret; 3863 u32 s, nr_spec = 0; 3864 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3865 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3866 3867 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3868 spec = strtok_r(user_spec, ":", &spec_ptr); 3869 if (spec == NULL) 3870 break; 3871 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3872 mask = strtok_r(spec, "/", &mask_ptr); 3873 if (mask == NULL) 3874 break; 3875 pr_debug2(" maps mask: %s\n", mask); 3876 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3877 if (!tmp_spec) { 3878 pr_err("Failed to reallocate maps spec\n"); 3879 ret = -ENOMEM; 3880 goto out_free; 3881 } 3882 maps_spec = tmp_spec; 3883 maps_spec[nr_spec] = dup_mask = strdup(mask); 3884 if (!maps_spec[nr_spec]) { 3885 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3886 ret = -ENOMEM; 3887 goto out_free; 3888 } 3889 mask = strtok_r(NULL, "/", &mask_ptr); 3890 if (mask == NULL) { 3891 pr_err("Invalid thread maps or affinity specs\n"); 3892 ret = -EINVAL; 3893 goto out_free; 3894 } 3895 pr_debug2(" affinity mask: %s\n", mask); 3896 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3897 if (!tmp_spec) { 3898 pr_err("Failed to reallocate affinity spec\n"); 3899 ret = -ENOMEM; 3900 goto out_free; 3901 } 3902 affinity_spec = tmp_spec; 3903 affinity_spec[nr_spec] = strdup(mask); 3904 if (!affinity_spec[nr_spec]) { 3905 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3906 ret = -ENOMEM; 3907 goto out_free; 3908 } 3909 dup_mask = NULL; 3910 nr_spec++; 3911 } 3912 3913 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3914 (const char **)affinity_spec, nr_spec); 3915 3916 out_free: 3917 free(dup_mask); 3918 for (s = 0; s < nr_spec; s++) { 3919 if (maps_spec) 3920 free(maps_spec[s]); 3921 if (affinity_spec) 3922 free(affinity_spec[s]); 3923 } 3924 free(affinity_spec); 3925 free(maps_spec); 3926 3927 return ret; 3928 } 3929 3930 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3931 { 3932 int ret; 3933 3934 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3935 if (ret) 3936 return ret; 3937 3938 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3939 return -ENODEV; 3940 3941 rec->nr_threads = 1; 3942 3943 return 0; 3944 } 3945 3946 static int record__init_thread_masks(struct record *rec) 3947 { 3948 int ret = 0; 3949 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3950 3951 if (!record__threads_enabled(rec)) 3952 return record__init_thread_default_masks(rec, cpus); 3953 3954 if (evlist__per_thread(rec->evlist)) { 3955 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3956 return -EINVAL; 3957 } 3958 3959 switch (rec->opts.threads_spec) { 3960 case THREAD_SPEC__CPU: 3961 ret = record__init_thread_cpu_masks(rec, cpus); 3962 break; 3963 case THREAD_SPEC__CORE: 3964 ret = record__init_thread_core_masks(rec, cpus); 3965 break; 3966 case THREAD_SPEC__PACKAGE: 3967 ret = record__init_thread_package_masks(rec, cpus); 3968 break; 3969 case THREAD_SPEC__NUMA: 3970 ret = 
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
	symbol_conf.lazy_load_kernel_maps = true;
	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (record.latency) {
		/*
		 * There is no fundamental reason why latency profiling
		 * can't work for system-wide mode, but exact semantics
		 * and details are to be defined.
		 * See the following thread for details:
		 * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
		 */
		if (record.opts.target.system_wide) {
			pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
			err = -EINVAL;
			goto out_opts;
		}
		record.opts.record_switch_events = true;
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	if (rec->filter_action) {
		if (!strcmp(rec->filter_action, "pin"))
			err = perf_bpf_filter__pin();
		else if (!strcmp(rec->filter_action, "unpin"))
			err = perf_bpf_filter__unpin();
		else {
			pr_warning("Unknown BPF filter action: %s\n", rec->filter_action);
			err = -EINVAL;
		}
		goto out_opts;
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead.
		 * Still generate buildids if they are explicitly requested
		 * using
		 *
		 * perf record --switch-output --no-no-buildid \
		 *             --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		err = parse_event(rec->evlist, "cycles:P");
		if (err)
			goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		} else
			usage_with_options(record_usage, record_options);
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace, which would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__config_tracking_events(rec);
	if (err) {
		pr_err("record__config_tracking_events failed, error %d\n", err);
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	evlist__delete(rec->evlist);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}