1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 #include <limits.h>
21
22 #include "common.h"
23
/* Size, in bytes, of the buffer err_msg() and debug_msg() format a message into. */
#define MAX_MSG_LENGTH	1024

/* debug_msg() prints only while this is non-zero. */
int config_debug;
26
27 /*
28 * err_msg - print an error message to the stderr
29 */
err_msg(const char * fmt,...)30 void err_msg(const char *fmt, ...)
31 {
32 char message[MAX_MSG_LENGTH];
33 va_list ap;
34
35 va_start(ap, fmt);
36 vsnprintf(message, sizeof(message), fmt, ap);
37 va_end(ap);
38
39 fprintf(stderr, "%s", message);
40 }
41
42 /*
43 * debug_msg - print a debug message to stderr if debug is set
44 */
debug_msg(const char * fmt,...)45 void debug_msg(const char *fmt, ...)
46 {
47 char message[MAX_MSG_LENGTH];
48 va_list ap;
49
50 if (!config_debug)
51 return;
52
53 va_start(ap, fmt);
54 vsnprintf(message, sizeof(message), fmt, ap);
55 va_end(ap);
56
57 fprintf(stderr, "%s", message);
58 }
59
60 /*
61 * fatal - print an error message and EOL to stderr and exit with ERROR
62 */
fatal(const char * fmt,...)63 void fatal(const char *fmt, ...)
64 {
65 va_list ap;
66
67 va_start(ap, fmt);
68 vfprintf(stderr, fmt, ap);
69 va_end(ap);
70 fprintf(stderr, "\n");
71
72 exit(ERROR);
73 }
74
/*
 * get_llong_from_str - convert the leading decimal number of a string
 *
 * Characters that follow the number are ignored.
 *
 * Returns the converted value, or -1 if the string does not start with a
 * number or strtoll() reports an error through errno.
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	return (errno || stop == start) ? -1 : parsed;
}
90
/*
 * get_duration - fill output with a human readable duration since start_time
 *
 * Writes the time elapsed between start_time and now into output, using at
 * most output_size bytes, formatted as "<days> <hours>:<minutes>:<seconds>".
 *
 * The fields are computed arithmetically rather than with gmtime(): gmtime()
 * may return NULL, returns a shared static buffer, and its tm_yday wraps
 * after one year. A start_time in the future is reported as a zero duration.
 *
 * Nothing is written if output is NULL or output_size is not positive.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	enum {
		SECS_PER_MIN	= 60,
		SECS_PER_HOUR	= 60 * 60,
		SECS_PER_DAY	= 24 * 60 * 60,
	};
	long long elapsed;
	long long days;
	int rem;

	if (!output || output_size <= 0)
		return;

	elapsed = (long long)difftime(time(NULL), start_time);
	if (elapsed < 0)
		elapsed = 0;

	days = elapsed / SECS_PER_DAY;
	/* seconds left inside the last, incomplete, day */
	rem = (int)(elapsed % SECS_PER_DAY);

	snprintf(output, output_size, "%3lld %02d:%02d:%02d",
		 days,
		 rem / SECS_PER_HOUR,
		 rem % SECS_PER_HOUR / SECS_PER_MIN,
		 rem % SECS_PER_MIN);
}
109
110 /*
111 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
112 *
113 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set
114 * filling cpu_set_t argument.
115 *
116 * Returns 0 on success, 1 otherwise.
117 */
parse_cpu_set(char * cpu_list,cpu_set_t * set)118 int parse_cpu_set(char *cpu_list, cpu_set_t *set)
119 {
120 const char *p;
121 int end_cpu;
122 int cpu;
123 int i;
124
125 CPU_ZERO(set);
126
127 for (p = cpu_list; *p; ) {
128 cpu = atoi(p);
129 if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
130 goto err;
131
132 while (isdigit(*p))
133 p++;
134 if (*p == '-') {
135 p++;
136 end_cpu = atoi(p);
137 if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
138 goto err;
139 while (isdigit(*p))
140 p++;
141 } else
142 end_cpu = cpu;
143
144 if (cpu == end_cpu) {
145 debug_msg("cpu_set: adding cpu %d\n", cpu);
146 CPU_SET(cpu, set);
147 } else {
148 for (i = cpu; i <= end_cpu; i++) {
149 debug_msg("cpu_set: adding cpu %d\n", i);
150 CPU_SET(i, set);
151 }
152 }
153
154 if (*p == ',')
155 p++;
156 }
157
158 return 0;
159 err:
160 debug_msg("Error parsing the cpu set %s\n", cpu_list);
161 return 1;
162 }
163
164 /*
165 * parse_stack_format - parse the stack format
166 *
167 * Return: the stack format on success, -1 otherwise.
168 */
parse_stack_format(char * arg)169 int parse_stack_format(char *arg)
170 {
171 if (!strcmp(arg, "truncate"))
172 return STACK_FORMAT_TRUNCATE;
173 if (!strcmp(arg, "skip"))
174 return STACK_FORMAT_SKIP;
175 if (!strcmp(arg, "full"))
176 return STACK_FORMAT_FULL;
177
178 debug_msg("Error parsing the stack format %s\n", arg);
179 return -1;
180 }
181
/*
 * parse_seconds_duration - parse a duration with an optional s/m/h/d suffix
 *
 * The leading decimal number of val is scaled by the character right after
 * it: s/S for seconds, m/M for minutes, h/H for hours and d/D for days. Any
 * other character, including the end of the string, leaves the number as is.
 *
 * Returns the duration in seconds.
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long scale = 1;
	long secs;

	secs = strtol(val, &suffix, 10);

	if (*suffix == 'm' || *suffix == 'M')
		scale = 60;
	else if (*suffix == 'h' || *suffix == 'H')
		scale = 60 * 60;
	else if (*suffix == 'd' || *suffix == 'D')
		scale = 24 * 60 * 60;

	return secs * scale;
}
215
/*
 * match_time_unit - check if str starts with unit followed by end-of-string or ':'
 *
 * This allows the time unit parser to work both in standalone duration strings
 * like "100ms" and in colon-delimited SCHED_DEADLINE specifications like
 * "d:10ms:100ms", while still rejecting malformed input like "100msx".
 */
static bool match_time_unit(const char *str, const char *unit)
{
	size_t len = strlen(unit);

	return strncmp(str, unit, len) == 0 &&
	       (str[len] == '\0' || str[len] == ':');
}

/*
 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 *
 * val must be a decimal number immediately followed by one of the units
 * ns, us, ms or s; the unit must be followed by the end of the string or
 * by ':'.
 *
 * Returns the duration in nanoseconds, or -1 if val has no number, has a
 * missing or unknown unit, or the result does not fit in a long.
 */
long parse_ns_duration(char *val)
{
	static const struct {
		const char *name;
		long ns;
	} units[] = {
		{ "ns", 1L },
		{ "us", 1000L },
		{ "ms", 1000L * 1000L },
		{ "s",  1000L * 1000L * 1000L },
	};
	char *end;
	size_t i;
	long t;

	errno = 0;
	t = strtol(val, &end, 10);
	/* end == val: no digit was consumed, e.g. a bare "ms" */
	if (errno || end == val)
		return -1;

	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (!match_time_unit(end, units[i].name))
			continue;

		/* refuse values whose conversion would overflow a long */
		if (t > LONG_MAX / units[i].ns || t < LONG_MIN / units[i].ns)
			return -1;

		return t * units[i].ns;
	}

	return -1;
}
259
/*
 * This is a set of helper functions to use SCHED_DEADLINE.
 *
 * The table below provides the sched_setattr syscall number, per
 * architecture, only when the included headers do not define
 * __NR_sched_setattr.
 */
#ifndef __NR_sched_setattr
# ifdef __x86_64__
#  define __NR_sched_setattr	314
# elif __i386__
#  define __NR_sched_setattr	351
# elif __arm__
#  define __NR_sched_setattr	380
# elif __aarch64__ || __riscv
#  define __NR_sched_setattr	274
# elif __powerpc__
#  define __NR_sched_setattr	355
# elif __s390x__
#  define __NR_sched_setattr	345
# elif __loongarch__
#  define __NR_sched_setattr	274
# endif
#endif

/* Scheduling policy number used by this file for SCHED_DEADLINE. */
#define SCHED_DEADLINE		6

/*
 * syscall_sched_setattr - invoke the sched_setattr syscall directly
 *
 * Returns whatever syscall() returns for __NR_sched_setattr.
 */
static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
					unsigned int flags)
{
	return syscall(__NR_sched_setattr, pid, attr, flags);
}
287
/*
 * __set_sched_attr - apply the scheduling attributes in attr to pid
 *
 * Returns 0 on success. On failure, prints an error message with the errno
 * description and returns 1.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (syscall_sched_setattr(pid, attr, 0) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
302
303 /*
304 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
305 *
306 * Check if the procfs entry is a directory of a process, and then check if the
307 * process has a comm with the prefix set in char *comm_prefix. As the
308 * current users of this function only check for kernel threads, there is no
309 * need to check for the threads for the process.
310 *
311 * Return: True if the proc_entry contains a comm file with comm_prefix*.
312 * Otherwise returns false.
313 */
procfs_is_workload_pid(const char * comm_prefix,struct dirent * proc_entry)314 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
315 {
316 char buffer[MAX_PATH];
317 int comm_fd, retval;
318 char *t_name;
319
320 if (proc_entry->d_type != DT_DIR)
321 return 0;
322
323 if (*proc_entry->d_name == '.')
324 return 0;
325
326 /* check if the string is a pid */
327 for (t_name = proc_entry->d_name; *t_name; t_name++) {
328 if (!isdigit(*t_name))
329 break;
330 }
331
332 if (*t_name != '\0')
333 return 0;
334
335 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
336 comm_fd = open(buffer, O_RDONLY);
337 if (comm_fd < 0)
338 return 0;
339
340 memset(buffer, 0, MAX_PATH);
341 retval = read(comm_fd, buffer, MAX_PATH);
342
343 close(comm_fd);
344
345 if (retval <= 0)
346 return 0;
347
348 buffer[MAX_PATH-1] = '\0';
349 if (!str_has_prefix(buffer, comm_prefix))
350 return 0;
351
352 /* comm already have \n */
353 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
354
355 return 1;
356 }
357
358 /*
359 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
360 *
361 * This function uses procfs to list the currently running threads and then set the
362 * sched_attr *attr to the threads that start with char *comm_prefix. It is
363 * mainly used to set the priority to the kernel threads created by the
364 * tracers.
365 */
set_comm_sched_attr(const char * comm_prefix,struct sched_attr * attr)366 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
367 {
368 struct dirent *proc_entry;
369 DIR *procfs;
370 int retval;
371 int pid;
372
373 if (strlen(comm_prefix) >= MAX_PATH) {
374 err_msg("Command prefix is too long: %d < strlen(%s)\n",
375 MAX_PATH, comm_prefix);
376 return 1;
377 }
378
379 procfs = opendir("/proc");
380 if (!procfs) {
381 err_msg("Could not open procfs\n");
382 return 1;
383 }
384
385 while ((proc_entry = readdir(procfs))) {
386
387 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
388 if (!retval)
389 continue;
390
391 if (strtoi(proc_entry->d_name, &pid)) {
392 err_msg("'%s' is not a valid pid", proc_entry->d_name);
393 retval = 1;
394 goto out;
395 }
396 /* procfs_is_workload_pid confirmed it is a pid */
397 retval = __set_sched_attr(pid, attr);
398 if (retval) {
399 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
400 goto out;
401 }
402
403 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
404 }
405
406 retval = 0;
407 out:
408 closedir(procfs);
409 return retval;
410 }
411
/* Value parse_prio() compares parsed numbers against to detect a failed parse. */
#define INVALID_VAL	(~0L)

/*
 * get_long_ns_after_colon - parse the duration that follows the first ':'
 *
 * Returns what parse_ns_duration() returns for the text right after the
 * first ':' of start, or -1 if start has no ':'.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	return parse_ns_duration(colon + 1);
}
428
/*
 * get_long_after_colon - parse the number that follows the first ':'
 *
 * Returns what get_llong_from_str() returns for the text right after the
 * first ':' of start, converted to long, or -1 if start has no ':'.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	return colon ? get_llong_from_str(colon + 1) : -1;
}
444
445 /*
446 * parse priority in the format:
447 * SCHED_OTHER:
448 * o:<prio>
449 * O:<prio>
450 * SCHED_RR:
451 * r:<prio>
452 * R:<prio>
453 * SCHED_FIFO:
454 * f:<prio>
455 * F:<prio>
456 * SCHED_DEADLINE:
457 * d:runtime:period
458 * D:runtime:period
459 */
parse_prio(char * arg,struct sched_attr * sched_param)460 int parse_prio(char *arg, struct sched_attr *sched_param)
461 {
462 long prio;
463 long runtime;
464 long period;
465
466 memset(sched_param, 0, sizeof(*sched_param));
467 sched_param->size = sizeof(*sched_param);
468
469 switch (arg[0]) {
470 case 'd':
471 case 'D':
472 /* d:runtime:period */
473 if (strlen(arg) < 4)
474 return -1;
475
476 runtime = get_long_ns_after_colon(arg);
477 if (runtime == INVALID_VAL)
478 return -1;
479
480 period = get_long_ns_after_colon(&arg[2]);
481 if (period == INVALID_VAL)
482 return -1;
483
484 if (runtime > period)
485 return -1;
486
487 sched_param->sched_policy = SCHED_DEADLINE;
488 sched_param->sched_runtime = runtime;
489 sched_param->sched_deadline = period;
490 sched_param->sched_period = period;
491 break;
492 case 'f':
493 case 'F':
494 /* f:prio */
495 prio = get_long_after_colon(arg);
496 if (prio == INVALID_VAL)
497 return -1;
498
499 if (prio < sched_get_priority_min(SCHED_FIFO))
500 return -1;
501 if (prio > sched_get_priority_max(SCHED_FIFO))
502 return -1;
503
504 sched_param->sched_policy = SCHED_FIFO;
505 sched_param->sched_priority = prio;
506 break;
507 case 'r':
508 case 'R':
509 /* r:prio */
510 prio = get_long_after_colon(arg);
511 if (prio == INVALID_VAL)
512 return -1;
513
514 if (prio < sched_get_priority_min(SCHED_RR))
515 return -1;
516 if (prio > sched_get_priority_max(SCHED_RR))
517 return -1;
518
519 sched_param->sched_policy = SCHED_RR;
520 sched_param->sched_priority = prio;
521 break;
522 case 'o':
523 case 'O':
524 /* o:prio */
525 prio = get_long_after_colon(arg);
526 if (prio == INVALID_VAL)
527 return -1;
528
529 if (prio < MIN_NICE)
530 return -1;
531 if (prio > MAX_NICE)
532 return -1;
533
534 sched_param->sched_policy = SCHED_OTHER;
535 sched_param->sched_nice = prio;
536 break;
537 default:
538 return -1;
539 }
540 return 0;
541 }
542
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 if the file
 * could not be opened or the whole value could not be written.
 */
int set_cpu_dma_latency(int32_t latency)
{
	ssize_t written;
	int fd;

	fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* anything short of the full value is a failure */
	written = write(fd, &latency, sizeof(latency));
	if (written != (ssize_t)sizeof(latency)) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
574
575 #ifdef HAVE_LIBCPUPOWER_SUPPORT
576 static unsigned int **saved_cpu_idle_disable_state;
577 static size_t saved_cpu_idle_disable_state_alloc_ctr;
578
579 /*
580 * save_cpu_idle_state_disable - save disable for all idle states of a cpu
581 *
582 * Saves the current disable of all idle states of a cpu, to be subsequently
583 * restored via restore_cpu_idle_disable_state.
584 *
585 * Return: idle state count on success, negative on error
586 */
save_cpu_idle_disable_state(unsigned int cpu)587 int save_cpu_idle_disable_state(unsigned int cpu)
588 {
589 unsigned int nr_states;
590 unsigned int state;
591 int disabled;
592
593 nr_states = cpuidle_state_count(cpu);
594
595 if (nr_states == 0)
596 return 0;
597
598 if (saved_cpu_idle_disable_state == NULL) {
599 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
600 if (!saved_cpu_idle_disable_state)
601 return -1;
602 }
603
604 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
605 if (!saved_cpu_idle_disable_state[cpu])
606 return -1;
607 saved_cpu_idle_disable_state_alloc_ctr++;
608
609 for (state = 0; state < nr_states; state++) {
610 disabled = cpuidle_is_state_disabled(cpu, state);
611 if (disabled < 0)
612 return disabled;
613 saved_cpu_idle_disable_state[cpu][state] = disabled;
614 }
615
616 return nr_states;
617 }
618
619 /*
620 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
621 *
622 * Restores the current disable state of all idle states of a cpu that was
623 * previously saved by save_cpu_idle_disable_state.
624 *
625 * Return: idle state count on success, negative on error
626 */
restore_cpu_idle_disable_state(unsigned int cpu)627 int restore_cpu_idle_disable_state(unsigned int cpu)
628 {
629 unsigned int nr_states;
630 unsigned int state;
631 int disabled;
632 int result;
633
634 nr_states = cpuidle_state_count(cpu);
635
636 if (nr_states == 0)
637 return 0;
638
639 if (!saved_cpu_idle_disable_state)
640 return -1;
641
642 for (state = 0; state < nr_states; state++) {
643 if (!saved_cpu_idle_disable_state[cpu])
644 return -1;
645 disabled = saved_cpu_idle_disable_state[cpu][state];
646 result = cpuidle_state_disable(cpu, state, disabled);
647 if (result < 0)
648 return result;
649 }
650
651 free(saved_cpu_idle_disable_state[cpu]);
652 saved_cpu_idle_disable_state[cpu] = NULL;
653 saved_cpu_idle_disable_state_alloc_ctr--;
654 if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
655 free(saved_cpu_idle_disable_state);
656 saved_cpu_idle_disable_state = NULL;
657 }
658
659 return nr_states;
660 }
661
662 /*
663 * free_cpu_idle_disable_states - free saved idle state disable for all cpus
664 *
665 * Frees the memory used for storing cpu idle state disable for all cpus
666 * and states.
667 *
668 * Normally, the memory is freed automatically in
669 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
670 * error.
671 */
free_cpu_idle_disable_states(void)672 void free_cpu_idle_disable_states(void)
673 {
674 int cpu;
675
676 if (!saved_cpu_idle_disable_state)
677 return;
678
679 for (cpu = 0; cpu < nr_cpus; cpu++) {
680 free(saved_cpu_idle_disable_state[cpu]);
681 saved_cpu_idle_disable_state[cpu] = NULL;
682 }
683
684 free(saved_cpu_idle_disable_state);
685 saved_cpu_idle_disable_state = NULL;
686 }
687
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables every idle state of the cpu with a number higher than
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state = deepest_state + 1;
	int result;

	while (state < nr_states) {
		result = cpuidle_state_disable(cpu, state, 1);
		if (result < 0)
			return result;
		state++;
	}

	return nr_states;
}
715 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
716
717 #define _STR(x) #x
718 #define STR(x) _STR(x)
719
720 /*
721 * find_mount - find a the mount point of a given fs
722 *
723 * Returns 0 if mount is not found, otherwise return 1 and fill mp
724 * with the mount point.
725 */
find_mount(const char * fs,char * mp,int sizeof_mp)726 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
727 {
728 char mount_point[MAX_PATH+1];
729 char type[100];
730 int found = 0;
731 FILE *fp;
732
733 fp = fopen("/proc/mounts", "r");
734 if (!fp)
735 return 0;
736
737 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
738 if (strcmp(type, fs) == 0) {
739 found = 1;
740 break;
741 }
742 }
743 fclose(fp);
744
745 if (!found)
746 return 0;
747
748 memset(mp, 0, sizeof_mp);
749 strncpy(mp, mount_point, sizeof_mp - 1);
750
751 debug_msg("Fs %s found at %s\n", fs, mp);
752 return 1;
753 }
754
755 /*
756 * get_self_cgroup - get the current thread cgroup path
757 *
758 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
759 *
760 * 0::/user.slice/user-0.slice/session-3.scope'\n'
761 *
762 * This function is interested in the content after the second : and before the '\n'.
763 *
764 * Returns 1 if a string was found, 0 otherwise.
765 */
get_self_cgroup(char * self_cg,int sizeof_self_cg)766 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
767 {
768 char path[MAX_PATH], *start;
769 int fd, retval;
770
771 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
772
773 fd = open(path, O_RDONLY);
774 if (fd < 0)
775 return 0;
776
777 memset(path, 0, sizeof(path));
778 retval = read(fd, path, MAX_PATH);
779
780 close(fd);
781
782 if (retval <= 0)
783 return 0;
784
785 path[MAX_PATH-1] = '\0';
786 start = path;
787
788 start = strstr(start, ":");
789 if (!start)
790 return 0;
791
792 /* skip ":" */
793 start++;
794
795 start = strstr(start, ":");
796 if (!start)
797 return 0;
798
799 /* skip ":" */
800 start++;
801
802 if (strlen(start) >= sizeof_self_cg)
803 return 0;
804
805 snprintf(self_cg, sizeof_self_cg, "%s", start);
806
807 /* Swap '\n' with '\0' */
808 start = strstr(self_cg, "\n");
809
810 /* there must be '\n' */
811 if (!start)
812 return 0;
813
814 /* ok, it found a string after the second : and before the \n */
815 *start = '\0';
816
817 return 1;
818 }
819
820 /*
821 * open_cgroup_procs - Open the cgroup.procs file for the given cgroup
822 *
823 * If cgroup argument is not NULL, the cgroup.procs file for that cgroup
824 * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread
825 * will be used.
826 *
827 * Supports cgroup v2.
828 *
829 * Returns the file descriptor on success, -1 otherwise.
830 */
open_cgroup_procs(const char * cgroup)831 static int open_cgroup_procs(const char *cgroup)
832 {
833 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
834 char cgroup_procs[MAX_PATH];
835 int retval;
836 int cg_fd;
837 size_t cg_path_len;
838
839 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
840 if (!retval) {
841 err_msg("Did not find cgroupv2 mount point\n");
842 return -1;
843 }
844
845 cg_path_len = strlen(cgroup_path);
846
847 if (!cgroup) {
848 retval = get_self_cgroup(&cgroup_path[cg_path_len],
849 sizeof(cgroup_path) - cg_path_len);
850 if (!retval) {
851 err_msg("Did not find self cgroup\n");
852 return -1;
853 }
854 } else {
855 snprintf(&cgroup_path[cg_path_len],
856 sizeof(cgroup_path) - cg_path_len, "%s/", cgroup);
857 }
858
859 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
860
861 debug_msg("Using cgroup path at: %s\n", cgroup_procs);
862
863 cg_fd = open(cgroup_procs, O_RDWR);
864 if (cg_fd < 0)
865 return -1;
866
867 return cg_fd;
868 }
869
/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * If cgroup argument is not NULL, the pid will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char pid_str[24];
	int retval;
	int cg_fd;

	cg_fd = open_cgroup_procs(cgroup);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", (int)pid);

	retval = write(cg_fd, pid_str, strlen(pid_str));

	/*
	 * The messages print the pid itself: pid_str carries the '\n' meant
	 * for the cgroup file, which would break them in two lines.
	 */
	if (retval < 0)
		err_msg("Error setting cgroup attributes for pid:%d - %s\n",
			(int)pid, strerror(errno));
	else
		debug_msg("Set cgroup attributes for pid:%d\n", (int)pid);

	close(cg_fd);

	return (retval >= 0);
}
903
904 /**
905 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
906 *
907 * If cgroup argument is not NULL, the threads will move to the given cgroup.
908 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
909 *
910 * Supports cgroup v2.
911 *
912 * Returns 1 on success, 0 otherwise.
913 */
set_comm_cgroup(const char * comm_prefix,const char * cgroup)914 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
915 {
916 struct dirent *proc_entry;
917 DIR *procfs;
918 int retval;
919 int cg_fd;
920
921 if (strlen(comm_prefix) >= MAX_PATH) {
922 err_msg("Command prefix is too long: %d < strlen(%s)\n",
923 MAX_PATH, comm_prefix);
924 return 0;
925 }
926
927 cg_fd = open_cgroup_procs(cgroup);
928 if (cg_fd < 0)
929 return 0;
930
931 procfs = opendir("/proc");
932 if (!procfs) {
933 err_msg("Could not open procfs\n");
934 goto out_cg;
935 }
936
937 while ((proc_entry = readdir(procfs))) {
938
939 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
940 if (!retval)
941 continue;
942
943 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
944 if (retval < 0) {
945 err_msg("Error setting cgroup attributes for pid:%s - %s\n",
946 proc_entry->d_name, strerror(errno));
947 goto out_procfs;
948 }
949
950 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
951 }
952
953 closedir(procfs);
954 close(cg_fd);
955 return 1;
956
957 out_procfs:
958 closedir(procfs);
959 out_cg:
960 close(cg_fd);
961 return 0;
962 }
963
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the monitored CPUs, if possible: when rtla's
 * affinity shares CPUs with monitored_cpus, the affinity is reduced to the
 * CPUs rtla was allowed to use that are not monitored.
 *
 * Returns 1 on success, including when there was nothing to do, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;
	pid_t self = getpid();

	/* the CPUs in which rtla can actually run */
	if (sched_getaffinity(self, sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* no CPU in common means the existing setup is already good */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* drop the CPUs in common, then keep only those rtla can run on */
	CPU_XOR(&candidates, &allowed, monitored_cpus);
	CPU_AND(&candidates, &candidates, &allowed);

	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(self, sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
1013
/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * When the value comes from the next argv entry (the last form), optind
 * is advanced past it. An entry starting with '-' is not taken as a value.
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	/* value attached to the option; skip the '=' if there is one */
	if (optarg)
		return optarg[0] == '=' ? optarg + 1 : optarg;

	/* argument of form -s [arg] and --long [arg] */
	if (optind >= argc || argv[optind][0] == '-')
		return NULL;

	/* consume optind */
	return argv[optind++];
}
1039
/*
 * strtoi - convert string to integer with error checking
 *
 * The whole string must be a number. The base is detected by strtol()
 * (base 0), so a "0x" prefix means hexadecimal and a leading "0" octal.
 * res is written only on success.
 *
 * Returns 0 on success, -1 if conversion fails or result is out of int range.
 */
int strtoi(const char *s, int *res)
{
	char *end;
	long value;

	if (*s == '\0')
		return -1;

	errno = 0;
	value = strtol(s, &end, 0);
	if (errno != 0)
		return -1;

	/* trailing characters make the string invalid */
	if (*end != '\0')
		return -1;

	if (value < INT_MIN || value > INT_MAX)
		return -1;

	*res = (int)value;
	return 0;
}
1061
/*
 * fatal_alloc - report a memory allocation failure through fatal()
 */
static inline void fatal_alloc(void)
{
	fatal("Error allocating memory\n");
}
1066
/*
 * calloc_fatal - calloc() that calls fatal_alloc() when it returns NULL
 *
 * Returns the memory allocated by calloc(n, size).
 */
void *calloc_fatal(size_t n, size_t size)
{
	void *mem = calloc(n, size);

	if (mem)
		return mem;

	fatal_alloc();
	return NULL;
}
1076
/*
 * reallocarray_fatal - reallocarray() that calls fatal_alloc() when it returns NULL
 *
 * Returns the memory returned by reallocarray(p, n, size).
 */
void *reallocarray_fatal(void *p, size_t n, size_t size)
{
	void *mem = reallocarray(p, n, size);

	if (mem)
		return mem;

	fatal_alloc();
	return NULL;
}
1086
/*
 * strdup_fatal - strdup() that calls fatal_alloc() when it returns NULL
 *
 * Returns the copy of s made by strdup().
 */
char *strdup_fatal(const char *s)
{
	char *copy = strdup(s);

	if (copy)
		return copy;

	fatal_alloc();
	return NULL;
}
1096