1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 #include <limits.h>
21
22 #include "utils.h"
23
/* Size, in bytes, of the stack buffer err_msg() and debug_msg() format into. */
#define MAX_MSG_LENGTH	1024
/* debug_msg() only prints when this is non-zero. */
int config_debug;
26
27 /*
28 * err_msg - print an error message to the stderr
29 */
err_msg(const char * fmt,...)30 void err_msg(const char *fmt, ...)
31 {
32 char message[MAX_MSG_LENGTH];
33 va_list ap;
34
35 va_start(ap, fmt);
36 vsnprintf(message, sizeof(message), fmt, ap);
37 va_end(ap);
38
39 fprintf(stderr, "%s", message);
40 }
41
42 /*
43 * debug_msg - print a debug message to stderr if debug is set
44 */
debug_msg(const char * fmt,...)45 void debug_msg(const char *fmt, ...)
46 {
47 char message[MAX_MSG_LENGTH];
48 va_list ap;
49
50 if (!config_debug)
51 return;
52
53 va_start(ap, fmt);
54 vsnprintf(message, sizeof(message), fmt, ap);
55 va_end(ap);
56
57 fprintf(stderr, "%s", message);
58 }
59
60 /*
61 * fatal - print an error message and EOL to stderr and exit with ERROR
62 */
fatal(const char * fmt,...)63 void fatal(const char *fmt, ...)
64 {
65 va_list ap;
66
67 va_start(ap, fmt);
68 vfprintf(stderr, fmt, ap);
69 va_end(ap);
70 fprintf(stderr, "\n");
71
72 exit(ERROR);
73 }
74
/*
 * get_llong_from_str - convert the leading decimal number of a string
 *
 * Parsing stops at the first character that is not part of the number,
 * so trailing text is ignored.
 *
 * Returns the parsed value, or -1 when no digits were found or strtoll()
 * reported an error via errno. Note that -1 is therefore indistinguishable
 * from a successfully parsed "-1".
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	/* nothing consumed, or the conversion failed */
	if (stop == start || errno != 0)
		return -1;

	return parsed;
}
90
/*
 * get_duration - fill output with a human readable duration since start_time
 *
 * The elapsed time between start_time and now is broken down with gmtime()
 * and printed as "<days> <hh>:<mm>:<ss>", where <days> is tm_yday.
 *
 * @start_time: the reference time
 * @output: buffer that receives the string
 * @output_size: size of @output in bytes
 *
 * If gmtime() cannot represent the elapsed time, @output is set to an
 * empty string instead of dereferencing a NULL pointer.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t now = time(NULL);
	struct tm *tm_info;
	time_t duration;

	duration = difftime(now, start_time);
	tm_info = gmtime(&duration);
	if (!tm_info) {
		/* gmtime() returns NULL when the value does not fit a struct tm */
		if (output_size > 0)
			output[0] = '\0';
		return;
	}

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 tm_info->tm_yday,
		 tm_info->tm_hour,
		 tm_info->tm_min,
		 tm_info->tm_sec);
}
109
/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and sets the
 * corresponding cpus in the cpu_set_t argument. The set is cleared first.
 *
 * Every cpu number must start with a digit: blanks and signs are rejected.
 * Accepting them (as atoi() does) would leave the parser stuck on a
 * character it never consumes. Each cpu must be smaller than the number
 * of configured processors, and a range must not be decreasing.
 *
 * On error the set may already contain the cpus parsed before the
 * offending element.
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p = cpu_list;
	char *end;
	long value;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	while (*p) {
		/* strtol() would silently skip blanks and a sign */
		if (!isdigit((unsigned char)*p))
			goto err;

		errno = 0;
		value = strtol(p, &end, 10);
		if (errno || value >= nr_cpus)
			goto err;
		cpu = value;
		p = end;

		end_cpu = cpu;
		if (*p == '-') {
			p++;
			if (!isdigit((unsigned char)*p))
				goto err;

			errno = 0;
			value = strtol(p, &end, 10);
			if (errno || value < cpu || value >= nr_cpus)
				goto err;
			end_cpu = value;
			p = end;
		}

		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_set: adding cpu %d\n", i);
			CPU_SET(i, set);
		}

		/* anything else here is caught by the digit check above */
		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
166
/*
 * parse_seconds_duration - parse a duration with s/m/h/d suffix into seconds
 *
 * The leading decimal number is scaled according to the character that
 * follows it: s/S seconds, m/M minutes, h/H hours, d/D days. Any other
 * character, or no suffix at all, leaves the number unscaled.
 *
 * No error is reported: a string without a leading number yields 0.
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long seconds = strtol(val, &suffix, 10);
	long scale;

	switch (*suffix) {
	case 'm':
	case 'M':
		scale = 60;
		break;
	case 'h':
	case 'H':
		scale = 60 * 60;
		break;
	case 'd':
	case 'D':
		scale = 24 * 60 * 60;
		break;
	default:
		/* 's', 'S', unknown or missing suffix: already in seconds */
		scale = 1;
		break;
	}

	return seconds * scale;
}
200
/*
 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 *
 * The leading decimal number must be followed by one of the units
 * "ns", "us", "ms" or "s"; only the beginning of the remaining text is
 * compared, so anything after the unit is ignored.
 *
 * Returns the duration in nanoseconds, or -1 when the number is not
 * followed by a known unit (this includes a number with no unit at all).
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long amount = strtol(val, &unit, 10);

	if (strncmp(unit, "ns", 2) == 0)
		return amount;

	if (strncmp(unit, "us", 2) == 0)
		return amount * 1000;

	if (strncmp(unit, "ms", 2) == 0)
		return amount * (1000 * 1000);

	if (strncmp(unit, "s", 1) == 0)
		return amount * (1000 * 1000 * 1000);

	return -1;
}
229
/*
 * This is a set of helper functions to use SCHED_DEADLINE.
 *
 * When the included headers do not provide __NR_sched_setattr, fall back
 * to a per-architecture syscall number. No fallback is defined for
 * architectures that are not listed here.
 */
#ifndef __NR_sched_setattr
# ifdef __x86_64__
#  define __NR_sched_setattr	314
# elif __i386__
#  define __NR_sched_setattr	351
# elif __arm__
#  define __NR_sched_setattr	380
# elif __aarch64__ || __riscv
#  define __NR_sched_setattr	274
# elif __powerpc__
#  define __NR_sched_setattr	355
# elif __s390x__
#  define __NR_sched_setattr	345
# elif __loongarch__
#  define __NR_sched_setattr	274
# endif
#endif

/* Value stored in sched_attr::sched_policy by parse_prio() for d:/D: */
#define SCHED_DEADLINE		6
252
syscall_sched_setattr(pid_t pid,const struct sched_attr * attr,unsigned int flags)253 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
254 unsigned int flags) {
255 return syscall(__NR_sched_setattr, pid, attr, flags);
256 }
257
/*
 * __set_sched_attr - apply sched attributes to a pid
 *
 * Calls sched_setattr with no flags and prints an error message, including
 * the errno description, if it fails.
 *
 * Returns 0 on success, 1 otherwise.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	const unsigned int no_flags = 0;

	if (syscall_sched_setattr(pid, attr, no_flags) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
272
273 /*
274 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
275 *
276 * Check if the procfs entry is a directory of a process, and then check if the
277 * process has a comm with the prefix set in char *comm_prefix. As the
278 * current users of this function only check for kernel threads, there is no
279 * need to check for the threads for the process.
280 *
281 * Return: True if the proc_entry contains a comm file with comm_prefix*.
282 * Otherwise returns false.
283 */
procfs_is_workload_pid(const char * comm_prefix,struct dirent * proc_entry)284 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
285 {
286 char buffer[MAX_PATH];
287 int comm_fd, retval;
288 char *t_name;
289
290 if (proc_entry->d_type != DT_DIR)
291 return 0;
292
293 if (*proc_entry->d_name == '.')
294 return 0;
295
296 /* check if the string is a pid */
297 for (t_name = proc_entry->d_name; t_name; t_name++) {
298 if (!isdigit(*t_name))
299 break;
300 }
301
302 if (*t_name != '\0')
303 return 0;
304
305 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
306 comm_fd = open(buffer, O_RDONLY);
307 if (comm_fd < 0)
308 return 0;
309
310 memset(buffer, 0, MAX_PATH);
311 retval = read(comm_fd, buffer, MAX_PATH);
312
313 close(comm_fd);
314
315 if (retval <= 0)
316 return 0;
317
318 buffer[MAX_PATH-1] = '\0';
319 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
320 if (retval)
321 return 0;
322
323 /* comm already have \n */
324 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
325
326 return 1;
327 }
328
329 /*
330 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
331 *
332 * This function uses procfs to list the currently running threads and then set the
333 * sched_attr *attr to the threads that start with char *comm_prefix. It is
334 * mainly used to set the priority to the kernel threads created by the
335 * tracers.
336 */
set_comm_sched_attr(const char * comm_prefix,struct sched_attr * attr)337 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
338 {
339 struct dirent *proc_entry;
340 DIR *procfs;
341 int retval;
342 int pid;
343
344 if (strlen(comm_prefix) >= MAX_PATH) {
345 err_msg("Command prefix is too long: %d < strlen(%s)\n",
346 MAX_PATH, comm_prefix);
347 return 1;
348 }
349
350 procfs = opendir("/proc");
351 if (!procfs) {
352 err_msg("Could not open procfs\n");
353 return 1;
354 }
355
356 while ((proc_entry = readdir(procfs))) {
357
358 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
359 if (!retval)
360 continue;
361
362 if (strtoi(proc_entry->d_name, &pid)) {
363 err_msg("'%s' is not a valid pid", proc_entry->d_name);
364 goto out_err;
365 }
366 /* procfs_is_workload_pid confirmed it is a pid */
367 retval = __set_sched_attr(pid, attr);
368 if (retval) {
369 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
370 goto out_err;
371 }
372
373 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
374 }
375 return 0;
376
377 out_err:
378 closedir(procfs);
379 return 1;
380 }
381
/* Sentinel parse_prio() compares the values parsed after ':' against */
#define INVALID_VAL	(~0L)

/*
 * get_long_ns_after_colon - parse the ns duration following the first ':'
 *
 * Returns the result of parse_ns_duration() on the text right after the
 * first ':' found in start, or -1 if start has no ':'.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	/* the value begins right after the ':' */
	return parse_ns_duration(colon + 1);
}
398
/*
 * get_long_after_colon - parse the decimal number following the first ':'
 *
 * Returns the result of get_llong_from_str() on the text right after the
 * first ':' found in start, converted to long, or -1 if start has no ':'.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');
	long parsed;

	if (!colon)
		return -1;

	/* the value begins right after the ':' */
	parsed = get_llong_from_str(colon + 1);

	return parsed;
}
414
415 /*
416 * parse priority in the format:
417 * SCHED_OTHER:
418 * o:<prio>
419 * O:<prio>
420 * SCHED_RR:
421 * r:<prio>
422 * R:<prio>
423 * SCHED_FIFO:
424 * f:<prio>
425 * F:<prio>
426 * SCHED_DEADLINE:
427 * d:runtime:period
428 * D:runtime:period
429 */
parse_prio(char * arg,struct sched_attr * sched_param)430 int parse_prio(char *arg, struct sched_attr *sched_param)
431 {
432 long prio;
433 long runtime;
434 long period;
435
436 memset(sched_param, 0, sizeof(*sched_param));
437 sched_param->size = sizeof(*sched_param);
438
439 switch (arg[0]) {
440 case 'd':
441 case 'D':
442 /* d:runtime:period */
443 if (strlen(arg) < 4)
444 return -1;
445
446 runtime = get_long_ns_after_colon(arg);
447 if (runtime == INVALID_VAL)
448 return -1;
449
450 period = get_long_ns_after_colon(&arg[2]);
451 if (period == INVALID_VAL)
452 return -1;
453
454 if (runtime > period)
455 return -1;
456
457 sched_param->sched_policy = SCHED_DEADLINE;
458 sched_param->sched_runtime = runtime;
459 sched_param->sched_deadline = period;
460 sched_param->sched_period = period;
461 break;
462 case 'f':
463 case 'F':
464 /* f:prio */
465 prio = get_long_after_colon(arg);
466 if (prio == INVALID_VAL)
467 return -1;
468
469 if (prio < sched_get_priority_min(SCHED_FIFO))
470 return -1;
471 if (prio > sched_get_priority_max(SCHED_FIFO))
472 return -1;
473
474 sched_param->sched_policy = SCHED_FIFO;
475 sched_param->sched_priority = prio;
476 break;
477 case 'r':
478 case 'R':
479 /* r:prio */
480 prio = get_long_after_colon(arg);
481 if (prio == INVALID_VAL)
482 return -1;
483
484 if (prio < sched_get_priority_min(SCHED_RR))
485 return -1;
486 if (prio > sched_get_priority_max(SCHED_RR))
487 return -1;
488
489 sched_param->sched_policy = SCHED_RR;
490 sched_param->sched_priority = prio;
491 break;
492 case 'o':
493 case 'O':
494 /* o:prio */
495 prio = get_long_after_colon(arg);
496 if (prio == INVALID_VAL)
497 return -1;
498
499 if (prio < MIN_NICE)
500 return -1;
501 if (prio > MAX_NICE)
502 return -1;
503
504 sched_param->sched_policy = SCHED_OTHER;
505 sched_param->sched_nice = prio;
506 break;
507 default:
508 return -1;
509 }
510 return 0;
511 }
512
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the caller is expected to keep the returned descriptor
 * open for as long as the setting is needed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 on error
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* the latency is written as a raw 32 bits value */
	if (write(fd, &latency, sizeof(latency)) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
544
545 #ifdef HAVE_LIBCPUPOWER_SUPPORT
/*
 * Per-cpu table of saved idle state "disable" values, indexed as
 * [cpu][state]. Allocated on the first save_cpu_idle_disable_state() call.
 */
static unsigned int **saved_cpu_idle_disable_state;
/* Number of per-cpu entries currently allocated in the table above. */
static size_t saved_cpu_idle_disable_state_alloc_ctr;
548
549 /*
550 * save_cpu_idle_state_disable - save disable for all idle states of a cpu
551 *
552 * Saves the current disable of all idle states of a cpu, to be subsequently
553 * restored via restore_cpu_idle_disable_state.
554 *
555 * Return: idle state count on success, negative on error
556 */
save_cpu_idle_disable_state(unsigned int cpu)557 int save_cpu_idle_disable_state(unsigned int cpu)
558 {
559 unsigned int nr_states;
560 unsigned int state;
561 int disabled;
562 int nr_cpus;
563
564 nr_states = cpuidle_state_count(cpu);
565
566 if (nr_states == 0)
567 return 0;
568
569 if (saved_cpu_idle_disable_state == NULL) {
570 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
571 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
572 if (!saved_cpu_idle_disable_state)
573 return -1;
574 }
575
576 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
577 if (!saved_cpu_idle_disable_state[cpu])
578 return -1;
579 saved_cpu_idle_disable_state_alloc_ctr++;
580
581 for (state = 0; state < nr_states; state++) {
582 disabled = cpuidle_is_state_disabled(cpu, state);
583 if (disabled < 0)
584 return disabled;
585 saved_cpu_idle_disable_state[cpu][state] = disabled;
586 }
587
588 return nr_states;
589 }
590
591 /*
592 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
593 *
594 * Restores the current disable state of all idle states of a cpu that was
595 * previously saved by save_cpu_idle_disable_state.
596 *
597 * Return: idle state count on success, negative on error
598 */
restore_cpu_idle_disable_state(unsigned int cpu)599 int restore_cpu_idle_disable_state(unsigned int cpu)
600 {
601 unsigned int nr_states;
602 unsigned int state;
603 int disabled;
604 int result;
605
606 nr_states = cpuidle_state_count(cpu);
607
608 if (nr_states == 0)
609 return 0;
610
611 if (!saved_cpu_idle_disable_state)
612 return -1;
613
614 for (state = 0; state < nr_states; state++) {
615 if (!saved_cpu_idle_disable_state[cpu])
616 return -1;
617 disabled = saved_cpu_idle_disable_state[cpu][state];
618 result = cpuidle_state_disable(cpu, state, disabled);
619 if (result < 0)
620 return result;
621 }
622
623 free(saved_cpu_idle_disable_state[cpu]);
624 saved_cpu_idle_disable_state[cpu] = NULL;
625 saved_cpu_idle_disable_state_alloc_ctr--;
626 if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
627 free(saved_cpu_idle_disable_state);
628 saved_cpu_idle_disable_state = NULL;
629 }
630
631 return nr_states;
632 }
633
634 /*
635 * free_cpu_idle_disable_states - free saved idle state disable for all cpus
636 *
637 * Frees the memory used for storing cpu idle state disable for all cpus
638 * and states.
639 *
640 * Normally, the memory is freed automatically in
641 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
642 * error.
643 */
free_cpu_idle_disable_states(void)644 void free_cpu_idle_disable_states(void)
645 {
646 int cpu;
647 int nr_cpus;
648
649 if (!saved_cpu_idle_disable_state)
650 return;
651
652 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
653
654 for (cpu = 0; cpu < nr_cpus; cpu++) {
655 free(saved_cpu_idle_disable_state[cpu]);
656 saved_cpu_idle_disable_state[cpu] = NULL;
657 }
658
659 free(saved_cpu_idle_disable_state);
660 saved_cpu_idle_disable_state = NULL;
661 }
662
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables all idle states deeper than the one given in
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * States disabled before a failure are left disabled.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state = deepest_state + 1;
	int err;

	while (state < nr_states) {
		err = cpuidle_state_disable(cpu, state, 1);
		if (err < 0)
			return err;
		state++;
	}

	return nr_states;
}
690 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
691
/*
 * STR(x) - turn the expansion of x into a string literal
 *
 * The extra level of indirection expands x before it is stringified;
 * find_mount() uses it to embed MAX_PATH in a scanf field width.
 */
#define _STR(x) #x
#define STR(x) _STR(x)
694
695 /*
696 * find_mount - find a the mount point of a given fs
697 *
698 * Returns 0 if mount is not found, otherwise return 1 and fill mp
699 * with the mount point.
700 */
find_mount(const char * fs,char * mp,int sizeof_mp)701 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
702 {
703 char mount_point[MAX_PATH+1];
704 char type[100];
705 int found = 0;
706 FILE *fp;
707
708 fp = fopen("/proc/mounts", "r");
709 if (!fp)
710 return 0;
711
712 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
713 if (strcmp(type, fs) == 0) {
714 found = 1;
715 break;
716 }
717 }
718 fclose(fp);
719
720 if (!found)
721 return 0;
722
723 memset(mp, 0, sizeof_mp);
724 strncpy(mp, mount_point, sizeof_mp - 1);
725
726 debug_msg("Fs %s found at %s\n", fs, mp);
727 return 1;
728 }
729
730 /*
731 * get_self_cgroup - get the current thread cgroup path
732 *
733 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
734 *
735 * 0::/user.slice/user-0.slice/session-3.scope'\n'
736 *
737 * This function is interested in the content after the second : and before the '\n'.
738 *
739 * Returns 1 if a string was found, 0 otherwise.
740 */
get_self_cgroup(char * self_cg,int sizeof_self_cg)741 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
742 {
743 char path[MAX_PATH], *start;
744 int fd, retval;
745
746 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
747
748 fd = open(path, O_RDONLY);
749 if (fd < 0)
750 return 0;
751
752 memset(path, 0, sizeof(path));
753 retval = read(fd, path, MAX_PATH);
754
755 close(fd);
756
757 if (retval <= 0)
758 return 0;
759
760 path[MAX_PATH-1] = '\0';
761 start = path;
762
763 start = strstr(start, ":");
764 if (!start)
765 return 0;
766
767 /* skip ":" */
768 start++;
769
770 start = strstr(start, ":");
771 if (!start)
772 return 0;
773
774 /* skip ":" */
775 start++;
776
777 if (strlen(start) >= sizeof_self_cg)
778 return 0;
779
780 snprintf(self_cg, sizeof_self_cg, "%s", start);
781
782 /* Swap '\n' with '\0' */
783 start = strstr(self_cg, "\n");
784
785 /* there must be '\n' */
786 if (!start)
787 return 0;
788
789 /* ok, it found a string after the second : and before the \n */
790 *start = '\0';
791
792 return 1;
793 }
794
795 /*
796 * open_cgroup_procs - Open the cgroup.procs file for the given cgroup
797 *
798 * If cgroup argument is not NULL, the cgroup.procs file for that cgroup
799 * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread
800 * will be used.
801 *
802 * Supports cgroup v2.
803 *
804 * Returns the file descriptor on success, -1 otherwise.
805 */
open_cgroup_procs(const char * cgroup)806 static int open_cgroup_procs(const char *cgroup)
807 {
808 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
809 char cgroup_procs[MAX_PATH];
810 int retval;
811 int cg_fd;
812
813 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
814 if (!retval) {
815 err_msg("Did not find cgroupv2 mount point\n");
816 return -1;
817 }
818
819 if (!cgroup) {
820 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
821 sizeof(cgroup_path) - strlen(cgroup_path));
822 if (!retval) {
823 err_msg("Did not find self cgroup\n");
824 return -1;
825 }
826 } else {
827 snprintf(&cgroup_path[strlen(cgroup_path)],
828 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
829 }
830
831 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
832
833 debug_msg("Using cgroup path at: %s\n", cgroup_procs);
834
835 cg_fd = open(cgroup_procs, O_RDWR);
836 if (cg_fd < 0)
837 return -1;
838
839 return cg_fd;
840 }
841
/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * If cgroup argument is not NULL, the pid will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * The pid is written, followed by an end of line, to the cgroup.procs
 * file of the cgroup.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char pid_str[24];
	int cg_fd;
	int ok;

	cg_fd = open_cgroup_procs(cgroup);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	ok = write(cg_fd, pid_str, strlen(pid_str)) >= 0;
	if (ok)
		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
	else
		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
			pid_str, strerror(errno));

	close(cg_fd);

	return ok;
}
875
876 /**
877 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
878 *
879 * If cgroup argument is not NULL, the threads will move to the given cgroup.
880 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
881 *
882 * Supports cgroup v2.
883 *
884 * Returns 1 on success, 0 otherwise.
885 */
set_comm_cgroup(const char * comm_prefix,const char * cgroup)886 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
887 {
888 struct dirent *proc_entry;
889 DIR *procfs;
890 int retval;
891 int cg_fd;
892
893 if (strlen(comm_prefix) >= MAX_PATH) {
894 err_msg("Command prefix is too long: %d < strlen(%s)\n",
895 MAX_PATH, comm_prefix);
896 return 0;
897 }
898
899 cg_fd = open_cgroup_procs(cgroup);
900 if (cg_fd < 0)
901 return 0;
902
903 procfs = opendir("/proc");
904 if (!procfs) {
905 err_msg("Could not open procfs\n");
906 goto out_cg;
907 }
908
909 while ((proc_entry = readdir(procfs))) {
910
911 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
912 if (!retval)
913 continue;
914
915 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
916 if (retval < 0) {
917 err_msg("Error setting cgroup attributes for pid:%s - %s\n",
918 proc_entry->d_name, strerror(errno));
919 goto out_procfs;
920 }
921
922 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
923 }
924
925 closedir(procfs);
926 close(cg_fd);
927 return 1;
928
929 out_procfs:
930 closedir(procfs);
931 out_cg:
932 close(cg_fd);
933 return 0;
934 }
935
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible: the affinity of the
 * calling process is reduced to the cpus it can currently run on that are
 * not in monitored_cpus. Nothing is changed if the two sets do not overlap.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t rtla_cpus, house_keeping_cpus;
	pid_t self = getpid();

	/* first get the CPUs in which rtla can actually run. */
	if (sched_getaffinity(self, sizeof(rtla_cpus), &rtla_cpus) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
	if (CPU_COUNT(&house_keeping_cpus) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* remove the intersection */
	CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);

	/* get only those that rtla can run */
	CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);

	/* is there any cpu left? */
	if (CPU_COUNT(&house_keeping_cpus) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(self, sizeof(house_keeping_cpus), &house_keeping_cpus) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
985
/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * When optarg is set it is used, skipping a leading '='. Otherwise the
 * next element of argv is used, and optind advanced past it, unless it
 * starts with '-'.
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	if (optarg)
		return optarg[0] == '=' ? optarg + 1 : optarg;

	/* parse argument of form -s [arg] and --long [arg] */
	if (optind >= argc || argv[optind][0] == '-')
		return NULL;

	/* consume optind */
	return argv[optind++];
}
1011
/*
 * strtoi - convert string to integer with error checking
 *
 * The whole string must be a number; the base is detected from its prefix
 * as strtol() does with base 0. *res is only written on success.
 *
 * Returns 0 on success, -1 if conversion fails or result is out of int range.
 */
int strtoi(const char *s, int *res)
{
	char *tail;
	long parsed;

	if (*s == '\0')
		return -1;

	errno = 0;
	parsed = strtol(s, &tail, 0);

	/* conversion error, or text left after the number */
	if (errno != 0 || *tail != '\0')
		return -1;

	if (parsed > INT_MAX || parsed < INT_MIN)
		return -1;

	*res = (int)parsed;
	return 0;
}
1033