xref: /linux/tools/tracing/rtla/src/utils.c (revision 9e1e9d660255d7216067193d774f338d08d8528d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 #include <limits.h>
21 
22 #include "common.h"
23 
24 #define MAX_MSG_LENGTH	1024
25 int config_debug;
26 
27 /*
28  * err_msg - print an error message to the stderr
29  */
err_msg(const char * fmt,...)30 void err_msg(const char *fmt, ...)
31 {
32 	char message[MAX_MSG_LENGTH];
33 	va_list ap;
34 
35 	va_start(ap, fmt);
36 	vsnprintf(message, sizeof(message), fmt, ap);
37 	va_end(ap);
38 
39 	fprintf(stderr, "%s", message);
40 }
41 
42 /*
43  * debug_msg - print a debug message to stderr if debug is set
44  */
debug_msg(const char * fmt,...)45 void debug_msg(const char *fmt, ...)
46 {
47 	char message[MAX_MSG_LENGTH];
48 	va_list ap;
49 
50 	if (!config_debug)
51 		return;
52 
53 	va_start(ap, fmt);
54 	vsnprintf(message, sizeof(message), fmt, ap);
55 	va_end(ap);
56 
57 	fprintf(stderr, "%s", message);
58 }
59 
60 /*
61  * fatal - print an error message and EOL to stderr and exit with ERROR
62  */
fatal(const char * fmt,...)63 void fatal(const char *fmt, ...)
64 {
65 	va_list ap;
66 
67 	va_start(ap, fmt);
68 	vfprintf(stderr, fmt, ap);
69 	va_end(ap);
70 	fprintf(stderr, "\n");
71 
72 	exit(ERROR);
73 }
74 
/*
 * get_llong_from_str - get a long long int from a string
 *
 * Returns -1 if the string does not begin with a parsable decimal
 * number or if the conversion is out of range.
 */
long long get_llong_from_str(char *start)
{
	long long parsed;
	char *endp;

	errno = 0;
	parsed = strtoll(start, &endp, 10);

	/* No digits consumed, or strtoll reported an error. */
	if (endp == start || errno)
		return -1;

	return parsed;
}
90 
/*
 * get_duration - fill output with a human readable duration since start_time
 *
 * Formats the elapsed time as "days HH:MM:SS" by treating the duration
 * as a time_t and letting gmtime() split it into calendar fields.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = difftime(time(NULL), start_time);
	struct tm *fields = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 fields->tm_yday, fields->tm_hour,
		 fields->tm_min, fields->tm_sec);
}
109 
110 /*
111  * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
112  *
113  * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set
114  * filling cpu_set_t argument.
115  *
116  * Returns 0 on success, 1 otherwise.
117  */
parse_cpu_set(char * cpu_list,cpu_set_t * set)118 int parse_cpu_set(char *cpu_list, cpu_set_t *set)
119 {
120 	const char *p;
121 	int end_cpu;
122 	int cpu;
123 	int i;
124 
125 	CPU_ZERO(set);
126 
127 	for (p = cpu_list; *p; ) {
128 		cpu = atoi(p);
129 		if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
130 			goto err;
131 
132 		while (isdigit(*p))
133 			p++;
134 		if (*p == '-') {
135 			p++;
136 			end_cpu = atoi(p);
137 			if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
138 				goto err;
139 			while (isdigit(*p))
140 				p++;
141 		} else
142 			end_cpu = cpu;
143 
144 		if (cpu == end_cpu) {
145 			debug_msg("cpu_set: adding cpu %d\n", cpu);
146 			CPU_SET(cpu, set);
147 		} else {
148 			for (i = cpu; i <= end_cpu; i++) {
149 				debug_msg("cpu_set: adding cpu %d\n", i);
150 				CPU_SET(i, set);
151 			}
152 		}
153 
154 		if (*p == ',')
155 			p++;
156 	}
157 
158 	return 0;
159 err:
160 	debug_msg("Error parsing the cpu set %s\n", cpu_list);
161 	return 1;
162 }
163 
164 /*
165  * parse_stack_format - parse the stack format
166  *
167  * Return: the stack format on success, -1 otherwise.
168  */
parse_stack_format(char * arg)169 int parse_stack_format(char *arg)
170 {
171 	if (!strcmp(arg, "truncate"))
172 		return STACK_FORMAT_TRUNCATE;
173 	if (!strcmp(arg, "skip"))
174 		return STACK_FORMAT_SKIP;
175 	if (!strcmp(arg, "full"))
176 		return STACK_FORMAT_FULL;
177 
178 	debug_msg("Error parsing the stack format %s\n", arg);
179 	return -1;
180 }
181 
/*
 * parse_duration - parse duration with s/m/h/d suffix converting it to seconds
 *
 * A bare number (or an unknown suffix) is taken as seconds, matching
 * the 's'/'S' suffix behavior.
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long seconds = strtol(val, &suffix, 10);

	switch (*suffix) {
	case 'm':
	case 'M':
		seconds *= 60;
		break;
	case 'h':
	case 'H':
		seconds *= 60 * 60;
		break;
	case 'd':
	case 'D':
		seconds *= 24 * 60 * 60;
		break;
	case 's':
	case 'S':
	default:
		/* Already in seconds. */
		break;
	}

	return seconds;
}
215 
/*
 * match_time_unit - check if str starts with unit followed by end-of-string or ':'
 *
 * This allows the time unit parser to work both in standalone duration strings
 * like "100ms" and in colon-delimited SCHED_DEADLINE specifications like
 * "d:10ms:100ms", while still rejecting malformed input like "100msx".
 */
static bool match_time_unit(const char *str, const char *unit)
{
	size_t unit_len = strlen(unit);

	if (strncmp(str, unit, unit_len) != 0)
		return false;

	/* The unit must terminate the token. */
	return str[unit_len] == '\0' || str[unit_len] == ':';
}
230 
/*
 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 *
 * A recognized unit must follow the number (optionally followed by ':'
 * for SCHED_DEADLINE strings); otherwise -1 is returned.
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long value = strtol(val, &unit, 10);

	if (match_time_unit(unit, "ns"))
		return value;
	if (match_time_unit(unit, "us"))
		return value * 1000;
	if (match_time_unit(unit, "ms"))
		return value * 1000 * 1000;
	if (match_time_unit(unit, "s"))
		return value * 1000 * 1000 * 1000;

	/* Missing or unknown unit. */
	return -1;
}
259 
260 /*
261  * This is a set of helper functions to use SCHED_DEADLINE.
262  */
263 #ifndef __NR_sched_setattr
264 # ifdef __x86_64__
265 #  define __NR_sched_setattr	314
266 # elif __i386__
267 #  define __NR_sched_setattr	351
268 # elif __arm__
269 #  define __NR_sched_setattr	380
270 # elif __aarch64__ || __riscv
271 #  define __NR_sched_setattr	274
272 # elif __powerpc__
273 #  define __NR_sched_setattr	355
274 # elif __s390x__
275 #  define __NR_sched_setattr	345
276 # elif __loongarch__
277 #  define __NR_sched_setattr	274
278 # endif
279 #endif
280 
281 #define SCHED_DEADLINE		6
282 
/*
 * syscall_sched_setattr - invoke sched_setattr() via the raw syscall,
 * since glibc provides no wrapper for it.
 */
static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
					unsigned int flags)
{
	return syscall(__NR_sched_setattr, pid, attr, flags);
}
287 
/*
 * __set_sched_attr - apply the given sched_attr to a pid
 *
 * Returns 0 on success, 1 on failure (after printing an error message).
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (syscall_sched_setattr(pid, attr, 0) < 0) {
		err_msg("Failed to set sched attributes to the pid %d: %s\n",
			pid, strerror(errno));
		return 1;
	}

	return 0;
}
302 
303 /*
304  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
305  *
306  * Check if the procfs entry is a directory of a process, and then check if the
307  * process has a comm with the prefix set in char *comm_prefix. As the
308  * current users of this function only check for kernel threads, there is no
309  * need to check for the threads for the process.
310  *
311  * Return: True if the proc_entry contains a comm file with comm_prefix*.
312  * Otherwise returns false.
313  */
procfs_is_workload_pid(const char * comm_prefix,struct dirent * proc_entry)314 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
315 {
316 	char buffer[MAX_PATH];
317 	int comm_fd, retval;
318 	char *t_name;
319 
320 	if (proc_entry->d_type != DT_DIR)
321 		return 0;
322 
323 	if (*proc_entry->d_name == '.')
324 		return 0;
325 
326 	/* check if the string is a pid */
327 	for (t_name = proc_entry->d_name; *t_name; t_name++) {
328 		if (!isdigit(*t_name))
329 			break;
330 	}
331 
332 	if (*t_name != '\0')
333 		return 0;
334 
335 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
336 	comm_fd = open(buffer, O_RDONLY);
337 	if (comm_fd < 0)
338 		return 0;
339 
340 	memset(buffer, 0, MAX_PATH);
341 	retval = read(comm_fd, buffer, MAX_PATH);
342 
343 	close(comm_fd);
344 
345 	if (retval <= 0)
346 		return 0;
347 
348 	buffer[MAX_PATH-1] = '\0';
349 	if (!str_has_prefix(buffer, comm_prefix))
350 		return 0;
351 
352 	/* comm already have \n */
353 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
354 
355 	return 1;
356 }
357 
358 /*
359  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
360  *
361  * This function uses procfs to list the currently running threads and then set the
362  * sched_attr *attr to the threads that start with char *comm_prefix. It is
363  * mainly used to set the priority to the kernel threads created by the
364  * tracers.
365  */
set_comm_sched_attr(const char * comm_prefix,struct sched_attr * attr)366 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
367 {
368 	struct dirent *proc_entry;
369 	DIR *procfs;
370 	int retval;
371 	int pid;
372 
373 	if (strlen(comm_prefix) >= MAX_PATH) {
374 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
375 			MAX_PATH, comm_prefix);
376 		return 1;
377 	}
378 
379 	procfs = opendir("/proc");
380 	if (!procfs) {
381 		err_msg("Could not open procfs\n");
382 		return 1;
383 	}
384 
385 	while ((proc_entry = readdir(procfs))) {
386 
387 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
388 		if (!retval)
389 			continue;
390 
391 		if (strtoi(proc_entry->d_name, &pid)) {
392 			err_msg("'%s' is not a valid pid", proc_entry->d_name);
393 			retval = 1;
394 			goto out;
395 		}
396 		/* procfs_is_workload_pid confirmed it is a pid */
397 		retval = __set_sched_attr(pid, attr);
398 		if (retval) {
399 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
400 			goto out;
401 		}
402 
403 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
404 	}
405 
406 	retval = 0;
407 out:
408 	closedir(procfs);
409 	return retval;
410 }
411 
412 #define INVALID_VAL	(~0L)
/*
 * get_long_ns_after_colon - parse the ns duration that follows the first ':'
 *
 * Returns -1 if no ':' is present; otherwise whatever
 * parse_ns_duration() returns for the remainder.
 */
static long get_long_ns_after_colon(char *start)
{
	char *sep;

	sep = strstr(start, ":");
	if (!sep)
		return -1;

	/* The value begins right after the ':'. */
	return parse_ns_duration(sep + 1);
}
428 
/*
 * get_long_after_colon - parse the integer that follows the first ':'
 *
 * Returns -1 if no ':' is present; otherwise whatever
 * get_llong_from_str() returns for the remainder.
 */
static long get_long_after_colon(char *start)
{
	char *sep;

	sep = strstr(start, ":");
	if (!sep)
		return -1;

	/* The value begins right after the ':'. */
	return get_llong_from_str(sep + 1);
}
444 
445 /*
446  * parse priority in the format:
447  * SCHED_OTHER:
448  *		o:<prio>
449  *		O:<prio>
450  * SCHED_RR:
451  *		r:<prio>
452  *		R:<prio>
453  * SCHED_FIFO:
454  *		f:<prio>
455  *		F:<prio>
456  * SCHED_DEADLINE:
457  *		d:runtime:period
458  *		D:runtime:period
459  */
parse_prio(char * arg,struct sched_attr * sched_param)460 int parse_prio(char *arg, struct sched_attr *sched_param)
461 {
462 	long prio;
463 	long runtime;
464 	long period;
465 
466 	memset(sched_param, 0, sizeof(*sched_param));
467 	sched_param->size = sizeof(*sched_param);
468 
469 	switch (arg[0]) {
470 	case 'd':
471 	case 'D':
472 		/* d:runtime:period */
473 		if (strlen(arg) < 4)
474 			return -1;
475 
476 		runtime = get_long_ns_after_colon(arg);
477 		if (runtime == INVALID_VAL)
478 			return -1;
479 
480 		period = get_long_ns_after_colon(&arg[2]);
481 		if (period == INVALID_VAL)
482 			return -1;
483 
484 		if (runtime > period)
485 			return -1;
486 
487 		sched_param->sched_policy   = SCHED_DEADLINE;
488 		sched_param->sched_runtime  = runtime;
489 		sched_param->sched_deadline = period;
490 		sched_param->sched_period   = period;
491 		break;
492 	case 'f':
493 	case 'F':
494 		/* f:prio */
495 		prio = get_long_after_colon(arg);
496 		if (prio == INVALID_VAL)
497 			return -1;
498 
499 		if (prio < sched_get_priority_min(SCHED_FIFO))
500 			return -1;
501 		if (prio > sched_get_priority_max(SCHED_FIFO))
502 			return -1;
503 
504 		sched_param->sched_policy   = SCHED_FIFO;
505 		sched_param->sched_priority = prio;
506 		break;
507 	case 'r':
508 	case 'R':
509 		/* r:prio */
510 		prio = get_long_after_colon(arg);
511 		if (prio == INVALID_VAL)
512 			return -1;
513 
514 		if (prio < sched_get_priority_min(SCHED_RR))
515 			return -1;
516 		if (prio > sched_get_priority_max(SCHED_RR))
517 			return -1;
518 
519 		sched_param->sched_policy   = SCHED_RR;
520 		sched_param->sched_priority = prio;
521 		break;
522 	case 'o':
523 	case 'O':
524 		/* o:prio */
525 		prio = get_long_after_colon(arg);
526 		if (prio == INVALID_VAL)
527 			return -1;
528 
529 		if (prio < MIN_NICE)
530 			return -1;
531 		if (prio > MAX_NICE)
532 			return -1;
533 
534 		sched_param->sched_policy   = SCHED_OTHER;
535 		sched_param->sched_nice = prio;
536 		break;
537 	default:
538 		return -1;
539 	}
540 	return 0;
541 }
542 
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latecy
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latecy
 * is closed.
 *
 * Return: the /dev/cpu_dma_latecy file descriptor
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd, written;

	fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* The kernel expects a 4-byte latency value. */
	written = write(fd, &latency, 4);
	if (written < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
574 
575 #ifdef HAVE_LIBCPUPOWER_SUPPORT
576 static unsigned int **saved_cpu_idle_disable_state;
577 static size_t saved_cpu_idle_disable_state_alloc_ctr;
578 
579 /*
580  * save_cpu_idle_state_disable - save disable for all idle states of a cpu
581  *
582  * Saves the current disable of all idle states of a cpu, to be subsequently
583  * restored via restore_cpu_idle_disable_state.
584  *
585  * Return: idle state count on success, negative on error
586  */
save_cpu_idle_disable_state(unsigned int cpu)587 int save_cpu_idle_disable_state(unsigned int cpu)
588 {
589 	unsigned int nr_states;
590 	unsigned int state;
591 	int disabled;
592 
593 	nr_states = cpuidle_state_count(cpu);
594 
595 	if (nr_states == 0)
596 		return 0;
597 
598 	if (saved_cpu_idle_disable_state == NULL) {
599 		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
600 		if (!saved_cpu_idle_disable_state)
601 			return -1;
602 	}
603 
604 	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
605 	if (!saved_cpu_idle_disable_state[cpu])
606 		return -1;
607 	saved_cpu_idle_disable_state_alloc_ctr++;
608 
609 	for (state = 0; state < nr_states; state++) {
610 		disabled = cpuidle_is_state_disabled(cpu, state);
611 		if (disabled < 0)
612 			return disabled;
613 		saved_cpu_idle_disable_state[cpu][state] = disabled;
614 	}
615 
616 	return nr_states;
617 }
618 
619 /*
620  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
621  *
622  * Restores the current disable state of all idle states of a cpu that was
623  * previously saved by save_cpu_idle_disable_state.
624  *
625  * Return: idle state count on success, negative on error
626  */
restore_cpu_idle_disable_state(unsigned int cpu)627 int restore_cpu_idle_disable_state(unsigned int cpu)
628 {
629 	unsigned int nr_states;
630 	unsigned int state;
631 	int disabled;
632 	int result;
633 
634 	nr_states = cpuidle_state_count(cpu);
635 
636 	if (nr_states == 0)
637 		return 0;
638 
639 	if (!saved_cpu_idle_disable_state)
640 		return -1;
641 
642 	for (state = 0; state < nr_states; state++) {
643 		if (!saved_cpu_idle_disable_state[cpu])
644 			return -1;
645 		disabled = saved_cpu_idle_disable_state[cpu][state];
646 		result = cpuidle_state_disable(cpu, state, disabled);
647 		if (result < 0)
648 			return result;
649 	}
650 
651 	free(saved_cpu_idle_disable_state[cpu]);
652 	saved_cpu_idle_disable_state[cpu] = NULL;
653 	saved_cpu_idle_disable_state_alloc_ctr--;
654 	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
655 		free(saved_cpu_idle_disable_state);
656 		saved_cpu_idle_disable_state = NULL;
657 	}
658 
659 	return nr_states;
660 }
661 
662 /*
663  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
664  *
665  * Frees the memory used for storing cpu idle state disable for all cpus
666  * and states.
667  *
668  * Normally, the memory is freed automatically in
669  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
670  * error.
671  */
free_cpu_idle_disable_states(void)672 void free_cpu_idle_disable_states(void)
673 {
674 	int cpu;
675 
676 	if (!saved_cpu_idle_disable_state)
677 		return;
678 
679 	for (cpu = 0; cpu < nr_cpus; cpu++) {
680 		free(saved_cpu_idle_disable_state[cpu]);
681 		saved_cpu_idle_disable_state[cpu] = NULL;
682 	}
683 
684 	free(saved_cpu_idle_disable_state);
685 	saved_cpu_idle_disable_state = NULL;
686 }
687 
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables all idle states deeper than the one given in
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state;
	int ret;

	/* Everything deeper than deepest_state gets disabled. */
	for (state = deepest_state + 1; state < nr_states; state++) {
		ret = cpuidle_state_disable(cpu, state, 1);
		if (ret < 0)
			return ret;
	}

	return nr_states;
}
715 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
716 
717 #define _STR(x) #x
718 #define STR(x) _STR(x)
719 
720 /*
721  * find_mount - find a the mount point of a given fs
722  *
723  * Returns 0 if mount is not found, otherwise return 1 and fill mp
724  * with the mount point.
725  */
find_mount(const char * fs,char * mp,int sizeof_mp)726 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
727 {
728 	char mount_point[MAX_PATH+1];
729 	char type[100];
730 	int found = 0;
731 	FILE *fp;
732 
733 	fp = fopen("/proc/mounts", "r");
734 	if (!fp)
735 		return 0;
736 
737 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
738 		if (strcmp(type, fs) == 0) {
739 			found = 1;
740 			break;
741 		}
742 	}
743 	fclose(fp);
744 
745 	if (!found)
746 		return 0;
747 
748 	memset(mp, 0, sizeof_mp);
749 	strncpy(mp, mount_point, sizeof_mp - 1);
750 
751 	debug_msg("Fs %s found at %s\n", fs, mp);
752 	return 1;
753 }
754 
755 /*
756  * get_self_cgroup - get the current thread cgroup path
757  *
758  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
759  *
760  * 0::/user.slice/user-0.slice/session-3.scope'\n'
761  *
762  * This function is interested in the content after the second : and before the '\n'.
763  *
764  * Returns 1 if a string was found, 0 otherwise.
765  */
get_self_cgroup(char * self_cg,int sizeof_self_cg)766 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
767 {
768 	char path[MAX_PATH], *start;
769 	int fd, retval;
770 
771 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
772 
773 	fd = open(path, O_RDONLY);
774 	if (fd < 0)
775 		return 0;
776 
777 	memset(path, 0, sizeof(path));
778 	retval = read(fd, path, MAX_PATH);
779 
780 	close(fd);
781 
782 	if (retval <= 0)
783 		return 0;
784 
785 	path[MAX_PATH-1] = '\0';
786 	start = path;
787 
788 	start = strstr(start, ":");
789 	if (!start)
790 		return 0;
791 
792 	/* skip ":" */
793 	start++;
794 
795 	start = strstr(start, ":");
796 	if (!start)
797 		return 0;
798 
799 	/* skip ":" */
800 	start++;
801 
802 	if (strlen(start) >= sizeof_self_cg)
803 		return 0;
804 
805 	snprintf(self_cg, sizeof_self_cg, "%s", start);
806 
807 	/* Swap '\n' with '\0' */
808 	start = strstr(self_cg, "\n");
809 
810 	/* there must be '\n' */
811 	if (!start)
812 		return 0;
813 
814 	/* ok, it found a string after the second : and before the \n */
815 	*start = '\0';
816 
817 	return 1;
818 }
819 
820 /*
821  * open_cgroup_procs - Open the cgroup.procs file for the given cgroup
822  *
823  * If cgroup argument is not NULL, the cgroup.procs file for that cgroup
824  * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread
825  * will be used.
826  *
827  * Supports cgroup v2.
828  *
829  * Returns the file descriptor on success, -1 otherwise.
830  */
open_cgroup_procs(const char * cgroup)831 static int open_cgroup_procs(const char *cgroup)
832 {
833 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
834 	char cgroup_procs[MAX_PATH];
835 	int retval;
836 	int cg_fd;
837 	size_t cg_path_len;
838 
839 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
840 	if (!retval) {
841 		err_msg("Did not find cgroupv2 mount point\n");
842 		return -1;
843 	}
844 
845 	cg_path_len = strlen(cgroup_path);
846 
847 	if (!cgroup) {
848 		retval = get_self_cgroup(&cgroup_path[cg_path_len],
849 				sizeof(cgroup_path) - cg_path_len);
850 		if (!retval) {
851 			err_msg("Did not find self cgroup\n");
852 			return -1;
853 		}
854 	} else {
855 		snprintf(&cgroup_path[cg_path_len],
856 				sizeof(cgroup_path) - cg_path_len, "%s/", cgroup);
857 	}
858 
859 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
860 
861 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
862 
863 	cg_fd = open(cgroup_procs, O_RDWR);
864 	if (cg_fd < 0)
865 		return -1;
866 
867 	return cg_fd;
868 }
869 
/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * If cgroup argument is not NULL, the threads will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char pid_str[24];
	int cg_fd, written;

	cg_fd = open_cgroup_procs(cgroup);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	/* Writing the pid into cgroup.procs moves the process. */
	written = write(cg_fd, pid_str, strlen(pid_str));
	if (written < 0)
		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
				pid_str, strerror(errno));
	else
		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);

	close(cg_fd);

	return (written >= 0);
}
903 
904 /**
905  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
906  *
907  * If cgroup argument is not NULL, the threads will move to the given cgroup.
908  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
909  *
910  * Supports cgroup v2.
911  *
912  * Returns 1 on success, 0 otherwise.
913  */
set_comm_cgroup(const char * comm_prefix,const char * cgroup)914 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
915 {
916 	struct dirent *proc_entry;
917 	DIR *procfs;
918 	int retval;
919 	int cg_fd;
920 
921 	if (strlen(comm_prefix) >= MAX_PATH) {
922 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
923 			MAX_PATH, comm_prefix);
924 		return 0;
925 	}
926 
927 	cg_fd = open_cgroup_procs(cgroup);
928 	if (cg_fd < 0)
929 		return 0;
930 
931 	procfs = opendir("/proc");
932 	if (!procfs) {
933 		err_msg("Could not open procfs\n");
934 		goto out_cg;
935 	}
936 
937 	while ((proc_entry = readdir(procfs))) {
938 
939 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
940 		if (!retval)
941 			continue;
942 
943 		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
944 		if (retval < 0) {
945 			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
946 				proc_entry->d_name, strerror(errno));
947 			goto out_procfs;
948 		}
949 
950 		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
951 	}
952 
953 	closedir(procfs);
954 	close(cg_fd);
955 	return 1;
956 
957 out_procfs:
958 	closedir(procfs);
959 out_cg:
960 	close(cg_fd);
961 	return 0;
962 }
963 
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t rtla_cpus, house_keeping_cpus;

	/* first get the CPUs in which rtla can actually run. */
	if (sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* XOR drops the intersection between the two sets ... */
	CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);

	/* ... and masking with rtla's set keeps only cpus rtla can use. */
	CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);

	/* is there any cpu left? */
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
1013 
/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	/* getopt already attached the value: -sarg or --long=arg. */
	if (optarg)
		return optarg[0] == '=' ? &optarg[1] : optarg;

	/* parse argument of form -s [arg] and --long [arg]*/
	if (optind < argc && argv[optind][0] != '-')
		return argv[optind++];

	return NULL;
}
1039 
/*
 * strtoi - convert string to integer with error checking
 *
 * Returns 0 on success, -1 if conversion fails or result is out of int range.
 */
int strtoi(const char *s, int *res)
{
	char *end;
	long value;

	/* An empty string is not a number. */
	if (*s == '\0')
		return -1;

	errno = 0;
	value = strtol(s, &end, 0);

	/* Reject trailing garbage and out-of-int-range values. */
	if (errno || *end != '\0' || value > INT_MAX || value < INT_MIN)
		return -1;

	*res = (int) value;
	return 0;
}
1061 
/* fatal_alloc - abort the tool with a common out-of-memory message. */
static inline void fatal_alloc(void)
{
	fatal("Error allocating memory\n");
}
1066 
/* calloc_fatal - calloc() wrapper that exits on allocation failure. */
void *calloc_fatal(size_t n, size_t size)
{
	void *ptr = calloc(n, size);

	if (!ptr)
		fatal_alloc();

	return ptr;
}
1076 
/* reallocarray_fatal - reallocarray() wrapper that exits on allocation failure. */
void *reallocarray_fatal(void *p, size_t n, size_t size)
{
	void *ptr = reallocarray(p, n, size);

	if (!ptr)
		fatal_alloc();

	return ptr;
}
1086 
/* strdup_fatal - strdup() wrapper that exits on allocation failure. */
char *strdup_fatal(const char *s)
{
	char *copy = strdup(s);

	if (!copy)
		fatal_alloc();

	return copy;
}
1096