1 /* SPDX-License-Identifier: GPL-2.0 */
2
3 #define _GNU_SOURCE
4
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/limits.h>
8 #include <poll.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/inotify.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 #include <sys/wait.h>
17 #include <unistd.h>
18
19 #include "cgroup_util.h"
20 #include "../../clone3/clone3_selftests.h"
21
/*
 * Global flag with external linkage; none of the helpers visible here read
 * or write it.
 * NOTE(review): the name suggests it marks test runs against a named
 * cgroup v1 hierarchy - confirm against the code that sets it.
 */
bool cg_test_v1_named;
23
/*
 * Read up to max_len - 1 bytes from the start of the file at @path into
 * @buf and NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero @max_len leaves no room for the terminator and yields -EINVAL.
 */
ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd, err = 0;

	/* max_len - 1 below would otherwise wrap around to SIZE_MAX. */
	if (!max_len)
		return -EINVAL;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -errno;

	len = read(fd, buf, max_len - 1);
	if (len < 0)
		err = errno;	/* capture before close() can overwrite it */
	else
		buf[len] = '\0';

	close(fd);
	return len < 0 ? -err : len;
}
42
/*
 * Append @len bytes from @buf to the existing file at @path (the file is
 * opened with O_WRONLY | O_APPEND and is not created if missing).
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd, err = 0;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	written = write(fd, buf, len);
	if (written < 0)
		err = errno;	/* capture before close() can overwrite it */

	close(fd);
	return written < 0 ? -err : written;
}
56
/*
 * Build the path "<root>/<name>" in a freshly allocated buffer.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
66
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated buffer.
 *
 * The buffer is sized from the formatted length, so any int value of
 * @index (including negative ones) is rendered without truncation.
 *
 * Returns the new string, which the caller must free(), or NULL if
 * formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	/* With a NULL buffer and size 0, snprintf() only reports the length. */
	int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	size_t len;
	char *ret;

	if (need < 0)
		return NULL;

	len = (size_t)need + 1;
	ret = malloc(len);
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s_%d", root, name, index);

	return ret;
}
76
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated buffer.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
86
/*
 * Read the file "<cgroup>/<control>" into buf via read_text(); len is the
 * capacity of buf.
 *
 * Gives back 0 when the read succeeds, or the negative errno reported by
 * read_text() when it does not.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);
	if (nread < 0)
		return nread;

	return 0;
}
98
/*
 * Compare the contents of "<cgroup>/<control>" against @expected.
 *
 * The read buffer holds strlen(expected) + 1 bytes, so at most
 * strlen(expected) bytes of the file take part in the comparison.
 *
 * Returns the strcmp() result (0 on a match), or -1 when @expected is
 * NULL, the buffer cannot be allocated, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	char *content;
	size_t cap;
	int diff = -1;

	/* There is nothing to compare against without a reference string. */
	if (!expected)
		return -1;

	cap = strlen(expected) + 1;
	content = malloc(cap);
	if (!content)
		return -1;

	if (!cg_read(cgroup, control, content, cap))
		diff = strcmp(expected, content);

	free(content);
	return diff;
}
125
cg_read_strstr(const char * cgroup,const char * control,const char * needle)126 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
127 {
128 char buf[PAGE_SIZE];
129
130 if (cg_read(cgroup, control, buf, sizeof(buf)))
131 return -1;
132
133 return strstr(buf, needle) ? 0 : -1;
134 }
135
/*
 * Read "<cgroup>/<control>" and convert its leading text with atol().
 *
 * Returns the converted value, or -1 if the file cannot be read.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	return err ? -1 : atol(text);
}
145
/*
 * Read from offset 0 of the already-open descriptor @fd and convert the
 * leading text with atol(). The file position of @fd is not used or moved
 * because the read is done with pread().
 *
 * Returns the converted value, or -1 if nothing could be read.
 */
long cg_read_long_fd(int fd)
{
	char buf[128];
	ssize_t len;

	/* Keep one byte spare so the text can be NUL-terminated for atol(). */
	len = pread(fd, buf, sizeof(buf) - 1, 0);
	if (len <= 0)
		return -1;

	buf[len] = '\0';
	return atol(buf);
}
155
cg_read_key_long(const char * cgroup,const char * control,const char * key)156 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
157 {
158 char buf[PAGE_SIZE];
159 char *ptr;
160
161 if (cg_read(cgroup, control, buf, sizeof(buf)))
162 return -1;
163
164 ptr = strstr(buf, key);
165 if (!ptr)
166 return -1;
167
168 return atol(ptr + strlen(key));
169 }
170
cg_read_lc(const char * cgroup,const char * control)171 long cg_read_lc(const char *cgroup, const char *control)
172 {
173 char buf[PAGE_SIZE];
174 const char delim[] = "\n";
175 char *line;
176 long cnt = 0;
177
178 if (cg_read(cgroup, control, buf, sizeof(buf)))
179 return -1;
180
181 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
182 cnt++;
183
184 return cnt;
185 }
186
/*
 * Write the NUL-terminated string buf to "<cgroup>/<control>" through
 * write_text().
 *
 * Returns 0 when the whole string was written; otherwise hands back
 * whatever write_text() returned (its negative errno, or the byte count
 * of a short write).
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	ssize_t want = strlen(buf);
	ssize_t done;
	char file[PATH_MAX];

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	done = write_text(file, buf, want);
	if (done == want)
		return 0;

	return done;
}
197
/*
 * Open "<cgroup>/<control>" with the given open() flags.
 *
 * Yields the new file descriptor, or -1 if open() fails. The caller owns
 * the descriptor and releases it with close().
 */
int cg_open(const char *cgroup, const char *control, int flags)
{
	char file[PATH_MAX];
	int fd;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	fd = open(file, flags);

	return fd;
}
209
/*
 * Format @value as a decimal number and write it to "<cgroup>/<control>"
 * via cg_write().
 *
 * Returns the cg_write() result, or a negative value if formatting fails.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
	char buf[64];
	int ret;

	/* value is a signed long, so the matching conversion is %ld. */
	ret = snprintf(buf, sizeof(buf), "%ld", value);
	if (ret < 0)
		return ret;

	return cg_write(cgroup, control, buf);
}
221
cg_find_root(char * root,size_t len,const char * controller,bool * nsdelegate)222 static int cg_find_root(char *root, size_t len, const char *controller,
223 bool *nsdelegate)
224 {
225 char buf[10 * PAGE_SIZE];
226 char *fs, *mount, *type, *options;
227 const char delim[] = "\n\t ";
228
229 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
230 return -1;
231
232 /*
233 * Example:
234 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
235 */
236 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
237 mount = strtok(NULL, delim);
238 type = strtok(NULL, delim);
239 options = strtok(NULL, delim);
240 strtok(NULL, delim);
241 strtok(NULL, delim);
242 if (strcmp(type, "cgroup") == 0) {
243 if (!controller || !strstr(options, controller))
244 continue;
245 } else if (strcmp(type, "cgroup2") == 0) {
246 if (controller &&
247 cg_read_strstr(mount, "cgroup.controllers", controller))
248 continue;
249 } else {
250 continue;
251 }
252 strncpy(root, mount, len);
253
254 if (nsdelegate)
255 *nsdelegate = !!strstr(options, "nsdelegate");
256 return 0;
257
258 }
259
260 return -1;
261 }
262
/*
 * Look up a cgroup mount for @controller through cg_find_root(), without
 * asking for the nsdelegate state. @root/@len describe the output buffer.
 *
 * Returns whatever cg_find_root() returns.
 */
int cg_find_controller_root(char *root, size_t len, const char *controller)
{
	bool *no_nsdelegate = NULL;

	return cg_find_root(root, len, controller, no_nsdelegate);
}
267
/*
 * Look up a cgroup mount through cg_find_root() with no controller
 * filter, forwarding the optional @nsdelegate output. @root/@len describe
 * the output buffer.
 *
 * Returns whatever cg_find_root() returns.
 */
int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
	const char *any_controller = NULL;

	return cg_find_root(root, len, any_controller, nsdelegate);
}
272
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	const mode_t mode = 0755;
	int rc = mkdir(cgroup, mode);

	return rc;
}
277
cg_wait_for_proc_count(const char * cgroup,int count)278 int cg_wait_for_proc_count(const char *cgroup, int count)
279 {
280 char buf[10 * PAGE_SIZE] = {0};
281 int attempts;
282 char *ptr;
283
284 for (attempts = 10; attempts >= 0; attempts--) {
285 int nr = 0;
286
287 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
288 break;
289
290 for (ptr = buf; *ptr; ptr++)
291 if (*ptr == '\n')
292 nr++;
293
294 if (nr >= count)
295 return 0;
296
297 usleep(100000);
298 }
299
300 return -1;
301 }
302
cg_killall(const char * cgroup)303 int cg_killall(const char *cgroup)
304 {
305 char buf[PAGE_SIZE];
306 char *ptr = buf;
307
308 /* If cgroup.kill exists use it. */
309 if (!cg_write(cgroup, "cgroup.kill", "1"))
310 return 0;
311
312 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
313 return -1;
314
315 while (ptr < buf + sizeof(buf)) {
316 int pid = strtol(ptr, &ptr, 10);
317
318 if (pid == 0)
319 break;
320 if (*ptr)
321 ptr++;
322 else
323 break;
324 if (kill(pid, SIGKILL))
325 return -1;
326 }
327
328 return 0;
329 }
330
/*
 * Remove the directory @cgroup.
 *
 * While rmdir() fails with EBUSY, the processes in the cgroup are killed
 * with cg_killall() and the removal is retried after a 100 us pause;
 * there is no upper bound on the number of retries. A NULL @cgroup and an
 * already-missing directory (ENOENT) both count as success.
 *
 * Returns 0 on success, otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int rc;

	if (!cgroup)
		return 0;

	for (;;) {
		rc = rmdir(cgroup);
		if (!rc || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (rc && errno == ENOENT)
		rc = 0;

	return rc;
}
350
/*
 * Write the decimal form of @pid to "<cgroup>/cgroup.procs".
 *
 * Returns the cg_write() result.
 */
int cg_enter(const char *cgroup, int pid)
{
	char text[64];
	int rc;

	snprintf(text, sizeof(text), "%d", pid);
	rc = cg_write(cgroup, "cgroup.procs", text);

	return rc;
}
358
/*
 * Write "0" to "<cgroup>/cgroup.procs".
 *
 * Returns the cg_write() result.
 */
int cg_enter_current(const char *cgroup)
{
	int rc = cg_write(cgroup, "cgroup.procs", "0");

	return rc;
}
363
cg_enter_current_thread(const char * cgroup)364 int cg_enter_current_thread(const char *cgroup)
365 {
366 return cg_write(cgroup, CG_THREADS_FILE, "0");
367 }
368
/*
 * Fork a child, have it write its own pid to "<cgroup>/cgroup.procs",
 * run fn(cgroup, arg) in it and wait for the child to finish.
 *
 * The child exits with EXIT_FAILURE if the cgroup.procs write fails,
 * otherwise with the return value of @fn.
 *
 * Returns the child's exit status if it exited normally, the negative
 * fork() result if the fork failed, or -1 if the child could not be
 * waited for or did not exit normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only meaningful after a successful waitpid(), so retry
	 * interrupted waits and bail out on any other failure.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	if (WIFEXITED(retcode))
		return WEXITSTATUS(retcode);

	return -1;
}
393
/*
 * Create a child process with clone3(), asking for it to be placed in
 * the cgroup referred to by cgroup_fd (CLONE_INTO_CGROUP).
 *
 * Returns the sys_clone3() result when the call went through. When
 * CLONE_ARGS_SIZE_VER2 is not defined at build time, or clone3() fails
 * with ENOSYS (clone3() not available) or E2BIG (CLONE_INTO_CGROUP not
 * available), errno is set to ENOSYS and -ENOSYS is returned.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(struct __clone_args));

	/*
	 * Anything other than "feature missing" is handed back unchanged:
	 * a successful clone, or a genuine failure for the caller to see.
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	/* Report every "not supported" flavour uniformly as ENOSYS. */
	errno = ENOSYS;
	return -ENOSYS;
}
421
/*
 * Wait for a state change of child @pid with waitid(). @options selects
 * the states of interest (WEXITED, WSTOPPED, WCONTINUED, ...); __WALL and
 * __WNOTHREAD are always added. Interrupted waits are retried.
 *
 * waitid() reports the outcome in siginfo_t: si_code says what happened
 * (CLD_EXITED, CLD_STOPPED, CLD_CONTINUED, ...) and si_status carries the
 * exit code or the signal number. The wait()-style status macros do not
 * apply to si_status, so the decision is made on si_code.
 *
 * Returns:
 *   - the child's exit code, if WEXITED was requested and it exited;
 *   - the stop signal, if WSTOPPED was requested and it was stopped;
 *   - 0, if WCONTINUED was requested and it was continued;
 *   - -1 on waitid() failure or for any other outcome (for example the
 *     child was killed by a signal).
 */
int clone_reap(pid_t pid, int options)
{
	int ret;
	siginfo_t info = {
		.si_signo = 0,
	};

again:
	ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	if (ret < 0) {
		if (errno == EINTR)
			goto again;
		return -1;
	}

	if ((options & WEXITED) && info.si_code == CLD_EXITED)
		return info.si_status;

	if ((options & WSTOPPED) && info.si_code == CLD_STOPPED)
		return info.si_status;

	if ((options & WCONTINUED) && info.si_code == CLD_CONTINUED)
		return 0;

	return -1;
}
454
/*
 * Open the directory @dir as an O_PATH descriptor; the open also uses
 * O_DIRECTORY, O_NOFOLLOW and O_CLOEXEC.
 *
 * Returns the open() result: a descriptor, or -1 with errno set.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
459
/*
 * Close fd if it is non-negative, restoring errno afterwards so that a
 * failure reported before the cleanup is not overwritten by close().
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct inside an unbraced if/else; the argument
 * is parenthesised where it is compared.
 */
#define close_prot_errno(fd)			\
	do {					\
		if ((fd) >= 0) {		\
			int _e_ = errno;	\
			close(fd);		\
			errno = _e_;		\
		}				\
	} while (0)
466
/*
 * Clone a child directly into @cgroup and run fn(cgroup, arg) in it,
 * without waiting for the child.
 *
 * The child exits with the return value of @fn. In the parent the result
 * of clone_into_cgroup() is returned, or -1 if the cgroup directory could
 * not be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	/* Drop the directory fd while keeping errno from the clone attempt. */
	close_prot_errno(dirfd);

	if (child != 0)
		return child;

	exit(fn(cgroup, arg));
}
485
/*
 * Start fn(cgroup, arg) in a child process inside @cgroup without waiting
 * for it.
 *
 * The child is first created with clone_into_cgroup_run_nowait(). If that
 * reports ENOSYS, the function falls back to fork(), with the child
 * writing its own pid to cgroup.procs before calling @fn (and exiting
 * with EXIT_FAILURE if that write fails).
 *
 * Returns the child's pid in the parent, -1 when the clone attempt failed
 * for a reason other than ENOSYS, or the fork() result when the fallback
 * was used.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char self[64];
	int pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);

	if (pid > 0)
		return pid;

	/* Any failure other than "not supported" is a genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid != 0)
		return pid;

	/* Child of the fallback fork(): join the cgroup, then run fn. */
	snprintf(self, sizeof(self), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", self))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
512
proc_mount_contains(const char * option)513 int proc_mount_contains(const char *option)
514 {
515 char buf[4 * PAGE_SIZE];
516 ssize_t read;
517
518 read = read_text("/proc/mounts", buf, sizeof(buf));
519 if (read < 0)
520 return read;
521
522 return strstr(buf, option) != NULL;
523 }
524
/*
 * Read the procfs entry @item of a process into buf (capacity size).
 *
 * A non-zero @pid selects "/proc/<pid>/<item>". A zero @pid selects the
 * caller itself: "/proc/thread-self/<item>" when @thread is true,
 * "/proc/self/<item>" otherwise.
 *
 * Returns the number of bytes read, or -1 on any read_text() failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char file[PATH_MAX];
	ssize_t nread;

	if (pid) {
		snprintf(file, sizeof(file), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(file, sizeof(file), "/proc/%s/%s", self, item);
	}

	nread = read_text(file, buf, size);
	if (nread < 0)
		return -1;

	return nread;
}
539
proc_read_strstr(int pid,bool thread,const char * item,const char * needle)540 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
541 {
542 char buf[PAGE_SIZE];
543
544 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
545 return -1;
546
547 return strstr(buf, needle) ? 0 : -1;
548 }
549
/*
 * Clone a child directly into @cgroup, have it exit immediately with
 * EXIT_SUCCESS, and reap it.
 *
 * Returns 0 if the clone succeeded, or -1 if the cgroup directory could
 * not be opened or the clone failed. The outcome of reaping the child
 * does not influence the result.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int dirfd = dirfd_open_opath(cgroup);

	if (dirfd < 0)
		return -1;

	child = clone_into_cgroup(dirfd);
	/* Drop the directory fd while keeping errno from the clone attempt. */
	close_prot_errno(dirfd);

	if (child < 0)
		return -1;

	if (!child)
		exit(EXIT_SUCCESS);

	/* Only the initial clone matters here; a failed reap is ignored. */
	(void)clone_reap(child, WEXITED);

	return 0;
}
574
/*
 * Create an inotify instance watching "<cgroup>/<filename>" for IN_MODIFY
 * events.
 *
 * Returns the inotify file descriptor (to be closed by the caller), or -1
 * if the path cannot be built, the inotify instance cannot be created, or
 * the watch cannot be added.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
	char *path;
	int fd, wd;

	/* cg_control() returns a heap string that must be freed here. */
	path = cg_control(cgroup, filename);
	if (!path)
		return -1;

	fd = inotify_init1(0);
	if (fd == -1) {
		free(path);
		return -1;
	}

	wd = inotify_add_watch(fd, path, IN_MODIFY);
	free(path);
	if (wd == -1) {
		close(fd);
		fd = -1;
	}

	return fd;
}
591
/*
 * Set up a wait descriptor on the "cgroup.events" file of @cgroup via
 * __prepare_for_wait().
 *
 * Returns whatever __prepare_for_wait() returns.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "cgroup.events";

	return __prepare_for_wait(cgroup, events_file);
}
596
/*
 * Set up a wait descriptor on the "memory.events" file of @cgroup via
 * __prepare_for_wait().
 *
 * Returns whatever __prepare_for_wait() returns.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
	static const char events_file[] = "memory.events";

	return __prepare_for_wait(cgroup, events_file);
}
601
/*
 * Block until @fd becomes readable.
 *
 * poll() is called with a 10 second timeout; timeouts and EINTR simply
 * restart the wait. If poll() reports only an error condition on the
 * descriptor (POLLERR, POLLHUP or POLLNVAL without POLLIN), waiting any
 * longer cannot succeed, so the function gives up instead of spinning.
 *
 * Returns 0 once @fd is readable, or -1 on a poll() failure or an error
 * condition on @fd.
 */
int cg_wait_for(int fd)
{
	int ret = -1;
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};

	while (true) {
		ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			break;
		}

		/* Timed out with nothing to report: keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN) {
			ret = 0;
			break;
		}

		/*
		 * An event was reported but it is not POLLIN, so it must be
		 * an error/hangup/invalid-fd condition. Such conditions are
		 * reported again immediately on every poll() call.
		 */
		ret = -1;
		break;
	}

	return ret;
}
628