// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>

476d46ad2SChristian Brauner #define _GNU_SOURCE
576d46ad2SChristian Brauner #include <errno.h>
676d46ad2SChristian Brauner #include <linux/types.h>
776d46ad2SChristian Brauner #include <poll.h>
876d46ad2SChristian Brauner #include <pthread.h>
976d46ad2SChristian Brauner #include <sched.h>
1076d46ad2SChristian Brauner #include <signal.h>
1176d46ad2SChristian Brauner #include <stdio.h>
1276d46ad2SChristian Brauner #include <stdlib.h>
1376d46ad2SChristian Brauner #include <string.h>
1476d46ad2SChristian Brauner #include <syscall.h>
1576d46ad2SChristian Brauner #include <sys/ioctl.h>
1676d46ad2SChristian Brauner #include <sys/prctl.h>
1776d46ad2SChristian Brauner #include <sys/socket.h>
1876d46ad2SChristian Brauner #include <sys/types.h>
1976d46ad2SChristian Brauner #include <sys/wait.h>
2076d46ad2SChristian Brauner #include <unistd.h>
2176d46ad2SChristian Brauner
2276d46ad2SChristian Brauner #include "pidfd.h"
2376d46ad2SChristian Brauner #include "kselftest_harness.h"
2476d46ad2SChristian Brauner
/*
 * Fallback definitions, used only when the headers included above do not
 * already provide these names.
 */
#ifndef CLONE_AUTOREAP
#define CLONE_AUTOREAP (1ULL << 34)
#endif

#ifndef CLONE_NNP
#define CLONE_NNP (1ULL << 35)
#endif

#ifndef CLONE_PIDFD_AUTOKILL
#define CLONE_PIDFD_AUTOKILL (1ULL << 36)
#endif

#ifndef _LINUX_CAPABILITY_VERSION_3
#define _LINUX_CAPABILITY_VERSION_3 0x20080522
#endif

/*
 * Header argument handed to the raw capset syscall by drop_all_caps().
 * NOTE(review): presumably mirrors the kernel's capability header layout
 * (version + target pid) - confirm against capset(2).
 */
struct cap_header {
	__u32 version;	/* set to _LINUX_CAPABILITY_VERSION_3 by drop_all_caps() */
	int pid;	/* left zero-initialized by drop_all_caps() */
};

/*
 * One element of the data array handed to the raw capset syscall by
 * drop_all_caps(), which passes two zeroed elements.
 */
struct cap_data {
	__u32 effective;
	__u32 permitted;
	__u32 inheritable;
};

drop_all_caps(void)52*ec26879eSChristian Brauner static int drop_all_caps(void)
53*ec26879eSChristian Brauner {
54*ec26879eSChristian Brauner struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 };
55*ec26879eSChristian Brauner struct cap_data data[2] = {};
56*ec26879eSChristian Brauner
57*ec26879eSChristian Brauner return syscall(__NR_capset, &hdr, data);
58*ec26879eSChristian Brauner }
59*ec26879eSChristian Brauner
create_autoreap_child(int * pidfd)6076d46ad2SChristian Brauner static pid_t create_autoreap_child(int *pidfd)
6176d46ad2SChristian Brauner {
6276d46ad2SChristian Brauner struct __clone_args args = {
6376d46ad2SChristian Brauner .flags = CLONE_PIDFD | CLONE_AUTOREAP,
6476d46ad2SChristian Brauner .exit_signal = 0,
6576d46ad2SChristian Brauner .pidfd = ptr_to_u64(pidfd),
6676d46ad2SChristian Brauner };
6776d46ad2SChristian Brauner
6876d46ad2SChristian Brauner return sys_clone3(&args, sizeof(args));
6976d46ad2SChristian Brauner }
7076d46ad2SChristian Brauner
7176d46ad2SChristian Brauner /*
7276d46ad2SChristian Brauner * Test that CLONE_AUTOREAP works without CLONE_PIDFD (fire-and-forget).
7376d46ad2SChristian Brauner */
TEST(autoreap_without_pidfd)7476d46ad2SChristian Brauner TEST(autoreap_without_pidfd)
7576d46ad2SChristian Brauner {
7676d46ad2SChristian Brauner struct __clone_args args = {
7776d46ad2SChristian Brauner .flags = CLONE_AUTOREAP,
7876d46ad2SChristian Brauner .exit_signal = 0,
7976d46ad2SChristian Brauner };
8076d46ad2SChristian Brauner pid_t pid;
8176d46ad2SChristian Brauner int ret;
8276d46ad2SChristian Brauner
8376d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args));
8476d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL)
8576d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported");
8676d46ad2SChristian Brauner ASSERT_GE(pid, 0);
8776d46ad2SChristian Brauner
8876d46ad2SChristian Brauner if (pid == 0)
8976d46ad2SChristian Brauner _exit(0);
9076d46ad2SChristian Brauner
9176d46ad2SChristian Brauner /*
9276d46ad2SChristian Brauner * Give the child a moment to exit and be autoreaped.
9376d46ad2SChristian Brauner * Then verify no zombie remains.
9476d46ad2SChristian Brauner */
9576d46ad2SChristian Brauner usleep(200000);
9676d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG);
9776d46ad2SChristian Brauner ASSERT_EQ(ret, -1);
9876d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD);
9976d46ad2SChristian Brauner }
10076d46ad2SChristian Brauner
10176d46ad2SChristian Brauner /*
10276d46ad2SChristian Brauner * Test that CLONE_AUTOREAP with a non-zero exit_signal fails.
10376d46ad2SChristian Brauner */
TEST(autoreap_rejects_exit_signal)10476d46ad2SChristian Brauner TEST(autoreap_rejects_exit_signal)
10576d46ad2SChristian Brauner {
10676d46ad2SChristian Brauner struct __clone_args args = {
10776d46ad2SChristian Brauner .flags = CLONE_AUTOREAP,
10876d46ad2SChristian Brauner .exit_signal = SIGCHLD,
10976d46ad2SChristian Brauner };
11076d46ad2SChristian Brauner pid_t pid;
11176d46ad2SChristian Brauner
11276d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args));
11376d46ad2SChristian Brauner ASSERT_EQ(pid, -1);
11476d46ad2SChristian Brauner ASSERT_EQ(errno, EINVAL);
11576d46ad2SChristian Brauner }
11676d46ad2SChristian Brauner
11776d46ad2SChristian Brauner /*
11876d46ad2SChristian Brauner * Test that CLONE_AUTOREAP with CLONE_PARENT fails.
11976d46ad2SChristian Brauner */
TEST(autoreap_rejects_parent)12076d46ad2SChristian Brauner TEST(autoreap_rejects_parent)
12176d46ad2SChristian Brauner {
12276d46ad2SChristian Brauner struct __clone_args args = {
12376d46ad2SChristian Brauner .flags = CLONE_AUTOREAP | CLONE_PARENT,
12476d46ad2SChristian Brauner .exit_signal = 0,
12576d46ad2SChristian Brauner };
12676d46ad2SChristian Brauner pid_t pid;
12776d46ad2SChristian Brauner
12876d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args));
12976d46ad2SChristian Brauner ASSERT_EQ(pid, -1);
13076d46ad2SChristian Brauner ASSERT_EQ(errno, EINVAL);
13176d46ad2SChristian Brauner }
13276d46ad2SChristian Brauner
13376d46ad2SChristian Brauner /*
13476d46ad2SChristian Brauner * Test that CLONE_AUTOREAP with CLONE_THREAD fails.
13576d46ad2SChristian Brauner */
TEST(autoreap_rejects_thread)13676d46ad2SChristian Brauner TEST(autoreap_rejects_thread)
13776d46ad2SChristian Brauner {
13876d46ad2SChristian Brauner struct __clone_args args = {
13976d46ad2SChristian Brauner .flags = CLONE_AUTOREAP | CLONE_THREAD |
14076d46ad2SChristian Brauner CLONE_SIGHAND | CLONE_VM,
14176d46ad2SChristian Brauner .exit_signal = 0,
14276d46ad2SChristian Brauner };
14376d46ad2SChristian Brauner pid_t pid;
14476d46ad2SChristian Brauner
14576d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args));
14676d46ad2SChristian Brauner ASSERT_EQ(pid, -1);
14776d46ad2SChristian Brauner ASSERT_EQ(errno, EINVAL);
14876d46ad2SChristian Brauner }
14976d46ad2SChristian Brauner
15076d46ad2SChristian Brauner /*
15176d46ad2SChristian Brauner * Basic test: create an autoreap child, let it exit, verify:
15276d46ad2SChristian Brauner * - pidfd becomes readable (poll returns POLLIN)
15376d46ad2SChristian Brauner * - PIDFD_GET_INFO returns the correct exit code
15476d46ad2SChristian Brauner * - waitpid() returns -1/ECHILD (no zombie)
15576d46ad2SChristian Brauner */
TEST(autoreap_basic)15676d46ad2SChristian Brauner TEST(autoreap_basic)
15776d46ad2SChristian Brauner {
15876d46ad2SChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
15976d46ad2SChristian Brauner int pidfd = -1, ret;
16076d46ad2SChristian Brauner struct pollfd pfd;
16176d46ad2SChristian Brauner pid_t pid;
16276d46ad2SChristian Brauner
16376d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd);
16476d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL)
16576d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported");
16676d46ad2SChristian Brauner ASSERT_GE(pid, 0);
16776d46ad2SChristian Brauner
16876d46ad2SChristian Brauner if (pid == 0)
16976d46ad2SChristian Brauner _exit(42);
17076d46ad2SChristian Brauner
17176d46ad2SChristian Brauner ASSERT_GE(pidfd, 0);
17276d46ad2SChristian Brauner
17376d46ad2SChristian Brauner /* Wait for the child to exit via pidfd poll. */
17476d46ad2SChristian Brauner pfd.fd = pidfd;
17576d46ad2SChristian Brauner pfd.events = POLLIN;
17676d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000);
17776d46ad2SChristian Brauner ASSERT_EQ(ret, 1);
17876d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN);
17976d46ad2SChristian Brauner
18076d46ad2SChristian Brauner /* Verify exit info via PIDFD_GET_INFO. */
18176d46ad2SChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
18276d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
18376d46ad2SChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
18476d46ad2SChristian Brauner /*
18576d46ad2SChristian Brauner * exit_code is in waitpid format: for _exit(42),
18676d46ad2SChristian Brauner * WIFEXITED is true and WEXITSTATUS is 42.
18776d46ad2SChristian Brauner */
18876d46ad2SChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code));
18976d46ad2SChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 42);
19076d46ad2SChristian Brauner
19176d46ad2SChristian Brauner /* Verify no zombie: waitpid should fail with ECHILD. */
19276d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG);
19376d46ad2SChristian Brauner ASSERT_EQ(ret, -1);
19476d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD);
19576d46ad2SChristian Brauner
19676d46ad2SChristian Brauner close(pidfd);
19776d46ad2SChristian Brauner }
19876d46ad2SChristian Brauner
19976d46ad2SChristian Brauner /*
20076d46ad2SChristian Brauner * Test that an autoreap child killed by a signal reports
20176d46ad2SChristian Brauner * the correct exit info.
20276d46ad2SChristian Brauner */
TEST(autoreap_signaled)20376d46ad2SChristian Brauner TEST(autoreap_signaled)
20476d46ad2SChristian Brauner {
20576d46ad2SChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
20676d46ad2SChristian Brauner int pidfd = -1, ret;
20776d46ad2SChristian Brauner struct pollfd pfd;
20876d46ad2SChristian Brauner pid_t pid;
20976d46ad2SChristian Brauner
21076d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd);
21176d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL)
21276d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported");
21376d46ad2SChristian Brauner ASSERT_GE(pid, 0);
21476d46ad2SChristian Brauner
21576d46ad2SChristian Brauner if (pid == 0) {
21676d46ad2SChristian Brauner pause();
21776d46ad2SChristian Brauner _exit(1);
21876d46ad2SChristian Brauner }
21976d46ad2SChristian Brauner
22076d46ad2SChristian Brauner ASSERT_GE(pidfd, 0);
22176d46ad2SChristian Brauner
22276d46ad2SChristian Brauner /* Kill the child. */
22376d46ad2SChristian Brauner ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
22476d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
22576d46ad2SChristian Brauner
22676d46ad2SChristian Brauner /* Wait for exit via pidfd. */
22776d46ad2SChristian Brauner pfd.fd = pidfd;
22876d46ad2SChristian Brauner pfd.events = POLLIN;
22976d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000);
23076d46ad2SChristian Brauner ASSERT_EQ(ret, 1);
23176d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN);
23276d46ad2SChristian Brauner
23376d46ad2SChristian Brauner /* Verify signal info. */
23476d46ad2SChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
23576d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
23676d46ad2SChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
23776d46ad2SChristian Brauner ASSERT_TRUE(WIFSIGNALED(info.exit_code));
23876d46ad2SChristian Brauner ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
23976d46ad2SChristian Brauner
24076d46ad2SChristian Brauner /* No zombie. */
24176d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG);
24276d46ad2SChristian Brauner ASSERT_EQ(ret, -1);
24376d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD);
24476d46ad2SChristian Brauner
24576d46ad2SChristian Brauner close(pidfd);
24676d46ad2SChristian Brauner }
24776d46ad2SChristian Brauner
24876d46ad2SChristian Brauner /*
24976d46ad2SChristian Brauner * Test autoreap survives reparenting: middle process creates an
25076d46ad2SChristian Brauner * autoreap grandchild, then exits. The grandchild gets reparented
25176d46ad2SChristian Brauner * to us (the grandparent, which is a subreaper). When the grandchild
25276d46ad2SChristian Brauner * exits, it should still be autoreaped - no zombie under us.
25376d46ad2SChristian Brauner */
TEST(autoreap_reparent)25476d46ad2SChristian Brauner TEST(autoreap_reparent)
25576d46ad2SChristian Brauner {
25676d46ad2SChristian Brauner int ipc_sockets[2], ret;
25776d46ad2SChristian Brauner int pidfd = -1;
25876d46ad2SChristian Brauner struct pollfd pfd;
25976d46ad2SChristian Brauner pid_t mid_pid, grandchild_pid;
26076d46ad2SChristian Brauner char buf[32] = {};
26176d46ad2SChristian Brauner
26276d46ad2SChristian Brauner /* Make ourselves a subreaper so reparented children come to us. */
26376d46ad2SChristian Brauner ret = prctl(PR_SET_CHILD_SUBREAPER, 1);
26476d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
26576d46ad2SChristian Brauner
26676d46ad2SChristian Brauner ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
26776d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
26876d46ad2SChristian Brauner
26976d46ad2SChristian Brauner mid_pid = fork();
27076d46ad2SChristian Brauner ASSERT_GE(mid_pid, 0);
27176d46ad2SChristian Brauner
27276d46ad2SChristian Brauner if (mid_pid == 0) {
27376d46ad2SChristian Brauner /* Middle child: create an autoreap grandchild. */
27476d46ad2SChristian Brauner int gc_pidfd = -1;
27576d46ad2SChristian Brauner
27676d46ad2SChristian Brauner close(ipc_sockets[0]);
27776d46ad2SChristian Brauner
27876d46ad2SChristian Brauner grandchild_pid = create_autoreap_child(&gc_pidfd);
27976d46ad2SChristian Brauner if (grandchild_pid < 0) {
28076d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "E", 1);
28176d46ad2SChristian Brauner close(ipc_sockets[1]);
28276d46ad2SChristian Brauner _exit(1);
28376d46ad2SChristian Brauner }
28476d46ad2SChristian Brauner
28576d46ad2SChristian Brauner if (grandchild_pid == 0) {
28676d46ad2SChristian Brauner /* Grandchild: wait for signal to exit. */
28776d46ad2SChristian Brauner close(ipc_sockets[1]);
28876d46ad2SChristian Brauner if (gc_pidfd >= 0)
28976d46ad2SChristian Brauner close(gc_pidfd);
29076d46ad2SChristian Brauner pause();
29176d46ad2SChristian Brauner _exit(0);
29276d46ad2SChristian Brauner }
29376d46ad2SChristian Brauner
29476d46ad2SChristian Brauner /* Send grandchild PID to grandparent. */
29576d46ad2SChristian Brauner snprintf(buf, sizeof(buf), "%d", grandchild_pid);
29676d46ad2SChristian Brauner write_nointr(ipc_sockets[1], buf, strlen(buf));
29776d46ad2SChristian Brauner close(ipc_sockets[1]);
29876d46ad2SChristian Brauner if (gc_pidfd >= 0)
29976d46ad2SChristian Brauner close(gc_pidfd);
30076d46ad2SChristian Brauner
30176d46ad2SChristian Brauner /* Middle child exits, grandchild gets reparented. */
30276d46ad2SChristian Brauner _exit(0);
30376d46ad2SChristian Brauner }
30476d46ad2SChristian Brauner
30576d46ad2SChristian Brauner close(ipc_sockets[1]);
30676d46ad2SChristian Brauner
30776d46ad2SChristian Brauner /* Read grandchild's PID. */
30876d46ad2SChristian Brauner ret = read_nointr(ipc_sockets[0], buf, sizeof(buf) - 1);
30976d46ad2SChristian Brauner close(ipc_sockets[0]);
31076d46ad2SChristian Brauner ASSERT_GT(ret, 0);
31176d46ad2SChristian Brauner
31276d46ad2SChristian Brauner if (buf[0] == 'E') {
31376d46ad2SChristian Brauner waitpid(mid_pid, NULL, 0);
31476d46ad2SChristian Brauner prctl(PR_SET_CHILD_SUBREAPER, 0);
31576d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported");
31676d46ad2SChristian Brauner }
31776d46ad2SChristian Brauner
31876d46ad2SChristian Brauner grandchild_pid = atoi(buf);
31976d46ad2SChristian Brauner ASSERT_GT(grandchild_pid, 0);
32076d46ad2SChristian Brauner
32176d46ad2SChristian Brauner /* Wait for the middle child to exit. */
32276d46ad2SChristian Brauner ret = waitpid(mid_pid, NULL, 0);
32376d46ad2SChristian Brauner ASSERT_EQ(ret, mid_pid);
32476d46ad2SChristian Brauner
32576d46ad2SChristian Brauner /*
32676d46ad2SChristian Brauner * Now the grandchild is reparented to us (subreaper).
32776d46ad2SChristian Brauner * Open a pidfd for the grandchild and kill it.
32876d46ad2SChristian Brauner */
32976d46ad2SChristian Brauner pidfd = sys_pidfd_open(grandchild_pid, 0);
33076d46ad2SChristian Brauner ASSERT_GE(pidfd, 0);
33176d46ad2SChristian Brauner
33276d46ad2SChristian Brauner ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
33376d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
33476d46ad2SChristian Brauner
33576d46ad2SChristian Brauner /* Wait for it to exit via pidfd poll. */
33676d46ad2SChristian Brauner pfd.fd = pidfd;
33776d46ad2SChristian Brauner pfd.events = POLLIN;
33876d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000);
33976d46ad2SChristian Brauner ASSERT_EQ(ret, 1);
34076d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN);
34176d46ad2SChristian Brauner
34276d46ad2SChristian Brauner /*
34376d46ad2SChristian Brauner * The grandchild should have been autoreaped even though
34476d46ad2SChristian Brauner * we (the new parent) haven't set SA_NOCLDWAIT.
34576d46ad2SChristian Brauner * waitpid should return -1/ECHILD.
34676d46ad2SChristian Brauner */
34776d46ad2SChristian Brauner ret = waitpid(grandchild_pid, NULL, WNOHANG);
34876d46ad2SChristian Brauner EXPECT_EQ(ret, -1);
34976d46ad2SChristian Brauner EXPECT_EQ(errno, ECHILD);
35076d46ad2SChristian Brauner
35176d46ad2SChristian Brauner close(pidfd);
35276d46ad2SChristian Brauner
35376d46ad2SChristian Brauner /* Clean up subreaper status. */
35476d46ad2SChristian Brauner prctl(PR_SET_CHILD_SUBREAPER, 0);
35576d46ad2SChristian Brauner }
35676d46ad2SChristian Brauner
/* Socket the helper thread writes its readiness byte to; set before pthread_create(). */
static int thread_sock_fd;

/*
 * Thread body for autoreap_multithreaded: write one byte to thread_sock_fd,
 * sleep 200ms, then return NULL. @arg is unused.
 */
static void *thread_func(void *arg)
{
	/* Signal parent we're running. */
	write_nointr(thread_sock_fd, "1", 1);

	/* Give main thread time to call _exit() first. */
	usleep(200000);

	return NULL;
}

37076d46ad2SChristian Brauner /*
37176d46ad2SChristian Brauner * Test that an autoreap child with multiple threads is properly
37276d46ad2SChristian Brauner * autoreaped only after all threads have exited.
37376d46ad2SChristian Brauner */
TEST(autoreap_multithreaded)37476d46ad2SChristian Brauner TEST(autoreap_multithreaded)
37576d46ad2SChristian Brauner {
37676d46ad2SChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
37776d46ad2SChristian Brauner int ipc_sockets[2], ret;
37876d46ad2SChristian Brauner int pidfd = -1;
37976d46ad2SChristian Brauner struct pollfd pfd;
38076d46ad2SChristian Brauner pid_t pid;
38176d46ad2SChristian Brauner char c;
38276d46ad2SChristian Brauner
38376d46ad2SChristian Brauner ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
38476d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
38576d46ad2SChristian Brauner
38676d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd);
38776d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL) {
38876d46ad2SChristian Brauner close(ipc_sockets[0]);
38976d46ad2SChristian Brauner close(ipc_sockets[1]);
39076d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported");
39176d46ad2SChristian Brauner }
39276d46ad2SChristian Brauner ASSERT_GE(pid, 0);
39376d46ad2SChristian Brauner
39476d46ad2SChristian Brauner if (pid == 0) {
39576d46ad2SChristian Brauner pthread_t thread;
39676d46ad2SChristian Brauner
39776d46ad2SChristian Brauner close(ipc_sockets[0]);
39876d46ad2SChristian Brauner
39976d46ad2SChristian Brauner /*
40076d46ad2SChristian Brauner * Create a sub-thread that outlives the main thread.
40176d46ad2SChristian Brauner * The thread signals readiness, then sleeps.
40276d46ad2SChristian Brauner * The main thread waits briefly, then calls _exit().
40376d46ad2SChristian Brauner */
40476d46ad2SChristian Brauner thread_sock_fd = ipc_sockets[1];
40576d46ad2SChristian Brauner pthread_create(&thread, NULL, thread_func, NULL);
40676d46ad2SChristian Brauner pthread_detach(thread);
40776d46ad2SChristian Brauner
40876d46ad2SChristian Brauner /* Wait for thread to be running. */
40976d46ad2SChristian Brauner usleep(100000);
41076d46ad2SChristian Brauner
41176d46ad2SChristian Brauner /* Main thread exits; sub-thread is still alive. */
41276d46ad2SChristian Brauner _exit(99);
41376d46ad2SChristian Brauner }
41476d46ad2SChristian Brauner
41576d46ad2SChristian Brauner close(ipc_sockets[1]);
41676d46ad2SChristian Brauner
41776d46ad2SChristian Brauner /* Wait for the sub-thread to signal readiness. */
41876d46ad2SChristian Brauner ret = read_nointr(ipc_sockets[0], &c, 1);
41976d46ad2SChristian Brauner close(ipc_sockets[0]);
42076d46ad2SChristian Brauner ASSERT_EQ(ret, 1);
42176d46ad2SChristian Brauner
42276d46ad2SChristian Brauner /* Wait for the process to fully exit via pidfd poll. */
42376d46ad2SChristian Brauner pfd.fd = pidfd;
42476d46ad2SChristian Brauner pfd.events = POLLIN;
42576d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000);
42676d46ad2SChristian Brauner ASSERT_EQ(ret, 1);
42776d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN);
42876d46ad2SChristian Brauner
42976d46ad2SChristian Brauner /* Verify exit info. */
43076d46ad2SChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
43176d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
43276d46ad2SChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
43376d46ad2SChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code));
43476d46ad2SChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 99);
43576d46ad2SChristian Brauner
43676d46ad2SChristian Brauner /* No zombie. */
43776d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG);
43876d46ad2SChristian Brauner ASSERT_EQ(ret, -1);
43976d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD);
44076d46ad2SChristian Brauner
44176d46ad2SChristian Brauner close(pidfd);
44276d46ad2SChristian Brauner }
44376d46ad2SChristian Brauner
44476d46ad2SChristian Brauner /*
44576d46ad2SChristian Brauner * Test that autoreap is NOT inherited by grandchildren.
44676d46ad2SChristian Brauner */
TEST(autoreap_no_inherit)44776d46ad2SChristian Brauner TEST(autoreap_no_inherit)
44876d46ad2SChristian Brauner {
44976d46ad2SChristian Brauner int ipc_sockets[2], ret;
45076d46ad2SChristian Brauner int pidfd = -1;
45176d46ad2SChristian Brauner pid_t pid;
45276d46ad2SChristian Brauner char buf[2] = {};
45376d46ad2SChristian Brauner struct pollfd pfd;
45476d46ad2SChristian Brauner
45576d46ad2SChristian Brauner ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
45676d46ad2SChristian Brauner ASSERT_EQ(ret, 0);
45776d46ad2SChristian Brauner
45876d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd);
45976d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL) {
46076d46ad2SChristian Brauner close(ipc_sockets[0]);
46176d46ad2SChristian Brauner close(ipc_sockets[1]);
46276d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported");
46376d46ad2SChristian Brauner }
46476d46ad2SChristian Brauner ASSERT_GE(pid, 0);
46576d46ad2SChristian Brauner
46676d46ad2SChristian Brauner if (pid == 0) {
46776d46ad2SChristian Brauner pid_t gc;
46876d46ad2SChristian Brauner int status;
46976d46ad2SChristian Brauner
47076d46ad2SChristian Brauner close(ipc_sockets[0]);
47176d46ad2SChristian Brauner
47276d46ad2SChristian Brauner /* Autoreap child forks a grandchild (without autoreap). */
47376d46ad2SChristian Brauner gc = fork();
47476d46ad2SChristian Brauner if (gc < 0) {
47576d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "E", 1);
47676d46ad2SChristian Brauner _exit(1);
47776d46ad2SChristian Brauner }
47876d46ad2SChristian Brauner if (gc == 0) {
47976d46ad2SChristian Brauner /* Grandchild: exit immediately. */
48076d46ad2SChristian Brauner close(ipc_sockets[1]);
48176d46ad2SChristian Brauner _exit(77);
48276d46ad2SChristian Brauner }
48376d46ad2SChristian Brauner
48476d46ad2SChristian Brauner /*
48576d46ad2SChristian Brauner * The grandchild should become a regular zombie
48676d46ad2SChristian Brauner * since it was NOT created with CLONE_AUTOREAP.
48776d46ad2SChristian Brauner * Wait for it to verify.
48876d46ad2SChristian Brauner */
48976d46ad2SChristian Brauner ret = waitpid(gc, &status, 0);
49076d46ad2SChristian Brauner if (ret == gc && WIFEXITED(status) &&
49176d46ad2SChristian Brauner WEXITSTATUS(status) == 77) {
49276d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "P", 1);
49376d46ad2SChristian Brauner } else {
49476d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "F", 1);
49576d46ad2SChristian Brauner }
49676d46ad2SChristian Brauner close(ipc_sockets[1]);
49776d46ad2SChristian Brauner _exit(0);
49876d46ad2SChristian Brauner }
49976d46ad2SChristian Brauner
50076d46ad2SChristian Brauner close(ipc_sockets[1]);
50176d46ad2SChristian Brauner
50276d46ad2SChristian Brauner ret = read_nointr(ipc_sockets[0], buf, 1);
50376d46ad2SChristian Brauner close(ipc_sockets[0]);
50476d46ad2SChristian Brauner ASSERT_EQ(ret, 1);
50576d46ad2SChristian Brauner
50676d46ad2SChristian Brauner /*
50776d46ad2SChristian Brauner * 'P' means the autoreap child was able to waitpid() its
50876d46ad2SChristian Brauner * grandchild (correct - grandchild should be a normal zombie,
50976d46ad2SChristian Brauner * not autoreaped).
51076d46ad2SChristian Brauner */
51176d46ad2SChristian Brauner ASSERT_EQ(buf[0], 'P');
51276d46ad2SChristian Brauner
51376d46ad2SChristian Brauner /* Wait for the autoreap child to exit. */
51476d46ad2SChristian Brauner pfd.fd = pidfd;
51576d46ad2SChristian Brauner pfd.events = POLLIN;
51676d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000);
51776d46ad2SChristian Brauner ASSERT_EQ(ret, 1);
51876d46ad2SChristian Brauner
51976d46ad2SChristian Brauner /* Autoreap child itself should be autoreaped. */
52076d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG);
52176d46ad2SChristian Brauner ASSERT_EQ(ret, -1);
52276d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD);
52376d46ad2SChristian Brauner
52476d46ad2SChristian Brauner close(pidfd);
52576d46ad2SChristian Brauner }
52676d46ad2SChristian Brauner
5272a4d85aaSChristian Brauner /*
5282a4d85aaSChristian Brauner * Test that CLONE_NNP sets no_new_privs on the child.
5292a4d85aaSChristian Brauner * The child checks via prctl(PR_GET_NO_NEW_PRIVS) and reports back.
5302a4d85aaSChristian Brauner * The parent must NOT have no_new_privs set afterwards.
5312a4d85aaSChristian Brauner */
TEST(nnp_sets_no_new_privs)5322a4d85aaSChristian Brauner TEST(nnp_sets_no_new_privs)
5332a4d85aaSChristian Brauner {
5342a4d85aaSChristian Brauner struct __clone_args args = {
5352a4d85aaSChristian Brauner .flags = CLONE_PIDFD | CLONE_AUTOREAP | CLONE_NNP,
5362a4d85aaSChristian Brauner .exit_signal = 0,
5372a4d85aaSChristian Brauner };
5382a4d85aaSChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
5392a4d85aaSChristian Brauner int pidfd = -1, ret;
5402a4d85aaSChristian Brauner struct pollfd pfd;
5412a4d85aaSChristian Brauner pid_t pid;
5422a4d85aaSChristian Brauner
5432a4d85aaSChristian Brauner /* Ensure parent does not already have no_new_privs. */
5442a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
5452a4d85aaSChristian Brauner ASSERT_EQ(ret, 0) {
5462a4d85aaSChristian Brauner TH_LOG("Parent already has no_new_privs set, cannot run test");
5472a4d85aaSChristian Brauner }
5482a4d85aaSChristian Brauner
5492a4d85aaSChristian Brauner args.pidfd = ptr_to_u64(&pidfd);
5502a4d85aaSChristian Brauner
5512a4d85aaSChristian Brauner pid = sys_clone3(&args, sizeof(args));
5522a4d85aaSChristian Brauner if (pid < 0 && errno == EINVAL)
5532a4d85aaSChristian Brauner SKIP(return, "CLONE_NNP not supported");
5542a4d85aaSChristian Brauner ASSERT_GE(pid, 0);
5552a4d85aaSChristian Brauner
5562a4d85aaSChristian Brauner if (pid == 0) {
5572a4d85aaSChristian Brauner /*
5582a4d85aaSChristian Brauner * Child: check no_new_privs. Exit 0 if set, 1 if not.
5592a4d85aaSChristian Brauner */
5602a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
5612a4d85aaSChristian Brauner _exit(ret == 1 ? 0 : 1);
5622a4d85aaSChristian Brauner }
5632a4d85aaSChristian Brauner
5642a4d85aaSChristian Brauner ASSERT_GE(pidfd, 0);
5652a4d85aaSChristian Brauner
5662a4d85aaSChristian Brauner /* Parent must still NOT have no_new_privs. */
5672a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
5682a4d85aaSChristian Brauner ASSERT_EQ(ret, 0) {
5692a4d85aaSChristian Brauner TH_LOG("Parent got no_new_privs after creating CLONE_NNP child");
5702a4d85aaSChristian Brauner }
5712a4d85aaSChristian Brauner
5722a4d85aaSChristian Brauner /* Wait for child to exit. */
5732a4d85aaSChristian Brauner pfd.fd = pidfd;
5742a4d85aaSChristian Brauner pfd.events = POLLIN;
5752a4d85aaSChristian Brauner ret = poll(&pfd, 1, 5000);
5762a4d85aaSChristian Brauner ASSERT_EQ(ret, 1);
5772a4d85aaSChristian Brauner
5782a4d85aaSChristian Brauner /* Verify child exited with 0 (no_new_privs was set). */
5792a4d85aaSChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
5802a4d85aaSChristian Brauner ASSERT_EQ(ret, 0);
5812a4d85aaSChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
5822a4d85aaSChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code));
5832a4d85aaSChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) {
5842a4d85aaSChristian Brauner TH_LOG("Child did not have no_new_privs set");
5852a4d85aaSChristian Brauner }
5862a4d85aaSChristian Brauner
5872a4d85aaSChristian Brauner close(pidfd);
5882a4d85aaSChristian Brauner }
5892a4d85aaSChristian Brauner
5902a4d85aaSChristian Brauner /*
5912a4d85aaSChristian Brauner * Test that CLONE_NNP with CLONE_THREAD fails with EINVAL.
5922a4d85aaSChristian Brauner */
TEST(nnp_rejects_thread)5932a4d85aaSChristian Brauner TEST(nnp_rejects_thread)
5942a4d85aaSChristian Brauner {
5952a4d85aaSChristian Brauner struct __clone_args args = {
5962a4d85aaSChristian Brauner .flags = CLONE_NNP | CLONE_THREAD |
5972a4d85aaSChristian Brauner CLONE_SIGHAND | CLONE_VM,
5982a4d85aaSChristian Brauner .exit_signal = 0,
5992a4d85aaSChristian Brauner };
6002a4d85aaSChristian Brauner pid_t pid;
6012a4d85aaSChristian Brauner
6022a4d85aaSChristian Brauner pid = sys_clone3(&args, sizeof(args));
6032a4d85aaSChristian Brauner ASSERT_EQ(pid, -1);
6042a4d85aaSChristian Brauner ASSERT_EQ(errno, EINVAL);
6052a4d85aaSChristian Brauner }
6062a4d85aaSChristian Brauner
6072a4d85aaSChristian Brauner /*
6082a4d85aaSChristian Brauner * Test that a plain CLONE_AUTOREAP child does NOT get no_new_privs.
6092a4d85aaSChristian Brauner * Only CLONE_NNP should set it.
6102a4d85aaSChristian Brauner */
TEST(autoreap_no_new_privs_unset)6112a4d85aaSChristian Brauner TEST(autoreap_no_new_privs_unset)
6122a4d85aaSChristian Brauner {
6132a4d85aaSChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
6142a4d85aaSChristian Brauner int pidfd = -1, ret;
6152a4d85aaSChristian Brauner struct pollfd pfd;
6162a4d85aaSChristian Brauner pid_t pid;
6172a4d85aaSChristian Brauner
6182a4d85aaSChristian Brauner pid = create_autoreap_child(&pidfd);
6192a4d85aaSChristian Brauner if (pid < 0 && errno == EINVAL)
6202a4d85aaSChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported");
6212a4d85aaSChristian Brauner ASSERT_GE(pid, 0);
6222a4d85aaSChristian Brauner
6232a4d85aaSChristian Brauner if (pid == 0) {
6242a4d85aaSChristian Brauner /*
6252a4d85aaSChristian Brauner * Child: check no_new_privs. Exit 0 if NOT set, 1 if set.
6262a4d85aaSChristian Brauner */
6272a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
6282a4d85aaSChristian Brauner _exit(ret == 0 ? 0 : 1);
6292a4d85aaSChristian Brauner }
6302a4d85aaSChristian Brauner
6312a4d85aaSChristian Brauner ASSERT_GE(pidfd, 0);
6322a4d85aaSChristian Brauner
6332a4d85aaSChristian Brauner pfd.fd = pidfd;
6342a4d85aaSChristian Brauner pfd.events = POLLIN;
6352a4d85aaSChristian Brauner ret = poll(&pfd, 1, 5000);
6362a4d85aaSChristian Brauner ASSERT_EQ(ret, 1);
6372a4d85aaSChristian Brauner
6382a4d85aaSChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
6392a4d85aaSChristian Brauner ASSERT_EQ(ret, 0);
6402a4d85aaSChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
6412a4d85aaSChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code));
6422a4d85aaSChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) {
6432a4d85aaSChristian Brauner TH_LOG("Plain autoreap child unexpectedly has no_new_privs");
6442a4d85aaSChristian Brauner }
6452a4d85aaSChristian Brauner
6462a4d85aaSChristian Brauner close(pidfd);
6472a4d85aaSChristian Brauner }
6482a4d85aaSChristian Brauner
649*ec26879eSChristian Brauner /*
650*ec26879eSChristian Brauner * Helper: create a child with CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP.
651*ec26879eSChristian Brauner */
create_autokill_child(int * pidfd)652*ec26879eSChristian Brauner static pid_t create_autokill_child(int *pidfd)
653*ec26879eSChristian Brauner {
654*ec26879eSChristian Brauner struct __clone_args args = {
655*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
656*ec26879eSChristian Brauner CLONE_AUTOREAP | CLONE_NNP,
657*ec26879eSChristian Brauner .exit_signal = 0,
658*ec26879eSChristian Brauner .pidfd = ptr_to_u64(pidfd),
659*ec26879eSChristian Brauner };
660*ec26879eSChristian Brauner
661*ec26879eSChristian Brauner return sys_clone3(&args, sizeof(args));
662*ec26879eSChristian Brauner }
663*ec26879eSChristian Brauner
664*ec26879eSChristian Brauner /*
665*ec26879eSChristian Brauner * Basic autokill test: child blocks in pause(), parent closes the
666*ec26879eSChristian Brauner * clone3 pidfd, child should be killed and autoreaped.
667*ec26879eSChristian Brauner */
TEST(autokill_basic)668*ec26879eSChristian Brauner TEST(autokill_basic)
669*ec26879eSChristian Brauner {
670*ec26879eSChristian Brauner int pidfd = -1, pollfd_fd = -1, ret;
671*ec26879eSChristian Brauner struct pollfd pfd;
672*ec26879eSChristian Brauner pid_t pid;
673*ec26879eSChristian Brauner
674*ec26879eSChristian Brauner pid = create_autokill_child(&pidfd);
675*ec26879eSChristian Brauner if (pid < 0 && errno == EINVAL)
676*ec26879eSChristian Brauner SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
677*ec26879eSChristian Brauner ASSERT_GE(pid, 0);
678*ec26879eSChristian Brauner
679*ec26879eSChristian Brauner if (pid == 0) {
680*ec26879eSChristian Brauner pause();
681*ec26879eSChristian Brauner _exit(1);
682*ec26879eSChristian Brauner }
683*ec26879eSChristian Brauner
684*ec26879eSChristian Brauner ASSERT_GE(pidfd, 0);
685*ec26879eSChristian Brauner
686*ec26879eSChristian Brauner /*
687*ec26879eSChristian Brauner * Open a second pidfd via pidfd_open() so we can observe the
688*ec26879eSChristian Brauner * child's death after closing the clone3 pidfd.
689*ec26879eSChristian Brauner */
690*ec26879eSChristian Brauner pollfd_fd = sys_pidfd_open(pid, 0);
691*ec26879eSChristian Brauner ASSERT_GE(pollfd_fd, 0);
692*ec26879eSChristian Brauner
693*ec26879eSChristian Brauner /* Close the clone3 pidfd — this should trigger autokill. */
694*ec26879eSChristian Brauner close(pidfd);
695*ec26879eSChristian Brauner
696*ec26879eSChristian Brauner /* Wait for the child to die via the pidfd_open'd fd. */
697*ec26879eSChristian Brauner pfd.fd = pollfd_fd;
698*ec26879eSChristian Brauner pfd.events = POLLIN;
699*ec26879eSChristian Brauner ret = poll(&pfd, 1, 5000);
700*ec26879eSChristian Brauner ASSERT_EQ(ret, 1);
701*ec26879eSChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN);
702*ec26879eSChristian Brauner
703*ec26879eSChristian Brauner /* Child should be autoreaped — no zombie. */
704*ec26879eSChristian Brauner usleep(100000);
705*ec26879eSChristian Brauner ret = waitpid(pid, NULL, WNOHANG);
706*ec26879eSChristian Brauner ASSERT_EQ(ret, -1);
707*ec26879eSChristian Brauner ASSERT_EQ(errno, ECHILD);
708*ec26879eSChristian Brauner
709*ec26879eSChristian Brauner close(pollfd_fd);
710*ec26879eSChristian Brauner }
711*ec26879eSChristian Brauner
712*ec26879eSChristian Brauner /*
713*ec26879eSChristian Brauner * CLONE_PIDFD_AUTOKILL without CLONE_PIDFD must fail with EINVAL.
714*ec26879eSChristian Brauner */
TEST(autokill_requires_pidfd)715*ec26879eSChristian Brauner TEST(autokill_requires_pidfd)
716*ec26879eSChristian Brauner {
717*ec26879eSChristian Brauner struct __clone_args args = {
718*ec26879eSChristian Brauner .flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP,
719*ec26879eSChristian Brauner .exit_signal = 0,
720*ec26879eSChristian Brauner };
721*ec26879eSChristian Brauner pid_t pid;
722*ec26879eSChristian Brauner
723*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args));
724*ec26879eSChristian Brauner ASSERT_EQ(pid, -1);
725*ec26879eSChristian Brauner ASSERT_EQ(errno, EINVAL);
726*ec26879eSChristian Brauner }
727*ec26879eSChristian Brauner
728*ec26879eSChristian Brauner /*
729*ec26879eSChristian Brauner * CLONE_PIDFD_AUTOKILL without CLONE_AUTOREAP must fail with EINVAL.
730*ec26879eSChristian Brauner */
TEST(autokill_requires_autoreap)731*ec26879eSChristian Brauner TEST(autokill_requires_autoreap)
732*ec26879eSChristian Brauner {
733*ec26879eSChristian Brauner int pidfd = -1;
734*ec26879eSChristian Brauner struct __clone_args args = {
735*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL,
736*ec26879eSChristian Brauner .exit_signal = 0,
737*ec26879eSChristian Brauner .pidfd = ptr_to_u64(&pidfd),
738*ec26879eSChristian Brauner };
739*ec26879eSChristian Brauner pid_t pid;
740*ec26879eSChristian Brauner
741*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args));
742*ec26879eSChristian Brauner ASSERT_EQ(pid, -1);
743*ec26879eSChristian Brauner ASSERT_EQ(errno, EINVAL);
744*ec26879eSChristian Brauner }
745*ec26879eSChristian Brauner
746*ec26879eSChristian Brauner /*
747*ec26879eSChristian Brauner * CLONE_PIDFD_AUTOKILL with CLONE_THREAD must fail with EINVAL.
748*ec26879eSChristian Brauner */
TEST(autokill_rejects_thread)749*ec26879eSChristian Brauner TEST(autokill_rejects_thread)
750*ec26879eSChristian Brauner {
751*ec26879eSChristian Brauner int pidfd = -1;
752*ec26879eSChristian Brauner struct __clone_args args = {
753*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
754*ec26879eSChristian Brauner CLONE_AUTOREAP | CLONE_THREAD |
755*ec26879eSChristian Brauner CLONE_SIGHAND | CLONE_VM,
756*ec26879eSChristian Brauner .exit_signal = 0,
757*ec26879eSChristian Brauner .pidfd = ptr_to_u64(&pidfd),
758*ec26879eSChristian Brauner };
759*ec26879eSChristian Brauner pid_t pid;
760*ec26879eSChristian Brauner
761*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args));
762*ec26879eSChristian Brauner ASSERT_EQ(pid, -1);
763*ec26879eSChristian Brauner ASSERT_EQ(errno, EINVAL);
764*ec26879eSChristian Brauner }
765*ec26879eSChristian Brauner
766*ec26879eSChristian Brauner /*
767*ec26879eSChristian Brauner * Test that only the clone3 pidfd triggers autokill, not pidfd_open().
768*ec26879eSChristian Brauner * Close the pidfd_open'd fd first — child should survive.
769*ec26879eSChristian Brauner * Then close the clone3 pidfd — child should be killed and autoreaped.
770*ec26879eSChristian Brauner */
TEST(autokill_pidfd_open_no_effect)771*ec26879eSChristian Brauner TEST(autokill_pidfd_open_no_effect)
772*ec26879eSChristian Brauner {
773*ec26879eSChristian Brauner int pidfd = -1, open_fd = -1, ret;
774*ec26879eSChristian Brauner struct pollfd pfd;
775*ec26879eSChristian Brauner pid_t pid;
776*ec26879eSChristian Brauner
777*ec26879eSChristian Brauner pid = create_autokill_child(&pidfd);
778*ec26879eSChristian Brauner if (pid < 0 && errno == EINVAL)
779*ec26879eSChristian Brauner SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
780*ec26879eSChristian Brauner ASSERT_GE(pid, 0);
781*ec26879eSChristian Brauner
782*ec26879eSChristian Brauner if (pid == 0) {
783*ec26879eSChristian Brauner pause();
784*ec26879eSChristian Brauner _exit(1);
785*ec26879eSChristian Brauner }
786*ec26879eSChristian Brauner
787*ec26879eSChristian Brauner ASSERT_GE(pidfd, 0);
788*ec26879eSChristian Brauner
789*ec26879eSChristian Brauner /* Open a second pidfd via pidfd_open(). */
790*ec26879eSChristian Brauner open_fd = sys_pidfd_open(pid, 0);
791*ec26879eSChristian Brauner ASSERT_GE(open_fd, 0);
792*ec26879eSChristian Brauner
793*ec26879eSChristian Brauner /*
794*ec26879eSChristian Brauner * Close the pidfd_open'd fd — child should survive because
795*ec26879eSChristian Brauner * only the clone3 pidfd has autokill.
796*ec26879eSChristian Brauner */
797*ec26879eSChristian Brauner close(open_fd);
798*ec26879eSChristian Brauner usleep(200000);
799*ec26879eSChristian Brauner
800*ec26879eSChristian Brauner /* Verify child is still alive by polling the clone3 pidfd. */
801*ec26879eSChristian Brauner pfd.fd = pidfd;
802*ec26879eSChristian Brauner pfd.events = POLLIN;
803*ec26879eSChristian Brauner ret = poll(&pfd, 1, 0);
804*ec26879eSChristian Brauner ASSERT_EQ(ret, 0) {
805*ec26879eSChristian Brauner TH_LOG("Child died after closing pidfd_open fd — should still be alive");
806*ec26879eSChristian Brauner }
807*ec26879eSChristian Brauner
808*ec26879eSChristian Brauner /* Open another observation fd before triggering autokill. */
809*ec26879eSChristian Brauner open_fd = sys_pidfd_open(pid, 0);
810*ec26879eSChristian Brauner ASSERT_GE(open_fd, 0);
811*ec26879eSChristian Brauner
812*ec26879eSChristian Brauner /* Now close the clone3 pidfd — this triggers autokill. */
813*ec26879eSChristian Brauner close(pidfd);
814*ec26879eSChristian Brauner
815*ec26879eSChristian Brauner pfd.fd = open_fd;
816*ec26879eSChristian Brauner pfd.events = POLLIN;
817*ec26879eSChristian Brauner ret = poll(&pfd, 1, 5000);
818*ec26879eSChristian Brauner ASSERT_EQ(ret, 1);
819*ec26879eSChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN);
820*ec26879eSChristian Brauner
821*ec26879eSChristian Brauner /* Child should be autoreaped — no zombie. */
822*ec26879eSChristian Brauner usleep(100000);
823*ec26879eSChristian Brauner ret = waitpid(pid, NULL, WNOHANG);
824*ec26879eSChristian Brauner ASSERT_EQ(ret, -1);
825*ec26879eSChristian Brauner ASSERT_EQ(errno, ECHILD);
826*ec26879eSChristian Brauner
827*ec26879eSChristian Brauner close(open_fd);
828*ec26879eSChristian Brauner }
829*ec26879eSChristian Brauner
830*ec26879eSChristian Brauner /*
831*ec26879eSChristian Brauner * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM
832*ec26879eSChristian Brauner * for an unprivileged caller.
833*ec26879eSChristian Brauner */
TEST(autokill_requires_cap_sys_admin)834*ec26879eSChristian Brauner TEST(autokill_requires_cap_sys_admin)
835*ec26879eSChristian Brauner {
836*ec26879eSChristian Brauner int pidfd = -1, ret;
837*ec26879eSChristian Brauner struct __clone_args args = {
838*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
839*ec26879eSChristian Brauner CLONE_AUTOREAP,
840*ec26879eSChristian Brauner .exit_signal = 0,
841*ec26879eSChristian Brauner .pidfd = ptr_to_u64(&pidfd),
842*ec26879eSChristian Brauner };
843*ec26879eSChristian Brauner pid_t pid;
844*ec26879eSChristian Brauner
845*ec26879eSChristian Brauner /* Drop all capabilities so we lack CAP_SYS_ADMIN. */
846*ec26879eSChristian Brauner ret = drop_all_caps();
847*ec26879eSChristian Brauner ASSERT_EQ(ret, 0);
848*ec26879eSChristian Brauner
849*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args));
850*ec26879eSChristian Brauner ASSERT_EQ(pid, -1);
851*ec26879eSChristian Brauner ASSERT_EQ(errno, EPERM);
852*ec26879eSChristian Brauner }
853*ec26879eSChristian Brauner
854*ec26879eSChristian Brauner /*
855*ec26879eSChristian Brauner * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with
856*ec26879eSChristian Brauner * CAP_SYS_ADMIN.
857*ec26879eSChristian Brauner */
TEST(autokill_without_nnp_with_cap)858*ec26879eSChristian Brauner TEST(autokill_without_nnp_with_cap)
859*ec26879eSChristian Brauner {
860*ec26879eSChristian Brauner struct __clone_args args = {
861*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
862*ec26879eSChristian Brauner CLONE_AUTOREAP,
863*ec26879eSChristian Brauner .exit_signal = 0,
864*ec26879eSChristian Brauner };
865*ec26879eSChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
866*ec26879eSChristian Brauner int pidfd = -1, ret;
867*ec26879eSChristian Brauner struct pollfd pfd;
868*ec26879eSChristian Brauner pid_t pid;
869*ec26879eSChristian Brauner
870*ec26879eSChristian Brauner if (geteuid() != 0)
871*ec26879eSChristian Brauner SKIP(return, "Need root/CAP_SYS_ADMIN");
872*ec26879eSChristian Brauner
873*ec26879eSChristian Brauner args.pidfd = ptr_to_u64(&pidfd);
874*ec26879eSChristian Brauner
875*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args));
876*ec26879eSChristian Brauner if (pid < 0 && errno == EINVAL)
877*ec26879eSChristian Brauner SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
878*ec26879eSChristian Brauner ASSERT_GE(pid, 0);
879*ec26879eSChristian Brauner
880*ec26879eSChristian Brauner if (pid == 0)
881*ec26879eSChristian Brauner _exit(0);
882*ec26879eSChristian Brauner
883*ec26879eSChristian Brauner ASSERT_GE(pidfd, 0);
884*ec26879eSChristian Brauner
885*ec26879eSChristian Brauner /* Wait for child to exit. */
886*ec26879eSChristian Brauner pfd.fd = pidfd;
887*ec26879eSChristian Brauner pfd.events = POLLIN;
888*ec26879eSChristian Brauner ret = poll(&pfd, 1, 5000);
889*ec26879eSChristian Brauner ASSERT_EQ(ret, 1);
890*ec26879eSChristian Brauner
891*ec26879eSChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
892*ec26879eSChristian Brauner ASSERT_EQ(ret, 0);
893*ec26879eSChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
894*ec26879eSChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code));
895*ec26879eSChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
896*ec26879eSChristian Brauner
897*ec26879eSChristian Brauner close(pidfd);
898*ec26879eSChristian Brauner }
899*ec26879eSChristian Brauner
90076d46ad2SChristian Brauner TEST_HARNESS_MAIN
901