1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <linux/types.h>
8 #include <poll.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <signal.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <sys/prctl.h>
17 #include <sys/wait.h>
18 #include <unistd.h>
19 #include <sys/socket.h>
20 #include <linux/kcmp.h>
21 #include <sys/stat.h>
22 
23 #include "pidfd.h"
24 #include "../kselftest_harness.h"
25 
/*
 * Per-test state: the four children spawned by FIXTURE_SETUP, each tracked
 * by its pid together with the pidfd filled in through create_child().
 */
FIXTURE(pidfd_info)
{
	/* Child 1: sent SIGKILL during setup, intentionally left unreaped. */
	pid_t child_pid1;
	int child_pidfd1;

	/* Child 2: sent SIGKILL during setup and then reaped with waitid(). */
	pid_t child_pid2;
	int child_pidfd2;

	/*
	 * Child 3: created with CLONE_NEWUSER | CLONE_NEWPID, calls
	 * _exit(EXIT_SUCCESS) right away and is not reaped during setup.
	 */
	pid_t child_pid3;
	int child_pidfd3;

	/* Child 4: same as child 3, but reaped with waitid() during setup. */
	pid_t child_pid4;
	int child_pidfd4;
};
40 
FIXTURE_SETUP(pidfd_info)41 FIXTURE_SETUP(pidfd_info)
42 {
43 	int ret;
44 	int ipc_sockets[2];
45 	char c;
46 
47 	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
48 	EXPECT_EQ(ret, 0);
49 
50 	self->child_pid1 = create_child(&self->child_pidfd1, 0);
51 	EXPECT_GE(self->child_pid1, 0);
52 
53 	if (self->child_pid1 == 0) {
54 		close(ipc_sockets[0]);
55 
56 		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
57 			_exit(EXIT_FAILURE);
58 
59 		close(ipc_sockets[1]);
60 
61 		pause();
62 		_exit(EXIT_SUCCESS);
63 	}
64 
65 	EXPECT_EQ(close(ipc_sockets[1]), 0);
66 	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
67 	EXPECT_EQ(close(ipc_sockets[0]), 0);
68 
69 	/* SIGKILL but don't reap. */
70 	EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0);
71 
72 	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
73 	EXPECT_EQ(ret, 0);
74 
75 	self->child_pid2 = create_child(&self->child_pidfd2, 0);
76 	EXPECT_GE(self->child_pid2, 0);
77 
78 	if (self->child_pid2 == 0) {
79 		close(ipc_sockets[0]);
80 
81 		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
82 			_exit(EXIT_FAILURE);
83 
84 		close(ipc_sockets[1]);
85 
86 		pause();
87 		_exit(EXIT_SUCCESS);
88 	}
89 
90 	EXPECT_EQ(close(ipc_sockets[1]), 0);
91 	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
92 	EXPECT_EQ(close(ipc_sockets[0]), 0);
93 
94 	/* SIGKILL and reap. */
95 	EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0);
96 	EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
97 
98 	self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID);
99 	EXPECT_GE(self->child_pid3, 0);
100 
101 	if (self->child_pid3 == 0)
102 		_exit(EXIT_SUCCESS);
103 
104 	self->child_pid4 = create_child(&self->child_pidfd4, CLONE_NEWUSER | CLONE_NEWPID);
105 	EXPECT_GE(self->child_pid4, 0);
106 
107 	if (self->child_pid4 == 0)
108 		_exit(EXIT_SUCCESS);
109 
110 	EXPECT_EQ(sys_waitid(P_PID, self->child_pid4, NULL, WEXITED), 0);
111 }
112 
FIXTURE_TEARDOWN(pidfd_info)113 FIXTURE_TEARDOWN(pidfd_info)
114 {
115 	sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0);
116 	if (self->child_pidfd1 >= 0)
117 		EXPECT_EQ(0, close(self->child_pidfd1));
118 
119 	sys_waitid(P_PID, self->child_pid1, NULL, WEXITED);
120 
121 	sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0);
122 	if (self->child_pidfd2 >= 0)
123 		EXPECT_EQ(0, close(self->child_pidfd2));
124 
125 	sys_waitid(P_PID, self->child_pid2, NULL, WEXITED);
126 	sys_waitid(P_PID, self->child_pid3, NULL, WEXITED);
127 	sys_waitid(P_PID, self->child_pid4, NULL, WEXITED);
128 }
129 
TEST_F(pidfd_info,sigkill_exit)130 TEST_F(pidfd_info, sigkill_exit)
131 {
132 	struct pidfd_info info = {
133 		.mask = PIDFD_INFO_CGROUPID,
134 	};
135 
136 	/* Process has exited but not been reaped so this must work. */
137 	ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
138 
139 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
140 	ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
141 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
142 	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
143 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
144 }
145 
TEST_F(pidfd_info,sigkill_reaped)146 TEST_F(pidfd_info, sigkill_reaped)
147 {
148 	struct pidfd_info info = {
149 		.mask = PIDFD_INFO_CGROUPID,
150 	};
151 
152 	/* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
153 	ASSERT_NE(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
154 	ASSERT_EQ(errno, ESRCH);
155 
156 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
157 	ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
158 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
159 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
160 	ASSERT_TRUE(WIFSIGNALED(info.exit_code));
161 	ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
162 }
163 
TEST_F(pidfd_info,success_exit)164 TEST_F(pidfd_info, success_exit)
165 {
166 	struct pidfd_info info = {
167 		.mask = PIDFD_INFO_CGROUPID,
168 	};
169 
170 	/* Process has exited but not been reaped so this must work. */
171 	ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
172 
173 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
174 	ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
175 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
176 	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
177 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
178 }
179 
TEST_F(pidfd_info,success_reaped)180 TEST_F(pidfd_info, success_reaped)
181 {
182 	struct pidfd_info info = {
183 		.mask = PIDFD_INFO_CGROUPID,
184 	};
185 
186 	/* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
187 	ASSERT_NE(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
188 	ASSERT_EQ(errno, ESRCH);
189 
190 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
191 	ASSERT_EQ(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
192 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
193 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
194 	ASSERT_TRUE(WIFEXITED(info.exit_code));
195 	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
196 }
197 
TEST_F(pidfd_info,success_reaped_poll)198 TEST_F(pidfd_info, success_reaped_poll)
199 {
200 	struct pidfd_info info = {
201 		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
202 	};
203 	struct pollfd fds = {};
204 	int nevents;
205 
206 	fds.events = POLLIN;
207 	fds.fd = self->child_pidfd2;
208 
209 	nevents = poll(&fds, 1, -1);
210 	ASSERT_EQ(nevents, 1);
211 	ASSERT_TRUE(!!(fds.revents & POLLIN));
212 	ASSERT_TRUE(!!(fds.revents & POLLHUP));
213 
214 	ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
215 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
216 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
217 	ASSERT_TRUE(WIFSIGNALED(info.exit_code));
218 	ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
219 }
220 
/*
 * Thread body: report this thread's tid over the socket passed via @arg
 * (a pointer to an int file descriptor), then sleep in pause().
 *
 * Always returns NULL. If the tid could not be written in full the thread
 * returns immediately without closing the socket or pausing.
 */
static void *pidfd_info_pause_thread(void *arg)
{
	int ipc_socket = *(int *)arg;
	pid_t tid = gettid();

	/* Tell the process on the other end of the socket which tid we got. */
	if (write_nointr(ipc_socket, &tid, sizeof(tid)) == sizeof(tid)) {
		close(ipc_socket);

		/* Sleep until we're killed. */
		pause();
	}

	return NULL;
}
236 
TEST_F(pidfd_info,thread_group)237 TEST_F(pidfd_info, thread_group)
238 {
239 	pid_t pid_leader, pid_poller, pid_thread;
240 	pthread_t thread;
241 	int nevents, pidfd_leader, pidfd_thread, pidfd_leader_thread, ret;
242 	int ipc_sockets[2];
243 	struct pollfd fds = {};
244 	struct pidfd_info info = {
245 		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
246 	}, info2;
247 
248 	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
249 	EXPECT_EQ(ret, 0);
250 
251 	pid_leader = create_child(&pidfd_leader, 0);
252 	EXPECT_GE(pid_leader, 0);
253 
254 	if (pid_leader == 0) {
255 		close(ipc_sockets[0]);
256 
257 		/* The thread will outlive the thread-group leader. */
258 		if (pthread_create(&thread, NULL, pidfd_info_pause_thread, &ipc_sockets[1]))
259 			syscall(__NR_exit, EXIT_FAILURE);
260 
261 		/* Make the thread-group leader exit prematurely. */
262 		syscall(__NR_exit, EXIT_SUCCESS);
263 	}
264 
265 	/*
266 	 * Opening a PIDFD_THREAD aka thread-specific pidfd based on a
267 	 * thread-group leader must succeed.
268 	 */
269 	pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
270 	ASSERT_GE(pidfd_leader_thread, 0);
271 
272 	pid_poller = fork();
273 	ASSERT_GE(pid_poller, 0);
274 	if (pid_poller == 0) {
275 		/*
276 		 * We can't poll and wait for the old thread-group
277 		 * leader to exit using a thread-specific pidfd. The
278 		 * thread-group leader exited prematurely and
279 		 * notification is delayed until all subthreads have
280 		 * exited.
281 		 */
282 		fds.events = POLLIN;
283 		fds.fd = pidfd_leader_thread;
284 		nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
285 		if (nevents != 0)
286 			_exit(EXIT_FAILURE);
287 		if (fds.revents & POLLIN)
288 			_exit(EXIT_FAILURE);
289 		if (fds.revents & POLLHUP)
290 			_exit(EXIT_FAILURE);
291 		_exit(EXIT_SUCCESS);
292 	}
293 
294 	/* Retrieve the tid of the thread. */
295 	EXPECT_EQ(close(ipc_sockets[1]), 0);
296 	ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
297 	EXPECT_EQ(close(ipc_sockets[0]), 0);
298 
299 	/* Opening a thread as a thread-group leader must fail. */
300 	pidfd_thread = sys_pidfd_open(pid_thread, 0);
301 	ASSERT_LT(pidfd_thread, 0);
302 
303 	/* Opening a thread as a PIDFD_THREAD must succeed. */
304 	pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
305 	ASSERT_GE(pidfd_thread, 0);
306 
307 	ASSERT_EQ(wait_for_pid(pid_poller), 0);
308 
309 	/*
310 	 * Note that pidfd_leader is a thread-group pidfd, so polling on it
311 	 * would only notify us once all thread in the thread-group have
312 	 * exited. So we can't poll before we have taken down the whole
313 	 * thread-group.
314 	 */
315 
316 	/* Get PIDFD_GET_INFO using the thread-group leader pidfd. */
317 	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
318 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
319 	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
320 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
321 	ASSERT_EQ(info.pid, pid_leader);
322 
323 	/*
324 	 * Now retrieve the same info using the thread specific pidfd
325 	 * for the thread-group leader.
326 	 */
327 	info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
328 	ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
329 	ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_CREDS));
330 	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
331 	ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_EXIT));
332 	ASSERT_EQ(info2.pid, pid_leader);
333 
334 	/* Now try the thread-specific pidfd. */
335 	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
336 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
337 	/* The thread hasn't exited, so no PIDFD_INFO_EXIT information yet. */
338 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
339 	ASSERT_EQ(info.pid, pid_thread);
340 
341 	/*
342 	 * Take down the whole thread-group. The thread-group leader
343 	 * exited successfully but the thread will now be SIGKILLed.
344 	 * This must be reflected in the recorded exit information.
345 	 */
346 	EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
347 	EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
348 
349 	fds.events = POLLIN;
350 	fds.fd = pidfd_leader;
351 	nevents = poll(&fds, 1, -1);
352 	ASSERT_EQ(nevents, 1);
353 	ASSERT_TRUE(!!(fds.revents & POLLIN));
354 	/* The thread-group leader has been reaped. */
355 	ASSERT_TRUE(!!(fds.revents & POLLHUP));
356 
357 	/*
358 	 * Retrieve exit information for the thread-group leader via the
359 	 * thread-group leader pidfd.
360 	 */
361 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
362 	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
363 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
364 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
365 	/* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
366 	ASSERT_TRUE(WIFEXITED(info.exit_code));
367 	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
368 
369 	/*
370 	 * Retrieve exit information for the thread-group leader via the
371 	 * thread-specific pidfd.
372 	 */
373 	info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
374 	ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
375 	ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS));
376 	ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT));
377 
378 	/* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
379 	ASSERT_TRUE(WIFEXITED(info2.exit_code));
380 	ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0);
381 
382 	/* Retrieve exit information for the thread. */
383 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
384 	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
385 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
386 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
387 
388 	/* The thread got SIGKILLed. */
389 	ASSERT_TRUE(WIFSIGNALED(info.exit_code));
390 	ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
391 
392 	EXPECT_EQ(close(pidfd_leader), 0);
393 	EXPECT_EQ(close(pidfd_thread), 0);
394 }
395 
pidfd_info_thread_exec(void * arg)396 static void *pidfd_info_thread_exec(void *arg)
397 {
398 	pid_t pid_thread = gettid();
399 	int ipc_socket = *(int *)arg;
400 
401 	/* Inform the grand-parent what the tid of this thread is. */
402 	if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
403 		return NULL;
404 
405 	if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
406 		return NULL;
407 
408 	close(ipc_socket);
409 
410 	sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
411 	return NULL;
412 }
413 
TEST_F(pidfd_info,thread_group_exec)414 TEST_F(pidfd_info, thread_group_exec)
415 {
416 	pid_t pid_leader, pid_poller, pid_thread;
417 	pthread_t thread;
418 	int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
419 	int ipc_sockets[2];
420 	struct pollfd fds = {};
421 	struct pidfd_info info = {
422 		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
423 	};
424 
425 	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
426 	EXPECT_EQ(ret, 0);
427 
428 	pid_leader = create_child(&pidfd_leader, 0);
429 	EXPECT_GE(pid_leader, 0);
430 
431 	if (pid_leader == 0) {
432 		close(ipc_sockets[0]);
433 
434 		/* The thread will outlive the thread-group leader. */
435 		if (pthread_create(&thread, NULL, pidfd_info_thread_exec, &ipc_sockets[1]))
436 			syscall(__NR_exit, EXIT_FAILURE);
437 
438 		/* Make the thread-group leader exit prematurely. */
439 		syscall(__NR_exit, EXIT_SUCCESS);
440 	}
441 
442 	/* Open a thread-specific pidfd for the thread-group leader. */
443 	pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
444 	ASSERT_GE(pidfd_leader_thread, 0);
445 
446 	pid_poller = fork();
447 	ASSERT_GE(pid_poller, 0);
448 	if (pid_poller == 0) {
449 		/*
450 		 * We can't poll and wait for the old thread-group
451 		 * leader to exit using a thread-specific pidfd. The
452 		 * thread-group leader exited prematurely and
453 		 * notification is delayed until all subthreads have
454 		 * exited.
455 		 *
456 		 * When the thread has execed it will taken over the old
457 		 * thread-group leaders struct pid. Calling poll after
458 		 * the thread execed will thus block again because a new
459 		 * thread-group has started.
460 		 */
461 		fds.events = POLLIN;
462 		fds.fd = pidfd_leader_thread;
463 		nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
464 		if (nevents != 0)
465 			_exit(EXIT_FAILURE);
466 		if (fds.revents & POLLIN)
467 			_exit(EXIT_FAILURE);
468 		if (fds.revents & POLLHUP)
469 			_exit(EXIT_FAILURE);
470 		_exit(EXIT_SUCCESS);
471 	}
472 
473 	/* Retrieve the tid of the thread. */
474 	EXPECT_EQ(close(ipc_sockets[1]), 0);
475 	ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
476 
477 	/* Opening a thread as a PIDFD_THREAD must succeed. */
478 	pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
479 	ASSERT_GE(pidfd_thread, 0);
480 
481 	/* Now that we've opened a thread-specific pidfd the thread can exec. */
482 	ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
483 	EXPECT_EQ(close(ipc_sockets[0]), 0);
484 
485 	ASSERT_EQ(wait_for_pid(pid_poller), 0);
486 
487 	/* Wait until the kernel has SIGKILLed the thread. */
488 	fds.events = POLLHUP;
489 	fds.fd = pidfd_thread;
490 	nevents = poll(&fds, 1, -1);
491 	ASSERT_EQ(nevents, 1);
492 	/* The thread has been reaped. */
493 	ASSERT_TRUE(!!(fds.revents & POLLHUP));
494 
495 	/* Retrieve thread-specific exit info from pidfd. */
496 	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
497 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
498 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
499 	/*
500 	 * While the kernel will have SIGKILLed the whole thread-group
501 	 * during exec it will cause the individual threads to exit
502 	 * cleanly.
503 	 */
504 	ASSERT_TRUE(WIFEXITED(info.exit_code));
505 	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
506 
507 	/*
508 	 * The thread-group leader is still alive, the thread has taken
509 	 * over its struct pid and thus its pid number.
510 	 */
511 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
512 	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
513 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
514 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
515 	ASSERT_EQ(info.pid, pid_leader);
516 
517 	/* Take down the thread-group leader. */
518 	EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
519 
520 	/*
521 	 * Afte the exec we're dealing with an empty thread-group so now
522 	 * we must see an exit notification on the thread-specific pidfd
523 	 * for the thread-group leader as there's no subthread that can
524 	 * revive the struct pid.
525 	 */
526 	fds.events = POLLIN;
527 	fds.fd = pidfd_leader_thread;
528 	nevents = poll(&fds, 1, -1);
529 	ASSERT_EQ(nevents, 1);
530 	ASSERT_TRUE(!!(fds.revents & POLLIN));
531 	ASSERT_FALSE(!!(fds.revents & POLLHUP));
532 
533 	EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
534 
535 	/* Retrieve exit information for the thread-group leader. */
536 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
537 	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
538 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
539 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
540 
541 	EXPECT_EQ(close(pidfd_leader), 0);
542 	EXPECT_EQ(close(pidfd_thread), 0);
543 }
544 
/*
 * Thread body used by the thread_group_exec_thread test, where the
 * thread-group leader stays alive while the subthread execs.
 *
 * The thread-side sequence is exactly the one implemented by
 * pidfd_info_thread_exec(): send our tid over the socket referenced by
 * @arg, wait for the pid_t-sized go-ahead, close the socket and exec
 * "pidfd_exec_helper". Delegate to it instead of repeating the code.
 *
 * Returns NULL if a transfer is short or if the exec call returns.
 */
static void *pidfd_info_thread_exec_sane(void *arg)
{
	return pidfd_info_thread_exec(arg);
}
562 
TEST_F(pidfd_info,thread_group_exec_thread)563 TEST_F(pidfd_info, thread_group_exec_thread)
564 {
565 	pid_t pid_leader, pid_poller, pid_thread;
566 	pthread_t thread;
567 	int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
568 	int ipc_sockets[2];
569 	struct pollfd fds = {};
570 	struct pidfd_info info = {
571 		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
572 	};
573 
574 	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
575 	EXPECT_EQ(ret, 0);
576 
577 	pid_leader = create_child(&pidfd_leader, 0);
578 	EXPECT_GE(pid_leader, 0);
579 
580 	if (pid_leader == 0) {
581 		close(ipc_sockets[0]);
582 
583 		/* The thread will outlive the thread-group leader. */
584 		if (pthread_create(&thread, NULL, pidfd_info_thread_exec_sane, &ipc_sockets[1]))
585 			syscall(__NR_exit, EXIT_FAILURE);
586 
587 		/*
588 		 * Pause the thread-group leader. It will be killed once
589 		 * the subthread execs.
590 		 */
591 		pause();
592 		syscall(__NR_exit, EXIT_SUCCESS);
593 	}
594 
595 	/* Retrieve the tid of the thread. */
596 	EXPECT_EQ(close(ipc_sockets[1]), 0);
597 	ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
598 
599 	/* Opening a thread as a PIDFD_THREAD must succeed. */
600 	pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
601 	ASSERT_GE(pidfd_thread, 0);
602 
603 	/* Open a thread-specific pidfd for the thread-group leader. */
604 	pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
605 	ASSERT_GE(pidfd_leader_thread, 0);
606 
607 	pid_poller = fork();
608 	ASSERT_GE(pid_poller, 0);
609 	if (pid_poller == 0) {
610 		/*
611 		 * The subthread will now exec. The struct pid of the old
612 		 * thread-group leader will be assumed by the subthread which
613 		 * becomes the new thread-group leader. So no exit notification
614 		 * must be generated. Wait for 5 seconds and call it a success
615 		 * if no notification has been received.
616 		 */
617 		fds.events = POLLIN;
618 		fds.fd = pidfd_leader_thread;
619 		nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
620 		if (nevents != 0)
621 			_exit(EXIT_FAILURE);
622 		if (fds.revents & POLLIN)
623 			_exit(EXIT_FAILURE);
624 		if (fds.revents & POLLHUP)
625 			_exit(EXIT_FAILURE);
626 		_exit(EXIT_SUCCESS);
627 	}
628 
629 	/* Now that we've opened a thread-specific pidfd the thread can exec. */
630 	ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
631 	EXPECT_EQ(close(ipc_sockets[0]), 0);
632 	ASSERT_EQ(wait_for_pid(pid_poller), 0);
633 
634 	/* Wait until the kernel has SIGKILLed the thread. */
635 	fds.events = POLLHUP;
636 	fds.fd = pidfd_thread;
637 	nevents = poll(&fds, 1, -1);
638 	ASSERT_EQ(nevents, 1);
639 	/* The thread has been reaped. */
640 	ASSERT_TRUE(!!(fds.revents & POLLHUP));
641 
642 	/* Retrieve thread-specific exit info from pidfd. */
643 	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
644 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
645 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
646 	/*
647 	 * While the kernel will have SIGKILLed the whole thread-group
648 	 * during exec it will cause the individual threads to exit
649 	 * cleanly.
650 	 */
651 	ASSERT_TRUE(WIFEXITED(info.exit_code));
652 	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
653 
654 	/*
655 	 * The thread-group leader is still alive, the thread has taken
656 	 * over its struct pid and thus its pid number.
657 	 */
658 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
659 	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
660 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
661 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
662 	ASSERT_EQ(info.pid, pid_leader);
663 
664 	/* Take down the thread-group leader. */
665 	EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
666 
667 	/*
668 	 * Afte the exec we're dealing with an empty thread-group so now
669 	 * we must see an exit notification on the thread-specific pidfd
670 	 * for the thread-group leader as there's no subthread that can
671 	 * revive the struct pid.
672 	 */
673 	fds.events = POLLIN;
674 	fds.fd = pidfd_leader_thread;
675 	nevents = poll(&fds, 1, -1);
676 	ASSERT_EQ(nevents, 1);
677 	ASSERT_TRUE(!!(fds.revents & POLLIN));
678 	ASSERT_FALSE(!!(fds.revents & POLLHUP));
679 
680 	EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
681 
682 	/* Retrieve exit information for the thread-group leader. */
683 	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
684 	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
685 	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
686 	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
687 
688 	EXPECT_EQ(close(pidfd_leader), 0);
689 	EXPECT_EQ(close(pidfd_thread), 0);
690 }
691 
692 TEST_HARNESS_MAIN
693