xref: /src/tests/sys/capsicum/linux.cc (revision fba81b33aabff74ad03d5f9f9663c176cf060fa6)
// Tests of Linux-specific functionality
2e5a5dd6cSEnji Cooper #ifdef __linux__
3e5a5dd6cSEnji Cooper 
4e5a5dd6cSEnji Cooper #include <sys/types.h>
5e5a5dd6cSEnji Cooper #include <sys/stat.h>
6e5a5dd6cSEnji Cooper #include <sys/socket.h>
7e5a5dd6cSEnji Cooper #include <sys/timerfd.h>
8e5a5dd6cSEnji Cooper #include <sys/signalfd.h>
9e5a5dd6cSEnji Cooper #include <sys/eventfd.h>
10e5a5dd6cSEnji Cooper #include <sys/epoll.h>
11e5a5dd6cSEnji Cooper #include <sys/inotify.h>
12e5a5dd6cSEnji Cooper #include <sys/fanotify.h>
13e5a5dd6cSEnji Cooper #include <sys/mman.h>
14e5a5dd6cSEnji Cooper #include <sys/capability.h>  // Requires e.g. libcap-dev package for POSIX.1e capabilities headers
15e5a5dd6cSEnji Cooper #include <linux/aio_abi.h>
16e5a5dd6cSEnji Cooper #include <linux/filter.h>
17e5a5dd6cSEnji Cooper #include <linux/seccomp.h>
18e5a5dd6cSEnji Cooper #include <linux/version.h>
19e5a5dd6cSEnji Cooper #include <poll.h>
20e5a5dd6cSEnji Cooper #include <sched.h>
21e5a5dd6cSEnji Cooper #include <signal.h>
22e5a5dd6cSEnji Cooper #include <fcntl.h>
23e5a5dd6cSEnji Cooper #include <unistd.h>
24e5a5dd6cSEnji Cooper 
25e5a5dd6cSEnji Cooper #include <string>
26e5a5dd6cSEnji Cooper 
27e5a5dd6cSEnji Cooper #include "capsicum.h"
28e5a5dd6cSEnji Cooper #include "syscalls.h"
29e5a5dd6cSEnji Cooper #include "capsicum-test.h"
30e5a5dd6cSEnji Cooper 
TEST(Linux,TimerFD)31e5a5dd6cSEnji Cooper TEST(Linux, TimerFD) {
32e5a5dd6cSEnji Cooper   int fd = timerfd_create(CLOCK_MONOTONIC, 0);
33e5a5dd6cSEnji Cooper 
34e5a5dd6cSEnji Cooper   cap_rights_t r_ro;
35e5a5dd6cSEnji Cooper   cap_rights_init(&r_ro, CAP_READ);
36e5a5dd6cSEnji Cooper   cap_rights_t r_wo;
37e5a5dd6cSEnji Cooper   cap_rights_init(&r_wo, CAP_WRITE);
38e5a5dd6cSEnji Cooper   cap_rights_t r_rw;
39e5a5dd6cSEnji Cooper   cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
40e5a5dd6cSEnji Cooper   cap_rights_t r_rwpoll;
41e5a5dd6cSEnji Cooper   cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT);
42e5a5dd6cSEnji Cooper 
43e5a5dd6cSEnji Cooper   int cap_fd_ro = dup(fd);
44e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_ro);
45e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro));
46e5a5dd6cSEnji Cooper   int cap_fd_wo = dup(fd);
47e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_wo);
48e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo));
49e5a5dd6cSEnji Cooper   int cap_fd_rw = dup(fd);
50e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_rw);
51e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw));
52e5a5dd6cSEnji Cooper   int cap_fd_all = dup(fd);
53e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_all);
54e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll));
55e5a5dd6cSEnji Cooper 
56e5a5dd6cSEnji Cooper   struct itimerspec old_ispec;
57e5a5dd6cSEnji Cooper   struct itimerspec ispec;
58e5a5dd6cSEnji Cooper   ispec.it_interval.tv_sec = 0;
59e5a5dd6cSEnji Cooper   ispec.it_interval.tv_nsec = 0;
60e5a5dd6cSEnji Cooper   ispec.it_value.tv_sec = 0;
61e5a5dd6cSEnji Cooper   ispec.it_value.tv_nsec = 100000000;  // 100ms
62e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL));
63e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec));
64e5a5dd6cSEnji Cooper   EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL));
65e5a5dd6cSEnji Cooper   EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL));
66e5a5dd6cSEnji Cooper   EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL));
67e5a5dd6cSEnji Cooper 
68e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec));
69e5a5dd6cSEnji Cooper   EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec));
70e5a5dd6cSEnji Cooper   EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec));
71e5a5dd6cSEnji Cooper   EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
72e5a5dd6cSEnji Cooper 
73e5a5dd6cSEnji Cooper   // To be able to poll() for the timer pop, still need CAP_EVENT.
74e5a5dd6cSEnji Cooper   struct pollfd poll_fd;
75e5a5dd6cSEnji Cooper   for (int ii = 0; ii < 3; ii++) {
76e5a5dd6cSEnji Cooper     poll_fd.revents = 0;
77e5a5dd6cSEnji Cooper     poll_fd.events = POLLIN;
78e5a5dd6cSEnji Cooper     switch (ii) {
79e5a5dd6cSEnji Cooper     case 0: poll_fd.fd = cap_fd_ro; break;
80e5a5dd6cSEnji Cooper     case 1: poll_fd.fd = cap_fd_wo; break;
81e5a5dd6cSEnji Cooper     case 2: poll_fd.fd = cap_fd_rw; break;
82e5a5dd6cSEnji Cooper     }
83e5a5dd6cSEnji Cooper     // Poll immediately returns with POLLNVAL
84e5a5dd6cSEnji Cooper     EXPECT_OK(poll(&poll_fd, 1, 400));
85e5a5dd6cSEnji Cooper     EXPECT_EQ(0, (poll_fd.revents & POLLIN));
86e5a5dd6cSEnji Cooper     EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
87e5a5dd6cSEnji Cooper   }
88e5a5dd6cSEnji Cooper 
89e5a5dd6cSEnji Cooper   poll_fd.fd = cap_fd_all;
90e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 400));
91e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLIN));
92e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
93e5a5dd6cSEnji Cooper 
94e5a5dd6cSEnji Cooper   EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
95e5a5dd6cSEnji Cooper   EXPECT_EQ(0, old_ispec.it_value.tv_sec);
96e5a5dd6cSEnji Cooper   EXPECT_EQ(0, old_ispec.it_value.tv_nsec);
97e5a5dd6cSEnji Cooper   EXPECT_EQ(0, old_ispec.it_interval.tv_sec);
98e5a5dd6cSEnji Cooper   EXPECT_EQ(0, old_ispec.it_interval.tv_nsec);
99e5a5dd6cSEnji Cooper 
100e5a5dd6cSEnji Cooper   close(cap_fd_all);
101e5a5dd6cSEnji Cooper   close(cap_fd_rw);
102e5a5dd6cSEnji Cooper   close(cap_fd_wo);
103e5a5dd6cSEnji Cooper   close(cap_fd_ro);
104e5a5dd6cSEnji Cooper   close(fd);
105e5a5dd6cSEnji Cooper }
106e5a5dd6cSEnji Cooper 
FORK_TEST(Linux,SignalFDIfSingleThreaded)107b6973c8fSAlex Richardson FORK_TEST(Linux, SignalFDIfSingleThreaded) {
108e5a5dd6cSEnji Cooper   if (force_mt) {
109b6973c8fSAlex Richardson     GTEST_SKIP() << "multi-threaded run clashes with signals";
110e5a5dd6cSEnji Cooper   }
111e5a5dd6cSEnji Cooper   pid_t me = getpid();
112e5a5dd6cSEnji Cooper   sigset_t mask;
113e5a5dd6cSEnji Cooper   sigemptyset(&mask);
114e5a5dd6cSEnji Cooper   sigaddset(&mask, SIGUSR1);
115e5a5dd6cSEnji Cooper 
116e5a5dd6cSEnji Cooper   // Block signals before registering against a new signal FD.
117e5a5dd6cSEnji Cooper   EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL));
118e5a5dd6cSEnji Cooper   int fd = signalfd(-1, &mask, 0);
119e5a5dd6cSEnji Cooper   EXPECT_OK(fd);
120e5a5dd6cSEnji Cooper 
121e5a5dd6cSEnji Cooper   cap_rights_t r_rs;
122e5a5dd6cSEnji Cooper   cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
123e5a5dd6cSEnji Cooper   cap_rights_t r_ws;
124e5a5dd6cSEnji Cooper   cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
125e5a5dd6cSEnji Cooper   cap_rights_t r_sig;
126e5a5dd6cSEnji Cooper   cap_rights_init(&r_sig, CAP_FSIGNAL);
127e5a5dd6cSEnji Cooper   cap_rights_t r_rssig;
128e5a5dd6cSEnji Cooper   cap_rights_init(&r_rssig, CAP_FSIGNAL, CAP_READ, CAP_SEEK);
129e5a5dd6cSEnji Cooper   cap_rights_t r_rssig_poll;
130e5a5dd6cSEnji Cooper   cap_rights_init(&r_rssig_poll, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT);
131e5a5dd6cSEnji Cooper 
132e5a5dd6cSEnji Cooper   // Various capability variants.
133e5a5dd6cSEnji Cooper   int cap_fd_none = dup(fd);
134e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_none);
135e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_none, &r_ws));
136e5a5dd6cSEnji Cooper   int cap_fd_read = dup(fd);
137e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_read);
138e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_read, &r_rs));
139e5a5dd6cSEnji Cooper   int cap_fd_sig = dup(fd);
140e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_sig);
141e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_sig, &r_sig));
142e5a5dd6cSEnji Cooper   int cap_fd_sig_read = dup(fd);
143e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_sig_read);
144e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_sig_read, &r_rssig));
145e5a5dd6cSEnji Cooper   int cap_fd_all = dup(fd);
146e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_all);
147e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rssig_poll));
148e5a5dd6cSEnji Cooper 
149e5a5dd6cSEnji Cooper   struct signalfd_siginfo fdsi;
150e5a5dd6cSEnji Cooper 
151e5a5dd6cSEnji Cooper   // Need CAP_READ to read the signal information
152e5a5dd6cSEnji Cooper   kill(me, SIGUSR1);
153e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(read(cap_fd_none, &fdsi, sizeof(struct signalfd_siginfo)));
154e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(read(cap_fd_sig, &fdsi, sizeof(struct signalfd_siginfo)));
155e5a5dd6cSEnji Cooper   int len = read(cap_fd_read, &fdsi, sizeof(struct signalfd_siginfo));
156e5a5dd6cSEnji Cooper   EXPECT_OK(len);
157e5a5dd6cSEnji Cooper   EXPECT_EQ(sizeof(struct signalfd_siginfo), (size_t)len);
158e5a5dd6cSEnji Cooper   EXPECT_EQ(SIGUSR1, (int)fdsi.ssi_signo);
159e5a5dd6cSEnji Cooper 
160e5a5dd6cSEnji Cooper   // Need CAP_FSIGNAL to modify the signal mask.
161e5a5dd6cSEnji Cooper   sigemptyset(&mask);
162e5a5dd6cSEnji Cooper   sigaddset(&mask, SIGUSR1);
163e5a5dd6cSEnji Cooper   sigaddset(&mask, SIGUSR2);
164e5a5dd6cSEnji Cooper   EXPECT_OK(sigprocmask(SIG_BLOCK, &mask, NULL));
165e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(signalfd(cap_fd_none, &mask, 0));
166e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(signalfd(cap_fd_read, &mask, 0));
167e5a5dd6cSEnji Cooper   EXPECT_EQ(cap_fd_sig, signalfd(cap_fd_sig, &mask, 0));
168e5a5dd6cSEnji Cooper 
169e5a5dd6cSEnji Cooper   // Need CAP_EVENT to get notification of a signal in poll(2).
170e5a5dd6cSEnji Cooper   kill(me, SIGUSR2);
171e5a5dd6cSEnji Cooper 
172e5a5dd6cSEnji Cooper   struct pollfd poll_fd;
173e5a5dd6cSEnji Cooper   poll_fd.revents = 0;
174e5a5dd6cSEnji Cooper   poll_fd.events = POLLIN;
175e5a5dd6cSEnji Cooper   poll_fd.fd = cap_fd_sig_read;
176e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 400));
177e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLIN));
178e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
179e5a5dd6cSEnji Cooper 
180e5a5dd6cSEnji Cooper   poll_fd.fd = cap_fd_all;
181e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 400));
182e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLIN));
183e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
184e5a5dd6cSEnji Cooper }
185e5a5dd6cSEnji Cooper 
TEST(Linux,EventFD)186e5a5dd6cSEnji Cooper TEST(Linux, EventFD) {
187e5a5dd6cSEnji Cooper   int fd = eventfd(0, 0);
188e5a5dd6cSEnji Cooper   EXPECT_OK(fd);
189e5a5dd6cSEnji Cooper 
190e5a5dd6cSEnji Cooper   cap_rights_t r_rs;
191e5a5dd6cSEnji Cooper   cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
192e5a5dd6cSEnji Cooper   cap_rights_t r_ws;
193e5a5dd6cSEnji Cooper   cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
194e5a5dd6cSEnji Cooper   cap_rights_t r_rws;
195e5a5dd6cSEnji Cooper   cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
196e5a5dd6cSEnji Cooper   cap_rights_t r_rwspoll;
197e5a5dd6cSEnji Cooper   cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
198e5a5dd6cSEnji Cooper 
199e5a5dd6cSEnji Cooper   int cap_ro = dup(fd);
200e5a5dd6cSEnji Cooper   EXPECT_OK(cap_ro);
201e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
202e5a5dd6cSEnji Cooper   int cap_wo = dup(fd);
203e5a5dd6cSEnji Cooper   EXPECT_OK(cap_wo);
204e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
205e5a5dd6cSEnji Cooper   int cap_rw = dup(fd);
206e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rw);
207e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_rw, &r_rws));
208e5a5dd6cSEnji Cooper   int cap_all = dup(fd);
209e5a5dd6cSEnji Cooper   EXPECT_OK(cap_all);
210e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll));
211e5a5dd6cSEnji Cooper 
212e5a5dd6cSEnji Cooper   pid_t child = fork();
213e5a5dd6cSEnji Cooper   if (child == 0) {
214e5a5dd6cSEnji Cooper     // Child: write counter to eventfd
215e5a5dd6cSEnji Cooper     uint64_t u = 42;
216e5a5dd6cSEnji Cooper     EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u)));
217e5a5dd6cSEnji Cooper     EXPECT_OK(write(cap_wo, &u, sizeof(u)));
218e5a5dd6cSEnji Cooper     exit(HasFailure());
219e5a5dd6cSEnji Cooper   }
220e5a5dd6cSEnji Cooper 
221e5a5dd6cSEnji Cooper   sleep(1);  // Allow child to write
222e5a5dd6cSEnji Cooper 
223e5a5dd6cSEnji Cooper   struct pollfd poll_fd;
224e5a5dd6cSEnji Cooper   poll_fd.revents = 0;
225e5a5dd6cSEnji Cooper   poll_fd.events = POLLIN;
226e5a5dd6cSEnji Cooper   poll_fd.fd = cap_rw;
227e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 400));
228e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLIN));
229e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
230e5a5dd6cSEnji Cooper 
231e5a5dd6cSEnji Cooper   poll_fd.fd = cap_all;
232e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 400));
233e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLIN));
234e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
235e5a5dd6cSEnji Cooper 
236e5a5dd6cSEnji Cooper   uint64_t u;
237e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u)));
238e5a5dd6cSEnji Cooper   EXPECT_OK(read(cap_ro, &u, sizeof(u)));
239e5a5dd6cSEnji Cooper   EXPECT_EQ(42, (int)u);
240e5a5dd6cSEnji Cooper 
241e5a5dd6cSEnji Cooper   // Wait for the child.
242e5a5dd6cSEnji Cooper   int status;
243e5a5dd6cSEnji Cooper   EXPECT_EQ(child, waitpid(child, &status, 0));
244e5a5dd6cSEnji Cooper   int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
245e5a5dd6cSEnji Cooper   EXPECT_EQ(0, rc);
246e5a5dd6cSEnji Cooper 
247e5a5dd6cSEnji Cooper   close(cap_all);
248e5a5dd6cSEnji Cooper   close(cap_rw);
249e5a5dd6cSEnji Cooper   close(cap_wo);
250e5a5dd6cSEnji Cooper   close(cap_ro);
251e5a5dd6cSEnji Cooper   close(fd);
252e5a5dd6cSEnji Cooper }
253e5a5dd6cSEnji Cooper 
FORK_TEST(Linux,epoll)254e5a5dd6cSEnji Cooper FORK_TEST(Linux, epoll) {
255e5a5dd6cSEnji Cooper   int sock_fds[2];
256e5a5dd6cSEnji Cooper   EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
257e5a5dd6cSEnji Cooper   // Queue some data.
258e5a5dd6cSEnji Cooper   char buffer[4] = {1, 2, 3, 4};
259e5a5dd6cSEnji Cooper   EXPECT_OK(write(sock_fds[1], buffer, sizeof(buffer)));
260e5a5dd6cSEnji Cooper 
261e5a5dd6cSEnji Cooper   EXPECT_OK(cap_enter());  // Enter capability mode.
262e5a5dd6cSEnji Cooper 
263e5a5dd6cSEnji Cooper   int epoll_fd = epoll_create(1);
264e5a5dd6cSEnji Cooper   EXPECT_OK(epoll_fd);
265e5a5dd6cSEnji Cooper 
266e5a5dd6cSEnji Cooper   cap_rights_t r_rs;
267e5a5dd6cSEnji Cooper   cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
268e5a5dd6cSEnji Cooper   cap_rights_t r_ws;
269e5a5dd6cSEnji Cooper   cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
270e5a5dd6cSEnji Cooper   cap_rights_t r_rws;
271e5a5dd6cSEnji Cooper   cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
272e5a5dd6cSEnji Cooper   cap_rights_t r_rwspoll;
273e5a5dd6cSEnji Cooper   cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
274e5a5dd6cSEnji Cooper   cap_rights_t r_epoll;
275e5a5dd6cSEnji Cooper   cap_rights_init(&r_epoll, CAP_EPOLL_CTL);
276e5a5dd6cSEnji Cooper 
277e5a5dd6cSEnji Cooper   int cap_epoll_wo = dup(epoll_fd);
278e5a5dd6cSEnji Cooper   EXPECT_OK(cap_epoll_wo);
279e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_epoll_wo, &r_ws));
280e5a5dd6cSEnji Cooper   int cap_epoll_ro = dup(epoll_fd);
281e5a5dd6cSEnji Cooper   EXPECT_OK(cap_epoll_ro);
282e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_epoll_ro, &r_rs));
283e5a5dd6cSEnji Cooper   int cap_epoll_rw = dup(epoll_fd);
284e5a5dd6cSEnji Cooper   EXPECT_OK(cap_epoll_rw);
285e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_epoll_rw, &r_rws));
286e5a5dd6cSEnji Cooper   int cap_epoll_poll = dup(epoll_fd);
287e5a5dd6cSEnji Cooper   EXPECT_OK(cap_epoll_poll);
288e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_epoll_poll, &r_rwspoll));
289e5a5dd6cSEnji Cooper   int cap_epoll_ctl = dup(epoll_fd);
290e5a5dd6cSEnji Cooper   EXPECT_OK(cap_epoll_ctl);
291e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_epoll_ctl, &r_epoll));
292e5a5dd6cSEnji Cooper 
293e5a5dd6cSEnji Cooper   // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present.
294e5a5dd6cSEnji Cooper   struct epoll_event eev;
295e5a5dd6cSEnji Cooper   memset(&eev, 0, sizeof(eev));
296e5a5dd6cSEnji Cooper   eev.events = EPOLLIN|EPOLLOUT|EPOLLPRI;
297e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_ADD, sock_fds[0], &eev));
298e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_ADD, sock_fds[0], &eev));
299e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_ADD, sock_fds[0], &eev));
300e5a5dd6cSEnji Cooper   EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_ADD, sock_fds[0], &eev));
301e5a5dd6cSEnji Cooper   eev.events = EPOLLIN|EPOLLOUT;
302e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_MOD, sock_fds[0], &eev));
303e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_MOD, sock_fds[0], &eev));
304e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_MOD, sock_fds[0], &eev));
305e5a5dd6cSEnji Cooper   EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_MOD, sock_fds[0], &eev));
306e5a5dd6cSEnji Cooper 
307e5a5dd6cSEnji Cooper   // Running epoll_pwait(2) requires CAP_EVENT.
308e5a5dd6cSEnji Cooper   eev.events = 0;
309e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_ro, &eev, 1, 100, NULL));
310e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_wo, &eev, 1, 100, NULL));
311e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_pwait(cap_epoll_rw, &eev, 1, 100, NULL));
312e5a5dd6cSEnji Cooper   EXPECT_OK(epoll_pwait(cap_epoll_poll, &eev, 1, 100, NULL));
313e5a5dd6cSEnji Cooper   EXPECT_EQ(EPOLLIN, eev.events & EPOLLIN);
314e5a5dd6cSEnji Cooper 
315e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_ro, EPOLL_CTL_DEL, sock_fds[0], &eev));
316e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_wo, EPOLL_CTL_DEL, sock_fds[0], &eev));
317e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(epoll_ctl(cap_epoll_rw, EPOLL_CTL_DEL, sock_fds[0], &eev));
318e5a5dd6cSEnji Cooper   EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, sock_fds[0], &eev));
319e5a5dd6cSEnji Cooper 
320e5a5dd6cSEnji Cooper   close(cap_epoll_ctl);
321e5a5dd6cSEnji Cooper   close(cap_epoll_poll);
322e5a5dd6cSEnji Cooper   close(cap_epoll_rw);
323e5a5dd6cSEnji Cooper   close(cap_epoll_ro);
324e5a5dd6cSEnji Cooper   close(cap_epoll_wo);
325e5a5dd6cSEnji Cooper   close(epoll_fd);
326e5a5dd6cSEnji Cooper   close(sock_fds[1]);
327e5a5dd6cSEnji Cooper   close(sock_fds[0]);
328e5a5dd6cSEnji Cooper }
329e5a5dd6cSEnji Cooper 
// Check that fstatat(2) needs CAP_FSTAT on the descriptor it is given, both
// when the descriptor is the file itself (empty path with AT_EMPTY_PATH) and
// when it is the directory a relative path is resolved against.
TEST(Linux, fstatat) {
  int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);
  unsigned char buffer[] = {1, 2, 3, 4};
  EXPECT_OK(write(fd, buffer, sizeof(buffer)));
  cap_rights_t rights;
  int cap_rf = dup(fd);
  EXPECT_OK(cap_rf);
  EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ)));

  // Stat the file through its own descriptor.
  struct stat info;
  EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH));

  close(cap_ro);
  close(cap_rf);
  close(fd);

  // Repeat relative to the containing directory.
  int dir = open(tmpdir.c_str(), O_RDONLY);
  EXPECT_OK(dir);
  int dir_rf = dup(dir);
  EXPECT_OK(dir_rf);
  EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  // Duplicate the directory, not `fd`: `fd` was closed above, so dup(fd)
  // would either fail or silently pick up whatever reused that number.
  int dir_ro = dup(dir);
  EXPECT_OK(dir_ro);
  EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ)));

  EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH));

  close(dir_ro);
  close(dir_rf);
  close(dir);

  unlink(TmpFile("cap_fstatat"));
}
371e5a5dd6cSEnji Cooper 
// fanotify support may not be available at compile-time
373e5a5dd6cSEnji Cooper #ifdef __NR_fanotify_init
TEST(Linux,FanotifyIfRoot)374b6973c8fSAlex Richardson TEST(Linux, FanotifyIfRoot) {
375b6973c8fSAlex Richardson   GTEST_SKIP_IF_NOT_ROOT();
376e5a5dd6cSEnji Cooper   int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR);
377e5a5dd6cSEnji Cooper   EXPECT_OK(fa_fd);
378e5a5dd6cSEnji Cooper   if (fa_fd < 0) return;  // May not be enabled
379e5a5dd6cSEnji Cooper 
380e5a5dd6cSEnji Cooper   cap_rights_t r_rs;
381e5a5dd6cSEnji Cooper   cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
382e5a5dd6cSEnji Cooper   cap_rights_t r_ws;
383e5a5dd6cSEnji Cooper   cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
384e5a5dd6cSEnji Cooper   cap_rights_t r_rws;
385e5a5dd6cSEnji Cooper   cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
386e5a5dd6cSEnji Cooper   cap_rights_t r_rwspoll;
387e5a5dd6cSEnji Cooper   cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
388e5a5dd6cSEnji Cooper   cap_rights_t r_rwsnotify;
389e5a5dd6cSEnji Cooper   cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
390e5a5dd6cSEnji Cooper   cap_rights_t r_rsl;
391e5a5dd6cSEnji Cooper   cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP);
392e5a5dd6cSEnji Cooper   cap_rights_t r_rslstat;
393e5a5dd6cSEnji Cooper   cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT);
394e5a5dd6cSEnji Cooper   cap_rights_t r_rsstat;
395e5a5dd6cSEnji Cooper   cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT);
396e5a5dd6cSEnji Cooper 
397e5a5dd6cSEnji Cooper   int cap_fd_ro = dup(fa_fd);
398e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_ro);
399e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
400e5a5dd6cSEnji Cooper   int cap_fd_wo = dup(fa_fd);
401e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_wo);
402e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
403e5a5dd6cSEnji Cooper   int cap_fd_rw = dup(fa_fd);
404e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_rw);
405e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
406e5a5dd6cSEnji Cooper   int cap_fd_poll = dup(fa_fd);
407e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_poll);
408e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll));
409e5a5dd6cSEnji Cooper   int cap_fd_not = dup(fa_fd);
410e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_not);
411e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify));
412e5a5dd6cSEnji Cooper 
413e5a5dd6cSEnji Cooper   int rc = mkdir(TmpFile("cap_notify"), 0755);
414e5a5dd6cSEnji Cooper   EXPECT_TRUE(rc == 0 || errno == EEXIST);
415e5a5dd6cSEnji Cooper   int dfd = open(TmpFile("cap_notify"), O_RDONLY);
416e5a5dd6cSEnji Cooper   EXPECT_OK(dfd);
417e5a5dd6cSEnji Cooper   int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644);
418e5a5dd6cSEnji Cooper   close(fd);
419e5a5dd6cSEnji Cooper   int cap_dfd = dup(dfd);
420e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd);
421e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat));
422e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd);
423e5a5dd6cSEnji Cooper   int cap_dfd_rs = dup(dfd);
424e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd_rs);
425e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs));
426e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd_rs);
427e5a5dd6cSEnji Cooper   int cap_dfd_rsstat = dup(dfd);
428e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd_rsstat);
429e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat));
430e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd_rsstat);
431e5a5dd6cSEnji Cooper   int cap_dfd_rsl = dup(dfd);
432e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd_rsl);
433e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl));
434e5a5dd6cSEnji Cooper   EXPECT_OK(cap_dfd_rsl);
435e5a5dd6cSEnji Cooper 
436e5a5dd6cSEnji Cooper   // Need CAP_NOTIFY to change what's monitored.
437e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
438e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
439e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
440e5a5dd6cSEnji Cooper   EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
441e5a5dd6cSEnji Cooper 
442e5a5dd6cSEnji Cooper   // Need CAP_FSTAT on the thing monitored.
443e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL));
444e5a5dd6cSEnji Cooper   EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL));
445e5a5dd6cSEnji Cooper 
446e5a5dd6cSEnji Cooper   // Too add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd.
447e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file"));
448e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file"));
449e5a5dd6cSEnji Cooper   EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file"));
450e5a5dd6cSEnji Cooper 
451e5a5dd6cSEnji Cooper   pid_t child = fork();
452e5a5dd6cSEnji Cooper   if (child == 0) {
453e5a5dd6cSEnji Cooper     // Child: Perform activity in the directory under notify.
454e5a5dd6cSEnji Cooper     sleep(1);
455e5a5dd6cSEnji Cooper     unlink(TmpFile("cap_notify/temp"));
456e5a5dd6cSEnji Cooper     int fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644);
457e5a5dd6cSEnji Cooper     close(fd);
458e5a5dd6cSEnji Cooper     exit(0);
459e5a5dd6cSEnji Cooper   }
460e5a5dd6cSEnji Cooper 
461e5a5dd6cSEnji Cooper   // Need CAP_EVENT to poll.
462e5a5dd6cSEnji Cooper   struct pollfd poll_fd;
463e5a5dd6cSEnji Cooper   poll_fd.revents = 0;
464e5a5dd6cSEnji Cooper   poll_fd.events = POLLIN;
465e5a5dd6cSEnji Cooper   poll_fd.fd = cap_fd_rw;
466e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 1400));
467e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLIN));
468e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
469e5a5dd6cSEnji Cooper 
470e5a5dd6cSEnji Cooper   poll_fd.fd = cap_fd_not;
471e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 1400));
472e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLIN));
473e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
474e5a5dd6cSEnji Cooper 
475e5a5dd6cSEnji Cooper   poll_fd.fd = cap_fd_poll;
476e5a5dd6cSEnji Cooper   EXPECT_OK(poll(&poll_fd, 1, 1400));
477e5a5dd6cSEnji Cooper   EXPECT_NE(0, (poll_fd.revents & POLLIN));
478e5a5dd6cSEnji Cooper   EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));
479e5a5dd6cSEnji Cooper 
480e5a5dd6cSEnji Cooper   // Need CAP_READ to read.
481e5a5dd6cSEnji Cooper   struct fanotify_event_metadata ev;
482e5a5dd6cSEnji Cooper   memset(&ev, 0, sizeof(ev));
483e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev)));
484e5a5dd6cSEnji Cooper   rc = read(fa_fd, &ev, sizeof(ev));
485e5a5dd6cSEnji Cooper   EXPECT_OK(rc);
486e5a5dd6cSEnji Cooper   EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc);
487e5a5dd6cSEnji Cooper   EXPECT_EQ(child, ev.pid);
488e5a5dd6cSEnji Cooper   EXPECT_NE(0, ev.fd);
489e5a5dd6cSEnji Cooper 
490e5a5dd6cSEnji Cooper   // TODO(drysdale): reinstate if/when capsicum-linux propagates rights
491e5a5dd6cSEnji Cooper   // to fanotify-generated FDs.
492e5a5dd6cSEnji Cooper #ifdef OMIT
493e5a5dd6cSEnji Cooper   // fanotify(7) gives us a FD for the changed file.  This should
494e5a5dd6cSEnji Cooper   // only have rights that are a subset of those for the original
495e5a5dd6cSEnji Cooper   // monitored directory file descriptor.
496e5a5dd6cSEnji Cooper   cap_rights_t rights;
497*fba81b33SAlan Somers   CAL_ALL(&rights);
498e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_get(ev.fd, &rights));
499e5a5dd6cSEnji Cooper   EXPECT_RIGHTS_IN(&rights, &r_rslstat);
500e5a5dd6cSEnji Cooper #endif
501e5a5dd6cSEnji Cooper 
502e5a5dd6cSEnji Cooper   // Wait for the child.
503e5a5dd6cSEnji Cooper   int status;
504e5a5dd6cSEnji Cooper   EXPECT_EQ(child, waitpid(child, &status, 0));
505e5a5dd6cSEnji Cooper   rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
506e5a5dd6cSEnji Cooper   EXPECT_EQ(0, rc);
507e5a5dd6cSEnji Cooper 
508e5a5dd6cSEnji Cooper   close(cap_dfd_rsstat);
509e5a5dd6cSEnji Cooper   close(cap_dfd_rsl);
510e5a5dd6cSEnji Cooper   close(cap_dfd_rs);
511e5a5dd6cSEnji Cooper   close(cap_dfd);
512e5a5dd6cSEnji Cooper   close(dfd);
513e5a5dd6cSEnji Cooper   unlink(TmpFile("cap_notify/file"));
514e5a5dd6cSEnji Cooper   unlink(TmpFile("cap_notify/temp"));
515e5a5dd6cSEnji Cooper   rmdir(TmpFile("cap_notify"));
516e5a5dd6cSEnji Cooper   close(cap_fd_not);
517e5a5dd6cSEnji Cooper   close(cap_fd_poll);
518e5a5dd6cSEnji Cooper   close(cap_fd_rw);
519e5a5dd6cSEnji Cooper   close(cap_fd_wo);
520e5a5dd6cSEnji Cooper   close(cap_fd_ro);
521e5a5dd6cSEnji Cooper   close(fa_fd);
522e5a5dd6cSEnji Cooper }
523e5a5dd6cSEnji Cooper #endif
524e5a5dd6cSEnji Cooper 
TEST(Linux,inotify)525e5a5dd6cSEnji Cooper TEST(Linux, inotify) {
526e5a5dd6cSEnji Cooper   int i_fd = inotify_init();
527e5a5dd6cSEnji Cooper   EXPECT_OK(i_fd);
528e5a5dd6cSEnji Cooper 
529e5a5dd6cSEnji Cooper   cap_rights_t r_rs;
530e5a5dd6cSEnji Cooper   cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
531e5a5dd6cSEnji Cooper   cap_rights_t r_ws;
532e5a5dd6cSEnji Cooper   cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
533e5a5dd6cSEnji Cooper   cap_rights_t r_rws;
534e5a5dd6cSEnji Cooper   cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
535e5a5dd6cSEnji Cooper   cap_rights_t r_rwsnotify;
536e5a5dd6cSEnji Cooper   cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
537e5a5dd6cSEnji Cooper 
538e5a5dd6cSEnji Cooper   int cap_fd_ro = dup(i_fd);
539e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_ro);
540e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
541e5a5dd6cSEnji Cooper   int cap_fd_wo = dup(i_fd);
542e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_wo);
543e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
544e5a5dd6cSEnji Cooper   int cap_fd_rw = dup(i_fd);
545e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_rw);
546e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
547e5a5dd6cSEnji Cooper   int cap_fd_all = dup(i_fd);
548e5a5dd6cSEnji Cooper   EXPECT_OK(cap_fd_all);
549e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify));
550e5a5dd6cSEnji Cooper 
551e5a5dd6cSEnji Cooper   int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644);
552e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY));
553e5a5dd6cSEnji Cooper   int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY);
554e5a5dd6cSEnji Cooper   EXPECT_OK(wd);
555e5a5dd6cSEnji Cooper 
556e5a5dd6cSEnji Cooper   unsigned char buffer[] = {1, 2, 3, 4};
557e5a5dd6cSEnji Cooper   EXPECT_OK(write(fd, buffer, sizeof(buffer)));
558e5a5dd6cSEnji Cooper 
559e5a5dd6cSEnji Cooper   struct inotify_event iev;
560e5a5dd6cSEnji Cooper   memset(&iev, 0, sizeof(iev));
561e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev)));
562e5a5dd6cSEnji Cooper   int rc = read(cap_fd_ro, &iev, sizeof(iev));
563e5a5dd6cSEnji Cooper   EXPECT_OK(rc);
564e5a5dd6cSEnji Cooper   EXPECT_EQ((int)sizeof(iev), rc);
565e5a5dd6cSEnji Cooper   EXPECT_EQ(wd, iev.wd);
566e5a5dd6cSEnji Cooper 
567e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd));
568e5a5dd6cSEnji Cooper   EXPECT_OK(inotify_rm_watch(cap_fd_all, wd));
569e5a5dd6cSEnji Cooper 
570e5a5dd6cSEnji Cooper   close(fd);
571e5a5dd6cSEnji Cooper   close(cap_fd_all);
572e5a5dd6cSEnji Cooper   close(cap_fd_rw);
573e5a5dd6cSEnji Cooper   close(cap_fd_wo);
574e5a5dd6cSEnji Cooper   close(cap_fd_ro);
575e5a5dd6cSEnji Cooper   close(i_fd);
576e5a5dd6cSEnji Cooper   unlink(TmpFile("cap_inotify"));
577e5a5dd6cSEnji Cooper }
578e5a5dd6cSEnji Cooper 
// For each of the candidate mini-me binaries that can be opened, fork a child
// that enters capability mode and fexecve()s the binary with "--capmode"; the
// child is expected to exit with status 0.  Skipped if no candidate is present.
TEST(Linux, ArchChangeIfAvailable) {
  const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"};
  const char* progs[] = {NULL, NULL, NULL};
  char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL};
  char* null_envp[] = {NULL};
  int fds[3];
  int count = 0;

  // Compact the candidates that exist into progs[0..count) / fds[0..count).
  for (int ii = 0; ii < 3; ii++) {
    fds[count] = open(prog_candidates[ii], O_RDONLY);
    if (fds[count] >= 0) {
      progs[count] = prog_candidates[ii];
      count++;
    }
  }
  if (count == 0) {
    GTEST_SKIP() << "no different-architecture programs available";
  }

  for (int ii = 0; ii < count; ii++) {
    // Fork-and-exec a binary of this architecture.
    pid_t child = fork();
    EXPECT_OK(child);
    if (child < 0) {
      // Nothing to wait for: waitpid(-1, ...) would wait on any child at all.
      close(fds[ii]);
      continue;
    }
    if (child == 0) {
      EXPECT_OK(cap_enter());  // Enter capability mode
      if (verbose) fprintf(stderr, "[%d] call fexecve(%s, %s)\n",
                           getpid_(), progs[ii], argv_pass[1]);
      argv_pass[0] = (char *)progs[ii];
      int rc = fexecve_(fds[ii], argv_pass, null_envp);
      fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], rc, errno);
      exit(99);  // Should not reach here.
    }
    // Start from a value that decodes as "did not exit normally", so a failed
    // waitpid() cannot be mistaken for a clean exit.
    int status = -1;
    EXPECT_EQ(child, waitpid(child, &status, 0));
    int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
    EXPECT_EQ(0, rc);
    close(fds[ii]);
  }
}
617e5a5dd6cSEnji Cooper 
// Check that setns(2) on a namespace descriptor requires the CAP_SETNS right,
// and that once in capability mode setns(2) is refused while unshare(2) still
// works.  Requires root.
FORK_TEST(Linux, NamespaceIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();
  pid_t me = getpid_();

  // Create a new UTS namespace.
  EXPECT_OK(unshare(CLONE_NEWUTS));
  // Open an FD to its symlink.
  char buffer[256];
  snprintf(buffer, sizeof(buffer), "/proc/%d/ns/uts", me);
  int ns_fd = open(buffer, O_RDONLY);
  EXPECT_OK(ns_fd);

  cap_rights_t r_rwlstat;
  cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT);
  cap_rights_t r_rwlstatns;
  cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS);

  // Two duplicates of the namespace descriptor: one without and one with
  // CAP_SETNS.
  int cap_fd = dup(ns_fd);
  EXPECT_OK(cap_fd);
  EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat));
  int cap_fd_setns = dup(ns_fd);
  EXPECT_OK(cap_fd_setns);
  EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns));
  EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS));
  EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS));

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // No setns(2) but unshare(2) is allowed.
  EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS));
  EXPECT_OK(unshare(CLONE_NEWUTS));
}
649e5a5dd6cSEnji Cooper 
SendFD(int fd,int over)650e5a5dd6cSEnji Cooper static void SendFD(int fd, int over) {
651e5a5dd6cSEnji Cooper   struct msghdr mh;
652e5a5dd6cSEnji Cooper   mh.msg_name = NULL;  // No address needed
653e5a5dd6cSEnji Cooper   mh.msg_namelen = 0;
654e5a5dd6cSEnji Cooper   char buffer1[1024];
655e5a5dd6cSEnji Cooper   struct iovec iov[1];
656e5a5dd6cSEnji Cooper   iov[0].iov_base = buffer1;
657e5a5dd6cSEnji Cooper   iov[0].iov_len = sizeof(buffer1);
658e5a5dd6cSEnji Cooper   mh.msg_iov = iov;
659e5a5dd6cSEnji Cooper   mh.msg_iovlen = 1;
660e5a5dd6cSEnji Cooper   char buffer2[1024];
661e5a5dd6cSEnji Cooper   mh.msg_control = buffer2;
662e5a5dd6cSEnji Cooper   mh.msg_controllen = CMSG_LEN(sizeof(int));
663e5a5dd6cSEnji Cooper   struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
664e5a5dd6cSEnji Cooper   cmptr->cmsg_level = SOL_SOCKET;
665e5a5dd6cSEnji Cooper   cmptr->cmsg_type = SCM_RIGHTS;
666e5a5dd6cSEnji Cooper   cmptr->cmsg_len = CMSG_LEN(sizeof(int));
667e5a5dd6cSEnji Cooper   *(int *)CMSG_DATA(cmptr) = fd;
668e5a5dd6cSEnji Cooper   buffer1[0] = 0;
669e5a5dd6cSEnji Cooper   iov[0].iov_len = 1;
670e5a5dd6cSEnji Cooper   int rc = sendmsg(over, &mh, 0);
671e5a5dd6cSEnji Cooper   EXPECT_OK(rc);
672e5a5dd6cSEnji Cooper }
673e5a5dd6cSEnji Cooper 
ReceiveFD(int over)674e5a5dd6cSEnji Cooper static int ReceiveFD(int over) {
675e5a5dd6cSEnji Cooper   struct msghdr mh;
676e5a5dd6cSEnji Cooper   mh.msg_name = NULL;  // No address needed
677e5a5dd6cSEnji Cooper   mh.msg_namelen = 0;
678e5a5dd6cSEnji Cooper   char buffer1[1024];
679e5a5dd6cSEnji Cooper   struct iovec iov[1];
680e5a5dd6cSEnji Cooper   iov[0].iov_base = buffer1;
681e5a5dd6cSEnji Cooper   iov[0].iov_len = sizeof(buffer1);
682e5a5dd6cSEnji Cooper   mh.msg_iov = iov;
683e5a5dd6cSEnji Cooper   mh.msg_iovlen = 1;
684e5a5dd6cSEnji Cooper   char buffer2[1024];
685e5a5dd6cSEnji Cooper   mh.msg_control = buffer2;
686e5a5dd6cSEnji Cooper   mh.msg_controllen = sizeof(buffer2);
687e5a5dd6cSEnji Cooper   int rc = recvmsg(over, &mh, 0);
688e5a5dd6cSEnji Cooper   EXPECT_OK(rc);
689e5a5dd6cSEnji Cooper   EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
690e5a5dd6cSEnji Cooper   struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
691e5a5dd6cSEnji Cooper   int fd = *(int*)CMSG_DATA(cmptr);
692e5a5dd6cSEnji Cooper   EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
693e5a5dd6cSEnji Cooper   cmptr = CMSG_NXTHDR(&mh, cmptr);
694e5a5dd6cSEnji Cooper   EXPECT_TRUE(cmptr == NULL);
695e5a5dd6cSEnji Cooper   return fd;
696e5a5dd6cSEnji Cooper }
697e5a5dd6cSEnji Cooper 
// Process descriptor filled in by pdfork() in PidNamespacePdForkIfRoot and
// read by ChildFunc after clone().
static int shared_pd = -1;
// socketpair() endpoints used by the namespace tests: the test body uses [0],
// the clone()d child uses [1].  Start at -1 so that the close() calls after a
// failed socketpair() do not close descriptor 0.
static int shared_sock_fds[2] = {-1, -1};
700e5a5dd6cSEnji Cooper 
ChildFunc(void * arg)701e5a5dd6cSEnji Cooper static int ChildFunc(void *arg) {
702e5a5dd6cSEnji Cooper   // This function is running in a new PID namespace, and so is pid 1.
703e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "    ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
704e5a5dd6cSEnji Cooper   EXPECT_EQ(1, getpid_());
705e5a5dd6cSEnji Cooper   EXPECT_EQ(0, getppid());
706e5a5dd6cSEnji Cooper 
707e5a5dd6cSEnji Cooper   // The shared process descriptor is outside our namespace, so we cannot
708e5a5dd6cSEnji Cooper   // get its pid.
709e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "    ChildFunc: shared_pd=%d\n", shared_pd);
710e5a5dd6cSEnji Cooper   pid_t shared_child = -1;
711e5a5dd6cSEnji Cooper   EXPECT_OK(pdgetpid(shared_pd, &shared_child));
712e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "    ChildFunc: corresponding pid=%d\n", shared_child);
713e5a5dd6cSEnji Cooper   EXPECT_EQ(0, shared_child);
714e5a5dd6cSEnji Cooper 
715e5a5dd6cSEnji Cooper   // But we can pdkill() it even so.
716e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "    ChildFunc: call pdkill(pd=%d)\n", shared_pd);
717e5a5dd6cSEnji Cooper   EXPECT_OK(pdkill(shared_pd, SIGINT));
718e5a5dd6cSEnji Cooper 
719e5a5dd6cSEnji Cooper   int pd;
720e5a5dd6cSEnji Cooper   pid_t child = pdfork(&pd, 0);
721e5a5dd6cSEnji Cooper   EXPECT_OK(child);
722e5a5dd6cSEnji Cooper   if (child == 0) {
723e5a5dd6cSEnji Cooper     // Child: expect pid 2.
724e5a5dd6cSEnji Cooper     if (verbose) fprintf(stderr, "      child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
725e5a5dd6cSEnji Cooper     EXPECT_EQ(2, getpid_());
726e5a5dd6cSEnji Cooper     EXPECT_EQ(1, getppid());
727e5a5dd6cSEnji Cooper     while (true) {
728e5a5dd6cSEnji Cooper       if (verbose) fprintf(stderr, "      child of ChildFunc: \"I aten't dead\"\n");
729e5a5dd6cSEnji Cooper       sleep(1);
730e5a5dd6cSEnji Cooper     }
731e5a5dd6cSEnji Cooper     exit(0);
732e5a5dd6cSEnji Cooper   }
733e5a5dd6cSEnji Cooper   EXPECT_EQ(2, child);
734e5a5dd6cSEnji Cooper   EXPECT_PID_ALIVE(child);
735e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "    ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
736e5a5dd6cSEnji Cooper                        pd, child, ProcessState(child));
737e5a5dd6cSEnji Cooper 
738e5a5dd6cSEnji Cooper   pid_t pid;
739e5a5dd6cSEnji Cooper   EXPECT_OK(pdgetpid(pd, &pid));
740e5a5dd6cSEnji Cooper   EXPECT_EQ(child, pid);
741e5a5dd6cSEnji Cooper 
742e5a5dd6cSEnji Cooper   sleep(2);
743e5a5dd6cSEnji Cooper 
744e5a5dd6cSEnji Cooper   // Send the process descriptor over UNIX domain socket back to parent.
745e5a5dd6cSEnji Cooper   SendFD(pd, shared_sock_fds[1]);
746e5a5dd6cSEnji Cooper 
747e5a5dd6cSEnji Cooper   // Wait for death of (grand)child, killed by our parent.
748e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "    ChildFunc: wait on pid=%d\n", child);
749e5a5dd6cSEnji Cooper   int status;
750e5a5dd6cSEnji Cooper   EXPECT_EQ(child, wait4(child, &status, __WALL, NULL));
751e5a5dd6cSEnji Cooper 
752e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "    ChildFunc: return 0\n");
753e5a5dd6cSEnji Cooper   return 0;
754e5a5dd6cSEnji Cooper }
755e5a5dd6cSEnji Cooper 
// Size in bytes of the stack handed to clone()d children.
#define STACK_SIZE (1024 * 1024)
// Stack for clone()d children; callers pass child_stack + STACK_SIZE as the
// initial stack pointer.  Aligned to 16 bytes so that the top of the stack is
// 16-byte aligned as well (STACK_SIZE is a multiple of 16).
alignas(16) static char child_stack[STACK_SIZE];
758e5a5dd6cSEnji Cooper 
759b6973c8fSAlex Richardson // TODO(drysdale): fork into a user namespace first so GTEST_SKIP_IF_NOT_ROOT can be removed.
TEST(Linux,PidNamespacePdForkIfRoot)760b6973c8fSAlex Richardson TEST(Linux, PidNamespacePdForkIfRoot) {
761b6973c8fSAlex Richardson   GTEST_SKIP_IF_NOT_ROOT();
762e5a5dd6cSEnji Cooper   // Pass process descriptors in both directions across a PID namespace boundary.
763e5a5dd6cSEnji Cooper   // pdfork() off a child before we start, holding its process descriptor in a global
764e5a5dd6cSEnji Cooper   // variable that's accessible to children.
765e5a5dd6cSEnji Cooper   pid_t firstborn = pdfork(&shared_pd, 0);
766e5a5dd6cSEnji Cooper   EXPECT_OK(firstborn);
767e5a5dd6cSEnji Cooper   if (firstborn == 0) {
768e5a5dd6cSEnji Cooper     while (true) {
769e5a5dd6cSEnji Cooper       if (verbose) fprintf(stderr, "  Firstborn: \"I aten't dead\"\n");
770e5a5dd6cSEnji Cooper       sleep(1);
771e5a5dd6cSEnji Cooper     }
772e5a5dd6cSEnji Cooper     exit(0);
773e5a5dd6cSEnji Cooper   }
774e5a5dd6cSEnji Cooper   EXPECT_PID_ALIVE(firstborn);
775e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n",
776e5a5dd6cSEnji Cooper                        shared_pd, firstborn, ProcessState(firstborn));
777e5a5dd6cSEnji Cooper   sleep(2);
778e5a5dd6cSEnji Cooper 
779e5a5dd6cSEnji Cooper   // Prepare sockets to communicate with child process.
780e5a5dd6cSEnji Cooper   EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
781e5a5dd6cSEnji Cooper 
782e5a5dd6cSEnji Cooper   // Clone into a child process with a new pid namespace.
783e5a5dd6cSEnji Cooper   pid_t child = clone(ChildFunc, child_stack + STACK_SIZE,
784e5a5dd6cSEnji Cooper                       CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
785e5a5dd6cSEnji Cooper   EXPECT_OK(child);
786e5a5dd6cSEnji Cooper   EXPECT_PID_ALIVE(child);
787e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
788e5a5dd6cSEnji Cooper 
789e5a5dd6cSEnji Cooper   // Ensure the child runs.  First thing it does is to kill our firstborn, using shared_pd.
790e5a5dd6cSEnji Cooper   sleep(1);
791e5a5dd6cSEnji Cooper   EXPECT_PID_DEAD(firstborn);
792e5a5dd6cSEnji Cooper 
793e5a5dd6cSEnji Cooper   // But we can still retrieve firstborn's PID, as it's not been reaped yet.
794e5a5dd6cSEnji Cooper   pid_t child0;
795e5a5dd6cSEnji Cooper   EXPECT_OK(pdgetpid(shared_pd, &child0));
796e5a5dd6cSEnji Cooper   EXPECT_EQ(firstborn, child0);
797e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n",
798e5a5dd6cSEnji Cooper                        shared_pd, child0, ProcessState(child0));
799e5a5dd6cSEnji Cooper 
800e5a5dd6cSEnji Cooper   // Now reap it.
801e5a5dd6cSEnji Cooper   int status;
802e5a5dd6cSEnji Cooper   EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL));
803e5a5dd6cSEnji Cooper 
804e5a5dd6cSEnji Cooper   // Get the process descriptor of the child-of-child via socket transfer.
805e5a5dd6cSEnji Cooper   int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
806e5a5dd6cSEnji Cooper 
807e5a5dd6cSEnji Cooper   // Our notion of the pid associated with the grandchild is in the main PID namespace.
808e5a5dd6cSEnji Cooper   pid_t grandchild;
809e5a5dd6cSEnji Cooper   EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
810e5a5dd6cSEnji Cooper   EXPECT_NE(2, grandchild);
811e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: pre-pdkill:  pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
812e5a5dd6cSEnji Cooper                        grandchild_pd, grandchild, ProcessState(grandchild));
813e5a5dd6cSEnji Cooper   EXPECT_PID_ALIVE(grandchild);
814e5a5dd6cSEnji Cooper 
815e5a5dd6cSEnji Cooper   // Kill the grandchild via the process descriptor.
816e5a5dd6cSEnji Cooper   EXPECT_OK(pdkill(grandchild_pd, SIGINT));
817e5a5dd6cSEnji Cooper   usleep(10000);
818e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
819e5a5dd6cSEnji Cooper                        grandchild_pd, grandchild, ProcessState(grandchild));
820e5a5dd6cSEnji Cooper   EXPECT_PID_DEAD(grandchild);
821e5a5dd6cSEnji Cooper 
822e5a5dd6cSEnji Cooper   sleep(2);
823e5a5dd6cSEnji Cooper 
824e5a5dd6cSEnji Cooper   // Wait for the child.
825e5a5dd6cSEnji Cooper   EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
826e5a5dd6cSEnji Cooper   int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
827e5a5dd6cSEnji Cooper   EXPECT_EQ(0, rc);
828e5a5dd6cSEnji Cooper 
829e5a5dd6cSEnji Cooper   close(shared_sock_fds[0]);
830e5a5dd6cSEnji Cooper   close(shared_sock_fds[1]);
831e5a5dd6cSEnji Cooper   close(shared_pd);
832e5a5dd6cSEnji Cooper   close(grandchild_pd);
833e5a5dd6cSEnji Cooper }
834e5a5dd6cSEnji Cooper 
NSInit(void * data)835e5a5dd6cSEnji Cooper int NSInit(void *data) {
836e5a5dd6cSEnji Cooper   // This function is running in a new PID namespace, and so is pid 1.
837e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "  NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
838e5a5dd6cSEnji Cooper   EXPECT_EQ(1, getpid_());
839e5a5dd6cSEnji Cooper   EXPECT_EQ(0, getppid());
840e5a5dd6cSEnji Cooper 
841e5a5dd6cSEnji Cooper   int pd;
842e5a5dd6cSEnji Cooper   pid_t child = pdfork(&pd, 0);
843e5a5dd6cSEnji Cooper   EXPECT_OK(child);
844e5a5dd6cSEnji Cooper   if (child == 0) {
845e5a5dd6cSEnji Cooper     // Child: loop forever until terminated.
846e5a5dd6cSEnji Cooper     if (verbose) fprintf(stderr, "    child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
847e5a5dd6cSEnji Cooper     while (true) {
848e5a5dd6cSEnji Cooper       if (verbose) fprintf(stderr, "    child of NSInit: \"I aten't dead\"\n");
849e5a5dd6cSEnji Cooper       usleep(100000);
850e5a5dd6cSEnji Cooper     }
851e5a5dd6cSEnji Cooper     exit(0);
852e5a5dd6cSEnji Cooper   }
853e5a5dd6cSEnji Cooper   EXPECT_EQ(2, child);
854e5a5dd6cSEnji Cooper   EXPECT_PID_ALIVE(child);
855e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "  NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
856e5a5dd6cSEnji Cooper                        pd, child, ProcessState(child));
857e5a5dd6cSEnji Cooper   sleep(1);
858e5a5dd6cSEnji Cooper 
859e5a5dd6cSEnji Cooper   // Send the process descriptor over UNIX domain socket back to parent.
860e5a5dd6cSEnji Cooper   SendFD(pd, shared_sock_fds[1]);
861e5a5dd6cSEnji Cooper   close(pd);
862e5a5dd6cSEnji Cooper 
863e5a5dd6cSEnji Cooper   // Wait for a byte back in the other direction.
864e5a5dd6cSEnji Cooper   int value;
865e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "  NSInit: block waiting for value\n");
866e5a5dd6cSEnji Cooper   read(shared_sock_fds[1], &value, sizeof(value));
867e5a5dd6cSEnji Cooper 
868e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "  NSInit: return 0\n");
869e5a5dd6cSEnji Cooper   return 0;
870e5a5dd6cSEnji Cooper }
871e5a5dd6cSEnji Cooper 
// Requires root.  Runs NSInit as pid 1 of a new PID namespace, obtains a
// process descriptor for NSInit's child, then lets NSInit exit and checks that
// NSInit becomes a zombie and its child goes away with it.
TEST(Linux, DeadNSInitIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(NSInit, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  usleep(10000);
  EXPECT_OK(child);
  EXPECT_PID_ALIVE(child);
  if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
  pid_t grandchild = -1;  // Defined value even if pdgetpid() fails.
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));

  // Send an int to the child to trigger its termination.  Grandchild should also
  // go, as its init process is gone.
  int zero = 0;
  if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
  EXPECT_EQ((ssize_t)sizeof(zero), write(shared_sock_fds[0], &zero, sizeof(zero)));
  EXPECT_PID_ZOMBIE(child);
  EXPECT_PID_GONE(grandchild);

  // Wait for the child.  The status is preset to a value that decodes as "did
  // not exit normally", because waitpid() with WNOHANG leaves it untouched when
  // the child has not yet changed state.
  int status = -1;
  EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);
  EXPECT_PID_GONE(child);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);
  close(grandchild_pd);

  if (verbose) {
    fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
    fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
  }
}
916e5a5dd6cSEnji Cooper 
TEST(Linux,DeadNSInit2IfRoot)917b6973c8fSAlex Richardson TEST(Linux, DeadNSInit2IfRoot) {
918b6973c8fSAlex Richardson   GTEST_SKIP_IF_NOT_ROOT();
919e5a5dd6cSEnji Cooper 
920e5a5dd6cSEnji Cooper   // Prepare sockets to communicate with child process.
921e5a5dd6cSEnji Cooper   EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));
922e5a5dd6cSEnji Cooper 
923e5a5dd6cSEnji Cooper   // Clone into a child process with a new pid namespace.
924e5a5dd6cSEnji Cooper   pid_t child = clone(NSInit, child_stack + STACK_SIZE,
925e5a5dd6cSEnji Cooper                       CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
926e5a5dd6cSEnji Cooper   usleep(10000);
927e5a5dd6cSEnji Cooper   EXPECT_OK(child);
928e5a5dd6cSEnji Cooper   EXPECT_PID_ALIVE(child);
929e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
930e5a5dd6cSEnji Cooper 
931e5a5dd6cSEnji Cooper   // Get the process descriptor of the child-of-child via socket transfer.
932e5a5dd6cSEnji Cooper   int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
933e5a5dd6cSEnji Cooper   pid_t grandchild;
934e5a5dd6cSEnji Cooper   EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
935e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));
936e5a5dd6cSEnji Cooper 
937e5a5dd6cSEnji Cooper   // Kill the grandchild
938e5a5dd6cSEnji Cooper   EXPECT_OK(pdkill(grandchild_pd, SIGINT));
939e5a5dd6cSEnji Cooper   usleep(10000);
940e5a5dd6cSEnji Cooper   EXPECT_PID_ZOMBIE(grandchild);
941e5a5dd6cSEnji Cooper   // Close the process descriptor, so there are now no procdesc references to grandchild.
942e5a5dd6cSEnji Cooper   close(grandchild_pd);
943e5a5dd6cSEnji Cooper 
944e5a5dd6cSEnji Cooper   // Send an int to the child to trigger its termination.  Grandchild should also
945e5a5dd6cSEnji Cooper   // go, as its init process is gone.
946e5a5dd6cSEnji Cooper   int zero = 0;
947e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
948e5a5dd6cSEnji Cooper   write(shared_sock_fds[0], &zero, sizeof(zero));
949e5a5dd6cSEnji Cooper   EXPECT_PID_ZOMBIE(child);
950e5a5dd6cSEnji Cooper   EXPECT_PID_GONE(grandchild);
951e5a5dd6cSEnji Cooper 
952e5a5dd6cSEnji Cooper   // Wait for the child.
953e5a5dd6cSEnji Cooper   int status;
954e5a5dd6cSEnji Cooper   EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
955e5a5dd6cSEnji Cooper   int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
956e5a5dd6cSEnji Cooper   EXPECT_EQ(0, rc);
957e5a5dd6cSEnji Cooper 
958e5a5dd6cSEnji Cooper   close(shared_sock_fds[0]);
959e5a5dd6cSEnji Cooper   close(shared_sock_fds[1]);
960e5a5dd6cSEnji Cooper 
961e5a5dd6cSEnji Cooper   if (verbose) {
962e5a5dd6cSEnji Cooper     fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
963e5a5dd6cSEnji Cooper     fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
964e5a5dd6cSEnji Cooper   }
965e5a5dd6cSEnji Cooper }
966e5a5dd6cSEnji Cooper 
967e5a5dd6cSEnji Cooper #ifdef __x86_64__
FORK_TEST(Linux,CheckHighWord)968e5a5dd6cSEnji Cooper FORK_TEST(Linux, CheckHighWord) {
969e5a5dd6cSEnji Cooper   EXPECT_OK(cap_enter());  // Enter capability mode.
970e5a5dd6cSEnji Cooper 
971e5a5dd6cSEnji Cooper   int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
972e5a5dd6cSEnji Cooper   EXPECT_OK(rc);
973e5a5dd6cSEnji Cooper   EXPECT_EQ(1, rc);  // no_new_privs = 1
974e5a5dd6cSEnji Cooper 
975e5a5dd6cSEnji Cooper   // Set some of the high 32-bits of argument zero.
976e5a5dd6cSEnji Cooper   uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL;
977e5a5dd6cSEnji Cooper   EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0));
978e5a5dd6cSEnji Cooper }
979e5a5dd6cSEnji Cooper #endif
980e5a5dd6cSEnji Cooper 
// Walk the openat-beneath prctl flag through set, clear and capability mode:
// it can be set and cleared beforehand, reads back as 1 after cap_enter(), and
// an attempt to clear it in capability mode is refused.
FORK_TEST(Linux, PrctlOpenatBeneath) {
  // Turn on no_new_privs and read it back.
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  const int no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(no_new_privs);
  EXPECT_EQ(1, no_new_privs);  // no_new_privs = 1

  // Turn on openat-beneath mode and read it back.
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0));
  const int after_set = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(after_set);
  EXPECT_EQ(1, after_set);  // openat_beneath = 1

  // Turn it off again and read it back.
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  const int after_clear = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(after_clear);
  EXPECT_EQ(0, after_clear);  // openat_beneath = 0

  EXPECT_OK(cap_enter());  // Enter capability mode

  // The flag is expected to be on once in capability mode.
  const int in_capmode = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(in_capmode);
  EXPECT_EQ(1, in_capmode);  // openat_beneath = 1

  // Clearing it now is expected to fail and leave the flag on.
  EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  const int after_refusal = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(after_refusal);
  EXPECT_EQ(1, after_refusal);  // openat_beneath = 1
}
1014e5a5dd6cSEnji Cooper 
// Installing a seccomp-bpf filter is expected to fail with EACCES while
// no_new_privs is 0 (CAP_SYS_ADMIN is dropped first when running as root) and
// to succeed once no_new_privs has been set to 1.
FORK_TEST(Linux, NoNewPrivs) {
  const bool running_as_root = (getuid() == 0);
  if (running_as_root) {
    // If root, drop CAP_SYS_ADMIN POSIX.1e capability.
    struct __user_cap_header_struct header;
    header.version = _LINUX_CAPABILITY_VERSION_3;
    header.pid = getpid_();
    struct __user_cap_data_struct caps[3];
    EXPECT_OK(capget(&header, &caps[0]));
    // Every bit set except the one for CAP_SYS_ADMIN.
    const unsigned int keep_mask = ~(1 << CAP_SYS_ADMIN);
    caps[0].effective &= keep_mask;
    caps[0].permitted &= keep_mask;
    caps[0].inheritable &= keep_mask;
    EXPECT_OK(capset(&header, &caps[0]));
  }
  int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(rc);
  EXPECT_EQ(0, rc);  // no_new_privs == 0

  // A one-instruction filter program that allows every syscall.
  struct sock_filter allow_everything[] = {
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
  };
  struct sock_fprog prog;
  prog.len = (sizeof(allow_everything) / sizeof(allow_everything[0]));
  prog.filter = allow_everything;

  // Can't enter seccomp-bpf mode with no_new_privs == 0
  rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
  EXPECT_EQ(-1, rc);
  EXPECT_EQ(EACCES, errno);

  // Set no_new_privs = 1
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(rc);
  EXPECT_EQ(1, rc);  // no_new_privs = 1

  // Can now turn on seccomp mode
  EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0));
}
1052e5a5dd6cSEnji Cooper 
/* Macros for BPF generation.
 *
 * Each macro expands to one or more comma-separated struct sock_filter
 * initializers, intended for use inside a filter array initializer. */

/* Return instruction: fail the syscall with errno `err` (low 16 bits only). */
#define BPF_RETURN_ERRNO(err) \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | ((err) & 0xFFFF))
/* Return instruction: kill on this syscall. */
#define BPF_KILL_PROCESS \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
/* Return instruction: let the syscall through. */
#define BPF_ALLOW \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
/* Load the syscall number from struct seccomp_data. */
#define EXAMINE_SYSCALL \
  BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr))
/* The next three each emit a pair of instructions: a comparison of the loaded
 * value against __NR_<name> that, on a mismatch, skips the following
 * instruction, then the return instruction to use on a match. */
#define ALLOW_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_ALLOW
#define KILL_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_KILL_PROCESS
#define FAIL_SYSCALL(name, err) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_RETURN_ERRNO(err)
1071e5a5dd6cSEnji Cooper 
// Combine a seccomp-bpf filter with capability mode in a forked child and
// check which mechanism determines the outcome of each syscall.  The child is
// expected to end up killed by SIGSYS on its fsync() call.
TEST(Linux, CapModeWithBPF) {
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644);
    EXPECT_OK(fd);
    cap_rights_t rights;
    cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
    EXPECT_OK(cap_rights_limit(fd, &rights));

    // Filter: fchmod fails with ENOMEM, fstat fails with ENOEXEC, close is
    // allowed, fsync kills, everything else is allowed.
    struct sock_filter filter[] = { EXAMINE_SYSCALL,
                                    FAIL_SYSCALL(fchmod, ENOMEM),
                                    FAIL_SYSCALL(fstat, ENOEXEC),
                                    ALLOW_SYSCALL(close),
                                    KILL_SYSCALL(fsync),
                                    BPF_ALLOW };
    // Plain member assignment, as in the NoNewPrivs test, rather than
    // designated initializers.
    struct sock_fprog bpf;
    bpf.len = (sizeof(filter) / sizeof(filter[0]));
    bpf.filter = filter;
    // Set up seccomp-bpf first.
    EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
    EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));

    EXPECT_OK(cap_enter());  // Enter capability mode.

    // fchmod is allowed by Capsicum, but failed by BPF.
    EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644));
    // open is allowed by BPF, but failed by Capsicum
    EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY));
    // fstat is failed by both BPF and Capsicum; tie-break is on errno
    struct stat buf;
    EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf));
    // fsync is allowed by Capsicum, but BPF's SIGSYS generation take precedence
    fsync(fd);  // terminate with unhandled SIGSYS
    exit(0);
  }
  // Preset so that a failed waitpid() cannot leave an indeterminate status to
  // be decoded below (0 decodes as a normal exit, not as a signal).
  int status = 0;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  EXPECT_TRUE(WIFSIGNALED(status));
  EXPECT_EQ(SIGSYS, WTERMSIG(status));
  unlink(TmpFile("cap_bpf_capmode"));
}
1112e5a5dd6cSEnji Cooper 
TEST(Linux,AIO)1113e5a5dd6cSEnji Cooper TEST(Linux, AIO) {
1114e5a5dd6cSEnji Cooper   int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644);
1115e5a5dd6cSEnji Cooper   EXPECT_OK(fd);
1116e5a5dd6cSEnji Cooper 
1117e5a5dd6cSEnji Cooper   cap_rights_t r_rs;
1118e5a5dd6cSEnji Cooper   cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
1119e5a5dd6cSEnji Cooper   cap_rights_t r_ws;
1120e5a5dd6cSEnji Cooper   cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
1121e5a5dd6cSEnji Cooper   cap_rights_t r_rwssync;
1122e5a5dd6cSEnji Cooper   cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
1123e5a5dd6cSEnji Cooper 
1124e5a5dd6cSEnji Cooper   int cap_ro = dup(fd);
1125e5a5dd6cSEnji Cooper   EXPECT_OK(cap_ro);
1126e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
1127e5a5dd6cSEnji Cooper   EXPECT_OK(cap_ro);
1128e5a5dd6cSEnji Cooper   int cap_wo = dup(fd);
1129e5a5dd6cSEnji Cooper   EXPECT_OK(cap_wo);
1130e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
1131e5a5dd6cSEnji Cooper   EXPECT_OK(cap_wo);
1132e5a5dd6cSEnji Cooper   int cap_all = dup(fd);
1133e5a5dd6cSEnji Cooper   EXPECT_OK(cap_all);
1134e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync));
1135e5a5dd6cSEnji Cooper   EXPECT_OK(cap_all);
1136e5a5dd6cSEnji Cooper 
1137e5a5dd6cSEnji Cooper   // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy
1138e5a5dd6cSEnji Cooper   aio_context_t ctx = 0;
1139e5a5dd6cSEnji Cooper   EXPECT_OK(syscall(__NR_io_setup, 10, &ctx));
1140e5a5dd6cSEnji Cooper 
1141e5a5dd6cSEnji Cooper   unsigned char buffer[32] = {1, 2, 3, 4};
1142e5a5dd6cSEnji Cooper   struct iocb req;
1143e5a5dd6cSEnji Cooper   memset(&req, 0, sizeof(req));
1144e5a5dd6cSEnji Cooper   req.aio_reqprio = 0;
1145e5a5dd6cSEnji Cooper   req.aio_fildes = fd;
1146e5a5dd6cSEnji Cooper   uintptr_t bufaddr = (uintptr_t)buffer;
1147e5a5dd6cSEnji Cooper   req.aio_buf = (__u64)bufaddr;
1148e5a5dd6cSEnji Cooper   req.aio_nbytes = 4;
1149e5a5dd6cSEnji Cooper   req.aio_offset = 0;
1150e5a5dd6cSEnji Cooper   struct iocb* reqs[1] = {&req};
1151e5a5dd6cSEnji Cooper 
1152e5a5dd6cSEnji Cooper   // Write operation
1153e5a5dd6cSEnji Cooper   req.aio_lio_opcode = IOCB_CMD_PWRITE;
1154e5a5dd6cSEnji Cooper   req.aio_fildes = cap_ro;
1155e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1,  reqs));
1156e5a5dd6cSEnji Cooper   req.aio_fildes = cap_wo;
1157e5a5dd6cSEnji Cooper   EXPECT_OK(syscall(__NR_io_submit, ctx, 1,  reqs));
1158e5a5dd6cSEnji Cooper 
1159e5a5dd6cSEnji Cooper   // Sync operation
1160e5a5dd6cSEnji Cooper   req.aio_lio_opcode = IOCB_CMD_FSYNC;
1161e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1162e5a5dd6cSEnji Cooper   req.aio_lio_opcode = IOCB_CMD_FDSYNC;
1163e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1164e5a5dd6cSEnji Cooper   // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented
1165e5a5dd6cSEnji Cooper   req.aio_fildes = cap_all;
1166e5a5dd6cSEnji Cooper   EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1167e5a5dd6cSEnji Cooper   req.aio_lio_opcode = IOCB_CMD_FSYNC;
1168e5a5dd6cSEnji Cooper   EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
1169e5a5dd6cSEnji Cooper 
1170e5a5dd6cSEnji Cooper   // Read operation
1171e5a5dd6cSEnji Cooper   req.aio_lio_opcode = IOCB_CMD_PREAD;
1172e5a5dd6cSEnji Cooper   req.aio_fildes = cap_wo;
1173e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1,  reqs));
1174e5a5dd6cSEnji Cooper   req.aio_fildes = cap_ro;
1175e5a5dd6cSEnji Cooper   EXPECT_OK(syscall(__NR_io_submit, ctx, 1,  reqs));
1176e5a5dd6cSEnji Cooper 
1177e5a5dd6cSEnji Cooper   EXPECT_OK(syscall(__NR_io_destroy, ctx));
1178e5a5dd6cSEnji Cooper 
1179e5a5dd6cSEnji Cooper   close(cap_all);
1180e5a5dd6cSEnji Cooper   close(cap_wo);
1181e5a5dd6cSEnji Cooper   close(cap_ro);
1182e5a5dd6cSEnji Cooper   close(fd);
1183e5a5dd6cSEnji Cooper   unlink(TmpFile("cap_aio"));
1184e5a5dd6cSEnji Cooper }
1185e5a5dd6cSEnji Cooper 
1186e5a5dd6cSEnji Cooper #ifndef KCMP_FILE
1187e5a5dd6cSEnji Cooper #define KCMP_FILE 0
1188e5a5dd6cSEnji Cooper #endif
TEST(Linux,KcmpIfAvailable)1189b6973c8fSAlex Richardson TEST(Linux, KcmpIfAvailable) {
1190e5a5dd6cSEnji Cooper   // This requires CONFIG_CHECKPOINT_RESTORE in kernel config.
1191e5a5dd6cSEnji Cooper   int fd = open("/etc/passwd", O_RDONLY);
1192e5a5dd6cSEnji Cooper   EXPECT_OK(fd);
1193e5a5dd6cSEnji Cooper   pid_t parent = getpid_();
1194e5a5dd6cSEnji Cooper 
1195e5a5dd6cSEnji Cooper   errno = 0;
1196e5a5dd6cSEnji Cooper   int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd);
1197e5a5dd6cSEnji Cooper   if (rc == -1 && errno == ENOSYS) {
1198b6973c8fSAlex Richardson     GTEST_SKIP() << "kcmp(2) gives -ENOSYS";
1199e5a5dd6cSEnji Cooper   }
1200e5a5dd6cSEnji Cooper 
1201e5a5dd6cSEnji Cooper   pid_t child = fork();
1202e5a5dd6cSEnji Cooper   if (child == 0) {
1203e5a5dd6cSEnji Cooper     // Child: limit rights on FD.
1204e5a5dd6cSEnji Cooper     child = getpid_();
1205e5a5dd6cSEnji Cooper     EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1206e5a5dd6cSEnji Cooper     cap_rights_t rights;
1207e5a5dd6cSEnji Cooper     cap_rights_init(&rights, CAP_READ, CAP_WRITE);
1208e5a5dd6cSEnji Cooper     EXPECT_OK(cap_rights_limit(fd, &rights));
1209e5a5dd6cSEnji Cooper     // A capability wrapping a normal FD is different (from a kcmp(2) perspective)
1210e5a5dd6cSEnji Cooper     // than the original file.
1211e5a5dd6cSEnji Cooper     EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1212e5a5dd6cSEnji Cooper     exit(HasFailure());
1213e5a5dd6cSEnji Cooper   }
1214e5a5dd6cSEnji Cooper   // Wait for the child.
1215e5a5dd6cSEnji Cooper   int status;
1216e5a5dd6cSEnji Cooper   EXPECT_EQ(child, waitpid(child, &status, 0));
1217e5a5dd6cSEnji Cooper   rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
1218e5a5dd6cSEnji Cooper   EXPECT_EQ(0, rc);
1219e5a5dd6cSEnji Cooper 
1220e5a5dd6cSEnji Cooper   close(fd);
1221e5a5dd6cSEnji Cooper }
1222e5a5dd6cSEnji Cooper 
// Checks that /proc/<pid>/fdinfo/<fd> for a rights-limited descriptor shows
// both the file position of the underlying file and a "rights:" entry.
TEST(Linux, ProcFS) {
  cap_rights_t rights;
  cap_rights_init(&rights, CAP_READ, CAP_SEEK);
  int fd = open("/etc/passwd", O_RDONLY);
  EXPECT_OK(fd);
  lseek(fd, 4, SEEK_SET);
  int cap = dup(fd);
  EXPECT_OK(cap);
  EXPECT_OK(cap_rights_limit(cap, &rights));
  pid_t me = getpid_();

  char buffer[1024];
  snprintf(buffer, sizeof(buffer), "/proc/%d/fdinfo/%d", (int)me, cap);
  int procfd = open(buffer, O_RDONLY);
  EXPECT_OK(procfd) << " failed to open " << buffer;
  if (procfd < 0) {
    // Nothing more can be checked; release what was opened so far.
    close(cap);
    close(fd);
    return;
  }
  int proccap = dup(procfd);
  EXPECT_OK(proccap);
  EXPECT_OK(cap_rights_limit(proccap, &rights));

  // read(2) does not NUL-terminate, so leave room for a terminator and add it
  // explicitly before handing the buffer to strstr().
  ssize_t len = read(proccap, buffer, sizeof(buffer) - 1);
  EXPECT_OK(len);
  buffer[len > 0 ? len : 0] = '\0';
  // The fdinfo should include the file pos of the underlying file
  EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4"));
  // ...and the rights of the Capsicum capability.
  EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x"));

  close(procfd);
  close(proccap);
  close(cap);
  close(fd);
}
1254e5a5dd6cSEnji Cooper 
// Records that, in capability mode, clock_gettime() currently still succeeds
// on hand-built CPU-time clock IDs for other processes (a forked child and
// PID 1).  See the TODO below: this is not the desired end state.
FORK_TEST(Linux, ProcessClocks) {
  pid_t self = getpid_();
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    child = getpid_();
    usleep(100000);
    exit(0);
  }

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // Nefariously build a clock ID for the child's CPU time.
  // This relies on knowledge of the internal layout of clock IDs.
  // The complement and shift are done on an unsigned value: left-shifting a
  // negative signed value is not well-defined before C++20.
  clockid_t child_clock;
  child_clock = (clockid_t)(((~(unsigned int)child) << 3) | 0x0);
  struct timespec ts;
  memset(&ts, 0, sizeof(ts));

  // TODO(drysdale): Should not be possible to retrieve info about a
  // different process, as the PID global namespace should be locked
  // down.
  EXPECT_OK(clock_gettime(child_clock, &ts));
  if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n",
                       self, child, (unsigned int)child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);

  // Same construction for PID 1.
  child_clock = (clockid_t)(((~1U) << 3) | 0x0);
  memset(&ts, 0, sizeof(ts));
  EXPECT_OK(clock_gettime(child_clock, &ts));
  if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n",
                       self, (unsigned int)child_clock, (long)ts.tv_sec, (long)ts.tv_nsec);

  // Orphan the child.
}
1289e5a5dd6cSEnji Cooper 
// Checks that fcntl() lease operations fail with a not-capable error on a
// descriptor holding only CAP_READ|CAP_WRITE, and work on one that also holds
// CAP_FLOCK and CAP_FSIGNAL.
TEST(Linux, SetLease) {
  int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd_all);
  int fd_rw = dup(fd_all);
  EXPECT_OK(fd_rw);

  cap_rights_t r_all;
  cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL);
  EXPECT_OK(cap_rights_limit(fd_all, &r_all));

  cap_rights_t r_rw;
  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
  EXPECT_OK(cap_rights_limit(fd_rw, &r_rw));

  // Without the extra rights both setting and querying a lease are refused.
  EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK));
  EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE));

  if (!tmpdir_on_tmpfs) {  // tmpfs doesn't support leases
    EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK));
    EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE));

    EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK));
    EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE));
  }
  close(fd_all);
  close(fd_rw);
  unlink(TmpFile("cap_lease"));
}
1318e5a5dd6cSEnji Cooper 
// Drives the raw cap_rights_limit syscall with a series of malformed argument
// combinations and checks the errno reported for each one.
TEST(Linux, InvalidRightsSyscall) {
  int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644);
  EXPECT_OK(fd);

  cap_rights_t rights;
  cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT);

  // Use the raw syscall throughout.  Baseline: a well-formed call succeeds.
  EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));

  // Directly access the syscall, and find all unseemly manner of use for it.
  //  - Invalid flags
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1));
  EXPECT_EQ(EINVAL, errno);
  //  - Specify an fcntl subright, but no CAP_FCNTL set
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);
  //  - Specify an ioctl subright, but no CAP_IOCTL set
  unsigned int ioctl1 = 1;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &ioctl1, 0));
  EXPECT_EQ(EINVAL, errno);
  //  - N ioctls, but null pointer passed
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0));
  EXPECT_EQ(EINVAL, errno);
  //  - Invalid nioctls
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0));
  EXPECT_EQ(EINVAL, errno);
  //  - Null primary rights
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0));
  EXPECT_EQ(EFAULT, errno);
  //  - Invalid index bitmask
  rights.cr_rights[0] |= 3ULL << 57;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);
  //  - Invalid version (the bits set for the previous case are still set)
  rights.cr_rights[0] |= 2ULL << 62;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  close(fd);
  unlink(TmpFile("cap_invalid_rights"));
}
1361e5a5dd6cSEnji Cooper 
1362b6973c8fSAlex Richardson FORK_TEST_ON(Linux, OpenByHandleAtIfRoot, TmpFile("cap_openbyhandle_testfile")) {
1363b6973c8fSAlex Richardson   GTEST_SKIP_IF_NOT_ROOT();
1364e5a5dd6cSEnji Cooper   int dir = open(tmpdir.c_str(), O_RDONLY);
1365e5a5dd6cSEnji Cooper   EXPECT_OK(dir);
1366e5a5dd6cSEnji Cooper   int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644);
1367e5a5dd6cSEnji Cooper   EXPECT_OK(fd);
1368e5a5dd6cSEnji Cooper   const char* message = "Saved text";
1369e5a5dd6cSEnji Cooper   EXPECT_OK(write(fd, message, strlen(message)));
1370e5a5dd6cSEnji Cooper   close(fd);
1371e5a5dd6cSEnji Cooper 
1372e5a5dd6cSEnji Cooper   struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
1373e5a5dd6cSEnji Cooper   fhandle->handle_bytes = MAX_HANDLE_SZ;
1374e5a5dd6cSEnji Cooper   int mount_id;
1375e5a5dd6cSEnji Cooper   EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle,  &mount_id, 0));
1376e5a5dd6cSEnji Cooper 
1377e5a5dd6cSEnji Cooper   fd = open_by_handle_at(dir, fhandle, O_RDONLY);
1378e5a5dd6cSEnji Cooper   EXPECT_OK(fd);
1379e5a5dd6cSEnji Cooper   char buffer[200];
1380b6973c8fSAlex Richardson   ssize_t len = read(fd, buffer, 199);
1381b6973c8fSAlex Richardson   EXPECT_OK(len);
1382b6973c8fSAlex Richardson   EXPECT_EQ(std::string(message), std::string(buffer, len));
1383e5a5dd6cSEnji Cooper   close(fd);
1384e5a5dd6cSEnji Cooper 
1385e5a5dd6cSEnji Cooper   // Cannot issue open_by_handle_at after entering capability mode.
1386e5a5dd6cSEnji Cooper   cap_enter();
1387e5a5dd6cSEnji Cooper   EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY));
1388e5a5dd6cSEnji Cooper 
1389e5a5dd6cSEnji Cooper   close(dir);
1390e5a5dd6cSEnji Cooper }
1391e5a5dd6cSEnji Cooper 
// Thin wrapper around the getrandom syscall.
// Returns the raw syscall result when __NR_getrandom is known at build time;
// otherwise fails with -1 and errno set to ENOSYS.
int getrandom_(void *buf, size_t buflen, unsigned int flags) {
#ifdef __NR_getrandom
  // syscall() returns long; the narrowing to int is made explicit here.
  return (int)syscall(__NR_getrandom, buf, buflen, flags);
#else
  (void)buf;
  (void)buflen;
  (void)flags;
  errno = ENOSYS;
  return -1;
#endif
}
1400e5a5dd6cSEnji Cooper 
1401e5a5dd6cSEnji Cooper #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1402e5a5dd6cSEnji Cooper #include <linux/random.h>  // Requires 3.17 kernel
FORK_TEST(Linux,GetRandom)1403e5a5dd6cSEnji Cooper FORK_TEST(Linux, GetRandom) {
1404e5a5dd6cSEnji Cooper   EXPECT_OK(cap_enter());
1405e5a5dd6cSEnji Cooper   unsigned char buffer[1024];
1406e5a5dd6cSEnji Cooper   unsigned char buffer2[1024];
1407e5a5dd6cSEnji Cooper   EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK));
1408e5a5dd6cSEnji Cooper   EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK));
1409e5a5dd6cSEnji Cooper   EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer)));
1410e5a5dd6cSEnji Cooper }
1411e5a5dd6cSEnji Cooper #endif
1412e5a5dd6cSEnji Cooper 
// Thin wrapper around the memfd_create syscall.
// Returns the raw syscall result when __NR_memfd_create is known at build
// time; otherwise fails with -1 and errno set to ENOSYS.
int memfd_create_(const char *name, unsigned int flags) {
#ifdef __NR_memfd_create
  // syscall() returns long; the narrowing to int is made explicit here.
  return (int)syscall(__NR_memfd_create, name, flags);
#else
  (void)name;
  (void)flags;
  errno = ENOSYS;
  return -1;
#endif
}
1421e5a5dd6cSEnji Cooper 
1422e5a5dd6cSEnji Cooper #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1423e5a5dd6cSEnji Cooper #include <linux/memfd.h>  // Requires 3.17 kernel
TEST(Linux,MemFDDeathTestIfAvailable)1424b6973c8fSAlex Richardson TEST(Linux, MemFDDeathTestIfAvailable) {
1425e5a5dd6cSEnji Cooper   int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING);
1426e5a5dd6cSEnji Cooper   if (memfd == -1 && errno == ENOSYS) {
1427b6973c8fSAlex Richardson     GTEST_SKIP() << "memfd_create(2) gives -ENOSYS";
1428e5a5dd6cSEnji Cooper   }
1429e5a5dd6cSEnji Cooper   const int LEN = 16;
1430e5a5dd6cSEnji Cooper   EXPECT_OK(ftruncate(memfd, LEN));
1431e5a5dd6cSEnji Cooper   int memfd_ro = dup(memfd);
1432e5a5dd6cSEnji Cooper   int memfd_rw = dup(memfd);
1433e5a5dd6cSEnji Cooper   EXPECT_OK(memfd_ro);
1434e5a5dd6cSEnji Cooper   EXPECT_OK(memfd_rw);
1435e5a5dd6cSEnji Cooper   cap_rights_t rights;
1436e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT)));
1437e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD)));
1438e5a5dd6cSEnji Cooper 
1439e5a5dd6cSEnji Cooper   unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0);
1440e5a5dd6cSEnji Cooper   EXPECT_NE((unsigned char *)MAP_FAILED, p_ro);
1441e5a5dd6cSEnji Cooper   unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0);
1442e5a5dd6cSEnji Cooper   EXPECT_NE((unsigned char *)MAP_FAILED, p_rw);
1443e5a5dd6cSEnji Cooper   EXPECT_EQ(MAP_FAILED,
1444e5a5dd6cSEnji Cooper             mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0));
1445e5a5dd6cSEnji Cooper 
1446e5a5dd6cSEnji Cooper   *p_rw = 42;
1447e5a5dd6cSEnji Cooper   EXPECT_EQ(42, *p_ro);
1448e5a5dd6cSEnji Cooper   EXPECT_DEATH(*p_ro = 42, "");
1449e5a5dd6cSEnji Cooper 
1450e5a5dd6cSEnji Cooper #ifndef F_ADD_SEALS
1451e5a5dd6cSEnji Cooper   // Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17
1452e5a5dd6cSEnji Cooper #define _F_LINUX_SPECIFIC_BASE F_SETLEASE
1453e5a5dd6cSEnji Cooper #define F_ADD_SEALS	(_F_LINUX_SPECIFIC_BASE + 9)
1454e5a5dd6cSEnji Cooper #define F_GET_SEALS	(_F_LINUX_SPECIFIC_BASE + 10)
1455e5a5dd6cSEnji Cooper #define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
1456e5a5dd6cSEnji Cooper #define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
1457e5a5dd6cSEnji Cooper #define F_SEAL_GROW	0x0004	/* prevent file from growing */
1458e5a5dd6cSEnji Cooper #define F_SEAL_WRITE	0x0008	/* prevent writes */
1459e5a5dd6cSEnji Cooper #endif
1460e5a5dd6cSEnji Cooper 
1461e5a5dd6cSEnji Cooper   // Reading the seal information requires CAP_FSTAT.
1462e5a5dd6cSEnji Cooper   int seals = fcntl(memfd, F_GET_SEALS);
1463e5a5dd6cSEnji Cooper   EXPECT_OK(seals);
1464e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1465e5a5dd6cSEnji Cooper   int seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1466e5a5dd6cSEnji Cooper   EXPECT_EQ(seals, seals_ro);
1467e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1468e5a5dd6cSEnji Cooper   int seals_rw = fcntl(memfd_rw, F_GET_SEALS);
1469e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(seals_rw);
1470e5a5dd6cSEnji Cooper 
1471e5a5dd6cSEnji Cooper   // Fail to seal as a writable mapping exists.
1472e5a5dd6cSEnji Cooper   EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1473e5a5dd6cSEnji Cooper   EXPECT_EQ(EBUSY, errno);
1474e5a5dd6cSEnji Cooper   *p_rw = 42;
1475e5a5dd6cSEnji Cooper 
1476e5a5dd6cSEnji Cooper   // Seal the rw version; need to unmap first.
1477e5a5dd6cSEnji Cooper   munmap(p_rw, LEN);
1478e5a5dd6cSEnji Cooper   munmap(p_ro, LEN);
1479e5a5dd6cSEnji Cooper   EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1480e5a5dd6cSEnji Cooper 
1481e5a5dd6cSEnji Cooper   seals = fcntl(memfd, F_GET_SEALS);
1482e5a5dd6cSEnji Cooper   EXPECT_OK(seals);
1483e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1484e5a5dd6cSEnji Cooper   seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1485e5a5dd6cSEnji Cooper   EXPECT_EQ(seals, seals_ro);
1486e5a5dd6cSEnji Cooper   if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1487e5a5dd6cSEnji Cooper 
1488e5a5dd6cSEnji Cooper   // Remove the CAP_FCHMOD right, can no longer add seals.
1489e5a5dd6cSEnji Cooper   EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW)));
1490e5a5dd6cSEnji Cooper   EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1491e5a5dd6cSEnji Cooper 
1492e5a5dd6cSEnji Cooper   close(memfd);
1493e5a5dd6cSEnji Cooper   close(memfd_ro);
1494e5a5dd6cSEnji Cooper   close(memfd_rw);
1495e5a5dd6cSEnji Cooper }
1496e5a5dd6cSEnji Cooper #endif
1497e5a5dd6cSEnji Cooper 
#else
// Placeholder definition so the file is not empty when built for a
// non-Linux target.
void noop() {}
#endif  // __linux__