1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * syscall_numbering.c - test calling the x86-64 kernel with various
4 * valid and invalid system call numbers.
5 *
6 * Copyright (c) 2018 Andrew Lutomirski
7 */
8
9 #define _GNU_SOURCE
10
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <stdbool.h>
14 #include <errno.h>
15 #include <unistd.h>
16 #include <string.h>
17 #include <fcntl.h>
18 #include <limits.h>
19 #include <signal.h>
20 #include <sysexits.h>
21
22 #include <sys/ptrace.h>
23 #include <sys/user.h>
24 #include <sys/wait.h>
25 #include <sys/mman.h>
26
27 #include <linux/ptrace.h>
28 #include "../kselftest.h"
29
/*
 * Common system call numbers: this test issues them both as plain
 * 64-bit calls and, with X32_BIT or-ed in, as x32 calls.
 */
#define SYS_READ 0
#define SYS_WRITE 1
#define SYS_GETPID 39
/* x64-only system call numbers */
#define X64_IOCTL 16
#define X64_READV 19
#define X64_WRITEV 20
/* x32-only system call numbers (without X32_BIT) */
#define X32_IOCTL 514
#define X32_READV 515
#define X32_WRITEV 516

/* Flag bit or-ed into a system call number to request the x32 variant */
#define X32_BIT 0x40000000
44
/*
 * nullfd is opened by main() and passed as the first argument of every
 * probed system call; with_x32 holds the result of test_x32().
 */
static int nullfd = -1; /* File descriptor for /dev/null */
static bool with_x32; /* x32 supported on this kernel? */
47
/*
 * What the tracer does at each syscall stop during a given pass.
 * The passes are run in ascending order; PTP_DONE terminates the run
 * and has no entry in ptrace_pass_name[].
 */
enum ptrace_pass {
	PTP_NOTHING = 0,	/* Stop only, registers are not read */
	PTP_GETREGS,		/* Read registers, no writeback */
	PTP_WRITEBACK,		/* Read registers, write them back unchanged */
	PTP_FUZZRET,		/* Replace rax with MODIFIED_BY_PTRACE */
	PTP_FUZZHIGH,		/* As FUZZRET, plus set the top 32 bits of orig_rax */
	PTP_INTNUM,		/* As FUZZRET, plus sign-extend orig_rax from int */
	PTP_DONE,		/* End marker: all passes completed */
};
57
/*
 * Human-readable description of each pass, indexed by enum ptrace_pass.
 * There is deliberately no entry for PTP_DONE: the tracee only prints
 * names for passes below PTP_DONE.
 */
static const char * const ptrace_pass_name[] =
{
[PTP_NOTHING] = "just stop, no data read",
[PTP_GETREGS] = "only getregs",
[PTP_WRITEBACK] = "getregs, unmodified setregs",
[PTP_FUZZRET] = "modifying the default return",
[PTP_FUZZHIGH] = "clobbering the top 32 bits",
[PTP_INTNUM] = "sign-extending the syscall number",
};
67
/*
 * Shared memory block between tracer and test
 *
 * main() maps this MAP_SHARED before forking, so the counters and flags
 * below are visible to both the tracer and the traced test process.
 */
struct shared {
unsigned int nerr; /* Total error count */
unsigned int indent; /* Message indentation level */
enum ptrace_pass ptrace_pass; /* Current pass; written by the tracee, read by the tracer */
bool probing_syscall; /* In probe_syscall() */
};
/* NULL until main() has mapped the block; offset() tolerates that */
static volatile struct shared *sh;
78
offset(void)79 static inline unsigned int offset(void)
80 {
81 unsigned int level = sh ? sh->indent : 0;
82
83 return 8 + level * 4;
84 }
85
/*
 * Print a message prefixed by a "[LVL]" tag that is left-justified and
 * padded to offset() columns, so nested messages line up by level.
 * The field width consumed by "%-*s" must be an int, hence the cast.
 */
#define msg(lvl, fmt, ...) printf("%-*s" fmt, (int)offset(), "[" #lvl "]", \
				  ## __VA_ARGS__)

#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)

/* Report a test failure and bump the shared error counter. */
#define fail(fmt, ...)					\
	do {						\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		sh->nerr++;				\
	} while (0)

/*
 * Report a fatal setup problem at indentation level 0 and exit with
 * EX_OSERR. This can be reached before the shared block exists, so the
 * indentation is only reset when sh is non-NULL.
 */
#define crit(fmt, ...)					\
	do {						\
		if (sh)					\
			sh->indent = 0;			\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		msg(SKIP, "Unable to run test\n");	\
		exit(EX_OSERR);				\
	} while (0)

/*
 * Sentinel for ptrace-modified return value. Parenthesized so that the
 * negative literal cannot bind to a neighbouring operator on expansion.
 */
#define MODIFIED_BY_PTRACE	(-9999)
109
110 /*
111 * Directly invokes the given syscall with nullfd as the first argument
112 * and the rest zero. Avoids involving glibc wrappers in case they ever
113 * end up intercepting some system calls for some reason, or modify
114 * the system call number itself.
115 */
probe_syscall(int msb,int lsb)116 static long long probe_syscall(int msb, int lsb)
117 {
118 register long long arg1 asm("rdi") = nullfd;
119 register long long arg2 asm("rsi") = 0;
120 register long long arg3 asm("rdx") = 0;
121 register long long arg4 asm("r10") = 0;
122 register long long arg5 asm("r8") = 0;
123 register long long arg6 asm("r9") = 0;
124 long long nr = ((long long)msb << 32) | (unsigned int)lsb;
125 long long ret;
126
127 /*
128 * We pass in an extra copy of the extended system call number
129 * in %rbx, so we can examine it from the ptrace handler without
130 * worrying about it being possibly modified. This is to test
131 * the validity of struct user regs.orig_rax a.k.a.
132 * struct pt_regs.orig_ax.
133 */
134 sh->probing_syscall = true;
135 asm volatile("syscall"
136 : "=a" (ret)
137 : "a" (nr), "b" (nr),
138 "r" (arg1), "r" (arg2), "r" (arg3),
139 "r" (arg4), "r" (arg5), "r" (arg6)
140 : "rcx", "r11", "memory", "cc");
141 sh->probing_syscall = false;
142
143 return ret;
144 }
145
syscall_str(int msb,int start,int end)146 static const char *syscall_str(int msb, int start, int end)
147 {
148 static char buf[64];
149 const char * const type = (start & X32_BIT) ? "x32" : "x64";
150 int lsb = start;
151
152 /*
153 * Improve readability by stripping the x32 bit, but round
154 * toward zero so we don't display -1 as -1073741825.
155 */
156 if (lsb < 0)
157 lsb |= X32_BIT;
158 else
159 lsb &= ~X32_BIT;
160
161 if (start == end)
162 snprintf(buf, sizeof buf, "%s syscall %d:%d",
163 type, msb, lsb);
164 else
165 snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
166 type, msb, lsb, lsb + (end-start));
167
168 return buf;
169 }
170
_check_for(int msb,int start,int end,long long expect,const char * expect_str)171 static unsigned int _check_for(int msb, int start, int end, long long expect,
172 const char *expect_str)
173 {
174 unsigned int err = 0;
175
176 sh->indent++;
177 if (start != end)
178 sh->indent++;
179
180 for (int nr = start; nr <= end; nr++) {
181 long long ret = probe_syscall(msb, nr);
182
183 if (ret != expect) {
184 fail("%s returned %lld, but it should have returned %s\n",
185 syscall_str(msb, nr, nr),
186 ret, expect_str);
187 err++;
188 }
189 }
190
191 if (start != end)
192 sh->indent--;
193
194 if (err) {
195 if (start != end)
196 fail("%s had %u failure%s\n",
197 syscall_str(msb, start, end),
198 err, err == 1 ? "s" : "");
199 } else {
200 ok("%s returned %s as expected\n",
201 syscall_str(msb, start, end), expect_str);
202 }
203
204 sh->indent--;
205
206 return err;
207 }
208
/*
 * Wrapper around _check_for() that passes the expected value both as a
 * number and, via stringification, as the text shown in messages. The
 * text is the argument exactly as written at the call site.
 */
#define check_for(msb,start,end,expect) \
_check_for(msb,start,end,expect,#expect)
211
/*
 * Probe a single system call and expect it to return 0.
 *
 * Note the sense of the result: it is true when the probe did NOT
 * return 0 (i.e. check_for() counted a failure), false on success.
 */
static bool check_zero(int msb, int nr)
{
	const unsigned int failures = check_for(msb, nr, nr, 0);

	return failures != 0;
}
216
/*
 * Probe a single system call and expect it to return -ENOSYS.
 *
 * Note the sense of the result: it is true when the probe returned
 * something else (i.e. check_for() counted a failure), false on success.
 */
static bool check_enosys(int msb, int nr)
{
	const unsigned int failures = check_for(msb, nr, nr, -ENOSYS);

	return failures != 0;
}
221
222 /*
223 * Anyone diagnosing a failure will want to know whether the kernel
224 * supports x32. Tell them. This can also be used to conditionalize
225 * tests based on existence or nonexistence of x32.
226 */
test_x32(void)227 static bool test_x32(void)
228 {
229 long long ret;
230 pid_t mypid = getpid();
231
232 run("Checking for x32 by calling x32 getpid()\n");
233 ret = probe_syscall(0, SYS_GETPID | X32_BIT);
234
235 sh->indent++;
236 if (ret == mypid) {
237 info("x32 is supported\n");
238 with_x32 = true;
239 } else if (ret == -ENOSYS) {
240 info("x32 is not supported\n");
241 with_x32 = false;
242 } else {
243 fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
244 with_x32 = false;
245 }
246 sh->indent--;
247 return with_x32;
248 }
249
test_syscalls_common(int msb)250 static void test_syscalls_common(int msb)
251 {
252 enum ptrace_pass pass = sh->ptrace_pass;
253
254 run("Checking some common syscalls as 64 bit\n");
255 check_zero(msb, SYS_READ);
256 check_zero(msb, SYS_WRITE);
257
258 run("Checking some 64-bit only syscalls as 64 bit\n");
259 check_zero(msb, X64_READV);
260 check_zero(msb, X64_WRITEV);
261
262 run("Checking out of range system calls\n");
263 check_for(msb, -64, -2, -ENOSYS);
264 if (pass >= PTP_FUZZRET)
265 check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
266 else
267 check_for(msb, -1, -1, -ENOSYS);
268 check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
269 check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
270 check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
271 }
272
test_syscalls_with_x32(int msb)273 static void test_syscalls_with_x32(int msb)
274 {
275 /*
276 * Syscalls 512-547 are "x32" syscalls. They are
277 * intended to be called with the x32 (0x40000000) bit
278 * set. Calling them without the x32 bit set is
279 * nonsense and should not work.
280 */
281 run("Checking x32 syscalls as 64 bit\n");
282 check_for(msb, 512, 547, -ENOSYS);
283
284 run("Checking some common syscalls as x32\n");
285 check_zero(msb, SYS_READ | X32_BIT);
286 check_zero(msb, SYS_WRITE | X32_BIT);
287
288 run("Checking some x32 syscalls as x32\n");
289 check_zero(msb, X32_READV | X32_BIT);
290 check_zero(msb, X32_WRITEV | X32_BIT);
291
292 run("Checking some 64-bit syscalls as x32\n");
293 check_enosys(msb, X64_IOCTL | X32_BIT);
294 check_enosys(msb, X64_READV | X32_BIT);
295 check_enosys(msb, X64_WRITEV | X32_BIT);
296 }
297
test_syscalls_without_x32(int msb)298 static void test_syscalls_without_x32(int msb)
299 {
300 run("Checking for absence of x32 system calls\n");
301 check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
302 }
303
test_syscall_numbering(void)304 static void test_syscall_numbering(void)
305 {
306 static const int msbs[] = {
307 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
308 INT_MIN, INT_MIN+1
309 };
310
311 sh->indent++;
312
313 /*
314 * The MSB is supposed to be ignored, so we loop over a few
315 * to test that out.
316 */
317 for (size_t i = 0; i < ARRAY_SIZE(msbs); i++) {
318 int msb = msbs[i];
319 run("Checking system calls with msb = %d (0x%x)\n",
320 msb, msb);
321
322 sh->indent++;
323
324 test_syscalls_common(msb);
325 if (with_x32)
326 test_syscalls_with_x32(msb);
327 else
328 test_syscalls_without_x32(msb);
329
330 sh->indent--;
331 }
332
333 sh->indent--;
334 }
335
syscall_numbering_tracee(void)336 static void syscall_numbering_tracee(void)
337 {
338 enum ptrace_pass pass;
339
340 if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
341 crit("Failed to request tracing\n");
342 return;
343 }
344 raise(SIGSTOP);
345
346 for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
347 sh->ptrace_pass = ++pass) {
348 run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
349 test_syscall_numbering();
350 }
351 }
352
mess_with_syscall(pid_t testpid,enum ptrace_pass pass)353 static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
354 {
355 struct user_regs_struct regs;
356
357 sh->probing_syscall = false; /* Do this on entry only */
358
359 /* For these, don't even getregs */
360 if (pass == PTP_NOTHING || pass == PTP_DONE)
361 return;
362
363 ptrace(PTRACE_GETREGS, testpid, NULL, ®s);
364
365 if (regs.orig_rax != regs.rbx) {
366 fail("orig_rax %#llx doesn't match syscall number %#llx\n",
367 (unsigned long long)regs.orig_rax,
368 (unsigned long long)regs.rbx);
369 }
370
371 switch (pass) {
372 case PTP_GETREGS:
373 /* Just read, no writeback */
374 return;
375 case PTP_WRITEBACK:
376 /* Write back the same register state verbatim */
377 break;
378 case PTP_FUZZRET:
379 regs.rax = MODIFIED_BY_PTRACE;
380 break;
381 case PTP_FUZZHIGH:
382 regs.rax = MODIFIED_BY_PTRACE;
383 regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
384 break;
385 case PTP_INTNUM:
386 regs.rax = MODIFIED_BY_PTRACE;
387 regs.orig_rax = (int)regs.orig_rax;
388 break;
389 default:
390 crit("invalid ptrace_pass\n");
391 break;
392 }
393
394 ptrace(PTRACE_SETREGS, testpid, NULL, ®s);
395 }
396
syscall_numbering_tracer(pid_t testpid)397 static void syscall_numbering_tracer(pid_t testpid)
398 {
399 int wstatus;
400
401 do {
402 pid_t wpid = waitpid(testpid, &wstatus, 0);
403 if (wpid < 0 && errno != EINTR)
404 break;
405 if (wpid != testpid)
406 continue;
407 if (!WIFSTOPPED(wstatus))
408 break; /* Thread exited? */
409
410 if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
411 mess_with_syscall(testpid, sh->ptrace_pass);
412 } while (sh->ptrace_pass != PTP_DONE &&
413 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
414
415 ptrace(PTRACE_DETACH, testpid, NULL, NULL);
416
417 /* Wait for the child process to terminate */
418 while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
419 /* wait some more */;
420 }
421
/*
 * Fork: the child runs the test suite as a tracee and then leaves via
 * _exit(0); the parent acts as its tracer until the child is done.
 * A failing fork() is fatal.
 */
static void test_traced_syscall_numbering(void)
{
	/* Launch the test thread; this thread continues as the tracer thread */
	const pid_t child = fork();

	if (child == 0) {
		syscall_numbering_tracee();
		_exit(0);
	}

	if (child < 0)
		crit("Unable to launch tracer process\n");
	else
		syscall_numbering_tracer(child);
}
438
main(void)439 int main(void)
440 {
441 unsigned int nerr;
442
443 /*
444 * It is quite likely to get a segfault on a failure, so make
445 * sure the message gets out by setting stdout to nonbuffered.
446 */
447 setvbuf(stdout, NULL, _IONBF, 0);
448
449 /*
450 * Harmless file descriptor to work on...
451 */
452 nullfd = open("/dev/null", O_RDWR);
453 if (nullfd < 0) {
454 crit("Unable to open /dev/null: %s\n", strerror(errno));
455 }
456
457 /*
458 * Set up a block of shared memory...
459 */
460 sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
461 MAP_ANONYMOUS|MAP_SHARED, 0, 0);
462 if (sh == MAP_FAILED) {
463 crit("Unable to allocated shared memory block: %s\n",
464 strerror(errno));
465 }
466
467 with_x32 = test_x32();
468
469 run("Running tests without ptrace...\n");
470 test_syscall_numbering();
471
472 test_traced_syscall_numbering();
473
474 nerr = sh->nerr;
475 if (!nerr) {
476 ok("All system calls succeeded or failed as expected\n");
477 return 0;
478 } else {
479 fail("A total of %u system call%s had incorrect behavior\n",
480 nerr, nerr != 1 ? "s" : "");
481 return 1;
482 }
483 }
484