1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
4 * Copyright (c) 2014-2015 Andrew Lutomirski
5 *
6 * This is a series of tests that exercises the sigreturn(2) syscall and
7 * the IRET / SYSRET paths in the kernel.
8 *
9 * For now, this focuses on the effects of unusual CS and SS values,
10 * and it has a bunch of tests to make sure that ESP/RSP is restored
11 * properly.
12 *
13 * The basic idea behind these tests is to raise(SIGUSR1) to create a
14 * sigcontext frame, plug in the values to be tested, and then return,
15 * which implicitly invokes sigreturn(2) and programs the user context
16 * as desired.
17 *
18 * For tests for which we expect sigreturn and the subsequent return to
19 * user mode to succeed, we return to a short trampoline that generates
20 * SIGTRAP so that the meat of the tests can be ordinary C code in a
21 * SIGTRAP handler.
22 *
23 * The inner workings of each test are documented below.
24 *
25 * Do not run on outdated, unpatched kernels at risk of nasty crashes.
26 */
27
28 #define _GNU_SOURCE
29
30 #include <sys/time.h>
31 #include <time.h>
32 #include <stdlib.h>
33 #include <sys/syscall.h>
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <string.h>
37 #include <inttypes.h>
38 #include <sys/mman.h>
39 #include <sys/signal.h>
40 #include <sys/ucontext.h>
41 #include <asm/ldt.h>
42 #include <err.h>
43 #include <setjmp.h>
44 #include <stddef.h>
45 #include <stdbool.h>
46 #include <sys/ptrace.h>
47 #include <sys/user.h>
48
49 #include "helpers.h"
50
51 /* Pull in AR_xyz defines. */
52 typedef unsigned int u32;
53 typedef unsigned short u16;
54 #include "../../../../arch/x86/include/asm/desc_defs.h"
55
56 /*
57 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
58 * headers.
59 */
60 #ifdef __x86_64__
61 /*
62 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
63 * kernels that save SS in the sigcontext. All kernels that set
64 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
65 * regardless of SS (i.e. they implement espfix).
66 *
67 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
68 * when delivering a signal that came from 64-bit code.
69 *
70 * Sigreturn restores SS as follows:
71 *
72 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
73 * saved CS is not 64-bit)
74 * new SS = saved SS (will fail IRET and signal if invalid)
75 * else
76 * new SS = a flat 32-bit data segment
77 */
78 #define UC_SIGCONTEXT_SS 0x2
79 #define UC_STRICT_RESTORE_SS 0x4
80 #endif
81
82 /*
83 * In principle, this test can run on Linux emulation layers (e.g.
84 * Illumos "LX branded zones"). Solaris-based kernels reserve LDT
85 * entries 0-5 for their own internal purposes, so start our LDT
86 * allocations above that reservation. (The tests don't pass on LX
87 * branded zones, but at least this lets them run.)
88 */
89 #define LDT_OFFSET 6
90
91 /* An aligned stack accessible through some of our segments. */
92 static unsigned char stack16[65536] __attribute__((aligned(4096)));
93
94 /*
95 * An aligned int3 instruction used as a trampoline. Some of the tests
96 * want to fish out their ss values, so this trampoline copies ss to ecx
97 * before the int3.
98 */
/*
 * The "int3" symbol labels a stub that stores SS in ECX and then executes
 * int3.  The stub starts on a 4096-byte boundary and the space after it is
 * filled with 0xcc up to the next 4096-byte boundary.
 */
__asm__ (
	".pushsection .text\n\t"
	".type int3, @function\n\t"
	".balign 4096\n\t"
	"int3:\n\t"
	"mov %ss,%ecx\n\t"
	"int3\n\t"
	".size int3, . - int3\n\t"
	".balign 4096, 0xcc\n\t"
	".popsection"
);
extern char int3[4096];
109
110 /*
111 * At startup, we prepare:
112 *
113 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
114 * descriptor or out of bounds).
115 * - code16_sel: A 16-bit LDT code segment pointing to int3.
116 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
117 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
118 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
119 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
120 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
121 * stack16.
122 *
123 * For no particularly good reason, xyz_sel is a selector value with the
124 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
125 * descriptor table. These variables will be zero if their respective
126 * segments could not be allocated.
127 */
128 static unsigned short ldt_nonexistent_sel;
129 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
130
131 static unsigned short gdt_data16_idx, gdt_npdata32_idx;
132
/*
 * Turns a descriptor-table index into a selector value: the index is
 * shifted left by three and the low bits are set to 3 (requested privilege
 * level 3, table-indicator bit clear, i.e. GDT).
 */
static unsigned short GDT3(int idx)
{
	const int rpl_user = 3;

	return (idx << 3) | rpl_user;
}
137
/*
 * Turns a descriptor-table index into a selector value: the index is
 * shifted left by three and the low bits are set to 7 (table-indicator
 * bit set, i.e. LDT, plus requested privilege level 3).
 */
static unsigned short LDT3(int idx)
{
	const int ti_ldt = 1 << 2;
	const int rpl_user = 3;

	return (idx << 3) | ti_ldt | rpl_user;
}
142
/*
 * Installs @desc via modify_ldt (function code 1) and stores the matching
 * LDT selector in *@var.  If the syscall fails, a [NOTE] naming @name is
 * printed and *@var is set to 0.
 */
static void add_ldt(const struct user_desc *desc, unsigned short *var,
		    const char *name)
{
	long rc = syscall(SYS_modify_ldt, 1, desc, sizeof(*desc));

	if (rc != 0) {
		printf("[NOTE]\tFailed to create %s segment\n", name);
		*var = 0;
		return;
	}

	*var = LDT3(desc->entry_number);
}
153
setup_ldt(void)154 static void setup_ldt(void)
155 {
156 if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
157 errx(1, "stack16 is too high\n");
158 if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
159 errx(1, "int3 is too high\n");
160
161 ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
162
163 const struct user_desc code16_desc = {
164 .entry_number = LDT_OFFSET + 0,
165 .base_addr = (unsigned long)int3,
166 .limit = 4095,
167 .seg_32bit = 0,
168 .contents = 2, /* Code, not conforming */
169 .read_exec_only = 0,
170 .limit_in_pages = 0,
171 .seg_not_present = 0,
172 .useable = 0
173 };
174 add_ldt(&code16_desc, &code16_sel, "code16");
175
176 const struct user_desc data16_desc = {
177 .entry_number = LDT_OFFSET + 1,
178 .base_addr = (unsigned long)stack16,
179 .limit = 0xffff,
180 .seg_32bit = 0,
181 .contents = 0, /* Data, grow-up */
182 .read_exec_only = 0,
183 .limit_in_pages = 0,
184 .seg_not_present = 0,
185 .useable = 0
186 };
187 add_ldt(&data16_desc, &data16_sel, "data16");
188
189 const struct user_desc npcode32_desc = {
190 .entry_number = LDT_OFFSET + 3,
191 .base_addr = (unsigned long)int3,
192 .limit = 4095,
193 .seg_32bit = 1,
194 .contents = 2, /* Code, not conforming */
195 .read_exec_only = 0,
196 .limit_in_pages = 0,
197 .seg_not_present = 1,
198 .useable = 0
199 };
200 add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
201
202 const struct user_desc npdata32_desc = {
203 .entry_number = LDT_OFFSET + 4,
204 .base_addr = (unsigned long)stack16,
205 .limit = 0xffff,
206 .seg_32bit = 1,
207 .contents = 0, /* Data, grow-up */
208 .read_exec_only = 0,
209 .limit_in_pages = 0,
210 .seg_not_present = 1,
211 .useable = 0
212 };
213 add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
214
215 struct user_desc gdt_data16_desc = {
216 .entry_number = -1,
217 .base_addr = (unsigned long)stack16,
218 .limit = 0xffff,
219 .seg_32bit = 0,
220 .contents = 0, /* Data, grow-up */
221 .read_exec_only = 0,
222 .limit_in_pages = 0,
223 .seg_not_present = 0,
224 .useable = 0
225 };
226
227 if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
228 /*
229 * This probably indicates vulnerability to CVE-2014-8133.
230 * Merely getting here isn't definitive, though, and we'll
231 * diagnose the problem for real later on.
232 */
233 printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
234 gdt_data16_desc.entry_number);
235 gdt_data16_idx = gdt_data16_desc.entry_number;
236 } else {
237 printf("[OK]\tset_thread_area refused 16-bit data\n");
238 }
239
240 struct user_desc gdt_npdata32_desc = {
241 .entry_number = -1,
242 .base_addr = (unsigned long)stack16,
243 .limit = 0xffff,
244 .seg_32bit = 1,
245 .contents = 0, /* Data, grow-up */
246 .read_exec_only = 0,
247 .limit_in_pages = 0,
248 .seg_not_present = 1,
249 .useable = 0
250 };
251
252 if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
253 /*
254 * As a hardening measure, newer kernels don't allow this.
255 */
256 printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
257 gdt_npdata32_desc.entry_number);
258 gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
259 } else {
260 printf("[OK]\tset_thread_area refused 16-bit data\n");
261 }
262 }
263
264 /* State used by our signal handlers. */
265 static gregset_t initial_regs, requested_regs, resulting_regs;
266
267 /* Instructions for the SIGUSR1 handler. */
268 static volatile unsigned short sig_cs, sig_ss;
269 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
270 #ifdef __x86_64__
271 static volatile sig_atomic_t sig_corrupt_final_ss;
272 #endif
273
274 /* Abstractions for some 32-bit vs 64-bit differences. */
275 #ifdef __x86_64__
276 # define REG_IP REG_RIP
277 # define REG_SP REG_RSP
278 # define REG_CX REG_RCX
279
/*
 * Overlay used to reach the individual 16-bit selector fields stored in
 * the REG_CSGSFS slot of the 64-bit register set.
 */
struct selectors {
	unsigned short cs;
	unsigned short gs;
	unsigned short fs;
	unsigned short ss;
};
283
ssptr(ucontext_t * ctx)284 static unsigned short *ssptr(ucontext_t *ctx)
285 {
286 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
287 return &sels->ss;
288 }
289
csptr(ucontext_t * ctx)290 static unsigned short *csptr(ucontext_t *ctx)
291 {
292 struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
293 return &sels->cs;
294 }
295 #else
296 # define REG_IP REG_EIP
297 # define REG_SP REG_ESP
298 # define REG_CX REG_ECX
299
ssptr(ucontext_t * ctx)300 static greg_t *ssptr(ucontext_t *ctx)
301 {
302 return &ctx->uc_mcontext.gregs[REG_SS];
303 }
304
csptr(ucontext_t * ctx)305 static greg_t *csptr(ucontext_t *ctx)
306 {
307 return &ctx->uc_mcontext.gregs[REG_CS];
308 }
309 #endif
310
/*
 * Reports the bitness (16, 32 or 64) of the code segment named by @cs.
 *
 * Returns -1 when LAR rejects the selector, when bit 11 of the access
 * rights is clear (not a code segment), or when both the L bit (21) and
 * the D/B bit (22) are set.
 */
int cs_bitness(unsigned short cs)
{
	uint32_t ar, lar_ok = 0;

	/* LAR sets ZF on success; only then is lar_ok flipped to 1. */
	__asm__ ("lar %[cs], %[ar]\n\t"
		 "jnz 1f\n\t"
		 "mov $1, %[valid]\n\t"
		 "1:"
		 : [ar] "=r" (ar), [valid] "+rm" (lar_ok)
		 : [cs] "r" (cs));

	if (!lar_ok)
		return -1;

	if (!(ar & (1 << 11)))
		return -1;	/* Not code. */

	const bool long_mode = (ar & (1 << 21));
	const bool default_big = (ar & (1 << 22));

	if (long_mode)
		return default_big ? -1 : 64;	/* L && D/B: unknown bitness */

	return default_big ? 32 : 16;
}
343
344 /*
345 * Checks a given selector for its code bitness or returns -1 if it's not
346 * a usable code segment selector.
347 */
is_valid_ss(unsigned short cs)348 bool is_valid_ss(unsigned short cs)
349 {
350 uint32_t valid = 0, ar;
351 asm ("lar %[cs], %[ar]\n\t"
352 "jnz 1f\n\t"
353 "mov $1, %[valid]\n\t"
354 "1:"
355 : [ar] "=r" (ar), [valid] "+rm" (valid)
356 : [cs] "r" (cs));
357
358 if (!valid)
359 return false;
360
361 if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
362 (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
363 return false;
364
365 return (ar & AR_P);
366 }
367
368 /* Number of errors in the current test case. */
369 static volatile sig_atomic_t nerrs;
370
validate_signal_ss(int sig,ucontext_t * ctx)371 static void validate_signal_ss(int sig, ucontext_t *ctx)
372 {
373 #ifdef __x86_64__
374 bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
375
376 if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
377 printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
378 nerrs++;
379
380 /*
381 * This happens on Linux 4.1. The rest will fail, too, so
382 * return now to reduce the noise.
383 */
384 return;
385 }
386
387 /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
388 if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
389 printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
390 sig);
391 nerrs++;
392 }
393
394 if (is_valid_ss(*ssptr(ctx))) {
395 /*
396 * DOSEMU was written before 64-bit sigcontext had SS, and
397 * it tries to figure out the signal source SS by looking at
398 * the physical register. Make sure that keeps working.
399 */
400 unsigned short hw_ss;
401 asm ("mov %%ss, %0" : "=rm" (hw_ss));
402 if (hw_ss != *ssptr(ctx)) {
403 printf("[FAIL]\tHW SS didn't match saved SS\n");
404 nerrs++;
405 }
406 }
407 #endif
408 }
409
410 /*
411 * SIGUSR1 handler. Sets CS and SS as requested and points IP to the
412 * int3 trampoline. Sets SP to a large known value so that we can see
413 * whether the value round-trips back to user mode correctly.
414 */
sigusr1(int sig,siginfo_t * info,void * ctx_void)415 static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
416 {
417 ucontext_t *ctx = (ucontext_t*)ctx_void;
418
419 validate_signal_ss(sig, ctx);
420
421 memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
422
423 *csptr(ctx) = sig_cs;
424 *ssptr(ctx) = sig_ss;
425
426 ctx->uc_mcontext.gregs[REG_IP] =
427 sig_cs == code16_sel ? 0 : (unsigned long)&int3;
428 ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
429 ctx->uc_mcontext.gregs[REG_CX] = 0;
430
431 #ifdef __i386__
432 /*
433 * Make sure the kernel doesn't inadvertently use DS or ES-relative
434 * accesses in a region where user DS or ES is loaded.
435 *
436 * Skip this for 64-bit builds because long mode doesn't care about
437 * DS and ES and skipping it increases test coverage a little bit,
438 * since 64-bit kernels can still run the 32-bit build.
439 */
440 ctx->uc_mcontext.gregs[REG_DS] = 0;
441 ctx->uc_mcontext.gregs[REG_ES] = 0;
442 #endif
443
444 memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
445 requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
446
447 return;
448 }
449
450 /*
451 * Called after a successful sigreturn (via int3) or from a failed
452 * sigreturn (directly by kernel). Restores our state so that the
453 * original raise(SIGUSR1) returns.
454 */
sigtrap(int sig,siginfo_t * info,void * ctx_void)455 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
456 {
457 ucontext_t *ctx = (ucontext_t*)ctx_void;
458
459 validate_signal_ss(sig, ctx);
460
461 sig_err = ctx->uc_mcontext.gregs[REG_ERR];
462 sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
463
464 unsigned short ss;
465 asm ("mov %%ss,%0" : "=r" (ss));
466
467 greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
468 if (asm_ss != sig_ss && sig == SIGTRAP) {
469 /* Sanity check failure. */
470 printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %x, ax = %llx\n",
471 ss, *ssptr(ctx), (unsigned long long)asm_ss);
472 nerrs++;
473 }
474
475 memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
476 memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
477
478 #ifdef __x86_64__
479 if (sig_corrupt_final_ss) {
480 if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
481 printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
482 nerrs++;
483 } else {
484 /*
485 * DOSEMU transitions from 32-bit to 64-bit mode by
486 * adjusting sigcontext, and it requires that this work
487 * even if the saved SS is bogus.
488 */
489 printf("\tCorrupting SS on return to 64-bit mode\n");
490 *ssptr(ctx) = 0;
491 }
492 }
493 #endif
494
495 sig_trapped = sig;
496 }
497
498 #ifdef __x86_64__
499 /* Tests recovery if !UC_STRICT_RESTORE_SS */
sigusr2(int sig,siginfo_t * info,void * ctx_void)500 static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
501 {
502 ucontext_t *ctx = (ucontext_t*)ctx_void;
503
504 if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
505 printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
506 nerrs++;
507 return; /* We can't do the rest. */
508 }
509
510 ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
511 *ssptr(ctx) = 0;
512
513 /* Return. The kernel should recover without sending another signal. */
514 }
515
test_nonstrict_ss(void)516 static int test_nonstrict_ss(void)
517 {
518 clearhandler(SIGUSR1);
519 clearhandler(SIGTRAP);
520 clearhandler(SIGSEGV);
521 clearhandler(SIGILL);
522 sethandler(SIGUSR2, sigusr2, 0);
523
524 nerrs = 0;
525
526 printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
527 raise(SIGUSR2);
528 if (!nerrs)
529 printf("[OK]\tIt worked\n");
530
531 return nerrs;
532 }
533 #endif
534
535 /* Finds a usable code segment of the requested bitness. */
find_cs(int bitness)536 int find_cs(int bitness)
537 {
538 unsigned short my_cs;
539
540 asm ("mov %%cs,%0" : "=r" (my_cs));
541
542 if (cs_bitness(my_cs) == bitness)
543 return my_cs;
544 if (cs_bitness(my_cs + (2 << 3)) == bitness)
545 return my_cs + (2 << 3);
546 if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
547 return my_cs - (2 << 3);
548 if (cs_bitness(code16_sel) == bitness)
549 return code16_sel;
550
551 printf("[WARN]\tCould not find %d-bit CS\n", bitness);
552 return -1;
553 }
554
test_valid_sigreturn(int cs_bits,bool use_16bit_ss,int force_ss)555 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
556 {
557 int cs = find_cs(cs_bits);
558 if (cs == -1) {
559 printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
560 cs_bits, use_16bit_ss ? 16 : 32);
561 return 0;
562 }
563
564 if (force_ss != -1) {
565 sig_ss = force_ss;
566 } else {
567 if (use_16bit_ss) {
568 if (!data16_sel) {
569 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
570 cs_bits);
571 return 0;
572 }
573 sig_ss = data16_sel;
574 } else {
575 asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
576 }
577 }
578
579 sig_cs = cs;
580
581 printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
582 cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
583 (sig_ss & 4) ? "" : ", GDT");
584
585 raise(SIGUSR1);
586
587 nerrs = 0;
588
589 /*
590 * Check that each register had an acceptable value when the
591 * int3 trampoline was invoked.
592 */
593 for (int i = 0; i < NGREG; i++) {
594 greg_t req = requested_regs[i], res = resulting_regs[i];
595
596 if (i == REG_TRAPNO || i == REG_IP)
597 continue; /* don't care */
598
599 if (i == REG_SP) {
600 /*
601 * If we were using a 16-bit stack segment, then
602 * the kernel is a bit stuck: IRET only restores
603 * the low 16 bits of ESP/RSP if SS is 16-bit.
604 * The kernel uses a hack to restore bits 31:16,
605 * but that hack doesn't help with bits 63:32.
606 * On Intel CPUs, bits 63:32 end up zeroed, and, on
607 * AMD CPUs, they leak the high bits of the kernel
608 * espfix64 stack pointer. There's very little that
609 * the kernel can do about it.
610 *
611 * Similarly, if we are returning to a 32-bit context,
612 * the CPU will often lose the high 32 bits of RSP.
613 */
614
615 if (res == req)
616 continue;
617
618 if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
619 printf("[NOTE]\tSP: %llx -> %llx\n",
620 (unsigned long long)req,
621 (unsigned long long)res);
622 continue;
623 }
624
625 printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
626 (unsigned long long)requested_regs[i],
627 (unsigned long long)resulting_regs[i]);
628 nerrs++;
629 continue;
630 }
631
632 bool ignore_reg = false;
633 #if __i386__
634 if (i == REG_UESP)
635 ignore_reg = true;
636 #else
637 if (i == REG_CSGSFS) {
638 struct selectors *req_sels =
639 (void *)&requested_regs[REG_CSGSFS];
640 struct selectors *res_sels =
641 (void *)&resulting_regs[REG_CSGSFS];
642 if (req_sels->cs != res_sels->cs) {
643 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
644 req_sels->cs, res_sels->cs);
645 nerrs++;
646 }
647
648 if (req_sels->ss != res_sels->ss) {
649 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
650 req_sels->ss, res_sels->ss);
651 nerrs++;
652 }
653
654 continue;
655 }
656 #endif
657
658 /* Sanity check on the kernel */
659 if (i == REG_CX && req != res) {
660 printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
661 (unsigned long long)req,
662 (unsigned long long)res);
663 nerrs++;
664 continue;
665 }
666
667 if (req != res && !ignore_reg) {
668 printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
669 i, (unsigned long long)req,
670 (unsigned long long)res);
671 nerrs++;
672 }
673 }
674
675 if (nerrs == 0)
676 printf("[OK]\tall registers okay\n");
677
678 return nerrs;
679 }
680
test_bad_iret(int cs_bits,unsigned short ss,int force_cs)681 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
682 {
683 int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
684 if (cs == -1)
685 return 0;
686
687 sig_cs = cs;
688 sig_ss = ss;
689
690 printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
691 cs_bits, sig_cs, sig_ss);
692
693 sig_trapped = 0;
694 raise(SIGUSR1);
695 if (sig_trapped) {
696 char errdesc[32] = "";
697 if (sig_err) {
698 const char *src = (sig_err & 1) ? " EXT" : "";
699 const char *table;
700 if ((sig_err & 0x6) == 0x0)
701 table = "GDT";
702 else if ((sig_err & 0x6) == 0x4)
703 table = "LDT";
704 else if ((sig_err & 0x6) == 0x2)
705 table = "IDT";
706 else
707 table = "???";
708
709 sprintf(errdesc, "%s%s index %d, ",
710 table, src, sig_err >> 3);
711 }
712
713 char trapname[32];
714 if (sig_trapno == 13)
715 strcpy(trapname, "GP");
716 else if (sig_trapno == 11)
717 strcpy(trapname, "NP");
718 else if (sig_trapno == 12)
719 strcpy(trapname, "SS");
720 else if (sig_trapno == 32)
721 strcpy(trapname, "IRET"); /* X86_TRAP_IRET */
722 else
723 sprintf(trapname, "%d", sig_trapno);
724
725 printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
726 trapname, (unsigned long)sig_err,
727 errdesc, strsignal(sig_trapped));
728 return 0;
729 } else {
730 /*
731 * This also implicitly tests UC_STRICT_RESTORE_SS:
732 * We check that these signals set UC_STRICT_RESTORE_SS and,
733 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
734 * then we won't get SIGSEGV.
735 */
736 printf("[FAIL]\tDid not get SIGSEGV\n");
737 return 1;
738 }
739 }
740
main()741 int main()
742 {
743 int total_nerrs = 0;
744 unsigned short my_cs, my_ss;
745
746 asm volatile ("mov %%cs,%0" : "=r" (my_cs));
747 asm volatile ("mov %%ss,%0" : "=r" (my_ss));
748 setup_ldt();
749
750 stack_t stack = {
751 /* Our sigaltstack scratch space. */
752 .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
753 .ss_size = SIGSTKSZ,
754 };
755 if (sigaltstack(&stack, NULL) != 0)
756 err(1, "sigaltstack");
757
758 sethandler(SIGUSR1, sigusr1, 0);
759 sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
760
761 /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
762 total_nerrs += test_valid_sigreturn(64, false, -1);
763 total_nerrs += test_valid_sigreturn(32, false, -1);
764 total_nerrs += test_valid_sigreturn(16, false, -1);
765
766 /*
767 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
768 * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant.
769 *
770 * This catches the original missing-espfix-on-64-bit-kernels issue
771 * as well as CVE-2014-8134.
772 */
773 total_nerrs += test_valid_sigreturn(64, true, -1);
774 total_nerrs += test_valid_sigreturn(32, true, -1);
775 total_nerrs += test_valid_sigreturn(16, true, -1);
776
777 if (gdt_data16_idx) {
778 /*
779 * For performance reasons, Linux skips espfix if SS points
780 * to the GDT. If we were able to allocate a 16-bit SS in
781 * the GDT, see if it leaks parts of the kernel stack pointer.
782 *
783 * This tests for CVE-2014-8133.
784 */
785 total_nerrs += test_valid_sigreturn(64, true,
786 GDT3(gdt_data16_idx));
787 total_nerrs += test_valid_sigreturn(32, true,
788 GDT3(gdt_data16_idx));
789 total_nerrs += test_valid_sigreturn(16, true,
790 GDT3(gdt_data16_idx));
791 }
792
793 #ifdef __x86_64__
794 /* Nasty ABI case: check SS corruption handling. */
795 sig_corrupt_final_ss = 1;
796 total_nerrs += test_valid_sigreturn(32, false, -1);
797 total_nerrs += test_valid_sigreturn(32, true, -1);
798 sig_corrupt_final_ss = 0;
799 #endif
800
801 /*
802 * We're done testing valid sigreturn cases. Now we test states
803 * for which sigreturn itself will succeed but the subsequent
804 * entry to user mode will fail.
805 *
806 * Depending on the failure mode and the kernel bitness, these
807 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
808 */
809 clearhandler(SIGTRAP);
810 sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
811 sethandler(SIGBUS, sigtrap, SA_ONSTACK);
812 sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */
813
814 /* Easy failures: invalid SS, resulting in #GP(0) */
815 test_bad_iret(64, ldt_nonexistent_sel, -1);
816 test_bad_iret(32, ldt_nonexistent_sel, -1);
817 test_bad_iret(16, ldt_nonexistent_sel, -1);
818
819 /* These fail because SS isn't a data segment, resulting in #GP(SS) */
820 test_bad_iret(64, my_cs, -1);
821 test_bad_iret(32, my_cs, -1);
822 test_bad_iret(16, my_cs, -1);
823
824 /* Try to return to a not-present code segment, triggering #NP(SS). */
825 test_bad_iret(32, my_ss, npcode32_sel);
826
827 /*
828 * Try to return to a not-present but otherwise valid data segment.
829 * This will cause IRET to fail with #SS on the espfix stack. This
830 * exercises CVE-2014-9322.
831 *
832 * Note that, if espfix is enabled, 64-bit Linux will lose track
833 * of the actual cause of failure and report #GP(0) instead.
834 * This would be very difficult for Linux to avoid, because
835 * espfix64 causes IRET failures to be promoted to #DF, so the
836 * original exception frame is never pushed onto the stack.
837 */
838 test_bad_iret(32, npdata32_sel, -1);
839
840 /*
841 * Try to return to a not-present but otherwise valid data
842 * segment without invoking espfix. Newer kernels don't allow
843 * this to happen in the first place. On older kernels, though,
844 * this can trigger CVE-2014-9322.
845 */
846 if (gdt_npdata32_idx)
847 test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
848
849 #ifdef __x86_64__
850 total_nerrs += test_nonstrict_ss();
851 #endif
852
853 free(stack.ss_sp);
854 return total_nerrs ? 1 : 0;
855 }
856