1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2024 Rivos, Inc.
4 * Deepak Gupta <debug@rivosinc.com>
5 */
6
7 #include <linux/sched.h>
8 #include <linux/bitops.h>
9 #include <linux/types.h>
10 #include <linux/mm.h>
11 #include <linux/mman.h>
12 #include <linux/uaccess.h>
13 #include <linux/sizes.h>
14 #include <linux/user.h>
15 #include <linux/syscalls.h>
16 #include <linux/prctl.h>
17 #include <asm/csr.h>
18 #include <asm/usercfi.h>
19
20 unsigned long riscv_nousercfi __read_mostly;
21
22 #define SHSTK_ENTRY_SIZE sizeof(void *)
23
is_shstk_enabled(struct task_struct * task)24 bool is_shstk_enabled(struct task_struct *task)
25 {
26 return task->thread_info.user_cfi_state.ubcfi_en;
27 }
28
is_shstk_allocated(struct task_struct * task)29 bool is_shstk_allocated(struct task_struct *task)
30 {
31 return task->thread_info.user_cfi_state.shdw_stk_base;
32 }
33
is_shstk_locked(struct task_struct * task)34 bool is_shstk_locked(struct task_struct *task)
35 {
36 return task->thread_info.user_cfi_state.ubcfi_locked;
37 }
38
set_shstk_base(struct task_struct * task,unsigned long shstk_addr,unsigned long size)39 void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
40 {
41 task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
42 task->thread_info.user_cfi_state.shdw_stk_size = size;
43 }
44
get_shstk_base(struct task_struct * task,unsigned long * size)45 unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
46 {
47 if (size)
48 *size = task->thread_info.user_cfi_state.shdw_stk_size;
49 return task->thread_info.user_cfi_state.shdw_stk_base;
50 }
51
set_active_shstk(struct task_struct * task,unsigned long shstk_addr)52 void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
53 {
54 task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
55 }
56
get_active_shstk(struct task_struct * task)57 unsigned long get_active_shstk(struct task_struct *task)
58 {
59 return task->thread_info.user_cfi_state.user_shdw_stk;
60 }
61
/*
 * Enable or disable the user shadow stack for @task: updates the per-task
 * software flag and mirrors it into the ENVCFG.SSE bit of the task's saved
 * envcfg, then writes CSR_ENVCFG.
 *
 * NOTE(review): the CSR is written unconditionally — presumably this is only
 * ever called for the current task (or before the task first runs); confirm
 * no caller targets a remote running task.
 */
void set_shstk_status(struct task_struct *task, bool enable)
{
	/* Nothing to do if Zicfiss is unsupported or disabled on the cmdline */
	if (!is_user_shstk_enabled())
		return;

	task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_SSE;
	else
		task->thread.envcfg &= ~ENVCFG_SSE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}
76
set_shstk_lock(struct task_struct * task)77 void set_shstk_lock(struct task_struct *task)
78 {
79 task->thread_info.user_cfi_state.ubcfi_locked = 1;
80 }
81
is_indir_lp_enabled(struct task_struct * task)82 bool is_indir_lp_enabled(struct task_struct *task)
83 {
84 return task->thread_info.user_cfi_state.ufcfi_en;
85 }
86
is_indir_lp_locked(struct task_struct * task)87 bool is_indir_lp_locked(struct task_struct *task)
88 {
89 return task->thread_info.user_cfi_state.ufcfi_locked;
90 }
91
/*
 * Enable or disable indirect-branch landing pad tracking for @task: updates
 * the per-task software flag, mirrors it into ENVCFG.LPE in the saved envcfg,
 * then writes CSR_ENVCFG.
 *
 * NOTE(review): as with set_shstk_status(), the CSR write assumes the target
 * is the current task (or not yet running) — confirm callers.
 */
void set_indir_lp_status(struct task_struct *task, bool enable)
{
	/* Nothing to do if Zicfilp is unsupported or disabled on the cmdline */
	if (!is_user_lpad_enabled())
		return;

	task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_LPE;
	else
		task->thread.envcfg &= ~ENVCFG_LPE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}
106
set_indir_lp_lock(struct task_struct * task)107 void set_indir_lp_lock(struct task_struct *task)
108 {
109 task->thread_info.user_cfi_state.ufcfi_locked = 1;
110 }
111 /*
112 * If size is 0, then to be compatible with regular stack we want it to be as big as
113 * regular stack. Else PAGE_ALIGN it and return back
114 */
calc_shstk_size(unsigned long size)115 static unsigned long calc_shstk_size(unsigned long size)
116 {
117 if (size)
118 return PAGE_ALIGN(size);
119
120 return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
121 }
122
123 /*
124 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
125 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
126 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow
127 * stack.
128 */
/*
 * Atomically swap @val into the user shadow stack slot at @addr and return
 * the previous value, or -1 on fault (exception table entry routes a failed
 * access to the fault label).
 *
 * NOTE(review): `ssamoswap.d` is the 64-bit form — presumably this file is
 * RV64-only; confirm if rv32/Zicfiss is ever a target.
 */
static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val)
{
	/*
	 * -1 is used as the fault sentinel: a shadow stack never legitimately
	 * holds -1 — only return addresses and zero are expected there.
	 */
	unsigned long swap = -1;

	/* Temporarily permit supervisor access to user memory for the swap */
	__enable_user_access();
	asm goto(".option push\n"
		 ".option arch, +zicfiss\n"
		 "1: ssamoswap.d %[swap], %[val], %[addr]\n"
		 _ASM_EXTABLE(1b, %l[fault])
		 ".option pop\n"
		 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr)
		 : [val] "r" (val)
		 : "memory"
		 : fault
		);
	__disable_user_access();
	return swap;
fault:
	/* Faulted inside the AMO: close the user-access window before returning */
	__disable_user_access();
	return -1;
}
153
154 /*
155 * Create a restore token on the shadow stack. A token is always XLEN wide
156 * and aligned to XLEN.
157 */
create_rstor_token(unsigned long ssp,unsigned long * token_addr)158 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
159 {
160 unsigned long addr;
161
162 /* Token must be aligned */
163 if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
164 return -EINVAL;
165
166 /* On RISC-V we're constructing token to be function of address itself */
167 addr = ssp - SHSTK_ENTRY_SIZE;
168
169 if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1)
170 return -EFAULT;
171
172 if (token_addr)
173 *token_addr = addr;
174
175 return 0;
176 }
177
178 /*
179 * Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location.
180 * Returns -EFAULT if unsuccessful.
181 */
save_user_shstk(struct task_struct * tsk,unsigned long * saved_shstk_ptr)182 int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr)
183 {
184 unsigned long ss_ptr = 0;
185 unsigned long token_loc = 0;
186 int ret = 0;
187
188 if (!saved_shstk_ptr)
189 return -EINVAL;
190
191 ss_ptr = get_active_shstk(tsk);
192 ret = create_rstor_token(ss_ptr, &token_loc);
193
194 if (!ret) {
195 *saved_shstk_ptr = token_loc;
196 set_active_shstk(tsk, token_loc);
197 }
198
199 return ret;
200 }
201
202 /*
203 * Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'.
204 * Returns -EFAULT if unsuccessful.
205 */
int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr)
{
	unsigned long token = 0;

	/* Atomically consume the token: read it and clear the slot to zero */
	token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0);

	if (token == -1)
		return -EFAULT;

	/*
	 * A valid token's value is the address one entry above its own
	 * location (see create_rstor_token()); anything else is invalid,
	 * return EINVAL.
	 */
	if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) {
		pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n",
				    tsk->comm, task_pid_nr(tsk), __func__,
				    (void *)(task_pt_regs(tsk)->epc),
				    (void *)(task_pt_regs(tsk)->sp),
				    (void *)token, (void *)shstk_ptr);
		return -EINVAL;
	}

	/* all checks passed, set active shstk and return success */
	set_active_shstk(tsk, token);
	return 0;
}
229
/*
 * mmap a shadow stack region of @size bytes. @addr is a hint unless non-zero,
 * in which case the placement is fixed (failing rather than replacing an
 * existing mapping). When @set_tok, seed a restore token at @token_offset
 * from the base; on token failure the mapping is torn down again.
 *
 * Returns the mapped address, or an error value (check with IS_ERR_VALUE()).
 */
static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
					   unsigned long token_offset, bool set_tok)
{
	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
	struct mm_struct *mm = current->mm;
	unsigned long populate;

	if (addr)
		flags |= MAP_FIXED_NOREPLACE;

	mmap_write_lock(mm);
	/* VM_SHADOW_STACK | VM_WRITE marks the vma as shadow stack memory */
	addr = do_mmap(NULL, addr, size, PROT_READ, flags,
		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);

	if (!set_tok || IS_ERR_VALUE(addr))
		goto out;

	/* Token write failed: undo the mapping rather than leak it */
	if (create_rstor_token(addr + token_offset, NULL)) {
		vm_munmap(addr, size);
		return -EINVAL;
	}

out:
	return addr;
}
256
/*
 * map_shadow_stack() syscall: allocate a shadow stack mapping of @size bytes
 * (optionally at @addr, which must be page aligned), placing a restore token
 * at the top when SHADOW_STACK_SET_TOKEN is given in @flags.
 */
SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
	unsigned long aligned_size = 0;

	if (!is_user_shstk_enabled())
		return -EOPNOTSUPP;

	/* Anything other than set token should result in invalid param */
	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/*
	 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available
	 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction
	 * itself. This provides static property on register programming and writes to CSR can't
	 * be unintentional from programmer's perspective. As long as programmer has guarded areas
	 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since
	 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent
	 * to allocation. Although in order to provide portablity with other architectures (because
	 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token
	 * flag in flags and if provided in flags, will setup a token at the base.
	 */

	/* If there isn't space for a token */
	if (set_tok && size < SHSTK_ENTRY_SIZE)
		return -ENOSPC;

	/* An explicit placement must be page aligned */
	if (addr && (addr & (PAGE_SIZE - 1)))
		return -EINVAL;

	aligned_size = PAGE_ALIGN(size);
	/* PAGE_ALIGN wrapped past ULONG_MAX */
	if (aligned_size < size)
		return -EOVERFLOW;

	/* Token offset is the caller-requested @size, i.e. the visible top */
	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
}
294
295 /*
296 * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for
297 * cases where CLONE_VM is specified and thus a different stack is specified by user. We
298 * thus need a separate shadow stack too. How a separate shadow stack is specified by
299 * user is still being debated. Once that's settled, remove this part of the comment.
300 * This function simply returns 0 if shadow stacks are not supported or if separate shadow
301 * stack allocation is not needed (like in case of !CLONE_VM)
302 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
				       const struct kernel_clone_args *args)
{
	unsigned long addr, size;

	/* If shadow stack is not supported, return 0 */
	if (!is_user_shstk_enabled())
		return 0;

	/*
	 * If shadow stack is not enabled on the new thread, skip any
	 * switch to a new shadow stack.
	 */
	if (!is_shstk_enabled(tsk))
		return 0;

	/*
	 * For CLONE_VFORK the child will share the parents shadow stack.
	 * Set base = 0 and size = 0, this is special means to track this state
	 * so the freeing logic run for child knows to leave it alone.
	 */
	if (args->flags & CLONE_VFORK) {
		set_shstk_base(tsk, 0, 0);
		return 0;
	}

	/*
	 * For !CLONE_VM the child will use a copy of the parents shadow
	 * stack.
	 */
	if (!(args->flags & CLONE_VM))
		return 0;

	/*
	 * reaching here means, CLONE_VM was specified and thus a separate shadow
	 * stack is needed for new cloned thread. Note: below allocation is happening
	 * using current mm.
	 */
	/* Size 0 request => match the regular stack rlimit (capped at 4G) */
	size = calc_shstk_size(args->stack_size);
	addr = allocate_shadow_stack(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return addr;

	set_shstk_base(tsk, addr, size);

	/* Shadow stack grows down: hand back the top of the new region */
	return addr + size;
}
350
/* Unmap and forget @tsk's shadow stack, if it owns one in the current mm. */
void shstk_release(struct task_struct *tsk)
{
	unsigned long base = 0, size = 0;
	/* If shadow stack is not supported or not enabled, nothing to release */
	if (!is_user_shstk_enabled() || !is_shstk_enabled(tsk))
		return;

	/*
	 * When fork() with CLONE_VM fails, the child (tsk) already has a
	 * shadow stack allocated, and exit_thread() calls this function to
	 * free it. In this case the parent (current) and the child share
	 * the same mm struct. Move forward only when they're same.
	 */
	if (!tsk->mm || tsk->mm != current->mm)
		return;

	/*
	 * We know shadow stack is enabled but if base is NULL, then
	 * this task is not managing its own shadow stack (CLONE_VFORK). So
	 * skip freeing it.
	 */
	base = get_shstk_base(tsk, &size);
	if (!base)
		return;

	vm_munmap(base, size);
	/* Clear the bookkeeping so a repeat release is a no-op */
	set_shstk_base(tsk, 0, 0);
}
379
/*
 * PR_GET_SHADOW_STACK_STATUS backend: report @t's shadow stack enable bit
 * to user memory at @status. Returns 0 or a negative errno.
 */
int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long bcfi_status;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* Only the enable bit is reported today */
	bcfi_status = is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0;

	return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0;
}
392
/*
 * PR_SET_SHADOW_STACK_STATUS backend: enable (allocating a fresh shadow
 * stack if needed) or disable (releasing the stack) shadow stacks for @t.
 */
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and further can't be modified by user */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;
	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/* shadow stack was allocated and enable request again
		 * no need to support such usecase and return EINVAL.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		/* Size 0 => as big as the regular stack rlimit (capped at 4G) */
		size = calc_shstk_size(0);
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		/* Stack grows down: active SSP starts at the top of the region */
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack happens, let's go ahead and release it
	 * Although, if CLONE_VFORKed child did this, then in that case we will end up
	 * not releasing the shadow stack (because it might be needed in parent). Although
	 * we will disable it for VFORKed child. And if VFORKed child tries to enable again
	 * then in that case, it'll get entirely new shadow stack because following condition
	 * are true
	 * - shadow stack was not enabled for vforked child
	 * - shadow stack base was anyways pointing to 0
	 * This shouldn't be a big issue because we want parent to have availability of shadow
	 * stack whenever VFORKed child releases resources via exit or exec but at the same
	 * time we want VFORKed child to break away and establish new shadow stack if it desires
	 *
	 */
	if (!enable_shstk)
		shstk_release(t);

	set_shstk_status(t, enable_shstk);
	return 0;
}
446
arch_lock_shadow_stack_status(struct task_struct * task,unsigned long arg)447 int arch_lock_shadow_stack_status(struct task_struct *task,
448 unsigned long arg)
449 {
450 /* If shtstk not supported or not enabled on task, nothing to lock here */
451 if (!is_user_shstk_enabled() ||
452 !is_shstk_enabled(task) || arg != 0)
453 return -EINVAL;
454
455 set_shstk_lock(task);
456
457 return 0;
458 }
459
/*
 * PR_GET_INDIR_BR_LP_STATUS backend: report @t's landing pad enable bit to
 * user memory at @status. Returns 0 or a negative errno.
 */
int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long fcfi_status;

	if (!is_user_lpad_enabled())
		return -EINVAL;

	/* Only the enable bit is reported today */
	fcfi_status = is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0;

	return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
}
472
arch_set_indir_br_lp_status(struct task_struct * t,unsigned long status)473 int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status)
474 {
475 bool enable_indir_lp = false;
476
477 if (!is_user_lpad_enabled())
478 return -EINVAL;
479
480 /* indirect branch tracking is locked and further can't be modified by user */
481 if (is_indir_lp_locked(t))
482 return -EINVAL;
483
484 /* Reject unknown flags */
485 if (status & ~PR_INDIR_BR_LP_ENABLE)
486 return -EINVAL;
487
488 enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE);
489 set_indir_lp_status(t, enable_indir_lp);
490
491 return 0;
492 }
493
arch_lock_indir_br_lp_status(struct task_struct * task,unsigned long arg)494 int arch_lock_indir_br_lp_status(struct task_struct *task,
495 unsigned long arg)
496 {
497 /*
498 * If indirect branch tracking is not supported or not enabled on task,
499 * nothing to lock here
500 */
501 if (!is_user_lpad_enabled() ||
502 !is_indir_lp_enabled(task) || arg != 0)
503 return -EINVAL;
504
505 set_indir_lp_lock(task);
506
507 return 0;
508 }
509
is_user_shstk_enabled(void)510 bool is_user_shstk_enabled(void)
511 {
512 return (cpu_supports_shadow_stack() &&
513 !(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI));
514 }
515
is_user_lpad_enabled(void)516 bool is_user_lpad_enabled(void)
517 {
518 return (cpu_supports_indirect_br_lp_instr() &&
519 !(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI));
520 }
521
setup_global_riscv_enable(char * str)522 static int __init setup_global_riscv_enable(char *str)
523 {
524 if (strcmp(str, "all") == 0)
525 riscv_nousercfi = CMDLINE_DISABLE_RISCV_USERCFI;
526
527 if (strcmp(str, "fcfi") == 0)
528 riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_FCFI;
529
530 if (strcmp(str, "bcfi") == 0)
531 riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_BCFI;
532
533 if (riscv_nousercfi)
534 pr_info("RISC-V user CFI disabled via cmdline - shadow stack status : %s, landing pad status : %s\n",
535 (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI) ? "disabled" :
536 "enabled", (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI) ?
537 "disabled" : "enabled");
538
539 return 1;
540 }
541
542 __setup("riscv_nousercfi=", setup_global_riscv_enable);
543