1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5 */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/bpf_mem_alloc.h>
30 #include <net/xdp.h>
31 #include <linux/trace_events.h>
32 #include <linux/kallsyms.h>
33
34 #include "disasm.h"
35
/* Table of per-program-type verifier ops, indexed by the program type id.
 *
 * The initializers are generated by including <linux/bpf_types.h> with
 * BPF_PROG_TYPE() defined to emit "[_id] = &<_name>_verifier_ops,".
 * BPF_MAP_TYPE() and BPF_LINK_TYPE() are defined empty, so map and link
 * entries of that shared header expand to nothing here. All three helper
 * macros are undefined again right after the include.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
46
/* Feature identifiers. __MAX_BPF_FEAT is not a feature itself: it is the
 * enumerator following the last feature and therefore equals the number of
 * features defined above it.
 * NOTE(review): the consumers of these values are outside this excerpt.
 */
enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
	BPF_FEAT_STREAMS = 1,
	__MAX_BPF_FEAT,
};
52
/* Non-static BPF memory allocator object plus a file-local flag.
 * NOTE(review): presumably the flag records whether the allocator has been
 * set up; the code that initialises and reads both is outside this
 * excerpt - confirm.
 */
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
55
56 /* bpf_check() is a static code analyzer that walks eBPF program
57 * instruction by instruction and updates register/stack state.
58 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
59 *
60 * The first pass is depth-first-search to check that the program is a DAG.
61 * It rejects the following programs:
62 * - larger than BPF_MAXINSNS insns
63 * - if loop is present (detected via back-edge)
64 * - unreachable insns exist (shouldn't be a forest. program = one function)
65 * - out of bounds or malformed jumps
66 * The second pass is all possible path descent from the 1st insn.
67 * Since it's analyzing all paths through the program, the length of the
68 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
70 * Number of 'branches to be analyzed' is limited to 1k
71 *
72 * On entry to each instruction, each register has a type, and the instruction
73 * changes the types of the registers depending on instruction semantics.
74 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
75 * copied to R1.
76 *
77 * All registers are 64-bit.
78 * R0 - return register
79 * R1-R5 argument passing registers
80 * R6-R9 callee saved registers
81 * R10 - frame pointer read-only
82 *
83 * At the start of BPF program the register R1 contains a pointer to bpf_context
84 * and has type PTR_TO_CTX.
85 *
86 * Verifier tracks arithmetic operations on pointers in case:
87 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
88 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
89 * 1st insn copies R10 (which has FRAME_PTR) type into R1
90 * and 2nd arithmetic instruction is pattern matched to recognize
91 * that it wants to construct a pointer to some element within stack.
92 * So after 2nd insn, the register R1 has type PTR_TO_STACK
93 * (and -20 constant is saved for further stack bounds checking).
94 * Meaning that this reg is a pointer to stack plus known immediate constant.
95 *
96 * Most of the time the registers have SCALAR_VALUE type, which
97 * means the register has some value, but it's not a valid pointer.
98 * (like pointer plus pointer becomes SCALAR_VALUE type)
99 *
100 * When verifier sees load or store instructions the type of base register
101 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
102 * four pointer types recognized by check_mem_access() function.
103 *
104 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
105 * and the range of [ptr, ptr + map's value_size) is accessible.
106 *
107 * registers used to pass values to function calls are checked against
108 * function argument constraints.
109 *
110 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
111 * It means that the register type passed to this function must be
112 * PTR_TO_STACK and it will be used inside the function as
113 * 'pointer to map element key'
114 *
115 * For example the argument constraints for bpf_map_lookup_elem():
116 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
117 * .arg1_type = ARG_CONST_MAP_PTR,
118 * .arg2_type = ARG_PTR_TO_MAP_KEY,
119 *
120 * ret_type says that this function returns 'pointer to map elem value or null'
121 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
122 * 2nd argument should be a pointer to stack, which will be used inside
123 * the helper function as a pointer to map element key.
124 *
125 * On the kernel side the helper function looks like:
126 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
127 * {
128 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
129 * void *key = (void *) (unsigned long) r2;
130 * void *value;
131 *
132 * here kernel can access 'key' and 'map' pointers safely, knowing that
133 * [key, key + map->key_size) bytes are valid and were initialized on
134 * the stack of eBPF program.
135 * }
136 *
137 * Corresponding eBPF program may look like:
138 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
139 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
140 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
141 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
142 * here verifier looks at prototype of map_lookup_elem() and sees:
143 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
144 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
145 *
146 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
147 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
148 * and were initialized prior to this call.
149 * If it's ok, then verifier allows this BPF_CALL insn and looks at
150 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
151 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
152 * returns either pointer to map value or NULL.
153 *
154 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
155 * insn, the register holding that pointer in the true branch changes state to
156 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
157 * branch. See check_cond_jmp_op().
158 *
159 * After the call R0 is set to return type of the function and registers R1-R5
160 * are set to NOT_INIT to indicate that they are no longer readable.
161 *
162 * The following reference types represent a potential reference to a kernel
163 * resource which, after first being allocated, must be checked and freed by
164 * the BPF program:
165 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
166 *
167 * When the verifier sees a helper call return a reference type, it allocates a
168 * pointer id for the reference and stores it in the current function state.
169 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
170 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
171 * passes through a NULL-check conditional. For the branch wherein the state is
172 * changed to CONST_IMM, the verifier releases the reference.
173 *
174 * For each helper function that allocates a reference, such as
175 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
176 * bpf_sk_release(). When a reference type passes into the release function,
177 * the verifier also releases the reference. If any unchecked or unreleased
178 * reference remains at the end of the program, the verifier rejects it.
179 */
180
/* verifier_state + insn_idx are pushed to stack when branch is encountered.
 *
 * Each element carries a full copy of the verifier state together with the
 * instruction indices it belongs to and a link to another element.
 */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* link to another stack element */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
194
/* Complexity limits.
 * NOTE(review): the places that enforce these limits are outside this
 * excerpt; the names suggest a cap on queued jump states and on states
 * kept per instruction - confirm.
 */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64

/* Flag bits stored in bpf_insn_aux_data::map_key_state. The remaining bits
 * hold the key value returned by bpf_map_key_immediate().
 */
#define BPF_MAP_KEY_POISON (1ULL << 63)
#define BPF_MAP_KEY_SEEN (1ULL << 62)

/* NOTE(review): size limit related to bpf_global_percpu_ma, judging by the
 * name; its use is outside this excerpt.
 */
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512

/* NOTE(review): minimum size related to private stacks, judging by the
 * name; its use is outside this excerpt.
 */
#define BPF_PRIV_STACK_MIN_SIZE 64
204
/* Forward declarations of static helpers that are referenced before their
 * definitions (for example is_trusted_reg() in reg_not_null() and
 * acquire_reference()/release_reference() in the dynptr stack slot code).
 */
static int acquire_reference(struct bpf_verifier_env *env, int insn_idx);
static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg);
static bool is_trusted_reg(const struct bpf_reg_state *reg);
213
bpf_map_ptr_poisoned(const struct bpf_insn_aux_data * aux)214 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
215 {
216 return aux->map_ptr_state.poison;
217 }
218
bpf_map_ptr_unpriv(const struct bpf_insn_aux_data * aux)219 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
220 {
221 return aux->map_ptr_state.unpriv;
222 }
223
/* Record @map together with its flags in @aux->map_ptr_state.
 *
 * The 'unpriv' flag is sticky: once it has been stored as true it stays
 * true regardless of the @unpriv argument of later calls. 'poison' and
 * 'map_ptr' are simply overwritten.
 */
static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      struct bpf_map *map,
			      bool unpriv, bool poison)
{
	if (bpf_map_ptr_unpriv(aux))
		unpriv = true;

	aux->map_ptr_state.unpriv = unpriv;
	aux->map_ptr_state.poison = poison;
	aux->map_ptr_state.map_ptr = map;
}
233
bpf_map_key_poisoned(const struct bpf_insn_aux_data * aux)234 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
235 {
236 return aux->map_key_state & BPF_MAP_KEY_POISON;
237 }
238
bpf_map_key_unseen(const struct bpf_insn_aux_data * aux)239 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
240 {
241 return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
242 }
243
bpf_map_key_immediate(const struct bpf_insn_aux_data * aux)244 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
245 {
246 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
247 }
248
/* Store @state in @aux->map_key_state and mark the key as seen.
 *
 * An already present BPF_MAP_KEY_POISON bit is carried over, so a poisoned
 * key stays poisoned across stores.
 */
static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	u64 new_state = state | BPF_MAP_KEY_SEEN;

	if (bpf_map_key_poisoned(aux))
		new_state |= BPF_MAP_KEY_POISON;

	aux->map_key_state = new_state;
}
256
bpf_helper_call(const struct bpf_insn * insn)257 static bool bpf_helper_call(const struct bpf_insn *insn)
258 {
259 return insn->code == (BPF_JMP | BPF_CALL) &&
260 insn->src_reg == 0;
261 }
262
bpf_pseudo_call(const struct bpf_insn * insn)263 static bool bpf_pseudo_call(const struct bpf_insn *insn)
264 {
265 return insn->code == (BPF_JMP | BPF_CALL) &&
266 insn->src_reg == BPF_PSEUDO_CALL;
267 }
268
bpf_pseudo_kfunc_call(const struct bpf_insn * insn)269 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
270 {
271 return insn->code == (BPF_JMP | BPF_CALL) &&
272 insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
273 }
274
/* A map pointer paired with an integer 'uid'. Embedded as the 'map' member
 * of both bpf_call_arg_meta and bpf_kfunc_call_arg_meta.
 * NOTE(review): what 'uid' identifies is not visible in this excerpt.
 */
struct bpf_map_desc {
	struct bpf_map *ptr;
	int uid;
};
279
/* Metadata bundle for a call's arguments.
 * NOTE(review): judging by the name and by the separate
 * bpf_kfunc_call_arg_meta, this is presumably the helper-call counterpart
 * that is filled in while arguments are checked; the code that reads and
 * writes these fields is outside this excerpt - confirm before relying on
 * any per-field meaning.
 */
struct bpf_call_arg_meta {
	struct bpf_map_desc map;
	bool raw_mode;
	bool pkt_access;
	u8 release_regno;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int ref_obj_id;
	int dynptr_id;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct btf_field *kptr_field;
	s64 const_map_key;
};
300
/* Description of a kfunc: the BTF object, a BTF type for its prototype, its
 * name, a pointer to its flags and an id.
 * NOTE(review): the producer of this structure is outside this excerpt;
 * the field roles above are read off the member names and types only.
 */
struct bpf_kfunc_meta {
	struct btf *btf;
	const struct btf_type *proto;
	const char *name;
	const u32 *flags;
	s32 id;
};
308
/* Metadata for a kfunc call, split into inputs describing the kfunc being
 * called and outputs/scratch fields (see the "In parameters" and
 * "Out parameters" markers below).
 * NOTE(review): the code that fills in and consumes this structure is
 * outside this excerpt.
 */
struct bpf_kfunc_call_arg_meta {
	/* In parameters */
	struct btf *btf;
	u32 func_id;
	u32 kfunc_flags;
	const struct btf_type *func_proto;
	const char *func_name;
	/* Out parameters */
	u32 ref_obj_id;
	u8 release_regno;
	bool r0_rdonly;
	u32 ret_btf_id;
	u64 r0_size;
	u32 subprogno;
	struct {
		u64 value;
		bool found;
	} arg_constant;

	/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
	 * generally to pass info about user-defined local kptr types to later
	 * verification logic
	 *   bpf_obj_drop/bpf_percpu_obj_drop
	 *     Record the local kptr type to be drop'd
	 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
	 *     Record the local kptr type to be refcount_incr'd and use
	 *     arg_owning_ref to determine whether refcount_acquire should be
	 *     fallible
	 */
	struct btf *arg_btf;
	u32 arg_btf_id;
	bool arg_owning_ref;
	bool arg_prog;

	struct {
		struct btf_field *field;
	} arg_list_head;
	struct {
		struct btf_field *field;
	} arg_rbtree_root;
	struct {
		enum bpf_dynptr_type type;
		u32 id;
		u32 ref_obj_id;
	} initialized_dynptr;
	struct {
		u8 spi;
		u8 frameno;
	} iter;
	struct bpf_map_desc map;
	u64 mem_size;
};
361
/* Non-static BTF pointer, visible outside this file.
 * NOTE(review): presumably the BTF of the running kernel image, going by
 * the name; where it is assigned is outside this excerpt.
 */
struct btf *btf_vmlinux;
363
/* Look up the type with id @id in @btf and return the string found at that
 * type's name_off.
 */
static const char *btf_type_name(const struct btf *btf, u32 id)
{
	const struct btf_type *t = btf_type_by_id(btf, id);

	return btf_name_by_offset(btf, t->name_off);
}
368
/* File-local mutexes.
 * NOTE(review): going by the names, the first presumably serialises
 * verifier runs and the second guards setup of bpf_global_percpu_ma; the
 * lock/unlock sites are outside this excerpt - confirm.
 */
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
371
verbose(void * private_data,const char * fmt,...)372 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
373 {
374 struct bpf_verifier_env *env = private_data;
375 va_list args;
376
377 if (!bpf_verifier_log_needed(&env->log))
378 return;
379
380 va_start(args, fmt);
381 bpf_verifier_vlog(&env->log, fmt, args);
382 va_end(args);
383 }
384
verbose_invalid_scalar(struct bpf_verifier_env * env,struct bpf_reg_state * reg,struct bpf_retval_range range,const char * ctx,const char * reg_name)385 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
386 struct bpf_reg_state *reg,
387 struct bpf_retval_range range, const char *ctx,
388 const char *reg_name)
389 {
390 bool unknown = true;
391
392 verbose(env, "%s the register %s has", ctx, reg_name);
393 if (reg->smin_value > S64_MIN) {
394 verbose(env, " smin=%lld", reg->smin_value);
395 unknown = false;
396 }
397 if (reg->smax_value < S64_MAX) {
398 verbose(env, " smax=%lld", reg->smax_value);
399 unknown = false;
400 }
401 if (unknown)
402 verbose(env, " unknown scalar value");
403 verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
404 }
405
reg_not_null(const struct bpf_reg_state * reg)406 static bool reg_not_null(const struct bpf_reg_state *reg)
407 {
408 enum bpf_reg_type type;
409
410 type = reg->type;
411 if (type_may_be_null(type))
412 return false;
413
414 type = base_type(type);
415 return type == PTR_TO_SOCKET ||
416 type == PTR_TO_TCP_SOCK ||
417 type == PTR_TO_MAP_VALUE ||
418 type == PTR_TO_MAP_KEY ||
419 type == PTR_TO_SOCK_COMMON ||
420 (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
421 (type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
422 type == CONST_PTR_TO_MAP;
423 }
424
reg_btf_record(const struct bpf_reg_state * reg)425 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
426 {
427 struct btf_record *rec = NULL;
428 struct btf_struct_meta *meta;
429
430 if (reg->type == PTR_TO_MAP_VALUE) {
431 rec = reg->map_ptr->record;
432 } else if (type_is_ptr_alloc_obj(reg->type)) {
433 meta = btf_find_struct_meta(reg->btf, reg->btf_id);
434 if (meta)
435 rec = meta->record;
436 }
437 return rec;
438 }
439
subprog_is_global(const struct bpf_verifier_env * env,int subprog)440 static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
441 {
442 struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
443
444 return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
445 }
446
subprog_name(const struct bpf_verifier_env * env,int subprog)447 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
448 {
449 struct bpf_func_info *info;
450
451 if (!env->prog->aux->func_info)
452 return "";
453
454 info = &env->prog->aux->func_info[subprog];
455 return btf_type_name(env->prog->aux->btf, info->type_id);
456 }
457
mark_subprog_exc_cb(struct bpf_verifier_env * env,int subprog)458 static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
459 {
460 struct bpf_subprog_info *info = subprog_info(env, subprog);
461
462 info->is_cb = true;
463 info->is_async_cb = true;
464 info->is_exception_cb = true;
465 }
466
subprog_is_exc_cb(struct bpf_verifier_env * env,int subprog)467 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
468 {
469 return subprog_info(env, subprog)->is_exception_cb;
470 }
471
reg_may_point_to_spin_lock(const struct bpf_reg_state * reg)472 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
473 {
474 return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
475 }
476
type_is_rdonly_mem(u32 type)477 static bool type_is_rdonly_mem(u32 type)
478 {
479 return type & MEM_RDONLY;
480 }
481
is_acquire_function(enum bpf_func_id func_id,const struct bpf_map * map)482 static bool is_acquire_function(enum bpf_func_id func_id,
483 const struct bpf_map *map)
484 {
485 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
486
487 if (func_id == BPF_FUNC_sk_lookup_tcp ||
488 func_id == BPF_FUNC_sk_lookup_udp ||
489 func_id == BPF_FUNC_skc_lookup_tcp ||
490 func_id == BPF_FUNC_ringbuf_reserve ||
491 func_id == BPF_FUNC_kptr_xchg)
492 return true;
493
494 if (func_id == BPF_FUNC_map_lookup_elem &&
495 (map_type == BPF_MAP_TYPE_SOCKMAP ||
496 map_type == BPF_MAP_TYPE_SOCKHASH))
497 return true;
498
499 return false;
500 }
501
is_ptr_cast_function(enum bpf_func_id func_id)502 static bool is_ptr_cast_function(enum bpf_func_id func_id)
503 {
504 return func_id == BPF_FUNC_tcp_sock ||
505 func_id == BPF_FUNC_sk_fullsock ||
506 func_id == BPF_FUNC_skc_to_tcp_sock ||
507 func_id == BPF_FUNC_skc_to_tcp6_sock ||
508 func_id == BPF_FUNC_skc_to_udp6_sock ||
509 func_id == BPF_FUNC_skc_to_mptcp_sock ||
510 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
511 func_id == BPF_FUNC_skc_to_tcp_request_sock;
512 }
513
is_dynptr_ref_function(enum bpf_func_id func_id)514 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
515 {
516 return func_id == BPF_FUNC_dynptr_data;
517 }
518
/* Forward declarations of kfunc classification helpers that the
 * instruction-level predicates below call before the definitions appear.
 */
static bool is_sync_callback_calling_kfunc(u32 btf_id);
static bool is_async_callback_calling_kfunc(u32 btf_id);
static bool is_callback_calling_kfunc(u32 btf_id);
static bool is_bpf_throw_kfunc(struct bpf_insn *insn);

static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
static bool is_task_work_add_kfunc(u32 func_id);
526
is_sync_callback_calling_function(enum bpf_func_id func_id)527 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
528 {
529 return func_id == BPF_FUNC_for_each_map_elem ||
530 func_id == BPF_FUNC_find_vma ||
531 func_id == BPF_FUNC_loop ||
532 func_id == BPF_FUNC_user_ringbuf_drain;
533 }
534
is_async_callback_calling_function(enum bpf_func_id func_id)535 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
536 {
537 return func_id == BPF_FUNC_timer_set_callback;
538 }
539
is_callback_calling_function(enum bpf_func_id func_id)540 static bool is_callback_calling_function(enum bpf_func_id func_id)
541 {
542 return is_sync_callback_calling_function(func_id) ||
543 is_async_callback_calling_function(func_id);
544 }
545
is_sync_callback_calling_insn(struct bpf_insn * insn)546 static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
547 {
548 return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
549 (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
550 }
551
is_async_callback_calling_insn(struct bpf_insn * insn)552 static bool is_async_callback_calling_insn(struct bpf_insn *insn)
553 {
554 return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
555 (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
556 }
557
is_async_cb_sleepable(struct bpf_verifier_env * env,struct bpf_insn * insn)558 static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn)
559 {
560 /* bpf_timer callbacks are never sleepable. */
561 if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback)
562 return false;
563
564 /* bpf_wq and bpf_task_work callbacks are always sleepable. */
565 if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
566 (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
567 return true;
568
569 verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
570 return false;
571 }
572
is_may_goto_insn(struct bpf_insn * insn)573 static bool is_may_goto_insn(struct bpf_insn *insn)
574 {
575 return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
576 }
577
is_may_goto_insn_at(struct bpf_verifier_env * env,int insn_idx)578 static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx)
579 {
580 return is_may_goto_insn(&env->prog->insnsi[insn_idx]);
581 }
582
is_storage_get_function(enum bpf_func_id func_id)583 static bool is_storage_get_function(enum bpf_func_id func_id)
584 {
585 return func_id == BPF_FUNC_sk_storage_get ||
586 func_id == BPF_FUNC_inode_storage_get ||
587 func_id == BPF_FUNC_task_storage_get ||
588 func_id == BPF_FUNC_cgrp_storage_get;
589 }
590
helper_multiple_ref_obj_use(enum bpf_func_id func_id,const struct bpf_map * map)591 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
592 const struct bpf_map *map)
593 {
594 int ref_obj_uses = 0;
595
596 if (is_ptr_cast_function(func_id))
597 ref_obj_uses++;
598 if (is_acquire_function(func_id, map))
599 ref_obj_uses++;
600 if (is_dynptr_ref_function(func_id))
601 ref_obj_uses++;
602
603 return ref_obj_uses > 1;
604 }
605
is_cmpxchg_insn(const struct bpf_insn * insn)606 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
607 {
608 return BPF_CLASS(insn->code) == BPF_STX &&
609 BPF_MODE(insn->code) == BPF_ATOMIC &&
610 insn->imm == BPF_CMPXCHG;
611 }
612
is_atomic_load_insn(const struct bpf_insn * insn)613 static bool is_atomic_load_insn(const struct bpf_insn *insn)
614 {
615 return BPF_CLASS(insn->code) == BPF_STX &&
616 BPF_MODE(insn->code) == BPF_ATOMIC &&
617 insn->imm == BPF_LOAD_ACQ;
618 }
619
is_atomic_fetch_insn(const struct bpf_insn * insn)620 static bool is_atomic_fetch_insn(const struct bpf_insn *insn)
621 {
622 return BPF_CLASS(insn->code) == BPF_STX &&
623 BPF_MODE(insn->code) == BPF_ATOMIC &&
624 (insn->imm & BPF_FETCH);
625 }
626
/* Convert stack offset @off into a stack slot index: (-off - 1) divided by
 * BPF_REG_SIZE.
 */
static int __get_spi(s32 off)
{
	int distance = -off - 1;

	return distance / BPF_REG_SIZE;
}
631
func(struct bpf_verifier_env * env,const struct bpf_reg_state * reg)632 static struct bpf_func_state *func(struct bpf_verifier_env *env,
633 const struct bpf_reg_state *reg)
634 {
635 struct bpf_verifier_state *cur = env->cur_state;
636
637 return cur->frame[reg->frameno];
638 }
639
is_spi_bounds_valid(struct bpf_func_state * state,int spi,int nr_slots)640 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
641 {
642 int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
643
644 /* We need to check that slots between [spi - nr_slots + 1, spi] are
645 * within [0, allocated_stack).
646 *
647 * Please note that the spi grows downwards. For example, a dynptr
648 * takes the size of two stack slots; the first slot will be at
649 * spi and the second slot will be at spi - 1.
650 */
651 return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
652 }
653
stack_slot_obj_get_spi(struct bpf_verifier_env * env,struct bpf_reg_state * reg,const char * obj_kind,int nr_slots)654 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
655 const char *obj_kind, int nr_slots)
656 {
657 int off, spi;
658
659 if (!tnum_is_const(reg->var_off)) {
660 verbose(env, "%s has to be at a constant offset\n", obj_kind);
661 return -EINVAL;
662 }
663
664 off = reg->off + reg->var_off.value;
665 if (off % BPF_REG_SIZE) {
666 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
667 return -EINVAL;
668 }
669
670 spi = __get_spi(off);
671 if (spi + 1 < nr_slots) {
672 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
673 return -EINVAL;
674 }
675
676 if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
677 return -ERANGE;
678 return spi;
679 }
680
dynptr_get_spi(struct bpf_verifier_env * env,struct bpf_reg_state * reg)681 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
682 {
683 return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
684 }
685
/* Slot index lookup for an iterator spanning @nr_slots slots.
 * Returns what stack_slot_obj_get_spi() returns.
 */
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	int spi = stack_slot_obj_get_spi(env, reg, "iter", nr_slots);

	return spi;
}
690
/* Slot index lookup for an irq flag, which occupies a single slot.
 * Returns what stack_slot_obj_get_spi() returns.
 */
static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi = stack_slot_obj_get_spi(env, reg, "irq_flag", 1);

	return spi;
}
695
arg_to_dynptr_type(enum bpf_arg_type arg_type)696 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
697 {
698 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
699 case DYNPTR_TYPE_LOCAL:
700 return BPF_DYNPTR_TYPE_LOCAL;
701 case DYNPTR_TYPE_RINGBUF:
702 return BPF_DYNPTR_TYPE_RINGBUF;
703 case DYNPTR_TYPE_SKB:
704 return BPF_DYNPTR_TYPE_SKB;
705 case DYNPTR_TYPE_XDP:
706 return BPF_DYNPTR_TYPE_XDP;
707 case DYNPTR_TYPE_SKB_META:
708 return BPF_DYNPTR_TYPE_SKB_META;
709 case DYNPTR_TYPE_FILE:
710 return BPF_DYNPTR_TYPE_FILE;
711 default:
712 return BPF_DYNPTR_TYPE_INVALID;
713 }
714 }
715
get_dynptr_type_flag(enum bpf_dynptr_type type)716 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
717 {
718 switch (type) {
719 case BPF_DYNPTR_TYPE_LOCAL:
720 return DYNPTR_TYPE_LOCAL;
721 case BPF_DYNPTR_TYPE_RINGBUF:
722 return DYNPTR_TYPE_RINGBUF;
723 case BPF_DYNPTR_TYPE_SKB:
724 return DYNPTR_TYPE_SKB;
725 case BPF_DYNPTR_TYPE_XDP:
726 return DYNPTR_TYPE_XDP;
727 case BPF_DYNPTR_TYPE_SKB_META:
728 return DYNPTR_TYPE_SKB_META;
729 case BPF_DYNPTR_TYPE_FILE:
730 return DYNPTR_TYPE_FILE;
731 default:
732 return 0;
733 }
734 }
735
dynptr_type_refcounted(enum bpf_dynptr_type type)736 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
737 {
738 return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
739 }
740
/* Forward declarations: both helpers are called by the dynptr stack slot
 * code below before their definitions appear.
 */
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
			      enum bpf_dynptr_type type,
			      bool first_slot, int dynptr_id);

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg);
747
mark_dynptr_stack_regs(struct bpf_verifier_env * env,struct bpf_reg_state * sreg1,struct bpf_reg_state * sreg2,enum bpf_dynptr_type type)748 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
749 struct bpf_reg_state *sreg1,
750 struct bpf_reg_state *sreg2,
751 enum bpf_dynptr_type type)
752 {
753 int id = ++env->id_gen;
754
755 __mark_dynptr_reg(sreg1, type, true, id);
756 __mark_dynptr_reg(sreg2, type, false, id);
757 }
758
mark_dynptr_cb_reg(struct bpf_verifier_env * env,struct bpf_reg_state * reg,enum bpf_dynptr_type type)759 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
760 struct bpf_reg_state *reg,
761 enum bpf_dynptr_type type)
762 {
763 __mark_dynptr_reg(reg, type, true, ++env->id_gen);
764 }
765
/* Forward declaration: mark_stack_slots_dynptr() calls this before its
 * definition appears.
 */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
					struct bpf_func_state *state, int spi);
768
/* Turn the two stack slots addressed by @reg into a dynptr.
 *
 * @arg_type selects the dynptr type. @insn_idx is handed to
 * acquire_reference() when a new reference is needed. A non-zero
 * @clone_ref_obj_id is reused as the reference id instead of acquiring a
 * new one.
 *
 * Returns 0 on success. On failure it returns the negative value from
 * dynptr_get_spi(), destroy_if_dynptr_stack_slot() or acquire_reference(),
 * or -EINVAL when @arg_type does not map to a dynptr type.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
{
	struct bpf_func_state *state = func(env, reg);
	enum bpf_dynptr_type type;
	int spi, i, err;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 *	[d1][d1][d2][d2]
	 * spi    3   2   1   0
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	/* Every byte of both slots is tagged, so STACK_DYNPTR is never set
	 * partially.
	 */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	/* Note: on this error path the slot types above are already set. */
	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type);

	if (dynptr_type_refcounted(type)) {
		/* The id is used to track proper releasing */
		int id;

		if (clone_ref_obj_id)
			id = clone_ref_obj_id;
		else
			id = acquire_reference(env, insn_idx);

		if (id < 0)
			return id;

		/* Both slots carry the same reference id. */
		state->stack[spi].spilled_ptr.ref_obj_id = id;
		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
	}

	/* Report both slots as written for this frame. */
	bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));

	return 0;
}
828
invalidate_dynptr(struct bpf_verifier_env * env,struct bpf_func_state * state,int spi)829 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
830 {
831 int i;
832
833 for (i = 0; i < BPF_REG_SIZE; i++) {
834 state->stack[spi].slot_type[i] = STACK_INVALID;
835 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
836 }
837
838 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
839 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
840
841 bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
842 }
843
/* Release the dynptr that @reg points to on the stack.
 *
 * A non-refcounted dynptr is simply invalidated. For a refcounted one the
 * reference is released and every first-slot dynptr in this frame carrying
 * the same ref_obj_id is invalidated as well.
 *
 * Returns 0 on success, the negative value from dynptr_get_spi() when the
 * slot index cannot be determined, or -EFAULT on an internal inconsistency
 * (CONST_PTR_TO_DYNPTR register, or a matching ref_obj_id on a slot that is
 * not a dynptr).
 */
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, ref_obj_id, i;

	/*
	 * This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
	 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
	 * is safe to do directly.
	 */
	if (reg->type == CONST_PTR_TO_DYNPTR) {
		verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
		return -EFAULT;
	}
	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* No reference to drop: wiping the two slots is all that is needed. */
	if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		invalidate_dynptr(env, state, spi);
		return 0;
	}

	ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;

	/* If the dynptr has a ref_obj_id, then we need to invalidate
	 * two things:
	 *
	 * 1) Any dynptrs with a matching ref_obj_id (clones)
	 * 2) Any slices derived from this dynptr.
	 */

	/* Invalidate any slices associated with this dynptr */
	WARN_ON_ONCE(release_reference(env, ref_obj_id));

	/* Invalidate any dynptr clones.
	 * The scan starts at slot 1 because invalidate_dynptr() also touches
	 * slot i - 1.
	 */
	for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
			continue;

		/* it should always be the case that if the ref obj id
		 * matches then the stack slot also belongs to a
		 * dynptr
		 */
		if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
			verifier_bug(env, "misconfigured ref_obj_id");
			return -EFAULT;
		}
		/* Only act on a first slot; invalidate_dynptr() handles the pair. */
		if (state->stack[i].spilled_ptr.dynptr.first_slot)
			invalidate_dynptr(env, state, i);
	}

	return 0;
}
898
899 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
900 struct bpf_reg_state *reg);
901
mark_reg_invalid(const struct bpf_verifier_env * env,struct bpf_reg_state * reg)902 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
903 {
904 if (!env->allow_ptr_leaks)
905 __mark_reg_not_init(env, reg);
906 else
907 __mark_reg_unknown(env, reg);
908 }
909
destroy_if_dynptr_stack_slot(struct bpf_verifier_env * env,struct bpf_func_state * state,int spi)910 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
911 struct bpf_func_state *state, int spi)
912 {
913 struct bpf_func_state *fstate;
914 struct bpf_reg_state *dreg;
915 int i, dynptr_id;
916
917 /* We always ensure that STACK_DYNPTR is never set partially,
918 * hence just checking for slot_type[0] is enough. This is
919 * different for STACK_SPILL, where it may be only set for
920 * 1 byte, so code has to use is_spilled_reg.
921 */
922 if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
923 return 0;
924
925 /* Reposition spi to first slot */
926 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
927 spi = spi + 1;
928
929 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
930 verbose(env, "cannot overwrite referenced dynptr\n");
931 return -EINVAL;
932 }
933
934 mark_stack_slot_scratched(env, spi);
935 mark_stack_slot_scratched(env, spi - 1);
936
937 /* Writing partially to one dynptr stack slot destroys both. */
938 for (i = 0; i < BPF_REG_SIZE; i++) {
939 state->stack[spi].slot_type[i] = STACK_INVALID;
940 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
941 }
942
943 dynptr_id = state->stack[spi].spilled_ptr.id;
944 /* Invalidate any slices associated with this dynptr */
945 bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
946 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
947 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
948 continue;
949 if (dreg->dynptr_id == dynptr_id)
950 mark_reg_invalid(env, dreg);
951 }));
952
953 /* Do not release reference state, we are destroying dynptr on stack,
954 * not using some helper to release it. Just reset register.
955 */
956 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
957 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
958
959 bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
960
961 return 0;
962 }
963
is_dynptr_reg_valid_uninit(struct bpf_verifier_env * env,struct bpf_reg_state * reg)964 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
965 {
966 int spi;
967
968 if (reg->type == CONST_PTR_TO_DYNPTR)
969 return false;
970
971 spi = dynptr_get_spi(env, reg);
972
973 /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
974 * error because this just means the stack state hasn't been updated yet.
975 * We will do check_mem_access to check and update stack bounds later.
976 */
977 if (spi < 0 && spi != -ERANGE)
978 return false;
979
980 /* We don't need to check if the stack slots are marked by previous
981 * dynptr initializations because we allow overwriting existing unreferenced
982 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
983 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
984 * touching are completely destructed before we reinitialize them for a new
985 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
986 * instead of delaying it until the end where the user will get "Unreleased
987 * reference" error.
988 */
989 return true;
990 }
991
is_dynptr_reg_valid_init(struct bpf_verifier_env * env,struct bpf_reg_state * reg)992 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
993 {
994 struct bpf_func_state *state = func(env, reg);
995 int i, spi;
996
997 /* This already represents first slot of initialized bpf_dynptr.
998 *
999 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
1000 * check_func_arg_reg_off's logic, so we don't need to check its
1001 * offset and alignment.
1002 */
1003 if (reg->type == CONST_PTR_TO_DYNPTR)
1004 return true;
1005
1006 spi = dynptr_get_spi(env, reg);
1007 if (spi < 0)
1008 return false;
1009 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1010 return false;
1011
1012 for (i = 0; i < BPF_REG_SIZE; i++) {
1013 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
1014 state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
1015 return false;
1016 }
1017
1018 return true;
1019 }
1020
is_dynptr_type_expected(struct bpf_verifier_env * env,struct bpf_reg_state * reg,enum bpf_arg_type arg_type)1021 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1022 enum bpf_arg_type arg_type)
1023 {
1024 struct bpf_func_state *state = func(env, reg);
1025 enum bpf_dynptr_type dynptr_type;
1026 int spi;
1027
1028 /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
1029 if (arg_type == ARG_PTR_TO_DYNPTR)
1030 return true;
1031
1032 dynptr_type = arg_to_dynptr_type(arg_type);
1033 if (reg->type == CONST_PTR_TO_DYNPTR) {
1034 return reg->dynptr.type == dynptr_type;
1035 } else {
1036 spi = dynptr_get_spi(env, reg);
1037 if (spi < 0)
1038 return false;
1039 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1040 }
1041 }
1042
1043 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1044
1045 static bool in_rcu_cs(struct bpf_verifier_env *env);
1046
1047 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
1048
/* Turn the @nr_slots stack slots addressed by @reg into an active iterator
 * of BTF type (@btf, @btf_id).
 *
 * A new reference is acquired; its id is stored in the first slot only, all
 * other slots get ref_obj_id 0. Every slot is marked STACK_ITER, reported as
 * written and scratched.
 *
 * Returns 0 on success, or the negative value returned by iter_get_spi() or
 * acquire_reference().
 */
static int mark_stack_slots_iter(struct bpf_verifier_env *env,
				 struct bpf_kfunc_call_arg_meta *meta,
				 struct bpf_reg_state *reg, int insn_idx,
				 struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, id, n, byte;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	id = acquire_reference(env, insn_idx);
	if (id < 0)
		return id;

	for (n = 0; n < nr_slots; n++) {
		int cur_spi = spi - n;
		struct bpf_stack_state *slot = &state->stack[cur_spi];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		__mark_reg_known_zero(st);
		/* there is no dedicated reg type for iterators */
		st->type = PTR_TO_STACK;
		if (is_kfunc_rcu_protected(meta)) {
			if (!in_rcu_cs(env))
				st->type |= PTR_UNTRUSTED;
			else
				st->type |= MEM_RCU;
		}
		/* only the first slot carries the reference id */
		st->ref_obj_id = n ? 0 : id;
		st->iter.btf = btf;
		st->iter.btf_id = btf_id;
		st->iter.state = BPF_ITER_STATE_ACTIVE;
		st->iter.depth = 0;

		for (byte = 0; byte < BPF_REG_SIZE; byte++)
			slot->slot_type[byte] = STACK_ITER;

		bpf_mark_stack_write(env, state->frameno, BIT(cur_spi));
		mark_stack_slot_scratched(env, cur_spi);
	}

	return 0;
}
1092
unmark_stack_slots_iter(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int nr_slots)1093 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1094 struct bpf_reg_state *reg, int nr_slots)
1095 {
1096 struct bpf_func_state *state = func(env, reg);
1097 int spi, i, j;
1098
1099 spi = iter_get_spi(env, reg, nr_slots);
1100 if (spi < 0)
1101 return spi;
1102
1103 for (i = 0; i < nr_slots; i++) {
1104 struct bpf_stack_state *slot = &state->stack[spi - i];
1105 struct bpf_reg_state *st = &slot->spilled_ptr;
1106
1107 if (i == 0)
1108 WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1109
1110 __mark_reg_not_init(env, st);
1111
1112 for (j = 0; j < BPF_REG_SIZE; j++)
1113 slot->slot_type[j] = STACK_INVALID;
1114
1115 bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
1116 mark_stack_slot_scratched(env, spi - i);
1117 }
1118
1119 return 0;
1120 }
1121
is_iter_reg_valid_uninit(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int nr_slots)1122 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1123 struct bpf_reg_state *reg, int nr_slots)
1124 {
1125 struct bpf_func_state *state = func(env, reg);
1126 int spi, i, j;
1127
1128 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1129 * will do check_mem_access to check and update stack bounds later, so
1130 * return true for that case.
1131 */
1132 spi = iter_get_spi(env, reg, nr_slots);
1133 if (spi == -ERANGE)
1134 return true;
1135 if (spi < 0)
1136 return false;
1137
1138 for (i = 0; i < nr_slots; i++) {
1139 struct bpf_stack_state *slot = &state->stack[spi - i];
1140
1141 for (j = 0; j < BPF_REG_SIZE; j++)
1142 if (slot->slot_type[j] == STACK_ITER)
1143 return false;
1144 }
1145
1146 return true;
1147 }
1148
/* Validate that the @nr_slots slots addressed by @reg hold an initialized
 * iterator of BTF type (@btf, @btf_id).
 *
 * Returns 0 when valid, -EPROTO when a slot is flagged PTR_UNTRUSTED and
 * -EINVAL for every other mismatch.
 */
static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				  struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, n, byte;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return -EINVAL;

	for (n = 0; n < nr_slots; n++) {
		struct bpf_stack_state *slot = &state->stack[spi - n];
		struct bpf_reg_state *st = &slot->spilled_ptr;
		bool is_first = n == 0;
		bool has_ref = st->ref_obj_id != 0;

		if (st->type & PTR_UNTRUSTED)
			return -EPROTO;
		/* ref_obj_id must be set on the main (first) slot and on
		 * that slot only
		 */
		if (is_first != has_ref)
			return -EINVAL;
		if (st->iter.btf != btf)
			return -EINVAL;
		if (st->iter.btf_id != btf_id)
			return -EINVAL;

		for (byte = 0; byte < BPF_REG_SIZE; byte++) {
			if (slot->slot_type[byte] != STACK_ITER)
				return -EINVAL;
		}
	}

	return 0;
}
1180
1181 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1182 static int release_irq_state(struct bpf_verifier_state *state, int id);
1183
mark_stack_slot_irq_flag(struct bpf_verifier_env * env,struct bpf_kfunc_call_arg_meta * meta,struct bpf_reg_state * reg,int insn_idx,int kfunc_class)1184 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
1185 struct bpf_kfunc_call_arg_meta *meta,
1186 struct bpf_reg_state *reg, int insn_idx,
1187 int kfunc_class)
1188 {
1189 struct bpf_func_state *state = func(env, reg);
1190 struct bpf_stack_state *slot;
1191 struct bpf_reg_state *st;
1192 int spi, i, id;
1193
1194 spi = irq_flag_get_spi(env, reg);
1195 if (spi < 0)
1196 return spi;
1197
1198 id = acquire_irq_state(env, insn_idx);
1199 if (id < 0)
1200 return id;
1201
1202 slot = &state->stack[spi];
1203 st = &slot->spilled_ptr;
1204
1205 bpf_mark_stack_write(env, reg->frameno, BIT(spi));
1206 __mark_reg_known_zero(st);
1207 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1208 st->ref_obj_id = id;
1209 st->irq.kfunc_class = kfunc_class;
1210
1211 for (i = 0; i < BPF_REG_SIZE; i++)
1212 slot->slot_type[i] = STACK_IRQ_FLAG;
1213
1214 mark_stack_slot_scratched(env, spi);
1215 return 0;
1216 }
1217
unmark_stack_slot_irq_flag(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int kfunc_class)1218 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1219 int kfunc_class)
1220 {
1221 struct bpf_func_state *state = func(env, reg);
1222 struct bpf_stack_state *slot;
1223 struct bpf_reg_state *st;
1224 int spi, i, err;
1225
1226 spi = irq_flag_get_spi(env, reg);
1227 if (spi < 0)
1228 return spi;
1229
1230 slot = &state->stack[spi];
1231 st = &slot->spilled_ptr;
1232
1233 if (st->irq.kfunc_class != kfunc_class) {
1234 const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1235 const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1236
1237 verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
1238 flag_kfunc, used_kfunc);
1239 return -EINVAL;
1240 }
1241
1242 err = release_irq_state(env->cur_state, st->ref_obj_id);
1243 WARN_ON_ONCE(err && err != -EACCES);
1244 if (err) {
1245 int insn_idx = 0;
1246
1247 for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1248 if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1249 insn_idx = env->cur_state->refs[i].insn_idx;
1250 break;
1251 }
1252 }
1253
1254 verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1255 env->cur_state->active_irq_id, insn_idx);
1256 return err;
1257 }
1258
1259 __mark_reg_not_init(env, st);
1260
1261 bpf_mark_stack_write(env, reg->frameno, BIT(spi));
1262
1263 for (i = 0; i < BPF_REG_SIZE; i++)
1264 slot->slot_type[i] = STACK_INVALID;
1265
1266 mark_stack_slot_scratched(env, spi);
1267 return 0;
1268 }
1269
is_irq_flag_reg_valid_uninit(struct bpf_verifier_env * env,struct bpf_reg_state * reg)1270 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1271 {
1272 struct bpf_func_state *state = func(env, reg);
1273 struct bpf_stack_state *slot;
1274 int spi, i;
1275
1276 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1277 * will do check_mem_access to check and update stack bounds later, so
1278 * return true for that case.
1279 */
1280 spi = irq_flag_get_spi(env, reg);
1281 if (spi == -ERANGE)
1282 return true;
1283 if (spi < 0)
1284 return false;
1285
1286 slot = &state->stack[spi];
1287
1288 for (i = 0; i < BPF_REG_SIZE; i++)
1289 if (slot->slot_type[i] == STACK_IRQ_FLAG)
1290 return false;
1291 return true;
1292 }
1293
is_irq_flag_reg_valid_init(struct bpf_verifier_env * env,struct bpf_reg_state * reg)1294 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1295 {
1296 struct bpf_func_state *state = func(env, reg);
1297 struct bpf_stack_state *slot;
1298 struct bpf_reg_state *st;
1299 int spi, i;
1300
1301 spi = irq_flag_get_spi(env, reg);
1302 if (spi < 0)
1303 return -EINVAL;
1304
1305 slot = &state->stack[spi];
1306 st = &slot->spilled_ptr;
1307
1308 if (!st->ref_obj_id)
1309 return -EINVAL;
1310
1311 for (i = 0; i < BPF_REG_SIZE; i++)
1312 if (slot->slot_type[i] != STACK_IRQ_FLAG)
1313 return -EINVAL;
1314 return 0;
1315 }
1316
1317 /* Check if given stack slot is "special":
1318 * - spilled register state (STACK_SPILL);
1319 * - dynptr state (STACK_DYNPTR);
1320 * - iter state (STACK_ITER).
1321 * - irq flag state (STACK_IRQ_FLAG)
1322 */
is_stack_slot_special(const struct bpf_stack_state * stack)1323 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1324 {
1325 enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1326
1327 switch (type) {
1328 case STACK_SPILL:
1329 case STACK_DYNPTR:
1330 case STACK_ITER:
1331 case STACK_IRQ_FLAG:
1332 return true;
1333 case STACK_INVALID:
1334 case STACK_MISC:
1335 case STACK_ZERO:
1336 return false;
1337 default:
1338 WARN_ONCE(1, "unknown stack slot type %d\n", type);
1339 return true;
1340 }
1341 }
1342
1343 /* The reg state of a pointer or a bounded scalar was saved when
1344 * it was spilled to the stack.
1345 */
is_spilled_reg(const struct bpf_stack_state * stack)1346 static bool is_spilled_reg(const struct bpf_stack_state *stack)
1347 {
1348 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1349 }
1350
is_spilled_scalar_reg(const struct bpf_stack_state * stack)1351 static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1352 {
1353 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
1354 stack->spilled_ptr.type == SCALAR_VALUE;
1355 }
1356
is_spilled_scalar_reg64(const struct bpf_stack_state * stack)1357 static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
1358 {
1359 return stack->slot_type[0] == STACK_SPILL &&
1360 stack->spilled_ptr.type == SCALAR_VALUE;
1361 }
1362
1363 /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
1364 * case they are equivalent, or it's STACK_ZERO, in which case we preserve
1365 * more precise STACK_ZERO.
1366 * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1367 * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1368 * unnecessary as both are considered equivalent when loading data and pruning,
1369 * in case of unprivileged mode it will be incorrect to allow reads of invalid
1370 * slots.
1371 */
mark_stack_slot_misc(struct bpf_verifier_env * env,u8 * stype)1372 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
1373 {
1374 if (*stype == STACK_ZERO)
1375 return;
1376 if (*stype == STACK_INVALID)
1377 return;
1378 *stype = STACK_MISC;
1379 }
1380
/* Turn one stack byte into STACK_MISC unless it is STACK_INVALID. */
static void scrub_spilled_slot(u8 *stype)
{
	if (*stype == STACK_INVALID)
		return;

	*stype = STACK_MISC;
}
1386
1387 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1388 * small to hold src. This is different from krealloc since we don't want to preserve
1389 * the contents of dst.
1390 *
1391 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1392 * not be allocated.
1393 */
copy_array(void * dst,const void * src,size_t n,size_t size,gfp_t flags)1394 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1395 {
1396 size_t alloc_bytes;
1397 void *orig = dst;
1398 size_t bytes;
1399
1400 if (ZERO_OR_NULL_PTR(src))
1401 goto out;
1402
1403 if (unlikely(check_mul_overflow(n, size, &bytes)))
1404 return NULL;
1405
1406 alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1407 dst = krealloc(orig, alloc_bytes, flags);
1408 if (!dst) {
1409 kfree(orig);
1410 return NULL;
1411 }
1412
1413 memcpy(dst, src, bytes);
1414 out:
1415 return dst ? dst : ZERO_SIZE_PTR;
1416 }
1417
1418 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1419 * small to hold new_n items. new items are zeroed out if the array grows.
1420 *
1421 * Contrary to krealloc_array, does not free arr if new_n is zero.
1422 */
realloc_array(void * arr,size_t old_n,size_t new_n,size_t size)1423 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1424 {
1425 size_t alloc_size;
1426 void *new_arr;
1427
1428 if (!new_n || old_n == new_n)
1429 goto out;
1430
1431 alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1432 new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
1433 if (!new_arr) {
1434 kfree(arr);
1435 return NULL;
1436 }
1437 arr = new_arr;
1438
1439 if (new_n > old_n)
1440 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1441
1442 out:
1443 return arr ? arr : ZERO_SIZE_PTR;
1444 }
1445
copy_reference_state(struct bpf_verifier_state * dst,const struct bpf_verifier_state * src)1446 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
1447 {
1448 dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1449 sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
1450 if (!dst->refs)
1451 return -ENOMEM;
1452
1453 dst->acquired_refs = src->acquired_refs;
1454 dst->active_locks = src->active_locks;
1455 dst->active_preempt_locks = src->active_preempt_locks;
1456 dst->active_rcu_locks = src->active_rcu_locks;
1457 dst->active_irq_id = src->active_irq_id;
1458 dst->active_lock_id = src->active_lock_id;
1459 dst->active_lock_ptr = src->active_lock_ptr;
1460 return 0;
1461 }
1462
copy_stack_state(struct bpf_func_state * dst,const struct bpf_func_state * src)1463 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1464 {
1465 size_t n = src->allocated_stack / BPF_REG_SIZE;
1466
1467 dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1468 GFP_KERNEL_ACCOUNT);
1469 if (!dst->stack)
1470 return -ENOMEM;
1471
1472 dst->allocated_stack = src->allocated_stack;
1473 return 0;
1474 }
1475
/* Resize state->refs to hold @n entries and record @n as the number of
 * acquired references.
 *
 * Returns 0 on success, -ENOMEM when the array could not be resized.
 */
static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
{
	const size_t elem_sz = sizeof(struct bpf_reference_state);

	state->refs = realloc_array(state->refs, state->acquired_refs, n, elem_sz);
	if (!state->refs)
		return -ENOMEM;

	state->acquired_refs = n;

	return 0;
}
1486
1487 /* Possibly update state->allocated_stack to be at least size bytes. Also
1488 * possibly update the function's high-water mark in its bpf_subprog_info.
1489 */
grow_stack_state(struct bpf_verifier_env * env,struct bpf_func_state * state,int size)1490 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1491 {
1492 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
1493
1494 /* The stack size is always a multiple of BPF_REG_SIZE. */
1495 size = round_up(size, BPF_REG_SIZE);
1496 n = size / BPF_REG_SIZE;
1497
1498 if (old_n >= n)
1499 return 0;
1500
1501 state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1502 if (!state->stack)
1503 return -ENOMEM;
1504
1505 state->allocated_stack = size;
1506
1507 /* update known max for given subprogram */
1508 if (env->subprog_info[state->subprogno].stack_depth < size)
1509 env->subprog_info[state->subprogno].stack_depth = size;
1510
1511 return 0;
1512 }
1513
1514 /* Acquire a pointer id from the env and update the state->refs to include
1515 * this new pointer reference.
1516 * On success, returns a valid pointer id to associate with the register
1517 * On failure, returns a negative errno.
1518 */
acquire_reference_state(struct bpf_verifier_env * env,int insn_idx)1519 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1520 {
1521 struct bpf_verifier_state *state = env->cur_state;
1522 int new_ofs = state->acquired_refs;
1523 int err;
1524
1525 err = resize_reference_state(state, state->acquired_refs + 1);
1526 if (err)
1527 return NULL;
1528 state->refs[new_ofs].insn_idx = insn_idx;
1529
1530 return &state->refs[new_ofs];
1531 }
1532
acquire_reference(struct bpf_verifier_env * env,int insn_idx)1533 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
1534 {
1535 struct bpf_reference_state *s;
1536
1537 s = acquire_reference_state(env, insn_idx);
1538 if (!s)
1539 return -ENOMEM;
1540 s->type = REF_TYPE_PTR;
1541 s->id = ++env->id_gen;
1542 return s->id;
1543 }
1544
acquire_lock_state(struct bpf_verifier_env * env,int insn_idx,enum ref_state_type type,int id,void * ptr)1545 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
1546 int id, void *ptr)
1547 {
1548 struct bpf_verifier_state *state = env->cur_state;
1549 struct bpf_reference_state *s;
1550
1551 s = acquire_reference_state(env, insn_idx);
1552 if (!s)
1553 return -ENOMEM;
1554 s->type = type;
1555 s->id = id;
1556 s->ptr = ptr;
1557
1558 state->active_locks++;
1559 state->active_lock_id = id;
1560 state->active_lock_ptr = ptr;
1561 return 0;
1562 }
1563
acquire_irq_state(struct bpf_verifier_env * env,int insn_idx)1564 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
1565 {
1566 struct bpf_verifier_state *state = env->cur_state;
1567 struct bpf_reference_state *s;
1568
1569 s = acquire_reference_state(env, insn_idx);
1570 if (!s)
1571 return -ENOMEM;
1572 s->type = REF_TYPE_IRQ;
1573 s->id = ++env->id_gen;
1574
1575 state->active_irq_id = s->id;
1576 return s->id;
1577 }
1578
release_reference_state(struct bpf_verifier_state * state,int idx)1579 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1580 {
1581 int last_idx;
1582 size_t rem;
1583
1584 /* IRQ state requires the relative ordering of elements remaining the
1585 * same, since it relies on the refs array to behave as a stack, so that
1586 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1587 * the array instead of swapping the final element into the deleted idx.
1588 */
1589 last_idx = state->acquired_refs - 1;
1590 rem = state->acquired_refs - idx - 1;
1591 if (last_idx && idx != last_idx)
1592 memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1593 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1594 state->acquired_refs--;
1595 return;
1596 }
1597
find_reference_state(struct bpf_verifier_state * state,int ptr_id)1598 static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
1599 {
1600 int i;
1601
1602 for (i = 0; i < state->acquired_refs; i++)
1603 if (state->refs[i].id == ptr_id)
1604 return true;
1605
1606 return false;
1607 }
1608
release_lock_state(struct bpf_verifier_state * state,int type,int id,void * ptr)1609 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
1610 {
1611 void *prev_ptr = NULL;
1612 u32 prev_id = 0;
1613 int i;
1614
1615 for (i = 0; i < state->acquired_refs; i++) {
1616 if (state->refs[i].type == type && state->refs[i].id == id &&
1617 state->refs[i].ptr == ptr) {
1618 release_reference_state(state, i);
1619 state->active_locks--;
1620 /* Reassign active lock (id, ptr). */
1621 state->active_lock_id = prev_id;
1622 state->active_lock_ptr = prev_ptr;
1623 return 0;
1624 }
1625 if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
1626 prev_id = state->refs[i].id;
1627 prev_ptr = state->refs[i].ptr;
1628 }
1629 }
1630 return -EINVAL;
1631 }
1632
release_irq_state(struct bpf_verifier_state * state,int id)1633 static int release_irq_state(struct bpf_verifier_state *state, int id)
1634 {
1635 u32 prev_id = 0;
1636 int i;
1637
1638 if (id != state->active_irq_id)
1639 return -EACCES;
1640
1641 for (i = 0; i < state->acquired_refs; i++) {
1642 if (state->refs[i].type != REF_TYPE_IRQ)
1643 continue;
1644 if (state->refs[i].id == id) {
1645 release_reference_state(state, i);
1646 state->active_irq_id = prev_id;
1647 return 0;
1648 } else {
1649 prev_id = state->refs[i].id;
1650 }
1651 }
1652 return -EINVAL;
1653 }
1654
find_lock_state(struct bpf_verifier_state * state,enum ref_state_type type,int id,void * ptr)1655 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1656 int id, void *ptr)
1657 {
1658 int i;
1659
1660 for (i = 0; i < state->acquired_refs; i++) {
1661 struct bpf_reference_state *s = &state->refs[i];
1662
1663 if (!(s->type & type))
1664 continue;
1665
1666 if (s->id == id && s->ptr == ptr)
1667 return s;
1668 }
1669 return NULL;
1670 }
1671
update_peak_states(struct bpf_verifier_env * env)1672 static void update_peak_states(struct bpf_verifier_env *env)
1673 {
1674 u32 cur_states;
1675
1676 cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
1677 env->peak_states = max(env->peak_states, cur_states);
1678 }
1679
free_func_state(struct bpf_func_state * state)1680 static void free_func_state(struct bpf_func_state *state)
1681 {
1682 if (!state)
1683 return;
1684 kfree(state->stack);
1685 kfree(state);
1686 }
1687
clear_jmp_history(struct bpf_verifier_state * state)1688 static void clear_jmp_history(struct bpf_verifier_state *state)
1689 {
1690 kfree(state->jmp_history);
1691 state->jmp_history = NULL;
1692 state->jmp_history_cnt = 0;
1693 }
1694
/* Free everything owned by @state: all frames up to and including curframe,
 * the refs array and the jump history. The state structure itself is only
 * freed when @free_self is true.
 */
static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int fr;

	for (fr = 0; fr <= state->curframe; fr++) {
		free_func_state(state->frame[fr]);
		state->frame[fr] = NULL;
	}

	kfree(state->refs);
	clear_jmp_history(state);

	if (!free_self)
		return;

	kfree(state);
}
1709
1710 /* struct bpf_verifier_state->parent refers to states
1711 * that are in either of env->{expored_states,free_list}.
1712 * In both cases the state is contained in struct bpf_verifier_state_list.
1713 */
state_parent_as_list(struct bpf_verifier_state * st)1714 static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
1715 {
1716 if (st->parent)
1717 return container_of(st->parent, struct bpf_verifier_state_list, state);
1718 return NULL;
1719 }
1720
1721 static bool incomplete_read_marks(struct bpf_verifier_env *env,
1722 struct bpf_verifier_state *st);
1723
1724 /* A state can be freed if it is no longer referenced:
1725 * - is in the env->free_list;
1726 * - has no children states;
1727 */
maybe_free_verifier_state(struct bpf_verifier_env * env,struct bpf_verifier_state_list * sl)1728 static void maybe_free_verifier_state(struct bpf_verifier_env *env,
1729 struct bpf_verifier_state_list *sl)
1730 {
1731 if (!sl->in_free_list
1732 || sl->state.branches != 0
1733 || incomplete_read_marks(env, &sl->state))
1734 return;
1735 list_del(&sl->node);
1736 free_verifier_state(&sl->state, false);
1737 kfree(sl);
1738 env->free_list_size--;
1739 }
1740
1741 /* copy verifier state from src to dst growing dst stack space
1742 * when necessary to accommodate larger src stack
1743 */
copy_func_state(struct bpf_func_state * dst,const struct bpf_func_state * src)1744 static int copy_func_state(struct bpf_func_state *dst,
1745 const struct bpf_func_state *src)
1746 {
1747 memcpy(dst, src, offsetof(struct bpf_func_state, stack));
1748 return copy_stack_state(dst, src);
1749 }
1750
copy_verifier_state(struct bpf_verifier_state * dst_state,const struct bpf_verifier_state * src)1751 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1752 const struct bpf_verifier_state *src)
1753 {
1754 struct bpf_func_state *dst;
1755 int i, err;
1756
1757 dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1758 src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
1759 GFP_KERNEL_ACCOUNT);
1760 if (!dst_state->jmp_history)
1761 return -ENOMEM;
1762 dst_state->jmp_history_cnt = src->jmp_history_cnt;
1763
1764 /* if dst has more stack frames then src frame, free them, this is also
1765 * necessary in case of exceptional exits using bpf_throw.
1766 */
1767 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1768 free_func_state(dst_state->frame[i]);
1769 dst_state->frame[i] = NULL;
1770 }
1771 err = copy_reference_state(dst_state, src);
1772 if (err)
1773 return err;
1774 dst_state->speculative = src->speculative;
1775 dst_state->in_sleepable = src->in_sleepable;
1776 dst_state->cleaned = src->cleaned;
1777 dst_state->curframe = src->curframe;
1778 dst_state->branches = src->branches;
1779 dst_state->parent = src->parent;
1780 dst_state->first_insn_idx = src->first_insn_idx;
1781 dst_state->last_insn_idx = src->last_insn_idx;
1782 dst_state->dfs_depth = src->dfs_depth;
1783 dst_state->callback_unroll_depth = src->callback_unroll_depth;
1784 dst_state->may_goto_depth = src->may_goto_depth;
1785 dst_state->equal_state = src->equal_state;
1786 for (i = 0; i <= src->curframe; i++) {
1787 dst = dst_state->frame[i];
1788 if (!dst) {
1789 dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
1790 if (!dst)
1791 return -ENOMEM;
1792 dst_state->frame[i] = dst;
1793 }
1794 err = copy_func_state(dst, src->frame[i]);
1795 if (err)
1796 return err;
1797 }
1798 return 0;
1799 }
1800
/* Number of buckets in env->explored_states: one per program instruction
 * (env->prog->len).
 */
static u32 state_htab_size(struct bpf_verifier_env *env)
{
	u32 nr_buckets = env->prog->len;

	return nr_buckets;
}
1805
/* Return the explored_states bucket used for instruction @idx.
 *
 * The bucket index mixes @idx with the callsite of the current (topmost)
 * frame of env->cur_state, modulo state_htab_size().
 */
static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *top = vstate->frame[vstate->curframe];
	u32 bucket = (idx ^ top->callsite) % state_htab_size(env);

	return &env->explored_states[bucket];
}
1813
/* Return true if states @a and @b have the same call depth and every
 * frame of @a was entered from the same callsite as the matching frame
 * of @b.
 */
static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
{
	int depth;

	if (a->curframe != b->curframe)
		return false;

	/* Walk from the innermost frame down to frame 0. */
	depth = a->curframe;
	while (depth >= 0) {
		if (a->frame[depth]->callsite != b->frame[depth]->callsite)
			return false;
		depth--;
	}

	return true;
}
1827
/* Return IP for a given frame in a call stack.
 *
 * For the current frame this is st->insn_idx; for any other frame it is
 * the callsite recorded in the frame directly above it.
 */
static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
{
	if (frame == st->curframe)
		return st->insn_idx;

	return st->frame[frame + 1]->callsite;
}
1835
/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
 * if such frame exists form a corresponding @callchain as an array of
 * call sites leading to this frame and SCC id.
 * E.g.:
 *
 *    void foo()  { A: loop {... SCC#1 ...}; }
 *    void bar()  { B: loop { C: foo(); ... SCC#2 ... }
 *                  D: loop { E: foo(); ... SCC#3 ... } }
 *    void main() { F: bar(); }
 *
 * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
 * on @st frame call sites being (F,C,A) or (F,E,A).
 *
 * @callchain is always zeroed first. Returns true when an SCC was found
 * (callchain->scc is set), false when no frame of @st is inside an SCC.
 */
static bool compute_scc_callchain(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st,
				  struct bpf_scc_callchain *callchain)
{
	u32 fr, scc_id, idx;

	memset(callchain, 0, sizeof(*callchain));
	for (fr = 0; fr <= st->curframe; fr++) {
		idx = frame_insn_idx(st, fr);
		scc_id = env->insn_aux_data[idx].scc;
		if (scc_id) {
			/* First frame (from the bottom) that sits in an SCC. */
			callchain->scc = scc_id;
			return true;
		}
		/* Reached the current frame without meeting any SCC. */
		if (fr == st->curframe)
			return false;
		/* Not in an SCC: remember the call site and go one frame up. */
		callchain->callsites[fr] = idx;
	}
	return true;
}
1870
/* Check if bpf_scc_visit instance for @callchain exists.
 *
 * Returns the visit whose stored callchain is byte-for-byte equal to
 * @callchain, or NULL when no bpf_scc_info exists for callchain->scc or
 * none of its visits matches.
 */
static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
					      struct bpf_scc_callchain *callchain)
{
	struct bpf_scc_info *info = env->scc_info[callchain->scc];
	struct bpf_scc_visit *visits;
	u32 i;

	if (!info)
		return NULL;
	/* Only form info->visits once info is known to be non-NULL. */
	visits = info->visits;
	for (i = 0; i < info->num_visits; i++)
		if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
			return &visits[i];
	return NULL;
}
1886
/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
 * Allocated instances are alive for a duration of the do_check_common()
 * call and are freed by free_states().
 *
 * The per-SCC bpf_scc_info is grown by one visit slot with kvrealloc();
 * the new slot is zeroed and then receives a copy of @callchain.
 * Returns the new visit, or NULL if the reallocation fails (in which case
 * env->scc_info[] is left untouched).
 */
static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
					     struct bpf_scc_callchain *callchain)
{
	u32 scc = callchain->scc;
	struct bpf_scc_info *cur = env->scc_info[scc];
	u32 old_cnt = cur ? cur->num_visits : 0;
	struct bpf_scc_info *grown;
	struct bpf_scc_visit *slot;
	u64 bytes;

	bytes = sizeof(*grown) + sizeof(struct bpf_scc_visit) * (old_cnt + 1);
	grown = kvrealloc(cur, bytes, GFP_KERNEL_ACCOUNT);
	if (!grown)
		return NULL;

	env->scc_info[scc] = grown;
	grown->num_visits = old_cnt + 1;

	slot = &grown->visits[old_cnt];
	memset(slot, 0, sizeof(*slot));
	memcpy(&slot->callchain, callchain, sizeof(*callchain));
	return slot;
}
1913
/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf.
 *
 * Call sites are printed until the first zero entry (or the end of the
 * callsites array). Returns env->tmp_str_buf, so the result is only valid
 * until the buffer is reused.
 */
static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
{
	char *out = env->tmp_str_buf;
	int idx, pos;

	pos = snprintf(out, TMP_STR_BUF_LEN, "(");
	for (idx = 0; idx < ARRAY_SIZE(callchain->callsites); idx++) {
		if (callchain->callsites[idx] == 0)
			break;
		pos += snprintf(out + pos, TMP_STR_BUF_LEN - pos, "%u,",
				callchain->callsites[idx]);
	}
	snprintf(out + pos, TMP_STR_BUF_LEN - pos, "%u)", callchain->scc);
	return out;
}
1930
/* If callchain for @st exists (@st is in some SCC), ensure that
 * bpf_scc_visit instance for this callchain exists.
 * If instance does not exist or is empty, assign visit->entry_state to @st.
 *
 * Returns 0 on success (including when @st is not in any SCC) and -ENOMEM
 * if a new visit instance could not be allocated.
 */
static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *chain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, chain))
		return 0;

	visit = scc_visit_lookup(env, chain);
	if (!visit)
		visit = scc_visit_alloc(env, chain);
	if (!visit)
		return -ENOMEM;

	/* Somebody already entered this SCC through this callchain. */
	if (visit->entry_state)
		return 0;

	visit->entry_state = st;
	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC enter %s\n", format_callchain(env, chain));
	return 0;
}
1953
static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);

/* If callchain for @st exists (@st is in some SCC), make it empty:
 * - set visit->entry_state to NULL;
 * - flush accumulated backedges.
 *
 * Nothing happens unless @st is the state that entered the SCC
 * (visit->entry_state == @st). Returns 0, -EFAULT on an internal
 * inconsistency, or the result of propagate_backedges().
 */
static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *chain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, chain))
		return 0;

	visit = scc_visit_lookup(env, chain);
	if (!visit) {
		/*
		 * If path traversal stops inside an SCC, corresponding bpf_scc_visit
		 * must exist for non-speculative paths. For non-speculative paths
		 * traversal stops when:
		 * a. Verification error is found, maybe_exit_scc() is not called.
		 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
		 *    of any SCC.
		 * c. A checkpoint is reached and matched. Checkpoints are created by
		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
		 *    bpf_scc_visit instances for checkpoints within SCCs.
		 * (c) is the only case that can reach this point.
		 */
		if (st->speculative)
			return 0;

		verifier_bug(env, "scc exit: no visit info for call chain %s",
			     format_callchain(env, chain));
		return -EFAULT;
	}

	/* Only the state that entered the SCC may close it. */
	if (visit->entry_state != st)
		return 0;

	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC exit %s\n", format_callchain(env, chain));

	visit->entry_state = NULL;
	/* Backedges of this visit no longer count towards the global total. */
	env->num_backedges -= visit->num_backedges;
	visit->num_backedges = 0;
	update_peak_states(env);

	return propagate_backedges(env, visit);
}
1998
/* Lookup a bpf_scc_visit instance corresponding to @st callchain
 * and add @backedge to visit->backedges. @st callchain must exist.
 *
 * Returns 0 on success. A missing callchain or a missing visit is reported
 * through verifier_bug() and yields -EFAULT.
 */
static int add_scc_backedge(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *st,
			    struct bpf_scc_backedge *backedge)
{
	struct bpf_scc_callchain *chain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, chain)) {
		verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
			     st->insn_idx);
		return -EFAULT;
	}

	visit = scc_visit_lookup(env, chain);
	if (!visit) {
		verifier_bug(env, "add backedge: no visit info for call chain %s",
			     format_callchain(env, chain));
		return -EFAULT;
	}

	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC backedge %s\n", format_callchain(env, chain));

	/* Push onto the head of the visit's singly linked backedge list. */
	backedge->next = visit->backedges;
	visit->backedges = backedge;

	/* Account for it both per visit and globally. */
	visit->num_backedges++;
	env->num_backedges++;
	update_peak_states(env);
	return 0;
}
2029
/* bpf_reg_state->live marks for registers in a state @st are incomplete,
 * if state @st is in some SCC and not all execution paths starting at this
 * SCC are fully explored.
 *
 * Concretely: returns true only when @st has an SCC callchain, a visit
 * exists for that callchain, and that visit still holds backedges.
 */
static bool incomplete_read_marks(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *chain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, chain))
		return false;

	visit = scc_visit_lookup(env, chain);
	return visit && visit->backedges;
}
2047
/* Release every backedge queued on @visit: the embedded verifier state is
 * torn down with free_verifier_state() (without freeing the state object
 * itself), then the backedge is kfree()d. Leaves visit->backedges NULL.
 */
static void free_backedges(struct bpf_scc_visit *visit)
{
	struct bpf_scc_backedge *cur = visit->backedges;

	while (cur) {
		struct bpf_scc_backedge *following;

		free_verifier_state(&cur->state, false);
		/* Grab the link before the node itself goes away. */
		following = cur->next;
		kfree(cur);
		cur = following;
	}
	visit->backedges = NULL;
}
2059
/* Walk from @st up the parent chain, decrementing ->branches of each state.
 *
 * The walk stops at the first state that still has branches left. Every
 * state whose counter drops to zero is passed to maybe_exit_scc(), and the
 * list node obtained for it in the previous step (if any) is handed to
 * maybe_free_verifier_state().
 *
 * Returns 0, or the error returned by maybe_exit_scc().
 */
static int update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_verifier_state_list *pending_sl = NULL;

	while (st) {
		struct bpf_verifier_state_list *up_sl;
		struct bpf_verifier_state *up;
		u32 br = --st->branches;
		int err;

		/* verifier_bug_if(br > 1, ...) technically makes sense here,
		 * but see comment in push_stack(), hence:
		 */
		verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
		if (br)
			return 0;

		err = maybe_exit_scc(env, st);
		if (err)
			return err;

		/* Read the parent links before pending_sl is handed to
		 * maybe_free_verifier_state(); @st is not touched afterwards.
		 */
		up = st->parent;
		up_sl = state_parent_as_list(st);
		if (pending_sl)
			maybe_free_verifier_state(env, pending_sl);

		st = up;
		pending_sl = up_sl;
	}
	return 0;
}
2087
/* Pop the top element of the verifier's pending-state stack (env->head).
 *
 * If env->cur_state is set, the popped state is copied into it first.
 * @insn_idx and @prev_insn_idx, when non-NULL, receive the values stored
 * in the popped element. With @pop_log the log is reset to the position
 * recorded in the element.
 *
 * Returns 0 on success, -ENOENT if the stack is empty, or the error from
 * copy_verifier_state() (in which case nothing is popped).
 */
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx, bool pop_log)
{
	struct bpf_verifier_stack_elem *top = env->head;
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *below;
	int ret;

	if (!top)
		return -ENOENT;

	if (cur) {
		ret = copy_verifier_state(cur, &top->st);
		if (ret)
			return ret;
	}

	if (pop_log)
		bpf_vlog_reset(&env->log, top->log_pos);
	if (insn_idx)
		*insn_idx = top->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = top->prev_insn_idx;

	/* Unlink and release the popped element. */
	below = top->next;
	free_verifier_state(&top->st, false);
	kfree(top);
	env->head = below;
	env->stack_size--;
	return 0;
}
2116
/* Should only return true for non-fatal errors that are allowed to
 * occur during speculative verification. For these we can insert a
 * nospec and the program might still be accepted. Do not include
 * something like ENOMEM because it is likely to re-occur for the next
 * architectural path once it has been recovered-from in all speculative
 * paths.
 *
 * @err is a negative errno value; only -EPERM, -EACCES and -EINVAL
 * qualify.
 */
static bool error_recoverable_with_nospec(int err)
{
	switch (err) {
	case -EPERM:
	case -EACCES:
	case -EINVAL:
		return true;
	default:
		return false;
	}
}
2128
/* Push a copy of env->cur_state onto the pending-state stack so that
 * verification can later resume at @insn_idx (reached from @prev_insn_idx).
 *
 * The pushed state is marked speculative if @speculative is set (or if the
 * current state already is). If the copied state has a parent, the
 * parent's ->branches counter is incremented.
 *
 * Returns the pushed state, ERR_PTR(-ENOMEM) on allocation/copy failure,
 * or ERR_PTR(-E2BIG) when the stack exceeds BPF_COMPLEXITY_LIMIT_JMP_SEQ.
 * Note that the new element is linked into env->head before the copy, so
 * it stays on the stack on those error paths.
 */
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_stack_elem *entry;
	struct bpf_verifier_state *pushed;
	int ret;

	entry = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	entry->insn_idx = insn_idx;
	entry->prev_insn_idx = prev_insn_idx;
	entry->next = env->head;
	entry->log_pos = env->log.end_pos;
	env->head = entry;
	env->stack_size++;

	pushed = &entry->st;
	ret = copy_verifier_state(pushed, env->cur_state);
	if (ret)
		return ERR_PTR(-ENOMEM);
	pushed->speculative |= speculative;

	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		return ERR_PTR(-E2BIG);
	}

	/* WARN_ON(branches > 2) technically makes sense after the increment
	 * below, but
	 * 1. speculative states will bump 'branches' for non-branch
	 *    instructions
	 * 2. is_state_visited() heuristics may decide not to create
	 *    a new state for a sequence of branches and all such current
	 *    and cloned states will be pointing to a single parent state
	 *    which might have large 'branches' count.
	 */
	if (pushed->parent)
		++pushed->parent->branches;

	return pushed;
}
2170
/* Number of entries in caller_saved[]. */
#define CALLER_SAVED_REGS 6

/* The registers listed as caller-saved: R0 through R5. */
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0,
	BPF_REG_1,
	BPF_REG_2,
	BPF_REG_3,
	BPF_REG_4,
	BPF_REG_5,
};
2175
/* Set var_off and all 64-bit and 32-bit min/max bounds of @reg to the
 * single constant @imm. The 32-bit bounds use the low 32 bits of @imm.
 *
 * This helper doesn't clear reg->id.
 */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	const s64 val_s64 = (s64)imm;
	const s32 val_s32 = (s32)imm;
	const u32 val_u32 = (u32)imm;

	reg->var_off = tnum_const(imm);

	/* 64-bit view */
	reg->smin_value = val_s64;
	reg->smax_value = val_s64;
	reg->umin_value = imm;
	reg->umax_value = imm;

	/* 32-bit (subregister) view */
	reg->s32_min_value = val_s32;
	reg->s32_max_value = val_s32;
	reg->u32_min_value = val_u32;
	reg->u32_max_value = val_u32;
}
2190
/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 *
 * Every byte of @reg between the end of reg->type and the start of
 * reg->var_off is zeroed, reg->id and reg->ref_obj_id are reset, and the
 * value bounds are set via ___mark_reg_known(). reg->type is preserved.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	u8 *past_type = ((u8 *)reg) + sizeof(reg->type);
	size_t span = offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type);

	/* Clear off and union(map_ptr, range) */
	memset(past_type, 0, span);
	reg->id = 0;
	reg->ref_obj_id = 0;
	___mark_reg_known(reg, imm);
}
2203
/* Mark only the 32-bit subregister of @reg as known to hold @imm:
 * var_off is updated through tnum_const_subreg() and the s32/u32 bounds
 * are set to the low 32 bits of @imm. The 64-bit bounds are not touched.
 */
static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
	const s32 val_s32 = (s32)imm;
	const u32 val_u32 = (u32)imm;

	reg->var_off = tnum_const_subreg(reg->var_off, imm);

	reg->s32_min_value = val_s32;
	reg->s32_max_value = val_s32;
	reg->u32_min_value = val_u32;
	reg->u32_max_value = val_u32;
}
2212
/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 *
 * Thin wrapper around __mark_reg_known(reg, 0); reg->type is left as is.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}
2220
/* Turn @reg into a scalar register holding the constant zero. */
static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;

	/* all scalars are assumed imprecise initially (unless unprivileged,
	 * in which case everything is forced to be precise)
	 */
	if (env->bpf_capable)
		reg->precise = false;
	else
		reg->precise = true;
}
2230
/* Checked variant of __mark_reg_known_zero() for register number @regno
 * within the register array @regs.
 *
 * An out-of-range @regno triggers a WARN, is logged, and every register in
 * @regs is marked as not initialized instead.
 */
static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (!WARN_ON(regno >= MAX_BPF_REG)) {
		__mark_reg_known_zero(&regs[regno]);
		return;
	}

	verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
	/* Something bad happened, let's kill all regs */
	for (regno = 0; regno < MAX_BPF_REG; regno++)
		__mark_reg_not_init(env, &regs[regno]);
}
2243
/* Initialise @reg as a dynptr register of dynptr type @type.
 *
 * @first_slot is stored in reg->dynptr.first_slot and @dynptr_id becomes
 * reg->id.
 */
static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
			      bool first_slot, int dynptr_id)
{
	/* Start from a clean, known-zero register. */
	__mark_reg_known_zero(reg);

	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
	 */
	reg->type = CONST_PTR_TO_DYNPTR;

	reg->dynptr.type = type;
	reg->dynptr.first_slot = first_slot;

	/* Give each dynptr a unique id to uniquely associate slices to it. */
	reg->id = dynptr_id;
}
2258
/* Convert a maybe-NULL pointer register into its non-NULL form.
 *
 * For anything whose base type is not PTR_TO_MAP_VALUE this just clears
 * PTR_MAYBE_NULL. For map values the resulting type depends on the map:
 * an inner-map pointer for maps with inner_map_meta, an XDP socket for
 * XSKMAP, a socket for SOCKMAP/SOCKHASH, and a plain map value otherwise.
 */
static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
{
	const struct bpf_map *map;

	if (base_type(reg->type) != PTR_TO_MAP_VALUE) {
		reg->type &= ~PTR_MAYBE_NULL;
		return;
	}

	map = reg->map_ptr;
	if (map->inner_map_meta) {
		reg->type = CONST_PTR_TO_MAP;
		reg->map_ptr = map->inner_map_meta;
		/* transfer reg's id which is unique for every map_lookup_elem
		 * as UID of the inner map.
		 */
		if (btf_record_has_field(map->inner_map_meta->record,
					 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK))
			reg->map_uid = reg->id;
		return;
	}

	switch (map->map_type) {
	case BPF_MAP_TYPE_XSKMAP:
		reg->type = PTR_TO_XDP_SOCK;
		break;
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
		reg->type = PTR_TO_SOCKET;
		break;
	default:
		reg->type = PTR_TO_MAP_VALUE;
		break;
	}
}
2287
/* Set regs[@regno] up as a pointer to a graph node owned by @ds_head.
 *
 * The register is first reset to known-zero, then typed as
 * PTR_TO_BTF_ID | MEM_ALLOC with the BTF object and value type id taken
 * from @ds_head, and its fixed offset set to ds_head->node_offset.
 */
static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
				struct btf_field_graph_root *ds_head)
{
	__mark_reg_known_zero(&regs[regno]);
	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
	regs[regno].btf = ds_head->btf;
	regs[regno].btf_id = ds_head->value_btf_id;
	regs[regno].off = ds_head->node_offset;
}
2297
/* Return true if @reg's type is a packet pointer type, as decided by
 * type_is_pkt_pointer().
 */
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}
2302
/* Like reg_is_pkt_pointer(), but also accepts PTR_TO_PACKET_END. */
static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	if (reg_is_pkt_pointer(reg))
		return true;

	return reg->type == PTR_TO_PACKET_END;
}
2308
/* Return true if @reg is a PTR_TO_MEM register carrying any of the
 * DYNPTR_TYPE_SKB, DYNPTR_TYPE_XDP or DYNPTR_TYPE_SKB_META type flags.
 */
static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
{
	if (base_type(reg->type) != PTR_TO_MEM)
		return false;

	return (reg->type &
		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)) != 0;
}
2315
/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access.
 *
 * True when @reg has exactly type @which, no id, no fixed offset and a
 * variable offset known to be zero.
 */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	if (reg->type != which)
		return false;
	if (reg->id != 0)
		return false;
	if (reg->off != 0)
		return false;

	return tnum_equals_const(reg->var_off, 0);
}
2329
/* Reset the min/max bounds of a register: both the 64-bit and the 32-bit
 * signed/unsigned ranges are widened to their full domains. var_off is
 * not modified.
 */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	/* 64-bit unsigned, then signed */
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;

	/* 32-bit unsigned, then signed */
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
}
2343
/* Widen only the 64-bit signed and unsigned bounds of @reg to their full
 * ranges; the 32-bit bounds and var_off are left alone.
 */
static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
{
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
}
2351
/* Widen only the 32-bit signed and unsigned bounds of @reg to their full
 * ranges; the 64-bit bounds and var_off are left alone.
 */
static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
{
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
}
2359
/* Forget the 64-bit bounds of @reg and set its var_off to tnum_unknown.
 * The 32-bit bounds are kept.
 */
static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_unknown;
	__mark_reg64_unbounded(reg);
}
2365
/* Forget the 32-bit bounds of @reg and set its var_off to tnum_unknown.
 * The 64-bit bounds are kept. Note that the whole var_off is reset, not
 * just its low 32 bits.
 */
static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_unknown;
	__mark_reg32_unbounded(reg);
}
2371
__update_reg32_bounds(struct bpf_reg_state * reg)2372 static void __update_reg32_bounds(struct bpf_reg_state *reg)
2373 {
2374 struct tnum var32_off = tnum_subreg(reg->var_off);
2375
2376 /* min signed is max(sign bit) | min(other bits) */
2377 reg->s32_min_value = max_t(s32, reg->s32_min_value,
2378 var32_off.value | (var32_off.mask & S32_MIN));
2379 /* max signed is min(sign bit) | max(other bits) */
2380 reg->s32_max_value = min_t(s32, reg->s32_max_value,
2381 var32_off.value | (var32_off.mask & S32_MAX));
2382 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
2383 reg->u32_max_value = min(reg->u32_max_value,
2384 (u32)(var32_off.value | var32_off.mask));
2385 }
2386
/* Tighten the 64-bit bounds of @reg using var_off, and collapse the
 * register to a constant when the u64 range and the tnum can only agree on
 * a single value.
 *
 * The three collapse cases below are mutually exclusive; each one calls
 * ___mark_reg_known(), which sets var_off and the 64-bit and 32-bit bounds
 * to that constant.
 */
static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
	u64 tnum_next, tmax;
	bool umin_in_tnum;

	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	/* unsigned min/max: no unknown bit set / every unknown bit set */
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);

	/* Check if u64 and tnum overlap in a single value */
	/* NOTE(review): tnum_step() presumably yields the next value
	 * representable by the tnum after umin - confirm against its
	 * definition.
	 */
	tnum_next = tnum_step(reg->var_off, reg->umin_value);
	/* umin is itself a member of the tnum if its known bits match */
	umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value;
	/* largest value the tnum can represent */
	tmax = reg->var_off.value | reg->var_off.mask;
	if (umin_in_tnum && tnum_next > reg->umax_value) {
		/* The u64 range and the tnum only overlap in umin.
		 * u64:  ---[xxxxxx]-----
		 * tnum: --xx----------x-
		 */
		___mark_reg_known(reg, reg->umin_value);
	} else if (!umin_in_tnum && tnum_next == tmax) {
		/* The u64 range and the tnum only overlap in the maximum value
		 * represented by the tnum, called tmax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx-----x--------
		 */
		___mark_reg_known(reg, tmax);
	} else if (!umin_in_tnum && tnum_next <= reg->umax_value &&
		   tnum_step(reg->var_off, tnum_next) > reg->umax_value) {
		/* The u64 range and the tnum only overlap in between umin
		 * (excluded) and umax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx----x-------x-
		 */
		___mark_reg_known(reg, tnum_next);
	}
}
2429
/* Refresh the bounds of @reg from its var_off: first the 32-bit bounds
 * (__update_reg32_bounds()), then the 64-bit ones (__update_reg64_bounds()).
 */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	__update_reg32_bounds(reg);
	__update_reg64_bounds(reg);
}
2435
/* Uses signed min/max values to inform unsigned, and vice-versa.
 *
 * Tightens the 32-bit (subregister) bounds of @reg in two stages: first
 * from the 64-bit bounds, where the low 32 bits are guaranteed to form a
 * valid range, then by cross-deriving the s32 and u32 ranges from each
 * other. Bounds are only narrowed, except in the final "else" branch where
 * one signed bound is replaced by the matching unsigned one.
 */
static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
{
	/* If upper 32 bits of u64/s64 range don't change, we can use lower 32
	 * bits to improve our u32/s32 boundaries.
	 *
	 * E.g., the case where we have upper 32 bits as zero ([10, 20] in
	 * u64) is pretty trivial, it's obvious that in u32 we'll also have
	 * [10, 20] range. But this property holds for any 64-bit range as
	 * long as upper 32 bits in that entire range of values stay the same.
	 *
	 * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
	 * in decimal) has the same upper 32 bits throughout all the values in
	 * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
	 * range.
	 *
	 * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
	 * following the rules outlined below about u64/s64 correspondence
	 * (which equally applies to u32 vs s32 correspondence). In general it
	 * depends on actual hexadecimal values of 32-bit range. They can form
	 * only valid u32, or only valid s32 ranges in some cases.
	 *
	 * So we use all these insights to derive bounds for subregisters here.
	 */
	if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) {
		/* u64 to u32 casting preserves validity of low 32 bits as
		 * a range, if upper 32 bits are the same
		 */
		reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
		reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);

		/* low 32 bits also form a proper s32 range */
		if ((s32)reg->umin_value <= (s32)reg->umax_value) {
			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
		}
	}
	if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) {
		/* low 32 bits should form a proper u32 range */
		if ((u32)reg->smin_value <= (u32)reg->smax_value) {
			reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
			reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
		}
		/* low 32 bits should form a proper s32 range */
		if ((s32)reg->smin_value <= (s32)reg->smax_value) {
			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
		}
	}
	/* Special case where upper bits form a small sequence of two
	 * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
	 * 0x00000000 is also valid), while lower bits form a proper s32 range
	 * going from negative numbers to positive numbers. E.g., let's say we
	 * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
	 * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
	 * 0x0000000000000000, 0x0000000000000001}). Ignoring upper 32 bits,
	 * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
	 * Note that it doesn't have to be 0xffffffff going to 0x00000000 in
	 * upper 32 bits. As a random example, s64 range
	 * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
	 * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
	 */
	if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) &&
	    (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
	}
	/* same special case, derived from the s64 bounds */
	if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) &&
	    (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
	}
	/* if u32 range forms a valid s32 range (due to matching sign bit),
	 * try to learn from that
	 */
	if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
	}
	/* If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine. This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
		reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
	} else {
		/* The s32 range crosses the sign boundary, i.e. it wraps
		 * around in u32 space.
		 */
		if (reg->u32_max_value < (u32)reg->s32_min_value) {
			/* See __reg64_deduce_bounds() for detailed explanation.
			 * Refine ranges in the following situation:
			 *
			 * 0                                                   U32_MAX
			 * |  [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]              |
			 * |----------------------------|----------------------------|
			 * |xxxxx s32 range xxxxxxxxx]                       [xxxxxxx|
			 * 0                     S32_MAX S32_MIN                    -1
			 */
			reg->s32_min_value = (s32)reg->u32_min_value;
			reg->u32_max_value = min_t(u32, reg->u32_max_value, reg->s32_max_value);
		} else if ((u32)reg->s32_max_value < reg->u32_min_value) {
			/*
			 * 0                                                   U32_MAX
			 * |              [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]  |
			 * |----------------------------|----------------------------|
			 * |xxxxxxxxx]                       [xxxxxxxxxxxx s32 range |
			 * 0                     S32_MAX S32_MIN                    -1
			 */
			reg->s32_max_value = (s32)reg->u32_max_value;
			reg->u32_min_value = max_t(u32, reg->u32_min_value, reg->s32_min_value);
		}
	}
}
2547
__reg64_deduce_bounds(struct bpf_reg_state * reg)2548 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2549 {
2550 /* If u64 range forms a valid s64 range (due to matching sign bit),
2551 * try to learn from that. Let's do a bit of ASCII art to see when
2552 * this is happening. Let's take u64 range first:
2553 *
2554 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2555 * |-------------------------------|--------------------------------|
2556 *
2557 * Valid u64 range is formed when umin and umax are anywhere in the
2558 * range [0, U64_MAX], and umin <= umax. u64 case is simple and
2559 * straightforward. Let's see how s64 range maps onto the same range
2560 * of values, annotated below the line for comparison:
2561 *
2562 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2563 * |-------------------------------|--------------------------------|
2564 * 0 S64_MAX S64_MIN -1
2565 *
2566 * So s64 values basically start in the middle and they are logically
2567 * contiguous to the right of it, wrapping around from -1 to 0, and
2568 * then finishing as S64_MAX (0x7fffffffffffffff) right before
2569 * S64_MIN. We can try drawing the continuity of u64 vs s64 values
2570 * more visually as mapped to sign-agnostic range of hex values.
2571 *
2572 * u64 start u64 end
2573 * _______________________________________________________________
2574 * / \
2575 * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX
2576 * |-------------------------------|--------------------------------|
2577 * 0 S64_MAX S64_MIN -1
2578 * / \
2579 * >------------------------------ ------------------------------->
2580 * s64 continues... s64 end s64 start s64 "midpoint"
2581 *
2582 * What this means is that, in general, we can't always derive
2583 * something new about u64 from any random s64 range, and vice versa.
2584 *
2585 * But we can do that in two particular cases. One is when entire
2586 * u64/s64 range is *entirely* contained within left half of the above
2587 * diagram or when it is *entirely* contained in the right half. I.e.:
2588 *
2589 * |-------------------------------|--------------------------------|
2590 * ^ ^ ^ ^
2591 * A B C D
2592 *
2593 * [A, B] and [C, D] are contained entirely in their respective halves
2594 * and form valid contiguous ranges as both u64 and s64 values. [A, B]
2595 * will be non-negative both as u64 and s64 (and in fact it will be
2596 * identical ranges no matter the signedness). [C, D] treated as s64
2597 * will be a range of negative values, while in u64 it will be
2598 * non-negative range of values larger than 0x8000000000000000.
2599 *
2600 * Now, any other range here can't be represented in both u64 and s64
2601 * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
2602 * contiguous u64 ranges, but they are discontinuous in s64. [B, C]
2603 * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
2604 * for example. Similarly, valid s64 range [D, A] (going from negative
2605 * to positive values), would be two separate [D, U64_MAX] and [0, A]
2606 * ranges as u64. Currently reg_state can't represent two segments per
2607 * numeric domain, so in such situations we can only derive maximal
2608 * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
2609 *
2610 * So we use these facts to derive umin/umax from smin/smax and vice
2611 * versa only if they stay within the same "half". This is equivalent
2612 * to checking sign bit: lower half will have sign bit as zero, upper
2613 * half have sign bit 1. Below in code we simplify this by just
2614 * casting umin/umax as smin/smax and checking if they form valid
2615 * range, and vice versa. Those are equivalent checks.
2616 */
2617 if ((s64)reg->umin_value <= (s64)reg->umax_value) {
2618 reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
2619 reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
2620 }
2621 /* If we cannot cross the sign boundary, then signed and unsigned bounds
2622 * are the same, so combine. This works even in the negative case, e.g.
2623 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2624 */
2625 if ((u64)reg->smin_value <= (u64)reg->smax_value) {
2626 reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
2627 reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
2628 } else {
2629 /* If the s64 range crosses the sign boundary, then it's split
2630 * between the beginning and end of the U64 domain. In that
2631 * case, we can derive new bounds if the u64 range overlaps
2632 * with only one end of the s64 range.
2633 *
2634 * In the following example, the u64 range overlaps only with
2635 * positive portion of the s64 range.
2636 *
2637 * 0 U64_MAX
2638 * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] |
2639 * |----------------------------|----------------------------|
2640 * |xxxxx s64 range xxxxxxxxx] [xxxxxxx|
2641 * 0 S64_MAX S64_MIN -1
2642 *
2643 * We can thus derive the following new s64 and u64 ranges.
2644 *
2645 * 0 U64_MAX
2646 * | [xxxxxx u64 range xxxxx] |
2647 * |----------------------------|----------------------------|
2648 * | [xxxxxx s64 range xxxxx] |
2649 * 0 S64_MAX S64_MIN -1
2650 *
2651 * If they overlap in two places, we can't derive anything
2652 * because reg_state can't represent two ranges per numeric
2653 * domain.
2654 *
2655 * 0 U64_MAX
2656 * | [xxxxxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxxxxx] |
2657 * |----------------------------|----------------------------|
2658 * |xxxxx s64 range xxxxxxxxx] [xxxxxxxxxx|
2659 * 0 S64_MAX S64_MIN -1
2660 *
2661 * The first condition below corresponds to the first diagram
2662 * above.
2663 */
2664 if (reg->umax_value < (u64)reg->smin_value) {
2665 reg->smin_value = (s64)reg->umin_value;
2666 reg->umax_value = min_t(u64, reg->umax_value, reg->smax_value);
2667 } else if ((u64)reg->smax_value < reg->umin_value) {
2668 /* This second condition considers the case where the u64 range
2669 * overlaps with the negative portion of the s64 range:
2670 *
2671 * 0 U64_MAX
2672 * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] |
2673 * |----------------------------|----------------------------|
2674 * |xxxxxxxxx] [xxxxxxxxxxxx s64 range |
2675 * 0 S64_MAX S64_MIN -1
2676 */
2677 reg->smax_value = (s64)reg->umax_value;
2678 reg->umin_value = max_t(u64, reg->umin_value, reg->smin_value);
2679 }
2680 }
2681 }
2682
__reg_deduce_mixed_bounds(struct bpf_reg_state * reg)2683 static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
2684 {
2685 /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
2686 * values on both sides of 64-bit range in hope to have tighter range.
2687 * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from
2688 * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff].
2689 * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound
2690 * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of
2691 * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a
2692 * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff].
2693 * We just need to make sure that derived bounds we are intersecting
2694 * with are well-formed ranges in respective s64 or u64 domain, just
2695 * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
2696 */
2697 __u64 new_umin, new_umax;
2698 __s64 new_smin, new_smax;
2699
2700 /* u32 -> u64 tightening, it's always well-formed */
2701 new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value;
2702 new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value;
2703 reg->umin_value = max_t(u64, reg->umin_value, new_umin);
2704 reg->umax_value = min_t(u64, reg->umax_value, new_umax);
2705 /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */
2706 new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value;
2707 new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value;
2708 reg->smin_value = max_t(s64, reg->smin_value, new_smin);
2709 reg->smax_value = min_t(s64, reg->smax_value, new_smax);
2710
2711 /* Here we would like to handle a special case after sign extending load,
2712 * when upper bits for a 64-bit range are all 1s or all 0s.
2713 *
2714 * Upper bits are all 1s when register is in a range:
2715 * [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
2716 * Upper bits are all 0s when register is in a range:
2717 * [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
2718 * Together this forms are continuous range:
2719 * [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
2720 *
2721 * Now, suppose that register range is in fact tighter:
2722 * [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
2723 * Also suppose that it's 32-bit range is positive,
2724 * meaning that lower 32-bits of the full 64-bit register
2725 * are in the range:
2726 * [0x0000_0000, 0x7fff_ffff] (W)
2727 *
2728 * If this happens, then any value in a range:
2729 * [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
2730 * is smaller than a lowest bound of the range (R):
2731 * 0xffff_ffff_8000_0000
2732 * which means that upper bits of the full 64-bit register
2733 * can't be all 1s, when lower bits are in range (W).
2734 *
2735 * Note that:
2736 * - 0xffff_ffff_8000_0000 == (s64)S32_MIN
2737 * - 0x0000_0000_7fff_ffff == (s64)S32_MAX
2738 * These relations are used in the conditions below.
2739 */
2740 if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
2741 reg->smin_value = reg->s32_min_value;
2742 reg->smax_value = reg->s32_max_value;
2743 reg->umin_value = reg->s32_min_value;
2744 reg->umax_value = reg->s32_max_value;
2745 reg->var_off = tnum_intersect(reg->var_off,
2746 tnum_range(reg->smin_value, reg->smax_value));
2747 }
2748 }
2749
/* Run one round of bounds deduction on @reg: the 32-bit pass, then the
 * 64-bit pass, then the mixed pass. The mixed pass reads both the 32-bit
 * and the 64-bit bounds, so it runs after the other two.
 */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	__reg32_deduce_bounds(reg);
	__reg64_deduce_bounds(reg);
	__reg_deduce_mixed_bounds(reg);
}
2756
2757 /* Attempts to improve var_off based on unsigned min/max information */
__reg_bound_offset(struct bpf_reg_state * reg)2758 static void __reg_bound_offset(struct bpf_reg_state *reg)
2759 {
2760 struct tnum var64_off = tnum_intersect(reg->var_off,
2761 tnum_range(reg->umin_value,
2762 reg->umax_value));
2763 struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2764 tnum_range(reg->u32_min_value,
2765 reg->u32_max_value));
2766
2767 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2768 }
2769
/* Bring the range bounds and var_off of @reg back in agreement with each
 * other after any of them has been changed.
 */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	int round;

	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(reg);

	/* We might have learned something about the sign bit. The deduction
	 * is run three times in a row.
	 */
	for (round = 0; round < 3; round++)
		__reg_deduce_bounds(reg);

	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(reg);

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
2786
reg_bounds_sanity_check(struct bpf_verifier_env * env,struct bpf_reg_state * reg,const char * ctx)2787 static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
2788 struct bpf_reg_state *reg, const char *ctx)
2789 {
2790 const char *msg;
2791
2792 if (reg->umin_value > reg->umax_value ||
2793 reg->smin_value > reg->smax_value ||
2794 reg->u32_min_value > reg->u32_max_value ||
2795 reg->s32_min_value > reg->s32_max_value) {
2796 msg = "range bounds violation";
2797 goto out;
2798 }
2799
2800 if (tnum_is_const(reg->var_off)) {
2801 u64 uval = reg->var_off.value;
2802 s64 sval = (s64)uval;
2803
2804 if (reg->umin_value != uval || reg->umax_value != uval ||
2805 reg->smin_value != sval || reg->smax_value != sval) {
2806 msg = "const tnum out of sync with range bounds";
2807 goto out;
2808 }
2809 }
2810
2811 if (tnum_subreg_is_const(reg->var_off)) {
2812 u32 uval32 = tnum_subreg(reg->var_off).value;
2813 s32 sval32 = (s32)uval32;
2814
2815 if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
2816 reg->s32_min_value != sval32 || reg->s32_max_value != sval32) {
2817 msg = "const subreg tnum out of sync with range bounds";
2818 goto out;
2819 }
2820 }
2821
2822 return 0;
2823 out:
2824 verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
2825 "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)",
2826 ctx, msg, reg->umin_value, reg->umax_value,
2827 reg->smin_value, reg->smax_value,
2828 reg->u32_min_value, reg->u32_max_value,
2829 reg->s32_min_value, reg->s32_max_value,
2830 reg->var_off.value, reg->var_off.mask);
2831 if (env->test_reg_invariants)
2832 return -EFAULT;
2833 __mark_reg_unbounded(reg);
2834 return 0;
2835 }
2836
/* Return true when the s32 value @a is within [0, S32_MAX]. */
static bool __reg32_bound_s64(s32 a)
{
	if (a < 0)
		return false;

	return a <= S32_MAX;
}
2841
__reg_assign_32_into_64(struct bpf_reg_state * reg)2842 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2843 {
2844 reg->umin_value = reg->u32_min_value;
2845 reg->umax_value = reg->u32_max_value;
2846
2847 /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
2848 * be positive otherwise set to worse case bounds and refine later
2849 * from tnum.
2850 */
2851 if (__reg32_bound_s64(reg->s32_min_value) &&
2852 __reg32_bound_s64(reg->s32_max_value)) {
2853 reg->smin_value = reg->s32_min_value;
2854 reg->smax_value = reg->s32_max_value;
2855 } else {
2856 reg->smin_value = 0;
2857 reg->smax_value = U32_MAX;
2858 }
2859 }
2860
2861 /* Mark a register as having a completely unknown (scalar) value. */
__mark_reg_unknown_imprecise(struct bpf_reg_state * reg)2862 static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
2863 {
2864 /*
2865 * Clear type, off, and union(map_ptr, range) and
2866 * padding between 'type' and union
2867 */
2868 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2869 reg->type = SCALAR_VALUE;
2870 reg->id = 0;
2871 reg->ref_obj_id = 0;
2872 reg->var_off = tnum_unknown;
2873 reg->frameno = 0;
2874 reg->precise = false;
2875 __mark_reg_unbounded(reg);
2876 }
2877
2878 /* Mark a register as having a completely unknown (scalar) value,
2879 * initialize .precise as true when not bpf capable.
2880 */
__mark_reg_unknown(const struct bpf_verifier_env * env,struct bpf_reg_state * reg)2881 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2882 struct bpf_reg_state *reg)
2883 {
2884 __mark_reg_unknown_imprecise(reg);
2885 reg->precise = !env->bpf_capable;
2886 }
2887
/* Mark register number @regno of @regs as an unknown scalar.
 *
 * An out-of-range @regno triggers a WARN, is logged, and causes every
 * register below BPF_REG_FP to be marked as not initialized instead.
 */
static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	u32 i;

	if (!WARN_ON(regno >= MAX_BPF_REG)) {
		__mark_reg_unknown(env, &regs[regno]);
		return;
	}

	verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
	/* Something bad happened, let's kill all regs except FP */
	for (i = 0; i < BPF_REG_FP; i++)
		__mark_reg_not_init(env, &regs[i]);
}
2900
/* Intersect the bounds of register @regno with [@s32_min, @s32_max].
 *
 * The limits are applied to the s32 bounds and, widened to s64, to the s64
 * bounds as well. The remaining bounds are then re-synchronized, and the
 * result of the invariants check is returned.
 */
static int __mark_reg_s32_range(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs,
				u32 regno,
				s32 s32_min,
				s32 s32_max)
{
	struct bpf_reg_state *reg = &regs[regno];
	const s64 lo64 = s32_min;
	const s64 hi64 = s32_max;

	if (s32_min > reg->s32_min_value)
		reg->s32_min_value = s32_min;
	if (s32_max < reg->s32_max_value)
		reg->s32_max_value = s32_max;

	if (lo64 > reg->smin_value)
		reg->smin_value = lo64;
	if (hi64 < reg->smax_value)
		reg->smax_value = hi64;

	reg_bounds_sync(reg);

	return reg_bounds_sanity_check(env, reg, "s32_range");
}
2919
/* Mark @reg as not initialized: reset it the same way as an unknown scalar
 * and then override its type with NOT_INIT.
 */
static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg)
{
	__mark_reg_unknown(env, reg);
	/* Must come second: the call above sets the type to SCALAR_VALUE. */
	reg->type = NOT_INIT;
}
2926
/* Mark register number @regno of @regs as not initialized.
 *
 * An out-of-range @regno triggers a WARN, is logged, and causes every
 * register below BPF_REG_FP to be marked as not initialized instead.
 */
static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	u32 i;

	if (!WARN_ON(regno >= MAX_BPF_REG)) {
		__mark_reg_not_init(env, &regs[regno]);
		return;
	}

	verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
	/* Something bad happened, let's kill all regs except FP */
	for (i = 0; i < BPF_REG_FP; i++)
		__mark_reg_not_init(env, &regs[i]);
}
2939
/* Set up register @regno according to @reg_type.
 *
 * SCALAR_VALUE:  the register becomes an unknown scalar.
 * PTR_TO_BTF_ID: the register becomes a known-zero pointer of type
 *                PTR_TO_BTF_ID | @flag carrying @btf / @btf_id; a fresh id
 *                is assigned when @flag says the pointer may be NULL.
 * PTR_TO_MEM:    the register becomes a known-zero pointer of type
 *                PTR_TO_MEM | @flag with mem_size 0.
 *
 * Returns 0 on success; any other @reg_type is reported as a verifier bug
 * and yields -EFAULT.
 */
static int mark_btf_ld_reg(struct bpf_verifier_env *env,
			   struct bpf_reg_state *regs, u32 regno,
			   enum bpf_reg_type reg_type,
			   struct btf *btf, u32 btf_id,
			   enum bpf_type_flag flag)
{
	struct bpf_reg_state *reg;

	switch (reg_type) {
	case SCALAR_VALUE:
		mark_reg_unknown(env, regs, regno);
		break;
	case PTR_TO_BTF_ID:
		mark_reg_known_zero(env, regs, regno);
		reg = &regs[regno];
		reg->type = PTR_TO_BTF_ID | flag;
		reg->btf = btf;
		reg->btf_id = btf_id;
		if (type_may_be_null(flag))
			reg->id = ++env->id_gen;
		break;
	case PTR_TO_MEM:
		mark_reg_known_zero(env, regs, regno);
		reg = &regs[regno];
		reg->type = PTR_TO_MEM | flag;
		reg->mem_size = 0;
		break;
	default:
		verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
		return -EFAULT;
	}

	return 0;
}
2968
2969 #define DEF_NOT_SUBREG (0)
init_reg_state(struct bpf_verifier_env * env,struct bpf_func_state * state)2970 static void init_reg_state(struct bpf_verifier_env *env,
2971 struct bpf_func_state *state)
2972 {
2973 struct bpf_reg_state *regs = state->regs;
2974 int i;
2975
2976 for (i = 0; i < MAX_BPF_REG; i++) {
2977 mark_reg_not_init(env, regs, i);
2978 regs[i].subreg_def = DEF_NOT_SUBREG;
2979 }
2980
2981 /* frame pointer */
2982 regs[BPF_REG_FP].type = PTR_TO_STACK;
2983 mark_reg_known_zero(env, regs, BPF_REG_FP);
2984 regs[BPF_REG_FP].frameno = state->frameno;
2985 }
2986
/* Build a struct bpf_retval_range from @minval and @maxval. */
static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
{
	struct bpf_retval_range range = { minval, maxval };

	return range;
}
2991
2992 #define BPF_MAIN_FUNC (-1)
init_func_state(struct bpf_verifier_env * env,struct bpf_func_state * state,int callsite,int frameno,int subprogno)2993 static void init_func_state(struct bpf_verifier_env *env,
2994 struct bpf_func_state *state,
2995 int callsite, int frameno, int subprogno)
2996 {
2997 state->callsite = callsite;
2998 state->frameno = frameno;
2999 state->subprogno = subprogno;
3000 state->callback_ret_range = retval_range(0, 0);
3001 init_reg_state(env, state);
3002 mark_verifier_state_scratched(env);
3003 }
3004
3005 /* Similar to push_stack(), but for async callbacks */
push_async_cb(struct bpf_verifier_env * env,int insn_idx,int prev_insn_idx,int subprog,bool is_sleepable)3006 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
3007 int insn_idx, int prev_insn_idx,
3008 int subprog, bool is_sleepable)
3009 {
3010 struct bpf_verifier_stack_elem *elem;
3011 struct bpf_func_state *frame;
3012
3013 elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
3014 if (!elem)
3015 return ERR_PTR(-ENOMEM);
3016
3017 elem->insn_idx = insn_idx;
3018 elem->prev_insn_idx = prev_insn_idx;
3019 elem->next = env->head;
3020 elem->log_pos = env->log.end_pos;
3021 env->head = elem;
3022 env->stack_size++;
3023 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
3024 verbose(env,
3025 "The sequence of %d jumps is too complex for async cb.\n",
3026 env->stack_size);
3027 return ERR_PTR(-E2BIG);
3028 }
3029 /* Unlike push_stack() do not copy_verifier_state().
3030 * The caller state doesn't matter.
3031 * This is async callback. It starts in a fresh stack.
3032 * Initialize it similar to do_check_common().
3033 */
3034 elem->st.branches = 1;
3035 elem->st.in_sleepable = is_sleepable;
3036 frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT);
3037 if (!frame)
3038 return ERR_PTR(-ENOMEM);
3039 init_func_state(env, frame,
3040 BPF_MAIN_FUNC /* callsite */,
3041 0 /* frameno within this callchain */,
3042 subprog /* subprog number within this prog */);
3043 elem->st.frame[0] = frame;
3044 return &elem->st;
3045 }
3046
3047
/* The role a register operand plays in an instruction. */
enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};
3053
cmp_subprogs(const void * a,const void * b)3054 static int cmp_subprogs(const void *a, const void *b)
3055 {
3056 return ((struct bpf_subprog_info *)a)->start -
3057 ((struct bpf_subprog_info *)b)->start;
3058 }
3059
3060 /* Find subprogram that contains instruction at 'off' */
bpf_find_containing_subprog(struct bpf_verifier_env * env,int off)3061 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
3062 {
3063 struct bpf_subprog_info *vals = env->subprog_info;
3064 int l, r, m;
3065
3066 if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
3067 return NULL;
3068
3069 l = 0;
3070 r = env->subprog_cnt - 1;
3071 while (l < r) {
3072 m = l + (r - l + 1) / 2;
3073 if (vals[m].start <= off)
3074 l = m;
3075 else
3076 r = m - 1;
3077 }
3078 return &vals[l];
3079 }
3080
3081 /* Find subprogram that starts exactly at 'off' */
find_subprog(struct bpf_verifier_env * env,int off)3082 static int find_subprog(struct bpf_verifier_env *env, int off)
3083 {
3084 struct bpf_subprog_info *p;
3085
3086 p = bpf_find_containing_subprog(env, off);
3087 if (!p || p->start != off)
3088 return -ENOENT;
3089 return p - env->subprog_info;
3090 }
3091
add_subprog(struct bpf_verifier_env * env,int off)3092 static int add_subprog(struct bpf_verifier_env *env, int off)
3093 {
3094 int insn_cnt = env->prog->len;
3095 int ret;
3096
3097 if (off >= insn_cnt || off < 0) {
3098 verbose(env, "call to invalid destination\n");
3099 return -EINVAL;
3100 }
3101 ret = find_subprog(env, off);
3102 if (ret >= 0)
3103 return ret;
3104 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
3105 verbose(env, "too many subprograms\n");
3106 return -E2BIG;
3107 }
3108 /* determine subprog starts. The end is one before the next starts */
3109 env->subprog_info[env->subprog_cnt++].start = off;
3110 sort(env->subprog_info, env->subprog_cnt,
3111 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
3112 return env->subprog_cnt - 1;
3113 }
3114
bpf_find_exception_callback_insn_off(struct bpf_verifier_env * env)3115 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
3116 {
3117 struct bpf_prog_aux *aux = env->prog->aux;
3118 struct btf *btf = aux->btf;
3119 const struct btf_type *t;
3120 u32 main_btf_id, id;
3121 const char *name;
3122 int ret, i;
3123
3124 /* Non-zero func_info_cnt implies valid btf */
3125 if (!aux->func_info_cnt)
3126 return 0;
3127 main_btf_id = aux->func_info[0].type_id;
3128
3129 t = btf_type_by_id(btf, main_btf_id);
3130 if (!t) {
3131 verbose(env, "invalid btf id for main subprog in func_info\n");
3132 return -EINVAL;
3133 }
3134
3135 name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
3136 if (IS_ERR(name)) {
3137 ret = PTR_ERR(name);
3138 /* If there is no tag present, there is no exception callback */
3139 if (ret == -ENOENT)
3140 ret = 0;
3141 else if (ret == -EEXIST)
3142 verbose(env, "multiple exception callback tags for main subprog\n");
3143 return ret;
3144 }
3145
3146 ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
3147 if (ret < 0) {
3148 verbose(env, "exception callback '%s' could not be found in BTF\n", name);
3149 return ret;
3150 }
3151 id = ret;
3152 t = btf_type_by_id(btf, id);
3153 if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
3154 verbose(env, "exception callback '%s' must have global linkage\n", name);
3155 return -EINVAL;
3156 }
3157 ret = 0;
3158 for (i = 0; i < aux->func_info_cnt; i++) {
3159 if (aux->func_info[i].type_id != id)
3160 continue;
3161 ret = aux->func_info[i].insn_off;
3162 /* Further func_info and subprog checks will also happen
3163 * later, so assume this is the right insn_off for now.
3164 */
3165 if (!ret) {
3166 verbose(env, "invalid exception callback insn_off in func_info: 0\n");
3167 ret = -EINVAL;
3168 }
3169 }
3170 if (!ret) {
3171 verbose(env, "exception callback type id not found in func_info\n");
3172 ret = -EINVAL;
3173 }
3174 return ret;
3175 }
3176
/* Capacities of the per-program kfunc tables: the number of distinct kfunc
 * descriptors and the number of distinct module BTFs that can be recorded.
 */
#define MAX_KFUNC_DESCS 256
#define MAX_KFUNC_BTFS	256
3179
/* One kernel function (kfunc) called by a BPF program. */
struct bpf_kfunc_desc {
	/* Function model distilled from the kfunc's BTF prototype. */
	struct btf_func_model func_model;
	/* BTF id of the kfunc. */
	u32 func_id;
	/* Immediate for the call instruction, see set_kfunc_desc_imm(). */
	s32 imm;
	/* fd_array offset selecting the BTF that func_id belongs to. */
	u16 offset;
	/* Address obtained from kallsyms_lookup_name(). */
	unsigned long addr;
};
3187
/* A module BTF used by a kfunc call, together with the module reference
 * taken for it. Both are released in bpf_free_kfunc_btf_tab().
 */
struct bpf_kfunc_btf {
	struct btf *btf;
	struct module *module;
	/* fd_array offset the BTF fd was read from. */
	u16 offset;
};
3193
/* Table of all kfunc descriptors of a program; nr_descs is the number of
 * entries of descs[] in use.
 */
struct bpf_kfunc_desc_tab {
	/* Sorted by func_id (BTF ID) and offset (fd_array offset) during
	 * verification. JITs do lookups by bpf_insn, where func_id may not be
	 * available, therefore at the end of verification do_misc_fixups()
	 * sorts this by imm and offset.
	 */
	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
	u32 nr_descs;
};
3203
/* Table of the module BTFs referenced by a program's kfunc calls, kept
 * sorted by offset so that it can be searched with bsearch(); nr_descs is
 * the number of entries of descs[] in use.
 */
struct bpf_kfunc_btf_tab {
	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
	u32 nr_descs;
};
3208
/* Forward declaration; the definition is elsewhere in this file. */
static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc,
			    int insn_idx);
3211
kfunc_desc_cmp_by_id_off(const void * a,const void * b)3212 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
3213 {
3214 const struct bpf_kfunc_desc *d0 = a;
3215 const struct bpf_kfunc_desc *d1 = b;
3216
3217 /* func_id is not greater than BTF_MAX_TYPE */
3218 return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
3219 }
3220
kfunc_btf_cmp_by_off(const void * a,const void * b)3221 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
3222 {
3223 const struct bpf_kfunc_btf *d0 = a;
3224 const struct bpf_kfunc_btf *d1 = b;
3225
3226 return d0->offset - d1->offset;
3227 }
3228
3229 static struct bpf_kfunc_desc *
find_kfunc_desc(const struct bpf_prog * prog,u32 func_id,u16 offset)3230 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
3231 {
3232 struct bpf_kfunc_desc desc = {
3233 .func_id = func_id,
3234 .offset = offset,
3235 };
3236 struct bpf_kfunc_desc_tab *tab;
3237
3238 tab = prog->aux->kfunc_tab;
3239 return bsearch(&desc, tab->descs, tab->nr_descs,
3240 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
3241 }
3242
/* Store the address of the kfunc identified by (@func_id, @btf_fd_idx) in
 * *@func_addr.
 *
 * Returns 0 on success, or -EFAULT (leaving *@func_addr untouched) if
 * @prog has no descriptor for that kfunc.
 */
int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
		       u16 btf_fd_idx, u8 **func_addr)
{
	const struct bpf_kfunc_desc *desc = find_kfunc_desc(prog, func_id,
							    btf_fd_idx);

	if (desc) {
		*func_addr = (u8 *)desc->addr;
		return 0;
	}

	return -EFAULT;
}
3255
/* Return the module BTF that belongs to fd_array index @offset.
 *
 * A BTF that was resolved before is served from the program's kfunc BTF
 * table. Otherwise the BTF fd is read from env->fd_array, the BTF is
 * checked to be a module BTF, a reference on the owning module is taken,
 * and the result is added to the table.
 *
 * Returns the BTF, or an ERR_PTR: -E2BIG if the table is full, -EPROTO if
 * there is no fd_array, -EFAULT if the fd cannot be copied, -EINVAL if the
 * BTF is not a module BTF, -ENXIO if the module reference cannot be taken,
 * or whatever btf_get_by_fd() reported.
 */
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
					 s16 offset)
{
	struct bpf_kfunc_btf_tab *tab = env->prog->aux->kfunc_btf_tab;
	struct bpf_kfunc_btf key = { .offset = offset };
	struct bpf_kfunc_btf *entry;
	struct module *mod;
	struct btf *btf;
	int btf_fd;

	entry = bsearch(&key, tab->descs, tab->nr_descs,
			sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
	if (entry)
		return entry->btf;

	if (tab->nr_descs == MAX_KFUNC_BTFS) {
		verbose(env, "too many different module BTFs\n");
		return ERR_PTR(-E2BIG);
	}

	if (bpfptr_is_null(env->fd_array)) {
		verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
		return ERR_PTR(-EPROTO);
	}

	if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
				    offset * sizeof(btf_fd),
				    sizeof(btf_fd)))
		return ERR_PTR(-EFAULT);

	btf = btf_get_by_fd(btf_fd);
	if (IS_ERR(btf)) {
		verbose(env, "invalid module BTF fd specified\n");
		return btf;
	}

	if (!btf_is_module(btf)) {
		verbose(env, "BTF fd for kfunc is not a module BTF\n");
		btf_put(btf);
		return ERR_PTR(-EINVAL);
	}

	mod = btf_try_get_module(btf);
	if (!mod) {
		btf_put(btf);
		return ERR_PTR(-ENXIO);
	}

	entry = &tab->descs[tab->nr_descs++];
	entry->offset = offset;
	entry->module = mod;
	entry->btf = btf;

	/* sort() reorders entries by value, so entry may no longer point
	 * to the right element after this
	 */
	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
	     kfunc_btf_cmp_by_off, NULL);

	return btf;
}
3319
bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab * tab)3320 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
3321 {
3322 if (!tab)
3323 return;
3324
3325 while (tab->nr_descs--) {
3326 module_put(tab->descs[tab->nr_descs].module);
3327 btf_put(tab->descs[tab->nr_descs].btf);
3328 }
3329 kfree(tab);
3330 }
3331
/* Return the BTF a kfunc call with fd_array offset @offset refers to.
 *
 * Offset 0 selects btf_vmlinux (ERR_PTR(-ENOENT) if it is not available),
 * a positive offset selects a module BTF, and a negative offset is
 * rejected with ERR_PTR(-EINVAL).
 */
static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
{
	if (!offset) {
		if (!btf_vmlinux)
			return ERR_PTR(-ENOENT);
		return btf_vmlinux;
	}

	if (offset < 0) {
		/* In the future, this can be allowed to increase limit
		 * of fd index into fd_array, interpreted as u16.
		 */
		verbose(env, "negative offset disallowed for kernel module function call\n");
		return ERR_PTR(-EINVAL);
	}

	return __find_kfunc_desc_btf(env, offset);
}
3347
3348 #define KF_IMPL_SUFFIX "_impl"
3349
find_kfunc_impl_proto(struct bpf_verifier_env * env,struct btf * btf,const char * func_name)3350 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
3351 struct btf *btf,
3352 const char *func_name)
3353 {
3354 char *buf = env->tmp_str_buf;
3355 const struct btf_type *func;
3356 s32 impl_id;
3357 int len;
3358
3359 len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
3360 if (len < 0 || len >= TMP_STR_BUF_LEN) {
3361 verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
3362 return NULL;
3363 }
3364
3365 impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
3366 if (impl_id <= 0) {
3367 verbose(env, "cannot find function %s in BTF\n", buf);
3368 return NULL;
3369 }
3370
3371 func = btf_type_by_id(btf, impl_id);
3372
3373 return btf_type_by_id(btf, func->type);
3374 }
3375
/* Collect the metadata of the kfunc with BTF id @func_id, taken from the
 * BTF selected by @offset, into @kfunc.
 *
 * On success @kfunc is zeroed and its btf, id, name, proto and flags
 * members are filled in, and 0 is returned. @kfunc is left untouched on
 * failure: -EINVAL for a non-positive @func_id, an id that is not a
 * function, or a missing/invalid prototype; otherwise the error of the
 * BTF lookup.
 */
static int fetch_kfunc_meta(struct bpf_verifier_env *env,
			    s32 func_id,
			    s16 offset,
			    struct bpf_kfunc_meta *kfunc)
{
	const struct btf_type *func_type, *proto;
	bool implicit_args;
	const char *name;
	struct btf *btf;
	u32 *flags;

	if (func_id <= 0) {
		verbose(env, "invalid kernel function btf_id %d\n", func_id);
		return -EINVAL;
	}

	btf = find_kfunc_desc_btf(env, offset);
	if (IS_ERR(btf)) {
		verbose(env, "failed to find BTF for kernel function\n");
		return PTR_ERR(btf);
	}

	/*
	 * Note that flags may be NULL at this point, which
	 * means that we couldn't find func_id in any relevant
	 * kfunc_id_set. This most likely indicates an invalid kfunc
	 * call. However we don't fail with an error here,
	 * and let the caller decide what to do with NULL kfunc->flags.
	 */
	flags = btf_kfunc_flags(btf, func_id, env->prog);

	func_type = btf_type_by_id(btf, func_id);
	if (!func_type || !btf_type_is_func(func_type)) {
		verbose(env, "kernel btf_id %d is not a function\n", func_id);
		return -EINVAL;
	}

	name = btf_name_by_offset(btf, func_type->name_off);

	/*
	 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag
	 * can be found through the counterpart _impl kfunc.
	 */
	implicit_args = flags && (*flags & KF_IMPLICIT_ARGS);
	if (implicit_args)
		proto = find_kfunc_impl_proto(env, btf, name);
	else
		proto = btf_type_by_id(btf, func_type->type);

	if (!proto || !btf_type_is_func_proto(proto)) {
		verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
			func_id);
		return -EINVAL;
	}

	memset(kfunc, 0, sizeof(*kfunc));
	kfunc->id = func_id;
	kfunc->btf = btf;
	kfunc->name = name;
	kfunc->flags = flags;
	kfunc->proto = proto;

	return 0;
}
3438
/* Record a call to the kfunc (@func_id, @offset) in the program's kfunc
 * descriptor table.
 *
 * The descriptor table is allocated on first use, after checking that
 * kfunc calls are possible at all (vmlinux BTF present, JIT requested and
 * capable, GPL-compatible program). The module BTF table is allocated the
 * first time a non-zero @offset is seen. A kfunc that already has a
 * descriptor is not added again.
 *
 * Returns 0 on success (including the "nothing to do" cases), -ENOTSUPP or
 * -EINVAL when kfunc calls are not allowed, -ENOMEM, -E2BIG when the table
 * is full, -EINVAL when the kfunc address cannot be resolved, or an error
 * from the metadata, device-bound or function-model checks.
 */
static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	struct bpf_kfunc_desc_tab *desc_tab = aux->kfunc_tab;
	struct bpf_kfunc_btf_tab *btf_tab = aux->kfunc_btf_tab;
	struct btf_func_model model;
	struct bpf_kfunc_meta meta;
	struct bpf_kfunc_desc *slot;
	unsigned long kaddr;
	int ret;

	if (!desc_tab) {
		if (!btf_vmlinux) {
			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
			return -ENOTSUPP;
		}

		if (!env->prog->jit_requested) {
			verbose(env, "JIT is required for calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!bpf_jit_supports_kfunc_call()) {
			verbose(env, "JIT does not support calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!env->prog->gpl_compatible) {
			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
			return -EINVAL;
		}

		desc_tab = kzalloc_obj(*desc_tab, GFP_KERNEL_ACCOUNT);
		if (!desc_tab)
			return -ENOMEM;
		aux->kfunc_tab = desc_tab;
	}

	/* func_id == 0 is always invalid, but instead of returning an error, be
	 * conservative and wait until the code elimination pass before returning
	 * error, so that invalid calls that get pruned out can be in BPF programs
	 * loaded from userspace. It is also required that offset be untouched
	 * for such calls.
	 */
	if (func_id == 0 && offset == 0)
		return 0;

	if (offset && !btf_tab) {
		btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT);
		if (!btf_tab)
			return -ENOMEM;
		aux->kfunc_btf_tab = btf_tab;
	}

	/* Already known: nothing to add. */
	if (find_kfunc_desc(env->prog, func_id, offset))
		return 0;

	if (desc_tab->nr_descs == MAX_KFUNC_DESCS) {
		verbose(env, "too many different kernel function calls\n");
		return -E2BIG;
	}

	ret = fetch_kfunc_meta(env, func_id, offset, &meta);
	if (ret)
		return ret;

	kaddr = kallsyms_lookup_name(meta.name);
	if (!kaddr) {
		verbose(env, "cannot find address for kernel function %s\n", meta.name);
		return -EINVAL;
	}

	if (bpf_dev_bound_kfunc_id(func_id)) {
		ret = bpf_dev_bound_kfunc_check(&env->log, aux);
		if (ret)
			return ret;
	}

	ret = btf_distill_func_proto(&env->log, meta.btf, meta.proto, meta.name, &model);
	if (ret)
		return ret;

	/* Append the new descriptor, then restore the (func_id, offset)
	 * ordering that find_kfunc_desc() depends on.
	 */
	slot = &desc_tab->descs[desc_tab->nr_descs++];
	slot->func_model = model;
	slot->addr = kaddr;
	slot->offset = offset;
	slot->func_id = func_id;
	sort(desc_tab->descs, desc_tab->nr_descs, sizeof(desc_tab->descs[0]),
	     kfunc_desc_cmp_by_id_off, NULL);

	return 0;
}
3533
kfunc_desc_cmp_by_imm_off(const void * a,const void * b)3534 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
3535 {
3536 const struct bpf_kfunc_desc *d0 = a;
3537 const struct bpf_kfunc_desc *d1 = b;
3538
3539 if (d0->imm != d1->imm)
3540 return d0->imm < d1->imm ? -1 : 1;
3541 if (d0->offset != d1->offset)
3542 return d0->offset < d1->offset ? -1 : 1;
3543 return 0;
3544 }
3545
set_kfunc_desc_imm(struct bpf_verifier_env * env,struct bpf_kfunc_desc * desc)3546 static int set_kfunc_desc_imm(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc)
3547 {
3548 unsigned long call_imm;
3549
3550 if (bpf_jit_supports_far_kfunc_call()) {
3551 call_imm = desc->func_id;
3552 } else {
3553 call_imm = BPF_CALL_IMM(desc->addr);
3554 /* Check whether the relative offset overflows desc->imm */
3555 if ((unsigned long)(s32)call_imm != call_imm) {
3556 verbose(env, "address of kernel func_id %u is out of range\n",
3557 desc->func_id);
3558 return -EINVAL;
3559 }
3560 }
3561 desc->imm = call_imm;
3562 return 0;
3563 }
3564
sort_kfunc_descs_by_imm_off(struct bpf_verifier_env * env)3565 static int sort_kfunc_descs_by_imm_off(struct bpf_verifier_env *env)
3566 {
3567 struct bpf_kfunc_desc_tab *tab;
3568 int i, err;
3569
3570 tab = env->prog->aux->kfunc_tab;
3571 if (!tab)
3572 return 0;
3573
3574 for (i = 0; i < tab->nr_descs; i++) {
3575 err = set_kfunc_desc_imm(env, &tab->descs[i]);
3576 if (err)
3577 return err;
3578 }
3579
3580 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
3581 kfunc_desc_cmp_by_imm_off, NULL);
3582 return 0;
3583 }
3584
bpf_prog_has_kfunc_call(const struct bpf_prog * prog)3585 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
3586 {
3587 return !!prog->aux->kfunc_tab;
3588 }
3589
3590 const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog * prog,const struct bpf_insn * insn)3591 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
3592 const struct bpf_insn *insn)
3593 {
3594 const struct bpf_kfunc_desc desc = {
3595 .imm = insn->imm,
3596 .offset = insn->off,
3597 };
3598 const struct bpf_kfunc_desc *res;
3599 struct bpf_kfunc_desc_tab *tab;
3600
3601 tab = prog->aux->kfunc_tab;
3602 res = bsearch(&desc, tab->descs, tab->nr_descs,
3603 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
3604
3605 return res ? &res->func_model : NULL;
3606 }
3607
add_kfunc_in_insns(struct bpf_verifier_env * env,struct bpf_insn * insn,int cnt)3608 static int add_kfunc_in_insns(struct bpf_verifier_env *env,
3609 struct bpf_insn *insn, int cnt)
3610 {
3611 int i, ret;
3612
3613 for (i = 0; i < cnt; i++, insn++) {
3614 if (bpf_pseudo_kfunc_call(insn)) {
3615 ret = add_kfunc_call(env, insn->imm, insn->off);
3616 if (ret < 0)
3617 return ret;
3618 }
3619 }
3620 return 0;
3621 }
3622
add_subprog_and_kfunc(struct bpf_verifier_env * env)3623 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
3624 {
3625 struct bpf_subprog_info *subprog = env->subprog_info;
3626 int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
3627 struct bpf_insn *insn = env->prog->insnsi;
3628
3629 /* Add entry function. */
3630 ret = add_subprog(env, 0);
3631 if (ret)
3632 return ret;
3633
3634 for (i = 0; i < insn_cnt; i++, insn++) {
3635 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
3636 !bpf_pseudo_kfunc_call(insn))
3637 continue;
3638
3639 if (!env->bpf_capable) {
3640 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
3641 return -EPERM;
3642 }
3643
3644 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
3645 ret = add_subprog(env, i + insn->imm + 1);
3646 else
3647 ret = add_kfunc_call(env, insn->imm, insn->off);
3648
3649 if (ret < 0)
3650 return ret;
3651 }
3652
3653 ret = bpf_find_exception_callback_insn_off(env);
3654 if (ret < 0)
3655 return ret;
3656 ex_cb_insn = ret;
3657
3658 /* If ex_cb_insn > 0, this means that the main program has a subprog
3659 * marked using BTF decl tag to serve as the exception callback.
3660 */
3661 if (ex_cb_insn) {
3662 ret = add_subprog(env, ex_cb_insn);
3663 if (ret < 0)
3664 return ret;
3665 for (i = 1; i < env->subprog_cnt; i++) {
3666 if (env->subprog_info[i].start != ex_cb_insn)
3667 continue;
3668 env->exception_callback_subprog = i;
3669 mark_subprog_exc_cb(env, i);
3670 break;
3671 }
3672 }
3673
3674 /* Add a fake 'exit' subprog which could simplify subprog iteration
3675 * logic. 'subprog_cnt' should not be increased.
3676 */
3677 subprog[env->subprog_cnt].start = insn_cnt;
3678
3679 if (env->log.level & BPF_LOG_LEVEL2)
3680 for (i = 0; i < env->subprog_cnt; i++)
3681 verbose(env, "func#%d @%d\n", i, subprog[i].start);
3682
3683 return 0;
3684 }
3685
check_subprogs(struct bpf_verifier_env * env)3686 static int check_subprogs(struct bpf_verifier_env *env)
3687 {
3688 int i, subprog_start, subprog_end, off, cur_subprog = 0;
3689 struct bpf_subprog_info *subprog = env->subprog_info;
3690 struct bpf_insn *insn = env->prog->insnsi;
3691 int insn_cnt = env->prog->len;
3692
3693 /* now check that all jumps are within the same subprog */
3694 subprog_start = subprog[cur_subprog].start;
3695 subprog_end = subprog[cur_subprog + 1].start;
3696 for (i = 0; i < insn_cnt; i++) {
3697 u8 code = insn[i].code;
3698
3699 if (code == (BPF_JMP | BPF_CALL) &&
3700 insn[i].src_reg == 0 &&
3701 insn[i].imm == BPF_FUNC_tail_call) {
3702 subprog[cur_subprog].has_tail_call = true;
3703 subprog[cur_subprog].tail_call_reachable = true;
3704 }
3705 if (BPF_CLASS(code) == BPF_LD &&
3706 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
3707 subprog[cur_subprog].has_ld_abs = true;
3708 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
3709 goto next;
3710 if (BPF_OP(code) == BPF_CALL)
3711 goto next;
3712 if (BPF_OP(code) == BPF_EXIT) {
3713 subprog[cur_subprog].exit_idx = i;
3714 goto next;
3715 }
3716 off = i + bpf_jmp_offset(&insn[i]) + 1;
3717 if (off < subprog_start || off >= subprog_end) {
3718 verbose(env, "jump out of range from insn %d to %d\n", i, off);
3719 return -EINVAL;
3720 }
3721 next:
3722 if (i == subprog_end - 1) {
3723 /* to avoid fall-through from one subprog into another
3724 * the last insn of the subprog should be either exit
3725 * or unconditional jump back or bpf_throw call
3726 */
3727 if (code != (BPF_JMP | BPF_EXIT) &&
3728 code != (BPF_JMP32 | BPF_JA) &&
3729 code != (BPF_JMP | BPF_JA)) {
3730 verbose(env, "last insn is not an exit or jmp\n");
3731 return -EINVAL;
3732 }
3733 subprog_start = subprog_end;
3734 cur_subprog++;
3735 if (cur_subprog < env->subprog_cnt)
3736 subprog_end = subprog[cur_subprog + 1].start;
3737 }
3738 }
3739 return 0;
3740 }
3741
mark_stack_slot_obj_read(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int spi,int nr_slots)3742 static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3743 int spi, int nr_slots)
3744 {
3745 int err, i;
3746
3747 for (i = 0; i < nr_slots; i++) {
3748 err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi - i));
3749 if (err)
3750 return err;
3751 mark_stack_slot_scratched(env, spi - i);
3752 }
3753 return 0;
3754 }
3755
mark_dynptr_read(struct bpf_verifier_env * env,struct bpf_reg_state * reg)3756 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3757 {
3758 int spi;
3759
3760 /* For CONST_PTR_TO_DYNPTR, it must have already been done by
3761 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3762 * check_kfunc_call.
3763 */
3764 if (reg->type == CONST_PTR_TO_DYNPTR)
3765 return 0;
3766 spi = dynptr_get_spi(env, reg);
3767 if (spi < 0)
3768 return spi;
3769 /* Caller ensures dynptr is valid and initialized, which means spi is in
3770 * bounds and spi is the first dynptr slot. Simply mark stack slot as
3771 * read.
3772 */
3773 return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS);
3774 }
3775
/* Mark the @nr_slots stack slots of an iterator starting at @spi as read.
 * Thin wrapper around mark_stack_slot_obj_read().
 */
static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			  int spi, int nr_slots)
{
	int err;

	err = mark_stack_slot_obj_read(env, reg, spi, nr_slots);
	return err;
}
3781
/* Mark the single stack slot holding the irq flag referenced by @reg as
 * read. Returns a negative value if irq_flag_get_spi() fails.
 */
static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi = irq_flag_get_spi(env, reg);

	if (spi < 0)
		return spi;

	return mark_stack_slot_obj_read(env, reg, spi, 1);
}
3791
3792 /* This function is supposed to be used by the following 32-bit optimization
3793 * code only. It returns TRUE if the source or destination register operates
3794 * on 64-bit, otherwise return FALSE.
3795 */
is_reg64(struct bpf_insn * insn,u32 regno,struct bpf_reg_state * reg,enum reg_arg_type t)3796 static bool is_reg64(struct bpf_insn *insn,
3797 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
3798 {
3799 u8 code, class, op;
3800
3801 code = insn->code;
3802 class = BPF_CLASS(code);
3803 op = BPF_OP(code);
3804 if (class == BPF_JMP) {
3805 /* BPF_EXIT for "main" will reach here. Return TRUE
3806 * conservatively.
3807 */
3808 if (op == BPF_EXIT)
3809 return true;
3810 if (op == BPF_CALL) {
3811 /* BPF to BPF call will reach here because of marking
3812 * caller saved clobber with DST_OP_NO_MARK for which we
3813 * don't care the register def because they are anyway
3814 * marked as NOT_INIT already.
3815 */
3816 if (insn->src_reg == BPF_PSEUDO_CALL)
3817 return false;
3818 /* Helper call will reach here because of arg type
3819 * check, conservatively return TRUE.
3820 */
3821 if (t == SRC_OP)
3822 return true;
3823
3824 return false;
3825 }
3826 }
3827
3828 if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3829 return false;
3830
3831 if (class == BPF_ALU64 || class == BPF_JMP ||
3832 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3833 return true;
3834
3835 if (class == BPF_ALU || class == BPF_JMP32)
3836 return false;
3837
3838 if (class == BPF_LDX) {
3839 if (t != SRC_OP)
3840 return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3841 /* LDX source must be ptr. */
3842 return true;
3843 }
3844
3845 if (class == BPF_STX) {
3846 /* BPF_STX (including atomic variants) has one or more source
3847 * operands, one of which is a ptr. Check whether the caller is
3848 * asking about it.
3849 */
3850 if (t == SRC_OP && reg->type != SCALAR_VALUE)
3851 return true;
3852 return BPF_SIZE(code) == BPF_DW;
3853 }
3854
3855 if (class == BPF_LD) {
3856 u8 mode = BPF_MODE(code);
3857
3858 /* LD_IMM64 */
3859 if (mode == BPF_IMM)
3860 return true;
3861
3862 /* Both LD_IND and LD_ABS return 32-bit data. */
3863 if (t != SRC_OP)
3864 return false;
3865
3866 /* Implicit ctx ptr. */
3867 if (regno == BPF_REG_6)
3868 return true;
3869
3870 /* Explicit source could be any width. */
3871 return true;
3872 }
3873
3874 if (class == BPF_ST)
3875 /* The only source register for BPF_ST is a ptr. */
3876 return true;
3877
3878 /* Conservatively return true at default. */
3879 return true;
3880 }
3881
3882 /* Return the regno defined by the insn, or -1. */
insn_def_regno(const struct bpf_insn * insn)3883 static int insn_def_regno(const struct bpf_insn *insn)
3884 {
3885 switch (BPF_CLASS(insn->code)) {
3886 case BPF_JMP:
3887 case BPF_JMP32:
3888 case BPF_ST:
3889 return -1;
3890 case BPF_STX:
3891 if (BPF_MODE(insn->code) == BPF_ATOMIC ||
3892 BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
3893 if (insn->imm == BPF_CMPXCHG)
3894 return BPF_REG_0;
3895 else if (insn->imm == BPF_LOAD_ACQ)
3896 return insn->dst_reg;
3897 else if (insn->imm & BPF_FETCH)
3898 return insn->src_reg;
3899 }
3900 return -1;
3901 default:
3902 return insn->dst_reg;
3903 }
3904 }
3905
3906 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
insn_has_def32(struct bpf_insn * insn)3907 static bool insn_has_def32(struct bpf_insn *insn)
3908 {
3909 int dst_reg = insn_def_regno(insn);
3910
3911 if (dst_reg == -1)
3912 return false;
3913
3914 return !is_reg64(insn, dst_reg, NULL, DST_OP);
3915 }
3916
mark_insn_zext(struct bpf_verifier_env * env,struct bpf_reg_state * reg)3917 static void mark_insn_zext(struct bpf_verifier_env *env,
3918 struct bpf_reg_state *reg)
3919 {
3920 s32 def_idx = reg->subreg_def;
3921
3922 if (def_idx == DEF_NOT_SUBREG)
3923 return;
3924
3925 env->insn_aux_data[def_idx - 1].zext_dst = true;
3926 /* The dst will be zero extended, so won't be sub-register anymore. */
3927 reg->subreg_def = DEF_NOT_SUBREG;
3928 }
3929
/* Validate the use of register @regno as operand kind @t by the instruction
 * at env->insn_idx, and update sub-register tracking in @regs accordingly.
 *
 * For a source operand (@t == SRC_OP) the register must be initialized; if
 * it is read as a 64-bit value, the instruction that defined its low 32 bits
 * is marked for zero extension.
 * For a destination operand the frame pointer is rejected, subreg_def is
 * updated, and for @t == DST_OP the register is marked unknown.
 *
 * Returns 0 on success, -EINVAL for an out-of-range @regno, -EACCES for an
 * uninitialized source or a write to the frame pointer.
 */
static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
			   enum reg_arg_type t)
{
	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
	struct bpf_reg_state *reg;
	bool rw64;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	mark_reg_scratched(env, regno);

	reg = &regs[regno];
	rw64 = is_reg64(insn, regno, reg, t);
	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (reg->type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno == BPF_REG_FP)
			return 0;

		/* A 64-bit read requires the upper half to be well defined. */
		if (rw64)
			mark_insn_zext(env, reg);

		return 0;
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		/* Record the defining insn (index + 1) for 32-bit writes. */
		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}
3972
/* Run __check_reg_arg() against the registers of the current frame of the
 * current verifier state.
 */
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_reg_state *frame_regs = cur->frame[cur->curframe]->regs;

	return __check_reg_arg(env, frame_regs, regno, t);
}
3981
insn_stack_access_flags(int frameno,int spi)3982 static int insn_stack_access_flags(int frameno, int spi)
3983 {
3984 return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
3985 }
3986
insn_stack_access_spi(int insn_flags)3987 static int insn_stack_access_spi(int insn_flags)
3988 {
3989 return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
3990 }
3991
insn_stack_access_frameno(int insn_flags)3992 static int insn_stack_access_frameno(int insn_flags)
3993 {
3994 return insn_flags & INSN_F_FRAMENO_MASK;
3995 }
3996
mark_jmp_point(struct bpf_verifier_env * env,int idx)3997 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3998 {
3999 env->insn_aux_data[idx].jmp_point = true;
4000 }
4001
is_jmp_point(struct bpf_verifier_env * env,int insn_idx)4002 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
4003 {
4004 return env->insn_aux_data[insn_idx].jmp_point;
4005 }
4006
4007 #define LR_FRAMENO_BITS 3
4008 #define LR_SPI_BITS 6
4009 #define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1)
4010 #define LR_SIZE_BITS 4
4011 #define LR_FRAMENO_MASK ((1ull << LR_FRAMENO_BITS) - 1)
4012 #define LR_SPI_MASK ((1ull << LR_SPI_BITS) - 1)
4013 #define LR_SIZE_MASK ((1ull << LR_SIZE_BITS) - 1)
4014 #define LR_SPI_OFF LR_FRAMENO_BITS
4015 #define LR_IS_REG_OFF (LR_SPI_BITS + LR_FRAMENO_BITS)
4016 #define LINKED_REGS_MAX 6
4017
4018 struct linked_reg {
4019 u8 frameno;
4020 union {
4021 u8 spi;
4022 u8 regno;
4023 };
4024 bool is_reg;
4025 };
4026
4027 struct linked_regs {
4028 int cnt;
4029 struct linked_reg entries[LINKED_REGS_MAX];
4030 };
4031
linked_regs_push(struct linked_regs * s)4032 static struct linked_reg *linked_regs_push(struct linked_regs *s)
4033 {
4034 if (s->cnt < LINKED_REGS_MAX)
4035 return &s->entries[s->cnt++];
4036
4037 return NULL;
4038 }
4039
4040 /* Use u64 as a vector of 6 10-bit values, use first 4-bits to track
4041 * number of elements currently in stack.
4042 * Pack one history entry for linked registers as 10 bits in the following format:
4043 * - 3-bits frameno
4044 * - 6-bits spi_or_reg
4045 * - 1-bit is_reg
4046 */
linked_regs_pack(struct linked_regs * s)4047 static u64 linked_regs_pack(struct linked_regs *s)
4048 {
4049 u64 val = 0;
4050 int i;
4051
4052 for (i = 0; i < s->cnt; ++i) {
4053 struct linked_reg *e = &s->entries[i];
4054 u64 tmp = 0;
4055
4056 tmp |= e->frameno;
4057 tmp |= e->spi << LR_SPI_OFF;
4058 tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
4059
4060 val <<= LR_ENTRY_BITS;
4061 val |= tmp;
4062 }
4063 val <<= LR_SIZE_BITS;
4064 val |= s->cnt;
4065 return val;
4066 }
4067
/* Decode a value produced by linked_regs_pack() into @s: the low
 * LR_SIZE_BITS give the entry count, followed by that many LR_ENTRY_BITS-wide
 * entries read from the least significant end upwards.
 */
static void linked_regs_unpack(u64 val, struct linked_regs *s)
{
	int i;

	s->cnt = val & LR_SIZE_MASK;

	for (i = 0, val >>= LR_SIZE_BITS; i < s->cnt; ++i, val >>= LR_ENTRY_BITS) {
		struct linked_reg *e = &s->entries[i];

		e->frameno = val & LR_FRAMENO_MASK;
		e->spi = (val >> LR_SPI_OFF) & LR_SPI_MASK;
		e->is_reg = (val >> LR_IS_REG_OFF) & 0x1;
	}
}
4084
4085 /* for any branch, call, exit record the history of jmps in the given state */
push_jmp_history(struct bpf_verifier_env * env,struct bpf_verifier_state * cur,int insn_flags,u64 linked_regs)4086 static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
4087 int insn_flags, u64 linked_regs)
4088 {
4089 u32 cnt = cur->jmp_history_cnt;
4090 struct bpf_jmp_history_entry *p;
4091 size_t alloc_size;
4092
4093 /* combine instruction flags if we already recorded this instruction */
4094 if (env->cur_hist_ent) {
4095 /* atomic instructions push insn_flags twice, for READ and
4096 * WRITE sides, but they should agree on stack slot
4097 */
4098 verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
4099 (env->cur_hist_ent->flags & insn_flags) != insn_flags,
4100 env, "insn history: insn_idx %d cur flags %x new flags %x",
4101 env->insn_idx, env->cur_hist_ent->flags, insn_flags);
4102 env->cur_hist_ent->flags |= insn_flags;
4103 verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
4104 "insn history: insn_idx %d linked_regs: %#llx",
4105 env->insn_idx, env->cur_hist_ent->linked_regs);
4106 env->cur_hist_ent->linked_regs = linked_regs;
4107 return 0;
4108 }
4109
4110 cnt++;
4111 alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
4112 p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
4113 if (!p)
4114 return -ENOMEM;
4115 cur->jmp_history = p;
4116
4117 p = &cur->jmp_history[cnt - 1];
4118 p->idx = env->insn_idx;
4119 p->prev_idx = env->prev_insn_idx;
4120 p->flags = insn_flags;
4121 p->linked_regs = linked_regs;
4122 cur->jmp_history_cnt = cnt;
4123 env->cur_hist_ent = p;
4124
4125 return 0;
4126 }
4127
/* Return the history entry just below @hist_end if it was recorded for
 * @insn_idx; NULL if @hist_end is 0 or that entry belongs to another insn.
 */
static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
							 u32 hist_end, int insn_idx)
{
	struct bpf_jmp_history_entry *last;

	if (hist_end == 0)
		return NULL;

	last = &st->jmp_history[hist_end - 1];
	return last->idx == insn_idx ? last : NULL;
}
4135
4136 /* Backtrack one insn at a time. If idx is not at the top of recorded
4137 * history then previous instruction came from straight line execution.
4138 * Return -ENOENT if we exhausted all instructions within given state.
4139 *
4140 * It's legal to have a bit of a looping with the same starting and ending
4141 * insn index within the same state, e.g.: 3->4->5->3, so just because current
4142 * instruction index is the same as state's first_idx doesn't mean we are
4143 * done. If there is still some jump history left, we should keep going. We
4144 * need to take into account that we might have a jump history between given
4145 * state's parent and itself, due to checkpointing. In this case, we'll have
4146 * history entry recording a jump from last instruction of parent state and
4147 * first instruction of given state.
4148 */
get_prev_insn_idx(struct bpf_verifier_state * st,int i,u32 * history)4149 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
4150 u32 *history)
4151 {
4152 u32 cnt = *history;
4153
4154 if (i == st->first_insn_idx) {
4155 if (cnt == 0)
4156 return -ENOENT;
4157 if (cnt == 1 && st->jmp_history[0].idx == i)
4158 return -ENOENT;
4159 }
4160
4161 if (cnt && st->jmp_history[cnt - 1].idx == i) {
4162 i = st->jmp_history[cnt - 1].prev_idx;
4163 (*history)--;
4164 } else {
4165 i--;
4166 }
4167 return i;
4168 }
4169
disasm_kfunc_name(void * data,const struct bpf_insn * insn)4170 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
4171 {
4172 const struct btf_type *func;
4173 struct btf *desc_btf;
4174
4175 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
4176 return NULL;
4177
4178 desc_btf = find_kfunc_desc_btf(data, insn->off);
4179 if (IS_ERR(desc_btf))
4180 return "<error>";
4181
4182 func = btf_type_by_id(desc_btf, insn->imm);
4183 return btf_name_by_offset(desc_btf, func->name_off);
4184 }
4185
verbose_insn(struct bpf_verifier_env * env,struct bpf_insn * insn)4186 static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
4187 {
4188 const struct bpf_insn_cbs cbs = {
4189 .cb_call = disasm_kfunc_name,
4190 .cb_print = verbose,
4191 .private_data = env,
4192 };
4193
4194 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
4195 }
4196
/* Point the backtracking state at @frame. Only the frame field is written;
 * the masks and env are left as they are.
 */
static inline void bt_init(struct backtrack_state *bt, u32 frame)
{
	bt->frame = frame;
}
4201
bt_reset(struct backtrack_state * bt)4202 static inline void bt_reset(struct backtrack_state *bt)
4203 {
4204 struct bpf_verifier_env *env = bt->env;
4205
4206 memset(bt, 0, sizeof(*bt));
4207 bt->env = env;
4208 }
4209
bt_empty(struct backtrack_state * bt)4210 static inline u32 bt_empty(struct backtrack_state *bt)
4211 {
4212 u64 mask = 0;
4213 int i;
4214
4215 for (i = 0; i <= bt->frame; i++)
4216 mask |= bt->reg_masks[i] | bt->stack_masks[i];
4217
4218 return mask == 0;
4219 }
4220
bt_subprog_enter(struct backtrack_state * bt)4221 static inline int bt_subprog_enter(struct backtrack_state *bt)
4222 {
4223 if (bt->frame == MAX_CALL_FRAMES - 1) {
4224 verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
4225 return -EFAULT;
4226 }
4227 bt->frame++;
4228 return 0;
4229 }
4230
bt_subprog_exit(struct backtrack_state * bt)4231 static inline int bt_subprog_exit(struct backtrack_state *bt)
4232 {
4233 if (bt->frame == 0) {
4234 verifier_bug(bt->env, "subprog exit from frame 0");
4235 return -EFAULT;
4236 }
4237 bt->frame--;
4238 return 0;
4239 }
4240
/* Set the bit for register @reg in the register mask of @frame. */
static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
{
	u32 bit = 1 << reg;

	bt->reg_masks[frame] |= bit;
}
4245
/* Clear the bit for register @reg in the register mask of @frame. */
static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
{
	u32 bit = 1 << reg;

	bt->reg_masks[frame] &= ~bit;
}
4250
/* Set the bit for register @reg in the current frame (bt->frame). */
static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
{
	u32 cur_frame = bt->frame;

	bt_set_frame_reg(bt, cur_frame, reg);
}
4255
/* Clear the bit for register @reg in the current frame (bt->frame). */
static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
{
	u32 cur_frame = bt->frame;

	bt_clear_frame_reg(bt, cur_frame, reg);
}
4260
/* Set the bit for stack slot @slot in the stack mask of @frame. */
static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
{
	u64 bit = 1ull << slot;

	bt->stack_masks[frame] |= bit;
}
4265
/* Clear the bit for stack slot @slot in the stack mask of @frame. */
static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
{
	u64 bit = 1ull << slot;

	bt->stack_masks[frame] &= ~bit;
}
4270
/* Return the register mask recorded for @frame. */
static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
{
	u32 mask = bt->reg_masks[frame];

	return mask;
}
4275
bt_reg_mask(struct backtrack_state * bt)4276 static inline u32 bt_reg_mask(struct backtrack_state *bt)
4277 {
4278 return bt->reg_masks[bt->frame];
4279 }
4280
bt_frame_stack_mask(struct backtrack_state * bt,u32 frame)4281 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
4282 {
4283 return bt->stack_masks[frame];
4284 }
4285
bt_stack_mask(struct backtrack_state * bt)4286 static inline u64 bt_stack_mask(struct backtrack_state *bt)
4287 {
4288 return bt->stack_masks[bt->frame];
4289 }
4290
bt_is_reg_set(struct backtrack_state * bt,u32 reg)4291 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
4292 {
4293 return bt->reg_masks[bt->frame] & (1 << reg);
4294 }
4295
bt_is_frame_reg_set(struct backtrack_state * bt,u32 frame,u32 reg)4296 static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
4297 {
4298 return bt->reg_masks[frame] & (1 << reg);
4299 }
4300
bt_is_frame_slot_set(struct backtrack_state * bt,u32 frame,u32 slot)4301 static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
4302 {
4303 return bt->stack_masks[frame] & (1ull << slot);
4304 }
4305
4306 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
fmt_reg_mask(char * buf,ssize_t buf_sz,u32 reg_mask)4307 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
4308 {
4309 DECLARE_BITMAP(mask, 64);
4310 bool first = true;
4311 int i, n;
4312
4313 buf[0] = '\0';
4314
4315 bitmap_from_u64(mask, reg_mask);
4316 for_each_set_bit(i, mask, 32) {
4317 n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
4318 first = false;
4319 buf += n;
4320 buf_sz -= n;
4321 if (buf_sz < 0)
4322 break;
4323 }
4324 }
4325 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
bpf_fmt_stack_mask(char * buf,ssize_t buf_sz,u64 stack_mask)4326 void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
4327 {
4328 DECLARE_BITMAP(mask, 64);
4329 bool first = true;
4330 int i, n;
4331
4332 buf[0] = '\0';
4333
4334 bitmap_from_u64(mask, stack_mask);
4335 for_each_set_bit(i, mask, 64) {
4336 n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
4337 first = false;
4338 buf += n;
4339 buf_sz -= n;
4340 if (buf_sz < 0)
4341 break;
4342 }
4343 }
4344
4345 /* If any register R in hist->linked_regs is marked as precise in bt,
4346 * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
4347 */
bt_sync_linked_regs(struct backtrack_state * bt,struct bpf_jmp_history_entry * hist)4348 static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
4349 {
4350 struct linked_regs linked_regs;
4351 bool some_precise = false;
4352 int i;
4353
4354 if (!hist || hist->linked_regs == 0)
4355 return;
4356
4357 linked_regs_unpack(hist->linked_regs, &linked_regs);
4358 for (i = 0; i < linked_regs.cnt; ++i) {
4359 struct linked_reg *e = &linked_regs.entries[i];
4360
4361 if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
4362 (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
4363 some_precise = true;
4364 break;
4365 }
4366 }
4367
4368 if (!some_precise)
4369 return;
4370
4371 for (i = 0; i < linked_regs.cnt; ++i) {
4372 struct linked_reg *e = &linked_regs.entries[i];
4373
4374 if (e->is_reg)
4375 bt_set_frame_reg(bt, e->frameno, e->regno);
4376 else
4377 bt_set_frame_slot(bt, e->frameno, e->spi);
4378 }
4379 }
4380
4381 /* For given verifier state backtrack_insn() is called from the last insn to
4382 * the first insn. Its purpose is to compute a bitmask of registers and
4383 * stack slots that needs precision in the parent verifier state.
4384 *
4385 * @idx is an index of the instruction we are currently processing;
4386 * @subseq_idx is an index of the subsequent instruction that:
4387 * - *would be* executed next, if jump history is viewed in forward order;
4388 * - *was* processed previously during backtracking.
4389 */
backtrack_insn(struct bpf_verifier_env * env,int idx,int subseq_idx,struct bpf_jmp_history_entry * hist,struct backtrack_state * bt)4390 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
4391 struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
4392 {
4393 struct bpf_insn *insn = env->prog->insnsi + idx;
4394 u8 class = BPF_CLASS(insn->code);
4395 u8 opcode = BPF_OP(insn->code);
4396 u8 mode = BPF_MODE(insn->code);
4397 u32 dreg = insn->dst_reg;
4398 u32 sreg = insn->src_reg;
4399 u32 spi, i, fr;
4400
4401 if (insn->code == 0)
4402 return 0;
4403 if (env->log.level & BPF_LOG_LEVEL2) {
4404 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
4405 verbose(env, "mark_precise: frame%d: regs=%s ",
4406 bt->frame, env->tmp_str_buf);
4407 bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
4408 verbose(env, "stack=%s before ", env->tmp_str_buf);
4409 verbose(env, "%d: ", idx);
4410 verbose_insn(env, insn);
4411 }
4412
4413 /* If there is a history record that some registers gained range at this insn,
4414 * propagate precision marks to those registers, so that bt_is_reg_set()
4415 * accounts for these registers.
4416 */
4417 bt_sync_linked_regs(bt, hist);
4418
4419 if (class == BPF_ALU || class == BPF_ALU64) {
4420 if (!bt_is_reg_set(bt, dreg))
4421 return 0;
4422 if (opcode == BPF_END || opcode == BPF_NEG) {
4423 /* sreg is reserved and unused
4424 * dreg still need precision before this insn
4425 */
4426 return 0;
4427 } else if (opcode == BPF_MOV) {
4428 if (BPF_SRC(insn->code) == BPF_X) {
4429 /* dreg = sreg or dreg = (s8, s16, s32)sreg
4430 * dreg needs precision after this insn
4431 * sreg needs precision before this insn
4432 */
4433 bt_clear_reg(bt, dreg);
4434 if (sreg != BPF_REG_FP)
4435 bt_set_reg(bt, sreg);
4436 } else {
4437 /* dreg = K
4438 * dreg needs precision after this insn.
4439 * Corresponding register is already marked
4440 * as precise=true in this verifier state.
4441 * No further markings in parent are necessary
4442 */
4443 bt_clear_reg(bt, dreg);
4444 }
4445 } else {
4446 if (BPF_SRC(insn->code) == BPF_X) {
4447 /* dreg += sreg
4448 * both dreg and sreg need precision
4449 * before this insn
4450 */
4451 if (sreg != BPF_REG_FP)
4452 bt_set_reg(bt, sreg);
4453 } /* else dreg += K
4454 * dreg still needs precision before this insn
4455 */
4456 }
4457 } else if (class == BPF_LDX ||
4458 is_atomic_load_insn(insn) ||
4459 is_atomic_fetch_insn(insn)) {
4460 u32 load_reg = dreg;
4461
4462 /*
4463 * Atomic fetch operation writes the old value into
4464 * a register (sreg or r0) and if it was tracked for
4465 * precision, propagate to the stack slot like we do
4466 * in regular ldx.
4467 */
4468 if (is_atomic_fetch_insn(insn))
4469 load_reg = insn->imm == BPF_CMPXCHG ?
4470 BPF_REG_0 : sreg;
4471
4472 if (!bt_is_reg_set(bt, load_reg))
4473 return 0;
4474 bt_clear_reg(bt, load_reg);
4475
4476 /* scalars can only be spilled into stack w/o losing precision.
4477 * Load from any other memory can be zero extended.
4478 * The desire to keep that precision is already indicated
4479 * by 'precise' mark in corresponding register of this state.
4480 * No further tracking necessary.
4481 */
4482 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
4483 return 0;
4484 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
4485 * that [fp - off] slot contains scalar that needs to be
4486 * tracked with precision
4487 */
4488 spi = insn_stack_access_spi(hist->flags);
4489 fr = insn_stack_access_frameno(hist->flags);
4490 bt_set_frame_slot(bt, fr, spi);
4491 } else if (class == BPF_STX || class == BPF_ST) {
4492 if (bt_is_reg_set(bt, dreg))
4493 /* stx & st shouldn't be using _scalar_ dst_reg
4494 * to access memory. It means backtracking
4495 * encountered a case of pointer subtraction.
4496 */
4497 return -ENOTSUPP;
4498 /* scalars can only be spilled into stack */
4499 if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
4500 return 0;
4501 spi = insn_stack_access_spi(hist->flags);
4502 fr = insn_stack_access_frameno(hist->flags);
4503 if (!bt_is_frame_slot_set(bt, fr, spi))
4504 return 0;
4505 bt_clear_frame_slot(bt, fr, spi);
4506 if (class == BPF_STX)
4507 bt_set_reg(bt, sreg);
4508 } else if (class == BPF_JMP || class == BPF_JMP32) {
4509 if (bpf_pseudo_call(insn)) {
4510 int subprog_insn_idx, subprog;
4511
4512 subprog_insn_idx = idx + insn->imm + 1;
4513 subprog = find_subprog(env, subprog_insn_idx);
4514 if (subprog < 0)
4515 return -EFAULT;
4516
4517 if (subprog_is_global(env, subprog)) {
4518 /* check that jump history doesn't have any
4519 * extra instructions from subprog; the next
4520 * instruction after call to global subprog
4521 * should be literally next instruction in
4522 * caller program
4523 */
4524 verifier_bug_if(idx + 1 != subseq_idx, env,
4525 "extra insn from subprog");
4526 /* r1-r5 are invalidated after subprog call,
4527 * so for global func call it shouldn't be set
4528 * anymore
4529 */
4530 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4531 verifier_bug(env, "global subprog unexpected regs %x",
4532 bt_reg_mask(bt));
4533 return -EFAULT;
4534 }
4535 /* global subprog always sets R0 */
4536 bt_clear_reg(bt, BPF_REG_0);
4537 return 0;
4538 } else {
4539 /* static subprog call instruction, which
4540 * means that we are exiting current subprog,
4541 * so only r1-r5 could be still requested as
4542 * precise, r0 and r6-r10 or any stack slot in
4543 * the current frame should be zero by now
4544 */
4545 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
4546 verifier_bug(env, "static subprog unexpected regs %x",
4547 bt_reg_mask(bt));
4548 return -EFAULT;
4549 }
4550 /* we are now tracking register spills correctly,
4551 * so any instance of leftover slots is a bug
4552 */
4553 if (bt_stack_mask(bt) != 0) {
4554 verifier_bug(env,
4555 "static subprog leftover stack slots %llx",
4556 bt_stack_mask(bt));
4557 return -EFAULT;
4558 }
4559 /* propagate r1-r5 to the caller */
4560 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
4561 if (bt_is_reg_set(bt, i)) {
4562 bt_clear_reg(bt, i);
4563 bt_set_frame_reg(bt, bt->frame - 1, i);
4564 }
4565 }
4566 if (bt_subprog_exit(bt))
4567 return -EFAULT;
4568 return 0;
4569 }
4570 } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
4571 /* exit from callback subprog to callback-calling helper or
4572 * kfunc call. Use idx/subseq_idx check to discern it from
4573 * straight line code backtracking.
4574 * Unlike the subprog call handling above, we shouldn't
4575 * propagate precision of r1-r5 (if any requested), as they are
4576 * not actually arguments passed directly to callback subprogs
4577 */
4578 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
4579 verifier_bug(env, "callback unexpected regs %x",
4580 bt_reg_mask(bt));
4581 return -EFAULT;
4582 }
4583 if (bt_stack_mask(bt) != 0) {
4584 verifier_bug(env, "callback leftover stack slots %llx",
4585 bt_stack_mask(bt));
4586 return -EFAULT;
4587 }
4588 /* clear r1-r5 in callback subprog's mask */
4589 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
4590 bt_clear_reg(bt, i);
4591 if (bt_subprog_exit(bt))
4592 return -EFAULT;
4593 return 0;
4594 } else if (opcode == BPF_CALL) {
4595 /* kfunc with imm==0 is invalid and fixup_kfunc_call will
4596 * catch this error later. Make backtracking conservative
4597 * with ENOTSUPP.
4598 */
4599 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
4600 return -ENOTSUPP;
4601 /* regular helper call sets R0 */
4602 bt_clear_reg(bt, BPF_REG_0);
4603 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4604 /* if backtracking was looking for registers R1-R5
4605 * they should have been found already.
4606 */
4607 verifier_bug(env, "backtracking call unexpected regs %x",
4608 bt_reg_mask(bt));
4609 return -EFAULT;
4610 }
4611 if (insn->src_reg == BPF_REG_0 && insn->imm == BPF_FUNC_tail_call
4612 && subseq_idx - idx != 1) {
4613 if (bt_subprog_enter(bt))
4614 return -EFAULT;
4615 }
4616 } else if (opcode == BPF_EXIT) {
4617 bool r0_precise;
4618
4619 /* Backtracking to a nested function call, 'idx' is a part of
4620 * the inner frame 'subseq_idx' is a part of the outer frame.
4621 * In case of a regular function call, instructions giving
4622 * precision to registers R1-R5 should have been found already.
4623 * In case of a callback, it is ok to have R1-R5 marked for
4624 * backtracking, as these registers are set by the function
4625 * invoking callback.
4626 */
4627 if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
4628 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
4629 bt_clear_reg(bt, i);
4630 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
4631 verifier_bug(env, "backtracking exit unexpected regs %x",
4632 bt_reg_mask(bt));
4633 return -EFAULT;
4634 }
4635
4636 /* BPF_EXIT in subprog or callback always returns
4637 * right after the call instruction, so by checking
4638 * whether the instruction at subseq_idx-1 is subprog
4639 * call or not we can distinguish actual exit from
4640 * *subprog* from exit from *callback*. In the former
4641 * case, we need to propagate r0 precision, if
4642 * necessary. In the former we never do that.
4643 */
4644 r0_precise = subseq_idx - 1 >= 0 &&
4645 bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
4646 bt_is_reg_set(bt, BPF_REG_0);
4647
4648 bt_clear_reg(bt, BPF_REG_0);
4649 if (bt_subprog_enter(bt))
4650 return -EFAULT;
4651
4652 if (r0_precise)
4653 bt_set_reg(bt, BPF_REG_0);
4654 /* r6-r9 and stack slots will stay set in caller frame
4655 * bitmasks until we return back from callee(s)
4656 */
4657 return 0;
4658 } else if (BPF_SRC(insn->code) == BPF_X) {
4659 if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
4660 return 0;
4661 /* dreg <cond> sreg
4662 * Both dreg and sreg need precision before
4663 * this insn. If only sreg was marked precise
4664 * before it would be equally necessary to
4665 * propagate it to dreg.
4666 */
4667 if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
4668 bt_set_reg(bt, sreg);
4669 if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
4670 bt_set_reg(bt, dreg);
4671 } else if (BPF_SRC(insn->code) == BPF_K) {
4672 /* dreg <cond> K
4673 * Only dreg still needs precision before
4674 * this insn, so for the K-based conditional
4675 * there is nothing new to be marked.
4676 */
4677 }
4678 } else if (class == BPF_LD) {
4679 if (!bt_is_reg_set(bt, dreg))
4680 return 0;
4681 bt_clear_reg(bt, dreg);
4682 /* It's ld_imm64 or ld_abs or ld_ind.
4683 * For ld_imm64 no further tracking of precision
4684 * into parent is necessary
4685 */
4686 if (mode == BPF_IND || mode == BPF_ABS)
4687 /* to be analyzed */
4688 return -ENOTSUPP;
4689 }
4690 /* Propagate precision marks to linked registers, to account for
4691 * registers marked as precise in this function.
4692 */
4693 bt_sync_linked_regs(bt, hist);
4694 return 0;
4695 }
4696
4697 /* the scalar precision tracking algorithm:
4698 * . at the start all registers have precise=false.
4699 * . scalar ranges are tracked as normal through alu and jmp insns.
4700 * . once precise value of the scalar register is used in:
4701 * . ptr + scalar alu
4702 * . if (scalar cond K|scalar)
4703 * . helper_call(.., scalar, ...) where ARG_CONST is expected
4704 * backtrack through the verifier states and mark all registers and
4705 * stack slots with spilled constants that these scalar registers
4706 * should be precise.
4707 * . during state pruning two registers (or spilled stack slots)
4708 * are equivalent if both are not precise.
4709 *
4710 * Note the verifier cannot simply walk register parentage chain,
4711 * since many different registers and stack slots could have been
4712 * used to compute single precise scalar.
4713 *
4714 * The approach of starting with precise=true for all registers and then
4715 * backtrack to mark a register as not precise when the verifier detects
4716 * that program doesn't care about specific value (e.g., when helper
4717 * takes register as ARG_ANYTHING parameter) is not safe.
4718 *
4719 * It's ok to walk single parentage chain of the verifier states.
4720 * It's possible that this backtracking will go all the way till 1st insn.
4721 * All other branches will be explored for needing precision later.
4722 *
4723 * The backtracking needs to deal with cases like:
4724 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
4725 * r9 -= r8
4726 * r5 = r9
4727 * if r5 > 0x79f goto pc+7
4728 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
4729 * r5 += 1
4730 * ...
4731 * call bpf_perf_event_output#25
4732 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
4733 *
4734 * and this case:
4735 * r6 = 1
4736 * call foo // uses callee's r6 inside to compute r0
4737 * r0 += r6
4738 * if r0 == 0 goto
4739 *
4740 * to track above reg_mask/stack_mask needs to be independent for each frame.
4741 *
4742 * Also if parent's curframe > frame where backtracking started,
 * the verifier needs to mark registers in both frames, otherwise callees
4744 * may incorrectly prune callers. This is similar to
4745 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
4746 *
4747 * For now backtracking falls back into conservative marking.
4748 */
mark_all_scalars_precise(struct bpf_verifier_env * env,struct bpf_verifier_state * st)4749 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
4750 struct bpf_verifier_state *st)
4751 {
4752 struct bpf_func_state *func;
4753 struct bpf_reg_state *reg;
4754 int i, j;
4755
4756 if (env->log.level & BPF_LOG_LEVEL2) {
4757 verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
4758 st->curframe);
4759 }
4760
4761 /* big hammer: mark all scalars precise in this path.
4762 * pop_stack may still get !precise scalars.
4763 * We also skip current state and go straight to first parent state,
4764 * because precision markings in current non-checkpointed state are
4765 * not needed. See why in the comment in __mark_chain_precision below.
4766 */
4767 for (st = st->parent; st; st = st->parent) {
4768 for (i = 0; i <= st->curframe; i++) {
4769 func = st->frame[i];
4770 for (j = 0; j < BPF_REG_FP; j++) {
4771 reg = &func->regs[j];
4772 if (reg->type != SCALAR_VALUE || reg->precise)
4773 continue;
4774 reg->precise = true;
4775 if (env->log.level & BPF_LOG_LEVEL2) {
4776 verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
4777 i, j);
4778 }
4779 }
4780 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4781 if (!is_spilled_reg(&func->stack[j]))
4782 continue;
4783 reg = &func->stack[j].spilled_ptr;
4784 if (reg->type != SCALAR_VALUE || reg->precise)
4785 continue;
4786 reg->precise = true;
4787 if (env->log.level & BPF_LOG_LEVEL2) {
4788 verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
4789 i, -(j + 1) * 8);
4790 }
4791 }
4792 }
4793 }
4794 }
4795
mark_all_scalars_imprecise(struct bpf_verifier_env * env,struct bpf_verifier_state * st)4796 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
4797 {
4798 struct bpf_func_state *func;
4799 struct bpf_reg_state *reg;
4800 int i, j;
4801
4802 for (i = 0; i <= st->curframe; i++) {
4803 func = st->frame[i];
4804 for (j = 0; j < BPF_REG_FP; j++) {
4805 reg = &func->regs[j];
4806 if (reg->type != SCALAR_VALUE)
4807 continue;
4808 reg->precise = false;
4809 }
4810 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4811 if (!is_spilled_reg(&func->stack[j]))
4812 continue;
4813 reg = &func->stack[j].spilled_ptr;
4814 if (reg->type != SCALAR_VALUE)
4815 continue;
4816 reg->precise = false;
4817 }
4818 }
4819 }
4820
4821 /*
4822 * __mark_chain_precision() backtracks BPF program instruction sequence and
4823 * chain of verifier states making sure that register *regno* (if regno >= 0)
4824 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
4825 * SCALARS, as well as any other registers and slots that contribute to
4826 * a tracked state of given registers/stack slots, depending on specific BPF
4827 * assembly instructions (see backtrack_insns() for exact instruction handling
4828 * logic). This backtracking relies on recorded jmp_history and is able to
4829 * traverse entire chain of parent states. This process ends only when all the
4830 * necessary registers/slots and their transitive dependencies are marked as
4831 * precise.
4832 *
4833 * One important and subtle aspect is that precise marks *do not matter* in
4834 * the currently verified state (current state). It is important to understand
4835 * why this is the case.
4836 *
4837 * First, note that current state is the state that is not yet "checkpointed",
4838 * i.e., it is not yet put into env->explored_states, and it has no children
4839 * states as well. It's ephemeral, and can end up either a) being discarded if
4840 * compatible explored state is found at some point or BPF_EXIT instruction is
4841 * reached or b) checkpointed and put into env->explored_states, branching out
4842 * into one or more children states.
4843 *
4844 * In the former case, precise markings in current state are completely
4845 * ignored by state comparison code (see regsafe() for details). Only
4846 * checkpointed ("old") state precise markings are important, and if old
4847 * state's register/slot is precise, regsafe() assumes current state's
4848 * register/slot as precise and checks value ranges exactly and precisely. If
4849 * states turn out to be compatible, current state's necessary precise
4850 * markings and any required parent states' precise markings are enforced
 * after the fact with propagate_precision() logic. But it's
4852 * important to realize that in this case, even after marking current state
4853 * registers/slots as precise, we immediately discard current state. So what
4854 * actually matters is any of the precise markings propagated into current
4855 * state's parent states, which are always checkpointed (due to b) case above).
4856 * As such, for scenario a) it doesn't matter if current state has precise
4857 * markings set or not.
4858 *
4859 * Now, for the scenario b), checkpointing and forking into child(ren)
4860 * state(s). Note that before current state gets to checkpointing step, any
4861 * processed instruction always assumes precise SCALAR register/slot
4862 * knowledge: if precise value or range is useful to prune jump branch, BPF
4863 * verifier takes this opportunity enthusiastically. Similarly, when
4864 * register's value is used to calculate offset or memory address, exact
4865 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
4866 * what we mentioned above about state comparison ignoring precise markings
4867 * during state comparison, BPF verifier ignores and also assumes precise
4868 * markings *at will* during instruction verification process. But as verifier
4869 * assumes precision, it also propagates any precision dependencies across
4870 * parent states, which are not yet finalized, so can be further restricted
4871 * based on new knowledge gained from restrictions enforced by their children
4872 * states. This is so that once those parent states are finalized, i.e., when
4873 * they have no more active children state, state comparison logic in
4874 * is_state_visited() would enforce strict and precise SCALAR ranges, if
4875 * required for correctness.
4876 *
4877 * To build a bit more intuition, note also that once a state is checkpointed,
4878 * the path we took to get to that state is not important. This is crucial
4879 * property for state pruning. When state is checkpointed and finalized at
4880 * some instruction index, it can be correctly and safely used to "short
4881 * circuit" any *compatible* state that reaches exactly the same instruction
4882 * index. I.e., if we jumped to that instruction from a completely different
4883 * code path than original finalized state was derived from, it doesn't
4884 * matter, current state can be discarded because from that instruction
4885 * forward having a compatible state will ensure we will safely reach the
4886 * exit. States describe preconditions for further exploration, but completely
4887 * forget the history of how we got here.
4888 *
4889 * This also means that even if we needed precise SCALAR range to get to
4890 * finalized state, but from that point forward *that same* SCALAR register is
 * never used in a precise context (i.e., its precise value is not needed for
4892 * correctness), it's correct and safe to mark such register as "imprecise"
4893 * (i.e., precise marking set to false). This is what we rely on when we do
4894 * not set precise marking in current state. If no child state requires
4895 * precision for any given SCALAR register, it's safe to dictate that it can
4896 * be imprecise. If any child state does require this register to be precise,
4897 * we'll mark it precise later retroactively during precise markings
4898 * propagation from child state to parent states.
4899 *
4900 * Skipping precise marking setting in current state is a mild version of
4901 * relying on the above observation. But we can utilize this property even
4902 * more aggressively by proactively forgetting any precise marking in the
4903 * current state (which we inherited from the parent state), right before we
4904 * checkpoint it and branch off into new child state. This is done by
4905 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
4906 * finalized states which help in short circuiting more future states.
4907 */
/* Parameters and return value (the algorithm is described in the comment
 * block right above):
 * @env: verifier environment; the backtracking masks live in env->bt.
 * @starting_state: state from which backtracking starts; its own precise
 *	flags are not set here, only those of its ancestors (and, for the
 *	global-subprog entry case, of the state reached at that point).
 * @regno: register in starting_state's current frame to request precision
 *	for, or a negative value to add no register at this point.
 * @changed: optional output, may be NULL. Set to true whenever this function
 *	itself flips a precise flag from false to true.
 *
 * Returns 0 on success (including the conservative "mark everything precise"
 * fallback), -EFAULT on an internal inconsistency, or any other non-zero
 * error returned by backtrack_insn().
 */
static int __mark_chain_precision(struct bpf_verifier_env *env,
				  struct bpf_verifier_state *starting_state,
				  int regno,
				  bool *changed)
{
	struct bpf_verifier_state *st = starting_state;
	struct backtrack_state *bt = &env->bt;
	int first_idx = st->first_insn_idx;
	int last_idx = starting_state->insn_idx;
	int subseq_idx = -1;
	struct bpf_func_state *func;
	bool tmp, skip_first = true;
	struct bpf_reg_state *reg;
	int i, fr, err;

	/* precision tracking is not performed when env->bpf_capable is unset */
	if (!env->bpf_capable)
		return 0;

	/* point at a scratch local so that *changed can be written
	 * unconditionally below even when the caller passed NULL
	 */
	changed = changed ?: &tmp;
	/* set frame number from which we are starting to backtrack */
	bt_init(bt, starting_state->curframe);

	/* Do sanity checks against current state of register and/or stack
	 * slot, but don't set precise flag in current state, as precision
	 * tracking in the current state is unnecessary.
	 */
	func = st->frame[bt->frame];
	if (regno >= 0) {
		reg = &func->regs[regno];
		if (reg->type != SCALAR_VALUE) {
			verifier_bug(env, "backtracking misuse");
			return -EFAULT;
		}
		bt_set_reg(bt, regno);
	}

	/* nothing requested, neither via regno nor via masks already in bt */
	if (bt_empty(bt))
		return 0;

	/* Outer loop: one iteration per verifier state, walking from
	 * starting_state towards the root through st->parent.
	 */
	for (;;) {
		DECLARE_BITMAP(mask, 64);
		u32 history = st->jmp_history_cnt;
		struct bpf_jmp_history_entry *hist;

		if (env->log.level & BPF_LOG_LEVEL2) {
			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
				bt->frame, last_idx, first_idx, subseq_idx);
		}

		if (last_idx < 0) {
			/* we are at the entry into subprog, which
			 * is expected for global funcs, but only if
			 * requested precise registers are R1-R5
			 * (which are global func's input arguments)
			 */
			if (st->curframe == 0 &&
			    st->frame[0]->subprogno > 0 &&
			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
			    bt_stack_mask(bt) == 0 &&
			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
				bitmap_from_u64(mask, bt_reg_mask(bt));
				for_each_set_bit(i, mask, 32) {
					reg = &st->frame[0]->regs[i];
					bt_clear_reg(bt, i);
					if (reg->type == SCALAR_VALUE) {
						reg->precise = true;
						*changed = true;
					}
				}
				return 0;
			}

			verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
				     st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
			return -EFAULT;
		}

		/* Inner loop: walk the instructions of this state backwards,
		 * from last_idx, until get_prev_insn_idx() reports -ENOENT.
		 */
		for (i = last_idx;;) {
			if (skip_first) {
				/* the very first instruction visited (the one
				 * at starting_state->insn_idx) is not passed
				 * to backtrack_insn()
				 */
				err = 0;
				skip_first = false;
			} else {
				hist = get_jmp_hist_entry(st, history, i);
				err = backtrack_insn(env, i, subseq_idx, hist, bt);
			}
			if (err == -ENOTSUPP) {
				/* backtrack_insn() could not handle this insn:
				 * fall back to forcing all scalars precise.
				 * NOTE(review): this fallback sets precise
				 * flags without updating *changed - confirm
				 * that callers do not depend on it here.
				 */
				mark_all_scalars_precise(env, starting_state);
				bt_reset(bt);
				return 0;
			} else if (err) {
				return err;
			}
			if (bt_empty(bt))
				/* Found assignment(s) into tracked register in this state.
				 * Since this state is already marked, just return.
				 * Nothing to be tracked further in the parent state.
				 */
				return 0;
			/* remember the insn just handled; it is the "subsequent"
			 * insn for the next (earlier) one
			 */
			subseq_idx = i;
			i = get_prev_insn_idx(st, i, &history);
			if (i == -ENOENT)
				break;
			if (i >= env->prog->len) {
				/* This can happen if backtracking reached insn 0
				 * and there are still reg_mask or stack_mask
				 * to backtrack.
				 * It means the backtracking missed the spot where
				 * particular register was initialized with a constant.
				 */
				verifier_bug(env, "backtracking idx %d", i);
				return -EFAULT;
			}
		}
		st = st->parent;
		if (!st)
			break;

		/* Apply the still-requested masks to the parent state, frame
		 * by frame. An entry that is not a scalar, or is already
		 * precise, is dropped from bt so it is not tracked further;
		 * otherwise it is marked precise and stays in bt.
		 */
		for (fr = bt->frame; fr >= 0; fr--) {
			func = st->frame[fr];
			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
			for_each_set_bit(i, mask, 32) {
				reg = &func->regs[i];
				if (reg->type != SCALAR_VALUE) {
					bt_clear_frame_reg(bt, fr, i);
					continue;
				}
				if (reg->precise) {
					bt_clear_frame_reg(bt, fr, i);
				} else {
					reg->precise = true;
					*changed = true;
				}
			}

			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
			for_each_set_bit(i, mask, 64) {
				/* a requested slot beyond the frame's allocated
				 * stack is reported as a verifier bug
				 */
				if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
						    env, "stack slot %d, total slots %d",
						    i, func->allocated_stack / BPF_REG_SIZE))
					return -EFAULT;

				if (!is_spilled_scalar_reg(&func->stack[i])) {
					bt_clear_frame_slot(bt, fr, i);
					continue;
				}
				reg = &func->stack[i].spilled_ptr;
				if (reg->precise) {
					bt_clear_frame_slot(bt, fr, i);
				} else {
					reg->precise = true;
					*changed = true;
				}
			}
			if (env->log.level & BPF_LOG_LEVEL2) {
				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
					     bt_frame_reg_mask(bt, fr));
				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
					fr, env->tmp_str_buf);
				bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
						   bt_frame_stack_mask(bt, fr));
				verbose(env, "stack=%s: ", env->tmp_str_buf);
				print_verifier_state(env, st, fr, true);
			}
		}

		if (bt_empty(bt))
			return 0;

		/* continue in the parent: the first insn of the state just
		 * left becomes the "subsequent" insn for the parent's last one
		 */
		subseq_idx = first_idx;
		last_idx = st->last_insn_idx;
		first_idx = st->first_insn_idx;
	}

	/* if we still have requested precise regs or slots, we missed
	 * something (e.g., stack access through non-r10 register), so
	 * fallback to marking all precise
	 */
	if (!bt_empty(bt)) {
		mark_all_scalars_precise(env, starting_state);
		bt_reset(bt);
	}

	return 0;
}
5092
mark_chain_precision(struct bpf_verifier_env * env,int regno)5093 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
5094 {
5095 return __mark_chain_precision(env, env->cur_state, regno, NULL);
5096 }
5097
5098 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
5099 * desired reg and stack masks across all relevant frames
5100 */
mark_chain_precision_batch(struct bpf_verifier_env * env,struct bpf_verifier_state * starting_state)5101 static int mark_chain_precision_batch(struct bpf_verifier_env *env,
5102 struct bpf_verifier_state *starting_state)
5103 {
5104 return __mark_chain_precision(env, starting_state, -1, NULL);
5105 }
5106
is_spillable_regtype(enum bpf_reg_type type)5107 static bool is_spillable_regtype(enum bpf_reg_type type)
5108 {
5109 switch (base_type(type)) {
5110 case PTR_TO_MAP_VALUE:
5111 case PTR_TO_STACK:
5112 case PTR_TO_CTX:
5113 case PTR_TO_PACKET:
5114 case PTR_TO_PACKET_META:
5115 case PTR_TO_PACKET_END:
5116 case PTR_TO_FLOW_KEYS:
5117 case CONST_PTR_TO_MAP:
5118 case PTR_TO_SOCKET:
5119 case PTR_TO_SOCK_COMMON:
5120 case PTR_TO_TCP_SOCK:
5121 case PTR_TO_XDP_SOCK:
5122 case PTR_TO_BTF_ID:
5123 case PTR_TO_BUF:
5124 case PTR_TO_MEM:
5125 case PTR_TO_FUNC:
5126 case PTR_TO_MAP_KEY:
5127 case PTR_TO_ARENA:
5128 return true;
5129 default:
5130 return false;
5131 }
5132 }
5133
5134 /* Does this register contain a constant zero? */
register_is_null(struct bpf_reg_state * reg)5135 static bool register_is_null(struct bpf_reg_state *reg)
5136 {
5137 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
5138 }
5139
5140 /* check if register is a constant scalar value */
is_reg_const(struct bpf_reg_state * reg,bool subreg32)5141 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
5142 {
5143 return reg->type == SCALAR_VALUE &&
5144 tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
5145 }
5146
5147 /* assuming is_reg_const() is true, return constant value of a register */
reg_const_value(struct bpf_reg_state * reg,bool subreg32)5148 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
5149 {
5150 return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
5151 }
5152
/* A register counts as a pointer value when pointer leaks are not allowed and
 * its type is anything other than SCALAR_VALUE. With @allow_ptr_leaks set the
 * answer is always false.
 */
static bool __is_pointer_value(bool allow_ptr_leaks,
			       const struct bpf_reg_state *reg)
{
	return !allow_ptr_leaks && reg->type != SCALAR_VALUE;
}
5161
assign_scalar_id_before_mov(struct bpf_verifier_env * env,struct bpf_reg_state * src_reg)5162 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
5163 struct bpf_reg_state *src_reg)
5164 {
5165 if (src_reg->type != SCALAR_VALUE)
5166 return;
5167
5168 if (src_reg->id & BPF_ADD_CONST) {
5169 /*
5170 * The verifier is processing rX = rY insn and
5171 * rY->id has special linked register already.
5172 * Cleared it, since multiple rX += const are not supported.
5173 */
5174 src_reg->id = 0;
5175 src_reg->off = 0;
5176 }
5177
5178 if (!src_reg->id && !tnum_is_const(src_reg->var_off))
5179 /* Ensure that src_reg has a valid ID that will be copied to
5180 * dst_reg and then will be used by sync_linked_regs() to
5181 * propagate min/max range.
5182 */
5183 src_reg->id = ++env->id_gen;
5184 }
5185
/* Copy the complete register state from @src into @dst with a plain struct
 * assignment.
 * NOTE(review): this comment used to say that dst->parent and dst->live are
 * preserved; the assignment below overwrites every field of *dst, so nothing
 * of the old *dst is kept here.
 */
static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
{
	*dst = *src;
}
5191
save_register_state(struct bpf_verifier_env * env,struct bpf_func_state * state,int spi,struct bpf_reg_state * reg,int size)5192 static void save_register_state(struct bpf_verifier_env *env,
5193 struct bpf_func_state *state,
5194 int spi, struct bpf_reg_state *reg,
5195 int size)
5196 {
5197 int i;
5198
5199 copy_register_state(&state->stack[spi].spilled_ptr, reg);
5200
5201 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
5202 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
5203
5204 /* size < 8 bytes spill */
5205 for (; i; i--)
5206 mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
5207 }
5208
is_bpf_st_mem(struct bpf_insn * insn)5209 static bool is_bpf_st_mem(struct bpf_insn *insn)
5210 {
5211 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
5212 }
5213
get_reg_width(struct bpf_reg_state * reg)5214 static int get_reg_width(struct bpf_reg_state *reg)
5215 {
5216 return fls64(reg->umax_value);
5217 }
5218
5219 /* See comment for mark_fastcall_pattern_for_call() */
check_fastcall_stack_contract(struct bpf_verifier_env * env,struct bpf_func_state * state,int insn_idx,int off)5220 static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
5221 struct bpf_func_state *state, int insn_idx, int off)
5222 {
5223 struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
5224 struct bpf_insn_aux_data *aux = env->insn_aux_data;
5225 int i;
5226
5227 if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
5228 return;
5229 /* access to the region [max_stack_depth .. fastcall_stack_off)
5230 * from something that is not a part of the fastcall pattern,
5231 * disable fastcall rewrites for current subprogram by setting
5232 * fastcall_stack_off to a value smaller than any possible offset.
5233 */
5234 subprog->fastcall_stack_off = S16_MIN;
5235 /* reset fastcall aux flags within subprogram,
5236 * happens at most once per subprogram
5237 */
5238 for (i = subprog->start; i < (subprog + 1)->start; ++i) {
5239 aux[i].fastcall_spills_num = 0;
5240 aux[i].fastcall_pattern = 0;
5241 }
5242 }
5243
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 *
 * check_stack_write_fixed_off() models a store of 'size' bytes at the constant
 * stack offset 'off' into the frame described by 'state'.
 *
 * @env:         verifier environment
 * @state:       stack frame being written to (may differ from the current
 *               frame, see the 'state != cur' check below)
 * @off:         constant stack offset of the store
 * @size:        number of bytes written
 * @value_regno: index of the source register in the current frame, or a
 *               negative value when the store has no source register
 * @insn_idx:    index of the store instruction in env->prog->insnsi
 *
 * Return: 0 on success, -EACCES or -EINVAL when the store is rejected, or the
 * error returned by destroy_if_dynptr_stack_slot(), mark_chain_precision() or
 * push_jmp_history().
 */
static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
				       /* stack frame we're writing to */
				       struct bpf_func_state *state,
				       int off, int size, int value_regno,
				       int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	/* 'slot' is the byte index derived from 'off'; 'spi' is the index of
	 * the BPF_REG_SIZE-sized stack slot containing it.
	 */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	struct bpf_reg_state *reg = NULL;
	/* Cleared below when the write turns out not to be a register spill. */
	int insn_flags = insn_stack_access_flags(state->frameno, spi);

	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    is_spilled_reg(&state->stack[spi]) &&
	    !is_spilled_scalar_reg(&state->stack[spi]) &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	/* 'reg' stays NULL when there is no source register. */
	if (value_regno >= 0)
		reg = &cur->regs[value_regno];
	if (!env->bypass_spec_v4) {
		/* Request nospec_result for this insn when a spillable register
		 * type is stored, or when any byte being overwritten is
		 * neither STACK_MISC nor STACK_ZERO.
		 */
		bool sanitize = reg && is_spillable_regtype(reg->type);

		for (i = 0; i < size; i++) {
			u8 type = state->stack[spi].slot_type[i];

			if (type != STACK_MISC && type != STACK_ZERO) {
				sanitize = true;
				break;
			}
		}

		if (sanitize)
			env->insn_aux_data[insn_idx].nospec_result = true;
	}

	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;

	if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE) {
		/* only mark the slot as written if all 8 bytes were written
		 * otherwise read propagation may incorrectly stop too soon
		 * when stack slots are partially written.
		 * This heuristic means that read propagation will be
		 * conservative, since it will add reg_live_read marks
		 * to stack slots all the way to first state when programs
		 * writes+reads less than 8 bytes
		 */
		bpf_mark_stack_write(env, state->frameno, BIT(spi));
	}

	check_fastcall_stack_contract(env, state, insn_idx, off);
	mark_stack_slot_scratched(env, spi);
	/* Case 1: slot-aligned store of a scalar register (bpf_capable only):
	 * the register state is saved in the slot.
	 */
	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
		bool reg_value_fits;

		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
		/* Make sure that reg had an ID to build a relation on spill. */
		if (reg_value_fits)
			assign_scalar_id_before_mov(env, reg);
		save_register_state(env, state, spi, reg, size);
		/* Break the relation on a narrowing spill. */
		if (!reg_value_fits)
			state->stack[spi].spilled_ptr.id = 0;
	/* Case 2: slot-aligned store of an immediate (bpf_capable only): a
	 * temporary known scalar built from insn->imm is saved in the slot.
	 */
	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
		   env->bpf_capable) {
		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];

		memset(tmp_reg, 0, sizeof(*tmp_reg));
		__mark_reg_known(tmp_reg, insn->imm);
		tmp_reg->type = SCALAR_VALUE;
		save_register_state(env, state, spi, tmp_reg, size);
	/* Case 3: store of a register whose type is spillable. */
	} else if (reg && is_spillable_regtype(reg->type)) {
		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose_linfo(env, insn_idx, "; ");
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		if (state != cur && reg->type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}
		save_register_state(env, state, spi, reg, size);
	/* Case 4: every other store is tracked per byte as STACK_MISC or
	 * STACK_ZERO.
	 */
	} else {
		u8 type = STACK_MISC;

		/* regular write of data into stack destroys any spilled ptr */
		state->stack[spi].spilled_ptr.type = NOT_INIT;
		/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
		if (is_stack_slot_special(&state->stack[spi]))
			for (i = 0; i < BPF_REG_SIZE; i++)
				scrub_spilled_slot(&state->stack[spi].slot_type[i]);

		/* when we zero initialize stack slots mark them as such */
		if ((reg && register_is_null(reg)) ||
		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
			/* STACK_ZERO case happened because register spill
			 * wasn't properly aligned at the stack slot boundary,
			 * so it's not a register spill anymore; force
			 * originating register to be precise to make
			 * STACK_ZERO correct for subsequent states
			 */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
			type = STACK_ZERO;
		}

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
		insn_flags = 0;	/* not a register spill */
	}

	/* Record the stack access in the jump history only for spills. */
	if (insn_flags)
		return push_jmp_history(env, env->cur_state, insn_flags, 0);
	return 0;
}
5373
5374 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
5375 * known to contain a variable offset.
5376 * This function checks whether the write is permitted and conservatively
5377 * tracks the effects of the write, considering that each stack slot in the
5378 * dynamic range is potentially written to.
5379 *
5380 * 'off' includes 'regno->off'.
5381 * 'value_regno' can be -1, meaning that an unknown value is being written to
5382 * the stack.
5383 *
5384 * Spilled pointers in range are not marked as written because we don't know
5385 * what's going to be actually written. This means that read propagation for
5386 * future reads cannot be terminated by this write.
5387 *
5388 * For privileged programs, uninitialized stack slots are considered
5389 * initialized by this write (even though we don't know exactly what offsets
5390 * are going to be written to). The idea is that we don't want the verifier to
5391 * reject future reads that access slots written to through variable offsets.
5392 */
check_stack_write_var_off(struct bpf_verifier_env * env,struct bpf_func_state * state,int ptr_regno,int off,int size,int value_regno,int insn_idx)5393 static int check_stack_write_var_off(struct bpf_verifier_env *env,
5394 /* func where register points to */
5395 struct bpf_func_state *state,
5396 int ptr_regno, int off, int size,
5397 int value_regno, int insn_idx)
5398 {
5399 struct bpf_func_state *cur; /* state of the current function */
5400 int min_off, max_off;
5401 int i, err;
5402 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
5403 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5404 bool writing_zero = false;
5405 /* set if the fact that we're writing a zero is used to let any
5406 * stack slots remain STACK_ZERO
5407 */
5408 bool zero_used = false;
5409
5410 cur = env->cur_state->frame[env->cur_state->curframe];
5411 ptr_reg = &cur->regs[ptr_regno];
5412 min_off = ptr_reg->smin_value + off;
5413 max_off = ptr_reg->smax_value + off + size;
5414 if (value_regno >= 0)
5415 value_reg = &cur->regs[value_regno];
5416 if ((value_reg && register_is_null(value_reg)) ||
5417 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
5418 writing_zero = true;
5419
5420 for (i = min_off; i < max_off; i++) {
5421 int spi;
5422
5423 spi = __get_spi(i);
5424 err = destroy_if_dynptr_stack_slot(env, state, spi);
5425 if (err)
5426 return err;
5427 }
5428
5429 check_fastcall_stack_contract(env, state, insn_idx, min_off);
5430 /* Variable offset writes destroy any spilled pointers in range. */
5431 for (i = min_off; i < max_off; i++) {
5432 u8 new_type, *stype;
5433 int slot, spi;
5434
5435 slot = -i - 1;
5436 spi = slot / BPF_REG_SIZE;
5437 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5438 mark_stack_slot_scratched(env, spi);
5439
5440 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
5441 /* Reject the write if range we may write to has not
5442 * been initialized beforehand. If we didn't reject
5443 * here, the ptr status would be erased below (even
5444 * though not all slots are actually overwritten),
5445 * possibly opening the door to leaks.
5446 *
5447 * We do however catch STACK_INVALID case below, and
5448 * only allow reading possibly uninitialized memory
5449 * later for CAP_PERFMON, as the write may not happen to
5450 * that slot.
5451 */
5452 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
5453 insn_idx, i);
5454 return -EINVAL;
5455 }
5456
5457 /* If writing_zero and the spi slot contains a spill of value 0,
5458 * maintain the spill type.
5459 */
5460 if (writing_zero && *stype == STACK_SPILL &&
5461 is_spilled_scalar_reg(&state->stack[spi])) {
5462 struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;
5463
5464 if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
5465 zero_used = true;
5466 continue;
5467 }
5468 }
5469
5470 /* Erase all other spilled pointers. */
5471 state->stack[spi].spilled_ptr.type = NOT_INIT;
5472
5473 /* Update the slot type. */
5474 new_type = STACK_MISC;
5475 if (writing_zero && *stype == STACK_ZERO) {
5476 new_type = STACK_ZERO;
5477 zero_used = true;
5478 }
5479 /* If the slot is STACK_INVALID, we check whether it's OK to
5480 * pretend that it will be initialized by this write. The slot
5481 * might not actually be written to, and so if we mark it as
5482 * initialized future reads might leak uninitialized memory.
5483 * For privileged programs, we will accept such reads to slots
5484 * that may or may not be written because, if we're reject
5485 * them, the error would be too confusing.
5486 */
5487 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
5488 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
5489 insn_idx, i);
5490 return -EINVAL;
5491 }
5492 *stype = new_type;
5493 }
5494 if (zero_used) {
5495 /* backtracking doesn't work for STACK_ZERO yet. */
5496 err = mark_chain_precision(env, value_regno);
5497 if (err)
5498 return err;
5499 }
5500 return 0;
5501 }
5502
5503 /* When register 'dst_regno' is assigned some values from stack[min_off,
5504 * max_off), we set the register's type according to the types of the
5505 * respective stack slots. If all the stack values are known to be zeros, then
5506 * so is the destination reg. Otherwise, the register is considered to be
5507 * SCALAR. This function does not deal with register filling; the caller must
5508 * ensure that all spilled registers in the stack range have been marked as
5509 * read.
5510 */
mark_reg_stack_read(struct bpf_verifier_env * env,struct bpf_func_state * ptr_state,int min_off,int max_off,int dst_regno)5511 static void mark_reg_stack_read(struct bpf_verifier_env *env,
5512 /* func where src register points to */
5513 struct bpf_func_state *ptr_state,
5514 int min_off, int max_off, int dst_regno)
5515 {
5516 struct bpf_verifier_state *vstate = env->cur_state;
5517 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5518 int i, slot, spi;
5519 u8 *stype;
5520 int zeros = 0;
5521
5522 for (i = min_off; i < max_off; i++) {
5523 slot = -i - 1;
5524 spi = slot / BPF_REG_SIZE;
5525 mark_stack_slot_scratched(env, spi);
5526 stype = ptr_state->stack[spi].slot_type;
5527 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
5528 break;
5529 zeros++;
5530 }
5531 if (zeros == max_off - min_off) {
5532 /* Any access_size read into register is zero extended,
5533 * so the whole register == const_zero.
5534 */
5535 __mark_reg_const_zero(env, &state->regs[dst_regno]);
5536 } else {
5537 /* have read misc data from the stack */
5538 mark_reg_unknown(env, state->regs, dst_regno);
5539 }
5540 }
5541
5542 /* Read the stack at 'off' and put the results into the register indicated by
5543 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
5544 * spilled reg.
5545 *
5546 * 'dst_regno' can be -1, meaning that the read value is not going to a
5547 * register.
5548 *
5549 * The access is assumed to be within the current stack bounds.
5550 */
check_stack_read_fixed_off(struct bpf_verifier_env * env,struct bpf_func_state * reg_state,int off,int size,int dst_regno)5551 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
5552 /* func where src register points to */
5553 struct bpf_func_state *reg_state,
5554 int off, int size, int dst_regno)
5555 {
5556 struct bpf_verifier_state *vstate = env->cur_state;
5557 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5558 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
5559 struct bpf_reg_state *reg;
5560 u8 *stype, type;
5561 int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
5562 int err;
5563
5564 stype = reg_state->stack[spi].slot_type;
5565 reg = ®_state->stack[spi].spilled_ptr;
5566
5567 mark_stack_slot_scratched(env, spi);
5568 check_fastcall_stack_contract(env, state, env->insn_idx, off);
5569 err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, BIT(spi));
5570 if (err)
5571 return err;
5572
5573 if (is_spilled_reg(®_state->stack[spi])) {
5574 u8 spill_size = 1;
5575
5576 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
5577 spill_size++;
5578
5579 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
5580 if (reg->type != SCALAR_VALUE) {
5581 verbose_linfo(env, env->insn_idx, "; ");
5582 verbose(env, "invalid size of register fill\n");
5583 return -EACCES;
5584 }
5585
5586 if (dst_regno < 0)
5587 return 0;
5588
5589 if (size <= spill_size &&
5590 bpf_stack_narrow_access_ok(off, size, spill_size)) {
5591 /* The earlier check_reg_arg() has decided the
5592 * subreg_def for this insn. Save it first.
5593 */
5594 s32 subreg_def = state->regs[dst_regno].subreg_def;
5595
5596 if (env->bpf_capable && size == 4 && spill_size == 4 &&
5597 get_reg_width(reg) <= 32)
5598 /* Ensure stack slot has an ID to build a relation
5599 * with the destination register on fill.
5600 */
5601 assign_scalar_id_before_mov(env, reg);
5602 copy_register_state(&state->regs[dst_regno], reg);
5603 state->regs[dst_regno].subreg_def = subreg_def;
5604
5605 /* Break the relation on a narrowing fill.
5606 * coerce_reg_to_size will adjust the boundaries.
5607 */
5608 if (get_reg_width(reg) > size * BITS_PER_BYTE)
5609 state->regs[dst_regno].id = 0;
5610 } else {
5611 int spill_cnt = 0, zero_cnt = 0;
5612
5613 for (i = 0; i < size; i++) {
5614 type = stype[(slot - i) % BPF_REG_SIZE];
5615 if (type == STACK_SPILL) {
5616 spill_cnt++;
5617 continue;
5618 }
5619 if (type == STACK_MISC)
5620 continue;
5621 if (type == STACK_ZERO) {
5622 zero_cnt++;
5623 continue;
5624 }
5625 if (type == STACK_INVALID && env->allow_uninit_stack)
5626 continue;
5627 verbose(env, "invalid read from stack off %d+%d size %d\n",
5628 off, i, size);
5629 return -EACCES;
5630 }
5631
5632 if (spill_cnt == size &&
5633 tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
5634 __mark_reg_const_zero(env, &state->regs[dst_regno]);
5635 /* this IS register fill, so keep insn_flags */
5636 } else if (zero_cnt == size) {
5637 /* similarly to mark_reg_stack_read(), preserve zeroes */
5638 __mark_reg_const_zero(env, &state->regs[dst_regno]);
5639 insn_flags = 0; /* not restoring original register state */
5640 } else {
5641 mark_reg_unknown(env, state->regs, dst_regno);
5642 insn_flags = 0; /* not restoring original register state */
5643 }
5644 }
5645 } else if (dst_regno >= 0) {
5646 /* restore register state from stack */
5647 if (env->bpf_capable)
5648 /* Ensure stack slot has an ID to build a relation
5649 * with the destination register on fill.
5650 */
5651 assign_scalar_id_before_mov(env, reg);
5652 copy_register_state(&state->regs[dst_regno], reg);
5653 /* mark reg as written since spilled pointer state likely
5654 * has its liveness marks cleared by is_state_visited()
5655 * which resets stack/reg liveness for state transitions
5656 */
5657 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
5658 /* If dst_regno==-1, the caller is asking us whether
5659 * it is acceptable to use this value as a SCALAR_VALUE
5660 * (e.g. for XADD).
5661 * We must not allow unprivileged callers to do that
5662 * with spilled pointers.
5663 */
5664 verbose(env, "leaking pointer from stack off %d\n",
5665 off);
5666 return -EACCES;
5667 }
5668 } else {
5669 for (i = 0; i < size; i++) {
5670 type = stype[(slot - i) % BPF_REG_SIZE];
5671 if (type == STACK_MISC)
5672 continue;
5673 if (type == STACK_ZERO)
5674 continue;
5675 if (type == STACK_INVALID && env->allow_uninit_stack)
5676 continue;
5677 verbose(env, "invalid read from stack off %d+%d size %d\n",
5678 off, i, size);
5679 return -EACCES;
5680 }
5681 if (dst_regno >= 0)
5682 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
5683 insn_flags = 0; /* we are not restoring spilled register */
5684 }
5685 if (insn_flags)
5686 return push_jmp_history(env, env->cur_state, insn_flags, 0);
5687 return 0;
5688 }
5689
/* Identifies what performs a memory access. */
enum bpf_access_src {
	/* the access is performed by an instruction */
	ACCESS_DIRECT = 1,
	/* the access is performed by a helper */
	ACCESS_HELPER = 2,
};
5694
/* Forward declaration: check_stack_read_var_off() below calls this function
 * before its definition appears.
 */
static int check_stack_range_initialized(struct bpf_verifier_env *env,
					 int regno, int off, int access_size,
					 bool zero_size_allowed,
					 enum bpf_access_type type,
					 struct bpf_call_arg_meta *meta);
5700
reg_state(struct bpf_verifier_env * env,int regno)5701 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
5702 {
5703 return cur_regs(env) + regno;
5704 }
5705
5706 /* Read the stack at 'ptr_regno + off' and put the result into the register
5707 * 'dst_regno'.
5708 * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
5709 * but not its variable offset.
5710 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
5711 *
5712 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
5713 * filling registers (i.e. reads of spilled register cannot be detected when
5714 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
5715 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
5716 * offset; for a fixed offset check_stack_read_fixed_off should be used
5717 * instead.
5718 */
check_stack_read_var_off(struct bpf_verifier_env * env,int ptr_regno,int off,int size,int dst_regno)5719 static int check_stack_read_var_off(struct bpf_verifier_env *env,
5720 int ptr_regno, int off, int size, int dst_regno)
5721 {
5722 /* The state of the source register. */
5723 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5724 struct bpf_func_state *ptr_state = func(env, reg);
5725 int err;
5726 int min_off, max_off;
5727
5728 /* Note that we pass a NULL meta, so raw access will not be permitted.
5729 */
5730 err = check_stack_range_initialized(env, ptr_regno, off, size,
5731 false, BPF_READ, NULL);
5732 if (err)
5733 return err;
5734
5735 min_off = reg->smin_value + off;
5736 max_off = reg->smax_value + off;
5737 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
5738 check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
5739 return 0;
5740 }
5741
5742 /* check_stack_read dispatches to check_stack_read_fixed_off or
5743 * check_stack_read_var_off.
5744 *
5745 * The caller must ensure that the offset falls within the allocated stack
5746 * bounds.
5747 *
5748 * 'dst_regno' is a register which will receive the value from the stack. It
5749 * can be -1, meaning that the read value is not going to a register.
5750 */
check_stack_read(struct bpf_verifier_env * env,int ptr_regno,int off,int size,int dst_regno)5751 static int check_stack_read(struct bpf_verifier_env *env,
5752 int ptr_regno, int off, int size,
5753 int dst_regno)
5754 {
5755 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5756 struct bpf_func_state *state = func(env, reg);
5757 int err;
5758 /* Some accesses are only permitted with a static offset. */
5759 bool var_off = !tnum_is_const(reg->var_off);
5760
5761 /* The offset is required to be static when reads don't go to a
5762 * register, in order to not leak pointers (see
5763 * check_stack_read_fixed_off).
5764 */
5765 if (dst_regno < 0 && var_off) {
5766 char tn_buf[48];
5767
5768 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5769 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
5770 tn_buf, off, size);
5771 return -EACCES;
5772 }
5773 /* Variable offset is prohibited for unprivileged mode for simplicity
5774 * since it requires corresponding support in Spectre masking for stack
5775 * ALU. See also retrieve_ptr_limit(). The check in
5776 * check_stack_access_for_ptr_arithmetic() called by
5777 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
5778 * with variable offsets, therefore no check is required here. Further,
5779 * just checking it here would be insufficient as speculative stack
5780 * writes could still lead to unsafe speculative behaviour.
5781 */
5782 if (!var_off) {
5783 off += reg->var_off.value;
5784 err = check_stack_read_fixed_off(env, state, off, size,
5785 dst_regno);
5786 } else {
5787 /* Variable offset stack reads need more conservative handling
5788 * than fixed offset ones. Note that dst_regno >= 0 on this
5789 * branch.
5790 */
5791 err = check_stack_read_var_off(env, ptr_regno, off, size,
5792 dst_regno);
5793 }
5794 return err;
5795 }
5796
5797
5798 /* check_stack_write dispatches to check_stack_write_fixed_off or
5799 * check_stack_write_var_off.
5800 *
5801 * 'ptr_regno' is the register used as a pointer into the stack.
5802 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
5803 * 'value_regno' is the register whose value we're writing to the stack. It can
5804 * be -1, meaning that we're not writing from a register.
5805 *
5806 * The caller must ensure that the offset falls within the maximum stack size.
5807 */
check_stack_write(struct bpf_verifier_env * env,int ptr_regno,int off,int size,int value_regno,int insn_idx)5808 static int check_stack_write(struct bpf_verifier_env *env,
5809 int ptr_regno, int off, int size,
5810 int value_regno, int insn_idx)
5811 {
5812 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5813 struct bpf_func_state *state = func(env, reg);
5814 int err;
5815
5816 if (tnum_is_const(reg->var_off)) {
5817 off += reg->var_off.value;
5818 err = check_stack_write_fixed_off(env, state, off, size,
5819 value_regno, insn_idx);
5820 } else {
5821 /* Variable offset stack reads need more conservative handling
5822 * than fixed offset ones.
5823 */
5824 err = check_stack_write_var_off(env, state,
5825 ptr_regno, off, size,
5826 value_regno, insn_idx);
5827 }
5828 return err;
5829 }
5830
/* Check that the map referenced by register 'regno' permits an access of the
 * given 'type'. The capabilities come from bpf_map_flags_to_cap().
 * Returns 0 when the access is allowed and -EACCES otherwise.
 */
static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
				 int off, int size, enum bpf_access_type type)
{
	struct bpf_map *map = reg_state(env, regno)->map_ptr;
	u32 cap = bpf_map_flags_to_cap(map);

	switch (type) {
	case BPF_WRITE:
		if (cap & BPF_MAP_CAN_WRITE)
			break;
		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	case BPF_READ:
		if (cap & BPF_MAP_CAN_READ)
			break;
		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	default:
		/* Other access types are not restricted here. */
		break;
	}

	return 0;
}
5852
5853 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
__check_mem_access(struct bpf_verifier_env * env,int regno,int off,int size,u32 mem_size,bool zero_size_allowed)5854 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
5855 int off, int size, u32 mem_size,
5856 bool zero_size_allowed)
5857 {
5858 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
5859 struct bpf_reg_state *reg;
5860
5861 if (off >= 0 && size_ok && (u64)off + size <= mem_size)
5862 return 0;
5863
5864 reg = &cur_regs(env)[regno];
5865 switch (reg->type) {
5866 case PTR_TO_MAP_KEY:
5867 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
5868 mem_size, off, size);
5869 break;
5870 case PTR_TO_MAP_VALUE:
5871 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
5872 mem_size, off, size);
5873 break;
5874 case PTR_TO_PACKET:
5875 case PTR_TO_PACKET_META:
5876 case PTR_TO_PACKET_END:
5877 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
5878 off, size, regno, reg->id, off, mem_size);
5879 break;
5880 case PTR_TO_MEM:
5881 default:
5882 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
5883 mem_size, off, size);
5884 }
5885
5886 return -EACCES;
5887 }
5888
5889 /* check read/write into a memory region with possible variable offset */
check_mem_region_access(struct bpf_verifier_env * env,u32 regno,int off,int size,u32 mem_size,bool zero_size_allowed)5890 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
5891 int off, int size, u32 mem_size,
5892 bool zero_size_allowed)
5893 {
5894 struct bpf_verifier_state *vstate = env->cur_state;
5895 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5896 struct bpf_reg_state *reg = &state->regs[regno];
5897 int err;
5898
5899 /* We may have adjusted the register pointing to memory region, so we
5900 * need to try adding each of min_value and max_value to off
5901 * to make sure our theoretical access will be safe.
5902 *
5903 * The minimum value is only important with signed
5904 * comparisons where we can't assume the floor of a
5905 * value is 0. If we are using signed variables for our
5906 * index'es we need to make sure that whatever we use
5907 * will have a set floor within our range.
5908 */
5909 if (reg->smin_value < 0 &&
5910 (reg->smin_value == S64_MIN ||
5911 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
5912 reg->smin_value + off < 0)) {
5913 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5914 regno);
5915 return -EACCES;
5916 }
5917 err = __check_mem_access(env, regno, reg->smin_value + off, size,
5918 mem_size, zero_size_allowed);
5919 if (err) {
5920 verbose(env, "R%d min value is outside of the allowed memory range\n",
5921 regno);
5922 return err;
5923 }
5924
5925 /* If we haven't set a max value then we need to bail since we can't be
5926 * sure we won't do bad things.
5927 * If reg->umax_value + off could overflow, treat that as unbounded too.
5928 */
5929 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
5930 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
5931 regno);
5932 return -EACCES;
5933 }
5934 err = __check_mem_access(env, regno, reg->umax_value + off, size,
5935 mem_size, zero_size_allowed);
5936 if (err) {
5937 verbose(env, "R%d max value is outside of the allowed memory range\n",
5938 regno);
5939 return err;
5940 }
5941
5942 return 0;
5943 }
5944
/* Check that pointer register 'reg' (number 'regno') is in its original,
 * unmodified form before it is dereferenced or passed to a helper.
 * A negative fixed offset and any variable offset are always rejected; a
 * non-zero fixed offset is tolerated only when 'fixed_off_ok' is true.
 * Returns 0 when the register is acceptable and -EACCES otherwise.
 */
static int __check_ptr_off_reg(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int regno,
			       bool fixed_off_ok)
{
	bool has_var_off;

	if (reg->off < 0) {
		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
			reg_type_str(env, reg->type), regno, reg->off);
		return -EACCES;
	}

	if (reg->off && !fixed_off_ok) {
		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
			reg_type_str(env, reg->type), regno, reg->off);
		return -EACCES;
	}

	/* var_off must be the constant 0. */
	has_var_off = !tnum_is_const(reg->var_off) || reg->var_off.value;
	if (has_var_off) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable %s access var_off=%s disallowed\n",
			reg_type_str(env, reg->type), tn_buf);
		return -EACCES;
	}

	return 0;
}
5976
check_ptr_off_reg(struct bpf_verifier_env * env,const struct bpf_reg_state * reg,int regno)5977 static int check_ptr_off_reg(struct bpf_verifier_env *env,
5978 const struct bpf_reg_state *reg, int regno)
5979 {
5980 return __check_ptr_off_reg(env, reg, regno, false);
5981 }
5982
/* Check that register 'reg' (number 'regno') may be stored into the kptr
 * field described by 'kptr_field'.
 *
 * The register must be a PTR_TO_BTF_ID whose type flags are all permitted for
 * this kind of field, must pass __check_ptr_off_reg() with a fixed offset
 * allowed, and its BTF type must match the field's type.
 *
 * Returns 0 on a match, -EACCES when the register's offset is not acceptable,
 * and -EINVAL (after logging the expected type) on a type mismatch.
 */
static int map_kptr_match_type(struct bpf_verifier_env *env,
			       struct btf_field *kptr_field,
			       struct bpf_reg_state *reg, u32 regno)
{
	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
	int perm_flags;
	const char *reg_name = "";

	/* reg->btf is only inspected once the register is known to be a
	 * PTR_TO_BTF_ID, so the base type is checked before anything else.
	 */
	if (base_type(reg->type) != PTR_TO_BTF_ID)
		goto bad_type;

	if (btf_is_kernel(reg->btf)) {
		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;

		/* Only unreferenced case accepts untrusted pointers */
		if (kptr_field->type == BPF_KPTR_UNREF)
			perm_flags |= PTR_UNTRUSTED;
	} else {
		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
		if (kptr_field->type == BPF_KPTR_PERCPU)
			perm_flags |= MEM_PERCPU;
	}

	/* Any type flag outside the permitted set is a mismatch. */
	if (type_flag(reg->type) & ~perm_flags)
		goto bad_type;

	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
	reg_name = btf_type_name(reg->btf, reg->btf_id);

	/* For ref_ptr case, release function check should ensure we get one
	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
	 * normal store of unreferenced kptr, we must ensure var_off is zero.
	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
	 * reg->off and reg->ref_obj_id are not needed here.
	 */
	if (__check_ptr_off_reg(env, reg, regno, true))
		return -EACCES;

	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
	 * we also need to take into account the reg->off.
	 *
	 * We want to support cases like:
	 *
	 * struct foo {
	 *         struct bar br;
	 *         struct baz bz;
	 * };
	 *
	 * struct foo *v;
	 * v = func();	      // PTR_TO_BTF_ID
	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
	 *                    // first member type of struct after comparison fails
	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
	 *                    // to match type
	 *
	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
	 * is zero. We must also ensure that btf_struct_ids_match does not walk
	 * the struct to match type against first member of struct, i.e. reject
	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
	 * strict mode to true for type match.
	 */
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				  kptr_field->type != BPF_KPTR_UNREF))
		goto bad_type;
	return 0;
bad_type:
	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
		reg_type_str(env, reg->type), reg_name);
	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
	if (kptr_field->type == BPF_KPTR_UNREF)
		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
			targ_name);
	else
		verbose(env, "\n");
	return -EINVAL;
}
6058
in_sleepable(struct bpf_verifier_env * env)6059 static bool in_sleepable(struct bpf_verifier_env *env)
6060 {
6061 return env->cur_state->in_sleepable;
6062 }
6063
6064 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
6065 * can dereference RCU protected pointers and result is PTR_TRUSTED.
6066 */
in_rcu_cs(struct bpf_verifier_env * env)6067 static bool in_rcu_cs(struct bpf_verifier_env *env)
6068 {
6069 return env->cur_state->active_rcu_locks ||
6070 env->cur_state->active_locks ||
6071 !in_sleepable(env);
6072 }
6073
/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
/* BTF id set named rcu_protected_types; rcu_protected_object() looks kernel
 * BTF ids up in it. Entries inside an #ifdef are only part of the set when the
 * corresponding config option is defined.
 */
BTF_SET_START(rcu_protected_types)
#ifdef CONFIG_NET
BTF_ID(struct, prog_test_ref_kfunc)
#endif
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
/* task_struct is listed unconditionally. */
BTF_ID(struct, task_struct)
#ifdef CONFIG_CRYPTO
BTF_ID(struct, bpf_crypto_ctx)
#endif
BTF_SET_END(rcu_protected_types)
6090
6091 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
6092 {
6093 if (!btf_is_kernel(btf))
6094 return true;
6095 return btf_id_set_contains(&rcu_protected_types, btf_id);
6096 }
6097
kptr_pointee_btf_record(struct btf_field * kptr_field)6098 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
6099 {
6100 struct btf_struct_meta *meta;
6101
6102 if (btf_is_kernel(kptr_field->kptr.btf))
6103 return NULL;
6104
6105 meta = btf_find_struct_meta(kptr_field->kptr.btf,
6106 kptr_field->kptr.btf_id);
6107
6108 return meta ? meta->record : NULL;
6109 }
6110
rcu_safe_kptr(const struct btf_field * field)6111 static bool rcu_safe_kptr(const struct btf_field *field)
6112 {
6113 const struct btf_field_kptr *kptr = &field->kptr;
6114
6115 return field->type == BPF_KPTR_PERCPU ||
6116 (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
6117 }
6118
btf_ld_kptr_type(struct bpf_verifier_env * env,struct btf_field * kptr_field)6119 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
6120 {
6121 struct btf_record *rec;
6122 u32 ret;
6123
6124 ret = PTR_MAYBE_NULL;
6125 if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
6126 ret |= MEM_RCU;
6127 if (kptr_field->type == BPF_KPTR_PERCPU)
6128 ret |= MEM_PERCPU;
6129 else if (!btf_is_kernel(kptr_field->kptr.btf))
6130 ret |= MEM_ALLOC;
6131
6132 rec = kptr_pointee_btf_record(kptr_field);
6133 if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
6134 ret |= NON_OWN_REF;
6135 } else {
6136 ret |= PTR_UNTRUSTED;
6137 }
6138
6139 return ret;
6140 }
6141
/* Set up register @regno as the destination of a uptr load: a nullable
 * PTR_TO_MEM whose mem_size is the size of the BTF type the field points to,
 * tagged with a freshly generated id. Always returns 0.
 */
static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
			    struct btf_field *field)
{
	const struct btf_type *pointee;
	struct bpf_reg_state *dst;

	pointee = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);

	mark_reg_known_zero(env, cur_regs(env), regno);

	dst = reg_state(env, regno);
	dst->type = PTR_TO_MEM | PTR_MAYBE_NULL;
	dst->mem_size = pointee->size;
	dst->id = ++env->id_gen;

	return 0;
}
6157
/* Validate a load from / store to a kptr (or uptr) field of a map value.
 *
 * Returns 0 when the access is allowed, -EACCES when it is rejected, or a
 * negative error propagated from mark_btf_ld_reg().
 */
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
				 int value_regno, int insn_idx,
				 struct btf_field *kptr_field)
{
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	int insn_class = BPF_CLASS(insn->code);
	struct bpf_reg_state *src_reg;
	int err;

	/* Things we already checked for in check_map_access and caller:
	 *  - Reject cases where variable offset may touch kptr
	 *  - size of access (must be BPF_DW)
	 *  - tnum_is_const(reg->var_off)
	 *  - kptr_field->offset == off + reg->var_off.value
	 */
	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
	if (BPF_MODE(insn->code) != BPF_MEM) {
		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
		return -EACCES;
	}

	/* Referenced and percpu kptrs as well as uptrs may only be loaded.
	 * A loaded referenced kptr is marked as untrusted, similar to an
	 * unreferenced kptr.
	 */
	if (insn_class != BPF_LDX) {
		switch (kptr_field->type) {
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
			verbose(env, "store to referenced kptr disallowed\n");
			return -EACCES;
		case BPF_UPTR:
			verbose(env, "store to uptr disallowed\n");
			return -EACCES;
		default:
			break;
		}
	}

	switch (insn_class) {
	case BPF_LDX:
		if (kptr_field->type == BPF_UPTR)
			return mark_uptr_ld_reg(env, value_regno, kptr_field);

		/* We can simply mark the value_regno receiving the pointer
		 * value from map as PTR_TO_BTF_ID, with the correct type.
		 */
		err = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
				      kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				      btf_ld_kptr_type(env, kptr_field));
		if (err < 0)
			return err;
		break;
	case BPF_STX:
		/* A NULL store is always fine; anything else must match the field type */
		src_reg = reg_state(env, value_regno);
		if (!register_is_null(src_reg) &&
		    map_kptr_match_type(env, kptr_field, src_reg, value_regno))
			return -EACCES;
		break;
	case BPF_ST:
		if (insn->imm) {
			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
				kptr_field->offset);
			return -EACCES;
		}
		break;
	default:
		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
		return -EACCES;
	}

	return 0;
}
6221
6222 /*
6223 * Return the size of the memory region accessible from a pointer to map value.
6224 * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible.
6225 */
map_mem_size(const struct bpf_map * map)6226 static u32 map_mem_size(const struct bpf_map *map)
6227 {
6228 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
6229 return map->max_entries * sizeof(long);
6230
6231 return map->value_size;
6232 }
6233
6234 /* check read/write into a map element with possible variable offset */
check_map_access(struct bpf_verifier_env * env,u32 regno,int off,int size,bool zero_size_allowed,enum bpf_access_src src)6235 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
6236 int off, int size, bool zero_size_allowed,
6237 enum bpf_access_src src)
6238 {
6239 struct bpf_verifier_state *vstate = env->cur_state;
6240 struct bpf_func_state *state = vstate->frame[vstate->curframe];
6241 struct bpf_reg_state *reg = &state->regs[regno];
6242 struct bpf_map *map = reg->map_ptr;
6243 u32 mem_size = map_mem_size(map);
6244 struct btf_record *rec;
6245 int err, i;
6246
6247 err = check_mem_region_access(env, regno, off, size, mem_size, zero_size_allowed);
6248 if (err)
6249 return err;
6250
6251 if (IS_ERR_OR_NULL(map->record))
6252 return 0;
6253 rec = map->record;
6254 for (i = 0; i < rec->cnt; i++) {
6255 struct btf_field *field = &rec->fields[i];
6256 u32 p = field->offset;
6257
6258 /* If any part of a field can be touched by load/store, reject
6259 * this program. To check that [x1, x2) overlaps with [y1, y2),
6260 * it is sufficient to check x1 < y2 && y1 < x2.
6261 */
6262 if (reg->smin_value + off < p + field->size &&
6263 p < reg->umax_value + off + size) {
6264 switch (field->type) {
6265 case BPF_KPTR_UNREF:
6266 case BPF_KPTR_REF:
6267 case BPF_KPTR_PERCPU:
6268 case BPF_UPTR:
6269 if (src != ACCESS_DIRECT) {
6270 verbose(env, "%s cannot be accessed indirectly by helper\n",
6271 btf_field_type_name(field->type));
6272 return -EACCES;
6273 }
6274 if (!tnum_is_const(reg->var_off)) {
6275 verbose(env, "%s access cannot have variable offset\n",
6276 btf_field_type_name(field->type));
6277 return -EACCES;
6278 }
6279 if (p != off + reg->var_off.value) {
6280 verbose(env, "%s access misaligned expected=%u off=%llu\n",
6281 btf_field_type_name(field->type),
6282 p, off + reg->var_off.value);
6283 return -EACCES;
6284 }
6285 if (size != bpf_size_to_bytes(BPF_DW)) {
6286 verbose(env, "%s access size must be BPF_DW\n",
6287 btf_field_type_name(field->type));
6288 return -EACCES;
6289 }
6290 break;
6291 default:
6292 verbose(env, "%s cannot be accessed directly by load/store\n",
6293 btf_field_type_name(field->type));
6294 return -EACCES;
6295 }
6296 }
6297 }
6298 return 0;
6299 }
6300
6301 #define MAX_PACKET_OFF 0xffff
6302
may_access_direct_pkt_data(struct bpf_verifier_env * env,const struct bpf_call_arg_meta * meta,enum bpf_access_type t)6303 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
6304 const struct bpf_call_arg_meta *meta,
6305 enum bpf_access_type t)
6306 {
6307 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
6308
6309 switch (prog_type) {
6310 /* Program types only with direct read access go here! */
6311 case BPF_PROG_TYPE_LWT_IN:
6312 case BPF_PROG_TYPE_LWT_OUT:
6313 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
6314 case BPF_PROG_TYPE_SK_REUSEPORT:
6315 case BPF_PROG_TYPE_FLOW_DISSECTOR:
6316 case BPF_PROG_TYPE_CGROUP_SKB:
6317 if (t == BPF_WRITE)
6318 return false;
6319 fallthrough;
6320
6321 /* Program types with direct read + write access go here! */
6322 case BPF_PROG_TYPE_SCHED_CLS:
6323 case BPF_PROG_TYPE_SCHED_ACT:
6324 case BPF_PROG_TYPE_XDP:
6325 case BPF_PROG_TYPE_LWT_XMIT:
6326 case BPF_PROG_TYPE_SK_SKB:
6327 case BPF_PROG_TYPE_SK_MSG:
6328 if (meta)
6329 return meta->pkt_access;
6330
6331 env->seen_direct_write = true;
6332 return true;
6333
6334 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
6335 if (t == BPF_WRITE)
6336 env->seen_direct_write = true;
6337
6338 return true;
6339
6340 default:
6341 return false;
6342 }
6343 }
6344
/* Check an access of @size bytes at fixed offset @off through packet pointer
 * register @regno, and record the highest packet offset touched so far in
 * prog->aux->max_pkt_offset.
 *
 * Returns 0 on success, -EACCES if the register may be negative, or the
 * error from the range check (-EINVAL when reg->range is negative).
 */
static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
			       int size, bool zero_size_allowed)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_prog_aux *aux = env->prog->aux;
	u32 last_byte;
	int err;

	/* We may have added a variable offset to the packet pointer; but any
	 * reg->range we have comes after that.  We are only checking the fixed
	 * offset.
	 */

	/* We don't allow negative numbers, because we aren't tracking enough
	 * detail to prove they're safe.
	 */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}

	if (reg->range < 0)
		err = -EINVAL;
	else
		err = __check_mem_access(env, regno, off, size, reg->range,
					 zero_size_allowed);
	if (err) {
		verbose(env, "R%d offset is outside of the packet\n", regno);
		return err;
	}

	/* __check_mem_access has made sure "off + size - 1" is within u16.
	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
	 * otherwise find_good_pkt_pointers would have refused to set range info
	 * and __check_mem_access would have rejected this pkt access.
	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
	 */
	last_byte = off + reg->umax_value + size - 1;
	if (last_byte > aux->max_pkt_offset)
		aux->max_pkt_offset = last_byte;

	return 0;
}
6385
6386 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
check_ctx_access(struct bpf_verifier_env * env,int insn_idx,int off,int size,enum bpf_access_type t,struct bpf_insn_access_aux * info)6387 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
6388 enum bpf_access_type t, struct bpf_insn_access_aux *info)
6389 {
6390 if (env->ops->is_valid_access &&
6391 env->ops->is_valid_access(off, size, t, env->prog, info)) {
6392 /* A non zero info.ctx_field_size indicates that this field is a
6393 * candidate for later verifier transformation to load the whole
6394 * field and then apply a mask when accessed with a narrower
6395 * access than actual ctx access size. A zero info.ctx_field_size
6396 * will only allow for whole field access and rejects any other
6397 * type of narrower access.
6398 */
6399 if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
6400 if (info->ref_obj_id &&
6401 !find_reference_state(env->cur_state, info->ref_obj_id)) {
6402 verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
6403 off);
6404 return -EACCES;
6405 }
6406 } else {
6407 env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
6408 }
6409 /* remember the offset of last byte accessed in ctx */
6410 if (env->prog->aux->max_ctx_offset < off + size)
6411 env->prog->aux->max_ctx_offset = off + size;
6412 return 0;
6413 }
6414
6415 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
6416 return -EACCES;
6417 }
6418
check_flow_keys_access(struct bpf_verifier_env * env,int off,int size)6419 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
6420 int size)
6421 {
6422 if (size < 0 || off < 0 ||
6423 (u64)off + size > sizeof(struct bpf_flow_keys)) {
6424 verbose(env, "invalid access to flow keys off=%d size=%d\n",
6425 off, size);
6426 return -EACCES;
6427 }
6428 return 0;
6429 }
6430
/* Check an access through one of the socket pointer register types by
 * dispatching to the matching *_is_valid_access() helper. On success the
 * ctx_field_size reported by the helper is stored in the insn aux data.
 *
 * Returns 0 when the access is valid, -EACCES otherwise (including when the
 * register's minimum value is negative or its type is not a socket type).
 */
static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
			     u32 regno, int off, int size,
			     enum bpf_access_type t)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_insn_access_aux info = {};
	bool ok;

	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}

	switch (reg->type) {
	case PTR_TO_SOCK_COMMON:
		ok = bpf_sock_common_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_SOCKET:
		ok = bpf_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_TCP_SOCK:
		ok = bpf_tcp_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_XDP_SOCK:
		ok = bpf_xdp_sock_is_valid_access(off, size, t, &info);
		break;
	default:
		ok = false;
		break;
	}

	if (!ok) {
		verbose(env, "R%d invalid %s access off=%d size=%d\n",
			regno, reg_type_str(env, reg->type), off, size);
		return -EACCES;
	}

	env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
	return 0;
}
6474
is_pointer_value(struct bpf_verifier_env * env,int regno)6475 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
6476 {
6477 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
6478 }
6479
is_ctx_reg(struct bpf_verifier_env * env,int regno)6480 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
6481 {
6482 const struct bpf_reg_state *reg = reg_state(env, regno);
6483
6484 return reg->type == PTR_TO_CTX;
6485 }
6486
is_sk_reg(struct bpf_verifier_env * env,int regno)6487 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
6488 {
6489 const struct bpf_reg_state *reg = reg_state(env, regno);
6490
6491 return type_is_sk_pointer(reg->type);
6492 }
6493
is_pkt_reg(struct bpf_verifier_env * env,int regno)6494 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
6495 {
6496 const struct bpf_reg_state *reg = reg_state(env, regno);
6497
6498 return type_is_pkt_pointer(reg->type);
6499 }
6500
is_flow_key_reg(struct bpf_verifier_env * env,int regno)6501 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
6502 {
6503 const struct bpf_reg_state *reg = reg_state(env, regno);
6504
6505 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
6506 return reg->type == PTR_TO_FLOW_KEYS;
6507 }
6508
is_arena_reg(struct bpf_verifier_env * env,int regno)6509 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
6510 {
6511 const struct bpf_reg_state *reg = reg_state(env, regno);
6512
6513 return reg->type == PTR_TO_ARENA;
6514 }
6515
6516 /* Return false if @regno contains a pointer whose type isn't supported for
6517 * atomic instruction @insn.
6518 */
atomic_ptr_type_ok(struct bpf_verifier_env * env,int regno,struct bpf_insn * insn)6519 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
6520 struct bpf_insn *insn)
6521 {
6522 if (is_ctx_reg(env, regno))
6523 return false;
6524 if (is_pkt_reg(env, regno))
6525 return false;
6526 if (is_flow_key_reg(env, regno))
6527 return false;
6528 if (is_sk_reg(env, regno))
6529 return false;
6530 if (is_arena_reg(env, regno))
6531 return bpf_jit_supports_insn(insn, true);
6532
6533 return true;
6534 }
6535
6536 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
6537 #ifdef CONFIG_NET
6538 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
6539 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
6540 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
6541 #endif
6542 [CONST_PTR_TO_MAP] = btf_bpf_map_id,
6543 };
6544
is_trusted_reg(const struct bpf_reg_state * reg)6545 static bool is_trusted_reg(const struct bpf_reg_state *reg)
6546 {
6547 /* A referenced register is always trusted. */
6548 if (reg->ref_obj_id)
6549 return true;
6550
6551 /* Types listed in the reg2btf_ids are always trusted */
6552 if (reg2btf_ids[base_type(reg->type)] &&
6553 !bpf_type_has_unsafe_modifiers(reg->type))
6554 return true;
6555
6556 /* If a register is not referenced, it is trusted if it has the
6557 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
6558 * other type modifiers may be safe, but we elect to take an opt-in
6559 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
6560 * not.
6561 *
6562 * Eventually, we should make PTR_TRUSTED the single source of truth
6563 * for whether a register is trusted.
6564 */
6565 return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
6566 !bpf_type_has_unsafe_modifiers(reg->type);
6567 }
6568
is_rcu_reg(const struct bpf_reg_state * reg)6569 static bool is_rcu_reg(const struct bpf_reg_state *reg)
6570 {
6571 return reg->type & MEM_RCU;
6572 }
6573
clear_trusted_flags(enum bpf_type_flag * flag)6574 static void clear_trusted_flags(enum bpf_type_flag *flag)
6575 {
6576 *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
6577 }
6578
/* Alignment check for packet pointers. Only enforced in strict mode and for
 * accesses wider than one byte. Returns 0 if aligned (or not enforced),
 * -EACCES otherwise.
 */
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg,
				   int off, int size, bool strict)
{
	/* For platforms that do not have a Kconfig enabling
	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
	 * to this code only in strict mode where we want to emulate
	 * the NET_IP_ALIGN==2 checking.  Therefore use an
	 * unconditional IP align value of '2'.
	 */
	const int ip_align = 2;
	struct tnum total_off;
	char tn_buf[48];

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	total_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
	if (tnum_is_aligned(total_off, size))
		return 0;

	tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
	verbose(env,
		"misaligned packet access off %d+%s+%d+%d size %d\n",
		ip_align, tn_buf, reg->off, off, size);
	return -EACCES;
}
6613
/* Alignment check for non-packet pointers. Only enforced in strict mode and
 * for accesses wider than one byte. @pointer_desc is spliced into the error
 * message. Returns 0 if aligned (or not enforced), -EACCES otherwise.
 */
static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
				       const struct bpf_reg_state *reg,
				       const char *pointer_desc,
				       int off, int size, bool strict)
{
	struct tnum total_off;
	char tn_buf[48];

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	total_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
	if (tnum_is_aligned(total_off, size))
		return 0;

	tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
	verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
		pointer_desc, tn_buf, reg->off, off, size);
	return -EACCES;
}
6637
/* Dispatch the alignment check according to the register type.
 *
 * Strict checking is used when env->strict_alignment or
 * @strict_alignment_once is set, and is forced for stack pointers and for
 * values of INSN_ARRAY maps. Arena pointers are never checked.
 */
static int check_ptr_alignment(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int off,
			       int size, bool strict_alignment_once)
{
	bool strict = env->strict_alignment || strict_alignment_once;
	const char *desc;

	/* Special case, because of NET_IP_ALIGN. Given metadata sits
	 * right in front, treat it the very same way.
	 */
	if (reg->type == PTR_TO_PACKET || reg->type == PTR_TO_PACKET_META)
		return check_pkt_ptr_alignment(env, reg, off, size, strict);

	if (reg->type == PTR_TO_ARENA)
		return 0;

	switch (reg->type) {
	case PTR_TO_FLOW_KEYS:
		desc = "flow keys ";
		break;
	case PTR_TO_MAP_KEY:
		desc = "key ";
		break;
	case PTR_TO_MAP_VALUE:
		desc = "value ";
		if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY)
			strict = true;
		break;
	case PTR_TO_CTX:
		desc = "context ";
		break;
	case PTR_TO_STACK:
		desc = "stack ";
		/* The stack spill tracking logic in check_stack_write_fixed_off()
		 * and check_stack_read_fixed_off() relies on stack accesses being
		 * aligned.
		 */
		strict = true;
		break;
	case PTR_TO_SOCKET:
		desc = "sock ";
		break;
	case PTR_TO_SOCK_COMMON:
		desc = "sock_common ";
		break;
	case PTR_TO_TCP_SOCK:
		desc = "tcp_sock ";
		break;
	case PTR_TO_XDP_SOCK:
		desc = "xdp_sock ";
		break;
	default:
		desc = "";
		break;
	}

	return check_generic_ptr_alignment(env, reg, desc, off, size, strict);
}
6694
bpf_enable_priv_stack(struct bpf_prog * prog)6695 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
6696 {
6697 if (!bpf_jit_supports_private_stack())
6698 return NO_PRIV_STACK;
6699
6700 /* bpf_prog_check_recur() checks all prog types that use bpf trampoline
6701 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
6702 * explicitly.
6703 */
6704 switch (prog->type) {
6705 case BPF_PROG_TYPE_KPROBE:
6706 case BPF_PROG_TYPE_TRACEPOINT:
6707 case BPF_PROG_TYPE_PERF_EVENT:
6708 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6709 return PRIV_STACK_ADAPTIVE;
6710 case BPF_PROG_TYPE_TRACING:
6711 case BPF_PROG_TYPE_LSM:
6712 case BPF_PROG_TYPE_STRUCT_OPS:
6713 if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
6714 return PRIV_STACK_ADAPTIVE;
6715 fallthrough;
6716 default:
6717 break;
6718 }
6719
6720 return NO_PRIV_STACK;
6721 }
6722
round_up_stack_depth(struct bpf_verifier_env * env,int stack_depth)6723 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
6724 {
6725 if (env->prog->jit_requested)
6726 return round_up(stack_depth, 16);
6727
6728 /* round up to 32-bytes, since this is granularity
6729 * of interpreter stack size
6730 */
6731 return round_up(max_t(u32, stack_depth, 1), 32);
6732 }
6733
6734 /* starting from main bpf function walk all instructions of the function
6735 * and recursively walk all callees that given function can call.
6736 * Ignore jump and exit insns.
6737 * Since recursion is prevented by check_cfg() this algorithm
6738 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
6739 */
check_max_stack_depth_subprog(struct bpf_verifier_env * env,int idx,bool priv_stack_supported)6740 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
6741 bool priv_stack_supported)
6742 {
6743 struct bpf_subprog_info *subprog = env->subprog_info;
6744 struct bpf_insn *insn = env->prog->insnsi;
6745 int depth = 0, frame = 0, i, subprog_end, subprog_depth;
6746 bool tail_call_reachable = false;
6747 int ret_insn[MAX_CALL_FRAMES];
6748 int ret_prog[MAX_CALL_FRAMES];
6749 int j;
6750
6751 i = subprog[idx].start;
6752 if (!priv_stack_supported)
6753 subprog[idx].priv_stack_mode = NO_PRIV_STACK;
6754 process_func:
6755 /* protect against potential stack overflow that might happen when
6756 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
6757 * depth for such case down to 256 so that the worst case scenario
6758 * would result in 8k stack size (32 which is tailcall limit * 256 =
6759 * 8k).
6760 *
6761 * To get the idea what might happen, see an example:
6762 * func1 -> sub rsp, 128
6763 * subfunc1 -> sub rsp, 256
6764 * tailcall1 -> add rsp, 256
6765 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
6766 * subfunc2 -> sub rsp, 64
6767 * subfunc22 -> sub rsp, 128
6768 * tailcall2 -> add rsp, 128
6769 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
6770 *
6771 * tailcall will unwind the current stack frame but it will not get rid
6772 * of caller's stack as shown on the example above.
6773 */
6774 if (idx && subprog[idx].has_tail_call && depth >= 256) {
6775 verbose(env,
6776 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
6777 depth);
6778 return -EACCES;
6779 }
6780
6781 subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
6782 if (priv_stack_supported) {
6783 /* Request private stack support only if the subprog stack
6784 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
6785 * avoid jit penalty if the stack usage is small.
6786 */
6787 if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
6788 subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
6789 subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
6790 }
6791
6792 if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
6793 if (subprog_depth > MAX_BPF_STACK) {
6794 verbose(env, "stack size of subprog %d is %d. Too large\n",
6795 idx, subprog_depth);
6796 return -EACCES;
6797 }
6798 } else {
6799 depth += subprog_depth;
6800 if (depth > MAX_BPF_STACK) {
6801 verbose(env, "combined stack size of %d calls is %d. Too large\n",
6802 frame + 1, depth);
6803 return -EACCES;
6804 }
6805 }
6806 continue_func:
6807 subprog_end = subprog[idx + 1].start;
6808 for (; i < subprog_end; i++) {
6809 int next_insn, sidx;
6810
6811 if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
6812 bool err = false;
6813
6814 if (!is_bpf_throw_kfunc(insn + i))
6815 continue;
6816 if (subprog[idx].is_cb)
6817 err = true;
6818 for (int c = 0; c < frame && !err; c++) {
6819 if (subprog[ret_prog[c]].is_cb) {
6820 err = true;
6821 break;
6822 }
6823 }
6824 if (!err)
6825 continue;
6826 verbose(env,
6827 "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
6828 i, idx);
6829 return -EINVAL;
6830 }
6831
6832 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
6833 continue;
6834 /* remember insn and function to return to */
6835 ret_insn[frame] = i + 1;
6836 ret_prog[frame] = idx;
6837
6838 /* find the callee */
6839 next_insn = i + insn[i].imm + 1;
6840 sidx = find_subprog(env, next_insn);
6841 if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
6842 return -EFAULT;
6843 if (subprog[sidx].is_async_cb) {
6844 if (subprog[sidx].has_tail_call) {
6845 verifier_bug(env, "subprog has tail_call and async cb");
6846 return -EFAULT;
6847 }
6848 /* async callbacks don't increase bpf prog stack size unless called directly */
6849 if (!bpf_pseudo_call(insn + i))
6850 continue;
6851 if (subprog[sidx].is_exception_cb) {
6852 verbose(env, "insn %d cannot call exception cb directly", i);
6853 return -EINVAL;
6854 }
6855 }
6856 i = next_insn;
6857 idx = sidx;
6858 if (!priv_stack_supported)
6859 subprog[idx].priv_stack_mode = NO_PRIV_STACK;
6860
6861 if (subprog[idx].has_tail_call)
6862 tail_call_reachable = true;
6863
6864 frame++;
6865 if (frame >= MAX_CALL_FRAMES) {
6866 verbose(env, "the call stack of %d frames is too deep !\n",
6867 frame);
6868 return -E2BIG;
6869 }
6870 goto process_func;
6871 }
6872 /* if tail call got detected across bpf2bpf calls then mark each of the
6873 * currently present subprog frames as tail call reachable subprogs;
6874 * this info will be utilized by JIT so that we will be preserving the
6875 * tail call counter throughout bpf2bpf calls combined with tailcalls
6876 */
6877 if (tail_call_reachable)
6878 for (j = 0; j < frame; j++) {
6879 if (subprog[ret_prog[j]].is_exception_cb) {
6880 verbose(env, "cannot tail call within exception cb\n");
6881 return -EINVAL;
6882 }
6883 subprog[ret_prog[j]].tail_call_reachable = true;
6884 }
6885 if (subprog[0].tail_call_reachable)
6886 env->prog->aux->tail_call_reachable = true;
6887
6888 /* end of for() loop means the last insn of the 'subprog'
6889 * was reached. Doesn't matter whether it was JA or EXIT
6890 */
6891 if (frame == 0)
6892 return 0;
6893 if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
6894 depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
6895 frame--;
6896 i = ret_insn[frame];
6897 idx = ret_prog[frame];
6898 goto continue_func;
6899 }
6900
check_max_stack_depth(struct bpf_verifier_env * env)6901 static int check_max_stack_depth(struct bpf_verifier_env *env)
6902 {
6903 enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
6904 struct bpf_subprog_info *si = env->subprog_info;
6905 bool priv_stack_supported;
6906 int ret;
6907
6908 for (int i = 0; i < env->subprog_cnt; i++) {
6909 if (si[i].has_tail_call) {
6910 priv_stack_mode = NO_PRIV_STACK;
6911 break;
6912 }
6913 }
6914
6915 if (priv_stack_mode == PRIV_STACK_UNKNOWN)
6916 priv_stack_mode = bpf_enable_priv_stack(env->prog);
6917
6918 /* All async_cb subprogs use normal kernel stack. If a particular
6919 * subprog appears in both main prog and async_cb subtree, that
6920 * subprog will use normal kernel stack to avoid potential nesting.
6921 * The reverse subprog traversal ensures when main prog subtree is
6922 * checked, the subprogs appearing in async_cb subtrees are already
6923 * marked as using normal kernel stack, so stack size checking can
6924 * be done properly.
6925 */
6926 for (int i = env->subprog_cnt - 1; i >= 0; i--) {
6927 if (!i || si[i].is_async_cb) {
6928 priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
6929 ret = check_max_stack_depth_subprog(env, i, priv_stack_supported);
6930 if (ret < 0)
6931 return ret;
6932 }
6933 }
6934
6935 for (int i = 0; i < env->subprog_cnt; i++) {
6936 if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
6937 env->prog->aux->jits_use_priv_stack = true;
6938 break;
6939 }
6940 }
6941
6942 return 0;
6943 }
6944
6945 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
get_callee_stack_depth(struct bpf_verifier_env * env,const struct bpf_insn * insn,int idx)6946 static int get_callee_stack_depth(struct bpf_verifier_env *env,
6947 const struct bpf_insn *insn, int idx)
6948 {
6949 int start = idx + insn->imm + 1, subprog;
6950
6951 subprog = find_subprog(env, start);
6952 if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
6953 return -EFAULT;
6954 return env->subprog_info[subprog].stack_depth;
6955 }
6956 #endif
6957
__check_buffer_access(struct bpf_verifier_env * env,const char * buf_info,const struct bpf_reg_state * reg,int regno,int off,int size)6958 static int __check_buffer_access(struct bpf_verifier_env *env,
6959 const char *buf_info,
6960 const struct bpf_reg_state *reg,
6961 int regno, int off, int size)
6962 {
6963 if (off < 0) {
6964 verbose(env,
6965 "R%d invalid %s buffer access: off=%d, size=%d\n",
6966 regno, buf_info, off, size);
6967 return -EACCES;
6968 }
6969 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6970 char tn_buf[48];
6971
6972 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6973 verbose(env,
6974 "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
6975 regno, off, tn_buf);
6976 return -EACCES;
6977 }
6978
6979 return 0;
6980 }
6981
check_tp_buffer_access(struct bpf_verifier_env * env,const struct bpf_reg_state * reg,int regno,int off,int size)6982 static int check_tp_buffer_access(struct bpf_verifier_env *env,
6983 const struct bpf_reg_state *reg,
6984 int regno, int off, int size)
6985 {
6986 int err;
6987
6988 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
6989 if (err)
6990 return err;
6991
6992 if (off + size > env->prog->aux->max_tp_access)
6993 env->prog->aux->max_tp_access = off + size;
6994
6995 return 0;
6996 }
6997
/* Check a read-only or read-write buffer access and grow *@max_access to
 * cover off + size when the access reaches further than anything seen so far.
 */
static int check_buffer_access(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg,
			       int regno, int off, int size,
			       bool zero_size_allowed,
			       u32 *max_access)
{
	const char *kind;
	int ret;

	if (type_is_rdonly_mem(reg->type))
		kind = "rdonly";
	else
		kind = "rdwr";

	ret = __check_buffer_access(env, kind, reg, regno, off, size);
	if (ret)
		return ret;

	if (off + size > *max_access)
		*max_access = off + size;

	return 0;
}
7016
7017 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
zext_32_to_64(struct bpf_reg_state * reg)7018 static void zext_32_to_64(struct bpf_reg_state *reg)
7019 {
7020 reg->var_off = tnum_subreg(reg->var_off);
7021 __reg_assign_32_into_64(reg);
7022 }
7023
7024 /* truncate register to smaller size (in bytes)
7025 * must be called with size < BPF_REG_SIZE
7026 */
coerce_reg_to_size(struct bpf_reg_state * reg,int size)7027 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
7028 {
7029 u64 mask;
7030
7031 /* clear high bits in bit representation */
7032 reg->var_off = tnum_cast(reg->var_off, size);
7033
7034 /* fix arithmetic bounds */
7035 mask = ((u64)1 << (size * 8)) - 1;
7036 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
7037 reg->umin_value &= mask;
7038 reg->umax_value &= mask;
7039 } else {
7040 reg->umin_value = 0;
7041 reg->umax_value = mask;
7042 }
7043 reg->smin_value = reg->umin_value;
7044 reg->smax_value = reg->umax_value;
7045
7046 /* If size is smaller than 32bit register the 32bit register
7047 * values are also truncated so we push 64-bit bounds into
7048 * 32-bit bounds. Above were truncated < 32-bits already.
7049 */
7050 if (size < 4)
7051 __mark_reg32_unbounded(reg);
7052
7053 reg_bounds_sync(reg);
7054 }
7055
set_sext64_default_val(struct bpf_reg_state * reg,int size)7056 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
7057 {
7058 if (size == 1) {
7059 reg->smin_value = reg->s32_min_value = S8_MIN;
7060 reg->smax_value = reg->s32_max_value = S8_MAX;
7061 } else if (size == 2) {
7062 reg->smin_value = reg->s32_min_value = S16_MIN;
7063 reg->smax_value = reg->s32_max_value = S16_MAX;
7064 } else {
7065 /* size == 4 */
7066 reg->smin_value = reg->s32_min_value = S32_MIN;
7067 reg->smax_value = reg->s32_max_value = S32_MAX;
7068 }
7069 reg->umin_value = reg->u32_min_value = 0;
7070 reg->umax_value = U64_MAX;
7071 reg->u32_max_value = U32_MAX;
7072 reg->var_off = tnum_unknown;
7073 }
7074
coerce_reg_to_size_sx(struct bpf_reg_state * reg,int size)7075 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
7076 {
7077 s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
7078 u64 top_smax_value, top_smin_value;
7079 u64 num_bits = size * 8;
7080
7081 if (tnum_is_const(reg->var_off)) {
7082 u64_cval = reg->var_off.value;
7083 if (size == 1)
7084 reg->var_off = tnum_const((s8)u64_cval);
7085 else if (size == 2)
7086 reg->var_off = tnum_const((s16)u64_cval);
7087 else
7088 /* size == 4 */
7089 reg->var_off = tnum_const((s32)u64_cval);
7090
7091 u64_cval = reg->var_off.value;
7092 reg->smax_value = reg->smin_value = u64_cval;
7093 reg->umax_value = reg->umin_value = u64_cval;
7094 reg->s32_max_value = reg->s32_min_value = u64_cval;
7095 reg->u32_max_value = reg->u32_min_value = u64_cval;
7096 return;
7097 }
7098
7099 top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
7100 top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
7101
7102 if (top_smax_value != top_smin_value)
7103 goto out;
7104
7105 /* find the s64_min and s64_min after sign extension */
7106 if (size == 1) {
7107 init_s64_max = (s8)reg->smax_value;
7108 init_s64_min = (s8)reg->smin_value;
7109 } else if (size == 2) {
7110 init_s64_max = (s16)reg->smax_value;
7111 init_s64_min = (s16)reg->smin_value;
7112 } else {
7113 init_s64_max = (s32)reg->smax_value;
7114 init_s64_min = (s32)reg->smin_value;
7115 }
7116
7117 s64_max = max(init_s64_max, init_s64_min);
7118 s64_min = min(init_s64_max, init_s64_min);
7119
7120 /* both of s64_max/s64_min positive or negative */
7121 if ((s64_max >= 0) == (s64_min >= 0)) {
7122 reg->s32_min_value = reg->smin_value = s64_min;
7123 reg->s32_max_value = reg->smax_value = s64_max;
7124 reg->u32_min_value = reg->umin_value = s64_min;
7125 reg->u32_max_value = reg->umax_value = s64_max;
7126 reg->var_off = tnum_range(s64_min, s64_max);
7127 return;
7128 }
7129
7130 out:
7131 set_sext64_default_val(reg, size);
7132 }
7133
set_sext32_default_val(struct bpf_reg_state * reg,int size)7134 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
7135 {
7136 if (size == 1) {
7137 reg->s32_min_value = S8_MIN;
7138 reg->s32_max_value = S8_MAX;
7139 } else {
7140 /* size == 2 */
7141 reg->s32_min_value = S16_MIN;
7142 reg->s32_max_value = S16_MAX;
7143 }
7144 reg->u32_min_value = 0;
7145 reg->u32_max_value = U32_MAX;
7146 reg->var_off = tnum_subreg(tnum_unknown);
7147 }
7148
coerce_subreg_to_size_sx(struct bpf_reg_state * reg,int size)7149 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
7150 {
7151 s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
7152 u32 top_smax_value, top_smin_value;
7153 u32 num_bits = size * 8;
7154
7155 if (tnum_is_const(reg->var_off)) {
7156 u32_val = reg->var_off.value;
7157 if (size == 1)
7158 reg->var_off = tnum_const((s8)u32_val);
7159 else
7160 reg->var_off = tnum_const((s16)u32_val);
7161
7162 u32_val = reg->var_off.value;
7163 reg->s32_min_value = reg->s32_max_value = u32_val;
7164 reg->u32_min_value = reg->u32_max_value = u32_val;
7165 return;
7166 }
7167
7168 top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
7169 top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
7170
7171 if (top_smax_value != top_smin_value)
7172 goto out;
7173
7174 /* find the s32_min and s32_min after sign extension */
7175 if (size == 1) {
7176 init_s32_max = (s8)reg->s32_max_value;
7177 init_s32_min = (s8)reg->s32_min_value;
7178 } else {
7179 /* size == 2 */
7180 init_s32_max = (s16)reg->s32_max_value;
7181 init_s32_min = (s16)reg->s32_min_value;
7182 }
7183 s32_max = max(init_s32_max, init_s32_min);
7184 s32_min = min(init_s32_max, init_s32_min);
7185
7186 if ((s32_min >= 0) == (s32_max >= 0)) {
7187 reg->s32_min_value = s32_min;
7188 reg->s32_max_value = s32_max;
7189 reg->u32_min_value = (u32)s32_min;
7190 reg->u32_max_value = (u32)s32_max;
7191 reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
7192 return;
7193 }
7194
7195 out:
7196 set_sext32_default_val(reg, size);
7197 }
7198
bpf_map_is_rdonly(const struct bpf_map * map)7199 static bool bpf_map_is_rdonly(const struct bpf_map *map)
7200 {
7201 /* A map is considered read-only if the following condition are true:
7202 *
7203 * 1) BPF program side cannot change any of the map content. The
7204 * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
7205 * and was set at map creation time.
7206 * 2) The map value(s) have been initialized from user space by a
7207 * loader and then "frozen", such that no new map update/delete
7208 * operations from syscall side are possible for the rest of
7209 * the map's lifetime from that point onwards.
7210 * 3) Any parallel/pending map update/delete operations from syscall
7211 * side have been completed. Only after that point, it's safe to
7212 * assume that map value(s) are immutable.
7213 */
7214 return (map->map_flags & BPF_F_RDONLY_PROG) &&
7215 READ_ONCE(map->frozen) &&
7216 !bpf_map_write_active(map);
7217 }
7218
/*
 * Read 'size' bytes (1, 2, 4 or 8) at byte offset 'off' of the map's directly
 * addressable value area and store the result in '*val'.
 *
 * For sizes below 8 the value is sign-extended to 64 bits when 'is_ldsx' is
 * true and zero-extended otherwise.
 *
 * Returns 0 on success, the error from ->map_direct_value_addr() if the
 * address cannot be resolved, or -EINVAL for an unsupported size.
 */
static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
			       bool is_ldsx)
{
	u64 base;
	void *src;
	int ret;

	ret = map->ops->map_direct_value_addr(map, &base, off);
	if (ret)
		return ret;
	src = (void *)(long)base + off;

	switch (size) {
	case sizeof(u8):
		if (is_ldsx)
			*val = (s64)*(s8 *)src;
		else
			*val = (u64)*(u8 *)src;
		break;
	case sizeof(u16):
		if (is_ldsx)
			*val = (s64)*(s16 *)src;
		else
			*val = (u64)*(u16 *)src;
		break;
	case sizeof(u32):
		if (is_ldsx)
			*val = (s64)*(s32 *)src;
		else
			*val = (u64)*(u32 *)src;
		break;
	case sizeof(u64):
		*val = *(u64 *)src;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
7249
/*
 * Name builders for the allow-list structs defined below. Each macro appends
 * a fixed suffix to the given type name (presumably plain token pasting via
 * __PASTE), e.g. BTF_TYPE_SAFE_RCU(struct task_struct) names
 * 'struct task_struct__safe_rcu'. The suffixes are the same strings that the
 * type_is_*() helpers pass to btf_nested_type_is_trusted().
 */
#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type) __PASTE(__type, __safe_trusted_or_null)
7254
/*
 * Allow-list a few fields as RCU trusted or fully trusted.
 * This logic does not allow mixed tagging and will be removed once GCC
 * supports btf_type_tag.
 */
7260
/* RCU trusted: these fields are trusted in RCU CS and never NULL.
 * type_is_rcu() looks fields up in these "__safe_rcu" shadow types.
 */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};
7281
/* RCU trusted: these fields are trusted in RCU CS and can be NULL.
 * type_is_rcu_or_null() looks fields up in these "__safe_rcu_or_null"
 * shadow types.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	/* Listed only when CONFIG_MEMCG is enabled. */
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};
7300
/* full trusted: these fields are trusted even outside of RCU CS and never NULL.
 * type_is_trusted() looks fields up in these "__safe_trusted" shadow types.
 */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};
7318
/* full trusted, but possibly NULL: type_is_trusted_or_null() looks fields up
 * in these "__safe_trusted_or_null" shadow types, and a match is marked
 * PTR_TRUSTED | PTR_MAYBE_NULL by check_ptr_to_btf_access().
 */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};
7331
type_is_rcu(struct bpf_verifier_env * env,struct bpf_reg_state * reg,const char * field_name,u32 btf_id)7332 static bool type_is_rcu(struct bpf_verifier_env *env,
7333 struct bpf_reg_state *reg,
7334 const char *field_name, u32 btf_id)
7335 {
7336 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
7337 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
7338 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
7339 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));
7340
7341 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
7342 }
7343
type_is_rcu_or_null(struct bpf_verifier_env * env,struct bpf_reg_state * reg,const char * field_name,u32 btf_id)7344 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
7345 struct bpf_reg_state *reg,
7346 const char *field_name, u32 btf_id)
7347 {
7348 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
7349 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
7350 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
7351
7352 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
7353 }
7354
type_is_trusted(struct bpf_verifier_env * env,struct bpf_reg_state * reg,const char * field_name,u32 btf_id)7355 static bool type_is_trusted(struct bpf_verifier_env *env,
7356 struct bpf_reg_state *reg,
7357 const char *field_name, u32 btf_id)
7358 {
7359 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
7360 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
7361 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
7362 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
7363
7364 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
7365 }
7366
type_is_trusted_or_null(struct bpf_verifier_env * env,struct bpf_reg_state * reg,const char * field_name,u32 btf_id)7367 static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
7368 struct bpf_reg_state *reg,
7369 const char *field_name, u32 btf_id)
7370 {
7371 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
7372 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
7373 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));
7374
7375 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
7376 "__safe_trusted_or_null");
7377 }
7378
/*
 * Verify an access of 'size' bytes at offset 'off' through the BTF-typed
 * pointer held in regs[regno]. For a read with value_regno >= 0, also set up
 * the destination register from the accessed field, including the trust
 * flags it inherits.
 *
 * Rejected up front:
 *  - programs without allow_ptr_leaks (-EPERM);
 *  - non-GPL-compatible programs accessing kernel BTF types (-EINVAL);
 *  - negative offsets, non-constant or non-zero var_off, and pointers tagged
 *    MEM_USER or MEM_PERCPU (-EACCES).
 *
 * Writes to objects that are not program-allocated go through the program
 * type's ->btf_struct_access() hook when one exists; everything else uses
 * the generic btf_struct_access(), which only permits writes to
 * program-allocated objects.
 *
 * Returns 0 on success or a negative errno.
 */
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
	const char *field_name = NULL;
	enum bpf_type_flag flag = 0;
	u32 btf_id = 0;
	int ret;

	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}
	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
		verbose(env,
			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
			tname);
		return -EINVAL;
	}
	if (off < 0) {
		verbose(env,
			"R%d is ptr_%s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}
	/* Only a constant, zero variable offset is accepted. */
	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
			regno, tname, off, tn_buf);
		return -EACCES;
	}

	if (reg->type & MEM_USER) {
		verbose(env,
			"R%d is ptr_%s access user memory: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_PERCPU) {
		verbose(env,
			"R%d is ptr_%s access percpu memory: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
		/* Program-type specific hook decides about writes to objects
		 * that are not program-allocated. Note that btf_id, flag and
		 * field_name keep their initial values on this path.
		 */
		if (!btf_is_kernel(reg->btf)) {
			verifier_bug(env, "reg->btf must be kernel btf");
			return -EFAULT;
		}
		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
	} else {
		/* Writes are permitted with default btf_struct_access for
		 * program allocated objects (which always have ref_obj_id > 0),
		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
		 */
		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
			verbose(env, "only read is supported\n");
			return -EACCES;
		}

		/* Sanity check: an owning reference to an allocated object
		 * (not a non-owning ref, not MEM_RCU) must carry a ref_obj_id.
		 */
		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
		    !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
			verifier_bug(env, "ref_obj_id for allocated object must be non-zero");
			return -EFAULT;
		}

		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
	}

	if (ret < 0)
		return ret;

	/* From here on 'ret' is the register type of the accessed field.
	 * Decide which trust flags a loaded pointer inherits.
	 */
	if (ret != PTR_TO_BTF_ID) {
		/* just mark; */

	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
		/* If this is an untrusted pointer, all pointers formed by walking it
		 * also inherit the untrusted flag.
		 */
		flag = PTR_UNTRUSTED;

	} else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
		if (type_is_trusted(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED;
		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, field_name, btf_id)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU ||
				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
				/* __rcu tagged pointers can be NULL */
				flag |= MEM_RCU | PTR_MAYBE_NULL;

				/* We always trust them */
				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
				    flag & PTR_UNTRUSTED)
					flag &= ~PTR_UNTRUSTED;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
				clear_trusted_flags(&flag);
			}
		} else {
			/*
			 * If not in RCU CS or MEM_RCU pointer can be NULL then
			 * aggressively mark as untrusted otherwise such
			 * pointers will be plain PTR_TO_BTF_ID without flags
			 * and will be allowed to be passed into helpers for
			 * compat reasons.
			 */
			flag = PTR_UNTRUSTED;
		}
	} else {
		/* Old compat. Deprecated */
		clear_trusted_flags(&flag);
	}

	/* Only a read into a real destination register updates register state. */
	if (atype == BPF_READ && value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}
7531
check_ptr_to_map_access(struct bpf_verifier_env * env,struct bpf_reg_state * regs,int regno,int off,int size,enum bpf_access_type atype,int value_regno)7532 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
7533 struct bpf_reg_state *regs,
7534 int regno, int off, int size,
7535 enum bpf_access_type atype,
7536 int value_regno)
7537 {
7538 struct bpf_reg_state *reg = regs + regno;
7539 struct bpf_map *map = reg->map_ptr;
7540 struct bpf_reg_state map_reg;
7541 enum bpf_type_flag flag = 0;
7542 const struct btf_type *t;
7543 const char *tname;
7544 u32 btf_id;
7545 int ret;
7546
7547 if (!btf_vmlinux) {
7548 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
7549 return -ENOTSUPP;
7550 }
7551
7552 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
7553 verbose(env, "map_ptr access not supported for map type %d\n",
7554 map->map_type);
7555 return -ENOTSUPP;
7556 }
7557
7558 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
7559 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
7560
7561 if (!env->allow_ptr_leaks) {
7562 verbose(env,
7563 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
7564 tname);
7565 return -EPERM;
7566 }
7567
7568 if (off < 0) {
7569 verbose(env, "R%d is %s invalid negative access: off=%d\n",
7570 regno, tname, off);
7571 return -EACCES;
7572 }
7573
7574 if (atype != BPF_READ) {
7575 verbose(env, "only read from %s is supported\n", tname);
7576 return -EACCES;
7577 }
7578
7579 /* Simulate access to a PTR_TO_BTF_ID */
7580 memset(&map_reg, 0, sizeof(map_reg));
7581 ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
7582 btf_vmlinux, *map->ops->map_btf_id, 0);
7583 if (ret < 0)
7584 return ret;
7585 ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
7586 if (ret < 0)
7587 return ret;
7588
7589 if (value_regno >= 0) {
7590 ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
7591 if (ret < 0)
7592 return ret;
7593 }
7594
7595 return 0;
7596 }
7597
7598 /* Check that the stack access at the given offset is within bounds. The
7599 * maximum valid offset is -1.
7600 *
7601 * The minimum valid offset is -MAX_BPF_STACK for writes, and
7602 * -state->allocated_stack for reads.
7603 */
check_stack_slot_within_bounds(struct bpf_verifier_env * env,s64 off,struct bpf_func_state * state,enum bpf_access_type t)7604 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
7605 s64 off,
7606 struct bpf_func_state *state,
7607 enum bpf_access_type t)
7608 {
7609 int min_valid_off;
7610
7611 if (t == BPF_WRITE || env->allow_uninit_stack)
7612 min_valid_off = -MAX_BPF_STACK;
7613 else
7614 min_valid_off = -state->allocated_stack;
7615
7616 if (off < min_valid_off || off > -1)
7617 return -EACCES;
7618 return 0;
7619 }
7620
7621 /* Check that the stack access at 'regno + off' falls within the maximum stack
7622 * bounds.
7623 *
7624 * 'off' includes `regno->offset`, but not its dynamic part (if any).
7625 */
check_stack_access_within_bounds(struct bpf_verifier_env * env,int regno,int off,int access_size,enum bpf_access_type type)7626 static int check_stack_access_within_bounds(
7627 struct bpf_verifier_env *env,
7628 int regno, int off, int access_size,
7629 enum bpf_access_type type)
7630 {
7631 struct bpf_reg_state *reg = reg_state(env, regno);
7632 struct bpf_func_state *state = func(env, reg);
7633 s64 min_off, max_off;
7634 int err;
7635 char *err_extra;
7636
7637 if (type == BPF_READ)
7638 err_extra = " read from";
7639 else
7640 err_extra = " write to";
7641
7642 if (tnum_is_const(reg->var_off)) {
7643 min_off = (s64)reg->var_off.value + off;
7644 max_off = min_off + access_size;
7645 } else {
7646 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
7647 reg->smin_value <= -BPF_MAX_VAR_OFF) {
7648 verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
7649 err_extra, regno);
7650 return -EACCES;
7651 }
7652 min_off = reg->smin_value + off;
7653 max_off = reg->smax_value + off + access_size;
7654 }
7655
7656 err = check_stack_slot_within_bounds(env, min_off, state, type);
7657 if (!err && max_off > 0)
7658 err = -EINVAL; /* out of stack access into non-negative offsets */
7659 if (!err && access_size < 0)
7660 /* access_size should not be negative (or overflow an int); others checks
7661 * along the way should have prevented such an access.
7662 */
7663 err = -EFAULT; /* invalid negative access size; integer overflow? */
7664
7665 if (err) {
7666 if (tnum_is_const(reg->var_off)) {
7667 verbose(env, "invalid%s stack R%d off=%d size=%d\n",
7668 err_extra, regno, off, access_size);
7669 } else {
7670 char tn_buf[48];
7671
7672 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7673 verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
7674 err_extra, regno, tn_buf, off, access_size);
7675 }
7676 return err;
7677 }
7678
7679 /* Note that there is no stack access with offset zero, so the needed stack
7680 * size is -min_off, not -min_off+1.
7681 */
7682 return grow_stack_state(env, state, -min_off /* size */);
7683 }
7684
get_func_retval_range(struct bpf_prog * prog,struct bpf_retval_range * range)7685 static bool get_func_retval_range(struct bpf_prog *prog,
7686 struct bpf_retval_range *range)
7687 {
7688 if (prog->type == BPF_PROG_TYPE_LSM &&
7689 prog->expected_attach_type == BPF_LSM_MAC &&
7690 !bpf_lsm_get_retval_range(prog, range)) {
7691 return true;
7692 }
7693 return false;
7694 }
7695
7696 /* check whether memory at (regno + off) is accessible for t = (read | write)
7697 * if t==write, value_regno is a register which value is stored into memory
7698 * if t==read, value_regno is a register which will receive the value from memory
7699 * if t==write && value_regno==-1, some unknown value is stored into memory
7700 * if t==read && value_regno==-1, don't care what we read from memory
7701 */
check_mem_access(struct bpf_verifier_env * env,int insn_idx,u32 regno,int off,int bpf_size,enum bpf_access_type t,int value_regno,bool strict_alignment_once,bool is_ldsx)7702 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
7703 int off, int bpf_size, enum bpf_access_type t,
7704 int value_regno, bool strict_alignment_once, bool is_ldsx)
7705 {
7706 struct bpf_reg_state *regs = cur_regs(env);
7707 struct bpf_reg_state *reg = regs + regno;
7708 int size, err = 0;
7709
7710 size = bpf_size_to_bytes(bpf_size);
7711 if (size < 0)
7712 return size;
7713
7714 /* alignment checks will add in reg->off themselves */
7715 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
7716 if (err)
7717 return err;
7718
7719 /* for access checks, reg->off is just part of off */
7720 off += reg->off;
7721
7722 if (reg->type == PTR_TO_MAP_KEY) {
7723 if (t == BPF_WRITE) {
7724 verbose(env, "write to change key R%d not allowed\n", regno);
7725 return -EACCES;
7726 }
7727
7728 err = check_mem_region_access(env, regno, off, size,
7729 reg->map_ptr->key_size, false);
7730 if (err)
7731 return err;
7732 if (value_regno >= 0)
7733 mark_reg_unknown(env, regs, value_regno);
7734 } else if (reg->type == PTR_TO_MAP_VALUE) {
7735 struct btf_field *kptr_field = NULL;
7736
7737 if (t == BPF_WRITE && value_regno >= 0 &&
7738 is_pointer_value(env, value_regno)) {
7739 verbose(env, "R%d leaks addr into map\n", value_regno);
7740 return -EACCES;
7741 }
7742 err = check_map_access_type(env, regno, off, size, t);
7743 if (err)
7744 return err;
7745 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
7746 if (err)
7747 return err;
7748 if (tnum_is_const(reg->var_off))
7749 kptr_field = btf_record_find(reg->map_ptr->record,
7750 off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
7751 if (kptr_field) {
7752 err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
7753 } else if (t == BPF_READ && value_regno >= 0) {
7754 struct bpf_map *map = reg->map_ptr;
7755
7756 /*
7757 * If map is read-only, track its contents as scalars,
7758 * unless it is an insn array (see the special case below)
7759 */
7760 if (tnum_is_const(reg->var_off) &&
7761 bpf_map_is_rdonly(map) &&
7762 map->ops->map_direct_value_addr &&
7763 map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
7764 int map_off = off + reg->var_off.value;
7765 u64 val = 0;
7766
7767 err = bpf_map_direct_read(map, map_off, size,
7768 &val, is_ldsx);
7769 if (err)
7770 return err;
7771
7772 regs[value_regno].type = SCALAR_VALUE;
7773 __mark_reg_known(®s[value_regno], val);
7774 } else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
7775 if (bpf_size != BPF_DW) {
7776 verbose(env, "Invalid read of %d bytes from insn_array\n",
7777 size);
7778 return -EACCES;
7779 }
7780 copy_register_state(®s[value_regno], reg);
7781 regs[value_regno].type = PTR_TO_INSN;
7782 } else {
7783 mark_reg_unknown(env, regs, value_regno);
7784 }
7785 }
7786 } else if (base_type(reg->type) == PTR_TO_MEM) {
7787 bool rdonly_mem = type_is_rdonly_mem(reg->type);
7788 bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
7789
7790 if (type_may_be_null(reg->type)) {
7791 verbose(env, "R%d invalid mem access '%s'\n", regno,
7792 reg_type_str(env, reg->type));
7793 return -EACCES;
7794 }
7795
7796 if (t == BPF_WRITE && rdonly_mem) {
7797 verbose(env, "R%d cannot write into %s\n",
7798 regno, reg_type_str(env, reg->type));
7799 return -EACCES;
7800 }
7801
7802 if (t == BPF_WRITE && value_regno >= 0 &&
7803 is_pointer_value(env, value_regno)) {
7804 verbose(env, "R%d leaks addr into mem\n", value_regno);
7805 return -EACCES;
7806 }
7807
7808 /*
7809 * Accesses to untrusted PTR_TO_MEM are done through probe
7810 * instructions, hence no need to check bounds in that case.
7811 */
7812 if (!rdonly_untrusted)
7813 err = check_mem_region_access(env, regno, off, size,
7814 reg->mem_size, false);
7815 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
7816 mark_reg_unknown(env, regs, value_regno);
7817 } else if (reg->type == PTR_TO_CTX) {
7818 struct bpf_retval_range range;
7819 struct bpf_insn_access_aux info = {
7820 .reg_type = SCALAR_VALUE,
7821 .is_ldsx = is_ldsx,
7822 .log = &env->log,
7823 };
7824
7825 if (t == BPF_WRITE && value_regno >= 0 &&
7826 is_pointer_value(env, value_regno)) {
7827 verbose(env, "R%d leaks addr into ctx\n", value_regno);
7828 return -EACCES;
7829 }
7830
7831 err = check_ptr_off_reg(env, reg, regno);
7832 if (err < 0)
7833 return err;
7834
7835 err = check_ctx_access(env, insn_idx, off, size, t, &info);
7836 if (err)
7837 verbose_linfo(env, insn_idx, "; ");
7838 if (!err && t == BPF_READ && value_regno >= 0) {
7839 /* ctx access returns either a scalar, or a
7840 * PTR_TO_PACKET[_META,_END]. In the latter
7841 * case, we know the offset is zero.
7842 */
7843 if (info.reg_type == SCALAR_VALUE) {
7844 if (info.is_retval && get_func_retval_range(env->prog, &range)) {
7845 err = __mark_reg_s32_range(env, regs, value_regno,
7846 range.minval, range.maxval);
7847 if (err)
7848 return err;
7849 } else {
7850 mark_reg_unknown(env, regs, value_regno);
7851 }
7852 } else {
7853 mark_reg_known_zero(env, regs,
7854 value_regno);
7855 if (type_may_be_null(info.reg_type))
7856 regs[value_regno].id = ++env->id_gen;
7857 /* A load of ctx field could have different
7858 * actual load size with the one encoded in the
7859 * insn. When the dst is PTR, it is for sure not
7860 * a sub-register.
7861 */
7862 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
7863 if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
7864 regs[value_regno].btf = info.btf;
7865 regs[value_regno].btf_id = info.btf_id;
7866 regs[value_regno].ref_obj_id = info.ref_obj_id;
7867 }
7868 }
7869 regs[value_regno].type = info.reg_type;
7870 }
7871
7872 } else if (reg->type == PTR_TO_STACK) {
7873 /* Basic bounds checks. */
7874 err = check_stack_access_within_bounds(env, regno, off, size, t);
7875 if (err)
7876 return err;
7877
7878 if (t == BPF_READ)
7879 err = check_stack_read(env, regno, off, size,
7880 value_regno);
7881 else
7882 err = check_stack_write(env, regno, off, size,
7883 value_regno, insn_idx);
7884 } else if (reg_is_pkt_pointer(reg)) {
7885 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
7886 verbose(env, "cannot write into packet\n");
7887 return -EACCES;
7888 }
7889 if (t == BPF_WRITE && value_regno >= 0 &&
7890 is_pointer_value(env, value_regno)) {
7891 verbose(env, "R%d leaks addr into packet\n",
7892 value_regno);
7893 return -EACCES;
7894 }
7895 err = check_packet_access(env, regno, off, size, false);
7896 if (!err && t == BPF_READ && value_regno >= 0)
7897 mark_reg_unknown(env, regs, value_regno);
7898 } else if (reg->type == PTR_TO_FLOW_KEYS) {
7899 if (t == BPF_WRITE && value_regno >= 0 &&
7900 is_pointer_value(env, value_regno)) {
7901 verbose(env, "R%d leaks addr into flow keys\n",
7902 value_regno);
7903 return -EACCES;
7904 }
7905
7906 err = check_flow_keys_access(env, off, size);
7907 if (!err && t == BPF_READ && value_regno >= 0)
7908 mark_reg_unknown(env, regs, value_regno);
7909 } else if (type_is_sk_pointer(reg->type)) {
7910 if (t == BPF_WRITE) {
7911 verbose(env, "R%d cannot write into %s\n",
7912 regno, reg_type_str(env, reg->type));
7913 return -EACCES;
7914 }
7915 err = check_sock_access(env, insn_idx, regno, off, size, t);
7916 if (!err && value_regno >= 0)
7917 mark_reg_unknown(env, regs, value_regno);
7918 } else if (reg->type == PTR_TO_TP_BUFFER) {
7919 err = check_tp_buffer_access(env, reg, regno, off, size);
7920 if (!err && t == BPF_READ && value_regno >= 0)
7921 mark_reg_unknown(env, regs, value_regno);
7922 } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
7923 !type_may_be_null(reg->type)) {
7924 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
7925 value_regno);
7926 } else if (reg->type == CONST_PTR_TO_MAP) {
7927 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
7928 value_regno);
7929 } else if (base_type(reg->type) == PTR_TO_BUF &&
7930 !type_may_be_null(reg->type)) {
7931 bool rdonly_mem = type_is_rdonly_mem(reg->type);
7932 u32 *max_access;
7933
7934 if (rdonly_mem) {
7935 if (t == BPF_WRITE) {
7936 verbose(env, "R%d cannot write into %s\n",
7937 regno, reg_type_str(env, reg->type));
7938 return -EACCES;
7939 }
7940 max_access = &env->prog->aux->max_rdonly_access;
7941 } else {
7942 max_access = &env->prog->aux->max_rdwr_access;
7943 }
7944
7945 err = check_buffer_access(env, reg, regno, off, size, false,
7946 max_access);
7947
7948 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
7949 mark_reg_unknown(env, regs, value_regno);
7950 } else if (reg->type == PTR_TO_ARENA) {
7951 if (t == BPF_READ && value_regno >= 0)
7952 mark_reg_unknown(env, regs, value_regno);
7953 } else {
7954 verbose(env, "R%d invalid mem access '%s'\n", regno,
7955 reg_type_str(env, reg->type));
7956 return -EACCES;
7957 }
7958
7959 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
7960 regs[value_regno].type == SCALAR_VALUE) {
7961 if (!is_ldsx)
7962 /* b/h/w load zero-extends, mark upper bits as known 0 */
7963 coerce_reg_to_size(®s[value_regno], size);
7964 else
7965 coerce_reg_to_size_sx(®s[value_regno], size);
7966 }
7967 return err;
7968 }
7969
/* Forward declaration so that the load, store and atomic checkers below
 * (check_load_mem(), check_store_reg(), check_atomic_rmw()) can call it.
 */
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_mismatch);
7972
/* Verify a register load of the form dst_reg = *(size *)(src_reg + off).
 *
 * @env: verifier environment
 * @insn: the load instruction being checked
 * @strict_alignment_once: forwarded unchanged to check_mem_access()
 * @is_ldsx: forwarded unchanged to check_mem_access()
 * @allow_trust_mismatch: forwarded to save_aux_ptr_type()
 * @ctx: label handed to reg_bounds_sanity_check() for the destination register
 *
 * Returns 0 on success, otherwise the first error reported by one of the
 * sub-checks.
 */
static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
			  bool strict_alignment_once, bool is_ldsx,
			  bool allow_trust_mismatch, const char *ctx)
{
	struct bpf_reg_state *regs = cur_regs(env);
	enum bpf_reg_type src_reg_type;
	int err;

	/* check src operand */
	err = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (err)
		return err;

	/* check dst operand */
	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
	if (err)
		return err;

	/* Sample the pointer type now: check_mem_access() below updates the
	 * state of dst_reg, which may be the same register as src_reg.
	 */
	src_reg_type = regs[insn->src_reg].type;

	/* Check if (src_reg + off) is readable. The state of dst_reg will be
	 * updated by this call.
	 */
	err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
			       strict_alignment_once, is_ldsx);
	/* Each follow-up step only runs if everything before it succeeded. */
	err = err ?: save_aux_ptr_type(env, src_reg_type,
				       allow_trust_mismatch);
	err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);

	return err;
}
8005
/* Verify a register store of the form *(size *)(dst_reg + off) = src_reg.
 *
 * Both registers are validated as source operands, the write is checked with
 * check_mem_access(), and on success the pointer type that dst_reg had before
 * the access is handed to save_aux_ptr_type().
 *
 * Returns 0 on success or the first error encountered.
 */
static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
			   bool strict_alignment_once)
{
	struct bpf_reg_state *regs = cur_regs(env);
	enum bpf_reg_type ptr_type;
	int ret;

	/* the register holding the value to store must be readable */
	ret = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (ret)
		return ret;

	/* and so must the register holding the destination address */
	ret = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (ret)
		return ret;

	ptr_type = regs[insn->dst_reg].type;

	/* is (dst_reg + off) writeable? */
	ret = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
			       strict_alignment_once, false);
	if (ret)
		return ret;

	return save_aux_ptr_type(env, ptr_type, false);
}
8033
check_atomic_rmw(struct bpf_verifier_env * env,struct bpf_insn * insn)8034 static int check_atomic_rmw(struct bpf_verifier_env *env,
8035 struct bpf_insn *insn)
8036 {
8037 int load_reg;
8038 int err;
8039
8040 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
8041 verbose(env, "invalid atomic operand size\n");
8042 return -EINVAL;
8043 }
8044
8045 /* check src1 operand */
8046 err = check_reg_arg(env, insn->src_reg, SRC_OP);
8047 if (err)
8048 return err;
8049
8050 /* check src2 operand */
8051 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8052 if (err)
8053 return err;
8054
8055 if (insn->imm == BPF_CMPXCHG) {
8056 /* Check comparison of R0 with memory location */
8057 const u32 aux_reg = BPF_REG_0;
8058
8059 err = check_reg_arg(env, aux_reg, SRC_OP);
8060 if (err)
8061 return err;
8062
8063 if (is_pointer_value(env, aux_reg)) {
8064 verbose(env, "R%d leaks addr into mem\n", aux_reg);
8065 return -EACCES;
8066 }
8067 }
8068
8069 if (is_pointer_value(env, insn->src_reg)) {
8070 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
8071 return -EACCES;
8072 }
8073
8074 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
8075 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
8076 insn->dst_reg,
8077 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
8078 return -EACCES;
8079 }
8080
8081 if (insn->imm & BPF_FETCH) {
8082 if (insn->imm == BPF_CMPXCHG)
8083 load_reg = BPF_REG_0;
8084 else
8085 load_reg = insn->src_reg;
8086
8087 /* check and record load of old value */
8088 err = check_reg_arg(env, load_reg, DST_OP);
8089 if (err)
8090 return err;
8091 } else {
8092 /* This instruction accesses a memory location but doesn't
8093 * actually load it into a register.
8094 */
8095 load_reg = -1;
8096 }
8097
8098 /* Check whether we can read the memory, with second call for fetch
8099 * case to simulate the register fill.
8100 */
8101 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
8102 BPF_SIZE(insn->code), BPF_READ, -1, true, false);
8103 if (!err && load_reg >= 0)
8104 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
8105 insn->off, BPF_SIZE(insn->code),
8106 BPF_READ, load_reg, true, false);
8107 if (err)
8108 return err;
8109
8110 if (is_arena_reg(env, insn->dst_reg)) {
8111 err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
8112 if (err)
8113 return err;
8114 }
8115 /* Check whether we can write into the same memory. */
8116 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
8117 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
8118 if (err)
8119 return err;
8120 return 0;
8121 }
8122
check_atomic_load(struct bpf_verifier_env * env,struct bpf_insn * insn)8123 static int check_atomic_load(struct bpf_verifier_env *env,
8124 struct bpf_insn *insn)
8125 {
8126 int err;
8127
8128 err = check_load_mem(env, insn, true, false, false, "atomic_load");
8129 if (err)
8130 return err;
8131
8132 if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
8133 verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
8134 insn->src_reg,
8135 reg_type_str(env, reg_state(env, insn->src_reg)->type));
8136 return -EACCES;
8137 }
8138
8139 return 0;
8140 }
8141
check_atomic_store(struct bpf_verifier_env * env,struct bpf_insn * insn)8142 static int check_atomic_store(struct bpf_verifier_env *env,
8143 struct bpf_insn *insn)
8144 {
8145 int err;
8146
8147 err = check_store_reg(env, insn, true);
8148 if (err)
8149 return err;
8150
8151 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
8152 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
8153 insn->dst_reg,
8154 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
8155 return -EACCES;
8156 }
8157
8158 return 0;
8159 }
8160
check_atomic(struct bpf_verifier_env * env,struct bpf_insn * insn)8161 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
8162 {
8163 switch (insn->imm) {
8164 case BPF_ADD:
8165 case BPF_ADD | BPF_FETCH:
8166 case BPF_AND:
8167 case BPF_AND | BPF_FETCH:
8168 case BPF_OR:
8169 case BPF_OR | BPF_FETCH:
8170 case BPF_XOR:
8171 case BPF_XOR | BPF_FETCH:
8172 case BPF_XCHG:
8173 case BPF_CMPXCHG:
8174 return check_atomic_rmw(env, insn);
8175 case BPF_LOAD_ACQ:
8176 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
8177 verbose(env,
8178 "64-bit load-acquires are only supported on 64-bit arches\n");
8179 return -EOPNOTSUPP;
8180 }
8181 return check_atomic_load(env, insn);
8182 case BPF_STORE_REL:
8183 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
8184 verbose(env,
8185 "64-bit store-releases are only supported on 64-bit arches\n");
8186 return -EOPNOTSUPP;
8187 }
8188 return check_atomic_store(env, insn);
8189 default:
8190 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
8191 insn->imm);
8192 return -EINVAL;
8193 }
8194 }
8195
/* When register 'regno' is used to read the stack (either directly or through
 * a helper function) make sure that it's within stack boundary and, depending
 * on the access type and privileges, that all elements of the stack are
 * initialized.
 *
 * 'off' includes 'regno->off', but not its dynamic part (if any).
 *
 * All registers that have been spilled on the stack in the slots within the
 * read offsets are marked as read.
 *
 * @env: verifier environment
 * @regno: register holding the stack pointer
 * @off: constant part of the offset (see above)
 * @access_size: number of bytes accessed
 * @zero_size_allowed: whether access_size == 0 is acceptable
 * @type: BPF_WRITE means the accessed slots may be clobbered
 * @meta: optional; when non-NULL with raw_mode set (and the offset is
 *        constant), initialization is not required: only overlap with dynptr
 *        slots is rejected, and access_size/regno are recorded in @meta
 *
 * Returns 0 on success, -EACCES for a rejected access, -EFAULT if the
 * allocated stack is smaller than the accessed range, or an error from
 * check_stack_access_within_bounds() / bpf_mark_stack_read().
 */
static int check_stack_range_initialized(
		struct bpf_verifier_env *env, int regno, int off,
		int access_size, bool zero_size_allowed,
		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_func_state *state = func(env, reg);
	int err, min_off, max_off, i, j, slot, spi;
	/* Some accesses can write anything into the stack, others are
	 * read-only.
	 */
	bool clobber = false;

	if (access_size == 0 && !zero_size_allowed) {
		verbose(env, "invalid zero-sized read\n");
		return -EACCES;
	}

	if (type == BPF_WRITE)
		clobber = true;

	err = check_stack_access_within_bounds(env, regno, off, access_size, type);
	if (err)
		return err;


	/* Work out the lowest and highest possible start offset of the access. */
	if (tnum_is_const(reg->var_off)) {
		min_off = max_off = reg->var_off.value + off;
	} else {
		/* Variable offset is prohibited for unprivileged mode for
		 * simplicity since it requires corresponding support in
		 * Spectre masking for stack ALU.
		 * See also retrieve_ptr_limit().
		 */
		if (!env->bypass_spec_v1) {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
				regno, tn_buf);
			return -EACCES;
		}
		/* Only initialized buffer on stack is allowed to be accessed
		 * with variable offset. With uninitialized buffer it's hard to
		 * guarantee that whole memory is marked as initialized on
		 * helper return since specific bounds are unknown what may
		 * cause uninitialized stack leaking.
		 */
		if (meta && meta->raw_mode)
			meta = NULL;

		min_off = reg->smin_value + off;
		max_off = reg->smax_value + off;
	}

	if (meta && meta->raw_mode) {
		/* Ensure we won't be overwriting dynptrs when simulating byte
		 * by byte access in check_helper_call using meta.access_size.
		 * This would be a problem if we have a helper in the future
		 * which takes:
		 *
		 *	helper(uninit_mem, len, dynptr)
		 *
		 * Now, uninit_mem may overlap with dynptr pointer. Hence, it
		 * may end up writing to dynptr itself when touching memory from
		 * arg 1. This can be relaxed on a case by case basis for known
		 * safe cases, but reject due to the possibility of aliasing by
		 * default.
		 */
		for (i = min_off; i < max_off + access_size; i++) {
			int stack_off = -i - 1;

			spi = __get_spi(i);
			/* raw_mode may write past allocated_stack */
			if (state->allocated_stack <= stack_off)
				continue;
			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
				return -EACCES;
			}
		}
		/* Record the access in meta and skip the initialization checks. */
		meta->access_size = access_size;
		meta->regno = regno;
		return 0;
	}

	/* Inspect every byte the access may touch. */
	for (i = min_off; i < max_off + access_size; i++) {
		u8 *stype;

		/* byte index derived from offset i, and the 8-byte slot holding it */
		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		if (state->allocated_stack <= slot) {
			verbose(env, "allocated_stack too small\n");
			return -EFAULT;
		}

		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		if (*stype == STACK_MISC)
			goto mark;
		/* Zeroed bytes are always acceptable; invalid (never written)
		 * bytes only when env->allow_uninit_stack is set.
		 */
		if ((*stype == STACK_ZERO) ||
		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
			if (clobber) {
				/* helper can write anything into the stack */
				*stype = STACK_MISC;
			}
			goto mark;
		}

		/* A spilled register is acceptable if it is a scalar, or if
		 * env->allow_ptr_leaks is set.
		 */
		if (is_spilled_reg(&state->stack[spi]) &&
		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
		     env->allow_ptr_leaks)) {
			if (clobber) {
				/* the spill may be overwritten: forget what it held */
				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
				for (j = 0; j < BPF_REG_SIZE; j++)
					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
			}
			goto mark;
		}

		/* None of the acceptable cases matched: reject the access. */
		if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
				regno, min_off, i - min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n",
				regno, tn_buf, i - min_off, access_size);
		}
		return -EACCES;
mark:
		/* reading any byte out of 8-byte 'spill_slot' will cause
		 * the whole slot to be marked as 'read'
		 */
		err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi));
		if (err)
			return err;
		/* We do not call bpf_mark_stack_write(), as we can not
		 * be sure that whether stack slot is written to or not. Hence,
		 * we must still conservatively propagate reads upwards even if
		 * helper may write to the entire memory range.
		 */
	}
	return 0;
}
8351
/* Check that a helper may access @access_size bytes through register @regno.
 *
 * The check is dispatched on base_type() of the register:
 *  - packet, map key, map value, mem, buf and stack pointers are forwarded to
 *    their dedicated range checkers, after rejecting writes through read-only
 *    pointers (map keys are always treated as read-only here);
 *  - PTR_TO_BTF_ID is always checked as a BPF_READ, whatever @access_type is;
 *  - PTR_TO_CTX is only checked here when the program type has no
 *    convert_ctx_access callback; otherwise it falls through to the default
 *    case;
 *  - anything else is rejected, except a zero-sized access through a NULL
 *    register when @zero_size_allowed is set.
 *
 * @env: verifier environment
 * @regno: register holding the pointer
 * @access_size: number of bytes the helper may touch
 * @access_type: BPF_READ or BPF_WRITE
 * @zero_size_allowed: whether access_size == 0 is acceptable
 * @meta: call metadata, only forwarded to check_stack_range_initialized()
 *
 * Returns 0 on success, -EACCES for a rejected access, or the error from the
 * underlying checker.
 */
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
				   int access_size, enum bpf_access_type access_type,
				   bool zero_size_allowed,
				   struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	u32 *max_access;

	switch (base_type(reg->type)) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		return check_packet_access(env, regno, reg->off, access_size,
					   zero_size_allowed);
	case PTR_TO_MAP_KEY:
		if (access_type == BPF_WRITE) {
			verbose(env, "R%d cannot write into %s\n", regno,
				reg_type_str(env, reg->type));
			return -EACCES;
		}
		return check_mem_region_access(env, regno, reg->off, access_size,
					       reg->map_ptr->key_size, false);
	case PTR_TO_MAP_VALUE:
		if (check_map_access_type(env, regno, reg->off, access_size, access_type))
			return -EACCES;
		return check_map_access(env, regno, reg->off, access_size,
					zero_size_allowed, ACCESS_HELPER);
	case PTR_TO_MEM:
		if (type_is_rdonly_mem(reg->type)) {
			if (access_type == BPF_WRITE) {
				verbose(env, "R%d cannot write into %s\n", regno,
					reg_type_str(env, reg->type));
				return -EACCES;
			}
		}
		return check_mem_region_access(env, regno, reg->off,
					       access_size, reg->mem_size,
					       zero_size_allowed);
	case PTR_TO_BUF:
		/* Read-only and read-write buffers are accounted in separate
		 * per-program maximum-access fields.
		 */
		if (type_is_rdonly_mem(reg->type)) {
			if (access_type == BPF_WRITE) {
				verbose(env, "R%d cannot write into %s\n", regno,
					reg_type_str(env, reg->type));
				return -EACCES;
			}

			max_access = &env->prog->aux->max_rdonly_access;
		} else {
			max_access = &env->prog->aux->max_rdwr_access;
		}
		return check_buffer_access(env, reg, regno, reg->off,
					   access_size, zero_size_allowed,
					   max_access);
	case PTR_TO_STACK:
		return check_stack_range_initialized(
				env,
				regno, reg->off, access_size,
				zero_size_allowed, access_type, meta);
	case PTR_TO_BTF_ID:
		return check_ptr_to_btf_access(env, regs, regno, reg->off,
					       access_size, BPF_READ, -1);
	case PTR_TO_CTX:
		/* in case the function doesn't know how to access the context,
		 * (because we are in a program of type SYSCALL for example), we
		 * can not statically check its size.
		 * Dynamically check it now.
		 */
		if (!env->ops->convert_ctx_access) {
			/* offset of the last byte of the accessed range */
			int offset = access_size - 1;

			/* Allow zero-byte read from PTR_TO_CTX */
			if (access_size == 0)
				return zero_size_allowed ? 0 : -EACCES;

			/* a one-byte access at the last offset stands in for the range */
			return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
						access_type, -1, false, false);
		}

		fallthrough;
	default: /* scalar_value or invalid ptr */
		/* Allow zero-byte read from NULL, regardless of pointer type */
		if (zero_size_allowed && access_size == 0 &&
		    register_is_null(reg))
			return 0;

		verbose(env, "R%d type=%s ", regno,
			reg_type_str(env, reg->type));
		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
		return -EACCES;
	}
}
8442
8443 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
8444 * size.
8445 *
8446 * @regno is the register containing the access size. regno-1 is the register
8447 * containing the pointer.
8448 */
check_mem_size_reg(struct bpf_verifier_env * env,struct bpf_reg_state * reg,u32 regno,enum bpf_access_type access_type,bool zero_size_allowed,struct bpf_call_arg_meta * meta)8449 static int check_mem_size_reg(struct bpf_verifier_env *env,
8450 struct bpf_reg_state *reg, u32 regno,
8451 enum bpf_access_type access_type,
8452 bool zero_size_allowed,
8453 struct bpf_call_arg_meta *meta)
8454 {
8455 int err;
8456
8457 /* This is used to refine r0 return value bounds for helpers
8458 * that enforce this value as an upper bound on return values.
8459 * See do_refine_retval_range() for helpers that can refine
8460 * the return value. C type of helper is u32 so we pull register
8461 * bound from umax_value however, if negative verifier errors
8462 * out. Only upper bounds can be learned because retval is an
8463 * int type and negative retvals are allowed.
8464 */
8465 meta->msize_max_value = reg->umax_value;
8466
8467 /* The register is SCALAR_VALUE; the access check happens using
8468 * its boundaries. For unprivileged variable accesses, disable
8469 * raw mode so that the program is required to initialize all
8470 * the memory that the helper could just partially fill up.
8471 */
8472 if (!tnum_is_const(reg->var_off))
8473 meta = NULL;
8474
8475 if (reg->smin_value < 0) {
8476 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
8477 regno);
8478 return -EACCES;
8479 }
8480
8481 if (reg->umin_value == 0 && !zero_size_allowed) {
8482 verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n",
8483 regno, reg->umin_value, reg->umax_value);
8484 return -EACCES;
8485 }
8486
8487 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
8488 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
8489 regno);
8490 return -EACCES;
8491 }
8492 err = check_helper_mem_access(env, regno - 1, reg->umax_value,
8493 access_type, zero_size_allowed, meta);
8494 if (!err)
8495 err = mark_chain_precision(env, regno);
8496 return err;
8497 }
8498
/* Check that @mem_size bytes behind register @regno can be both read and
 * written by a helper. A register known to be NULL is accepted as is.
 *
 * Returns 0 on success or the error from check_helper_mem_access().
 */
static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			 u32 regno, u32 mem_size)
{
	const bool nullable = type_may_be_null(reg->type);
	struct bpf_reg_state backup;
	int ret;

	if (register_is_null(reg))
		return 0;

	/* Run the access checks as if the pointer were non-NULL. The
	 * conversion is temporary and must stay invisible to the caller, so
	 * keep a copy of the register state to put back afterwards.
	 */
	if (nullable) {
		backup = *reg;
		mark_ptr_not_null_reg(reg);
	}

	ret = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL);
	if (!ret)
		ret = check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL);

	if (nullable)
		*reg = backup;

	return ret;
}
8526
/* kfunc flavour of the (pointer, size) argument check: @reg/@regno describe
 * the size register and the pointer is taken from register regno - 1. The
 * pair is checked for both read and write access with a zeroed, local
 * bpf_call_arg_meta.
 *
 * Returns 0 on success or the error from check_mem_size_reg().
 */
static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				    u32 regno)
{
	struct bpf_reg_state *ptr_reg = &cur_regs(env)[regno - 1];
	const bool nullable = type_may_be_null(ptr_reg->type);
	struct bpf_call_arg_meta meta;
	struct bpf_reg_state backup;
	int ret;

	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);

	memset(&meta, 0, sizeof(meta));

	/* Treat a maybe-NULL pointer as non-NULL for the duration of the
	 * checks; its original state is restored below.
	 */
	if (nullable) {
		backup = *ptr_reg;
		mark_ptr_not_null_reg(ptr_reg);
	}

	ret = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta);
	if (!ret)
		ret = check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta);

	if (nullable)
		*ptr_reg = backup;

	return ret;
}
8553
/* Flag bits for the 'flags' argument of process_spin_lock(). */
enum {
	/* set: the call takes the lock; clear: the call releases it */
	PROCESS_SPIN_LOCK = (1 << 0),
	/* operate on the bpf_res_spin_lock field instead of bpf_spin_lock */
	PROCESS_RES_LOCK = (1 << 1),
	/* together with PROCESS_RES_LOCK: track as REF_TYPE_RES_LOCK_IRQ */
	PROCESS_LOCK_IRQ = (1 << 2),
};
8559
8560 /* Implementation details:
8561 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
8562 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
8563 * Two bpf_map_lookups (even with the same key) will have different reg->id.
8564 * Two separate bpf_obj_new will also have different reg->id.
8565 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
8566 * clears reg->id after value_or_null->value transition, since the verifier only
8567 * cares about the range of access to valid map value pointer and doesn't care
8568 * about actual address of the map element.
8569 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
8570 * reg->id > 0 after value_or_null->value transition. By doing so
8571 * two bpf_map_lookups will be considered two different pointers that
8572 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
8573 * returned from bpf_obj_new.
8574 * The verifier allows taking only one bpf_spin_lock at a time to avoid
8575 * dead-locks.
8576 * Since only one bpf_spin_lock is allowed the checks are simpler than
8577 * reg_is_refcounted() logic. The verifier needs to remember only
8578 * one spin_lock instead of array of acquired_refs.
8579 * env->cur_state->active_locks remembers which map value element or allocated
8580 * object got locked and clears it after bpf_spin_unlock.
8581 */
process_spin_lock(struct bpf_verifier_env * env,int regno,int flags)8582 static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
8583 {
8584 bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
8585 const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
8586 struct bpf_reg_state *reg = reg_state(env, regno);
8587 struct bpf_verifier_state *cur = env->cur_state;
8588 bool is_const = tnum_is_const(reg->var_off);
8589 bool is_irq = flags & PROCESS_LOCK_IRQ;
8590 u64 val = reg->var_off.value;
8591 struct bpf_map *map = NULL;
8592 struct btf *btf = NULL;
8593 struct btf_record *rec;
8594 u32 spin_lock_off;
8595 int err;
8596
8597 if (!is_const) {
8598 verbose(env,
8599 "R%d doesn't have constant offset. %s_lock has to be at the constant offset\n",
8600 regno, lock_str);
8601 return -EINVAL;
8602 }
8603 if (reg->type == PTR_TO_MAP_VALUE) {
8604 map = reg->map_ptr;
8605 if (!map->btf) {
8606 verbose(env,
8607 "map '%s' has to have BTF in order to use %s_lock\n",
8608 map->name, lock_str);
8609 return -EINVAL;
8610 }
8611 } else {
8612 btf = reg->btf;
8613 }
8614
8615 rec = reg_btf_record(reg);
8616 if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
8617 verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
8618 map ? map->name : "kptr", lock_str);
8619 return -EINVAL;
8620 }
8621 spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
8622 if (spin_lock_off != val + reg->off) {
8623 verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
8624 val + reg->off, lock_str, spin_lock_off);
8625 return -EINVAL;
8626 }
8627 if (is_lock) {
8628 void *ptr;
8629 int type;
8630
8631 if (map)
8632 ptr = map;
8633 else
8634 ptr = btf;
8635
8636 if (!is_res_lock && cur->active_locks) {
8637 if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
8638 verbose(env,
8639 "Locking two bpf_spin_locks are not allowed\n");
8640 return -EINVAL;
8641 }
8642 } else if (is_res_lock && cur->active_locks) {
8643 if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
8644 verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
8645 return -EINVAL;
8646 }
8647 }
8648
8649 if (is_res_lock && is_irq)
8650 type = REF_TYPE_RES_LOCK_IRQ;
8651 else if (is_res_lock)
8652 type = REF_TYPE_RES_LOCK;
8653 else
8654 type = REF_TYPE_LOCK;
8655 err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
8656 if (err < 0) {
8657 verbose(env, "Failed to acquire lock state\n");
8658 return err;
8659 }
8660 } else {
8661 void *ptr;
8662 int type;
8663
8664 if (map)
8665 ptr = map;
8666 else
8667 ptr = btf;
8668
8669 if (!cur->active_locks) {
8670 verbose(env, "%s_unlock without taking a lock\n", lock_str);
8671 return -EINVAL;
8672 }
8673
8674 if (is_res_lock && is_irq)
8675 type = REF_TYPE_RES_LOCK_IRQ;
8676 else if (is_res_lock)
8677 type = REF_TYPE_RES_LOCK;
8678 else
8679 type = REF_TYPE_LOCK;
8680 if (!find_lock_state(cur, type, reg->id, ptr)) {
8681 verbose(env, "%s_unlock of different lock\n", lock_str);
8682 return -EINVAL;
8683 }
8684 if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
8685 verbose(env, "%s_unlock cannot be out of order\n", lock_str);
8686 return -EINVAL;
8687 }
8688 if (release_lock_state(cur, type, reg->id, ptr)) {
8689 verbose(env, "%s_unlock of different lock\n", lock_str);
8690 return -EINVAL;
8691 }
8692
8693 invalidate_non_owning_refs(env);
8694 }
8695 return 0;
8696 }
8697
8698 /* Check if @regno is a pointer to a specific field in a map value */
check_map_field_pointer(struct bpf_verifier_env * env,u32 regno,enum btf_field_type field_type,struct bpf_map_desc * map_desc)8699 static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
8700 enum btf_field_type field_type,
8701 struct bpf_map_desc *map_desc)
8702 {
8703 struct bpf_reg_state *reg = reg_state(env, regno);
8704 bool is_const = tnum_is_const(reg->var_off);
8705 struct bpf_map *map = reg->map_ptr;
8706 u64 val = reg->var_off.value;
8707 const char *struct_name = btf_field_type_name(field_type);
8708 int field_off = -1;
8709
8710 if (!is_const) {
8711 verbose(env,
8712 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
8713 regno, struct_name);
8714 return -EINVAL;
8715 }
8716 if (!map->btf) {
8717 verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
8718 struct_name);
8719 return -EINVAL;
8720 }
8721 if (!btf_record_has_field(map->record, field_type)) {
8722 verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
8723 return -EINVAL;
8724 }
8725 switch (field_type) {
8726 case BPF_TIMER:
8727 field_off = map->record->timer_off;
8728 break;
8729 case BPF_TASK_WORK:
8730 field_off = map->record->task_work_off;
8731 break;
8732 case BPF_WORKQUEUE:
8733 field_off = map->record->wq_off;
8734 break;
8735 default:
8736 verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
8737 return -EINVAL;
8738 }
8739 if (field_off != val + reg->off) {
8740 verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
8741 val + reg->off, struct_name, field_off);
8742 return -EINVAL;
8743 }
8744 if (map_desc->ptr) {
8745 verifier_bug(env, "Two map pointers in a %s helper", struct_name);
8746 return -EFAULT;
8747 }
8748 map_desc->uid = reg->map_uid;
8749 map_desc->ptr = map;
8750 return 0;
8751 }
8752
process_timer_func(struct bpf_verifier_env * env,int regno,struct bpf_map_desc * map)8753 static int process_timer_func(struct bpf_verifier_env *env, int regno,
8754 struct bpf_map_desc *map)
8755 {
8756 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
8757 verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
8758 return -EOPNOTSUPP;
8759 }
8760 return check_map_field_pointer(env, regno, BPF_TIMER, map);
8761 }
8762
process_timer_helper(struct bpf_verifier_env * env,int regno,struct bpf_call_arg_meta * meta)8763 static int process_timer_helper(struct bpf_verifier_env *env, int regno,
8764 struct bpf_call_arg_meta *meta)
8765 {
8766 return process_timer_func(env, regno, &meta->map);
8767 }
8768
process_timer_kfunc(struct bpf_verifier_env * env,int regno,struct bpf_kfunc_call_arg_meta * meta)8769 static int process_timer_kfunc(struct bpf_verifier_env *env, int regno,
8770 struct bpf_kfunc_call_arg_meta *meta)
8771 {
8772 return process_timer_func(env, regno, &meta->map);
8773 }
8774
process_kptr_func(struct bpf_verifier_env * env,int regno,struct bpf_call_arg_meta * meta)8775 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
8776 struct bpf_call_arg_meta *meta)
8777 {
8778 struct bpf_reg_state *reg = reg_state(env, regno);
8779 struct btf_field *kptr_field;
8780 struct bpf_map *map_ptr;
8781 struct btf_record *rec;
8782 u32 kptr_off;
8783
8784 if (type_is_ptr_alloc_obj(reg->type)) {
8785 rec = reg_btf_record(reg);
8786 } else { /* PTR_TO_MAP_VALUE */
8787 map_ptr = reg->map_ptr;
8788 if (!map_ptr->btf) {
8789 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
8790 map_ptr->name);
8791 return -EINVAL;
8792 }
8793 rec = map_ptr->record;
8794 meta->map.ptr = map_ptr;
8795 }
8796
8797 if (!tnum_is_const(reg->var_off)) {
8798 verbose(env,
8799 "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
8800 regno);
8801 return -EINVAL;
8802 }
8803
8804 if (!btf_record_has_field(rec, BPF_KPTR)) {
8805 verbose(env, "R%d has no valid kptr\n", regno);
8806 return -EINVAL;
8807 }
8808
8809 kptr_off = reg->off + reg->var_off.value;
8810 kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
8811 if (!kptr_field) {
8812 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
8813 return -EACCES;
8814 }
8815 if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
8816 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
8817 return -EACCES;
8818 }
8819 meta->kptr_field = kptr_field;
8820 return 0;
8821 }
8822
8823 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
8824 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
8825 *
8826 * In both cases we deal with the first 8 bytes, but need to mark the next 8
8827 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
8828 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
8829 *
8830 * Mutability of bpf_dynptr is at two levels, one is at the level of struct
8831 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
8832 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
8833 * mutate the view of the dynptr and also possibly destroy it. In the latter
8834 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
8835 * memory that dynptr points to.
8836 *
 * The verifier will keep track of both levels of mutation (bpf_dynptr's in
 * reg->type and the memory's in reg->dynptr.type), but there is no support for
 * readonly dynptr view yet, hence only the first case is tracked and checked.
8840 *
8841 * This is consistent with how C applies the const modifier to a struct object,
8842 * where the pointer itself inside bpf_dynptr becomes const but not what it
8843 * points to.
8844 *
8845 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
8846 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
8847 */
process_dynptr_func(struct bpf_verifier_env * env,int regno,int insn_idx,enum bpf_arg_type arg_type,int clone_ref_obj_id)8848 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
8849 enum bpf_arg_type arg_type, int clone_ref_obj_id)
8850 {
8851 struct bpf_reg_state *reg = reg_state(env, regno);
8852 int err;
8853
8854 if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
8855 verbose(env,
8856 "arg#%d expected pointer to stack or const struct bpf_dynptr\n",
8857 regno - 1);
8858 return -EINVAL;
8859 }
8860
8861 /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
8862 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
8863 */
8864 if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
8865 verifier_bug(env, "misconfigured dynptr helper type flags");
8866 return -EFAULT;
8867 }
8868
8869 /* MEM_UNINIT - Points to memory that is an appropriate candidate for
8870 * constructing a mutable bpf_dynptr object.
8871 *
8872 * Currently, this is only possible with PTR_TO_STACK
8873 * pointing to a region of at least 16 bytes which doesn't
8874 * contain an existing bpf_dynptr.
8875 *
8876 * MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
8877 * mutated or destroyed. However, the memory it points to
8878 * may be mutated.
8879 *
8880 * None - Points to a initialized dynptr that can be mutated and
8881 * destroyed, including mutation of the memory it points
8882 * to.
8883 */
8884 if (arg_type & MEM_UNINIT) {
8885 int i;
8886
8887 if (!is_dynptr_reg_valid_uninit(env, reg)) {
8888 verbose(env, "Dynptr has to be an uninitialized dynptr\n");
8889 return -EINVAL;
8890 }
8891
8892 /* we write BPF_DW bits (8 bytes) at a time */
8893 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
8894 err = check_mem_access(env, insn_idx, regno,
8895 i, BPF_DW, BPF_WRITE, -1, false, false);
8896 if (err)
8897 return err;
8898 }
8899
8900 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
8901 } else /* MEM_RDONLY and None case from above */ {
8902 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
8903 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
8904 verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
8905 return -EINVAL;
8906 }
8907
8908 if (!is_dynptr_reg_valid_init(env, reg)) {
8909 verbose(env,
8910 "Expected an initialized dynptr as arg #%d\n",
8911 regno - 1);
8912 return -EINVAL;
8913 }
8914
8915 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
8916 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
8917 verbose(env,
8918 "Expected a dynptr of type %s as arg #%d\n",
8919 dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1);
8920 return -EINVAL;
8921 }
8922
8923 err = mark_dynptr_read(env, reg);
8924 }
8925 return err;
8926 }
8927
iter_ref_obj_id(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int spi)8928 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
8929 {
8930 struct bpf_func_state *state = func(env, reg);
8931
8932 return state->stack[spi].spilled_ptr.ref_obj_id;
8933 }
8934
is_iter_kfunc(struct bpf_kfunc_call_arg_meta * meta)8935 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8936 {
8937 return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
8938 }
8939
is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta * meta)8940 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8941 {
8942 return meta->kfunc_flags & KF_ITER_NEW;
8943 }
8944
is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta * meta)8945 static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8946 {
8947 return meta->kfunc_flags & KF_ITER_NEXT;
8948 }
8949
is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta * meta)8950 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
8951 {
8952 return meta->kfunc_flags & KF_ITER_DESTROY;
8953 }
8954
is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta * meta,int arg_idx,const struct btf_param * arg)8955 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
8956 const struct btf_param *arg)
8957 {
8958 /* btf_check_iter_kfuncs() guarantees that first argument of any iter
8959 * kfunc is iter state pointer
8960 */
8961 if (is_iter_kfunc(meta))
8962 return arg_idx == 0;
8963
8964 /* iter passed as an argument to a generic kfunc */
8965 return btf_param_match_suffix(meta->btf, arg, "__iter");
8966 }
8967
process_iter_arg(struct bpf_verifier_env * env,int regno,int insn_idx,struct bpf_kfunc_call_arg_meta * meta)8968 static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
8969 struct bpf_kfunc_call_arg_meta *meta)
8970 {
8971 struct bpf_reg_state *reg = reg_state(env, regno);
8972 const struct btf_type *t;
8973 int spi, err, i, nr_slots, btf_id;
8974
8975 if (reg->type != PTR_TO_STACK) {
8976 verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1);
8977 return -EINVAL;
8978 }
8979
8980 /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
8981 * ensures struct convention, so we wouldn't need to do any BTF
8982 * validation here. But given iter state can be passed as a parameter
8983 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
8984 * conservative here.
8985 */
8986 btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
8987 if (btf_id < 0) {
8988 verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1);
8989 return -EINVAL;
8990 }
8991 t = btf_type_by_id(meta->btf, btf_id);
8992 nr_slots = t->size / BPF_REG_SIZE;
8993
8994 if (is_iter_new_kfunc(meta)) {
8995 /* bpf_iter_<type>_new() expects pointer to uninit iter state */
8996 if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
8997 verbose(env, "expected uninitialized iter_%s as arg #%d\n",
8998 iter_type_str(meta->btf, btf_id), regno - 1);
8999 return -EINVAL;
9000 }
9001
9002 for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
9003 err = check_mem_access(env, insn_idx, regno,
9004 i, BPF_DW, BPF_WRITE, -1, false, false);
9005 if (err)
9006 return err;
9007 }
9008
9009 err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
9010 if (err)
9011 return err;
9012 } else {
9013 /* iter_next() or iter_destroy(), as well as any kfunc
9014 * accepting iter argument, expect initialized iter state
9015 */
9016 err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
9017 switch (err) {
9018 case 0:
9019 break;
9020 case -EINVAL:
9021 verbose(env, "expected an initialized iter_%s as arg #%d\n",
9022 iter_type_str(meta->btf, btf_id), regno - 1);
9023 return err;
9024 case -EPROTO:
9025 verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
9026 return err;
9027 default:
9028 return err;
9029 }
9030
9031 spi = iter_get_spi(env, reg, nr_slots);
9032 if (spi < 0)
9033 return spi;
9034
9035 err = mark_iter_read(env, reg, spi, nr_slots);
9036 if (err)
9037 return err;
9038
9039 /* remember meta->iter info for process_iter_next_call() */
9040 meta->iter.spi = spi;
9041 meta->iter.frameno = reg->frameno;
9042 meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
9043
9044 if (is_iter_destroy_kfunc(meta)) {
9045 err = unmark_stack_slots_iter(env, reg, nr_slots);
9046 if (err)
9047 return err;
9048 }
9049 }
9050
9051 return 0;
9052 }
9053
9054 /* Look for a previous loop entry at insn_idx: nearest parent state
9055 * stopped at insn_idx with callsites matching those in cur->frame.
9056 */
find_prev_entry(struct bpf_verifier_env * env,struct bpf_verifier_state * cur,int insn_idx)9057 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
9058 struct bpf_verifier_state *cur,
9059 int insn_idx)
9060 {
9061 struct bpf_verifier_state_list *sl;
9062 struct bpf_verifier_state *st;
9063 struct list_head *pos, *head;
9064
9065 /* Explored states are pushed in stack order, most recent states come first */
9066 head = explored_state(env, insn_idx);
9067 list_for_each(pos, head) {
9068 sl = container_of(pos, struct bpf_verifier_state_list, node);
9069 /* If st->branches != 0 state is a part of current DFS verification path,
9070 * hence cur & st for a loop.
9071 */
9072 st = &sl->state;
9073 if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
9074 st->dfs_depth < cur->dfs_depth)
9075 return st;
9076 }
9077
9078 return NULL;
9079 }
9080
/* Forward declarations of static helpers whose definitions are not in this
 * part of the file.
 */
static void reset_idmap_scratch(struct bpf_verifier_env *env);
static bool regs_exact(const struct bpf_reg_state *rold,
		       const struct bpf_reg_state *rcur,
		       struct bpf_idmap *idmap);
9085
9086 /*
9087 * Check if scalar registers are exact for the purpose of not widening.
9088 * More lenient than regs_exact()
9089 */
scalars_exact_for_widen(const struct bpf_reg_state * rold,const struct bpf_reg_state * rcur)9090 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
9091 const struct bpf_reg_state *rcur)
9092 {
9093 return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
9094 }
9095
maybe_widen_reg(struct bpf_verifier_env * env,struct bpf_reg_state * rold,struct bpf_reg_state * rcur)9096 static void maybe_widen_reg(struct bpf_verifier_env *env,
9097 struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
9098 {
9099 if (rold->type != SCALAR_VALUE)
9100 return;
9101 if (rold->type != rcur->type)
9102 return;
9103 if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
9104 return;
9105 __mark_reg_unknown(env, rcur);
9106 }
9107
widen_imprecise_scalars(struct bpf_verifier_env * env,struct bpf_verifier_state * old,struct bpf_verifier_state * cur)9108 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
9109 struct bpf_verifier_state *old,
9110 struct bpf_verifier_state *cur)
9111 {
9112 struct bpf_func_state *fold, *fcur;
9113 int i, fr, num_slots;
9114
9115 for (fr = old->curframe; fr >= 0; fr--) {
9116 fold = old->frame[fr];
9117 fcur = cur->frame[fr];
9118
9119 for (i = 0; i < MAX_BPF_REG; i++)
9120 maybe_widen_reg(env,
9121 &fold->regs[i],
9122 &fcur->regs[i]);
9123
9124 num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
9125 fcur->allocated_stack / BPF_REG_SIZE);
9126 for (i = 0; i < num_slots; i++) {
9127 if (!is_spilled_reg(&fold->stack[i]) ||
9128 !is_spilled_reg(&fcur->stack[i]))
9129 continue;
9130
9131 maybe_widen_reg(env,
9132 &fold->stack[i].spilled_ptr,
9133 &fcur->stack[i].spilled_ptr);
9134 }
9135 }
9136 return 0;
9137 }
9138
get_iter_from_state(struct bpf_verifier_state * cur_st,struct bpf_kfunc_call_arg_meta * meta)9139 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
9140 struct bpf_kfunc_call_arg_meta *meta)
9141 {
9142 int iter_frameno = meta->iter.frameno;
9143 int iter_spi = meta->iter.spi;
9144
9145 return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
9146 }
9147
9148 /* process_iter_next_call() is called when verifier gets to iterator's next
9149 * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
9150 * to it as just "iter_next()" in comments below.
9151 *
9152 * BPF verifier relies on a crucial contract for any iter_next()
9153 * implementation: it should *eventually* return NULL, and once that happens
9154 * it should keep returning NULL. That is, once iterator exhausts elements to
9155 * iterate, it should never reset or spuriously return new elements.
9156 *
9157 * With the assumption of such contract, process_iter_next_call() simulates
9158 * a fork in the verifier state to validate loop logic correctness and safety
9159 * without having to simulate infinite amount of iterations.
9160 *
9161 * In current state, we first assume that iter_next() returned NULL and
9162 * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
9163 * conditions we should not form an infinite loop and should eventually reach
9164 * exit.
9165 *
9166 * Besides that, we also fork current state and enqueue it for later
9167 * verification. In a forked state we keep iterator state as ACTIVE
9168 * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
9169 * also bump iteration depth to prevent erroneous infinite loop detection
9170 * later on (see iter_active_depths_differ() comment for details). In this
9171 * state we assume that we'll eventually loop back to another iter_next()
9172 * calls (it could be in exactly same location or in some other instruction,
9173 * it doesn't matter, we don't make any unnecessary assumptions about this,
9174 * everything revolves around iterator state in a stack slot, not which
9175 * instruction is calling iter_next()). When that happens, we either will come
9176 * to iter_next() with equivalent state and can conclude that next iteration
9177 * will proceed in exactly the same way as we just verified, so it's safe to
9178 * assume that loop converges. If not, we'll go on another iteration
9179 * simulation with a different input state, until all possible starting states
9180 * are validated or we reach maximum number of instructions limit.
9181 *
9182 * This way, we will either exhaustively discover all possible input states
9183 * that iterator loop can start with and eventually will converge, or we'll
9184 * effectively regress into bounded loop simulation logic and either reach
9185 * maximum number of instructions if loop is not provably convergent, or there
9186 * is some statically known limit on number of iterations (e.g., if there is
9187 * an explicit `if n > 100 then break;` statement somewhere in the loop).
9188 *
9189 * Iteration convergence logic in is_state_visited() relies on exact
9190 * states comparison, which ignores read and precision marks.
9191 * This is necessary because read and precision marks are not finalized
9192 * while in the loop. Exact comparison might preclude convergence for
9193 * simple programs like below:
9194 *
9195 * i = 0;
9196 * while(iter_next(&it))
9197 * i++;
9198 *
9199 * At each iteration step i++ would produce a new distinct state and
9200 * eventually instruction processing limit would be reached.
9201 *
9202 * To avoid such behavior speculatively forget (widen) range for
9203 * imprecise scalar registers, if those registers were not precise at the
9204 * end of the previous iteration and do not match exactly.
9205 *
 * This is a conservative heuristic that allows verifying a wide range of programs,
9207 * however it precludes verification of programs that conjure an
9208 * imprecise value on the first loop iteration and use it as precise on a second.
9209 * For example, the following safe program would fail to verify:
9210 *
9211 * struct bpf_num_iter it;
9212 * int arr[10];
9213 * int i = 0, a = 0;
9214 * bpf_iter_num_new(&it, 0, 10);
9215 * while (bpf_iter_num_next(&it)) {
9216 * if (a == 0) {
9217 * a = 1;
9218 * i = 7; // Because i changed verifier would forget
 *                    // its range on second loop entry.
9220 * } else {
9221 * arr[i] = 42; // This would fail to verify.
9222 * }
9223 * }
9224 * bpf_iter_num_destroy(&it);
9225 */
static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
				  struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
	struct bpf_reg_state *cur_iter, *queued_iter;

	BTF_TYPE_EMIT(struct bpf_iter);

	/* iterator state is found through the frame number and stack slot
	 * index that process_iter_arg() stored in meta->iter
	 */
	cur_iter = get_iter_from_state(cur_st, meta);

	/* only ACTIVE and DRAINED are legal states at an iter_next() call */
	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
		verifier_bug(env, "unexpected iterator state %d (%s)",
			     cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
		return -EFAULT;
	}

	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
		/* Because iter_next() call is a checkpoint is_state_visited()
		 * should guarantee parent state with same call sites and insn_idx.
		 */
		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
		    !same_callsites(cur_st->parent, cur_st)) {
			verifier_bug(env, "bad parent state for iter next call");
			return -EFAULT;
		}
		/* Note cur_st->parent in the call below, it is necessary to skip
		 * checkpoint created for cur_st by is_state_visited()
		 * right at this instruction.
		 */
		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
		/* branch out active iter state */
		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
		if (IS_ERR(queued_st))
			return PTR_ERR(queued_st);

		/* the queued state keeps the iterator ACTIVE, one level deeper */
		queued_iter = get_iter_from_state(queued_st, meta);
		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
		queued_iter->iter.depth++;
		/* if a previous loop entry exists, widen the queued state's
		 * imprecise scalars that differ from it
		 */
		if (prev_st)
			widen_imprecise_scalars(env, prev_st, queued_st);

		/* in the queued state iter_next() is assumed to return non-NULL */
		queued_fr = queued_st->frame[queued_st->curframe];
		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
	}

	/* In the current state, switch the iterator to DRAINED (keeping its
	 * depth unchanged) and assume iter_next() returned NULL.
	 */
	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);

	return 0;
}
9280
arg_type_is_mem_size(enum bpf_arg_type type)9281 static bool arg_type_is_mem_size(enum bpf_arg_type type)
9282 {
9283 return type == ARG_CONST_SIZE ||
9284 type == ARG_CONST_SIZE_OR_ZERO;
9285 }
9286
arg_type_is_raw_mem(enum bpf_arg_type type)9287 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
9288 {
9289 return base_type(type) == ARG_PTR_TO_MEM &&
9290 type & MEM_UNINIT;
9291 }
9292
arg_type_is_release(enum bpf_arg_type type)9293 static bool arg_type_is_release(enum bpf_arg_type type)
9294 {
9295 return type & OBJ_RELEASE;
9296 }
9297
arg_type_is_dynptr(enum bpf_arg_type type)9298 static bool arg_type_is_dynptr(enum bpf_arg_type type)
9299 {
9300 return base_type(type) == ARG_PTR_TO_DYNPTR;
9301 }
9302
resolve_map_arg_type(struct bpf_verifier_env * env,const struct bpf_call_arg_meta * meta,enum bpf_arg_type * arg_type)9303 static int resolve_map_arg_type(struct bpf_verifier_env *env,
9304 const struct bpf_call_arg_meta *meta,
9305 enum bpf_arg_type *arg_type)
9306 {
9307 if (!meta->map.ptr) {
9308 /* kernel subsystem misconfigured verifier */
9309 verifier_bug(env, "invalid map_ptr to access map->type");
9310 return -EFAULT;
9311 }
9312
9313 switch (meta->map.ptr->map_type) {
9314 case BPF_MAP_TYPE_SOCKMAP:
9315 case BPF_MAP_TYPE_SOCKHASH:
9316 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
9317 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
9318 } else {
9319 verbose(env, "invalid arg_type for sockmap/sockhash\n");
9320 return -EINVAL;
9321 }
9322 break;
9323 case BPF_MAP_TYPE_BLOOM_FILTER:
9324 if (meta->func_id == BPF_FUNC_map_peek_elem)
9325 *arg_type = ARG_PTR_TO_MAP_VALUE;
9326 break;
9327 default:
9328 break;
9329 }
9330 return 0;
9331 }
9332
/* Set of register types accepted for one helper argument type.
 *
 * @types:  candidate register types. check_reg_type() scans the array in
 *          order and stops at the first NOT_INIT entry (presumably NOT_INIT
 *          is 0, so the unset tail of a shorter list terminates it - confirm).
 * @btf_id: optional BTF ID that check_reg_type() falls back to for
 *          PTR_TO_BTF_ID registers when it is given no arg_btf_id.
 */
struct bpf_reg_types {
	const enum bpf_reg_type types[10];
	u32 *btf_id;
};
9337
9338 static const struct bpf_reg_types sock_types = {
9339 .types = {
9340 PTR_TO_SOCK_COMMON,
9341 PTR_TO_SOCKET,
9342 PTR_TO_TCP_SOCK,
9343 PTR_TO_XDP_SOCK,
9344 },
9345 };
9346
9347 #ifdef CONFIG_NET
9348 static const struct bpf_reg_types btf_id_sock_common_types = {
9349 .types = {
9350 PTR_TO_SOCK_COMMON,
9351 PTR_TO_SOCKET,
9352 PTR_TO_TCP_SOCK,
9353 PTR_TO_XDP_SOCK,
9354 PTR_TO_BTF_ID,
9355 PTR_TO_BTF_ID | PTR_TRUSTED,
9356 },
9357 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
9358 };
9359 #endif
9360
9361 static const struct bpf_reg_types mem_types = {
9362 .types = {
9363 PTR_TO_STACK,
9364 PTR_TO_PACKET,
9365 PTR_TO_PACKET_META,
9366 PTR_TO_MAP_KEY,
9367 PTR_TO_MAP_VALUE,
9368 PTR_TO_MEM,
9369 PTR_TO_MEM | MEM_RINGBUF,
9370 PTR_TO_BUF,
9371 PTR_TO_BTF_ID | PTR_TRUSTED,
9372 },
9373 };
9374
9375 static const struct bpf_reg_types spin_lock_types = {
9376 .types = {
9377 PTR_TO_MAP_VALUE,
9378 PTR_TO_BTF_ID | MEM_ALLOC,
9379 }
9380 };
9381
9382 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
9383 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
9384 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
9385 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
9386 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
9387 static const struct bpf_reg_types btf_ptr_types = {
9388 .types = {
9389 PTR_TO_BTF_ID,
9390 PTR_TO_BTF_ID | PTR_TRUSTED,
9391 PTR_TO_BTF_ID | MEM_RCU,
9392 },
9393 };
9394 static const struct bpf_reg_types percpu_btf_ptr_types = {
9395 .types = {
9396 PTR_TO_BTF_ID | MEM_PERCPU,
9397 PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
9398 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
9399 }
9400 };
9401 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
9402 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
9403 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
9404 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
9405 static const struct bpf_reg_types kptr_xchg_dest_types = {
9406 .types = {
9407 PTR_TO_MAP_VALUE,
9408 PTR_TO_BTF_ID | MEM_ALLOC
9409 }
9410 };
9411 static const struct bpf_reg_types dynptr_types = {
9412 .types = {
9413 PTR_TO_STACK,
9414 CONST_PTR_TO_DYNPTR,
9415 }
9416 };
9417
/* Maps a base argument type to the set of register types accepted for it.
 * check_reg_type() indexes this table with base_type(arg_type) and treats a
 * NULL entry as a verifier bug. The ARG_PTR_TO_BTF_ID_SOCK_COMMON entry only
 * exists when CONFIG_NET is enabled.
 */
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
	[ARG_CONST_SIZE]		= &scalar_types,
	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
	[ARG_PTR_TO_CTX]		= &context_types,
	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
#ifdef CONFIG_NET
	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
#endif
	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
	[ARG_PTR_TO_MEM]		= &mem_types,
	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
	[ARG_PTR_TO_TIMER]		= &timer_types,
	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
};
9443
/* Check that the register type of R@regno is acceptable for @arg_type.
 *
 * The set of acceptable register types is looked up in compatible_reg_types[]
 * by the base type of @arg_type. Selected modifier flags are folded out of
 * the register type before it is compared against that set. For registers
 * whose base type is PTR_TO_BTF_ID further checks follow: memory arguments
 * must be read-only, and otherwise nullness and the BTF type are matched.
 *
 * @arg_btf_id: BTF ID expected for the argument; when NULL the btf_id of the
 *              compatible set is used instead.
 * @meta:       call metadata; func_id and kptr_field are read here.
 *
 * Returns 0 if the register is acceptable, -EACCES if it is not, and -EFAULT
 * on a verifier-internal inconsistency.
 */
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
			  enum bpf_arg_type arg_type,
			  const u32 *arg_btf_id,
			  struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	enum bpf_reg_type expected, type = reg->type;
	const struct bpf_reg_types *compatible;
	int i, j;

	compatible = compatible_reg_types[base_type(arg_type)];
	if (!compatible) {
		verifier_bug(env, "unsupported arg type %d", arg_type);
		return -EFAULT;
	}

	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
	 *
	 * Same for MAYBE_NULL:
	 *
	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
	 *
	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
	 *
	 * Therefore we fold these flags depending on the arg_type before comparison.
	 */
	if (arg_type & MEM_RDONLY)
		type &= ~MEM_RDONLY;
	if (arg_type & PTR_MAYBE_NULL)
		type &= ~PTR_MAYBE_NULL;
	if (base_type(arg_type) == ARG_PTR_TO_MEM)
		type &= ~DYNPTR_TYPE_FLAG_MASK;

	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
		type &= ~MEM_ALLOC;
		type &= ~MEM_PERCPU;
	}

	/* Scan the compatible list; a NOT_INIT entry marks its end. */
	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
		expected = compatible->types[i];
		if (expected == NOT_INIT)
			break;

		if (type == expected)
			goto found;
	}

	/* No match: i is the number of entries scanned. Print all but the
	 * last one followed by ", ", then the last one followed by a newline.
	 */
	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
	for (j = 0; j + 1 < i; j++)
		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
	return -EACCES;

found:
	/* Only PTR_TO_BTF_ID registers need the additional checks below. */
	if (base_type(reg->type) != PTR_TO_BTF_ID)
		return 0;

	/* A PTR_TO_BTF_ID register passed as a memory argument is accepted
	 * only when the argument is declared MEM_RDONLY.
	 */
	if (compatible == &mem_types) {
		if (!(arg_type & MEM_RDONLY)) {
			verbose(env,
				"%s() may write into memory pointed by R%d type=%s\n",
				func_id_name(meta->func_id),
				regno, reg_type_str(env, reg->type));
			return -EACCES;
		}
		return 0;
	}

	/* Note: this switches on the unfolded reg->type, not on 'type'. */
	switch ((int)reg->type) {
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
	{
		/* For bpf_sk_release, it needs to match against first member
		 * 'struct sock_common', hence make an exception for it. This
		 * allows bpf_sk_release to work for multiple socket types.
		 */
		bool strict_type_match = arg_type_is_release(arg_type) &&
					 meta->func_id != BPF_FUNC_sk_release;

		/* A possibly-NULL register is rejected unless the argument
		 * itself may be NULL and is not a release argument.
		 */
		if (type_may_be_null(reg->type) &&
		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
			verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
			return -EACCES;
		}

		/* Without an explicit BTF ID, fall back to the one attached
		 * to the compatible set; having neither is a verifier bug.
		 */
		if (!arg_btf_id) {
			if (!compatible->btf_id) {
				verifier_bug(env, "missing arg compatible BTF ID");
				return -EFAULT;
			}
			arg_btf_id = compatible->btf_id;
		}

		if (meta->func_id == BPF_FUNC_kptr_xchg) {
			/* bpf_kptr_xchg: match against the kptr field in meta */
			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
				return -EACCES;
		} else {
			if (arg_btf_id == BPF_PTR_POISON) {
				verbose(env, "verifier internal error:");
				verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
					regno);
				return -EACCES;
			}

			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
						  btf_vmlinux, *arg_btf_id,
						  strict_type_match)) {
				verbose(env, "R%d is of type %s but %s is expected\n",
					regno, btf_type_name(reg->btf, reg->btf_id),
					btf_type_name(btf_vmlinux, *arg_btf_id));
				return -EACCES;
			}
		}
		break;
	}
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
		/* MEM_ALLOC registers are handled only for these three helpers */
		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
		    meta->func_id != BPF_FUNC_kptr_xchg) {
			verifier_bug(env, "unimplemented handling of MEM_ALLOC");
			return -EFAULT;
		}
		/* Check if local kptr in src arg matches kptr in dst arg */
		if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
				return -EACCES;
		}
		break;
	case PTR_TO_BTF_ID | MEM_PERCPU:
	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
		/* Handled by helper specific checks */
		break;
	default:
		verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
		return -EFAULT;
	}
	return 0;
}
9590
9591 static struct btf_field *
reg_find_field_offset(const struct bpf_reg_state * reg,s32 off,u32 fields)9592 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
9593 {
9594 struct btf_field *field;
9595 struct btf_record *rec;
9596
9597 rec = reg_btf_record(reg);
9598 if (!rec)
9599 return NULL;
9600
9601 field = btf_record_find(rec, off, fields);
9602 if (!field)
9603 return NULL;
9604
9605 return field;
9606 }
9607
/* Check that the offset carried by @reg is acceptable for an argument of
 * type @arg_type.
 *
 * Release arguments must have a zero fixed offset, except for a dynptr
 * argument that lives on the stack. For all other arguments the decision
 * depends on the register type: the listed memory-like types and scalars are
 * accepted as they are, the listed PTR_TO_BTF_ID variants go through
 * __check_ptr_off_reg() with fixed offsets allowed, and every other type goes
 * through __check_ptr_off_reg() with fixed offsets disallowed.
 *
 * Returns 0 if the offset is acceptable, -EINVAL for a release argument with
 * a non-zero offset, or whatever __check_ptr_off_reg() returns.
 */
static int check_func_arg_reg_off(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg, int regno,
				  enum bpf_arg_type arg_type)
{
	u32 type = reg->type;

	/* When referenced register is passed to release function, its fixed
	 * offset must be 0.
	 *
	 * We will check arg_type_is_release reg has ref_obj_id when storing
	 * meta->release_regno.
	 */
	if (arg_type_is_release(arg_type)) {
		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
		 * may not directly point to the object being released, but to
		 * dynptr pointing to such object, which might be at some offset
		 * on the stack. In that case, we simply fall back to the
		 * default handling.
		 */
		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
			return 0;

		/* Doing check_ptr_off_reg check for the offset will catch this
		 * because fixed_off_ok is false, but checking here allows us
		 * to give the user a better error message.
		 */
		if (reg->off) {
			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
				regno);
			return -EINVAL;
		}
		return __check_ptr_off_reg(env, reg, regno, false);
	}

	switch (type) {
	/* Pointer types where both fixed and variable offset is explicitly allowed: */
	case PTR_TO_STACK:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_MEM | MEM_RDONLY:
	case PTR_TO_MEM | MEM_RINGBUF:
	case PTR_TO_BUF:
	case PTR_TO_BUF | MEM_RDONLY:
	case PTR_TO_ARENA:
	case SCALAR_VALUE:
		return 0;
	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
	 * fixed offset.
	 */
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
		/* When referenced PTR_TO_BTF_ID is passed to release function,
		 * its fixed offset must be 0. In the other cases, fixed offset
		 * can be non-zero. This was already checked above. So pass
		 * fixed_off_ok as true to allow fixed offset for all other
		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
		 * still need to do checks instead of returning.
		 */
		return __check_ptr_off_reg(env, reg, regno, true);
	default:
		return __check_ptr_off_reg(env, reg, regno, false);
	}
}
9678
get_dynptr_arg_reg(struct bpf_verifier_env * env,const struct bpf_func_proto * fn,struct bpf_reg_state * regs)9679 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
9680 const struct bpf_func_proto *fn,
9681 struct bpf_reg_state *regs)
9682 {
9683 struct bpf_reg_state *state = NULL;
9684 int i;
9685
9686 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
9687 if (arg_type_is_dynptr(fn->arg_type[i])) {
9688 if (state) {
9689 verbose(env, "verifier internal error: multiple dynptr args\n");
9690 return NULL;
9691 }
9692 state = ®s[BPF_REG_1 + i];
9693 }
9694
9695 if (!state)
9696 verbose(env, "verifier internal error: no dynptr arg found\n");
9697
9698 return state;
9699 }
9700
dynptr_id(struct bpf_verifier_env * env,struct bpf_reg_state * reg)9701 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9702 {
9703 struct bpf_func_state *state = func(env, reg);
9704 int spi;
9705
9706 if (reg->type == CONST_PTR_TO_DYNPTR)
9707 return reg->id;
9708 spi = dynptr_get_spi(env, reg);
9709 if (spi < 0)
9710 return spi;
9711 return state->stack[spi].spilled_ptr.id;
9712 }
9713
dynptr_ref_obj_id(struct bpf_verifier_env * env,struct bpf_reg_state * reg)9714 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9715 {
9716 struct bpf_func_state *state = func(env, reg);
9717 int spi;
9718
9719 if (reg->type == CONST_PTR_TO_DYNPTR)
9720 return reg->ref_obj_id;
9721 spi = dynptr_get_spi(env, reg);
9722 if (spi < 0)
9723 return spi;
9724 return state->stack[spi].spilled_ptr.ref_obj_id;
9725 }
9726
dynptr_get_type(struct bpf_verifier_env * env,struct bpf_reg_state * reg)9727 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
9728 struct bpf_reg_state *reg)
9729 {
9730 struct bpf_func_state *state = func(env, reg);
9731 int spi;
9732
9733 if (reg->type == CONST_PTR_TO_DYNPTR)
9734 return reg->dynptr.type;
9735
9736 spi = __get_spi(reg->off);
9737 if (spi < 0) {
9738 verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
9739 return BPF_DYNPTR_TYPE_INVALID;
9740 }
9741
9742 return state->stack[spi].spilled_ptr.dynptr.type;
9743 }
9744
/* Check that register @regno (@reg) points at a constant, NUL-terminated
 * string stored in a read-only map value.
 *
 * The register must be PTR_TO_MAP_VALUE with a constant var_off, the map must
 * not be an insn_array map, must be read-only and must implement
 * map_direct_value_addr(). The bytes from the string start to the end of the
 * map value must pass check_map_access() and contain a zero byte.
 *
 * Returns 0 on success, -EINVAL for a wrong register type or an unterminated
 * string, -EACCES for an unsuitable map or non-constant address, or the error
 * returned by check_map_access() / map_direct_value_addr().
 */
static int check_reg_const_str(struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg, u32 regno)
{
	struct bpf_map *rodata = reg->map_ptr;
	u64 base_addr;
	char *start;
	int str_off;
	int ret;

	if (reg->type != PTR_TO_MAP_VALUE)
		return -EINVAL;

	if (rodata->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
		verbose(env, "R%d points to insn_array map which cannot be used as const string\n", regno);
		return -EACCES;
	}

	if (!bpf_map_is_rdonly(rodata)) {
		verbose(env, "R%d does not point to a readonly map'\n", regno);
		return -EACCES;
	}

	if (!tnum_is_const(reg->var_off)) {
		verbose(env, "R%d is not a constant address'\n", regno);
		return -EACCES;
	}

	if (!rodata->ops->map_direct_value_addr) {
		verbose(env, "no direct value access support for this map type\n");
		return -EACCES;
	}

	/* Everything from reg->off to the end of the value must be accessible. */
	ret = check_map_access(env, regno, reg->off,
			       rodata->value_size - reg->off, false,
			       ACCESS_HELPER);
	if (ret)
		return ret;

	str_off = reg->off + reg->var_off.value;
	ret = rodata->ops->map_direct_value_addr(rodata, &base_addr, str_off);
	if (ret) {
		verbose(env, "direct value access on string failed\n");
		return ret;
	}

	/* NOTE(review): str_off is added on top of the returned address, so
	 * the callback presumably reports the value's base address - confirm.
	 */
	start = (char *)(long)(base_addr) + str_off;
	if (strnchr(start, rodata->value_size - str_off, 0))
		return 0;

	verbose(env, "string is not zero-terminated\n");
	return -EINVAL;
}
9797
9798 /* Returns constant key value in `value` if possible, else negative error */
get_constant_map_key(struct bpf_verifier_env * env,struct bpf_reg_state * key,u32 key_size,s64 * value)9799 static int get_constant_map_key(struct bpf_verifier_env *env,
9800 struct bpf_reg_state *key,
9801 u32 key_size,
9802 s64 *value)
9803 {
9804 struct bpf_func_state *state = func(env, key);
9805 struct bpf_reg_state *reg;
9806 int slot, spi, off;
9807 int spill_size = 0;
9808 int zero_size = 0;
9809 int stack_off;
9810 int i, err;
9811 u8 *stype;
9812
9813 if (!env->bpf_capable)
9814 return -EOPNOTSUPP;
9815 if (key->type != PTR_TO_STACK)
9816 return -EOPNOTSUPP;
9817 if (!tnum_is_const(key->var_off))
9818 return -EOPNOTSUPP;
9819
9820 stack_off = key->off + key->var_off.value;
9821 slot = -stack_off - 1;
9822 spi = slot / BPF_REG_SIZE;
9823 off = slot % BPF_REG_SIZE;
9824 stype = state->stack[spi].slot_type;
9825
9826 /* First handle precisely tracked STACK_ZERO */
9827 for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
9828 zero_size++;
9829 if (zero_size >= key_size) {
9830 *value = 0;
9831 return 0;
9832 }
9833
9834 /* Check that stack contains a scalar spill of expected size */
9835 if (!is_spilled_scalar_reg(&state->stack[spi]))
9836 return -EOPNOTSUPP;
9837 for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
9838 spill_size++;
9839 if (spill_size != key_size)
9840 return -EOPNOTSUPP;
9841
9842 reg = &state->stack[spi].spilled_ptr;
9843 if (!tnum_is_const(reg->var_off))
9844 /* Stack value not statically known */
9845 return -EOPNOTSUPP;
9846
9847 /* We are relying on a constant value. So mark as precise
9848 * to prevent pruning on it.
9849 */
9850 bt_set_frame_slot(&env->bt, key->frameno, spi);
9851 err = mark_chain_precision_batch(env, env->cur_state);
9852 if (err < 0)
9853 return err;
9854
9855 *value = reg->var_off.value;
9856 return 0;
9857 }
9858
/* Forward declaration: check_func_arg() below calls this before its
 * definition, which lives elsewhere in this file.
 */
static bool can_elide_value_nullness(enum bpf_map_type type);
9860
/* Verify one argument register of a helper call against the helper's
 * prototype.
 *
 * @arg is the zero-based argument index: the register checked is
 * BPF_REG_1 + @arg and its expected type is fn->arg_type[arg]. Facts that
 * later arguments or the caller need (map pointer and uid, release register,
 * ref_obj_id, raw_mode, constant map key, mem_size, subprogno, return BTF
 * info) are recorded in @meta as a side effect.
 *
 * Returns 0 when the argument is acceptable, or a negative errno otherwise.
 */
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
			  struct bpf_call_arg_meta *meta,
			  const struct bpf_func_proto *fn,
			  int insn_idx)
{
	u32 regno = BPF_REG_1 + arg;
	struct bpf_reg_state *reg = reg_state(env, regno);
	enum bpf_arg_type arg_type = fn->arg_type[arg];
	enum bpf_reg_type type = reg->type;
	u32 *arg_btf_id = NULL;
	u32 key_size;
	int err = 0;

	/* Unused argument slot: nothing to verify. */
	if (arg_type == ARG_DONTCARE)
		return 0;

	/* The call reads this register as a source operand. */
	err = check_reg_arg(env, regno, SRC_OP);
	if (err)
		return err;

	/* ARG_ANYTHING accepts any value except one that would leak a
	 * pointer into the helper.
	 */
	if (arg_type == ARG_ANYTHING) {
		if (is_pointer_value(env, regno)) {
			verbose(env, "R%d leaks addr into helper function\n",
				regno);
			return -EACCES;
		}
		return 0;
	}

	if (type_is_pkt_pointer(type) &&
	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
		verbose(env, "helper access to the packet is not allowed\n");
		return -EACCES;
	}

	/* resolve_map_arg_type() may rewrite arg_type for map value args. */
	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
		err = resolve_map_arg_type(env, meta, &arg_type);
		if (err)
			return err;
	}

	if (register_is_null(reg) && type_may_be_null(arg_type))
		/* A NULL register has a SCALAR_VALUE type, so skip
		 * type checking.
		 */
		goto skip_type_check;

	/* arg_btf_id and arg_size are in a union. */
	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
		arg_btf_id = fn->arg_btf_id[arg];

	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
	if (err)
		return err;

	err = check_func_arg_reg_off(env, reg, regno, arg_type);
	if (err)
		return err;

skip_type_check:
	/* Release arguments must hold an acquired reference; at most one
	 * release argument is allowed per helper and it is remembered in
	 * meta->release_regno.
	 */
	if (arg_type_is_release(arg_type)) {
		if (arg_type_is_dynptr(arg_type)) {
			struct bpf_func_state *state = func(env, reg);
			int spi;

			/* Only dynptr created on stack can be released, thus
			 * the get_spi and stack state checks for spilled_ptr
			 * should only be done before process_dynptr_func for
			 * PTR_TO_STACK.
			 */
			if (reg->type == PTR_TO_STACK) {
				spi = dynptr_get_spi(env, reg);
				if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
					verbose(env, "arg %d is an unacquired reference\n", regno);
					return -EINVAL;
				}
			} else {
				verbose(env, "cannot release unowned const bpf_dynptr\n");
				return -EINVAL;
			}
		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
			verbose(env, "R%d must be referenced when passed to release function\n",
				regno);
			return -EINVAL;
		}
		if (meta->release_regno) {
			verifier_bug(env, "more than one release argument");
			return -EFAULT;
		}
		meta->release_regno = regno;
	}

	/* At most one argument may carry a ref_obj_id; it is remembered in
	 * meta->ref_obj_id. ARG_KPTR_XCHG_DEST arguments are exempt from
	 * this bookkeeping.
	 */
	if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
		if (meta->ref_obj_id) {
			verbose(env, "more than one arg with ref_obj_id R%d %u %u",
				regno, reg->ref_obj_id,
				meta->ref_obj_id);
			return -EACCES;
		}
		meta->ref_obj_id = reg->ref_obj_id;
	}

	/* Per-argument-type checks and meta bookkeeping. */
	switch (base_type(arg_type)) {
	case ARG_CONST_MAP_PTR:
		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
		if (meta->map.ptr) {
			/* Use map_uid (which is unique id of inner map) to reject:
			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
			 * if (inner_map1 && inner_map2) {
			 *     timer = bpf_map_lookup_elem(inner_map1);
			 *     if (timer)
			 *         // mismatch would have been allowed
			 *         bpf_timer_init(timer, inner_map2);
			 * }
			 *
			 * Comparing map_ptr is enough to distinguish normal and outer maps.
			 */
			if (meta->map.ptr != reg->map_ptr ||
			    meta->map.uid != reg->map_uid) {
				verbose(env,
					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
					meta->map.uid, reg->map_uid);
				return -EINVAL;
			}
		}
		meta->map.ptr = reg->map_ptr;
		meta->map.uid = reg->map_uid;
		break;
	case ARG_PTR_TO_MAP_KEY:
		/* bpf_map_xxx(..., map_ptr, ..., key) call:
		 * check that [key, key + map->key_size) are within
		 * stack limits and initialized
		 */
		if (!meta->map.ptr) {
			/* in function declaration map_ptr must come before
			 * map_key, so that it's verified and known before
			 * we have to check map_key here. Otherwise it means
			 * that kernel subsystem misconfigured verifier
			 */
			verifier_bug(env, "invalid map_ptr to access map->key");
			return -EFAULT;
		}
		key_size = meta->map.ptr->key_size;
		err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
		if (err)
			return err;
		if (can_elide_value_nullness(meta->map.ptr->map_type)) {
			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
			if (err < 0) {
				/* -EOPNOTSUPP only means the key is not a
				 * usable constant: record -1 and carry on.
				 * Any other error is fatal.
				 */
				meta->const_map_key = -1;
				if (err == -EOPNOTSUPP)
					err = 0;
				else
					return err;
			}
		}
		break;
	case ARG_PTR_TO_MAP_VALUE:
		if (type_may_be_null(arg_type) && register_is_null(reg))
			return 0;

		/* bpf_map_xxx(..., map_ptr, ..., value) call:
		 * check [value, value + map->value_size) validity
		 */
		if (!meta->map.ptr) {
			/* kernel subsystem misconfigured verifier */
			verifier_bug(env, "invalid map_ptr to access map->value");
			return -EFAULT;
		}
		meta->raw_mode = arg_type & MEM_UNINIT;
		err = check_helper_mem_access(env, regno, meta->map.ptr->value_size,
					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
					      false, meta);
		break;
	case ARG_PTR_TO_PERCPU_BTF_ID:
		if (!reg->btf_id) {
			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
			return -EACCES;
		}
		/* Remember the BTF type of this argument in meta. */
		meta->ret_btf = reg->btf;
		meta->ret_btf_id = reg->btf_id;
		break;
	case ARG_PTR_TO_SPIN_LOCK:
		if (in_rbtree_lock_required_cb(env)) {
			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
			return -EACCES;
		}
		/* Only bpf_spin_lock() and bpf_spin_unlock() may take this
		 * argument type; anything else is a verifier bug.
		 */
		if (meta->func_id == BPF_FUNC_spin_lock) {
			err = process_spin_lock(env, regno, PROCESS_SPIN_LOCK);
			if (err)
				return err;
		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
			err = process_spin_lock(env, regno, 0);
			if (err)
				return err;
		} else {
			verifier_bug(env, "spin lock arg on unexpected helper");
			return -EFAULT;
		}
		break;
	case ARG_PTR_TO_TIMER:
		err = process_timer_helper(env, regno, meta);
		if (err)
			return err;
		break;
	case ARG_PTR_TO_FUNC:
		meta->subprogno = reg->subprogno;
		break;
	case ARG_PTR_TO_MEM:
		/* The access to this pointer is only checked when we hit the
		 * next is_mem_size argument below.
		 */
		meta->raw_mode = arg_type & MEM_UNINIT;
		/* Fixed-size memory has no size argument, so it is checked
		 * right here against fn->arg_size[arg].
		 */
		if (arg_type & MEM_FIXED_SIZE) {
			err = check_helper_mem_access(env, regno, fn->arg_size[arg],
						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
						      false, meta);
			if (err)
				return err;
			if (arg_type & MEM_ALIGNED)
				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
		}
		break;
	case ARG_CONST_SIZE:
		/* Access direction comes from the MEM_WRITE flag of the
		 * preceding argument.
		 */
		err = check_mem_size_reg(env, reg, regno,
					 fn->arg_type[arg - 1] & MEM_WRITE ?
					 BPF_WRITE : BPF_READ,
					 false, meta);
		break;
	case ARG_CONST_SIZE_OR_ZERO:
		/* Same as ARG_CONST_SIZE, but passes true instead of false
		 * as the flag argument of check_mem_size_reg().
		 */
		err = check_mem_size_reg(env, reg, regno,
					 fn->arg_type[arg - 1] & MEM_WRITE ?
					 BPF_WRITE : BPF_READ,
					 true, meta);
		break;
	case ARG_PTR_TO_DYNPTR:
		err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
		if (err)
			return err;
		break;
	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
		if (!tnum_is_const(reg->var_off)) {
			verbose(env, "R%d is not a known constant'\n",
				regno);
			return -EACCES;
		}
		/* The constant is recorded in meta->mem_size, so the register
		 * must be tracked precisely.
		 */
		meta->mem_size = reg->var_off.value;
		err = mark_chain_precision(env, regno);
		if (err)
			return err;
		break;
	case ARG_PTR_TO_CONST_STR:
	{
		err = check_reg_const_str(env, reg, regno);
		if (err)
			return err;
		break;
	}
	case ARG_KPTR_XCHG_DEST:
		err = process_kptr_func(env, regno, meta);
		if (err)
			return err;
		break;
	}

	return err;
}
10130
may_update_sockmap(struct bpf_verifier_env * env,int func_id)10131 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
10132 {
10133 enum bpf_attach_type eatype = env->prog->expected_attach_type;
10134 enum bpf_prog_type type = resolve_prog_type(env->prog);
10135
10136 if (func_id != BPF_FUNC_map_update_elem &&
10137 func_id != BPF_FUNC_map_delete_elem)
10138 return false;
10139
10140 /* It's not possible to get access to a locked struct sock in these
10141 * contexts, so updating is safe.
10142 */
10143 switch (type) {
10144 case BPF_PROG_TYPE_TRACING:
10145 if (eatype == BPF_TRACE_ITER)
10146 return true;
10147 break;
10148 case BPF_PROG_TYPE_SOCK_OPS:
10149 /* map_update allowed only via dedicated helpers with event type checks */
10150 if (func_id == BPF_FUNC_map_delete_elem)
10151 return true;
10152 break;
10153 case BPF_PROG_TYPE_SOCKET_FILTER:
10154 case BPF_PROG_TYPE_SCHED_CLS:
10155 case BPF_PROG_TYPE_SCHED_ACT:
10156 case BPF_PROG_TYPE_XDP:
10157 case BPF_PROG_TYPE_SK_REUSEPORT:
10158 case BPF_PROG_TYPE_FLOW_DISSECTOR:
10159 case BPF_PROG_TYPE_SK_LOOKUP:
10160 return true;
10161 default:
10162 break;
10163 }
10164
10165 verbose(env, "cannot update sockmap in this context\n");
10166 return false;
10167 }
10168
allow_tail_call_in_subprogs(struct bpf_verifier_env * env)10169 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
10170 {
10171 return env->prog->jit_requested &&
10172 bpf_jit_supports_subprog_tailcalls();
10173 }
10174
check_map_func_compatibility(struct bpf_verifier_env * env,struct bpf_map * map,int func_id)10175 static int check_map_func_compatibility(struct bpf_verifier_env *env,
10176 struct bpf_map *map, int func_id)
10177 {
10178 if (!map)
10179 return 0;
10180
10181 /* We need a two way check, first is from map perspective ... */
10182 switch (map->map_type) {
10183 case BPF_MAP_TYPE_PROG_ARRAY:
10184 if (func_id != BPF_FUNC_tail_call)
10185 goto error;
10186 break;
10187 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
10188 if (func_id != BPF_FUNC_perf_event_read &&
10189 func_id != BPF_FUNC_perf_event_output &&
10190 func_id != BPF_FUNC_skb_output &&
10191 func_id != BPF_FUNC_perf_event_read_value &&
10192 func_id != BPF_FUNC_xdp_output)
10193 goto error;
10194 break;
10195 case BPF_MAP_TYPE_RINGBUF:
10196 if (func_id != BPF_FUNC_ringbuf_output &&
10197 func_id != BPF_FUNC_ringbuf_reserve &&
10198 func_id != BPF_FUNC_ringbuf_query &&
10199 func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
10200 func_id != BPF_FUNC_ringbuf_submit_dynptr &&
10201 func_id != BPF_FUNC_ringbuf_discard_dynptr)
10202 goto error;
10203 break;
10204 case BPF_MAP_TYPE_USER_RINGBUF:
10205 if (func_id != BPF_FUNC_user_ringbuf_drain)
10206 goto error;
10207 break;
10208 case BPF_MAP_TYPE_STACK_TRACE:
10209 if (func_id != BPF_FUNC_get_stackid)
10210 goto error;
10211 break;
10212 case BPF_MAP_TYPE_CGROUP_ARRAY:
10213 if (func_id != BPF_FUNC_skb_under_cgroup &&
10214 func_id != BPF_FUNC_current_task_under_cgroup)
10215 goto error;
10216 break;
10217 case BPF_MAP_TYPE_CGROUP_STORAGE:
10218 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
10219 if (func_id != BPF_FUNC_get_local_storage)
10220 goto error;
10221 break;
10222 case BPF_MAP_TYPE_DEVMAP:
10223 case BPF_MAP_TYPE_DEVMAP_HASH:
10224 if (func_id != BPF_FUNC_redirect_map &&
10225 func_id != BPF_FUNC_map_lookup_elem)
10226 goto error;
10227 break;
10228 /* Restrict bpf side of cpumap and xskmap, open when use-cases
10229 * appear.
10230 */
10231 case BPF_MAP_TYPE_CPUMAP:
10232 if (func_id != BPF_FUNC_redirect_map)
10233 goto error;
10234 break;
10235 case BPF_MAP_TYPE_XSKMAP:
10236 if (func_id != BPF_FUNC_redirect_map &&
10237 func_id != BPF_FUNC_map_lookup_elem)
10238 goto error;
10239 break;
10240 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
10241 case BPF_MAP_TYPE_HASH_OF_MAPS:
10242 if (func_id != BPF_FUNC_map_lookup_elem)
10243 goto error;
10244 break;
10245 case BPF_MAP_TYPE_SOCKMAP:
10246 if (func_id != BPF_FUNC_sk_redirect_map &&
10247 func_id != BPF_FUNC_sock_map_update &&
10248 func_id != BPF_FUNC_msg_redirect_map &&
10249 func_id != BPF_FUNC_sk_select_reuseport &&
10250 func_id != BPF_FUNC_map_lookup_elem &&
10251 !may_update_sockmap(env, func_id))
10252 goto error;
10253 break;
10254 case BPF_MAP_TYPE_SOCKHASH:
10255 if (func_id != BPF_FUNC_sk_redirect_hash &&
10256 func_id != BPF_FUNC_sock_hash_update &&
10257 func_id != BPF_FUNC_msg_redirect_hash &&
10258 func_id != BPF_FUNC_sk_select_reuseport &&
10259 func_id != BPF_FUNC_map_lookup_elem &&
10260 !may_update_sockmap(env, func_id))
10261 goto error;
10262 break;
10263 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
10264 if (func_id != BPF_FUNC_sk_select_reuseport)
10265 goto error;
10266 break;
10267 case BPF_MAP_TYPE_QUEUE:
10268 case BPF_MAP_TYPE_STACK:
10269 if (func_id != BPF_FUNC_map_peek_elem &&
10270 func_id != BPF_FUNC_map_pop_elem &&
10271 func_id != BPF_FUNC_map_push_elem)
10272 goto error;
10273 break;
10274 case BPF_MAP_TYPE_SK_STORAGE:
10275 if (func_id != BPF_FUNC_sk_storage_get &&
10276 func_id != BPF_FUNC_sk_storage_delete &&
10277 func_id != BPF_FUNC_kptr_xchg)
10278 goto error;
10279 break;
10280 case BPF_MAP_TYPE_INODE_STORAGE:
10281 if (func_id != BPF_FUNC_inode_storage_get &&
10282 func_id != BPF_FUNC_inode_storage_delete &&
10283 func_id != BPF_FUNC_kptr_xchg)
10284 goto error;
10285 break;
10286 case BPF_MAP_TYPE_TASK_STORAGE:
10287 if (func_id != BPF_FUNC_task_storage_get &&
10288 func_id != BPF_FUNC_task_storage_delete &&
10289 func_id != BPF_FUNC_kptr_xchg)
10290 goto error;
10291 break;
10292 case BPF_MAP_TYPE_CGRP_STORAGE:
10293 if (func_id != BPF_FUNC_cgrp_storage_get &&
10294 func_id != BPF_FUNC_cgrp_storage_delete &&
10295 func_id != BPF_FUNC_kptr_xchg)
10296 goto error;
10297 break;
10298 case BPF_MAP_TYPE_BLOOM_FILTER:
10299 if (func_id != BPF_FUNC_map_peek_elem &&
10300 func_id != BPF_FUNC_map_push_elem)
10301 goto error;
10302 break;
10303 case BPF_MAP_TYPE_INSN_ARRAY:
10304 goto error;
10305 default:
10306 break;
10307 }
10308
10309 /* ... and second from the function itself. */
10310 switch (func_id) {
10311 case BPF_FUNC_tail_call:
10312 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
10313 goto error;
10314 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
10315 verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
10316 return -EINVAL;
10317 }
10318 break;
10319 case BPF_FUNC_perf_event_read:
10320 case BPF_FUNC_perf_event_output:
10321 case BPF_FUNC_perf_event_read_value:
10322 case BPF_FUNC_skb_output:
10323 case BPF_FUNC_xdp_output:
10324 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
10325 goto error;
10326 break;
10327 case BPF_FUNC_ringbuf_output:
10328 case BPF_FUNC_ringbuf_reserve:
10329 case BPF_FUNC_ringbuf_query:
10330 case BPF_FUNC_ringbuf_reserve_dynptr:
10331 case BPF_FUNC_ringbuf_submit_dynptr:
10332 case BPF_FUNC_ringbuf_discard_dynptr:
10333 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
10334 goto error;
10335 break;
10336 case BPF_FUNC_user_ringbuf_drain:
10337 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
10338 goto error;
10339 break;
10340 case BPF_FUNC_get_stackid:
10341 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
10342 goto error;
10343 break;
10344 case BPF_FUNC_current_task_under_cgroup:
10345 case BPF_FUNC_skb_under_cgroup:
10346 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
10347 goto error;
10348 break;
10349 case BPF_FUNC_redirect_map:
10350 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
10351 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
10352 map->map_type != BPF_MAP_TYPE_CPUMAP &&
10353 map->map_type != BPF_MAP_TYPE_XSKMAP)
10354 goto error;
10355 break;
10356 case BPF_FUNC_sk_redirect_map:
10357 case BPF_FUNC_msg_redirect_map:
10358 case BPF_FUNC_sock_map_update:
10359 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
10360 goto error;
10361 break;
10362 case BPF_FUNC_sk_redirect_hash:
10363 case BPF_FUNC_msg_redirect_hash:
10364 case BPF_FUNC_sock_hash_update:
10365 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
10366 goto error;
10367 break;
10368 case BPF_FUNC_get_local_storage:
10369 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
10370 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
10371 goto error;
10372 break;
10373 case BPF_FUNC_sk_select_reuseport:
10374 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
10375 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
10376 map->map_type != BPF_MAP_TYPE_SOCKHASH)
10377 goto error;
10378 break;
10379 case BPF_FUNC_map_pop_elem:
10380 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
10381 map->map_type != BPF_MAP_TYPE_STACK)
10382 goto error;
10383 break;
10384 case BPF_FUNC_map_peek_elem:
10385 case BPF_FUNC_map_push_elem:
10386 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
10387 map->map_type != BPF_MAP_TYPE_STACK &&
10388 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
10389 goto error;
10390 break;
10391 case BPF_FUNC_map_lookup_percpu_elem:
10392 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
10393 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10394 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
10395 goto error;
10396 break;
10397 case BPF_FUNC_sk_storage_get:
10398 case BPF_FUNC_sk_storage_delete:
10399 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
10400 goto error;
10401 break;
10402 case BPF_FUNC_inode_storage_get:
10403 case BPF_FUNC_inode_storage_delete:
10404 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
10405 goto error;
10406 break;
10407 case BPF_FUNC_task_storage_get:
10408 case BPF_FUNC_task_storage_delete:
10409 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
10410 goto error;
10411 break;
10412 case BPF_FUNC_cgrp_storage_get:
10413 case BPF_FUNC_cgrp_storage_delete:
10414 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
10415 goto error;
10416 break;
10417 default:
10418 break;
10419 }
10420
10421 return 0;
10422 error:
10423 verbose(env, "cannot pass map_type %d into func %s#%d\n",
10424 map->map_type, func_id_name(func_id), func_id);
10425 return -EINVAL;
10426 }
10427
check_raw_mode_ok(const struct bpf_func_proto * fn)10428 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
10429 {
10430 int count = 0;
10431
10432 if (arg_type_is_raw_mem(fn->arg1_type))
10433 count++;
10434 if (arg_type_is_raw_mem(fn->arg2_type))
10435 count++;
10436 if (arg_type_is_raw_mem(fn->arg3_type))
10437 count++;
10438 if (arg_type_is_raw_mem(fn->arg4_type))
10439 count++;
10440 if (arg_type_is_raw_mem(fn->arg5_type))
10441 count++;
10442
10443 /* We only support one arg being in raw mode at the moment,
10444 * which is sufficient for the helper functions we have
10445 * right now.
10446 */
10447 return count <= 1;
10448 }
10449
check_args_pair_invalid(const struct bpf_func_proto * fn,int arg)10450 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
10451 {
10452 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
10453 bool has_size = fn->arg_size[arg] != 0;
10454 bool is_next_size = false;
10455
10456 if (arg + 1 < ARRAY_SIZE(fn->arg_type))
10457 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
10458
10459 if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
10460 return is_next_size;
10461
10462 return has_size == is_next_size || is_next_size == is_fixed;
10463 }
10464
check_arg_pair_ok(const struct bpf_func_proto * fn)10465 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
10466 {
10467 /* bpf_xxx(..., buf, len) call will access 'len'
10468 * bytes from memory 'buf'. Both arg types need
10469 * to be paired, so make sure there's no buggy
10470 * helper function specification.
10471 */
10472 if (arg_type_is_mem_size(fn->arg1_type) ||
10473 check_args_pair_invalid(fn, 0) ||
10474 check_args_pair_invalid(fn, 1) ||
10475 check_args_pair_invalid(fn, 2) ||
10476 check_args_pair_invalid(fn, 3) ||
10477 check_args_pair_invalid(fn, 4))
10478 return false;
10479
10480 return true;
10481 }
10482
check_btf_id_ok(const struct bpf_func_proto * fn)10483 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
10484 {
10485 int i;
10486
10487 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
10488 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
10489 return !!fn->arg_btf_id[i];
10490 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
10491 return fn->arg_btf_id[i] == BPF_PTR_POISON;
10492 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
10493 /* arg_btf_id and arg_size are in a union. */
10494 (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
10495 !(fn->arg_type[i] & MEM_FIXED_SIZE)))
10496 return false;
10497 }
10498
10499 return true;
10500 }
10501
check_mem_arg_rw_flag_ok(const struct bpf_func_proto * fn)10502 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
10503 {
10504 int i;
10505
10506 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
10507 enum bpf_arg_type arg_type = fn->arg_type[i];
10508
10509 if (base_type(arg_type) != ARG_PTR_TO_MEM)
10510 continue;
10511 if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
10512 return false;
10513 }
10514
10515 return true;
10516 }
10517
check_func_proto(const struct bpf_func_proto * fn)10518 static int check_func_proto(const struct bpf_func_proto *fn)
10519 {
10520 return check_raw_mode_ok(fn) &&
10521 check_arg_pair_ok(fn) &&
10522 check_mem_arg_rw_flag_ok(fn) &&
10523 check_btf_id_ok(fn) ? 0 : -EINVAL;
10524 }
10525
10526 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
10527 * are now invalid, so turn them into unknown SCALAR_VALUE.
10528 *
10529 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
10530 * since these slices point to packet data.
10531 */
clear_all_pkt_pointers(struct bpf_verifier_env * env)10532 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
10533 {
10534 struct bpf_func_state *state;
10535 struct bpf_reg_state *reg;
10536
10537 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
10538 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
10539 mark_reg_invalid(env, reg);
10540 }));
10541 }
10542
/* Special negative values that mark_pkt_end() stores in a PTR_TO_PACKET
 * register's 'range' field to record how the pointer compared to pkt_end.
 */
enum {
	AT_PKT_END = -1,	/* stored when range_open is false: pkt >= pkt_end */
	BEYOND_PKT_END = -2,	/* stored when range_open is true: pkt > pkt_end */
};
10547
/* Record in register @regn of the current frame that it is known to be at or
 * beyond pkt_end.
 *
 * Only PTR_TO_PACKET registers are handled (PTR_TO_PACKET_META is not
 * supported yet); any other register type is left untouched.
 */
static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
{
	struct bpf_reg_state *reg = &vstate->frame[vstate->curframe]->regs[regn];

	if (reg->type != PTR_TO_PACKET)
		return;

	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
	 * How far beyond pkt_end it goes is unknown.
	 * !range_open: the comparison was pkt >= pkt_end.
	 * range_open: the comparison was pkt > pkt_end, hence the pointer is
	 * at least 1 byte bigger than pkt_end.
	 */
	reg->range = range_open ? BEYOND_PKT_END : AT_PKT_END;
}
10568
release_reference_nomark(struct bpf_verifier_state * state,int ref_obj_id)10569 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id)
10570 {
10571 int i;
10572
10573 for (i = 0; i < state->acquired_refs; i++) {
10574 if (state->refs[i].type != REF_TYPE_PTR)
10575 continue;
10576 if (state->refs[i].id == ref_obj_id) {
10577 release_reference_state(state, i);
10578 return 0;
10579 }
10580 }
10581 return -EINVAL;
10582 }
10583
10584 /* The pointer with the specified id has released its reference to kernel
10585 * resources. Identify all copies of the same pointer and clear the reference.
10586 *
10587 * This is the release function corresponding to acquire_reference(). Idempotent.
10588 */
release_reference(struct bpf_verifier_env * env,int ref_obj_id)10589 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
10590 {
10591 struct bpf_verifier_state *vstate = env->cur_state;
10592 struct bpf_func_state *state;
10593 struct bpf_reg_state *reg;
10594 int err;
10595
10596 err = release_reference_nomark(vstate, ref_obj_id);
10597 if (err)
10598 return err;
10599
10600 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
10601 if (reg->ref_obj_id == ref_obj_id)
10602 mark_reg_invalid(env, reg);
10603 }));
10604
10605 return 0;
10606 }
10607
invalidate_non_owning_refs(struct bpf_verifier_env * env)10608 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
10609 {
10610 struct bpf_func_state *unused;
10611 struct bpf_reg_state *reg;
10612
10613 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
10614 if (type_is_non_owning_ref(reg->type))
10615 mark_reg_invalid(env, reg);
10616 }));
10617 }
10618
clear_caller_saved_regs(struct bpf_verifier_env * env,struct bpf_reg_state * regs)10619 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
10620 struct bpf_reg_state *regs)
10621 {
10622 int i;
10623
10624 /* after the call registers r0 - r5 were scratched */
10625 for (i = 0; i < CALLER_SAVED_REGS; i++) {
10626 mark_reg_not_init(env, regs, caller_saved[i]);
10627 __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
10628 }
10629 }
10630
/* Callback type used by setup_func_entry() to initialise the freshly
 * allocated @callee frame from the @caller frame. setup_func_entry() passes
 * its callsite as @insn_idx. A non-zero return value makes
 * setup_func_entry() free the callee frame and propagate the error.
 */
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx);
10635
/* Forward declaration; the definition lives elsewhere in this file. The
 * signature matches set_callee_state_fn.
 */
static int set_callee_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *caller,
			    struct bpf_func_state *callee, int insn_idx);
10639
/* Push a new call frame onto @state for a call into @subprog.
 *
 * Allocates the callee frame, initialises it with init_func_state() and lets
 * @set_callee_state_cb populate it from the caller's frame. state->curframe
 * is incremented only after the callback succeeded.
 *
 * Returns 0 on success, -E2BIG when the call stack would exceed
 * MAX_CALL_FRAMES, -EFAULT when the next frame slot is unexpectedly in use,
 * -ENOMEM on allocation failure, or the callback's error. On callback
 * failure the new frame is freed and its slot reset to NULL.
 */
static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
			    set_callee_state_fn set_callee_state_cb,
			    struct bpf_verifier_state *state)
{
	struct bpf_func_state *parent, *child;
	int ret;

	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
		verbose(env, "the call stack of %d frames is too deep\n",
			state->curframe + 2);
		return -E2BIG;
	}

	if (state->frame[state->curframe + 1]) {
		verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
		return -EFAULT;
	}

	child = kzalloc_obj(*child, GFP_KERNEL_ACCOUNT);
	if (!child)
		return -ENOMEM;

	parent = state->frame[state->curframe];
	state->frame[state->curframe + 1] = child;

	/* callee cannot access r0, r6 - r9 for reading and has to write
	 * into its own stack before reading from it.
	 * callee can read/write into caller's stack
	 */
	init_func_state(env, child,
			/* remember the callsite, it will be used by bpf_exit */
			callsite,
			state->curframe + 1 /* frameno within this callchain */,
			subprog /* subprog number within this prog */);

	ret = set_callee_state_cb(env, parent, child, callsite);
	if (ret) {
		/* Undo the frame installation before reporting the error. */
		free_func_state(child);
		state->frame[state->curframe + 1] = NULL;
		return ret;
	}

	/* only increment it after check_reg_arg() finished */
	state->curframe++;

	return 0;
}
10687
btf_check_func_arg_match(struct bpf_verifier_env * env,int subprog,const struct btf * btf,struct bpf_reg_state * regs)10688 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
10689 const struct btf *btf,
10690 struct bpf_reg_state *regs)
10691 {
10692 struct bpf_subprog_info *sub = subprog_info(env, subprog);
10693 struct bpf_verifier_log *log = &env->log;
10694 u32 i;
10695 int ret;
10696
10697 ret = btf_prepare_func_args(env, subprog);
10698 if (ret)
10699 return ret;
10700
10701 /* check that BTF function arguments match actual types that the
10702 * verifier sees.
10703 */
10704 for (i = 0; i < sub->arg_cnt; i++) {
10705 u32 regno = i + 1;
10706 struct bpf_reg_state *reg = ®s[regno];
10707 struct bpf_subprog_arg_info *arg = &sub->args[i];
10708
10709 if (arg->arg_type == ARG_ANYTHING) {
10710 if (reg->type != SCALAR_VALUE) {
10711 bpf_log(log, "R%d is not a scalar\n", regno);
10712 return -EINVAL;
10713 }
10714 } else if (arg->arg_type & PTR_UNTRUSTED) {
10715 /*
10716 * Anything is allowed for untrusted arguments, as these are
10717 * read-only and probe read instructions would protect against
10718 * invalid memory access.
10719 */
10720 } else if (arg->arg_type == ARG_PTR_TO_CTX) {
10721 ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
10722 if (ret < 0)
10723 return ret;
10724 /* If function expects ctx type in BTF check that caller
10725 * is passing PTR_TO_CTX.
10726 */
10727 if (reg->type != PTR_TO_CTX) {
10728 bpf_log(log, "arg#%d expects pointer to ctx\n", i);
10729 return -EINVAL;
10730 }
10731 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
10732 ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
10733 if (ret < 0)
10734 return ret;
10735 if (check_mem_reg(env, reg, regno, arg->mem_size))
10736 return -EINVAL;
10737 if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
10738 bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
10739 return -EINVAL;
10740 }
10741 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
10742 /*
10743 * Can pass any value and the kernel won't crash, but
10744 * only PTR_TO_ARENA or SCALAR make sense. Everything
10745 * else is a bug in the bpf program. Point it out to
10746 * the user at the verification time instead of
10747 * run-time debug nightmare.
10748 */
10749 if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
10750 bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
10751 return -EINVAL;
10752 }
10753 } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
10754 ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR);
10755 if (ret)
10756 return ret;
10757
10758 ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
10759 if (ret)
10760 return ret;
10761 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
10762 struct bpf_call_arg_meta meta;
10763 int err;
10764
10765 if (register_is_null(reg) && type_may_be_null(arg->arg_type))
10766 continue;
10767
10768 memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
10769 err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta);
10770 err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type);
10771 if (err)
10772 return err;
10773 } else {
10774 verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type);
10775 return -EFAULT;
10776 }
10777 }
10778
10779 return 0;
10780 }
10781
10782 /* Compare BTF of a function call with given bpf_reg_state.
10783 * Returns:
10784 * EFAULT - there is a verifier bug. Abort verification.
10785 * EINVAL - there is a type mismatch or BTF is not available.
10786 * 0 - BTF matches with what bpf_reg_state expects.
10787 * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
10788 */
btf_check_subprog_call(struct bpf_verifier_env * env,int subprog,struct bpf_reg_state * regs)10789 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
10790 struct bpf_reg_state *regs)
10791 {
10792 struct bpf_prog *prog = env->prog;
10793 struct btf *btf = prog->aux->btf;
10794 u32 btf_id;
10795 int err;
10796
10797 if (!prog->aux->func_info)
10798 return -EINVAL;
10799
10800 btf_id = prog->aux->func_info[subprog].type_id;
10801 if (!btf_id)
10802 return -EFAULT;
10803
10804 if (prog->aux->func_info_aux[subprog].unreliable)
10805 return -EINVAL;
10806
10807 err = btf_check_func_arg_match(env, subprog, btf, regs);
10808 /* Compiler optimizations can remove arguments from static functions
10809 * or mismatched type can be passed into a global function.
10810 * In such cases mark the function as unreliable from BTF point of view.
10811 */
10812 if (err)
10813 prog->aux->func_info_aux[subprog].unreliable = true;
10814 return err;
10815 }
10816
/* Schedule verification of callback @subprog passed to the helper or kfunc
 * call @insn located at @insn_idx.
 *
 * The subprog is flagged as a callback and @insn is checked to be a
 * callback-calling helper/kfunc (anything else is a verifier bug).
 *
 * For async callback-calling instructions a separate state is created with
 * push_async_cb() and its frame 0 is filled in by @set_callee_state_cb.
 * Otherwise a state starting at the callback entry is queued with
 * push_stack() and a callee frame is set up in it via setup_func_entry();
 * the current state continues with the next instruction.
 *
 * Returns 0 on success or a negative error.
 */
static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			      int insn_idx, int subprog,
			      set_callee_state_fn set_callee_state_cb)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_func_state *caller = cur->frame[cur->curframe];
	struct bpf_verifier_state *cb_state;
	struct bpf_func_state *callee;
	int err;

	/* only -EFAULT aborts here; other results are not acted upon */
	err = btf_check_subprog_call(env, subprog, caller->regs);
	if (err == -EFAULT)
		return err;

	/* set_callee_state is used for direct subprog calls, but we are
	 * interested in validating only BPF helpers that can call subprogs as
	 * callbacks
	 */
	env->subprog_info[subprog].is_cb = true;
	if (bpf_pseudo_kfunc_call(insn)) {
		if (!is_callback_calling_kfunc(insn->imm)) {
			verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
				     func_id_name(insn->imm), insn->imm);
			return -EFAULT;
		}
	} else if (!is_callback_calling_function(insn->imm)) { /* helper */
		verifier_bug(env, "helper %s#%d not marked as callback-calling",
			     func_id_name(insn->imm), insn->imm);
		return -EFAULT;
	}

	if (is_async_callback_calling_insn(insn)) {
		struct bpf_verifier_state *async_cb;

		/* there is no real recursion here. timer and workqueue callbacks are async */
		env->subprog_info[subprog].is_async_cb = true;
		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
					 insn_idx, subprog,
					 is_async_cb_sleepable(env, insn));
		if (IS_ERR(async_cb))
			return PTR_ERR(async_cb);

		callee = async_cb->frame[0];
		callee->async_entry_cnt = caller->async_entry_cnt + 1;

		/* Convert bpf_timer_set_callback() args into timer callback args */
		return set_callee_state_cb(env, caller, callee, insn_idx);
	}

	/* for callback functions enqueue entry to callback and
	 * proceed with next instruction within current frame.
	 */
	cb_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
	if (IS_ERR(cb_state))
		return PTR_ERR(cb_state);

	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
			       cb_state);
	if (err)
		return err;

	/* Depth bookkeeping: the queued state counts one more callback entry,
	 * both overall and in the frame that issued the call, while the
	 * depth of the frame continuing in the current state restarts at 0.
	 */
	cb_state->callback_unroll_depth++;
	cb_state->frame[cb_state->curframe - 1]->callback_depth++;
	caller->callback_depth = 0;
	return 0;
}
10885
check_func_call(struct bpf_verifier_env * env,struct bpf_insn * insn,int * insn_idx)10886 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10887 int *insn_idx)
10888 {
10889 struct bpf_verifier_state *state = env->cur_state;
10890 struct bpf_func_state *caller;
10891 int err, subprog, target_insn;
10892
10893 target_insn = *insn_idx + insn->imm + 1;
10894 subprog = find_subprog(env, target_insn);
10895 if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
10896 target_insn))
10897 return -EFAULT;
10898
10899 caller = state->frame[state->curframe];
10900 err = btf_check_subprog_call(env, subprog, caller->regs);
10901 if (err == -EFAULT)
10902 return err;
10903 if (subprog_is_global(env, subprog)) {
10904 const char *sub_name = subprog_name(env, subprog);
10905
10906 if (env->cur_state->active_locks) {
10907 verbose(env, "global function calls are not allowed while holding a lock,\n"
10908 "use static function instead\n");
10909 return -EINVAL;
10910 }
10911
10912 if (env->subprog_info[subprog].might_sleep &&
10913 (env->cur_state->active_rcu_locks || env->cur_state->active_preempt_locks ||
10914 env->cur_state->active_irq_id || !in_sleepable(env))) {
10915 verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n"
10916 "i.e., in a RCU/IRQ/preempt-disabled section, or in\n"
10917 "a non-sleepable BPF program context\n");
10918 return -EINVAL;
10919 }
10920
10921 if (err) {
10922 verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
10923 subprog, sub_name);
10924 return err;
10925 }
10926
10927 if (env->log.level & BPF_LOG_LEVEL)
10928 verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
10929 subprog, sub_name);
10930 if (env->subprog_info[subprog].changes_pkt_data)
10931 clear_all_pkt_pointers(env);
10932 /* mark global subprog for verifying after main prog */
10933 subprog_aux(env, subprog)->called = true;
10934 clear_caller_saved_regs(env, caller->regs);
10935
10936 /* All global functions return a 64-bit SCALAR_VALUE */
10937 mark_reg_unknown(env, caller->regs, BPF_REG_0);
10938 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10939
10940 /* continue with next insn after call */
10941 return 0;
10942 }
10943
10944 /* for regular function entry setup new frame and continue
10945 * from that frame.
10946 */
10947 err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
10948 if (err)
10949 return err;
10950
10951 clear_caller_saved_regs(env, caller->regs);
10952
10953 /* and go analyze first insn of the callee */
10954 *insn_idx = env->subprog_info[subprog].start - 1;
10955
10956 bpf_reset_live_stack_callchain(env);
10957
10958 if (env->log.level & BPF_LOG_LEVEL) {
10959 verbose(env, "caller:\n");
10960 print_verifier_state(env, state, caller->frameno, true);
10961 verbose(env, "callee:\n");
10962 print_verifier_state(env, state, state->curframe, true);
10963 }
10964
10965 return 0;
10966 }
10967
map_set_for_each_callback_args(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee)10968 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
10969 struct bpf_func_state *caller,
10970 struct bpf_func_state *callee)
10971 {
10972 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
10973 * void *callback_ctx, u64 flags);
10974 * callback_fn(struct bpf_map *map, void *key, void *value,
10975 * void *callback_ctx);
10976 */
10977 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
10978
10979 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
10980 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
10981 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
10982
10983 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
10984 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
10985 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
10986
10987 /* pointer to stack or null */
10988 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
10989
10990 /* unused */
10991 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
10992 return 0;
10993 }
10994
set_callee_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)10995 static int set_callee_state(struct bpf_verifier_env *env,
10996 struct bpf_func_state *caller,
10997 struct bpf_func_state *callee, int insn_idx)
10998 {
10999 int i;
11000
11001 /* copy r1 - r5 args that callee can access. The copy includes parent
11002 * pointers, which connects us up to the liveness chain
11003 */
11004 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
11005 callee->regs[i] = caller->regs[i];
11006 return 0;
11007 }
11008
set_map_elem_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)11009 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
11010 struct bpf_func_state *caller,
11011 struct bpf_func_state *callee,
11012 int insn_idx)
11013 {
11014 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
11015 struct bpf_map *map;
11016 int err;
11017
11018 /* valid map_ptr and poison value does not matter */
11019 map = insn_aux->map_ptr_state.map_ptr;
11020 if (!map->ops->map_set_for_each_callback_args ||
11021 !map->ops->map_for_each_callback) {
11022 verbose(env, "callback function not allowed for map\n");
11023 return -ENOTSUPP;
11024 }
11025
11026 err = map->ops->map_set_for_each_callback_args(env, caller, callee);
11027 if (err)
11028 return err;
11029
11030 callee->in_callback_fn = true;
11031 callee->callback_ret_range = retval_range(0, 1);
11032 return 0;
11033 }
11034
set_loop_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)11035 static int set_loop_callback_state(struct bpf_verifier_env *env,
11036 struct bpf_func_state *caller,
11037 struct bpf_func_state *callee,
11038 int insn_idx)
11039 {
11040 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
11041 * u64 flags);
11042 * callback_fn(u64 index, void *callback_ctx);
11043 */
11044 callee->regs[BPF_REG_1].type = SCALAR_VALUE;
11045 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
11046
11047 /* unused */
11048 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
11049 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
11050 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
11051
11052 callee->in_callback_fn = true;
11053 callee->callback_ret_range = retval_range(0, 1);
11054 return 0;
11055 }
11056
set_timer_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)11057 static int set_timer_callback_state(struct bpf_verifier_env *env,
11058 struct bpf_func_state *caller,
11059 struct bpf_func_state *callee,
11060 int insn_idx)
11061 {
11062 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
11063
11064 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
11065 * callback_fn(struct bpf_map *map, void *key, void *value);
11066 */
11067 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
11068 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
11069 callee->regs[BPF_REG_1].map_ptr = map_ptr;
11070
11071 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
11072 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
11073 callee->regs[BPF_REG_2].map_ptr = map_ptr;
11074
11075 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
11076 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
11077 callee->regs[BPF_REG_3].map_ptr = map_ptr;
11078
11079 /* unused */
11080 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
11081 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
11082 callee->in_async_callback_fn = true;
11083 callee->callback_ret_range = retval_range(0, 0);
11084 return 0;
11085 }
11086
set_find_vma_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)11087 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
11088 struct bpf_func_state *caller,
11089 struct bpf_func_state *callee,
11090 int insn_idx)
11091 {
11092 /* bpf_find_vma(struct task_struct *task, u64 addr,
11093 * void *callback_fn, void *callback_ctx, u64 flags)
11094 * (callback_fn)(struct task_struct *task,
11095 * struct vm_area_struct *vma, void *callback_ctx);
11096 */
11097 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
11098
11099 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
11100 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
11101 callee->regs[BPF_REG_2].btf = btf_vmlinux;
11102 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
11103
11104 /* pointer to stack or null */
11105 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
11106
11107 /* unused */
11108 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
11109 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
11110 callee->in_callback_fn = true;
11111 callee->callback_ret_range = retval_range(0, 1);
11112 return 0;
11113 }
11114
set_user_ringbuf_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)11115 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
11116 struct bpf_func_state *caller,
11117 struct bpf_func_state *callee,
11118 int insn_idx)
11119 {
11120 /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
11121 * callback_ctx, u64 flags);
11122 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
11123 */
11124 __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
11125 mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
11126 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
11127
11128 /* unused */
11129 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
11130 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
11131 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
11132
11133 callee->in_callback_fn = true;
11134 callee->callback_ret_range = retval_range(0, 1);
11135 return 0;
11136 }
11137
set_rbtree_add_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)11138 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
11139 struct bpf_func_state *caller,
11140 struct bpf_func_state *callee,
11141 int insn_idx)
11142 {
11143 /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
11144 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
11145 *
11146 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
11147 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
11148 * by this point, so look at 'root'
11149 */
11150 struct btf_field *field;
11151
11152 field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
11153 BPF_RB_ROOT);
11154 if (!field || !field->graph_root.value_btf_id)
11155 return -EFAULT;
11156
11157 mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
11158 ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
11159 mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
11160 ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
11161
11162 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
11163 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
11164 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
11165 callee->in_callback_fn = true;
11166 callee->callback_ret_range = retval_range(0, 1);
11167 return 0;
11168 }
11169
set_task_work_schedule_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)11170 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
11171 struct bpf_func_state *caller,
11172 struct bpf_func_state *callee,
11173 int insn_idx)
11174 {
11175 struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
11176
11177 /*
11178 * callback_fn(struct bpf_map *map, void *key, void *value);
11179 */
11180 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
11181 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
11182 callee->regs[BPF_REG_1].map_ptr = map_ptr;
11183
11184 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
11185 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
11186 callee->regs[BPF_REG_2].map_ptr = map_ptr;
11187
11188 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
11189 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
11190 callee->regs[BPF_REG_3].map_ptr = map_ptr;
11191
11192 /* unused */
11193 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
11194 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
11195 callee->in_async_callback_fn = true;
11196 callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
11197 return 0;
11198 }
11199
11200 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
11201
11202 /* Are we currently verifying the callback for a rbtree helper that must
11203 * be called with lock held? If so, no need to complain about unreleased
11204 * lock
11205 */
in_rbtree_lock_required_cb(struct bpf_verifier_env * env)11206 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
11207 {
11208 struct bpf_verifier_state *state = env->cur_state;
11209 struct bpf_insn *insn = env->prog->insnsi;
11210 struct bpf_func_state *callee;
11211 int kfunc_btf_id;
11212
11213 if (!state->curframe)
11214 return false;
11215
11216 callee = state->frame[state->curframe];
11217
11218 if (!callee->in_callback_fn)
11219 return false;
11220
11221 kfunc_btf_id = insn[callee->callsite].imm;
11222 return is_rbtree_lock_required_kfunc(kfunc_btf_id);
11223 }
11224
retval_range_within(struct bpf_retval_range range,const struct bpf_reg_state * reg,bool return_32bit)11225 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
11226 bool return_32bit)
11227 {
11228 if (return_32bit)
11229 return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
11230 else
11231 return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
11232 }
11233
prepare_func_exit(struct bpf_verifier_env * env,int * insn_idx)11234 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
11235 {
11236 struct bpf_verifier_state *state = env->cur_state, *prev_st;
11237 struct bpf_func_state *caller, *callee;
11238 struct bpf_reg_state *r0;
11239 bool in_callback_fn;
11240 int err;
11241
11242 err = bpf_update_live_stack(env);
11243 if (err)
11244 return err;
11245
11246 callee = state->frame[state->curframe];
11247 r0 = &callee->regs[BPF_REG_0];
11248 if (r0->type == PTR_TO_STACK) {
11249 /* technically it's ok to return caller's stack pointer
11250 * (or caller's caller's pointer) back to the caller,
11251 * since these pointers are valid. Only current stack
11252 * pointer will be invalid as soon as function exits,
11253 * but let's be conservative
11254 */
11255 verbose(env, "cannot return stack pointer to the caller\n");
11256 return -EINVAL;
11257 }
11258
11259 caller = state->frame[state->curframe - 1];
11260 if (callee->in_callback_fn) {
11261 if (r0->type != SCALAR_VALUE) {
11262 verbose(env, "R0 not a scalar value\n");
11263 return -EACCES;
11264 }
11265
11266 /* we are going to rely on register's precise value */
11267 err = mark_chain_precision(env, BPF_REG_0);
11268 if (err)
11269 return err;
11270
11271 /* enforce R0 return value range, and bpf_callback_t returns 64bit */
11272 if (!retval_range_within(callee->callback_ret_range, r0, false)) {
11273 verbose_invalid_scalar(env, r0, callee->callback_ret_range,
11274 "At callback return", "R0");
11275 return -EINVAL;
11276 }
11277 if (!bpf_calls_callback(env, callee->callsite)) {
11278 verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
11279 *insn_idx, callee->callsite);
11280 return -EFAULT;
11281 }
11282 } else {
11283 /* return to the caller whatever r0 had in the callee */
11284 caller->regs[BPF_REG_0] = *r0;
11285 }
11286
11287 /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
11288 * there function call logic would reschedule callback visit. If iteration
11289 * converges is_state_visited() would prune that visit eventually.
11290 */
11291 in_callback_fn = callee->in_callback_fn;
11292 if (in_callback_fn)
11293 *insn_idx = callee->callsite;
11294 else
11295 *insn_idx = callee->callsite + 1;
11296
11297 if (env->log.level & BPF_LOG_LEVEL) {
11298 verbose(env, "returning from callee:\n");
11299 print_verifier_state(env, state, callee->frameno, true);
11300 verbose(env, "to caller at %d:\n", *insn_idx);
11301 print_verifier_state(env, state, caller->frameno, true);
11302 }
11303 /* clear everything in the callee. In case of exceptional exits using
11304 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
11305 free_func_state(callee);
11306 state->frame[state->curframe--] = NULL;
11307
11308 /* for callbacks widen imprecise scalars to make programs like below verify:
11309 *
11310 * struct ctx { int i; }
11311 * void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
11312 * ...
11313 * struct ctx = { .i = 0; }
11314 * bpf_loop(100, cb, &ctx, 0);
11315 *
11316 * This is similar to what is done in process_iter_next_call() for open
11317 * coded iterators.
11318 */
11319 prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
11320 if (prev_st) {
11321 err = widen_imprecise_scalars(env, prev_st, state);
11322 if (err)
11323 return err;
11324 }
11325 return 0;
11326 }
11327
do_refine_retval_range(struct bpf_verifier_env * env,struct bpf_reg_state * regs,int ret_type,int func_id,struct bpf_call_arg_meta * meta)11328 static int do_refine_retval_range(struct bpf_verifier_env *env,
11329 struct bpf_reg_state *regs, int ret_type,
11330 int func_id,
11331 struct bpf_call_arg_meta *meta)
11332 {
11333 struct bpf_reg_state *ret_reg = ®s[BPF_REG_0];
11334
11335 if (ret_type != RET_INTEGER)
11336 return 0;
11337
11338 switch (func_id) {
11339 case BPF_FUNC_get_stack:
11340 case BPF_FUNC_get_task_stack:
11341 case BPF_FUNC_probe_read_str:
11342 case BPF_FUNC_probe_read_kernel_str:
11343 case BPF_FUNC_probe_read_user_str:
11344 ret_reg->smax_value = meta->msize_max_value;
11345 ret_reg->s32_max_value = meta->msize_max_value;
11346 ret_reg->smin_value = -MAX_ERRNO;
11347 ret_reg->s32_min_value = -MAX_ERRNO;
11348 reg_bounds_sync(ret_reg);
11349 break;
11350 case BPF_FUNC_get_smp_processor_id:
11351 ret_reg->umax_value = nr_cpu_ids - 1;
11352 ret_reg->u32_max_value = nr_cpu_ids - 1;
11353 ret_reg->smax_value = nr_cpu_ids - 1;
11354 ret_reg->s32_max_value = nr_cpu_ids - 1;
11355 ret_reg->umin_value = 0;
11356 ret_reg->u32_min_value = 0;
11357 ret_reg->smin_value = 0;
11358 ret_reg->s32_min_value = 0;
11359 reg_bounds_sync(ret_reg);
11360 break;
11361 }
11362
11363 return reg_bounds_sanity_check(env, ret_reg, "retval");
11364 }
11365
11366 static int
record_func_map(struct bpf_verifier_env * env,struct bpf_call_arg_meta * meta,int func_id,int insn_idx)11367 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
11368 int func_id, int insn_idx)
11369 {
11370 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
11371 struct bpf_map *map = meta->map.ptr;
11372
11373 if (func_id != BPF_FUNC_tail_call &&
11374 func_id != BPF_FUNC_map_lookup_elem &&
11375 func_id != BPF_FUNC_map_update_elem &&
11376 func_id != BPF_FUNC_map_delete_elem &&
11377 func_id != BPF_FUNC_map_push_elem &&
11378 func_id != BPF_FUNC_map_pop_elem &&
11379 func_id != BPF_FUNC_map_peek_elem &&
11380 func_id != BPF_FUNC_for_each_map_elem &&
11381 func_id != BPF_FUNC_redirect_map &&
11382 func_id != BPF_FUNC_map_lookup_percpu_elem)
11383 return 0;
11384
11385 if (map == NULL) {
11386 verifier_bug(env, "expected map for helper call");
11387 return -EFAULT;
11388 }
11389
11390 /* In case of read-only, some additional restrictions
11391 * need to be applied in order to prevent altering the
11392 * state of the map from program side.
11393 */
11394 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
11395 (func_id == BPF_FUNC_map_delete_elem ||
11396 func_id == BPF_FUNC_map_update_elem ||
11397 func_id == BPF_FUNC_map_push_elem ||
11398 func_id == BPF_FUNC_map_pop_elem)) {
11399 verbose(env, "write into map forbidden\n");
11400 return -EACCES;
11401 }
11402
11403 if (!aux->map_ptr_state.map_ptr)
11404 bpf_map_ptr_store(aux, meta->map.ptr,
11405 !meta->map.ptr->bypass_spec_v1, false);
11406 else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
11407 bpf_map_ptr_store(aux, meta->map.ptr,
11408 !meta->map.ptr->bypass_spec_v1, true);
11409 return 0;
11410 }
11411
11412 static int
record_func_key(struct bpf_verifier_env * env,struct bpf_call_arg_meta * meta,int func_id,int insn_idx)11413 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
11414 int func_id, int insn_idx)
11415 {
11416 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
11417 struct bpf_reg_state *reg;
11418 struct bpf_map *map = meta->map.ptr;
11419 u64 val, max;
11420 int err;
11421
11422 if (func_id != BPF_FUNC_tail_call)
11423 return 0;
11424 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
11425 verbose(env, "expected prog array map for tail call");
11426 return -EINVAL;
11427 }
11428
11429 reg = reg_state(env, BPF_REG_3);
11430 val = reg->var_off.value;
11431 max = map->max_entries;
11432
11433 if (!(is_reg_const(reg, false) && val < max)) {
11434 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
11435 return 0;
11436 }
11437
11438 err = mark_chain_precision(env, BPF_REG_3);
11439 if (err)
11440 return err;
11441 if (bpf_map_key_unseen(aux))
11442 bpf_map_key_store(aux, val);
11443 else if (!bpf_map_key_poisoned(aux) &&
11444 bpf_map_key_immediate(aux) != val)
11445 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
11446 return 0;
11447 }
11448
check_reference_leak(struct bpf_verifier_env * env,bool exception_exit)11449 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
11450 {
11451 struct bpf_verifier_state *state = env->cur_state;
11452 enum bpf_prog_type type = resolve_prog_type(env->prog);
11453 struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
11454 bool refs_lingering = false;
11455 int i;
11456
11457 if (!exception_exit && cur_func(env)->frameno)
11458 return 0;
11459
11460 for (i = 0; i < state->acquired_refs; i++) {
11461 if (state->refs[i].type != REF_TYPE_PTR)
11462 continue;
11463 /* Allow struct_ops programs to return a referenced kptr back to
11464 * kernel. Type checks are performed later in check_return_code.
11465 */
11466 if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
11467 reg->ref_obj_id == state->refs[i].id)
11468 continue;
11469 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
11470 state->refs[i].id, state->refs[i].insn_idx);
11471 refs_lingering = true;
11472 }
11473 return refs_lingering ? -EINVAL : 0;
11474 }
11475
check_resource_leak(struct bpf_verifier_env * env,bool exception_exit,bool check_lock,const char * prefix)11476 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
11477 {
11478 int err;
11479
11480 if (check_lock && env->cur_state->active_locks) {
11481 verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
11482 return -EINVAL;
11483 }
11484
11485 err = check_reference_leak(env, exception_exit);
11486 if (err) {
11487 verbose(env, "%s would lead to reference leak\n", prefix);
11488 return err;
11489 }
11490
11491 if (check_lock && env->cur_state->active_irq_id) {
11492 verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
11493 return -EINVAL;
11494 }
11495
11496 if (check_lock && env->cur_state->active_rcu_locks) {
11497 verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
11498 return -EINVAL;
11499 }
11500
11501 if (check_lock && env->cur_state->active_preempt_locks) {
11502 verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
11503 return -EINVAL;
11504 }
11505
11506 return 0;
11507 }
11508
check_bpf_snprintf_call(struct bpf_verifier_env * env,struct bpf_reg_state * regs)11509 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
11510 struct bpf_reg_state *regs)
11511 {
11512 struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3];
11513 struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5];
11514 struct bpf_map *fmt_map = fmt_reg->map_ptr;
11515 struct bpf_bprintf_data data = {};
11516 int err, fmt_map_off, num_args;
11517 u64 fmt_addr;
11518 char *fmt;
11519
11520 /* data must be an array of u64 */
11521 if (data_len_reg->var_off.value % 8)
11522 return -EINVAL;
11523 num_args = data_len_reg->var_off.value / 8;
11524
11525 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
11526 * and map_direct_value_addr is set.
11527 */
11528 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
11529 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
11530 fmt_map_off);
11531 if (err) {
11532 verbose(env, "failed to retrieve map value address\n");
11533 return -EFAULT;
11534 }
11535 fmt = (char *)(long)fmt_addr + fmt_map_off;
11536
11537 /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
11538 * can focus on validating the format specifiers.
11539 */
11540 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
11541 if (err < 0)
11542 verbose(env, "Invalid format string\n");
11543
11544 return err;
11545 }
11546
check_get_func_ip(struct bpf_verifier_env * env)11547 static int check_get_func_ip(struct bpf_verifier_env *env)
11548 {
11549 enum bpf_prog_type type = resolve_prog_type(env->prog);
11550 int func_id = BPF_FUNC_get_func_ip;
11551
11552 if (type == BPF_PROG_TYPE_TRACING) {
11553 if (!bpf_prog_has_trampoline(env->prog)) {
11554 verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
11555 func_id_name(func_id), func_id);
11556 return -ENOTSUPP;
11557 }
11558 return 0;
11559 } else if (type == BPF_PROG_TYPE_KPROBE) {
11560 return 0;
11561 }
11562
11563 verbose(env, "func %s#%d not supported for program type %d\n",
11564 func_id_name(func_id), func_id, type);
11565 return -ENOTSUPP;
11566 }
11567
cur_aux(const struct bpf_verifier_env * env)11568 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
11569 {
11570 return &env->insn_aux_data[env->insn_idx];
11571 }
11572
loop_flag_is_zero(struct bpf_verifier_env * env)11573 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
11574 {
11575 struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
11576 bool reg_is_null = register_is_null(reg);
11577
11578 if (reg_is_null)
11579 mark_chain_precision(env, BPF_REG_4);
11580
11581 return reg_is_null;
11582 }
11583
/*
 * Update the loop inlining bookkeeping of the current instruction.
 *
 * First visit: record @subprogno as the callback and set fit_for_inline
 * from loop_flag_is_zero().
 *
 * Later visits: once fit_for_inline has been cleared it stays cleared;
 * otherwise it remains set only if the flag register is still zero and the
 * callback subprogram is the one recorded on the first visit.
 */
static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
{
	struct bpf_loop_inline_state *inl = &cur_aux(env)->loop_inline_state;

	if (inl->initialized) {
		if (inl->fit_for_inline)
			inl->fit_for_inline = (loop_flag_is_zero(env) &&
					       inl->callback_subprogno == subprogno);
		return;
	}

	inl->initialized = 1;
	inl->fit_for_inline = loop_flag_is_zero(env);
	inl->callback_subprogno = subprogno;
}
11601
11602 /* Returns whether or not the given map type can potentially elide
11603 * lookup return value nullness check. This is possible if the key
11604 * is statically known.
11605 */
can_elide_value_nullness(enum bpf_map_type type)11606 static bool can_elide_value_nullness(enum bpf_map_type type)
11607 {
11608 switch (type) {
11609 case BPF_MAP_TYPE_ARRAY:
11610 case BPF_MAP_TYPE_PERCPU_ARRAY:
11611 return true;
11612 default:
11613 return false;
11614 }
11615 }
11616
get_helper_proto(struct bpf_verifier_env * env,int func_id,const struct bpf_func_proto ** ptr)11617 static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
11618 const struct bpf_func_proto **ptr)
11619 {
11620 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
11621 return -ERANGE;
11622
11623 if (!env->ops->get_func_proto)
11624 return -EINVAL;
11625
11626 *ptr = env->ops->get_func_proto(func_id, env->prog);
11627 return *ptr && (*ptr)->func ? 0 : -EINVAL;
11628 }
11629
11630 /* Check if we're in a sleepable context. */
in_sleepable_context(struct bpf_verifier_env * env)11631 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
11632 {
11633 return !env->cur_state->active_rcu_locks &&
11634 !env->cur_state->active_preempt_locks &&
11635 !env->cur_state->active_locks &&
11636 !env->cur_state->active_irq_id &&
11637 in_sleepable(env);
11638 }
11639
check_helper_call(struct bpf_verifier_env * env,struct bpf_insn * insn,int * insn_idx_p)11640 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
11641 int *insn_idx_p)
11642 {
11643 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
11644 bool returns_cpu_specific_alloc_ptr = false;
11645 const struct bpf_func_proto *fn = NULL;
11646 enum bpf_return_type ret_type;
11647 enum bpf_type_flag ret_flag;
11648 struct bpf_reg_state *regs;
11649 struct bpf_call_arg_meta meta;
11650 int insn_idx = *insn_idx_p;
11651 bool changes_data;
11652 int i, err, func_id;
11653
11654 /* find function prototype */
11655 func_id = insn->imm;
11656 err = get_helper_proto(env, insn->imm, &fn);
11657 if (err == -ERANGE) {
11658 verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
11659 return -EINVAL;
11660 }
11661
11662 if (err) {
11663 verbose(env, "program of this type cannot use helper %s#%d\n",
11664 func_id_name(func_id), func_id);
11665 return err;
11666 }
11667
11668 /* eBPF programs must be GPL compatible to use GPL-ed functions */
11669 if (!env->prog->gpl_compatible && fn->gpl_only) {
11670 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
11671 return -EINVAL;
11672 }
11673
11674 if (fn->allowed && !fn->allowed(env->prog)) {
11675 verbose(env, "helper call is not allowed in probe\n");
11676 return -EINVAL;
11677 }
11678
11679 if (!in_sleepable(env) && fn->might_sleep) {
11680 verbose(env, "helper call might sleep in a non-sleepable prog\n");
11681 return -EINVAL;
11682 }
11683
11684 /* With LD_ABS/IND some JITs save/restore skb from r1. */
11685 changes_data = bpf_helper_changes_pkt_data(func_id);
11686 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
11687 verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
11688 return -EFAULT;
11689 }
11690
11691 memset(&meta, 0, sizeof(meta));
11692 meta.pkt_access = fn->pkt_access;
11693
11694 err = check_func_proto(fn);
11695 if (err) {
11696 verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
11697 return err;
11698 }
11699
11700 if (env->cur_state->active_rcu_locks) {
11701 if (fn->might_sleep) {
11702 verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
11703 func_id_name(func_id), func_id);
11704 return -EINVAL;
11705 }
11706 }
11707
11708 if (env->cur_state->active_preempt_locks) {
11709 if (fn->might_sleep) {
11710 verbose(env, "sleepable helper %s#%d in non-preemptible region\n",
11711 func_id_name(func_id), func_id);
11712 return -EINVAL;
11713 }
11714 }
11715
11716 if (env->cur_state->active_irq_id) {
11717 if (fn->might_sleep) {
11718 verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n",
11719 func_id_name(func_id), func_id);
11720 return -EINVAL;
11721 }
11722 }
11723
11724 /* Track non-sleepable context for helpers. */
11725 if (!in_sleepable_context(env))
11726 env->insn_aux_data[insn_idx].non_sleepable = true;
11727
11728 meta.func_id = func_id;
11729 /* check args */
11730 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
11731 err = check_func_arg(env, i, &meta, fn, insn_idx);
11732 if (err)
11733 return err;
11734 }
11735
11736 err = record_func_map(env, &meta, func_id, insn_idx);
11737 if (err)
11738 return err;
11739
11740 err = record_func_key(env, &meta, func_id, insn_idx);
11741 if (err)
11742 return err;
11743
11744 /* Mark slots with STACK_MISC in case of raw mode, stack offset
11745 * is inferred from register state.
11746 */
11747 for (i = 0; i < meta.access_size; i++) {
11748 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
11749 BPF_WRITE, -1, false, false);
11750 if (err)
11751 return err;
11752 }
11753
11754 regs = cur_regs(env);
11755
11756 if (meta.release_regno) {
11757 err = -EINVAL;
11758 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
11759 err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]);
11760 } else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
11761 u32 ref_obj_id = meta.ref_obj_id;
11762 bool in_rcu = in_rcu_cs(env);
11763 struct bpf_func_state *state;
11764 struct bpf_reg_state *reg;
11765
11766 err = release_reference_nomark(env->cur_state, ref_obj_id);
11767 if (!err) {
11768 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
11769 if (reg->ref_obj_id == ref_obj_id) {
11770 if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
11771 reg->ref_obj_id = 0;
11772 reg->type &= ~MEM_ALLOC;
11773 reg->type |= MEM_RCU;
11774 } else {
11775 mark_reg_invalid(env, reg);
11776 }
11777 }
11778 }));
11779 }
11780 } else if (meta.ref_obj_id) {
11781 err = release_reference(env, meta.ref_obj_id);
11782 } else if (register_is_null(®s[meta.release_regno])) {
11783 /* meta.ref_obj_id can only be 0 if register that is meant to be
11784 * released is NULL, which must be > R0.
11785 */
11786 err = 0;
11787 }
11788 if (err) {
11789 verbose(env, "func %s#%d reference has not been acquired before\n",
11790 func_id_name(func_id), func_id);
11791 return err;
11792 }
11793 }
11794
11795 switch (func_id) {
11796 case BPF_FUNC_tail_call:
11797 err = check_resource_leak(env, false, true, "tail_call");
11798 if (err)
11799 return err;
11800 break;
11801 case BPF_FUNC_get_local_storage:
11802 /* check that flags argument in get_local_storage(map, flags) is 0,
11803 * this is required because get_local_storage() can't return an error.
11804 */
11805 if (!register_is_null(®s[BPF_REG_2])) {
11806 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
11807 return -EINVAL;
11808 }
11809 break;
11810 case BPF_FUNC_for_each_map_elem:
11811 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11812 set_map_elem_callback_state);
11813 break;
11814 case BPF_FUNC_timer_set_callback:
11815 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11816 set_timer_callback_state);
11817 break;
11818 case BPF_FUNC_find_vma:
11819 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11820 set_find_vma_callback_state);
11821 break;
11822 case BPF_FUNC_snprintf:
11823 err = check_bpf_snprintf_call(env, regs);
11824 break;
11825 case BPF_FUNC_loop:
11826 update_loop_inline_state(env, meta.subprogno);
11827 /* Verifier relies on R1 value to determine if bpf_loop() iteration
11828 * is finished, thus mark it precise.
11829 */
11830 err = mark_chain_precision(env, BPF_REG_1);
11831 if (err)
11832 return err;
11833 if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
11834 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11835 set_loop_callback_state);
11836 } else {
11837 cur_func(env)->callback_depth = 0;
11838 if (env->log.level & BPF_LOG_LEVEL2)
11839 verbose(env, "frame%d bpf_loop iteration limit reached\n",
11840 env->cur_state->curframe);
11841 }
11842 break;
11843 case BPF_FUNC_dynptr_from_mem:
11844 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
11845 verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
11846 reg_type_str(env, regs[BPF_REG_1].type));
11847 return -EACCES;
11848 }
11849 break;
11850 case BPF_FUNC_set_retval:
11851 if (prog_type == BPF_PROG_TYPE_LSM &&
11852 env->prog->expected_attach_type == BPF_LSM_CGROUP) {
11853 if (!env->prog->aux->attach_func_proto->type) {
11854 /* Make sure programs that attach to void
11855 * hooks don't try to modify return value.
11856 */
11857 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
11858 return -EINVAL;
11859 }
11860 }
11861 break;
11862 case BPF_FUNC_dynptr_data:
11863 {
11864 struct bpf_reg_state *reg;
11865 int id, ref_obj_id;
11866
11867 reg = get_dynptr_arg_reg(env, fn, regs);
11868 if (!reg)
11869 return -EFAULT;
11870
11871
11872 if (meta.dynptr_id) {
11873 verifier_bug(env, "meta.dynptr_id already set");
11874 return -EFAULT;
11875 }
11876 if (meta.ref_obj_id) {
11877 verifier_bug(env, "meta.ref_obj_id already set");
11878 return -EFAULT;
11879 }
11880
11881 id = dynptr_id(env, reg);
11882 if (id < 0) {
11883 verifier_bug(env, "failed to obtain dynptr id");
11884 return id;
11885 }
11886
11887 ref_obj_id = dynptr_ref_obj_id(env, reg);
11888 if (ref_obj_id < 0) {
11889 verifier_bug(env, "failed to obtain dynptr ref_obj_id");
11890 return ref_obj_id;
11891 }
11892
11893 meta.dynptr_id = id;
11894 meta.ref_obj_id = ref_obj_id;
11895
11896 break;
11897 }
11898 case BPF_FUNC_dynptr_write:
11899 {
11900 enum bpf_dynptr_type dynptr_type;
11901 struct bpf_reg_state *reg;
11902
11903 reg = get_dynptr_arg_reg(env, fn, regs);
11904 if (!reg)
11905 return -EFAULT;
11906
11907 dynptr_type = dynptr_get_type(env, reg);
11908 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
11909 return -EFAULT;
11910
11911 if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
11912 dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
11913 /* this will trigger clear_all_pkt_pointers(), which will
11914 * invalidate all dynptr slices associated with the skb
11915 */
11916 changes_data = true;
11917
11918 break;
11919 }
11920 case BPF_FUNC_per_cpu_ptr:
11921 case BPF_FUNC_this_cpu_ptr:
11922 {
11923 struct bpf_reg_state *reg = ®s[BPF_REG_1];
11924 const struct btf_type *type;
11925
11926 if (reg->type & MEM_RCU) {
11927 type = btf_type_by_id(reg->btf, reg->btf_id);
11928 if (!type || !btf_type_is_struct(type)) {
11929 verbose(env, "Helper has invalid btf/btf_id in R1\n");
11930 return -EFAULT;
11931 }
11932 returns_cpu_specific_alloc_ptr = true;
11933 env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
11934 }
11935 break;
11936 }
11937 case BPF_FUNC_user_ringbuf_drain:
11938 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
11939 set_user_ringbuf_callback_state);
11940 break;
11941 }
11942
11943 if (err)
11944 return err;
11945
11946 /* reset caller saved regs */
11947 for (i = 0; i < CALLER_SAVED_REGS; i++) {
11948 mark_reg_not_init(env, regs, caller_saved[i]);
11949 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
11950 }
11951
11952 /* helper call returns 64-bit value. */
11953 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
11954
11955 /* update return register (already marked as written above) */
11956 ret_type = fn->ret_type;
11957 ret_flag = type_flag(ret_type);
11958
11959 switch (base_type(ret_type)) {
11960 case RET_INTEGER:
11961 /* sets type to SCALAR_VALUE */
11962 mark_reg_unknown(env, regs, BPF_REG_0);
11963 break;
11964 case RET_VOID:
11965 regs[BPF_REG_0].type = NOT_INIT;
11966 break;
11967 case RET_PTR_TO_MAP_VALUE:
11968 /* There is no offset yet applied, variable or fixed */
11969 mark_reg_known_zero(env, regs, BPF_REG_0);
11970 /* remember map_ptr, so that check_map_access()
11971 * can check 'value_size' boundary of memory access
11972 * to map element returned from bpf_map_lookup_elem()
11973 */
11974 if (meta.map.ptr == NULL) {
11975 verifier_bug(env, "unexpected null map_ptr");
11976 return -EFAULT;
11977 }
11978
11979 if (func_id == BPF_FUNC_map_lookup_elem &&
11980 can_elide_value_nullness(meta.map.ptr->map_type) &&
11981 meta.const_map_key >= 0 &&
11982 meta.const_map_key < meta.map.ptr->max_entries)
11983 ret_flag &= ~PTR_MAYBE_NULL;
11984
11985 regs[BPF_REG_0].map_ptr = meta.map.ptr;
11986 regs[BPF_REG_0].map_uid = meta.map.uid;
11987 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
11988 if (!type_may_be_null(ret_flag) &&
11989 btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
11990 regs[BPF_REG_0].id = ++env->id_gen;
11991 }
11992 break;
11993 case RET_PTR_TO_SOCKET:
11994 mark_reg_known_zero(env, regs, BPF_REG_0);
11995 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
11996 break;
11997 case RET_PTR_TO_SOCK_COMMON:
11998 mark_reg_known_zero(env, regs, BPF_REG_0);
11999 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
12000 break;
12001 case RET_PTR_TO_TCP_SOCK:
12002 mark_reg_known_zero(env, regs, BPF_REG_0);
12003 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
12004 break;
12005 case RET_PTR_TO_MEM:
12006 mark_reg_known_zero(env, regs, BPF_REG_0);
12007 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
12008 regs[BPF_REG_0].mem_size = meta.mem_size;
12009 break;
12010 case RET_PTR_TO_MEM_OR_BTF_ID:
12011 {
12012 const struct btf_type *t;
12013
12014 mark_reg_known_zero(env, regs, BPF_REG_0);
12015 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
12016 if (!btf_type_is_struct(t)) {
12017 u32 tsize;
12018 const struct btf_type *ret;
12019 const char *tname;
12020
12021 /* resolve the type size of ksym. */
12022 ret = btf_resolve_size(meta.ret_btf, t, &tsize);
12023 if (IS_ERR(ret)) {
12024 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
12025 verbose(env, "unable to resolve the size of type '%s': %ld\n",
12026 tname, PTR_ERR(ret));
12027 return -EINVAL;
12028 }
12029 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
12030 regs[BPF_REG_0].mem_size = tsize;
12031 } else {
12032 if (returns_cpu_specific_alloc_ptr) {
12033 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
12034 } else {
12035 /* MEM_RDONLY may be carried from ret_flag, but it
12036 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
12037 * it will confuse the check of PTR_TO_BTF_ID in
12038 * check_mem_access().
12039 */
12040 ret_flag &= ~MEM_RDONLY;
12041 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
12042 }
12043
12044 regs[BPF_REG_0].btf = meta.ret_btf;
12045 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
12046 }
12047 break;
12048 }
12049 case RET_PTR_TO_BTF_ID:
12050 {
12051 struct btf *ret_btf;
12052 int ret_btf_id;
12053
12054 mark_reg_known_zero(env, regs, BPF_REG_0);
12055 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
12056 if (func_id == BPF_FUNC_kptr_xchg) {
12057 ret_btf = meta.kptr_field->kptr.btf;
12058 ret_btf_id = meta.kptr_field->kptr.btf_id;
12059 if (!btf_is_kernel(ret_btf)) {
12060 regs[BPF_REG_0].type |= MEM_ALLOC;
12061 if (meta.kptr_field->type == BPF_KPTR_PERCPU)
12062 regs[BPF_REG_0].type |= MEM_PERCPU;
12063 }
12064 } else {
12065 if (fn->ret_btf_id == BPF_PTR_POISON) {
12066 verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
12067 func_id_name(func_id));
12068 return -EFAULT;
12069 }
12070 ret_btf = btf_vmlinux;
12071 ret_btf_id = *fn->ret_btf_id;
12072 }
12073 if (ret_btf_id == 0) {
12074 verbose(env, "invalid return type %u of func %s#%d\n",
12075 base_type(ret_type), func_id_name(func_id),
12076 func_id);
12077 return -EINVAL;
12078 }
12079 regs[BPF_REG_0].btf = ret_btf;
12080 regs[BPF_REG_0].btf_id = ret_btf_id;
12081 break;
12082 }
12083 default:
12084 verbose(env, "unknown return type %u of func %s#%d\n",
12085 base_type(ret_type), func_id_name(func_id), func_id);
12086 return -EINVAL;
12087 }
12088
12089 if (type_may_be_null(regs[BPF_REG_0].type))
12090 regs[BPF_REG_0].id = ++env->id_gen;
12091
12092 if (helper_multiple_ref_obj_use(func_id, meta.map.ptr)) {
12093 verifier_bug(env, "func %s#%d sets ref_obj_id more than once",
12094 func_id_name(func_id), func_id);
12095 return -EFAULT;
12096 }
12097
12098 if (is_dynptr_ref_function(func_id))
12099 regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
12100
12101 if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
12102 /* For release_reference() */
12103 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
12104 } else if (is_acquire_function(func_id, meta.map.ptr)) {
12105 int id = acquire_reference(env, insn_idx);
12106
12107 if (id < 0)
12108 return id;
12109 /* For mark_ptr_or_null_reg() */
12110 regs[BPF_REG_0].id = id;
12111 /* For release_reference() */
12112 regs[BPF_REG_0].ref_obj_id = id;
12113 }
12114
12115 err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
12116 if (err)
12117 return err;
12118
12119 err = check_map_func_compatibility(env, meta.map.ptr, func_id);
12120 if (err)
12121 return err;
12122
12123 if ((func_id == BPF_FUNC_get_stack ||
12124 func_id == BPF_FUNC_get_task_stack) &&
12125 !env->prog->has_callchain_buf) {
12126 const char *err_str;
12127
12128 #ifdef CONFIG_PERF_EVENTS
12129 err = get_callchain_buffers(sysctl_perf_event_max_stack);
12130 err_str = "cannot get callchain buffer for func %s#%d\n";
12131 #else
12132 err = -ENOTSUPP;
12133 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
12134 #endif
12135 if (err) {
12136 verbose(env, err_str, func_id_name(func_id), func_id);
12137 return err;
12138 }
12139
12140 env->prog->has_callchain_buf = true;
12141 }
12142
12143 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
12144 env->prog->call_get_stack = true;
12145
12146 if (func_id == BPF_FUNC_get_func_ip) {
12147 if (check_get_func_ip(env))
12148 return -ENOTSUPP;
12149 env->prog->call_get_func_ip = true;
12150 }
12151
12152 if (func_id == BPF_FUNC_tail_call) {
12153 if (env->cur_state->curframe) {
12154 struct bpf_verifier_state *branch;
12155
12156 mark_reg_scratched(env, BPF_REG_0);
12157 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
12158 if (IS_ERR(branch))
12159 return PTR_ERR(branch);
12160 clear_all_pkt_pointers(env);
12161 mark_reg_unknown(env, regs, BPF_REG_0);
12162 err = prepare_func_exit(env, &env->insn_idx);
12163 if (err)
12164 return err;
12165 env->insn_idx--;
12166 } else {
12167 changes_data = false;
12168 }
12169 }
12170
12171 if (changes_data)
12172 clear_all_pkt_pointers(env);
12173 return 0;
12174 }
12175
12176 /* mark_btf_func_reg_size() is used when the reg size is determined by
12177 * the BTF func_proto's return value size and argument.
12178 */
__mark_btf_func_reg_size(struct bpf_verifier_env * env,struct bpf_reg_state * regs,u32 regno,size_t reg_size)12179 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
12180 u32 regno, size_t reg_size)
12181 {
12182 struct bpf_reg_state *reg = ®s[regno];
12183
12184 if (regno == BPF_REG_0) {
12185 /* Function return value */
12186 reg->subreg_def = reg_size == sizeof(u64) ?
12187 DEF_NOT_SUBREG : env->insn_idx + 1;
12188 } else if (reg_size == sizeof(u64)) {
12189 /* Function argument */
12190 mark_insn_zext(env, reg);
12191 }
12192 }
12193
/* Apply __mark_btf_func_reg_size() to the registers of the current frame. */
static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
				   size_t reg_size)
{
	/* Plain call: a void function must not return an expression. */
	__mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
}
12199
is_kfunc_acquire(struct bpf_kfunc_call_arg_meta * meta)12200 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
12201 {
12202 return meta->kfunc_flags & KF_ACQUIRE;
12203 }
12204
is_kfunc_release(struct bpf_kfunc_call_arg_meta * meta)12205 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
12206 {
12207 return meta->kfunc_flags & KF_RELEASE;
12208 }
12209
is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta * meta)12210 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
12211 {
12212 return meta->kfunc_flags & KF_SLEEPABLE;
12213 }
12214
is_kfunc_destructive(struct bpf_kfunc_call_arg_meta * meta)12215 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
12216 {
12217 return meta->kfunc_flags & KF_DESTRUCTIVE;
12218 }
12219
is_kfunc_rcu(struct bpf_kfunc_call_arg_meta * meta)12220 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
12221 {
12222 return meta->kfunc_flags & KF_RCU;
12223 }
12224
is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta * meta)12225 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
12226 {
12227 return meta->kfunc_flags & KF_RCU_PROTECTED;
12228 }
12229
is_kfunc_arg_mem_size(const struct btf * btf,const struct btf_param * arg,const struct bpf_reg_state * reg)12230 static bool is_kfunc_arg_mem_size(const struct btf *btf,
12231 const struct btf_param *arg,
12232 const struct bpf_reg_state *reg)
12233 {
12234 const struct btf_type *t;
12235
12236 t = btf_type_skip_modifiers(btf, arg->type, NULL);
12237 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
12238 return false;
12239
12240 return btf_param_match_suffix(btf, arg, "__sz");
12241 }
12242
is_kfunc_arg_const_mem_size(const struct btf * btf,const struct btf_param * arg,const struct bpf_reg_state * reg)12243 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
12244 const struct btf_param *arg,
12245 const struct bpf_reg_state *reg)
12246 {
12247 const struct btf_type *t;
12248
12249 t = btf_type_skip_modifiers(btf, arg->type, NULL);
12250 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
12251 return false;
12252
12253 return btf_param_match_suffix(btf, arg, "__szk");
12254 }
12255
/* Reports whether btf_param_match_suffix() matches "__k" for @arg. */
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__k");

	return has_suffix;
}
12260
/* Reports whether btf_param_match_suffix() matches "__ign" for @arg. */
static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__ign");

	return has_suffix;
}
12265
/* Reports whether btf_param_match_suffix() matches "__map" for @arg. */
static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__map");

	return has_suffix;
}
12270
/* Reports whether btf_param_match_suffix() matches "__alloc" for @arg. */
static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__alloc");

	return has_suffix;
}
12275
/* Reports whether btf_param_match_suffix() matches "__uninit" for @arg. */
static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__uninit");

	return has_suffix;
}
12280
/* Reports whether btf_param_match_suffix() matches "__refcounted_kptr" for @arg. */
static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__refcounted_kptr");

	return has_suffix;
}
12285
/* Reports whether btf_param_match_suffix() matches "__nullable" for @arg. */
static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__nullable");

	return has_suffix;
}
12290
/* Reports whether btf_param_match_suffix() matches "__str" for @arg. */
static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__str");

	return has_suffix;
}
12295
/* Reports whether btf_param_match_suffix() matches "__irq_flag" for @arg. */
static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
{
	bool has_suffix = btf_param_match_suffix(btf, arg, "__irq_flag");

	return has_suffix;
}
12300
is_kfunc_arg_scalar_with_name(const struct btf * btf,const struct btf_param * arg,const char * name)12301 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
12302 const struct btf_param *arg,
12303 const char *name)
12304 {
12305 int len, target_len = strlen(name);
12306 const char *param_name;
12307
12308 param_name = btf_name_by_offset(btf, arg->name_off);
12309 if (str_is_empty(param_name))
12310 return false;
12311 len = strlen(param_name);
12312 if (len != target_len)
12313 return false;
12314 if (strcmp(param_name, name))
12315 return false;
12316
12317 return true;
12318 }
12319
/*
 * Indices into kf_arg_btf_ids[].  __is_kfunc_ptr_arg_type(),
 * is_rbtree_node_type() and is_list_node_type() use these constants as
 * array subscripts.
 *
 * NOTE(review): presumably each constant has to match the position of the
 * corresponding BTF_ID() entry in the kf_arg_btf_ids list, so the two must
 * be kept in the same order - confirm against BTF_ID_LIST semantics.
 */
enum {
	KF_ARG_DYNPTR_ID,
	KF_ARG_LIST_HEAD_ID,
	KF_ARG_LIST_NODE_ID,
	KF_ARG_RB_ROOT_ID,
	KF_ARG_RB_NODE_ID,
	KF_ARG_WORKQUEUE_ID,
	KF_ARG_RES_SPIN_LOCK_ID,
	KF_ARG_TASK_WORK_ID,
	KF_ARG_PROG_AUX_ID,
	KF_ARG_TIMER_ID
};
12332
/*
 * BTF ids of the struct types recognised as special kfunc pointer
 * arguments.  The list is subscripted with the KF_ARG_*_ID constants, whose
 * declaration order mirrors the entries below (dynptr first, timer last).
 *
 * NOTE(review): a new entry presumably needs a matching KF_ARG_*_ID constant
 * at the same position - confirm.
 */
BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
BTF_ID(struct, bpf_rb_root)
BTF_ID(struct, bpf_rb_node)
BTF_ID(struct, bpf_wq)
BTF_ID(struct, bpf_res_spin_lock)
BTF_ID(struct, bpf_task_work)
BTF_ID(struct, bpf_prog_aux)
BTF_ID(struct, bpf_timer)
12344
12345 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
12346 const struct btf_param *arg, int type)
12347 {
12348 const struct btf_type *t;
12349 u32 res_id;
12350
12351 t = btf_type_skip_modifiers(btf, arg->type, NULL);
12352 if (!t)
12353 return false;
12354 if (!btf_type_is_ptr(t))
12355 return false;
12356 t = btf_type_skip_modifiers(btf, t->type, &res_id);
12357 if (!t)
12358 return false;
12359 return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
12360 }
12361
is_kfunc_arg_dynptr(const struct btf * btf,const struct btf_param * arg)12362 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
12363 {
12364 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
12365 }
12366
is_kfunc_arg_list_head(const struct btf * btf,const struct btf_param * arg)12367 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
12368 {
12369 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
12370 }
12371
is_kfunc_arg_list_node(const struct btf * btf,const struct btf_param * arg)12372 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
12373 {
12374 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
12375 }
12376
is_kfunc_arg_rbtree_root(const struct btf * btf,const struct btf_param * arg)12377 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
12378 {
12379 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
12380 }
12381
is_kfunc_arg_rbtree_node(const struct btf * btf,const struct btf_param * arg)12382 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
12383 {
12384 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
12385 }
12386
is_kfunc_arg_timer(const struct btf * btf,const struct btf_param * arg)12387 static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
12388 {
12389 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
12390 }
12391
is_kfunc_arg_wq(const struct btf * btf,const struct btf_param * arg)12392 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
12393 {
12394 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
12395 }
12396
is_kfunc_arg_task_work(const struct btf * btf,const struct btf_param * arg)12397 static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
12398 {
12399 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
12400 }
12401
is_kfunc_arg_res_spin_lock(const struct btf * btf,const struct btf_param * arg)12402 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
12403 {
12404 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
12405 }
12406
is_rbtree_node_type(const struct btf_type * t)12407 static bool is_rbtree_node_type(const struct btf_type *t)
12408 {
12409 return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
12410 }
12411
is_list_node_type(const struct btf_type * t)12412 static bool is_list_node_type(const struct btf_type *t)
12413 {
12414 return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
12415 }
12416
is_kfunc_arg_callback(struct bpf_verifier_env * env,const struct btf * btf,const struct btf_param * arg)12417 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
12418 const struct btf_param *arg)
12419 {
12420 const struct btf_type *t;
12421
12422 t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
12423 if (!t)
12424 return false;
12425
12426 return true;
12427 }
12428
is_kfunc_arg_prog_aux(const struct btf * btf,const struct btf_param * arg)12429 static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
12430 {
12431 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
12432 }
12433
12434 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
__btf_type_is_scalar_struct(struct bpf_verifier_env * env,const struct btf * btf,const struct btf_type * t,int rec)12435 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
12436 const struct btf *btf,
12437 const struct btf_type *t, int rec)
12438 {
12439 const struct btf_type *member_type;
12440 const struct btf_member *member;
12441 u32 i;
12442
12443 if (!btf_type_is_struct(t))
12444 return false;
12445
12446 for_each_member(i, t, member) {
12447 const struct btf_array *array;
12448
12449 member_type = btf_type_skip_modifiers(btf, member->type, NULL);
12450 if (btf_type_is_struct(member_type)) {
12451 if (rec >= 3) {
12452 verbose(env, "max struct nesting depth exceeded\n");
12453 return false;
12454 }
12455 if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
12456 return false;
12457 continue;
12458 }
12459 if (btf_type_is_array(member_type)) {
12460 array = btf_array(member_type);
12461 if (!array->nelems)
12462 return false;
12463 member_type = btf_type_skip_modifiers(btf, array->type, NULL);
12464 if (!btf_type_is_scalar(member_type))
12465 return false;
12466 continue;
12467 }
12468 if (!btf_type_is_scalar(member_type))
12469 return false;
12470 }
12471 return true;
12472 }
12473
/* Categories a pointer argument of a kfunc can be resolved to; see
 * get_kfunc_ptr_arg_type() for how a category is chosen.
 */
enum kfunc_ptr_arg_type {
	KF_ARG_PTR_TO_CTX = 0,
	KF_ARG_PTR_TO_ALLOC_BTF_ID,	/* Allocated object */
	KF_ARG_PTR_TO_REFCOUNTED_KPTR,	/* Refcounted local kptr */
	KF_ARG_PTR_TO_DYNPTR,
	KF_ARG_PTR_TO_ITER,
	KF_ARG_PTR_TO_LIST_HEAD,
	KF_ARG_PTR_TO_LIST_NODE,
	KF_ARG_PTR_TO_BTF_ID,		/* Also covers reg2btf_ids conversions */
	KF_ARG_PTR_TO_MEM,
	KF_ARG_PTR_TO_MEM_SIZE,		/* Size derived from next argument, skip it */
	KF_ARG_PTR_TO_CALLBACK,
	KF_ARG_PTR_TO_RB_ROOT,
	KF_ARG_PTR_TO_RB_NODE,
	KF_ARG_PTR_TO_NULL,
	KF_ARG_PTR_TO_CONST_STR,
	KF_ARG_PTR_TO_MAP,
	KF_ARG_PTR_TO_TIMER,
	KF_ARG_PTR_TO_WORKQUEUE,
	KF_ARG_PTR_TO_IRQ_FLAG,
	KF_ARG_PTR_TO_RES_SPIN_LOCK,
	KF_ARG_PTR_TO_TASK_WORK,
};
12497
/* Indices into special_kfunc_list[]. Each enumerator must sit at the same
 * position as the matching BTF_ID() entry of that list, so new entries have
 * to be added to both in the same place.
 */
enum special_kfunc_type {
	KF_bpf_obj_new_impl = 0,
	KF_bpf_obj_drop_impl,
	KF_bpf_refcount_acquire_impl,

	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_list_front,
	KF_bpf_list_back,

	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,

	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,

	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_from_skb_meta,
	KF_bpf_xdp_pull_data,
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,

	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_throw,
	KF_bpf_wq_set_callback,
	KF_bpf_preempt_disable,
	KF_bpf_preempt_enable,
	KF_bpf_iter_css_task_new,
	KF_bpf_session_cookie,
	KF_bpf_get_kmem_cache,
	KF_bpf_local_irq_save,
	KF_bpf_local_irq_restore,

	KF_bpf_iter_num_new,
	KF_bpf_iter_num_next,
	KF_bpf_iter_num_destroy,

	KF_bpf_set_dentry_xattr,
	KF_bpf_remove_dentry_xattr,

	KF_bpf_res_spin_lock,
	KF_bpf_res_spin_unlock,
	KF_bpf_res_spin_lock_irqsave,
	KF_bpf_res_spin_unlock_irqrestore,

	KF_bpf_dynptr_from_file,
	KF_bpf_dynptr_file_discard,
	KF___bpf_trap,
	KF_bpf_task_work_schedule_signal,
	KF_bpf_task_work_schedule_resume,

	KF_bpf_arena_alloc_pages,
	KF_bpf_arena_free_pages,
	KF_bpf_arena_reserve_pages,

	KF_bpf_session_is_return,
	KF_bpf_stream_vprintk,
	KF_bpf_stream_print_stack,
};
12557
/* BTF IDs of the kfuncs the verifier treats specially, looked up as
 * special_kfunc_list[KF_<name>]. The entries below follow the order of
 * enum special_kfunc_type and must stay in that order.
 *
 * Every #else branch emits as many BTF_ID_UNUSED entries as its #ifdef
 * branch emits BTF_ID() entries, so the positions of later entries do not
 * depend on the kernel configuration.
 */
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_drop_impl)
BTF_ID(func, bpf_refcount_acquire_impl)
BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_from_skb_meta)
BTF_ID(func, bpf_xdp_pull_data)
#else
/* Four placeholders for the four CONFIG_NET entries above. */
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_get_kmem_cache)
BTF_ID(func, bpf_local_irq_save)
BTF_ID(func, bpf_local_irq_restore)
BTF_ID(func, bpf_iter_num_new)
BTF_ID(func, bpf_iter_num_next)
BTF_ID(func, bpf_iter_num_destroy)
#ifdef CONFIG_BPF_LSM
BTF_ID(func, bpf_set_dentry_xattr)
BTF_ID(func, bpf_remove_dentry_xattr)
#else
/* Two placeholders for the two CONFIG_BPF_LSM entries above. */
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, bpf_dynptr_from_file)
BTF_ID(func, bpf_dynptr_file_discard)
BTF_ID(func, __bpf_trap)
BTF_ID(func, bpf_task_work_schedule_signal)
BTF_ID(func, bpf_task_work_schedule_resume)
BTF_ID(func, bpf_arena_alloc_pages)
BTF_ID(func, bpf_arena_free_pages)
BTF_ID(func, bpf_arena_reserve_pages)
BTF_ID(func, bpf_session_is_return)
BTF_ID(func, bpf_stream_vprintk)
BTF_ID(func, bpf_stream_print_stack)
12636
12637 static bool is_task_work_add_kfunc(u32 func_id)
12638 {
12639 return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
12640 func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
12641 }
12642
is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta * meta)12643 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
12644 {
12645 if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
12646 meta->arg_owning_ref) {
12647 return false;
12648 }
12649
12650 return meta->kfunc_flags & KF_RET_NULL;
12651 }
12652
is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta * meta)12653 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
12654 {
12655 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
12656 }
12657
is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta * meta)12658 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
12659 {
12660 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
12661 }
12662
is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta * meta)12663 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
12664 {
12665 return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
12666 }
12667
is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta * meta)12668 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
12669 {
12670 return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
12671 }
12672
is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta * meta)12673 static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
12674 {
12675 return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
12676 }
12677
12678 static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env * env,struct bpf_kfunc_call_arg_meta * meta,const struct btf_type * t,const struct btf_type * ref_t,const char * ref_tname,const struct btf_param * args,int argno,int nargs)12679 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
12680 struct bpf_kfunc_call_arg_meta *meta,
12681 const struct btf_type *t, const struct btf_type *ref_t,
12682 const char *ref_tname, const struct btf_param *args,
12683 int argno, int nargs)
12684 {
12685 u32 regno = argno + 1;
12686 struct bpf_reg_state *regs = cur_regs(env);
12687 struct bpf_reg_state *reg = ®s[regno];
12688 bool arg_mem_size = false;
12689
12690 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
12691 meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
12692 meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
12693 return KF_ARG_PTR_TO_CTX;
12694
12695 if (argno + 1 < nargs &&
12696 (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]) ||
12697 is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1])))
12698 arg_mem_size = true;
12699
12700 /* In this function, we verify the kfunc's BTF as per the argument type,
12701 * leaving the rest of the verification with respect to the register
12702 * type to our caller. When a set of conditions hold in the BTF type of
12703 * arguments, we resolve it to a known kfunc_ptr_arg_type.
12704 */
12705 if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
12706 return KF_ARG_PTR_TO_CTX;
12707
12708 if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg) &&
12709 !arg_mem_size)
12710 return KF_ARG_PTR_TO_NULL;
12711
12712 if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
12713 return KF_ARG_PTR_TO_ALLOC_BTF_ID;
12714
12715 if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
12716 return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
12717
12718 if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
12719 return KF_ARG_PTR_TO_DYNPTR;
12720
12721 if (is_kfunc_arg_iter(meta, argno, &args[argno]))
12722 return KF_ARG_PTR_TO_ITER;
12723
12724 if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
12725 return KF_ARG_PTR_TO_LIST_HEAD;
12726
12727 if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
12728 return KF_ARG_PTR_TO_LIST_NODE;
12729
12730 if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
12731 return KF_ARG_PTR_TO_RB_ROOT;
12732
12733 if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
12734 return KF_ARG_PTR_TO_RB_NODE;
12735
12736 if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
12737 return KF_ARG_PTR_TO_CONST_STR;
12738
12739 if (is_kfunc_arg_map(meta->btf, &args[argno]))
12740 return KF_ARG_PTR_TO_MAP;
12741
12742 if (is_kfunc_arg_wq(meta->btf, &args[argno]))
12743 return KF_ARG_PTR_TO_WORKQUEUE;
12744
12745 if (is_kfunc_arg_timer(meta->btf, &args[argno]))
12746 return KF_ARG_PTR_TO_TIMER;
12747
12748 if (is_kfunc_arg_task_work(meta->btf, &args[argno]))
12749 return KF_ARG_PTR_TO_TASK_WORK;
12750
12751 if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
12752 return KF_ARG_PTR_TO_IRQ_FLAG;
12753
12754 if (is_kfunc_arg_res_spin_lock(meta->btf, &args[argno]))
12755 return KF_ARG_PTR_TO_RES_SPIN_LOCK;
12756
12757 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
12758 if (!btf_type_is_struct(ref_t)) {
12759 verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
12760 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
12761 return -EINVAL;
12762 }
12763 return KF_ARG_PTR_TO_BTF_ID;
12764 }
12765
12766 if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
12767 return KF_ARG_PTR_TO_CALLBACK;
12768
12769 /* This is the catch all argument type of register types supported by
12770 * check_helper_mem_access. However, we only allow when argument type is
12771 * pointer to scalar, or struct composed (recursively) of scalars. When
12772 * arg_mem_size is true, the pointer can be void *.
12773 */
12774 if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
12775 (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
12776 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
12777 argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
12778 return -EINVAL;
12779 }
12780 return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
12781 }
12782
/* Check that the object @reg points to is acceptable for a kfunc argument
 * whose pointee type is @ref_t (BTF id @ref_id in meta->btf, name
 * @ref_tname).
 *
 * For PTR_TO_BTF_ID registers the type comes from the register itself; for
 * other register types it is taken from reg2btf_ids[] in btf_vmlinux.
 *
 * Returns 0 when the types match (or the register type is what the
 * projection type @ref_tname stands for), -EINVAL after logging otherwise.
 */
static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
					struct bpf_reg_state *reg,
					const struct btf_type *ref_t,
					const char *ref_tname, u32 ref_id,
					struct bpf_kfunc_call_arg_meta *meta,
					int argno)
{
	const struct btf_type *reg_ref_t;
	bool strict_type_match = false;
	const struct btf *reg_btf;
	const char *reg_ref_tname;
	bool taking_projection;
	bool struct_same;
	u32 reg_ref_id;

	if (base_type(reg->type) == PTR_TO_BTF_ID) {
		reg_btf = reg->btf;
		reg_ref_id = reg->btf_id;
	} else {
		reg_btf = btf_vmlinux;
		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
	}

	/* Enforce strict type matching for calls to kfuncs that are acquiring
	 * or releasing a reference, or are no-cast aliases. We do _not_
	 * enforce strict matching for kfuncs by default,
	 * as we want to enable BPF programs to pass types that are bitwise
	 * equivalent without forcing them to explicitly cast with something
	 * like bpf_cast_to_kern_ctx().
	 *
	 * For example, say we had a type like the following:
	 *
	 * struct bpf_cpumask {
	 *	cpumask_t cpumask;
	 *	refcount_t usage;
	 * };
	 *
	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
	 * to a struct cpumask, so it would be safe to pass a struct
	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
	 *
	 * The philosophy here is similar to how we allow scalars of different
	 * types to be passed to kfuncs as long as the size is the same. The
	 * only difference here is that we're simply allowing
	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
	 * resolve types.
	 *
	 * NOTE(review): the text above mentions acquiring kfuncs, but the
	 * condition below only tests release kfuncs and no-cast aliases --
	 * confirm which of the two is intended.
	 */
	if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
		strict_type_match = true;

	/* A release kfunc is expected to get a pointer with zero fixed and
	 * zero variable offset; only warn here, do not reject.
	 */
	WARN_ON_ONCE(is_kfunc_release(meta) &&
		     (reg->off || !tnum_is_const(reg->var_off) ||
		      reg->var_off.value));

	/* Strip modifiers; reg_ref_id is updated to the id of the resolved type. */
	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match);
	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
	 * actually use it -- it must cast to the underlying type. So we allow
	 * caller to pass in the underlying type.
	 */
	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
	if (!taking_projection && !struct_same) {
		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
			btf_type_str(reg_ref_t), reg_ref_tname);
		return -EINVAL;
	}
	return 0;
}
12854
process_irq_flag(struct bpf_verifier_env * env,int regno,struct bpf_kfunc_call_arg_meta * meta)12855 static int process_irq_flag(struct bpf_verifier_env *env, int regno,
12856 struct bpf_kfunc_call_arg_meta *meta)
12857 {
12858 struct bpf_reg_state *reg = reg_state(env, regno);
12859 int err, kfunc_class = IRQ_NATIVE_KFUNC;
12860 bool irq_save;
12861
12862 if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
12863 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
12864 irq_save = true;
12865 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
12866 kfunc_class = IRQ_LOCK_KFUNC;
12867 } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
12868 meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
12869 irq_save = false;
12870 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
12871 kfunc_class = IRQ_LOCK_KFUNC;
12872 } else {
12873 verifier_bug(env, "unknown irq flags kfunc");
12874 return -EFAULT;
12875 }
12876
12877 if (irq_save) {
12878 if (!is_irq_flag_reg_valid_uninit(env, reg)) {
12879 verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1);
12880 return -EINVAL;
12881 }
12882
12883 err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false);
12884 if (err)
12885 return err;
12886
12887 err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
12888 if (err)
12889 return err;
12890 } else {
12891 err = is_irq_flag_reg_valid_init(env, reg);
12892 if (err) {
12893 verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1);
12894 return err;
12895 }
12896
12897 err = mark_irq_flag_read(env, reg);
12898 if (err)
12899 return err;
12900
12901 err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
12902 if (err)
12903 return err;
12904 }
12905 return 0;
12906 }
12907
12908
ref_set_non_owning(struct bpf_verifier_env * env,struct bpf_reg_state * reg)12909 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
12910 {
12911 struct btf_record *rec = reg_btf_record(reg);
12912
12913 if (!env->cur_state->active_locks) {
12914 verifier_bug(env, "%s w/o active lock", __func__);
12915 return -EFAULT;
12916 }
12917
12918 if (type_flag(reg->type) & NON_OWN_REF) {
12919 verifier_bug(env, "NON_OWN_REF already set");
12920 return -EFAULT;
12921 }
12922
12923 reg->type |= NON_OWN_REF;
12924 if (rec->refcount_off >= 0)
12925 reg->type |= MEM_RCU;
12926
12927 return 0;
12928 }
12929
/* Turn every register that holds reference @ref_obj_id into a non-owning
 * reference.
 *
 * Returns 0 on success. A zero @ref_obj_id, or one that is not among the
 * acquired references of the current state, is reported as a verifier bug
 * and yields -EFAULT. The return value of ref_set_non_owning() is not
 * checked.
 */
static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_func_state *fstate;
	struct bpf_reg_state *reg;
	int idx;

	if (!ref_obj_id) {
		verifier_bug(env, "ref_obj_id is zero for owning -> non-owning conversion");
		return -EFAULT;
	}

	/* Look for the reference among those the state has acquired. */
	for (idx = 0; idx < state->acquired_refs; idx++) {
		if (state->refs[idx].id == ref_obj_id)
			break;
	}

	if (idx == state->acquired_refs) {
		verifier_bug(env, "ref state missing for ref_obj_id");
		return -EFAULT;
	}

	/* Clear ref_obj_id here so release_reference doesn't clobber
	 * the whole reg
	 */
	bpf_for_each_reg_in_vstate(state, fstate, reg, ({
		if (reg->ref_obj_id == ref_obj_id) {
			reg->ref_obj_id = 0;
			ref_set_non_owning(env, reg);
		}
	}));
	return 0;
}
12961
12962 /* Implementation details:
12963 *
12964 * Each register points to some region of memory, which we define as an
12965 * allocation. Each allocation may embed a bpf_spin_lock which protects any
12966 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
12967 * allocation. The lock and the data it protects are colocated in the same
12968 * memory region.
12969 *
 * Hence, every time a register holds a pointer value pointing to such an
 * allocation, the verifier preserves a unique reg->id for it.
12972 *
12973 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
12974 * bpf_spin_lock is called.
12975 *
12976 * To enable this, lock state in the verifier captures two values:
12977 * active_lock.ptr = Register's type specific pointer
12978 * active_lock.id = A unique ID for each register pointer value
12979 *
12980 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
12981 * supported register types.
12982 *
12983 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
12984 * allocated objects is the reg->btf pointer.
12985 *
12986 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
12987 * can establish the provenance of the map value statically for each distinct
12988 * lookup into such maps. They always contain a single map value hence unique
12989 * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
12990 *
12991 * So, in case of global variables, they use array maps with max_entries = 1,
12992 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
12993 * into the same map value as max_entries is 1, as described above).
12994 *
12995 * In case of inner map lookups, the inner map pointer has same map_ptr as the
12996 * outer map pointer (in verifier context), but each lookup into an inner map
12997 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
12998 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
12999 * will get different reg->id assigned to each lookup, hence different
13000 * active_lock.id.
13001 *
13002 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
13003 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
13004 * returned from bpf_obj_new. Each allocation receives a new reg->id.
13005 */
check_reg_allocation_locked(struct bpf_verifier_env * env,struct bpf_reg_state * reg)13006 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
13007 {
13008 struct bpf_reference_state *s;
13009 void *ptr;
13010 u32 id;
13011
13012 switch ((int)reg->type) {
13013 case PTR_TO_MAP_VALUE:
13014 ptr = reg->map_ptr;
13015 break;
13016 case PTR_TO_BTF_ID | MEM_ALLOC:
13017 ptr = reg->btf;
13018 break;
13019 default:
13020 verifier_bug(env, "unknown reg type for lock check");
13021 return -EFAULT;
13022 }
13023 id = reg->id;
13024
13025 if (!env->cur_state->active_locks)
13026 return -EINVAL;
13027 s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
13028 if (!s) {
13029 verbose(env, "held lock and object are not in the same allocation\n");
13030 return -EINVAL;
13031 }
13032 return 0;
13033 }
13034
is_bpf_list_api_kfunc(u32 btf_id)13035 static bool is_bpf_list_api_kfunc(u32 btf_id)
13036 {
13037 return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
13038 btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
13039 btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
13040 btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
13041 btf_id == special_kfunc_list[KF_bpf_list_front] ||
13042 btf_id == special_kfunc_list[KF_bpf_list_back];
13043 }
13044
is_bpf_rbtree_api_kfunc(u32 btf_id)13045 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
13046 {
13047 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
13048 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
13049 btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
13050 btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
13051 btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
13052 btf_id == special_kfunc_list[KF_bpf_rbtree_right];
13053 }
13054
is_bpf_iter_num_api_kfunc(u32 btf_id)13055 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
13056 {
13057 return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
13058 btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
13059 btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
13060 }
13061
is_bpf_graph_api_kfunc(u32 btf_id)13062 static bool is_bpf_graph_api_kfunc(u32 btf_id)
13063 {
13064 return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
13065 btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
13066 }
13067
is_bpf_res_spin_lock_kfunc(u32 btf_id)13068 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
13069 {
13070 return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13071 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
13072 btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
13073 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
13074 }
13075
is_bpf_arena_kfunc(u32 btf_id)13076 static bool is_bpf_arena_kfunc(u32 btf_id)
13077 {
13078 return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
13079 btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
13080 btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
13081 }
13082
is_bpf_stream_kfunc(u32 btf_id)13083 static bool is_bpf_stream_kfunc(u32 btf_id)
13084 {
13085 return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
13086 btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
13087 }
13088
kfunc_spin_allowed(u32 btf_id)13089 static bool kfunc_spin_allowed(u32 btf_id)
13090 {
13091 return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
13092 is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
13093 is_bpf_stream_kfunc(btf_id);
13094 }
13095
is_sync_callback_calling_kfunc(u32 btf_id)13096 static bool is_sync_callback_calling_kfunc(u32 btf_id)
13097 {
13098 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
13099 }
13100
is_async_callback_calling_kfunc(u32 btf_id)13101 static bool is_async_callback_calling_kfunc(u32 btf_id)
13102 {
13103 return is_bpf_wq_set_callback_kfunc(btf_id) ||
13104 is_task_work_add_kfunc(btf_id);
13105 }
13106
is_bpf_throw_kfunc(struct bpf_insn * insn)13107 static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
13108 {
13109 return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
13110 insn->imm == special_kfunc_list[KF_bpf_throw];
13111 }
13112
is_bpf_wq_set_callback_kfunc(u32 btf_id)13113 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
13114 {
13115 return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
13116 }
13117
is_callback_calling_kfunc(u32 btf_id)13118 static bool is_callback_calling_kfunc(u32 btf_id)
13119 {
13120 return is_sync_callback_calling_kfunc(btf_id) ||
13121 is_async_callback_calling_kfunc(btf_id);
13122 }
13123
is_rbtree_lock_required_kfunc(u32 btf_id)13124 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
13125 {
13126 return is_bpf_rbtree_api_kfunc(btf_id);
13127 }
13128
check_kfunc_is_graph_root_api(struct bpf_verifier_env * env,enum btf_field_type head_field_type,u32 kfunc_btf_id)13129 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
13130 enum btf_field_type head_field_type,
13131 u32 kfunc_btf_id)
13132 {
13133 bool ret;
13134
13135 switch (head_field_type) {
13136 case BPF_LIST_HEAD:
13137 ret = is_bpf_list_api_kfunc(kfunc_btf_id);
13138 break;
13139 case BPF_RB_ROOT:
13140 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
13141 break;
13142 default:
13143 verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
13144 btf_field_type_name(head_field_type));
13145 return false;
13146 }
13147
13148 if (!ret)
13149 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
13150 btf_field_type_name(head_field_type));
13151 return ret;
13152 }
13153
check_kfunc_is_graph_node_api(struct bpf_verifier_env * env,enum btf_field_type node_field_type,u32 kfunc_btf_id)13154 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
13155 enum btf_field_type node_field_type,
13156 u32 kfunc_btf_id)
13157 {
13158 bool ret;
13159
13160 switch (node_field_type) {
13161 case BPF_LIST_NODE:
13162 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
13163 kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
13164 break;
13165 case BPF_RB_NODE:
13166 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
13167 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
13168 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
13169 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
13170 break;
13171 default:
13172 verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
13173 btf_field_type_name(node_field_type));
13174 return false;
13175 }
13176
13177 if (!ret)
13178 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
13179 btf_field_type_name(node_field_type));
13180 return ret;
13181 }
13182
13183 static int
__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env * env,struct bpf_reg_state * reg,u32 regno,struct bpf_kfunc_call_arg_meta * meta,enum btf_field_type head_field_type,struct btf_field ** head_field)13184 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
13185 struct bpf_reg_state *reg, u32 regno,
13186 struct bpf_kfunc_call_arg_meta *meta,
13187 enum btf_field_type head_field_type,
13188 struct btf_field **head_field)
13189 {
13190 const char *head_type_name;
13191 struct btf_field *field;
13192 struct btf_record *rec;
13193 u32 head_off;
13194
13195 if (meta->btf != btf_vmlinux) {
13196 verifier_bug(env, "unexpected btf mismatch in kfunc call");
13197 return -EFAULT;
13198 }
13199
13200 if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
13201 return -EFAULT;
13202
13203 head_type_name = btf_field_type_name(head_field_type);
13204 if (!tnum_is_const(reg->var_off)) {
13205 verbose(env,
13206 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
13207 regno, head_type_name);
13208 return -EINVAL;
13209 }
13210
13211 rec = reg_btf_record(reg);
13212 head_off = reg->off + reg->var_off.value;
13213 field = btf_record_find(rec, head_off, head_field_type);
13214 if (!field) {
13215 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
13216 return -EINVAL;
13217 }
13218
13219 /* All functions require bpf_list_head to be protected using a bpf_spin_lock */
13220 if (check_reg_allocation_locked(env, reg)) {
13221 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
13222 rec->spin_lock_off, head_type_name);
13223 return -EINVAL;
13224 }
13225
13226 if (*head_field) {
13227 verifier_bug(env, "repeating %s arg", head_type_name);
13228 return -EFAULT;
13229 }
13230 *head_field = field;
13231 return 0;
13232 }
13233
/* Graph-root handling for a BPF_LIST_HEAD argument; the field found is
 * recorded in meta->arg_list_head.field.
 */
static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
					   struct bpf_reg_state *reg, u32 regno,
					   struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **slot = &meta->arg_list_head.field;

	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta,
						  BPF_LIST_HEAD, slot);
}
13241
/* Graph-root handling for a BPF_RB_ROOT argument; the field found is
 * recorded in meta->arg_rbtree_root.field.
 */
static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
					     struct bpf_reg_state *reg, u32 regno,
					     struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **slot = &meta->arg_rbtree_root.field;

	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta,
						  BPF_RB_ROOT, slot);
}
13249
13250 static int
__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env * env,struct bpf_reg_state * reg,u32 regno,struct bpf_kfunc_call_arg_meta * meta,enum btf_field_type head_field_type,enum btf_field_type node_field_type,struct btf_field ** node_field)13251 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
13252 struct bpf_reg_state *reg, u32 regno,
13253 struct bpf_kfunc_call_arg_meta *meta,
13254 enum btf_field_type head_field_type,
13255 enum btf_field_type node_field_type,
13256 struct btf_field **node_field)
13257 {
13258 const char *node_type_name;
13259 const struct btf_type *et, *t;
13260 struct btf_field *field;
13261 u32 node_off;
13262
13263 if (meta->btf != btf_vmlinux) {
13264 verifier_bug(env, "unexpected btf mismatch in kfunc call");
13265 return -EFAULT;
13266 }
13267
13268 if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
13269 return -EFAULT;
13270
13271 node_type_name = btf_field_type_name(node_field_type);
13272 if (!tnum_is_const(reg->var_off)) {
13273 verbose(env,
13274 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
13275 regno, node_type_name);
13276 return -EINVAL;
13277 }
13278
13279 node_off = reg->off + reg->var_off.value;
13280 field = reg_find_field_offset(reg, node_off, node_field_type);
13281 if (!field) {
13282 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
13283 return -EINVAL;
13284 }
13285
13286 field = *node_field;
13287
13288 et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
13289 t = btf_type_by_id(reg->btf, reg->btf_id);
13290 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
13291 field->graph_root.value_btf_id, true)) {
13292 verbose(env, "operation on %s expects arg#1 %s at offset=%d "
13293 "in struct %s, but arg is at offset=%d in struct %s\n",
13294 btf_field_type_name(head_field_type),
13295 btf_field_type_name(node_field_type),
13296 field->graph_root.node_offset,
13297 btf_name_by_offset(field->graph_root.btf, et->name_off),
13298 node_off, btf_name_by_offset(reg->btf, t->name_off));
13299 return -EINVAL;
13300 }
13301 meta->arg_btf = reg->btf;
13302 meta->arg_btf_id = reg->btf_id;
13303
13304 if (node_off != field->graph_root.node_offset) {
13305 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
13306 node_off, btf_field_type_name(node_field_type),
13307 field->graph_root.node_offset,
13308 btf_name_by_offset(field->graph_root.btf, et->name_off));
13309 return -EINVAL;
13310 }
13311
13312 return 0;
13313 }
13314
/*
 * Validate a kfunc argument that must point at a bpf_list_node.
 *
 * Runs the shared graph-node checker with the BPF_LIST_HEAD /
 * BPF_LIST_NODE pair, comparing against the field found through
 * meta->arg_list_head.field. The checker's return value is passed on
 * unchanged.
 */
static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
					   struct bpf_reg_state *reg, u32 regno,
					   struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **head_slot = &meta->arg_list_head.field;

	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
						  BPF_LIST_HEAD, BPF_LIST_NODE,
						  head_slot);
}
13323
/*
 * Validate a kfunc argument that must point at a bpf_rb_node.
 *
 * Runs the shared graph-node checker with the BPF_RB_ROOT / BPF_RB_NODE
 * pair, comparing against the field found through
 * meta->arg_rbtree_root.field. The checker's return value is passed on
 * unchanged.
 */
static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
					     struct bpf_reg_state *reg, u32 regno,
					     struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **root_slot = &meta->arg_rbtree_root.field;

	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
						  BPF_RB_ROOT, BPF_RB_NODE,
						  root_slot);
}
13332
13333 /*
13334 * css_task iter allowlist is needed to avoid dead locking on css_set_lock.
13335 * LSM hooks and iters (both sleepable and non-sleepable) are safe.
13336 * Any sleepable progs are also safe since bpf_check_attach_target() enforce
13337 * them can only be attached to some specific hook points.
13338 */
check_css_task_iter_allowlist(struct bpf_verifier_env * env)13339 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
13340 {
13341 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
13342
13343 switch (prog_type) {
13344 case BPF_PROG_TYPE_LSM:
13345 return true;
13346 case BPF_PROG_TYPE_TRACING:
13347 if (env->prog->expected_attach_type == BPF_TRACE_ITER)
13348 return true;
13349 fallthrough;
13350 default:
13351 return in_sleepable(env);
13352 }
13353 }
13354
check_kfunc_args(struct bpf_verifier_env * env,struct bpf_kfunc_call_arg_meta * meta,int insn_idx)13355 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
13356 int insn_idx)
13357 {
13358 const char *func_name = meta->func_name, *ref_tname;
13359 const struct btf *btf = meta->btf;
13360 const struct btf_param *args;
13361 struct btf_record *rec;
13362 u32 i, nargs;
13363 int ret;
13364
13365 args = (const struct btf_param *)(meta->func_proto + 1);
13366 nargs = btf_type_vlen(meta->func_proto);
13367 if (nargs > MAX_BPF_FUNC_REG_ARGS) {
13368 verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
13369 MAX_BPF_FUNC_REG_ARGS);
13370 return -EINVAL;
13371 }
13372
13373 /* Check that BTF function arguments match actual types that the
13374 * verifier sees.
13375 */
13376 for (i = 0; i < nargs; i++) {
13377 struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[i + 1];
13378 const struct btf_type *t, *ref_t, *resolve_ret;
13379 enum bpf_arg_type arg_type = ARG_DONTCARE;
13380 u32 regno = i + 1, ref_id, type_size;
13381 bool is_ret_buf_sz = false;
13382 int kf_arg_type;
13383
13384 t = btf_type_skip_modifiers(btf, args[i].type, NULL);
13385
13386 if (is_kfunc_arg_ignore(btf, &args[i]))
13387 continue;
13388
13389 if (is_kfunc_arg_prog_aux(btf, &args[i])) {
13390 /* Reject repeated use bpf_prog_aux */
13391 if (meta->arg_prog) {
13392 verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
13393 return -EFAULT;
13394 }
13395 meta->arg_prog = true;
13396 cur_aux(env)->arg_prog = regno;
13397 continue;
13398 }
13399
13400 if (btf_type_is_scalar(t)) {
13401 if (reg->type != SCALAR_VALUE) {
13402 verbose(env, "R%d is not a scalar\n", regno);
13403 return -EINVAL;
13404 }
13405
13406 if (is_kfunc_arg_constant(meta->btf, &args[i])) {
13407 if (meta->arg_constant.found) {
13408 verifier_bug(env, "only one constant argument permitted");
13409 return -EFAULT;
13410 }
13411 if (!tnum_is_const(reg->var_off)) {
13412 verbose(env, "R%d must be a known constant\n", regno);
13413 return -EINVAL;
13414 }
13415 ret = mark_chain_precision(env, regno);
13416 if (ret < 0)
13417 return ret;
13418 meta->arg_constant.found = true;
13419 meta->arg_constant.value = reg->var_off.value;
13420 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
13421 meta->r0_rdonly = true;
13422 is_ret_buf_sz = true;
13423 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
13424 is_ret_buf_sz = true;
13425 }
13426
13427 if (is_ret_buf_sz) {
13428 if (meta->r0_size) {
13429 verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
13430 return -EINVAL;
13431 }
13432
13433 if (!tnum_is_const(reg->var_off)) {
13434 verbose(env, "R%d is not a const\n", regno);
13435 return -EINVAL;
13436 }
13437
13438 meta->r0_size = reg->var_off.value;
13439 ret = mark_chain_precision(env, regno);
13440 if (ret)
13441 return ret;
13442 }
13443 continue;
13444 }
13445
13446 if (!btf_type_is_ptr(t)) {
13447 verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
13448 return -EINVAL;
13449 }
13450
13451 if ((register_is_null(reg) || type_may_be_null(reg->type)) &&
13452 !is_kfunc_arg_nullable(meta->btf, &args[i])) {
13453 verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
13454 return -EACCES;
13455 }
13456
13457 if (reg->ref_obj_id) {
13458 if (is_kfunc_release(meta) && meta->ref_obj_id) {
13459 verifier_bug(env, "more than one arg with ref_obj_id R%d %u %u",
13460 regno, reg->ref_obj_id,
13461 meta->ref_obj_id);
13462 return -EFAULT;
13463 }
13464 meta->ref_obj_id = reg->ref_obj_id;
13465 if (is_kfunc_release(meta))
13466 meta->release_regno = regno;
13467 }
13468
13469 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
13470 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
13471
13472 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
13473 if (kf_arg_type < 0)
13474 return kf_arg_type;
13475
13476 switch (kf_arg_type) {
13477 case KF_ARG_PTR_TO_NULL:
13478 continue;
13479 case KF_ARG_PTR_TO_MAP:
13480 if (!reg->map_ptr) {
13481 verbose(env, "pointer in R%d isn't map pointer\n", regno);
13482 return -EINVAL;
13483 }
13484 if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
13485 reg->map_ptr->record->task_work_off >= 0)) {
13486 /* Use map_uid (which is unique id of inner map) to reject:
13487 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
13488 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
13489 * if (inner_map1 && inner_map2) {
13490 * wq = bpf_map_lookup_elem(inner_map1);
13491 * if (wq)
13492 * // mismatch would have been allowed
13493 * bpf_wq_init(wq, inner_map2);
13494 * }
13495 *
13496 * Comparing map_ptr is enough to distinguish normal and outer maps.
13497 */
13498 if (meta->map.ptr != reg->map_ptr ||
13499 meta->map.uid != reg->map_uid) {
13500 if (reg->map_ptr->record->task_work_off >= 0) {
13501 verbose(env,
13502 "bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
13503 meta->map.uid, reg->map_uid);
13504 return -EINVAL;
13505 }
13506 verbose(env,
13507 "workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
13508 meta->map.uid, reg->map_uid);
13509 return -EINVAL;
13510 }
13511 }
13512 meta->map.ptr = reg->map_ptr;
13513 meta->map.uid = reg->map_uid;
13514 fallthrough;
13515 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
13516 case KF_ARG_PTR_TO_BTF_ID:
13517 if (!is_trusted_reg(reg)) {
13518 if (!is_kfunc_rcu(meta)) {
13519 verbose(env, "R%d must be referenced or trusted\n", regno);
13520 return -EINVAL;
13521 }
13522 if (!is_rcu_reg(reg)) {
13523 verbose(env, "R%d must be a rcu pointer\n", regno);
13524 return -EINVAL;
13525 }
13526 }
13527 fallthrough;
13528 case KF_ARG_PTR_TO_CTX:
13529 case KF_ARG_PTR_TO_DYNPTR:
13530 case KF_ARG_PTR_TO_ITER:
13531 case KF_ARG_PTR_TO_LIST_HEAD:
13532 case KF_ARG_PTR_TO_LIST_NODE:
13533 case KF_ARG_PTR_TO_RB_ROOT:
13534 case KF_ARG_PTR_TO_RB_NODE:
13535 case KF_ARG_PTR_TO_MEM:
13536 case KF_ARG_PTR_TO_MEM_SIZE:
13537 case KF_ARG_PTR_TO_CALLBACK:
13538 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
13539 case KF_ARG_PTR_TO_CONST_STR:
13540 case KF_ARG_PTR_TO_WORKQUEUE:
13541 case KF_ARG_PTR_TO_TIMER:
13542 case KF_ARG_PTR_TO_TASK_WORK:
13543 case KF_ARG_PTR_TO_IRQ_FLAG:
13544 case KF_ARG_PTR_TO_RES_SPIN_LOCK:
13545 break;
13546 default:
13547 verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
13548 return -EFAULT;
13549 }
13550
13551 if (is_kfunc_release(meta) && reg->ref_obj_id)
13552 arg_type |= OBJ_RELEASE;
13553 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
13554 if (ret < 0)
13555 return ret;
13556
13557 switch (kf_arg_type) {
13558 case KF_ARG_PTR_TO_CTX:
13559 if (reg->type != PTR_TO_CTX) {
13560 verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
13561 i, reg_type_str(env, reg->type));
13562 return -EINVAL;
13563 }
13564
13565 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
13566 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
13567 if (ret < 0)
13568 return -EINVAL;
13569 meta->ret_btf_id = ret;
13570 }
13571 break;
13572 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
13573 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
13574 if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
13575 verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
13576 return -EINVAL;
13577 }
13578 } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
13579 if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
13580 verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
13581 return -EINVAL;
13582 }
13583 } else {
13584 verbose(env, "arg#%d expected pointer to allocated object\n", i);
13585 return -EINVAL;
13586 }
13587 if (!reg->ref_obj_id) {
13588 verbose(env, "allocated object must be referenced\n");
13589 return -EINVAL;
13590 }
13591 if (meta->btf == btf_vmlinux) {
13592 meta->arg_btf = reg->btf;
13593 meta->arg_btf_id = reg->btf_id;
13594 }
13595 break;
13596 case KF_ARG_PTR_TO_DYNPTR:
13597 {
13598 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
13599 int clone_ref_obj_id = 0;
13600
13601 if (reg->type == CONST_PTR_TO_DYNPTR)
13602 dynptr_arg_type |= MEM_RDONLY;
13603
13604 if (is_kfunc_arg_uninit(btf, &args[i]))
13605 dynptr_arg_type |= MEM_UNINIT;
13606
13607 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
13608 dynptr_arg_type |= DYNPTR_TYPE_SKB;
13609 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
13610 dynptr_arg_type |= DYNPTR_TYPE_XDP;
13611 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
13612 dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
13613 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
13614 dynptr_arg_type |= DYNPTR_TYPE_FILE;
13615 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
13616 dynptr_arg_type |= DYNPTR_TYPE_FILE;
13617 meta->release_regno = regno;
13618 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
13619 (dynptr_arg_type & MEM_UNINIT)) {
13620 enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
13621
13622 if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
13623 verifier_bug(env, "no dynptr type for parent of clone");
13624 return -EFAULT;
13625 }
13626
13627 dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
13628 clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
13629 if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
13630 verifier_bug(env, "missing ref obj id for parent of clone");
13631 return -EFAULT;
13632 }
13633 }
13634
13635 ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
13636 if (ret < 0)
13637 return ret;
13638
13639 if (!(dynptr_arg_type & MEM_UNINIT)) {
13640 int id = dynptr_id(env, reg);
13641
13642 if (id < 0) {
13643 verifier_bug(env, "failed to obtain dynptr id");
13644 return id;
13645 }
13646 meta->initialized_dynptr.id = id;
13647 meta->initialized_dynptr.type = dynptr_get_type(env, reg);
13648 meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
13649 }
13650
13651 break;
13652 }
13653 case KF_ARG_PTR_TO_ITER:
13654 if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
13655 if (!check_css_task_iter_allowlist(env)) {
13656 verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
13657 return -EINVAL;
13658 }
13659 }
13660 ret = process_iter_arg(env, regno, insn_idx, meta);
13661 if (ret < 0)
13662 return ret;
13663 break;
13664 case KF_ARG_PTR_TO_LIST_HEAD:
13665 if (reg->type != PTR_TO_MAP_VALUE &&
13666 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13667 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
13668 return -EINVAL;
13669 }
13670 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
13671 verbose(env, "allocated object must be referenced\n");
13672 return -EINVAL;
13673 }
13674 ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
13675 if (ret < 0)
13676 return ret;
13677 break;
13678 case KF_ARG_PTR_TO_RB_ROOT:
13679 if (reg->type != PTR_TO_MAP_VALUE &&
13680 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13681 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
13682 return -EINVAL;
13683 }
13684 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
13685 verbose(env, "allocated object must be referenced\n");
13686 return -EINVAL;
13687 }
13688 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
13689 if (ret < 0)
13690 return ret;
13691 break;
13692 case KF_ARG_PTR_TO_LIST_NODE:
13693 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13694 verbose(env, "arg#%d expected pointer to allocated object\n", i);
13695 return -EINVAL;
13696 }
13697 if (!reg->ref_obj_id) {
13698 verbose(env, "allocated object must be referenced\n");
13699 return -EINVAL;
13700 }
13701 ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
13702 if (ret < 0)
13703 return ret;
13704 break;
13705 case KF_ARG_PTR_TO_RB_NODE:
13706 if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
13707 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13708 verbose(env, "arg#%d expected pointer to allocated object\n", i);
13709 return -EINVAL;
13710 }
13711 if (!reg->ref_obj_id) {
13712 verbose(env, "allocated object must be referenced\n");
13713 return -EINVAL;
13714 }
13715 } else {
13716 if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
13717 verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
13718 return -EINVAL;
13719 }
13720 if (in_rbtree_lock_required_cb(env)) {
13721 verbose(env, "%s not allowed in rbtree cb\n", func_name);
13722 return -EINVAL;
13723 }
13724 }
13725
13726 ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
13727 if (ret < 0)
13728 return ret;
13729 break;
13730 case KF_ARG_PTR_TO_MAP:
13731 /* If argument has '__map' suffix expect 'struct bpf_map *' */
13732 ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
13733 ref_t = btf_type_by_id(btf_vmlinux, ref_id);
13734 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
13735 fallthrough;
13736 case KF_ARG_PTR_TO_BTF_ID:
13737 /* Only base_type is checked, further checks are done here */
13738 if ((base_type(reg->type) != PTR_TO_BTF_ID ||
13739 (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
13740 !reg2btf_ids[base_type(reg->type)]) {
13741 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
13742 verbose(env, "expected %s or socket\n",
13743 reg_type_str(env, base_type(reg->type) |
13744 (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
13745 return -EINVAL;
13746 }
13747 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
13748 if (ret < 0)
13749 return ret;
13750 break;
13751 case KF_ARG_PTR_TO_MEM:
13752 resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
13753 if (IS_ERR(resolve_ret)) {
13754 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
13755 i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
13756 return -EINVAL;
13757 }
13758 ret = check_mem_reg(env, reg, regno, type_size);
13759 if (ret < 0)
13760 return ret;
13761 break;
13762 case KF_ARG_PTR_TO_MEM_SIZE:
13763 {
13764 struct bpf_reg_state *buff_reg = ®s[regno];
13765 const struct btf_param *buff_arg = &args[i];
13766 struct bpf_reg_state *size_reg = ®s[regno + 1];
13767 const struct btf_param *size_arg = &args[i + 1];
13768
13769 if (!register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
13770 ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
13771 if (ret < 0) {
13772 verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
13773 return ret;
13774 }
13775 }
13776
13777 if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
13778 if (meta->arg_constant.found) {
13779 verifier_bug(env, "only one constant argument permitted");
13780 return -EFAULT;
13781 }
13782 if (!tnum_is_const(size_reg->var_off)) {
13783 verbose(env, "R%d must be a known constant\n", regno + 1);
13784 return -EINVAL;
13785 }
13786 meta->arg_constant.found = true;
13787 meta->arg_constant.value = size_reg->var_off.value;
13788 }
13789
13790 /* Skip next '__sz' or '__szk' argument */
13791 i++;
13792 break;
13793 }
13794 case KF_ARG_PTR_TO_CALLBACK:
13795 if (reg->type != PTR_TO_FUNC) {
13796 verbose(env, "arg%d expected pointer to func\n", i);
13797 return -EINVAL;
13798 }
13799 meta->subprogno = reg->subprogno;
13800 break;
13801 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
13802 if (!type_is_ptr_alloc_obj(reg->type)) {
13803 verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
13804 return -EINVAL;
13805 }
13806 if (!type_is_non_owning_ref(reg->type))
13807 meta->arg_owning_ref = true;
13808
13809 rec = reg_btf_record(reg);
13810 if (!rec) {
13811 verifier_bug(env, "Couldn't find btf_record");
13812 return -EFAULT;
13813 }
13814
13815 if (rec->refcount_off < 0) {
13816 verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
13817 return -EINVAL;
13818 }
13819
13820 meta->arg_btf = reg->btf;
13821 meta->arg_btf_id = reg->btf_id;
13822 break;
13823 case KF_ARG_PTR_TO_CONST_STR:
13824 if (reg->type != PTR_TO_MAP_VALUE) {
13825 verbose(env, "arg#%d doesn't point to a const string\n", i);
13826 return -EINVAL;
13827 }
13828 ret = check_reg_const_str(env, reg, regno);
13829 if (ret)
13830 return ret;
13831 break;
13832 case KF_ARG_PTR_TO_WORKQUEUE:
13833 if (reg->type != PTR_TO_MAP_VALUE) {
13834 verbose(env, "arg#%d doesn't point to a map value\n", i);
13835 return -EINVAL;
13836 }
13837 ret = check_map_field_pointer(env, regno, BPF_WORKQUEUE, &meta->map);
13838 if (ret < 0)
13839 return ret;
13840 break;
13841 case KF_ARG_PTR_TO_TIMER:
13842 if (reg->type != PTR_TO_MAP_VALUE) {
13843 verbose(env, "arg#%d doesn't point to a map value\n", i);
13844 return -EINVAL;
13845 }
13846 ret = process_timer_kfunc(env, regno, meta);
13847 if (ret < 0)
13848 return ret;
13849 break;
13850 case KF_ARG_PTR_TO_TASK_WORK:
13851 if (reg->type != PTR_TO_MAP_VALUE) {
13852 verbose(env, "arg#%d doesn't point to a map value\n", i);
13853 return -EINVAL;
13854 }
13855 ret = check_map_field_pointer(env, regno, BPF_TASK_WORK, &meta->map);
13856 if (ret < 0)
13857 return ret;
13858 break;
13859 case KF_ARG_PTR_TO_IRQ_FLAG:
13860 if (reg->type != PTR_TO_STACK) {
13861 verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
13862 return -EINVAL;
13863 }
13864 ret = process_irq_flag(env, regno, meta);
13865 if (ret < 0)
13866 return ret;
13867 break;
13868 case KF_ARG_PTR_TO_RES_SPIN_LOCK:
13869 {
13870 int flags = PROCESS_RES_LOCK;
13871
13872 if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13873 verbose(env, "arg#%d doesn't point to map value or allocated object\n", i);
13874 return -EINVAL;
13875 }
13876
13877 if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
13878 return -EFAULT;
13879 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
13880 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
13881 flags |= PROCESS_SPIN_LOCK;
13882 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
13883 meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
13884 flags |= PROCESS_LOCK_IRQ;
13885 ret = process_spin_lock(env, regno, flags);
13886 if (ret < 0)
13887 return ret;
13888 break;
13889 }
13890 }
13891 }
13892
13893 if (is_kfunc_release(meta) && !meta->release_regno) {
13894 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
13895 func_name);
13896 return -EINVAL;
13897 }
13898
13899 return 0;
13900 }
13901
/*
 * Look up the kfunc named by @func_id / @offset and prime @meta with it.
 *
 * If fetch_kfunc_meta() fails, its error is returned and @meta is left
 * untouched. Otherwise @meta is zeroed and its btf, func_id, func_proto
 * and func_name are filled in. -EACCES is then returned (with those four
 * fields still set) when the kfunc has no flags or is not allowed for
 * this program; on success the flags are copied and 0 is returned.
 */
static int fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
				s32 func_id,
				s16 offset,
				struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_kfunc_meta km;
	int ret;

	ret = fetch_kfunc_meta(env, func_id, offset, &km);
	if (ret)
		return ret;

	memset(meta, 0, sizeof(*meta));
	meta->func_name = km.name;
	meta->func_proto = km.proto;
	meta->func_id = km.id;
	meta->btf = km.btf;

	if (km.flags && btf_kfunc_is_allowed(km.btf, km.id, env->prog)) {
		meta->kfunc_flags = *km.flags;
		return 0;
	}

	return -EACCES;
}
13927
13928 /* check special kfuncs and return:
13929 * 1 - not fall-through to 'else' branch, continue verification
13930 * 0 - fall-through to 'else' branch
13931 * < 0 - not fall-through to 'else' branch, return error
13932 */
check_special_kfunc(struct bpf_verifier_env * env,struct bpf_kfunc_call_arg_meta * meta,struct bpf_reg_state * regs,struct bpf_insn_aux_data * insn_aux,const struct btf_type * ptr_type,struct btf * desc_btf)13933 static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
13934 struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
13935 const struct btf_type *ptr_type, struct btf *desc_btf)
13936 {
13937 const struct btf_type *ret_t;
13938 int err = 0;
13939
13940 if (meta->btf != btf_vmlinux)
13941 return 0;
13942
13943 if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
13944 meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13945 struct btf_struct_meta *struct_meta;
13946 struct btf *ret_btf;
13947 u32 ret_btf_id;
13948
13949 if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
13950 return -ENOMEM;
13951
13952 if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
13953 verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
13954 return -EINVAL;
13955 }
13956
13957 ret_btf = env->prog->aux->btf;
13958 ret_btf_id = meta->arg_constant.value;
13959
13960 /* This may be NULL due to user not supplying a BTF */
13961 if (!ret_btf) {
13962 verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
13963 return -EINVAL;
13964 }
13965
13966 ret_t = btf_type_by_id(ret_btf, ret_btf_id);
13967 if (!ret_t || !__btf_type_is_struct(ret_t)) {
13968 verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
13969 return -EINVAL;
13970 }
13971
13972 if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
13973 if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
13974 verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
13975 ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
13976 return -EINVAL;
13977 }
13978
13979 if (!bpf_global_percpu_ma_set) {
13980 mutex_lock(&bpf_percpu_ma_lock);
13981 if (!bpf_global_percpu_ma_set) {
13982 /* Charge memory allocated with bpf_global_percpu_ma to
13983 * root memcg. The obj_cgroup for root memcg is NULL.
13984 */
13985 err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
13986 if (!err)
13987 bpf_global_percpu_ma_set = true;
13988 }
13989 mutex_unlock(&bpf_percpu_ma_lock);
13990 if (err)
13991 return err;
13992 }
13993
13994 mutex_lock(&bpf_percpu_ma_lock);
13995 err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
13996 mutex_unlock(&bpf_percpu_ma_lock);
13997 if (err)
13998 return err;
13999 }
14000
14001 struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
14002 if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
14003 if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
14004 verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
14005 return -EINVAL;
14006 }
14007
14008 if (struct_meta) {
14009 verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
14010 return -EINVAL;
14011 }
14012 }
14013
14014 mark_reg_known_zero(env, regs, BPF_REG_0);
14015 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
14016 regs[BPF_REG_0].btf = ret_btf;
14017 regs[BPF_REG_0].btf_id = ret_btf_id;
14018 if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
14019 regs[BPF_REG_0].type |= MEM_PERCPU;
14020
14021 insn_aux->obj_new_size = ret_t->size;
14022 insn_aux->kptr_struct_meta = struct_meta;
14023 } else if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
14024 mark_reg_known_zero(env, regs, BPF_REG_0);
14025 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
14026 regs[BPF_REG_0].btf = meta->arg_btf;
14027 regs[BPF_REG_0].btf_id = meta->arg_btf_id;
14028
14029 insn_aux->kptr_struct_meta =
14030 btf_find_struct_meta(meta->arg_btf,
14031 meta->arg_btf_id);
14032 } else if (is_list_node_type(ptr_type)) {
14033 struct btf_field *field = meta->arg_list_head.field;
14034
14035 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
14036 } else if (is_rbtree_node_type(ptr_type)) {
14037 struct btf_field *field = meta->arg_rbtree_root.field;
14038
14039 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
14040 } else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
14041 mark_reg_known_zero(env, regs, BPF_REG_0);
14042 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
14043 regs[BPF_REG_0].btf = desc_btf;
14044 regs[BPF_REG_0].btf_id = meta->ret_btf_id;
14045 } else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
14046 ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
14047 if (!ret_t) {
14048 verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
14049 meta->arg_constant.value);
14050 return -EINVAL;
14051 } else if (btf_type_is_struct(ret_t)) {
14052 mark_reg_known_zero(env, regs, BPF_REG_0);
14053 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
14054 regs[BPF_REG_0].btf = desc_btf;
14055 regs[BPF_REG_0].btf_id = meta->arg_constant.value;
14056 } else if (btf_type_is_void(ret_t)) {
14057 mark_reg_known_zero(env, regs, BPF_REG_0);
14058 regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
14059 regs[BPF_REG_0].mem_size = 0;
14060 } else {
14061 verbose(env,
14062 "kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
14063 return -EINVAL;
14064 }
14065 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
14066 meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
14067 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);
14068
14069 mark_reg_known_zero(env, regs, BPF_REG_0);
14070
14071 if (!meta->arg_constant.found) {
14072 verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
14073 return -EFAULT;
14074 }
14075
14076 regs[BPF_REG_0].mem_size = meta->arg_constant.value;
14077
14078 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
14079 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
14080
14081 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
14082 regs[BPF_REG_0].type |= MEM_RDONLY;
14083 } else {
14084 /* this will set env->seen_direct_write to true */
14085 if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
14086 verbose(env, "the prog does not allow writes to packet data\n");
14087 return -EINVAL;
14088 }
14089 }
14090
14091 if (!meta->initialized_dynptr.id) {
14092 verifier_bug(env, "no dynptr id");
14093 return -EFAULT;
14094 }
14095 regs[BPF_REG_0].dynptr_id = meta->initialized_dynptr.id;
14096
14097 /* we don't need to set BPF_REG_0's ref obj id
14098 * because packet slices are not refcounted (see
14099 * dynptr_type_refcounted)
14100 */
14101 } else {
14102 return 0;
14103 }
14104
14105 return 1;
14106 }
14107
/* Forward declaration: lets the code below call check_return_code() ahead of its definition. */
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
14109
check_kfunc_call(struct bpf_verifier_env * env,struct bpf_insn * insn,int * insn_idx_p)14110 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
14111 int *insn_idx_p)
14112 {
14113 bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
14114 u32 i, nargs, ptr_type_id, release_ref_obj_id;
14115 struct bpf_reg_state *regs = cur_regs(env);
14116 const char *func_name, *ptr_type_name;
14117 const struct btf_type *t, *ptr_type;
14118 struct bpf_kfunc_call_arg_meta meta;
14119 struct bpf_insn_aux_data *insn_aux;
14120 int err, insn_idx = *insn_idx_p;
14121 const struct btf_param *args;
14122 struct btf *desc_btf;
14123
14124 /* skip for now, but return error when we find this in fixup_kfunc_call */
14125 if (!insn->imm)
14126 return 0;
14127
14128 err = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
14129 if (err == -EACCES && meta.func_name)
14130 verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
14131 if (err)
14132 return err;
14133 desc_btf = meta.btf;
14134 func_name = meta.func_name;
14135 insn_aux = &env->insn_aux_data[insn_idx];
14136
14137 insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
14138
14139 if (!insn->off &&
14140 (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
14141 insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
14142 struct bpf_verifier_state *branch;
14143 struct bpf_reg_state *regs;
14144
14145 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
14146 if (IS_ERR(branch)) {
14147 verbose(env, "failed to push state for failed lock acquisition\n");
14148 return PTR_ERR(branch);
14149 }
14150
14151 regs = branch->frame[branch->curframe]->regs;
14152
14153 /* Clear r0-r5 registers in forked state */
14154 for (i = 0; i < CALLER_SAVED_REGS; i++)
14155 mark_reg_not_init(env, regs, caller_saved[i]);
14156
14157 mark_reg_unknown(env, regs, BPF_REG_0);
14158 err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
14159 if (err) {
14160 verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
14161 return err;
14162 }
14163 __mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
14164 } else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
14165 verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
14166 return -EFAULT;
14167 }
14168
14169 if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
14170 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
14171 return -EACCES;
14172 }
14173
14174 sleepable = is_kfunc_sleepable(&meta);
14175 if (sleepable && !in_sleepable(env)) {
14176 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
14177 return -EACCES;
14178 }
14179
14180 /* Track non-sleepable context for kfuncs, same as for helpers. */
14181 if (!in_sleepable_context(env))
14182 insn_aux->non_sleepable = true;
14183
14184 /* Check the arguments */
14185 err = check_kfunc_args(env, &meta, insn_idx);
14186 if (err < 0)
14187 return err;
14188
14189 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
14190 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
14191 set_rbtree_add_callback_state);
14192 if (err) {
14193 verbose(env, "kfunc %s#%d failed callback verification\n",
14194 func_name, meta.func_id);
14195 return err;
14196 }
14197 }
14198
14199 if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
14200 meta.r0_size = sizeof(u64);
14201 meta.r0_rdonly = false;
14202 }
14203
14204 if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
14205 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
14206 set_timer_callback_state);
14207 if (err) {
14208 verbose(env, "kfunc %s#%d failed callback verification\n",
14209 func_name, meta.func_id);
14210 return err;
14211 }
14212 }
14213
14214 if (is_task_work_add_kfunc(meta.func_id)) {
14215 err = push_callback_call(env, insn, insn_idx, meta.subprogno,
14216 set_task_work_schedule_callback_state);
14217 if (err) {
14218 verbose(env, "kfunc %s#%d failed callback verification\n",
14219 func_name, meta.func_id);
14220 return err;
14221 }
14222 }
14223
14224 rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
14225 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
14226
14227 preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
14228 preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
14229
14230 if (rcu_lock) {
14231 env->cur_state->active_rcu_locks++;
14232 } else if (rcu_unlock) {
14233 struct bpf_func_state *state;
14234 struct bpf_reg_state *reg;
14235 u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
14236
14237 if (env->cur_state->active_rcu_locks == 0) {
14238 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
14239 return -EINVAL;
14240 }
14241 if (--env->cur_state->active_rcu_locks == 0) {
14242 bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
14243 if (reg->type & MEM_RCU) {
14244 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
14245 reg->type |= PTR_UNTRUSTED;
14246 }
14247 }));
14248 }
14249 } else if (sleepable && env->cur_state->active_rcu_locks) {
14250 verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
14251 return -EACCES;
14252 }
14253
14254 if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
14255 verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
14256 return -EACCES;
14257 }
14258
14259 if (env->cur_state->active_preempt_locks) {
14260 if (preempt_disable) {
14261 env->cur_state->active_preempt_locks++;
14262 } else if (preempt_enable) {
14263 env->cur_state->active_preempt_locks--;
14264 } else if (sleepable) {
14265 verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name);
14266 return -EACCES;
14267 }
14268 } else if (preempt_disable) {
14269 env->cur_state->active_preempt_locks++;
14270 } else if (preempt_enable) {
14271 verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
14272 return -EINVAL;
14273 }
14274
14275 if (env->cur_state->active_irq_id && sleepable) {
14276 verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name);
14277 return -EACCES;
14278 }
14279
14280 if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
14281 verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
14282 return -EACCES;
14283 }
14284
14285 /* In case of release function, we get register number of refcounted
14286 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
14287 */
14288 if (meta.release_regno) {
14289 struct bpf_reg_state *reg = ®s[meta.release_regno];
14290
14291 if (meta.initialized_dynptr.ref_obj_id) {
14292 err = unmark_stack_slots_dynptr(env, reg);
14293 } else {
14294 err = release_reference(env, reg->ref_obj_id);
14295 if (err)
14296 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
14297 func_name, meta.func_id);
14298 }
14299 if (err)
14300 return err;
14301 }
14302
14303 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
14304 meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
14305 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
14306 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
14307 insn_aux->insert_off = regs[BPF_REG_2].off;
14308 insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
14309 err = ref_convert_owning_non_owning(env, release_ref_obj_id);
14310 if (err) {
14311 verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
14312 func_name, meta.func_id);
14313 return err;
14314 }
14315
14316 err = release_reference(env, release_ref_obj_id);
14317 if (err) {
14318 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
14319 func_name, meta.func_id);
14320 return err;
14321 }
14322 }
14323
14324 if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
14325 if (!bpf_jit_supports_exceptions()) {
14326 verbose(env, "JIT does not support calling kfunc %s#%d\n",
14327 func_name, meta.func_id);
14328 return -ENOTSUPP;
14329 }
14330 env->seen_exception = true;
14331
14332 /* In the case of the default callback, the cookie value passed
14333 * to bpf_throw becomes the return value of the program.
14334 */
14335 if (!env->exception_callback_subprog) {
14336 err = check_return_code(env, BPF_REG_1, "R1");
14337 if (err < 0)
14338 return err;
14339 }
14340 }
14341
14342 for (i = 0; i < CALLER_SAVED_REGS; i++) {
14343 u32 regno = caller_saved[i];
14344
14345 mark_reg_not_init(env, regs, regno);
14346 regs[regno].subreg_def = DEF_NOT_SUBREG;
14347 }
14348
14349 /* Check return type */
14350 t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
14351
14352 if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
14353 /* Only exception is bpf_obj_new_impl */
14354 if (meta.btf != btf_vmlinux ||
14355 (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
14356 meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
14357 meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
14358 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
14359 return -EINVAL;
14360 }
14361 }
14362
14363 if (btf_type_is_scalar(t)) {
14364 mark_reg_unknown(env, regs, BPF_REG_0);
14365 if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
14366 meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
14367 __mark_reg_const_zero(env, ®s[BPF_REG_0]);
14368 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
14369 } else if (btf_type_is_ptr(t)) {
14370 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
14371 err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
14372 if (err) {
14373 if (err < 0)
14374 return err;
14375 } else if (btf_type_is_void(ptr_type)) {
14376 /* kfunc returning 'void *' is equivalent to returning scalar */
14377 mark_reg_unknown(env, regs, BPF_REG_0);
14378 } else if (!__btf_type_is_struct(ptr_type)) {
14379 if (!meta.r0_size) {
14380 __u32 sz;
14381
14382 if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
14383 meta.r0_size = sz;
14384 meta.r0_rdonly = true;
14385 }
14386 }
14387 if (!meta.r0_size) {
14388 ptr_type_name = btf_name_by_offset(desc_btf,
14389 ptr_type->name_off);
14390 verbose(env,
14391 "kernel function %s returns pointer type %s %s is not supported\n",
14392 func_name,
14393 btf_type_str(ptr_type),
14394 ptr_type_name);
14395 return -EINVAL;
14396 }
14397
14398 mark_reg_known_zero(env, regs, BPF_REG_0);
14399 regs[BPF_REG_0].type = PTR_TO_MEM;
14400 regs[BPF_REG_0].mem_size = meta.r0_size;
14401
14402 if (meta.r0_rdonly)
14403 regs[BPF_REG_0].type |= MEM_RDONLY;
14404
14405 /* Ensures we don't access the memory after a release_reference() */
14406 if (meta.ref_obj_id)
14407 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
14408
14409 if (is_kfunc_rcu_protected(&meta))
14410 regs[BPF_REG_0].type |= MEM_RCU;
14411 } else {
14412 enum bpf_reg_type type = PTR_TO_BTF_ID;
14413
14414 if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
14415 type |= PTR_UNTRUSTED;
14416 else if (is_kfunc_rcu_protected(&meta) ||
14417 (is_iter_next_kfunc(&meta) &&
14418 (get_iter_from_state(env->cur_state, &meta)
14419 ->type & MEM_RCU))) {
14420 /*
14421 * If the iterator's constructor (the _new
14422 * function e.g., bpf_iter_task_new) has been
14423 * annotated with BPF kfunc flag
14424 * KF_RCU_PROTECTED and was called within a RCU
14425 * read-side critical section, also propagate
14426 * the MEM_RCU flag to the pointer returned from
14427 * the iterator's next function (e.g.,
14428 * bpf_iter_task_next).
14429 */
14430 type |= MEM_RCU;
14431 } else {
14432 /*
14433 * Any PTR_TO_BTF_ID that is returned from a BPF
14434 * kfunc should by default be treated as
14435 * implicitly trusted.
14436 */
14437 type |= PTR_TRUSTED;
14438 }
14439
14440 mark_reg_known_zero(env, regs, BPF_REG_0);
14441 regs[BPF_REG_0].btf = desc_btf;
14442 regs[BPF_REG_0].type = type;
14443 regs[BPF_REG_0].btf_id = ptr_type_id;
14444 }
14445
14446 if (is_kfunc_ret_null(&meta)) {
14447 regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
14448 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
14449 regs[BPF_REG_0].id = ++env->id_gen;
14450 }
14451 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
14452 if (is_kfunc_acquire(&meta)) {
14453 int id = acquire_reference(env, insn_idx);
14454
14455 if (id < 0)
14456 return id;
14457 if (is_kfunc_ret_null(&meta))
14458 regs[BPF_REG_0].id = id;
14459 regs[BPF_REG_0].ref_obj_id = id;
14460 } else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
14461 ref_set_non_owning(env, ®s[BPF_REG_0]);
14462 }
14463
14464 if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id)
14465 regs[BPF_REG_0].id = ++env->id_gen;
14466 } else if (btf_type_is_void(t)) {
14467 if (meta.btf == btf_vmlinux) {
14468 if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
14469 meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
14470 insn_aux->kptr_struct_meta =
14471 btf_find_struct_meta(meta.arg_btf,
14472 meta.arg_btf_id);
14473 }
14474 }
14475 }
14476
14477 if (is_kfunc_pkt_changing(&meta))
14478 clear_all_pkt_pointers(env);
14479
14480 nargs = btf_type_vlen(meta.func_proto);
14481 args = (const struct btf_param *)(meta.func_proto + 1);
14482 for (i = 0; i < nargs; i++) {
14483 u32 regno = i + 1;
14484
14485 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
14486 if (btf_type_is_ptr(t))
14487 mark_btf_func_reg_size(env, regno, sizeof(void *));
14488 else
14489 /* scalar. ensured by btf_check_kfunc_arg_match() */
14490 mark_btf_func_reg_size(env, regno, t->size);
14491 }
14492
14493 if (is_iter_next_kfunc(&meta)) {
14494 err = process_iter_next_call(env, insn_idx, &meta);
14495 if (err)
14496 return err;
14497 }
14498
14499 if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
14500 env->prog->call_session_cookie = true;
14501
14502 return 0;
14503 }
14504
check_reg_sane_offset(struct bpf_verifier_env * env,const struct bpf_reg_state * reg,enum bpf_reg_type type)14505 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
14506 const struct bpf_reg_state *reg,
14507 enum bpf_reg_type type)
14508 {
14509 bool known = tnum_is_const(reg->var_off);
14510 s64 val = reg->var_off.value;
14511 s64 smin = reg->smin_value;
14512
14513 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
14514 verbose(env, "math between %s pointer and %lld is not allowed\n",
14515 reg_type_str(env, type), val);
14516 return false;
14517 }
14518
14519 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
14520 verbose(env, "%s pointer offset %d is not allowed\n",
14521 reg_type_str(env, type), reg->off);
14522 return false;
14523 }
14524
14525 if (smin == S64_MIN) {
14526 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
14527 reg_type_str(env, type));
14528 return false;
14529 }
14530
14531 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
14532 verbose(env, "value %lld makes %s pointer be out of bounds\n",
14533 smin, reg_type_str(env, type));
14534 return false;
14535 }
14536
14537 return true;
14538 }
14539
/* Internal failure reasons of the pointer ALU sanitation helpers.
 * sanitize_err() turns them into a log message and an errno.
 */
enum {
	REASON_BOUNDS	= -1,	/* scalar offset has mixed signed bounds */
	REASON_TYPE	= -2,	/* pointer type has no ALU limit */
	REASON_PATHS	= -3,	/* conflicting state/limit from another path */
	REASON_LIMIT	= -4,	/* computed limit is beyond pointer bounds */
	REASON_STACK	= -5,	/* speculative state could not be pushed */
};
14547
/* Compute the ALU masking limit for arithmetic on @ptr_reg.
 *
 * @ptr_reg:      pointer operand (only PTR_TO_STACK and PTR_TO_MAP_VALUE
 *                are handled)
 * @alu_limit:    out parameter, written only on success
 * @mask_to_left: true when the pointer moves towards lower addresses
 *
 * Return: 0 on success, REASON_TYPE for any other pointer type,
 * REASON_LIMIT when the computed limit is not below the allowed maximum.
 */
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *alu_limit, bool mask_to_left)
{
	u32 bound, limit;

	if (ptr_reg->type == PTR_TO_STACK) {
		/* Offset 0 is out-of-bounds, but acceptable start for the
		 * left direction, see BPF_REG_FP. Also, unknown scalar
		 * offset where we would need to deal with min/max bounds is
		 * currently prohibited for unprivileged.
		 */
		bound = MAX_BPF_STACK + mask_to_left;
		limit = -(ptr_reg->var_off.value + ptr_reg->off);
	} else if (ptr_reg->type == PTR_TO_MAP_VALUE) {
		bound = ptr_reg->map_ptr->value_size;
		/* Moving left uses the signed minimum, moving right the
		 * unsigned maximum, each combined with the fixed offset.
		 */
		limit = (mask_to_left ?
			 ptr_reg->smin_value :
			 ptr_reg->umax_value) + ptr_reg->off;
	} else {
		return REASON_TYPE;
	}

	if (limit >= bound)
		return REASON_LIMIT;

	*alu_limit = limit;
	return 0;
}
14578
can_skip_alu_sanitation(const struct bpf_verifier_env * env,const struct bpf_insn * insn)14579 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
14580 const struct bpf_insn *insn)
14581 {
14582 return env->bypass_spec_v1 ||
14583 BPF_SRC(insn->code) == BPF_K ||
14584 cur_aux(env)->nospec;
14585 }
14586
/* Record the sanitation state and limit for an instruction in @aux.
 *
 * Return: 0 on success, REASON_PATHS when @aux already holds a non-zero
 * alu_state and the stored state or limit differs from the new one.
 */
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
				       u32 alu_state, u32 alu_limit)
{
	/* If we arrived here from different branches with different
	 * state or limits to sanitize, then this won't work.
	 */
	if (aux->alu_state) {
		bool unchanged = aux->alu_state == alu_state &&
				 aux->alu_limit == alu_limit;

		if (!unchanged)
			return REASON_PATHS;
	}

	/* Corresponding fixup done in do_misc_fixups(). */
	aux->alu_limit = alu_limit;
	aux->alu_state = alu_state;
	return 0;
}
14603
sanitize_val_alu(struct bpf_verifier_env * env,struct bpf_insn * insn)14604 static int sanitize_val_alu(struct bpf_verifier_env *env,
14605 struct bpf_insn *insn)
14606 {
14607 struct bpf_insn_aux_data *aux = cur_aux(env);
14608
14609 if (can_skip_alu_sanitation(env, insn))
14610 return 0;
14611
14612 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
14613 }
14614
sanitize_needed(u8 opcode)14615 static bool sanitize_needed(u8 opcode)
14616 {
14617 return opcode == BPF_ADD || opcode == BPF_SUB;
14618 }
14619
/* State carried from the first sanitize_ptr_alu() call (commit_window ==
 * false) to the later commit call (commit_window == true).
 */
struct bpf_sanitize_info {
	/* alu_state/alu_limit recorded by the first call; the commit call reads them back */
	struct bpf_insn_aux_data aux;
	/* masking direction chosen by the first call, passed to retrieve_ptr_limit() */
	bool mask_to_left;
};
14624
/* Push a speculative verification state that continues at @next_idx.
 *
 * When @insn is given, the registers it uses are marked unknown in the
 * pushed state: the destination register for a BPF_K source, destination
 * and source registers for a BPF_X source.
 *
 * Return: 0 on success, the error encoded by push_stack() otherwise.
 */
static int sanitize_speculative_path(struct bpf_verifier_env *env,
				     const struct bpf_insn *insn,
				     u32 next_idx, u32 curr_idx)
{
	struct bpf_verifier_state *spec;
	struct bpf_reg_state *spec_regs;

	spec = push_stack(env, next_idx, curr_idx, true);
	if (IS_ERR(spec) || !insn)
		return PTR_ERR_OR_ZERO(spec);

	spec_regs = spec->frame[spec->curframe]->regs;

	switch (BPF_SRC(insn->code)) {
	case BPF_K:
		mark_reg_unknown(env, spec_regs, insn->dst_reg);
		break;
	case BPF_X:
		mark_reg_unknown(env, spec_regs, insn->dst_reg);
		mark_reg_unknown(env, spec_regs, insn->src_reg);
		break;
	default:
		break;
	}

	return 0;
}
14644
sanitize_ptr_alu(struct bpf_verifier_env * env,struct bpf_insn * insn,const struct bpf_reg_state * ptr_reg,const struct bpf_reg_state * off_reg,struct bpf_reg_state * dst_reg,struct bpf_sanitize_info * info,const bool commit_window)14645 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
14646 struct bpf_insn *insn,
14647 const struct bpf_reg_state *ptr_reg,
14648 const struct bpf_reg_state *off_reg,
14649 struct bpf_reg_state *dst_reg,
14650 struct bpf_sanitize_info *info,
14651 const bool commit_window)
14652 {
14653 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
14654 struct bpf_verifier_state *vstate = env->cur_state;
14655 bool off_is_imm = tnum_is_const(off_reg->var_off);
14656 bool off_is_neg = off_reg->smin_value < 0;
14657 bool ptr_is_dst_reg = ptr_reg == dst_reg;
14658 u8 opcode = BPF_OP(insn->code);
14659 u32 alu_state, alu_limit;
14660 struct bpf_reg_state tmp;
14661 int err;
14662
14663 if (can_skip_alu_sanitation(env, insn))
14664 return 0;
14665
14666 /* We already marked aux for masking from non-speculative
14667 * paths, thus we got here in the first place. We only care
14668 * to explore bad access from here.
14669 */
14670 if (vstate->speculative)
14671 goto do_sim;
14672
14673 if (!commit_window) {
14674 if (!tnum_is_const(off_reg->var_off) &&
14675 (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
14676 return REASON_BOUNDS;
14677
14678 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
14679 (opcode == BPF_SUB && !off_is_neg);
14680 }
14681
14682 err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
14683 if (err < 0)
14684 return err;
14685
14686 if (commit_window) {
14687 /* In commit phase we narrow the masking window based on
14688 * the observed pointer move after the simulated operation.
14689 */
14690 alu_state = info->aux.alu_state;
14691 alu_limit = abs(info->aux.alu_limit - alu_limit);
14692 } else {
14693 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
14694 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
14695 alu_state |= ptr_is_dst_reg ?
14696 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
14697
14698 /* Limit pruning on unknown scalars to enable deep search for
14699 * potential masking differences from other program paths.
14700 */
14701 if (!off_is_imm)
14702 env->explore_alu_limits = true;
14703 }
14704
14705 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
14706 if (err < 0)
14707 return err;
14708 do_sim:
14709 /* If we're in commit phase, we're done here given we already
14710 * pushed the truncated dst_reg into the speculative verification
14711 * stack.
14712 *
14713 * Also, when register is a known constant, we rewrite register-based
14714 * operation to immediate-based, and thus do not need masking (and as
14715 * a consequence, do not need to simulate the zero-truncation either).
14716 */
14717 if (commit_window || off_is_imm)
14718 return 0;
14719
14720 /* Simulate and find potential out-of-bounds access under
14721 * speculative execution from truncation as a result of
14722 * masking when off was not within expected range. If off
14723 * sits in dst, then we temporarily need to move ptr there
14724 * to simulate dst (== 0) +/-= ptr. Needed, for example,
14725 * for cases where we use K-based arithmetic in one direction
14726 * and truncated reg-based in the other in order to explore
14727 * bad access.
14728 */
14729 if (!ptr_is_dst_reg) {
14730 tmp = *dst_reg;
14731 copy_register_state(dst_reg, ptr_reg);
14732 }
14733 err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
14734 if (err < 0)
14735 return REASON_STACK;
14736 if (!ptr_is_dst_reg)
14737 *dst_reg = tmp;
14738 return 0;
14739 }
14740
sanitize_mark_insn_seen(struct bpf_verifier_env * env)14741 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
14742 {
14743 struct bpf_verifier_state *vstate = env->cur_state;
14744
14745 /* If we simulate paths under speculation, we don't update the
14746 * insn as 'seen' such that when we verify unreachable paths in
14747 * the non-speculative domain, sanitize_dead_code() can still
14748 * rewrite/sanitize them.
14749 */
14750 if (!vstate->speculative)
14751 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
14752 }
14753
sanitize_err(struct bpf_verifier_env * env,const struct bpf_insn * insn,int reason,const struct bpf_reg_state * off_reg,const struct bpf_reg_state * dst_reg)14754 static int sanitize_err(struct bpf_verifier_env *env,
14755 const struct bpf_insn *insn, int reason,
14756 const struct bpf_reg_state *off_reg,
14757 const struct bpf_reg_state *dst_reg)
14758 {
14759 static const char *err = "pointer arithmetic with it prohibited for !root";
14760 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
14761 u32 dst = insn->dst_reg, src = insn->src_reg;
14762
14763 switch (reason) {
14764 case REASON_BOUNDS:
14765 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
14766 off_reg == dst_reg ? dst : src, err);
14767 break;
14768 case REASON_TYPE:
14769 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
14770 off_reg == dst_reg ? src : dst, err);
14771 break;
14772 case REASON_PATHS:
14773 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
14774 dst, op, err);
14775 break;
14776 case REASON_LIMIT:
14777 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
14778 dst, op, err);
14779 break;
14780 case REASON_STACK:
14781 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
14782 dst, err);
14783 return -ENOMEM;
14784 default:
14785 verifier_bug(env, "unknown reason (%d)", reason);
14786 break;
14787 }
14788
14789 return -EACCES;
14790 }
14791
14792 /* check that stack access falls within stack limits and that 'reg' doesn't
14793 * have a variable offset.
14794 *
14795 * Variable offset is prohibited for unprivileged mode for simplicity since it
14796 * requires corresponding support in Spectre masking for stack ALU. See also
14797 * retrieve_ptr_limit().
14798 *
14799 *
14800 * 'off' includes 'reg->off'.
14801 */
check_stack_access_for_ptr_arithmetic(struct bpf_verifier_env * env,int regno,const struct bpf_reg_state * reg,int off)14802 static int check_stack_access_for_ptr_arithmetic(
14803 struct bpf_verifier_env *env,
14804 int regno,
14805 const struct bpf_reg_state *reg,
14806 int off)
14807 {
14808 if (!tnum_is_const(reg->var_off)) {
14809 char tn_buf[48];
14810
14811 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
14812 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
14813 regno, tn_buf, off);
14814 return -EACCES;
14815 }
14816
14817 if (off >= 0 || off < -MAX_BPF_STACK) {
14818 verbose(env, "R%d stack pointer arithmetic goes out of range, "
14819 "prohibited for !root; off=%d\n", regno, off);
14820 return -EACCES;
14821 }
14822
14823 return 0;
14824 }
14825
sanitize_check_bounds(struct bpf_verifier_env * env,const struct bpf_insn * insn,const struct bpf_reg_state * dst_reg)14826 static int sanitize_check_bounds(struct bpf_verifier_env *env,
14827 const struct bpf_insn *insn,
14828 const struct bpf_reg_state *dst_reg)
14829 {
14830 u32 dst = insn->dst_reg;
14831
14832 /* For unprivileged we require that resulting offset must be in bounds
14833 * in order to be able to sanitize access later on.
14834 */
14835 if (env->bypass_spec_v1)
14836 return 0;
14837
14838 switch (dst_reg->type) {
14839 case PTR_TO_STACK:
14840 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
14841 dst_reg->off + dst_reg->var_off.value))
14842 return -EACCES;
14843 break;
14844 case PTR_TO_MAP_VALUE:
14845 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
14846 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
14847 "prohibited for !root\n", dst);
14848 return -EACCES;
14849 }
14850 break;
14851 default:
14852 return -EOPNOTSUPP;
14853 }
14854
14855 return 0;
14856 }
14857
14858 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
14859 * Caller should also handle BPF_MOV case separately.
14860 * If we return -EACCES, caller may want to try again treating pointer as a
14861 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
14862 */
adjust_ptr_min_max_vals(struct bpf_verifier_env * env,struct bpf_insn * insn,const struct bpf_reg_state * ptr_reg,const struct bpf_reg_state * off_reg)14863 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
14864 struct bpf_insn *insn,
14865 const struct bpf_reg_state *ptr_reg,
14866 const struct bpf_reg_state *off_reg)
14867 {
14868 struct bpf_verifier_state *vstate = env->cur_state;
14869 struct bpf_func_state *state = vstate->frame[vstate->curframe];
14870 struct bpf_reg_state *regs = state->regs, *dst_reg;
14871 bool known = tnum_is_const(off_reg->var_off);
14872 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
14873 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
14874 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
14875 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
14876 struct bpf_sanitize_info info = {};
14877 u8 opcode = BPF_OP(insn->code);
14878 u32 dst = insn->dst_reg;
14879 int ret, bounds_ret;
14880
14881 dst_reg = ®s[dst];
14882
14883 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
14884 smin_val > smax_val || umin_val > umax_val) {
14885 /* Taint dst register if offset had invalid bounds derived from
14886 * e.g. dead branches.
14887 */
14888 __mark_reg_unknown(env, dst_reg);
14889 return 0;
14890 }
14891
14892 if (BPF_CLASS(insn->code) != BPF_ALU64) {
14893 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
14894 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
14895 __mark_reg_unknown(env, dst_reg);
14896 return 0;
14897 }
14898
14899 verbose(env,
14900 "R%d 32-bit pointer arithmetic prohibited\n",
14901 dst);
14902 return -EACCES;
14903 }
14904
14905 if (ptr_reg->type & PTR_MAYBE_NULL) {
14906 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
14907 dst, reg_type_str(env, ptr_reg->type));
14908 return -EACCES;
14909 }
14910
14911 /*
14912 * Accesses to untrusted PTR_TO_MEM are done through probe
14913 * instructions, hence no need to track offsets.
14914 */
14915 if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
14916 return 0;
14917
14918 switch (base_type(ptr_reg->type)) {
14919 case PTR_TO_CTX:
14920 case PTR_TO_MAP_VALUE:
14921 case PTR_TO_MAP_KEY:
14922 case PTR_TO_STACK:
14923 case PTR_TO_PACKET_META:
14924 case PTR_TO_PACKET:
14925 case PTR_TO_TP_BUFFER:
14926 case PTR_TO_BTF_ID:
14927 case PTR_TO_MEM:
14928 case PTR_TO_BUF:
14929 case PTR_TO_FUNC:
14930 case CONST_PTR_TO_DYNPTR:
14931 break;
14932 case PTR_TO_FLOW_KEYS:
14933 if (known)
14934 break;
14935 fallthrough;
14936 case CONST_PTR_TO_MAP:
14937 /* smin_val represents the known value */
14938 if (known && smin_val == 0 && opcode == BPF_ADD)
14939 break;
14940 fallthrough;
14941 default:
14942 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
14943 dst, reg_type_str(env, ptr_reg->type));
14944 return -EACCES;
14945 }
14946
14947 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
14948 * The id may be overwritten later if we create a new variable offset.
14949 */
14950 dst_reg->type = ptr_reg->type;
14951 dst_reg->id = ptr_reg->id;
14952
14953 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
14954 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
14955 return -EINVAL;
14956
14957 /* pointer types do not carry 32-bit bounds at the moment. */
14958 __mark_reg32_unbounded(dst_reg);
14959
14960 if (sanitize_needed(opcode)) {
14961 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
14962 &info, false);
14963 if (ret < 0)
14964 return sanitize_err(env, insn, ret, off_reg, dst_reg);
14965 }
14966
14967 switch (opcode) {
14968 case BPF_ADD:
14969 /* We can take a fixed offset as long as it doesn't overflow
14970 * the s32 'off' field
14971 */
14972 if (known && (ptr_reg->off + smin_val ==
14973 (s64)(s32)(ptr_reg->off + smin_val))) {
14974 /* pointer += K. Accumulate it into fixed offset */
14975 dst_reg->smin_value = smin_ptr;
14976 dst_reg->smax_value = smax_ptr;
14977 dst_reg->umin_value = umin_ptr;
14978 dst_reg->umax_value = umax_ptr;
14979 dst_reg->var_off = ptr_reg->var_off;
14980 dst_reg->off = ptr_reg->off + smin_val;
14981 dst_reg->raw = ptr_reg->raw;
14982 break;
14983 }
14984 /* A new variable offset is created. Note that off_reg->off
14985 * == 0, since it's a scalar.
14986 * dst_reg gets the pointer type and since some positive
14987 * integer value was added to the pointer, give it a new 'id'
14988 * if it's a PTR_TO_PACKET.
14989 * this creates a new 'base' pointer, off_reg (variable) gets
14990 * added into the variable offset, and we copy the fixed offset
14991 * from ptr_reg.
14992 */
14993 if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) ||
14994 check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) {
14995 dst_reg->smin_value = S64_MIN;
14996 dst_reg->smax_value = S64_MAX;
14997 }
14998 if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) ||
14999 check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) {
15000 dst_reg->umin_value = 0;
15001 dst_reg->umax_value = U64_MAX;
15002 }
15003 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
15004 dst_reg->off = ptr_reg->off;
15005 dst_reg->raw = ptr_reg->raw;
15006 if (reg_is_pkt_pointer(ptr_reg)) {
15007 dst_reg->id = ++env->id_gen;
15008 /* something was added to pkt_ptr, set range to zero */
15009 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
15010 }
15011 break;
15012 case BPF_SUB:
15013 if (dst_reg == off_reg) {
15014 /* scalar -= pointer. Creates an unknown scalar */
15015 verbose(env, "R%d tried to subtract pointer from scalar\n",
15016 dst);
15017 return -EACCES;
15018 }
15019 /* We don't allow subtraction from FP, because (according to
15020 * test_verifier.c test "invalid fp arithmetic", JITs might not
15021 * be able to deal with it.
15022 */
15023 if (ptr_reg->type == PTR_TO_STACK) {
15024 verbose(env, "R%d subtraction from stack pointer prohibited\n",
15025 dst);
15026 return -EACCES;
15027 }
15028 if (known && (ptr_reg->off - smin_val ==
15029 (s64)(s32)(ptr_reg->off - smin_val))) {
15030 /* pointer -= K. Subtract it from fixed offset */
15031 dst_reg->smin_value = smin_ptr;
15032 dst_reg->smax_value = smax_ptr;
15033 dst_reg->umin_value = umin_ptr;
15034 dst_reg->umax_value = umax_ptr;
15035 dst_reg->var_off = ptr_reg->var_off;
15036 dst_reg->id = ptr_reg->id;
15037 dst_reg->off = ptr_reg->off - smin_val;
15038 dst_reg->raw = ptr_reg->raw;
15039 break;
15040 }
15041 /* A new variable offset is created. If the subtrahend is known
15042 * nonnegative, then any reg->range we had before is still good.
15043 */
15044 if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) ||
15045 check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) {
15046 /* Overflow possible, we know nothing */
15047 dst_reg->smin_value = S64_MIN;
15048 dst_reg->smax_value = S64_MAX;
15049 }
15050 if (umin_ptr < umax_val) {
15051 /* Overflow possible, we know nothing */
15052 dst_reg->umin_value = 0;
15053 dst_reg->umax_value = U64_MAX;
15054 } else {
15055 /* Cannot overflow (as long as bounds are consistent) */
15056 dst_reg->umin_value = umin_ptr - umax_val;
15057 dst_reg->umax_value = umax_ptr - umin_val;
15058 }
15059 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
15060 dst_reg->off = ptr_reg->off;
15061 dst_reg->raw = ptr_reg->raw;
15062 if (reg_is_pkt_pointer(ptr_reg)) {
15063 dst_reg->id = ++env->id_gen;
15064 /* something was added to pkt_ptr, set range to zero */
15065 if (smin_val < 0)
15066 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
15067 }
15068 break;
15069 case BPF_AND:
15070 case BPF_OR:
15071 case BPF_XOR:
15072 /* bitwise ops on pointers are troublesome, prohibit. */
15073 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
15074 dst, bpf_alu_string[opcode >> 4]);
15075 return -EACCES;
15076 default:
15077 /* other operators (e.g. MUL,LSH) produce non-pointer results */
15078 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
15079 dst, bpf_alu_string[opcode >> 4]);
15080 return -EACCES;
15081 }
15082
15083 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
15084 return -EINVAL;
15085 reg_bounds_sync(dst_reg);
15086 bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
15087 if (bounds_ret == -EACCES)
15088 return bounds_ret;
15089 if (sanitize_needed(opcode)) {
15090 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
15091 &info, true);
15092 if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
15093 && !env->cur_state->speculative
15094 && bounds_ret
15095 && !ret,
15096 env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
15097 return -EFAULT;
15098 }
15099 if (ret < 0)
15100 return sanitize_err(env, insn, ret, off_reg, dst_reg);
15101 }
15102
15103 return 0;
15104 }
15105
scalar32_min_max_add(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15106 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
15107 struct bpf_reg_state *src_reg)
15108 {
15109 s32 *dst_smin = &dst_reg->s32_min_value;
15110 s32 *dst_smax = &dst_reg->s32_max_value;
15111 u32 *dst_umin = &dst_reg->u32_min_value;
15112 u32 *dst_umax = &dst_reg->u32_max_value;
15113 u32 umin_val = src_reg->u32_min_value;
15114 u32 umax_val = src_reg->u32_max_value;
15115 bool min_overflow, max_overflow;
15116
15117 if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
15118 check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
15119 *dst_smin = S32_MIN;
15120 *dst_smax = S32_MAX;
15121 }
15122
15123 /* If either all additions overflow or no additions overflow, then
15124 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
15125 * dst_umax + src_umax. Otherwise (some additions overflow), set
15126 * the output bounds to unbounded.
15127 */
15128 min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
15129 max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);
15130
15131 if (!min_overflow && max_overflow) {
15132 *dst_umin = 0;
15133 *dst_umax = U32_MAX;
15134 }
15135 }
15136
scalar_min_max_add(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15137 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
15138 struct bpf_reg_state *src_reg)
15139 {
15140 s64 *dst_smin = &dst_reg->smin_value;
15141 s64 *dst_smax = &dst_reg->smax_value;
15142 u64 *dst_umin = &dst_reg->umin_value;
15143 u64 *dst_umax = &dst_reg->umax_value;
15144 u64 umin_val = src_reg->umin_value;
15145 u64 umax_val = src_reg->umax_value;
15146 bool min_overflow, max_overflow;
15147
15148 if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
15149 check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
15150 *dst_smin = S64_MIN;
15151 *dst_smax = S64_MAX;
15152 }
15153
15154 /* If either all additions overflow or no additions overflow, then
15155 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
15156 * dst_umax + src_umax. Otherwise (some additions overflow), set
15157 * the output bounds to unbounded.
15158 */
15159 min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
15160 max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);
15161
15162 if (!min_overflow && max_overflow) {
15163 *dst_umin = 0;
15164 *dst_umax = U64_MAX;
15165 }
15166 }
15167
scalar32_min_max_sub(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15168 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
15169 struct bpf_reg_state *src_reg)
15170 {
15171 s32 *dst_smin = &dst_reg->s32_min_value;
15172 s32 *dst_smax = &dst_reg->s32_max_value;
15173 u32 *dst_umin = &dst_reg->u32_min_value;
15174 u32 *dst_umax = &dst_reg->u32_max_value;
15175 u32 umin_val = src_reg->u32_min_value;
15176 u32 umax_val = src_reg->u32_max_value;
15177 bool min_underflow, max_underflow;
15178
15179 if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
15180 check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
15181 /* Overflow possible, we know nothing */
15182 *dst_smin = S32_MIN;
15183 *dst_smax = S32_MAX;
15184 }
15185
15186 /* If either all subtractions underflow or no subtractions
15187 * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
15188 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
15189 * underflow), set the output bounds to unbounded.
15190 */
15191 min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
15192 max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);
15193
15194 if (min_underflow && !max_underflow) {
15195 *dst_umin = 0;
15196 *dst_umax = U32_MAX;
15197 }
15198 }
15199
scalar_min_max_sub(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15200 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
15201 struct bpf_reg_state *src_reg)
15202 {
15203 s64 *dst_smin = &dst_reg->smin_value;
15204 s64 *dst_smax = &dst_reg->smax_value;
15205 u64 *dst_umin = &dst_reg->umin_value;
15206 u64 *dst_umax = &dst_reg->umax_value;
15207 u64 umin_val = src_reg->umin_value;
15208 u64 umax_val = src_reg->umax_value;
15209 bool min_underflow, max_underflow;
15210
15211 if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
15212 check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
15213 /* Overflow possible, we know nothing */
15214 *dst_smin = S64_MIN;
15215 *dst_smax = S64_MAX;
15216 }
15217
15218 /* If either all subtractions underflow or no subtractions
15219 * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
15220 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
15221 * underflow), set the output bounds to unbounded.
15222 */
15223 min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
15224 max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);
15225
15226 if (min_underflow && !max_underflow) {
15227 *dst_umin = 0;
15228 *dst_umax = U64_MAX;
15229 }
15230 }
15231
scalar32_min_max_mul(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15232 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
15233 struct bpf_reg_state *src_reg)
15234 {
15235 s32 *dst_smin = &dst_reg->s32_min_value;
15236 s32 *dst_smax = &dst_reg->s32_max_value;
15237 u32 *dst_umin = &dst_reg->u32_min_value;
15238 u32 *dst_umax = &dst_reg->u32_max_value;
15239 s32 tmp_prod[4];
15240
15241 if (check_mul_overflow(*dst_umax, src_reg->u32_max_value, dst_umax) ||
15242 check_mul_overflow(*dst_umin, src_reg->u32_min_value, dst_umin)) {
15243 /* Overflow possible, we know nothing */
15244 *dst_umin = 0;
15245 *dst_umax = U32_MAX;
15246 }
15247 if (check_mul_overflow(*dst_smin, src_reg->s32_min_value, &tmp_prod[0]) ||
15248 check_mul_overflow(*dst_smin, src_reg->s32_max_value, &tmp_prod[1]) ||
15249 check_mul_overflow(*dst_smax, src_reg->s32_min_value, &tmp_prod[2]) ||
15250 check_mul_overflow(*dst_smax, src_reg->s32_max_value, &tmp_prod[3])) {
15251 /* Overflow possible, we know nothing */
15252 *dst_smin = S32_MIN;
15253 *dst_smax = S32_MAX;
15254 } else {
15255 *dst_smin = min_array(tmp_prod, 4);
15256 *dst_smax = max_array(tmp_prod, 4);
15257 }
15258 }
15259
scalar_min_max_mul(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15260 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
15261 struct bpf_reg_state *src_reg)
15262 {
15263 s64 *dst_smin = &dst_reg->smin_value;
15264 s64 *dst_smax = &dst_reg->smax_value;
15265 u64 *dst_umin = &dst_reg->umin_value;
15266 u64 *dst_umax = &dst_reg->umax_value;
15267 s64 tmp_prod[4];
15268
15269 if (check_mul_overflow(*dst_umax, src_reg->umax_value, dst_umax) ||
15270 check_mul_overflow(*dst_umin, src_reg->umin_value, dst_umin)) {
15271 /* Overflow possible, we know nothing */
15272 *dst_umin = 0;
15273 *dst_umax = U64_MAX;
15274 }
15275 if (check_mul_overflow(*dst_smin, src_reg->smin_value, &tmp_prod[0]) ||
15276 check_mul_overflow(*dst_smin, src_reg->smax_value, &tmp_prod[1]) ||
15277 check_mul_overflow(*dst_smax, src_reg->smin_value, &tmp_prod[2]) ||
15278 check_mul_overflow(*dst_smax, src_reg->smax_value, &tmp_prod[3])) {
15279 /* Overflow possible, we know nothing */
15280 *dst_smin = S64_MIN;
15281 *dst_smax = S64_MAX;
15282 } else {
15283 *dst_smin = min_array(tmp_prod, 4);
15284 *dst_smax = max_array(tmp_prod, 4);
15285 }
15286 }
15287
scalar32_min_max_udiv(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15288 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
15289 struct bpf_reg_state *src_reg)
15290 {
15291 u32 *dst_umin = &dst_reg->u32_min_value;
15292 u32 *dst_umax = &dst_reg->u32_max_value;
15293 u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
15294
15295 *dst_umin = *dst_umin / src_val;
15296 *dst_umax = *dst_umax / src_val;
15297
15298 /* Reset other ranges/tnum to unbounded/unknown. */
15299 dst_reg->s32_min_value = S32_MIN;
15300 dst_reg->s32_max_value = S32_MAX;
15301 reset_reg64_and_tnum(dst_reg);
15302 }
15303
scalar_min_max_udiv(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15304 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
15305 struct bpf_reg_state *src_reg)
15306 {
15307 u64 *dst_umin = &dst_reg->umin_value;
15308 u64 *dst_umax = &dst_reg->umax_value;
15309 u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
15310
15311 *dst_umin = div64_u64(*dst_umin, src_val);
15312 *dst_umax = div64_u64(*dst_umax, src_val);
15313
15314 /* Reset other ranges/tnum to unbounded/unknown. */
15315 dst_reg->smin_value = S64_MIN;
15316 dst_reg->smax_value = S64_MAX;
15317 reset_reg32_and_tnum(dst_reg);
15318 }
15319
scalar32_min_max_sdiv(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15320 static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
15321 struct bpf_reg_state *src_reg)
15322 {
15323 s32 *dst_smin = &dst_reg->s32_min_value;
15324 s32 *dst_smax = &dst_reg->s32_max_value;
15325 s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */
15326 s32 res1, res2;
15327
15328 /* BPF div specification: S32_MIN / -1 = S32_MIN */
15329 if (*dst_smin == S32_MIN && src_val == -1) {
15330 /*
15331 * If the dividend range contains more than just S32_MIN,
15332 * we cannot precisely track the result, so it becomes unbounded.
15333 * e.g., [S32_MIN, S32_MIN+10]/(-1),
15334 * = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
15335 * = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
15336 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
15337 */
15338 if (*dst_smax != S32_MIN) {
15339 *dst_smin = S32_MIN;
15340 *dst_smax = S32_MAX;
15341 }
15342 goto reset;
15343 }
15344
15345 res1 = *dst_smin / src_val;
15346 res2 = *dst_smax / src_val;
15347 *dst_smin = min(res1, res2);
15348 *dst_smax = max(res1, res2);
15349
15350 reset:
15351 /* Reset other ranges/tnum to unbounded/unknown. */
15352 dst_reg->u32_min_value = 0;
15353 dst_reg->u32_max_value = U32_MAX;
15354 reset_reg64_and_tnum(dst_reg);
15355 }
15356
scalar_min_max_sdiv(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15357 static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
15358 struct bpf_reg_state *src_reg)
15359 {
15360 s64 *dst_smin = &dst_reg->smin_value;
15361 s64 *dst_smax = &dst_reg->smax_value;
15362 s64 src_val = src_reg->smin_value; /* non-zero, const divisor */
15363 s64 res1, res2;
15364
15365 /* BPF div specification: S64_MIN / -1 = S64_MIN */
15366 if (*dst_smin == S64_MIN && src_val == -1) {
15367 /*
15368 * If the dividend range contains more than just S64_MIN,
15369 * we cannot precisely track the result, so it becomes unbounded.
15370 * e.g., [S64_MIN, S64_MIN+10]/(-1),
15371 * = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
15372 * = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
15373 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
15374 */
15375 if (*dst_smax != S64_MIN) {
15376 *dst_smin = S64_MIN;
15377 *dst_smax = S64_MAX;
15378 }
15379 goto reset;
15380 }
15381
15382 res1 = div64_s64(*dst_smin, src_val);
15383 res2 = div64_s64(*dst_smax, src_val);
15384 *dst_smin = min(res1, res2);
15385 *dst_smax = max(res1, res2);
15386
15387 reset:
15388 /* Reset other ranges/tnum to unbounded/unknown. */
15389 dst_reg->umin_value = 0;
15390 dst_reg->umax_value = U64_MAX;
15391 reset_reg32_and_tnum(dst_reg);
15392 }
15393
scalar32_min_max_umod(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15394 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
15395 struct bpf_reg_state *src_reg)
15396 {
15397 u32 *dst_umin = &dst_reg->u32_min_value;
15398 u32 *dst_umax = &dst_reg->u32_max_value;
15399 u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
15400 u32 res_max = src_val - 1;
15401
15402 /*
15403 * If dst_umax <= res_max, the result remains unchanged.
15404 * e.g., [2, 5] % 10 = [2, 5].
15405 */
15406 if (*dst_umax <= res_max)
15407 return;
15408
15409 *dst_umin = 0;
15410 *dst_umax = min(*dst_umax, res_max);
15411
15412 /* Reset other ranges/tnum to unbounded/unknown. */
15413 dst_reg->s32_min_value = S32_MIN;
15414 dst_reg->s32_max_value = S32_MAX;
15415 reset_reg64_and_tnum(dst_reg);
15416 }
15417
scalar_min_max_umod(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15418 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
15419 struct bpf_reg_state *src_reg)
15420 {
15421 u64 *dst_umin = &dst_reg->umin_value;
15422 u64 *dst_umax = &dst_reg->umax_value;
15423 u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
15424 u64 res_max = src_val - 1;
15425
15426 /*
15427 * If dst_umax <= res_max, the result remains unchanged.
15428 * e.g., [2, 5] % 10 = [2, 5].
15429 */
15430 if (*dst_umax <= res_max)
15431 return;
15432
15433 *dst_umin = 0;
15434 *dst_umax = min(*dst_umax, res_max);
15435
15436 /* Reset other ranges/tnum to unbounded/unknown. */
15437 dst_reg->smin_value = S64_MIN;
15438 dst_reg->smax_value = S64_MAX;
15439 reset_reg32_and_tnum(dst_reg);
15440 }
15441
scalar32_min_max_smod(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15442 static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
15443 struct bpf_reg_state *src_reg)
15444 {
15445 s32 *dst_smin = &dst_reg->s32_min_value;
15446 s32 *dst_smax = &dst_reg->s32_max_value;
15447 s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */
15448
15449 /*
15450 * Safe absolute value calculation:
15451 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
15452 * Here use unsigned integer to avoid overflow.
15453 */
15454 u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;
15455
15456 /*
15457 * Calculate the maximum possible absolute value of the result.
15458 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
15459 * 2147483647 (S32_MAX), which fits perfectly in s32.
15460 */
15461 s32 res_max_abs = src_abs - 1;
15462
15463 /*
15464 * If the dividend is already within the result range,
15465 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
15466 */
15467 if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
15468 return;
15469
15470 /* General case: result has the same sign as the dividend. */
15471 if (*dst_smin >= 0) {
15472 *dst_smin = 0;
15473 *dst_smax = min(*dst_smax, res_max_abs);
15474 } else if (*dst_smax <= 0) {
15475 *dst_smax = 0;
15476 *dst_smin = max(*dst_smin, -res_max_abs);
15477 } else {
15478 *dst_smin = -res_max_abs;
15479 *dst_smax = res_max_abs;
15480 }
15481
15482 /* Reset other ranges/tnum to unbounded/unknown. */
15483 dst_reg->u32_min_value = 0;
15484 dst_reg->u32_max_value = U32_MAX;
15485 reset_reg64_and_tnum(dst_reg);
15486 }
15487
scalar_min_max_smod(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15488 static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
15489 struct bpf_reg_state *src_reg)
15490 {
15491 s64 *dst_smin = &dst_reg->smin_value;
15492 s64 *dst_smax = &dst_reg->smax_value;
15493 s64 src_val = src_reg->smin_value; /* non-zero, const divisor */
15494
15495 /*
15496 * Safe absolute value calculation:
15497 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
15498 * Here use unsigned integer to avoid overflow.
15499 */
15500 u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;
15501
15502 /*
15503 * Calculate the maximum possible absolute value of the result.
15504 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
15505 * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
15506 */
15507 s64 res_max_abs = src_abs - 1;
15508
15509 /*
15510 * If the dividend is already within the result range,
15511 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
15512 */
15513 if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
15514 return;
15515
15516 /* General case: result has the same sign as the dividend. */
15517 if (*dst_smin >= 0) {
15518 *dst_smin = 0;
15519 *dst_smax = min(*dst_smax, res_max_abs);
15520 } else if (*dst_smax <= 0) {
15521 *dst_smax = 0;
15522 *dst_smin = max(*dst_smin, -res_max_abs);
15523 } else {
15524 *dst_smin = -res_max_abs;
15525 *dst_smax = res_max_abs;
15526 }
15527
15528 /* Reset other ranges/tnum to unbounded/unknown. */
15529 dst_reg->umin_value = 0;
15530 dst_reg->umax_value = U64_MAX;
15531 reset_reg32_and_tnum(dst_reg);
15532 }
15533
scalar32_min_max_and(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15534 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
15535 struct bpf_reg_state *src_reg)
15536 {
15537 bool src_known = tnum_subreg_is_const(src_reg->var_off);
15538 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
15539 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
15540 u32 umax_val = src_reg->u32_max_value;
15541
15542 if (src_known && dst_known) {
15543 __mark_reg32_known(dst_reg, var32_off.value);
15544 return;
15545 }
15546
15547 /* We get our minimum from the var_off, since that's inherently
15548 * bitwise. Our maximum is the minimum of the operands' maxima.
15549 */
15550 dst_reg->u32_min_value = var32_off.value;
15551 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
15552
15553 /* Safe to set s32 bounds by casting u32 result into s32 when u32
15554 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
15555 */
15556 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
15557 dst_reg->s32_min_value = dst_reg->u32_min_value;
15558 dst_reg->s32_max_value = dst_reg->u32_max_value;
15559 } else {
15560 dst_reg->s32_min_value = S32_MIN;
15561 dst_reg->s32_max_value = S32_MAX;
15562 }
15563 }
15564
scalar_min_max_and(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15565 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
15566 struct bpf_reg_state *src_reg)
15567 {
15568 bool src_known = tnum_is_const(src_reg->var_off);
15569 bool dst_known = tnum_is_const(dst_reg->var_off);
15570 u64 umax_val = src_reg->umax_value;
15571
15572 if (src_known && dst_known) {
15573 __mark_reg_known(dst_reg, dst_reg->var_off.value);
15574 return;
15575 }
15576
15577 /* We get our minimum from the var_off, since that's inherently
15578 * bitwise. Our maximum is the minimum of the operands' maxima.
15579 */
15580 dst_reg->umin_value = dst_reg->var_off.value;
15581 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
15582
15583 /* Safe to set s64 bounds by casting u64 result into s64 when u64
15584 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
15585 */
15586 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
15587 dst_reg->smin_value = dst_reg->umin_value;
15588 dst_reg->smax_value = dst_reg->umax_value;
15589 } else {
15590 dst_reg->smin_value = S64_MIN;
15591 dst_reg->smax_value = S64_MAX;
15592 }
15593 /* We may learn something more from the var_off */
15594 __update_reg_bounds(dst_reg);
15595 }
15596
scalar32_min_max_or(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15597 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
15598 struct bpf_reg_state *src_reg)
15599 {
15600 bool src_known = tnum_subreg_is_const(src_reg->var_off);
15601 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
15602 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
15603 u32 umin_val = src_reg->u32_min_value;
15604
15605 if (src_known && dst_known) {
15606 __mark_reg32_known(dst_reg, var32_off.value);
15607 return;
15608 }
15609
15610 /* We get our maximum from the var_off, and our minimum is the
15611 * maximum of the operands' minima
15612 */
15613 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
15614 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
15615
15616 /* Safe to set s32 bounds by casting u32 result into s32 when u32
15617 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
15618 */
15619 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
15620 dst_reg->s32_min_value = dst_reg->u32_min_value;
15621 dst_reg->s32_max_value = dst_reg->u32_max_value;
15622 } else {
15623 dst_reg->s32_min_value = S32_MIN;
15624 dst_reg->s32_max_value = S32_MAX;
15625 }
15626 }
15627
scalar_min_max_or(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15628 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
15629 struct bpf_reg_state *src_reg)
15630 {
15631 bool src_known = tnum_is_const(src_reg->var_off);
15632 bool dst_known = tnum_is_const(dst_reg->var_off);
15633 u64 umin_val = src_reg->umin_value;
15634
15635 if (src_known && dst_known) {
15636 __mark_reg_known(dst_reg, dst_reg->var_off.value);
15637 return;
15638 }
15639
15640 /* We get our maximum from the var_off, and our minimum is the
15641 * maximum of the operands' minima
15642 */
15643 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
15644 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
15645
15646 /* Safe to set s64 bounds by casting u64 result into s64 when u64
15647 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
15648 */
15649 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
15650 dst_reg->smin_value = dst_reg->umin_value;
15651 dst_reg->smax_value = dst_reg->umax_value;
15652 } else {
15653 dst_reg->smin_value = S64_MIN;
15654 dst_reg->smax_value = S64_MAX;
15655 }
15656 /* We may learn something more from the var_off */
15657 __update_reg_bounds(dst_reg);
15658 }
15659
scalar32_min_max_xor(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15660 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
15661 struct bpf_reg_state *src_reg)
15662 {
15663 bool src_known = tnum_subreg_is_const(src_reg->var_off);
15664 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
15665 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
15666
15667 if (src_known && dst_known) {
15668 __mark_reg32_known(dst_reg, var32_off.value);
15669 return;
15670 }
15671
15672 /* We get both minimum and maximum from the var32_off. */
15673 dst_reg->u32_min_value = var32_off.value;
15674 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
15675
15676 /* Safe to set s32 bounds by casting u32 result into s32 when u32
15677 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
15678 */
15679 if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
15680 dst_reg->s32_min_value = dst_reg->u32_min_value;
15681 dst_reg->s32_max_value = dst_reg->u32_max_value;
15682 } else {
15683 dst_reg->s32_min_value = S32_MIN;
15684 dst_reg->s32_max_value = S32_MAX;
15685 }
15686 }
15687
scalar_min_max_xor(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15688 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
15689 struct bpf_reg_state *src_reg)
15690 {
15691 bool src_known = tnum_is_const(src_reg->var_off);
15692 bool dst_known = tnum_is_const(dst_reg->var_off);
15693
15694 if (src_known && dst_known) {
15695 /* dst_reg->var_off.value has been updated earlier */
15696 __mark_reg_known(dst_reg, dst_reg->var_off.value);
15697 return;
15698 }
15699
15700 /* We get both minimum and maximum from the var_off. */
15701 dst_reg->umin_value = dst_reg->var_off.value;
15702 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
15703
15704 /* Safe to set s64 bounds by casting u64 result into s64 when u64
15705 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
15706 */
15707 if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
15708 dst_reg->smin_value = dst_reg->umin_value;
15709 dst_reg->smax_value = dst_reg->umax_value;
15710 } else {
15711 dst_reg->smin_value = S64_MIN;
15712 dst_reg->smax_value = S64_MAX;
15713 }
15714
15715 __update_reg_bounds(dst_reg);
15716 }
15717
/* Update the 32-bit bounds of dst_reg for a left shift by an amount in
 * [umin_val, umax_val].
 *
 * The s32 bounds are always dropped to [S32_MIN, S32_MAX]. The u32 bounds
 * are shifted (minimum by umin_val, maximum by umax_val) only when
 * umax_val <= 31 and u32_max_value <= 1 << (31 - umax_val); otherwise a
 * set bit might be shifted out of the top and the u32 range becomes
 * [0, U32_MAX].
 */
static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	bool may_lose_top_bit;

	/* Sign-bit information does not survive a left shift. */
	dst_reg->s32_min_value = S32_MIN;
	dst_reg->s32_max_value = S32_MAX;

	may_lose_top_bit = umax_val > 31 ||
			   dst_reg->u32_max_value > 1ULL << (31 - umax_val);
	if (!may_lose_top_bit) {
		dst_reg->u32_min_value <<= umin_val;
		dst_reg->u32_max_value <<= umax_val;
		return;
	}

	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;
}
15735
scalar32_min_max_lsh(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15736 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
15737 struct bpf_reg_state *src_reg)
15738 {
15739 u32 umax_val = src_reg->u32_max_value;
15740 u32 umin_val = src_reg->u32_min_value;
15741 /* u32 alu operation will zext upper bits */
15742 struct tnum subreg = tnum_subreg(dst_reg->var_off);
15743
15744 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
15745 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
15746 /* Not required but being careful mark reg64 bounds as unknown so
15747 * that we are forced to pick them up from tnum and zext later and
15748 * if some path skips this step we are still safe.
15749 */
15750 __mark_reg64_unbounded(dst_reg);
15751 __update_reg32_bounds(dst_reg);
15752 }
15753
/* Update the 64-bit bounds of dst_reg for a left shift by an amount in
 * [umin_val, umax_val].
 *
 * Signed: unbounded, except for a shift by exactly 32, where the s64 bounds
 * are the (still unshifted) s32 bounds moved up by 32 bits. That case is
 * singled out because "<<32 s>>32" is a common compiler pattern for sign
 * extending a subregister, and shifting an s32 bound left by 32 bits does
 * not flip its sign.
 *
 * Unsigned: shifted (minimum by umin_val, maximum by umax_val) only when
 * umax_value <= 1 << (63 - umax_val); otherwise a set bit might be shifted
 * out of the top and the u64 range becomes [0, U64_MAX].
 *
 * NOTE(review): 63 - umax_val is used as a shift count, so this assumes
 * callers never pass umax_val > 63 - confirm against the caller.
 */
static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	dst_reg->smax_value = S64_MAX;
	dst_reg->smin_value = S64_MIN;
	if (umin_val == 32 && umax_val == 32) {
		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
	}

	if (dst_reg->umax_value <= 1ULL << (63 - umax_val)) {
		/* No set bit can reach past bit 63. */
		dst_reg->umin_value <<= umin_val;
		dst_reg->umax_value <<= umax_val;
		return;
	}

	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;
}
15779
scalar_min_max_lsh(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15780 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
15781 struct bpf_reg_state *src_reg)
15782 {
15783 u64 umax_val = src_reg->umax_value;
15784 u64 umin_val = src_reg->umin_value;
15785
15786 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
15787 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
15788 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
15789
15790 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
15791 /* We may learn something more from the var_off */
15792 __update_reg_bounds(dst_reg);
15793 }
15794
scalar32_min_max_rsh(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15795 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
15796 struct bpf_reg_state *src_reg)
15797 {
15798 struct tnum subreg = tnum_subreg(dst_reg->var_off);
15799 u32 umax_val = src_reg->u32_max_value;
15800 u32 umin_val = src_reg->u32_min_value;
15801
15802 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
15803 * be negative, then either:
15804 * 1) src_reg might be zero, so the sign bit of the result is
15805 * unknown, so we lose our signed bounds
15806 * 2) it's known negative, thus the unsigned bounds capture the
15807 * signed bounds
15808 * 3) the signed bounds cross zero, so they tell us nothing
15809 * about the result
15810 * If the value in dst_reg is known nonnegative, then again the
15811 * unsigned bounds capture the signed bounds.
15812 * Thus, in all cases it suffices to blow away our signed bounds
15813 * and rely on inferring new ones from the unsigned bounds and
15814 * var_off of the result.
15815 */
15816 dst_reg->s32_min_value = S32_MIN;
15817 dst_reg->s32_max_value = S32_MAX;
15818
15819 dst_reg->var_off = tnum_rshift(subreg, umin_val);
15820 dst_reg->u32_min_value >>= umax_val;
15821 dst_reg->u32_max_value >>= umin_val;
15822
15823 __mark_reg64_unbounded(dst_reg);
15824 __update_reg32_bounds(dst_reg);
15825 }
15826
scalar_min_max_rsh(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15827 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
15828 struct bpf_reg_state *src_reg)
15829 {
15830 u64 umax_val = src_reg->umax_value;
15831 u64 umin_val = src_reg->umin_value;
15832
15833 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
15834 * be negative, then either:
15835 * 1) src_reg might be zero, so the sign bit of the result is
15836 * unknown, so we lose our signed bounds
15837 * 2) it's known negative, thus the unsigned bounds capture the
15838 * signed bounds
15839 * 3) the signed bounds cross zero, so they tell us nothing
15840 * about the result
15841 * If the value in dst_reg is known nonnegative, then again the
15842 * unsigned bounds capture the signed bounds.
15843 * Thus, in all cases it suffices to blow away our signed bounds
15844 * and rely on inferring new ones from the unsigned bounds and
15845 * var_off of the result.
15846 */
15847 dst_reg->smin_value = S64_MIN;
15848 dst_reg->smax_value = S64_MAX;
15849 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
15850 dst_reg->umin_value >>= umax_val;
15851 dst_reg->umax_value >>= umin_val;
15852
15853 /* Its not easy to operate on alu32 bounds here because it depends
15854 * on bits being shifted in. Take easy way out and mark unbounded
15855 * so we can recalculate later from tnum.
15856 */
15857 __mark_reg32_unbounded(dst_reg);
15858 __update_reg_bounds(dst_reg);
15859 }
15860
scalar32_min_max_arsh(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15861 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
15862 struct bpf_reg_state *src_reg)
15863 {
15864 u64 umin_val = src_reg->u32_min_value;
15865
15866 /* Upon reaching here, src_known is true and
15867 * umax_val is equal to umin_val.
15868 */
15869 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
15870 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
15871
15872 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
15873
15874 /* blow away the dst_reg umin_value/umax_value and rely on
15875 * dst_reg var_off to refine the result.
15876 */
15877 dst_reg->u32_min_value = 0;
15878 dst_reg->u32_max_value = U32_MAX;
15879
15880 __mark_reg64_unbounded(dst_reg);
15881 __update_reg32_bounds(dst_reg);
15882 }
15883
scalar_min_max_arsh(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)15884 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
15885 struct bpf_reg_state *src_reg)
15886 {
15887 u64 umin_val = src_reg->umin_value;
15888
15889 /* Upon reaching here, src_known is true and umax_val is equal
15890 * to umin_val.
15891 */
15892 dst_reg->smin_value >>= umin_val;
15893 dst_reg->smax_value >>= umin_val;
15894
15895 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
15896
15897 /* blow away the dst_reg umin_value/umax_value and rely on
15898 * dst_reg var_off to refine the result.
15899 */
15900 dst_reg->umin_value = 0;
15901 dst_reg->umax_value = U64_MAX;
15902
15903 /* Its not easy to operate on alu32 bounds here because it depends
15904 * on bits being shifted in from upper 32-bits. Take easy way out
15905 * and mark unbounded so we can recalculate later from tnum.
15906 */
15907 __mark_reg32_unbounded(dst_reg);
15908 __update_reg_bounds(dst_reg);
15909 }
15910
scalar_byte_swap(struct bpf_reg_state * dst_reg,struct bpf_insn * insn)15911 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
15912 {
15913 /*
15914 * Byte swap operation - update var_off using tnum_bswap.
15915 * Three cases:
15916 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
15917 * unconditional swap
15918 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
15919 * swap on big-endian, truncation or no-op on little-endian
15920 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
15921 * swap on little-endian, truncation or no-op on big-endian
15922 */
15923
15924 bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
15925 bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
15926 bool is_big_endian;
15927 #ifdef CONFIG_CPU_BIG_ENDIAN
15928 is_big_endian = true;
15929 #else
15930 is_big_endian = false;
15931 #endif
15932 /* Apply bswap if alu64 or switch between big-endian and little-endian machines */
15933 bool need_bswap = alu64 || (to_le == is_big_endian);
15934
15935 /*
15936 * If the register is mutated, manually reset its scalar ID to break
15937 * any existing ties and avoid incorrect bounds propagation.
15938 */
15939 if (need_bswap || insn->imm == 16 || insn->imm == 32)
15940 dst_reg->id = 0;
15941
15942 if (need_bswap) {
15943 if (insn->imm == 16)
15944 dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
15945 else if (insn->imm == 32)
15946 dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
15947 else if (insn->imm == 64)
15948 dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
15949 /*
15950 * Byteswap scrambles the range, so we must reset bounds.
15951 * Bounds will be re-derived from the new tnum later.
15952 */
15953 __mark_reg_unbounded(dst_reg);
15954 }
15955 /* For bswap16/32, truncate dst register to match the swapped size */
15956 if (insn->imm == 16 || insn->imm == 32)
15957 coerce_reg_to_size(dst_reg, insn->imm / 8);
15958 }
15959
is_safe_to_compute_dst_reg_range(struct bpf_insn * insn,const struct bpf_reg_state * src_reg)15960 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
15961 const struct bpf_reg_state *src_reg)
15962 {
15963 bool src_is_const = false;
15964 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
15965
15966 if (insn_bitness == 32) {
15967 if (tnum_subreg_is_const(src_reg->var_off)
15968 && src_reg->s32_min_value == src_reg->s32_max_value
15969 && src_reg->u32_min_value == src_reg->u32_max_value)
15970 src_is_const = true;
15971 } else {
15972 if (tnum_is_const(src_reg->var_off)
15973 && src_reg->smin_value == src_reg->smax_value
15974 && src_reg->umin_value == src_reg->umax_value)
15975 src_is_const = true;
15976 }
15977
15978 switch (BPF_OP(insn->code)) {
15979 case BPF_ADD:
15980 case BPF_SUB:
15981 case BPF_NEG:
15982 case BPF_AND:
15983 case BPF_XOR:
15984 case BPF_OR:
15985 case BPF_MUL:
15986 case BPF_END:
15987 return true;
15988
15989 /*
15990 * Division and modulo operators range is only safe to compute when the
15991 * divisor is a constant.
15992 */
15993 case BPF_DIV:
15994 case BPF_MOD:
15995 return src_is_const;
15996
15997 /* Shift operators range is only computable if shift dimension operand
15998 * is a constant. Shifts greater than 31 or 63 are undefined. This
15999 * includes shifts by a negative number.
16000 */
16001 case BPF_LSH:
16002 case BPF_RSH:
16003 case BPF_ARSH:
16004 return (src_is_const && src_reg->umax_value < insn_bitness);
16005 default:
16006 return false;
16007 }
16008 }
16009
maybe_fork_scalars(struct bpf_verifier_env * env,struct bpf_insn * insn,struct bpf_reg_state * dst_reg)16010 static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
16011 struct bpf_reg_state *dst_reg)
16012 {
16013 struct bpf_verifier_state *branch;
16014 struct bpf_reg_state *regs;
16015 bool alu32;
16016
16017 if (dst_reg->smin_value == -1 && dst_reg->smax_value == 0)
16018 alu32 = false;
16019 else if (dst_reg->s32_min_value == -1 && dst_reg->s32_max_value == 0)
16020 alu32 = true;
16021 else
16022 return 0;
16023
16024 branch = push_stack(env, env->insn_idx, env->insn_idx, false);
16025 if (IS_ERR(branch))
16026 return PTR_ERR(branch);
16027
16028 regs = branch->frame[branch->curframe]->regs;
16029 if (alu32) {
16030 __mark_reg32_known(®s[insn->dst_reg], 0);
16031 __mark_reg32_known(dst_reg, -1ull);
16032 } else {
16033 __mark_reg_known(®s[insn->dst_reg], 0);
16034 __mark_reg_known(dst_reg, -1ull);
16035 }
16036 return 0;
16037 }
16038
/* Compute the new bounds and tnum of the scalar dst_reg after the ALU
 * operation in insn has been applied with src_reg as the second operand.
 *
 * src_reg is taken by value, so updates to *dst_reg never change it, even
 * when the caller passes a copy of the destination register itself (as
 * check_alu_op() does for BPF_NEG and BPF_END).
 *
 * Returns 0 on success, or the error produced by sanitize_err() or
 * maybe_fork_scalars().
 *
 * WARNING: This function does calculations on 64-bit values, but the actual
 * execution may occur on 32-bit values. Therefore, things like bitshifts
 * need extra checks in the 32-bit case.
 */
static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
				      struct bpf_insn *insn,
				      struct bpf_reg_state *dst_reg,
				      struct bpf_reg_state src_reg)
{
	u8 opcode = BPF_OP(insn->code);
	s16 off = insn->off;
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
	int ret;

	/* Unsupported opcode/operand combination: the result is an unknown
	 * scalar, which is not an error.
	 */
	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	if (sanitize_needed(opcode)) {
		ret = sanitize_val_alu(env, insn);
		if (ret < 0)
			return sanitize_err(env, insn, ret, NULL, NULL);
	}

	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
	 * There are two classes of instructions: The first class we track both
	 * alu32 and alu64 sign/unsigned bounds independently this provides the
	 * greatest amount of precision when alu operations are mixed with jmp32
	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
	 * and BPF_OR. This is possible because these ops have fairly easy to
	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
	 * See alu32 verifier tests for examples. The second class of
	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
	 * with regards to tracking sign/unsigned bounds because the bits may
	 * cross subreg boundaries in the alu64 case. When this happens we mark
	 * the reg unbounded in the subreg bound space and use the resulting
	 * tnum to calculate an approximation of the sign/unsigned bounds.
	 */
	switch (opcode) {
	case BPF_ADD:
		scalar32_min_max_add(dst_reg, &src_reg);
		scalar_min_max_add(dst_reg, &src_reg);
		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_SUB:
		scalar32_min_max_sub(dst_reg, &src_reg);
		scalar_min_max_sub(dst_reg, &src_reg);
		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_NEG:
		/* Modelled as "0 - dst": save the old value in a scratch
		 * register, set dst to the constant 0 and subtract the saved
		 * value from it.
		 */
		env->fake_reg[0] = *dst_reg;
		__mark_reg_known(dst_reg, 0);
		scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
		scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
		dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
		break;
	case BPF_MUL:
		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_mul(dst_reg, &src_reg);
		scalar_min_max_mul(dst_reg, &src_reg);
		break;
	case BPF_DIV:
		/* BPF div specification: x / 0 = 0 */
		if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0)) {
			___mark_reg_known(dst_reg, 0);
			break;
		}
		/* off == 1 selects the signed helpers, anything else the
		 * unsigned ones.
		 */
		if (alu32)
			if (off == 1)
				scalar32_min_max_sdiv(dst_reg, &src_reg);
			else
				scalar32_min_max_udiv(dst_reg, &src_reg);
		else
			if (off == 1)
				scalar_min_max_sdiv(dst_reg, &src_reg);
			else
				scalar_min_max_udiv(dst_reg, &src_reg);
		break;
	case BPF_MOD:
		/* BPF mod specification: x % 0 = x */
		if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0))
			break;
		/* off == 1 selects the signed helpers, anything else the
		 * unsigned ones.
		 */
		if (alu32)
			if (off == 1)
				scalar32_min_max_smod(dst_reg, &src_reg);
			else
				scalar32_min_max_umod(dst_reg, &src_reg);
		else
			if (off == 1)
				scalar_min_max_smod(dst_reg, &src_reg);
			else
				scalar_min_max_umod(dst_reg, &src_reg);
		break;
	case BPF_AND:
		/* With a constant mask, a dst known to be -1 or 0 may be
		 * split into two states before the operation is applied.
		 */
		if (tnum_is_const(src_reg.var_off)) {
			ret = maybe_fork_scalars(env, insn, dst_reg);
			if (ret)
				return ret;
		}
		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_and(dst_reg, &src_reg);
		scalar_min_max_and(dst_reg, &src_reg);
		break;
	case BPF_OR:
		/* Same state split as for BPF_AND above */
		if (tnum_is_const(src_reg.var_off)) {
			ret = maybe_fork_scalars(env, insn, dst_reg);
			if (ret)
				return ret;
		}
		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_or(dst_reg, &src_reg);
		scalar_min_max_or(dst_reg, &src_reg);
		break;
	case BPF_XOR:
		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_xor(dst_reg, &src_reg);
		scalar_min_max_xor(dst_reg, &src_reg);
		break;
	case BPF_LSH:
		/* Shifts use exactly one helper, chosen by operation width;
		 * each helper updates var_off itself.
		 */
		if (alu32)
			scalar32_min_max_lsh(dst_reg, &src_reg);
		else
			scalar_min_max_lsh(dst_reg, &src_reg);
		break;
	case BPF_RSH:
		if (alu32)
			scalar32_min_max_rsh(dst_reg, &src_reg);
		else
			scalar_min_max_rsh(dst_reg, &src_reg);
		break;
	case BPF_ARSH:
		if (alu32)
			scalar32_min_max_arsh(dst_reg, &src_reg);
		else
			scalar_min_max_arsh(dst_reg, &src_reg);
		break;
	case BPF_END:
		scalar_byte_swap(dst_reg, insn);
		break;
	default:
		break;
	}

	/*
	 * ALU32 ops are zero extended into 64bit register.
	 *
	 * BPF_END is already handled inside the helper (truncation),
	 * so skip zext here to avoid unexpected zero extension.
	 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
	 * This is a 64bit byte swap operation with alu32==true,
	 * but we should not zero extend the result.
	 */
	if (alu32 && opcode != BPF_END)
		zext_32_to_64(dst_reg);
	/* Reconcile the signed, unsigned and tnum views of the result */
	reg_bounds_sync(dst_reg);
	return 0;
}
16197
/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
 * and var_off.
 *
 * Depending on the types of the destination and source operands this either
 *  - accepts the operation unconditionally (destination is PTR_TO_ARENA),
 *  - rejects pointer <op> pointer (apart from subtraction when
 *    env->allow_ptr_leaks is set, which yields an unknown scalar),
 *  - hands pointer/scalar combinations to adjust_ptr_min_max_vals(), or
 *  - hands scalar/scalar combinations to adjust_scalar_min_max_vals() and
 *    then updates the destination's scalar ID / constant-delta tracking.
 *
 * Returns 0 on success or a negative errno.
 */
static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
	u8 opcode = BPF_OP(insn->code);
	int err;

	dst_reg = &regs[insn->dst_reg];
	src_reg = NULL;

	if (dst_reg->type == PTR_TO_ARENA) {
		struct bpf_insn_aux_data *aux = cur_aux(env);

		if (BPF_CLASS(insn->code) == BPF_ALU64)
			/*
			 * 32-bit operations zero upper bits automatically.
			 * 64-bit operations need to be converted to 32.
			 */
			aux->needs_zext = true;

		/* Any arithmetic operations are allowed on arena pointers */
		return 0;
	}

	/* Any non-scalar destination is treated as a pointer operand */
	if (dst_reg->type != SCALAR_VALUE)
		ptr_reg = dst_reg;

	if (BPF_SRC(insn->code) == BPF_X) {
		src_reg = &regs[insn->src_reg];
		if (src_reg->type != SCALAR_VALUE) {
			if (dst_reg->type != SCALAR_VALUE) {
				/* Combining two pointers by any ALU op yields
				 * an arbitrary scalar. Disallow all math except
				 * pointer subtraction
				 */
				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
					mark_reg_unknown(env, regs, insn->dst_reg);
					return 0;
				}
				verbose(env, "R%d pointer %s pointer prohibited\n",
					insn->dst_reg,
					bpf_alu_string[opcode >> 4]);
				return -EACCES;
			} else {
				/* scalar += pointer
				 * This is legal, but we have to reverse our
				 * src/dest handling in computing the range
				 */
				err = mark_chain_precision(env, insn->dst_reg);
				if (err)
					return err;
				return adjust_ptr_min_max_vals(env, insn,
							       src_reg, dst_reg);
			}
		} else if (ptr_reg) {
			/* pointer += scalar */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
			return adjust_ptr_min_max_vals(env, insn,
						       dst_reg, src_reg);
		} else if (dst_reg->precise) {
			/* if dst_reg is precise, src_reg should be precise as well */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
		}
	} else {
		/* Pretend the src is a reg with a known value, since we only
		 * need to be able to read from this state.
		 */
		off_reg.type = SCALAR_VALUE;
		__mark_reg_known(&off_reg, insn->imm);
		src_reg = &off_reg;
		if (ptr_reg) /* pointer += K */
			return adjust_ptr_min_max_vals(env, insn,
						       ptr_reg, src_reg);
	}

	/* Got here implies adding two SCALAR_VALUEs */
	/* Every pointer case returned above, so these two checks only guard
	 * against internal inconsistencies.
	 */
	if (WARN_ON_ONCE(ptr_reg)) {
		print_verifier_state(env, vstate, vstate->curframe, true);
		verbose(env, "verifier internal error: unexpected ptr_reg\n");
		return -EFAULT;
	}
	if (WARN_ON(!src_reg)) {
		print_verifier_state(env, vstate, vstate->curframe, true);
		verbose(env, "verifier internal error: no src_reg\n");
		return -EFAULT;
	}
	/*
	 * For alu32 linked register tracking, we need to check dst_reg's
	 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
	 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
	 */
	u64 dst_umax = dst_reg->umax_value;

	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
	if (err)
		return err;
	/*
	 * Compilers can generate the code
	 * r1 = r2
	 * r1 += 0x1
	 * if r2 < 1000 goto ...
	 * use r1 in memory access
	 * So remember constant delta between r2 and r1 and update r1 after
	 * 'if' condition.
	 */
	if (env->bpf_capable &&
	    (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
	    dst_reg->id && is_reg_const(src_reg, alu32)) {
		u64 val = reg_const_value(src_reg, alu32);
		s32 off;

		/* The delta is stored as an s32; drop the link if a 64-bit
		 * constant does not fit.
		 */
		if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
			goto clear_id;

		/* For alu32, drop the link if dst could have exceeded
		 * U32_MAX before the operation.
		 */
		if (alu32 && (dst_umax > U32_MAX))
			goto clear_id;

		off = (s32)val;

		/* A subtraction is recorded as the addition of -val */
		if (BPF_OP(insn->code) == BPF_SUB) {
			/* Negating S32_MIN would overflow */
			if (off == S32_MIN)
				goto clear_id;
			off = -off;
		}

		if (dst_reg->id & BPF_ADD_CONST) {
			/*
			 * If the register already went through rX += val
			 * we cannot accumulate another val into rx->off.
			 */
clear_id:
			dst_reg->off = 0;
			dst_reg->id = 0;
		} else {
			/* Keep the ID, flag it with the operation width and
			 * remember the delta.
			 */
			if (alu32)
				dst_reg->id |= BPF_ADD_CONST32;
			else
				dst_reg->id |= BPF_ADD_CONST64;
			dst_reg->off = off;
		}
	} else {
		/*
		 * Make sure ID is cleared otherwise dst_reg min/max could be
		 * incorrectly propagated into other registers by sync_linked_regs()
		 */
		dst_reg->id = 0;
	}
	return 0;
}
16359
/* check validity of 32-bit and 64-bit arithmetic operations
 *
 * Validates the encoding of the ALU instruction in insn (reserved fields,
 * operand registers, immediate divisor and shift values) and updates the
 * tracked state of the destination register. Opcodes are handled in three
 * groups:
 *  - BPF_END and BPF_NEG, which only have a destination register operand;
 *  - BPF_MOV in its register, sign-extending, address-space-cast and
 *    immediate forms;
 *  - the remaining opcodes below BPF_END, which are passed on to
 *    adjust_reg_min_max_vals().
 * Opcodes above BPF_END are rejected.
 *
 * Returns a negative errno on failure; otherwise the result of
 * reg_bounds_sanity_check() on the destination register.
 */
static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 opcode = BPF_OP(insn->code);
	int err;

	if (opcode == BPF_END || opcode == BPF_NEG) {
		if (opcode == BPF_NEG) {
			if (BPF_SRC(insn->code) != BPF_K ||
			    insn->src_reg != BPF_REG_0 ||
			    insn->off != 0 || insn->imm != 0) {
				verbose(env, "BPF_NEG uses reserved fields\n");
				return -EINVAL;
			}
		} else {
			/* BPF_END: imm must be 16, 32 or 64, and the ALU64
			 * class only accepts the BPF_TO_LE source bit.
			 */
			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
			    (BPF_CLASS(insn->code) == BPF_ALU64 &&
			     BPF_SRC(insn->code) != BPF_TO_LE)) {
				verbose(env, "BPF_END uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check src operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		if (is_pointer_value(env, insn->dst_reg)) {
			verbose(env, "R%d pointer arithmetic prohibited\n",
				insn->dst_reg);
			return -EACCES;
		}

		/* check dest operand */
		/* The opcode test below is always true inside this branch;
		 * the scalar type check is what selects bounds tracking.
		 * The register is passed both as destination (by pointer)
		 * and as source operand (by value).
		 */
		if ((opcode == BPF_NEG || opcode == BPF_END) &&
		    regs[insn->dst_reg].type == SCALAR_VALUE) {
			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
			err = err ?: adjust_scalar_min_max_vals(env, insn,
							 &regs[insn->dst_reg],
							 regs[insn->dst_reg]);
		} else {
			err = check_reg_arg(env, insn->dst_reg, DST_OP);
		}
		if (err)
			return err;

	} else if (opcode == BPF_MOV) {

		if (BPF_SRC(insn->code) == BPF_X) {
			if (BPF_CLASS(insn->code) == BPF_ALU) {
				/* 32-bit mov: off is 0, or 8/16 for the
				 * sign-extending forms
				 */
				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
				    insn->imm) {
					verbose(env, "BPF_MOV uses reserved fields\n");
					return -EINVAL;
				}
			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
				if (insn->imm != 1 && insn->imm != 1u << 16) {
					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
					return -EINVAL;
				}
				if (!env->prog->aux->arena) {
					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
					return -EINVAL;
				}
			} else {
				/* 64-bit mov: off is 0, or 8/16/32 for the
				 * sign-extending forms
				 */
				if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
				     insn->off != 32) || insn->imm) {
					verbose(env, "BPF_MOV uses reserved fields\n");
					return -EINVAL;
				}
			}

			/* check src operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		} else {
			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
				verbose(env, "BPF_MOV uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check dest operand, mark as required later */
		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
		if (err)
			return err;

		if (BPF_SRC(insn->code) == BPF_X) {
			struct bpf_reg_state *src_reg = regs + insn->src_reg;
			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;

			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				if (insn->imm) {
					/* off == BPF_ADDR_SPACE_CAST */
					mark_reg_unknown(env, regs, insn->dst_reg);
					if (insn->imm == 1) { /* cast from as(1) to as(0) */
						dst_reg->type = PTR_TO_ARENA;
						/* PTR_TO_ARENA is 32-bit */
						dst_reg->subreg_def = env->insn_idx + 1;
					}
				} else if (insn->off == 0) {
					/* case: R1 = R2
					 * copy register state to dest reg
					 */
					assign_scalar_id_before_mov(env, src_reg);
					copy_register_state(dst_reg, src_reg);
					dst_reg->subreg_def = DEF_NOT_SUBREG;
				} else {
					/* case: R1 = (s8, s16 s32)R2 */
					if (is_pointer_value(env, insn->src_reg)) {
						verbose(env,
							"R%d sign-extension part of pointer\n",
							insn->src_reg);
						return -EACCES;
					} else if (src_reg->type == SCALAR_VALUE) {
						bool no_sext;

						/* If the source fits below the
						 * sign bit of the narrow type,
						 * sign extension leaves the
						 * value unchanged and the
						 * registers can share an ID.
						 */
						no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
						if (no_sext)
							assign_scalar_id_before_mov(env, src_reg);
						copy_register_state(dst_reg, src_reg);
						if (!no_sext)
							dst_reg->id = 0;
						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
						dst_reg->subreg_def = DEF_NOT_SUBREG;
					} else {
						mark_reg_unknown(env, regs, insn->dst_reg);
					}
				}
			} else {
				/* R1 = (u32) R2 */
				if (is_pointer_value(env, insn->src_reg)) {
					verbose(env,
						"R%d partial copy of pointer\n",
						insn->src_reg);
					return -EACCES;
				} else if (src_reg->type == SCALAR_VALUE) {
					if (insn->off == 0) {
						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;

						if (is_src_reg_u32)
							assign_scalar_id_before_mov(env, src_reg);
						copy_register_state(dst_reg, src_reg);
						/* Make sure ID is cleared if src_reg is not in u32
						 * range otherwise dst_reg min/max could be incorrectly
						 * propagated into src_reg by sync_linked_regs()
						 */
						if (!is_src_reg_u32)
							dst_reg->id = 0;
						dst_reg->subreg_def = env->insn_idx + 1;
					} else {
						/* case: W1 = (s8, s16)W2 */
						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));

						if (no_sext)
							assign_scalar_id_before_mov(env, src_reg);
						copy_register_state(dst_reg, src_reg);
						if (!no_sext)
							dst_reg->id = 0;
						dst_reg->subreg_def = env->insn_idx + 1;
						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
					}
				} else {
					mark_reg_unknown(env, regs,
							 insn->dst_reg);
				}
				/* 32-bit moves zero-extend into the full
				 * register
				 */
				zext_32_to_64(dst_reg);
				reg_bounds_sync(dst_reg);
			}
		} else {
			/* case: R = imm
			 * remember the value we stored into this reg
			 */
			/* clear any state __mark_reg_known doesn't set */
			mark_reg_unknown(env, regs, insn->dst_reg);
			regs[insn->dst_reg].type = SCALAR_VALUE;
			/* The ALU64 form passes imm as is; the 32-bit form
			 * casts it to u32 first.
			 */
			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				__mark_reg_known(regs + insn->dst_reg,
						 insn->imm);
			} else {
				__mark_reg_known(regs + insn->dst_reg,
						 (u32)insn->imm);
			}
		}

	} else if (opcode > BPF_END) {
		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
		return -EINVAL;

	} else {	/* all other ALU ops: and, sub, xor, add, ... */

		/* off == 1 is only accepted for BPF_MOD and BPF_DIV */
		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
			/* check src1 operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		} else {
			if (insn->src_reg != BPF_REG_0 || (insn->off != 0 && insn->off != 1) ||
			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
		}

		/* check src2 operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		/* An immediate zero divisor is rejected at load time */
		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
			verbose(env, "div by zero\n");
			return -EINVAL;
		}

		/* An immediate shift amount must lie in [0, width) */
		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;

			if (insn->imm < 0 || insn->imm >= size) {
				verbose(env, "invalid shift %d\n", insn->imm);
				return -EINVAL;
			}
		}

		/* check dest operand */
		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
		err = err ?: adjust_reg_min_max_vals(env, insn);
		if (err)
			return err;
	}

	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
}
16603
/* Record the packet range proven by a comparison of dst_reg against the end
 * of the packet: every register of the given type that shares dst_reg's id
 * gets its ->range raised to the number of bytes now known to be accessible.
 *
 * range_right_open adds one byte to the proven range.
 */
static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
				   struct bpf_reg_state *dst_reg,
				   enum bpf_reg_type type,
				   bool range_right_open)
{
	struct bpf_func_state *fstate;
	struct bpf_reg_state *alias;
	int proven;

	/* This doesn't give us any range */
	if (dst_reg->off < 0)
		return;
	if (range_right_open && dst_reg->off == 0)
		return;

	/* Risk of overflow. For instance, ptr + (1<<63) may be less
	 * than pkt_end, but that's because it's also less than pkt.
	 */
	if (dst_reg->umax_value > MAX_PACKET_OFF)
		return;
	if (dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
		return;

	proven = range_right_open ? dst_reg->off + 1 : dst_reg->off;

	/* Register markings, by example.
	 *
	 * Packet pointer as the dst register of the comparison:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 > pkt_end) goto <handle exception>
	 *   <access okay>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 < pkt_end) goto <access okay>
	 *   <handle exception>
	 *
	 *   with r2 == dst_reg, pkt_end == src_reg and
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * Packet pointer as the src register of the comparison:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end >= r2) goto <access okay>
	 *   <handle exception>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end <= r2) goto <handle exception>
	 *   <access okay>
	 *
	 *   with pkt_end == dst_reg, r2 == src_reg and
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * In both situations r3 is located and marked r3=pkt(id=n,off=0,r=8)
	 * or r3=pkt(id=n,off=0,r=8-1), making [r3, r3 + 8) respectively
	 * [r3, r3 + 8-1) safe to access, depending on the check.
	 */

	/* Registers with a matching id must have the same max_value. The
	 * fixed offset of the other register is irrelevant: if it is too
	 * big, the range simply won't allow any access.
	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
	 */
	bpf_for_each_reg_in_vstate(vstate, fstate, alias, ({
		if (alias->type == type && alias->id == dst_reg->id)
			/* never shrink a range that was already proven */
			alias->range = max(alias->range, proven);
	}));
}
16682
/*
 * <reg1> <op> <reg2>, currently assuming reg2 is a constant
 *
 * Try to decide, from the tracked tnums and signed/unsigned ranges of the
 * two scalar operands, whether the conditional jump is taken. When is_jmp32
 * is set only the 32-bit sub-register views are compared.
 *
 * Note that the range comparisons below use the full range of reg2; only
 * BPF_JSET needs one operand to actually be a constant.
 *
 * Returns 1 if the branch is always taken, 0 if it is never taken and -1
 * if the outcome cannot be determined.
 */
static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
				  u8 opcode, bool is_jmp32)
{
	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
	u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
	u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
	s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
	s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
	u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
	u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
	s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
	s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;

	/* Both operands are the very same register state: the comparison is
	 * of a value with itself.
	 */
	if (reg1 == reg2) {
		switch (opcode) {
		/* x >= x, x <= x and x == x always hold */
		case BPF_JGE:
		case BPF_JLE:
		case BPF_JSGE:
		case BPF_JSLE:
		case BPF_JEQ:
			return 1;
		/* x > x, x < x and x != x never hold */
		case BPF_JGT:
		case BPF_JLT:
		case BPF_JSGT:
		case BPF_JSLT:
		case BPF_JNE:
			return 0;
		/* x & x is non-zero exactly when x is non-zero */
		case BPF_JSET:
			if (tnum_is_const(t1))
				return t1.value != 0;
			else
				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
		default:
			return -1;
		}
	}

	switch (opcode) {
	case BPF_JEQ:
		/* constants, umin/umax and smin/smax checks would be
		 * redundant in this case because they all should match
		 */
		if (tnum_is_const(t1) && tnum_is_const(t2))
			return t1.value == t2.value;
		if (!tnum_overlap(t1, t2))
			return 0;
		/* non-overlapping ranges */
		if (umin1 > umax2 || umax1 < umin2)
			return 0;
		if (smin1 > smax2 || smax1 < smin2)
			return 0;
		if (!is_jmp32) {
			/* if 64-bit ranges are inconclusive, see if we can
			 * utilize 32-bit subrange knowledge to eliminate
			 * branches that can't be taken a priori
			 */
			if (reg1->u32_min_value > reg2->u32_max_value ||
			    reg1->u32_max_value < reg2->u32_min_value)
				return 0;
			if (reg1->s32_min_value > reg2->s32_max_value ||
			    reg1->s32_max_value < reg2->s32_min_value)
				return 0;
		}
		break;
	case BPF_JNE:
		/* constants, umin/umax and smin/smax checks would be
		 * redundant in this case because they all should match
		 */
		if (tnum_is_const(t1) && tnum_is_const(t2))
			return t1.value != t2.value;
		if (!tnum_overlap(t1, t2))
			return 1;
		/* non-overlapping ranges */
		if (umin1 > umax2 || umax1 < umin2)
			return 1;
		if (smin1 > smax2 || smax1 < smin2)
			return 1;
		if (!is_jmp32) {
			/* if 64-bit ranges are inconclusive, see if we can
			 * utilize 32-bit subrange knowledge to eliminate
			 * branches that can't be taken a priori
			 */
			if (reg1->u32_min_value > reg2->u32_max_value ||
			    reg1->u32_max_value < reg2->u32_min_value)
				return 1;
			if (reg1->s32_min_value > reg2->s32_max_value ||
			    reg1->s32_max_value < reg2->s32_min_value)
				return 1;
		}
		break;
	case BPF_JSET:
		/* Arrange for the constant operand, if any, to be reg2/t2 */
		if (!is_reg_const(reg2, is_jmp32)) {
			swap(reg1, reg2);
			swap(t1, t2);
		}
		if (!is_reg_const(reg2, is_jmp32))
			return -1;
		/* a bit known to be set in t1 is also set in the constant */
		if ((~t1.mask & t1.value) & t2.value)
			return 1;
		/* no bit that may be set in t1 is set in the constant */
		if (!((t1.mask | t1.value) & t2.value))
			return 0;
		break;
	/* For the ordering comparisons: always taken when the whole range of
	 * reg1 satisfies the relation against the whole range of reg2, never
	 * taken when the whole range violates it.
	 */
	case BPF_JGT:
		if (umin1 > umax2)
			return 1;
		else if (umax1 <= umin2)
			return 0;
		break;
	case BPF_JSGT:
		if (smin1 > smax2)
			return 1;
		else if (smax1 <= smin2)
			return 0;
		break;
	case BPF_JLT:
		if (umax1 < umin2)
			return 1;
		else if (umin1 >= umax2)
			return 0;
		break;
	case BPF_JSLT:
		if (smax1 < smin2)
			return 1;
		else if (smin1 >= smax2)
			return 0;
		break;
	case BPF_JGE:
		if (umin1 >= umax2)
			return 1;
		else if (umax1 < umin2)
			return 0;
		break;
	case BPF_JSGE:
		if (smin1 >= smax2)
			return 1;
		else if (smax1 < smin2)
			return 0;
		break;
	case BPF_JLE:
		if (umax1 <= umin2)
			return 1;
		else if (umin1 > umax2)
			return 0;
		break;
	case BPF_JSLE:
		if (smax1 <= smin2)
			return 1;
		else if (smin1 > smax2)
			return 0;
		break;
	}

	/* The ranges overlap in a way that allows both outcomes */
	return -1;
}
16841
/* Mirror a conditional jump opcode: given the opcode for "a <op> b", return
 * the opcode that expresses the same condition written as "b <op'> a".
 *
 * The low four bits of @opcode are ignored. Opcodes that are not listed
 * below map to 0.
 */
static int flip_opcode(u32 opcode)
{
	switch (opcode >> 4) {
	/* symmetric tests: operand order does not matter */
	case BPF_JEQ >> 4:
		return BPF_JEQ;
	case BPF_JNE >> 4:
		return BPF_JNE;
	case BPF_JSET >> 4:
		return BPF_JSET;
	/* ordered tests: "lesser" and "greater" (L and G) trade places */
	case BPF_JGE >> 4:
		return BPF_JLE;
	case BPF_JGT >> 4:
		return BPF_JLT;
	case BPF_JLE >> 4:
		return BPF_JGE;
	case BPF_JLT >> 4:
		return BPF_JGT;
	case BPF_JSGE >> 4:
		return BPF_JSLE;
	case BPF_JSGT >> 4:
		return BPF_JSLT;
	case BPF_JSLE >> 4:
		return BPF_JSGE;
	case BPF_JSLT >> 4:
		return BPF_JSGT;
	default:
		return 0;
	}
}
16862
/* Predict a comparison between a packet pointer and PTR_TO_PACKET_END.
 *
 * The comparison is normalized to "pkt <op> pkt_end" (flipping @opcode when
 * pkt_end is the destination operand) and then decided from pkt->range.
 *
 * Returns 1 if the branch is taken, 0 if it is not, -1 if unknown (including
 * when neither operand is PTR_TO_PACKET_END).
 */
static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
				   struct bpf_reg_state *src_reg,
				   u8 opcode)
{
	struct bpf_reg_state *pkt;
	bool beyond, at_or_beyond;

	if (src_reg->type == PTR_TO_PACKET_END) {
		pkt = dst_reg;
	} else if (dst_reg->type == PTR_TO_PACKET_END) {
		/* rewrite "pkt_end <op> pkt" as "pkt <op'> pkt_end" */
		pkt = src_reg;
		opcode = flip_opcode(opcode);
	} else {
		return -1;
	}

	/* only the negative range markers carry a verdict */
	if (pkt->range >= 0)
		return -1;

	/* BEYOND_PKT_END: pkt has at least one extra byte beyond pkt_end */
	beyond = pkt->range == BEYOND_PKT_END;
	at_or_beyond = beyond || pkt->range == AT_PKT_END;

	switch (opcode) {
	case BPF_JGT:
		/* pkt > pkt_end */
		return beyond ? 1 : -1;
	case BPF_JLE:
		/* pkt <= pkt_end */
		return beyond ? 0 : -1;
	case BPF_JGE:
		/* pkt >= pkt_end */
		return at_or_beyond ? 1 : -1;
	case BPF_JLT:
		/* pkt < pkt_end */
		return at_or_beyond ? 0 : -1;
	default:
		return -1;
	}
}
16902
16903 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
16904 * and return:
16905 * 1 - branch will be taken and "goto target" will be executed
16906 * 0 - branch will not be taken and fall-through to next insn
16907 * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
16908 * range [0,10]
16909 */
is_branch_taken(struct bpf_reg_state * reg1,struct bpf_reg_state * reg2,u8 opcode,bool is_jmp32)16910 static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
16911 u8 opcode, bool is_jmp32)
16912 {
16913 if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
16914 return is_pkt_ptr_branch_taken(reg1, reg2, opcode);
16915
16916 if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
16917 u64 val;
16918
16919 /* arrange that reg2 is a scalar, and reg1 is a pointer */
16920 if (!is_reg_const(reg2, is_jmp32)) {
16921 opcode = flip_opcode(opcode);
16922 swap(reg1, reg2);
16923 }
16924 /* and ensure that reg2 is a constant */
16925 if (!is_reg_const(reg2, is_jmp32))
16926 return -1;
16927
16928 if (!reg_not_null(reg1))
16929 return -1;
16930
16931 /* If pointer is valid tests against zero will fail so we can
16932 * use this to direct branch taken.
16933 */
16934 val = reg_const_value(reg2, is_jmp32);
16935 if (val != 0)
16936 return -1;
16937
16938 switch (opcode) {
16939 case BPF_JEQ:
16940 return 0;
16941 case BPF_JNE:
16942 return 1;
16943 default:
16944 return -1;
16945 }
16946 }
16947
16948 /* now deal with two scalars, but not necessarily constants */
16949 return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32);
16950 }
16951
16952 /* Opcode that corresponds to a *false* branch condition.
16953 * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
16954 */
rev_opcode(u8 opcode)16955 static u8 rev_opcode(u8 opcode)
16956 {
16957 switch (opcode) {
16958 case BPF_JEQ: return BPF_JNE;
16959 case BPF_JNE: return BPF_JEQ;
16960 /* JSET doesn't have it's reverse opcode in BPF, so add
16961 * BPF_X flag to denote the reverse of that operation
16962 */
16963 case BPF_JSET: return BPF_JSET | BPF_X;
16964 case BPF_JSET | BPF_X: return BPF_JSET;
16965 case BPF_JGE: return BPF_JLT;
16966 case BPF_JGT: return BPF_JLE;
16967 case BPF_JLE: return BPF_JGT;
16968 case BPF_JLT: return BPF_JGE;
16969 case BPF_JSGE: return BPF_JSLT;
16970 case BPF_JSGT: return BPF_JSLE;
16971 case BPF_JSLE: return BPF_JSGT;
16972 case BPF_JSLT: return BPF_JSGE;
16973 default: return 0;
16974 }
16975 }
16976
16977 /* Refine range knowledge for <reg1> <op> <reg>2 conditional operation. */
regs_refine_cond_op(struct bpf_reg_state * reg1,struct bpf_reg_state * reg2,u8 opcode,bool is_jmp32)16978 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
16979 u8 opcode, bool is_jmp32)
16980 {
16981 struct tnum t;
16982 u64 val;
16983
16984 /* In case of GE/GT/SGE/JST, reuse LE/LT/SLE/SLT logic from below */
16985 switch (opcode) {
16986 case BPF_JGE:
16987 case BPF_JGT:
16988 case BPF_JSGE:
16989 case BPF_JSGT:
16990 opcode = flip_opcode(opcode);
16991 swap(reg1, reg2);
16992 break;
16993 default:
16994 break;
16995 }
16996
16997 switch (opcode) {
16998 case BPF_JEQ:
16999 if (is_jmp32) {
17000 reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
17001 reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
17002 reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
17003 reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
17004 reg2->u32_min_value = reg1->u32_min_value;
17005 reg2->u32_max_value = reg1->u32_max_value;
17006 reg2->s32_min_value = reg1->s32_min_value;
17007 reg2->s32_max_value = reg1->s32_max_value;
17008
17009 t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
17010 reg1->var_off = tnum_with_subreg(reg1->var_off, t);
17011 reg2->var_off = tnum_with_subreg(reg2->var_off, t);
17012 } else {
17013 reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
17014 reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
17015 reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
17016 reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
17017 reg2->umin_value = reg1->umin_value;
17018 reg2->umax_value = reg1->umax_value;
17019 reg2->smin_value = reg1->smin_value;
17020 reg2->smax_value = reg1->smax_value;
17021
17022 reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
17023 reg2->var_off = reg1->var_off;
17024 }
17025 break;
17026 case BPF_JNE:
17027 if (!is_reg_const(reg2, is_jmp32))
17028 swap(reg1, reg2);
17029 if (!is_reg_const(reg2, is_jmp32))
17030 break;
17031
17032 /* try to recompute the bound of reg1 if reg2 is a const and
17033 * is exactly the edge of reg1.
17034 */
17035 val = reg_const_value(reg2, is_jmp32);
17036 if (is_jmp32) {
17037 /* u32_min_value is not equal to 0xffffffff at this point,
17038 * because otherwise u32_max_value is 0xffffffff as well,
17039 * in such a case both reg1 and reg2 would be constants,
17040 * jump would be predicted and reg_set_min_max() won't
17041 * be called.
17042 *
17043 * Same reasoning works for all {u,s}{min,max}{32,64} cases
17044 * below.
17045 */
17046 if (reg1->u32_min_value == (u32)val)
17047 reg1->u32_min_value++;
17048 if (reg1->u32_max_value == (u32)val)
17049 reg1->u32_max_value--;
17050 if (reg1->s32_min_value == (s32)val)
17051 reg1->s32_min_value++;
17052 if (reg1->s32_max_value == (s32)val)
17053 reg1->s32_max_value--;
17054 } else {
17055 if (reg1->umin_value == (u64)val)
17056 reg1->umin_value++;
17057 if (reg1->umax_value == (u64)val)
17058 reg1->umax_value--;
17059 if (reg1->smin_value == (s64)val)
17060 reg1->smin_value++;
17061 if (reg1->smax_value == (s64)val)
17062 reg1->smax_value--;
17063 }
17064 break;
17065 case BPF_JSET:
17066 if (!is_reg_const(reg2, is_jmp32))
17067 swap(reg1, reg2);
17068 if (!is_reg_const(reg2, is_jmp32))
17069 break;
17070 val = reg_const_value(reg2, is_jmp32);
17071 /* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
17072 * requires single bit to learn something useful. E.g., if we
17073 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
17074 * are actually set? We can learn something definite only if
17075 * it's a single-bit value to begin with.
17076 *
17077 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
17078 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
17079 * bit 1 is set, which we can readily use in adjustments.
17080 */
17081 if (!is_power_of_2(val))
17082 break;
17083 if (is_jmp32) {
17084 t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
17085 reg1->var_off = tnum_with_subreg(reg1->var_off, t);
17086 } else {
17087 reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
17088 }
17089 break;
17090 case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
17091 if (!is_reg_const(reg2, is_jmp32))
17092 swap(reg1, reg2);
17093 if (!is_reg_const(reg2, is_jmp32))
17094 break;
17095 val = reg_const_value(reg2, is_jmp32);
17096 /* Forget the ranges before narrowing tnums, to avoid invariant
17097 * violations if we're on a dead branch.
17098 */
17099 __mark_reg_unbounded(reg1);
17100 if (is_jmp32) {
17101 t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
17102 reg1->var_off = tnum_with_subreg(reg1->var_off, t);
17103 } else {
17104 reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
17105 }
17106 break;
17107 case BPF_JLE:
17108 if (is_jmp32) {
17109 reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
17110 reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
17111 } else {
17112 reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
17113 reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
17114 }
17115 break;
17116 case BPF_JLT:
17117 if (is_jmp32) {
17118 reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
17119 reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
17120 } else {
17121 reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
17122 reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
17123 }
17124 break;
17125 case BPF_JSLE:
17126 if (is_jmp32) {
17127 reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
17128 reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
17129 } else {
17130 reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
17131 reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
17132 }
17133 break;
17134 case BPF_JSLT:
17135 if (is_jmp32) {
17136 reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
17137 reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
17138 } else {
17139 reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
17140 reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
17141 }
17142 break;
17143 default:
17144 return;
17145 }
17146 }
17147
17148 /* Adjusts the register min/max values in the case that the dst_reg and
17149 * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K
17150 * check, in which case we have a fake SCALAR_VALUE representing insn->imm).
17151 * Technically we can do similar adjustments for pointers to the same object,
17152 * but we don't support that right now.
17153 */
reg_set_min_max(struct bpf_verifier_env * env,struct bpf_reg_state * true_reg1,struct bpf_reg_state * true_reg2,struct bpf_reg_state * false_reg1,struct bpf_reg_state * false_reg2,u8 opcode,bool is_jmp32)17154 static int reg_set_min_max(struct bpf_verifier_env *env,
17155 struct bpf_reg_state *true_reg1,
17156 struct bpf_reg_state *true_reg2,
17157 struct bpf_reg_state *false_reg1,
17158 struct bpf_reg_state *false_reg2,
17159 u8 opcode, bool is_jmp32)
17160 {
17161 int err;
17162
17163 /* If either register is a pointer, we can't learn anything about its
17164 * variable offset from the compare (unless they were a pointer into
17165 * the same object, but we don't bother with that).
17166 */
17167 if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE)
17168 return 0;
17169
17170 /* We compute branch direction for same SCALAR_VALUE registers in
17171 * is_scalar_branch_taken(). For unknown branch directions (e.g., BPF_JSET)
17172 * on the same registers, we don't need to adjust the min/max values.
17173 */
17174 if (false_reg1 == false_reg2)
17175 return 0;
17176
17177 /* fallthrough (FALSE) branch */
17178 regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32);
17179 reg_bounds_sync(false_reg1);
17180 reg_bounds_sync(false_reg2);
17181
17182 /* jump (TRUE) branch */
17183 regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32);
17184 reg_bounds_sync(true_reg1);
17185 reg_bounds_sync(true_reg2);
17186
17187 err = reg_bounds_sanity_check(env, true_reg1, "true_reg1");
17188 err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2");
17189 err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1");
17190 err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2");
17191 return err;
17192 }
17193
/* Resolve the nullness of @reg if it is a maybe-NULL pointer carrying @id.
 *
 * With @is_null the register becomes a SCALAR_VALUE and drops its id and
 * ref_obj_id; otherwise it is marked not-NULL. Registers that are not
 * maybe-NULL or carry a different id are left alone.
 */
static void mark_ptr_or_null_reg(struct bpf_func_state *state,
				 struct bpf_reg_state *reg, u32 id,
				 bool is_null)
{
	if (!type_may_be_null(reg->type) || reg->id != id)
		return;

	/* a zero id is only expected here for RCU registers */
	if (!is_rcu_reg(reg) && WARN_ON_ONCE(!reg->id))
		return;

	/* Old offset (both fixed and variable parts) should have been
	 * known-zero, because we don't allow pointer arithmetic on
	 * pointers that might be NULL. If we see this happening, don't
	 * convert the register.
	 *
	 * But in some cases, some helpers that return local kptrs
	 * advance offset for the returned pointer. In those cases, it
	 * is fine to expect to see reg->off.
	 */
	if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
		return;
	if (!type_is_ptr_alloc_obj(reg->type) &&
	    !type_is_non_owning_ref(reg->type) &&
	    WARN_ON_ONCE(reg->off))
		return;

	if (!is_null) {
		mark_ptr_not_null_reg(reg);

		/* For not-NULL ptr, reg->ref_obj_id will be reset
		 * in release_reference().
		 *
		 * reg->id is still used by spin_lock ptr. Other
		 * than spin_lock ptr type, reg->id can be reset.
		 */
		if (!reg_may_point_to_spin_lock(reg))
			reg->id = 0;
		return;
	}

	/* We don't need id and ref_obj_id from this point onwards anymore,
	 * thus we should better reset them, so that state pruning has
	 * chances to take effect.
	 */
	reg->type = SCALAR_VALUE;
	reg->id = 0;
	reg->ref_obj_id = 0;
}
17240
17241 /* The logic is similar to find_good_pkt_pointers(), both could eventually
17242 * be folded together at some point.
17243 */
mark_ptr_or_null_regs(struct bpf_verifier_state * vstate,u32 regno,bool is_null)17244 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
17245 bool is_null)
17246 {
17247 struct bpf_func_state *state = vstate->frame[vstate->curframe];
17248 struct bpf_reg_state *regs = state->regs, *reg;
17249 u32 ref_obj_id = regs[regno].ref_obj_id;
17250 u32 id = regs[regno].id;
17251
17252 if (ref_obj_id && ref_obj_id == id && is_null)
17253 /* regs[regno] is in the " == NULL" branch.
17254 * No one could have freed the reference state before
17255 * doing the NULL check.
17256 */
17257 WARN_ON_ONCE(release_reference_nomark(vstate, id));
17258
17259 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
17260 mark_ptr_or_null_reg(state, reg, id, is_null);
17261 }));
17262 }
17263
try_match_pkt_pointers(const struct bpf_insn * insn,struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg,struct bpf_verifier_state * this_branch,struct bpf_verifier_state * other_branch)17264 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
17265 struct bpf_reg_state *dst_reg,
17266 struct bpf_reg_state *src_reg,
17267 struct bpf_verifier_state *this_branch,
17268 struct bpf_verifier_state *other_branch)
17269 {
17270 if (BPF_SRC(insn->code) != BPF_X)
17271 return false;
17272
17273 /* Pointers are always 64-bit. */
17274 if (BPF_CLASS(insn->code) == BPF_JMP32)
17275 return false;
17276
17277 switch (BPF_OP(insn->code)) {
17278 case BPF_JGT:
17279 if ((dst_reg->type == PTR_TO_PACKET &&
17280 src_reg->type == PTR_TO_PACKET_END) ||
17281 (dst_reg->type == PTR_TO_PACKET_META &&
17282 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
17283 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
17284 find_good_pkt_pointers(this_branch, dst_reg,
17285 dst_reg->type, false);
17286 mark_pkt_end(other_branch, insn->dst_reg, true);
17287 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
17288 src_reg->type == PTR_TO_PACKET) ||
17289 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
17290 src_reg->type == PTR_TO_PACKET_META)) {
17291 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
17292 find_good_pkt_pointers(other_branch, src_reg,
17293 src_reg->type, true);
17294 mark_pkt_end(this_branch, insn->src_reg, false);
17295 } else {
17296 return false;
17297 }
17298 break;
17299 case BPF_JLT:
17300 if ((dst_reg->type == PTR_TO_PACKET &&
17301 src_reg->type == PTR_TO_PACKET_END) ||
17302 (dst_reg->type == PTR_TO_PACKET_META &&
17303 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
17304 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
17305 find_good_pkt_pointers(other_branch, dst_reg,
17306 dst_reg->type, true);
17307 mark_pkt_end(this_branch, insn->dst_reg, false);
17308 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
17309 src_reg->type == PTR_TO_PACKET) ||
17310 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
17311 src_reg->type == PTR_TO_PACKET_META)) {
17312 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
17313 find_good_pkt_pointers(this_branch, src_reg,
17314 src_reg->type, false);
17315 mark_pkt_end(other_branch, insn->src_reg, true);
17316 } else {
17317 return false;
17318 }
17319 break;
17320 case BPF_JGE:
17321 if ((dst_reg->type == PTR_TO_PACKET &&
17322 src_reg->type == PTR_TO_PACKET_END) ||
17323 (dst_reg->type == PTR_TO_PACKET_META &&
17324 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
17325 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
17326 find_good_pkt_pointers(this_branch, dst_reg,
17327 dst_reg->type, true);
17328 mark_pkt_end(other_branch, insn->dst_reg, false);
17329 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
17330 src_reg->type == PTR_TO_PACKET) ||
17331 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
17332 src_reg->type == PTR_TO_PACKET_META)) {
17333 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
17334 find_good_pkt_pointers(other_branch, src_reg,
17335 src_reg->type, false);
17336 mark_pkt_end(this_branch, insn->src_reg, true);
17337 } else {
17338 return false;
17339 }
17340 break;
17341 case BPF_JLE:
17342 if ((dst_reg->type == PTR_TO_PACKET &&
17343 src_reg->type == PTR_TO_PACKET_END) ||
17344 (dst_reg->type == PTR_TO_PACKET_META &&
17345 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
17346 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
17347 find_good_pkt_pointers(other_branch, dst_reg,
17348 dst_reg->type, false);
17349 mark_pkt_end(this_branch, insn->dst_reg, true);
17350 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
17351 src_reg->type == PTR_TO_PACKET) ||
17352 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
17353 src_reg->type == PTR_TO_PACKET_META)) {
17354 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
17355 find_good_pkt_pointers(this_branch, src_reg,
17356 src_reg->type, true);
17357 mark_pkt_end(other_branch, insn->src_reg, false);
17358 } else {
17359 return false;
17360 }
17361 break;
17362 default:
17363 return false;
17364 }
17365
17366 return true;
17367 }
17368
/* Record @reg in @reg_set if it is a scalar whose id, with the BPF_ADD_CONST
 * bits masked off, equals @id.
 *
 * @spi_or_reg is a register number when @is_reg is set, a stack slot index
 * otherwise. If linked_regs_push() cannot provide another entry, the id of
 * @reg is reset instead.
 */
static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
{
	struct linked_reg *slot;

	if (reg->type != SCALAR_VALUE)
		return;
	if ((reg->id & ~BPF_ADD_CONST) != id)
		return;

	slot = linked_regs_push(reg_set);
	if (!slot) {
		/* no entry available: drop the link for this register */
		reg->id = 0;
		return;
	}

	slot->frameno = frameno;
	slot->is_reg = is_reg;
	slot->regno = spi_or_reg;
}
17386
17387 /* For all R being scalar registers or spilled scalar registers
17388 * in verifier state, save R in linked_regs if R->id == id.
17389 * If there are too many Rs sharing same id, reset id for leftover Rs.
17390 */
collect_linked_regs(struct bpf_verifier_env * env,struct bpf_verifier_state * vstate,u32 id,struct linked_regs * linked_regs)17391 static void collect_linked_regs(struct bpf_verifier_env *env,
17392 struct bpf_verifier_state *vstate,
17393 u32 id,
17394 struct linked_regs *linked_regs)
17395 {
17396 struct bpf_insn_aux_data *aux = env->insn_aux_data;
17397 struct bpf_func_state *func;
17398 struct bpf_reg_state *reg;
17399 u16 live_regs;
17400 int i, j;
17401
17402 id = id & ~BPF_ADD_CONST;
17403 for (i = vstate->curframe; i >= 0; i--) {
17404 live_regs = aux[frame_insn_idx(vstate, i)].live_regs_before;
17405 func = vstate->frame[i];
17406 for (j = 0; j < BPF_REG_FP; j++) {
17407 if (!(live_regs & BIT(j)))
17408 continue;
17409 reg = &func->regs[j];
17410 __collect_linked_regs(linked_regs, reg, id, i, j, true);
17411 }
17412 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
17413 if (!is_spilled_reg(&func->stack[j]))
17414 continue;
17415 reg = &func->stack[j].spilled_ptr;
17416 __collect_linked_regs(linked_regs, reg, id, i, j, false);
17417 }
17418 }
17419 }
17420
17421 /* For all R in linked_regs, copy known_reg range into R
17422 * if R->id == known_reg->id.
17423 */
sync_linked_regs(struct bpf_verifier_env * env,struct bpf_verifier_state * vstate,struct bpf_reg_state * known_reg,struct linked_regs * linked_regs)17424 static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
17425 struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
17426 {
17427 struct bpf_reg_state fake_reg;
17428 struct bpf_reg_state *reg;
17429 struct linked_reg *e;
17430 int i;
17431
17432 for (i = 0; i < linked_regs->cnt; ++i) {
17433 e = &linked_regs->entries[i];
17434 reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
17435 : &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
17436 if (reg->type != SCALAR_VALUE || reg == known_reg)
17437 continue;
17438 if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
17439 continue;
17440 /*
17441 * Skip mixed 32/64-bit links: the delta relationship doesn't
17442 * hold across different ALU widths.
17443 */
17444 if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
17445 continue;
17446 if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
17447 reg->off == known_reg->off) {
17448 s32 saved_subreg_def = reg->subreg_def;
17449
17450 copy_register_state(reg, known_reg);
17451 reg->subreg_def = saved_subreg_def;
17452 } else {
17453 s32 saved_subreg_def = reg->subreg_def;
17454 s32 saved_off = reg->off;
17455 u32 saved_id = reg->id;
17456
17457 fake_reg.type = SCALAR_VALUE;
17458 __mark_reg_known(&fake_reg, (s64)reg->off - (s64)known_reg->off);
17459
17460 /* reg = known_reg; reg += delta */
17461 copy_register_state(reg, known_reg);
17462 /*
17463 * Must preserve off, id and subreg_def flag,
17464 * otherwise another sync_linked_regs() will be incorrect.
17465 */
17466 reg->off = saved_off;
17467 reg->id = saved_id;
17468 reg->subreg_def = saved_subreg_def;
17469
17470 scalar32_min_max_add(reg, &fake_reg);
17471 scalar_min_max_add(reg, &fake_reg);
17472 reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
17473 if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
17474 zext_32_to_64(reg);
17475 reg_bounds_sync(reg);
17476 }
17477 if (e->is_reg)
17478 mark_reg_scratched(env, e->regno);
17479 else
17480 mark_stack_slot_scratched(env, e->spi);
17481 }
17482 }
17483
check_cond_jmp_op(struct bpf_verifier_env * env,struct bpf_insn * insn,int * insn_idx)17484 static int check_cond_jmp_op(struct bpf_verifier_env *env,
17485 struct bpf_insn *insn, int *insn_idx)
17486 {
17487 struct bpf_verifier_state *this_branch = env->cur_state;
17488 struct bpf_verifier_state *other_branch;
17489 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
17490 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
17491 struct bpf_reg_state *eq_branch_regs;
17492 struct linked_regs linked_regs = {};
17493 u8 opcode = BPF_OP(insn->code);
17494 int insn_flags = 0;
17495 bool is_jmp32;
17496 int pred = -1;
17497 int err;
17498
17499 /* Only conditional jumps are expected to reach here. */
17500 if (opcode == BPF_JA || opcode > BPF_JCOND) {
17501 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
17502 return -EINVAL;
17503 }
17504
17505 if (opcode == BPF_JCOND) {
17506 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
17507 int idx = *insn_idx;
17508
17509 if (insn->code != (BPF_JMP | BPF_JCOND) ||
17510 insn->src_reg != BPF_MAY_GOTO ||
17511 insn->dst_reg || insn->imm) {
17512 verbose(env, "invalid may_goto imm %d\n", insn->imm);
17513 return -EINVAL;
17514 }
17515 prev_st = find_prev_entry(env, cur_st->parent, idx);
17516
17517 /* branch out 'fallthrough' insn as a new state to explore */
17518 queued_st = push_stack(env, idx + 1, idx, false);
17519 if (IS_ERR(queued_st))
17520 return PTR_ERR(queued_st);
17521
17522 queued_st->may_goto_depth++;
17523 if (prev_st)
17524 widen_imprecise_scalars(env, prev_st, queued_st);
17525 *insn_idx += insn->off;
17526 return 0;
17527 }
17528
17529 /* check src2 operand */
17530 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17531 if (err)
17532 return err;
17533
17534 dst_reg = ®s[insn->dst_reg];
17535 if (BPF_SRC(insn->code) == BPF_X) {
17536 if (insn->imm != 0) {
17537 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17538 return -EINVAL;
17539 }
17540
17541 /* check src1 operand */
17542 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17543 if (err)
17544 return err;
17545
17546 src_reg = ®s[insn->src_reg];
17547 if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
17548 is_pointer_value(env, insn->src_reg)) {
17549 verbose(env, "R%d pointer comparison prohibited\n",
17550 insn->src_reg);
17551 return -EACCES;
17552 }
17553
17554 if (src_reg->type == PTR_TO_STACK)
17555 insn_flags |= INSN_F_SRC_REG_STACK;
17556 if (dst_reg->type == PTR_TO_STACK)
17557 insn_flags |= INSN_F_DST_REG_STACK;
17558 } else {
17559 if (insn->src_reg != BPF_REG_0) {
17560 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17561 return -EINVAL;
17562 }
17563 src_reg = &env->fake_reg[0];
17564 memset(src_reg, 0, sizeof(*src_reg));
17565 src_reg->type = SCALAR_VALUE;
17566 __mark_reg_known(src_reg, insn->imm);
17567
17568 if (dst_reg->type == PTR_TO_STACK)
17569 insn_flags |= INSN_F_DST_REG_STACK;
17570 }
17571
17572 if (insn_flags) {
17573 err = push_jmp_history(env, this_branch, insn_flags, 0);
17574 if (err)
17575 return err;
17576 }
17577
17578 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
17579 pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32);
17580 if (pred >= 0) {
17581 /* If we get here with a dst_reg pointer type it is because
17582 * above is_branch_taken() special cased the 0 comparison.
17583 */
17584 if (!__is_pointer_value(false, dst_reg))
17585 err = mark_chain_precision(env, insn->dst_reg);
17586 if (BPF_SRC(insn->code) == BPF_X && !err &&
17587 !__is_pointer_value(false, src_reg))
17588 err = mark_chain_precision(env, insn->src_reg);
17589 if (err)
17590 return err;
17591 }
17592
17593 if (pred == 1) {
17594 /* Only follow the goto, ignore fall-through. If needed, push
17595 * the fall-through branch for simulation under speculative
17596 * execution.
17597 */
17598 if (!env->bypass_spec_v1) {
17599 err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx);
17600 if (err < 0)
17601 return err;
17602 }
17603 if (env->log.level & BPF_LOG_LEVEL)
17604 print_insn_state(env, this_branch, this_branch->curframe);
17605 *insn_idx += insn->off;
17606 return 0;
17607 } else if (pred == 0) {
17608 /* Only follow the fall-through branch, since that's where the
17609 * program will go. If needed, push the goto branch for
17610 * simulation under speculative execution.
17611 */
17612 if (!env->bypass_spec_v1) {
17613 err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1,
17614 *insn_idx);
17615 if (err < 0)
17616 return err;
17617 }
17618 if (env->log.level & BPF_LOG_LEVEL)
17619 print_insn_state(env, this_branch, this_branch->curframe);
17620 return 0;
17621 }
17622
17623 /* Push scalar registers sharing same ID to jump history,
17624 * do this before creating 'other_branch', so that both
17625 * 'this_branch' and 'other_branch' share this history
17626 * if parent state is created.
17627 */
17628 if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
17629 collect_linked_regs(env, this_branch, src_reg->id, &linked_regs);
17630 if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
17631 collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
17632 if (linked_regs.cnt > 1) {
17633 err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
17634 if (err)
17635 return err;
17636 }
17637
17638 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
17639 if (IS_ERR(other_branch))
17640 return PTR_ERR(other_branch);
17641 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17642
17643 if (BPF_SRC(insn->code) == BPF_X) {
17644 err = reg_set_min_max(env,
17645 &other_branch_regs[insn->dst_reg],
17646 &other_branch_regs[insn->src_reg],
17647 dst_reg, src_reg, opcode, is_jmp32);
17648 } else /* BPF_SRC(insn->code) == BPF_K */ {
17649 /* reg_set_min_max() can mangle the fake_reg. Make a copy
17650 * so that these are two different memory locations. The
17651 * src_reg is not used beyond here in context of K.
17652 */
17653 memcpy(&env->fake_reg[1], &env->fake_reg[0],
17654 sizeof(env->fake_reg[0]));
17655 err = reg_set_min_max(env,
17656 &other_branch_regs[insn->dst_reg],
17657 &env->fake_reg[0],
17658 dst_reg, &env->fake_reg[1],
17659 opcode, is_jmp32);
17660 }
17661 if (err)
17662 return err;
17663
17664 if (BPF_SRC(insn->code) == BPF_X &&
17665 src_reg->type == SCALAR_VALUE && src_reg->id &&
17666 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
17667 sync_linked_regs(env, this_branch, src_reg, &linked_regs);
17668 sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
17669 &linked_regs);
17670 }
17671 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
17672 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
17673 sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
17674 sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
17675 &linked_regs);
17676 }
17677
17678 /* if one pointer register is compared to another pointer
17679 * register check if PTR_MAYBE_NULL could be lifted.
17680 * E.g. register A - maybe null
17681 * register B - not null
17682 * for JNE A, B, ... - A is not null in the false branch;
17683 * for JEQ A, B, ... - A is not null in the true branch.
17684 *
17685 * Since PTR_TO_BTF_ID points to a kernel struct that does
17686 * not need to be null checked by the BPF program, i.e.,
17687 * could be null even without PTR_MAYBE_NULL marking, so
17688 * only propagate nullness when neither reg is that type.
17689 */
17690 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
17691 __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
17692 type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
17693 base_type(src_reg->type) != PTR_TO_BTF_ID &&
17694 base_type(dst_reg->type) != PTR_TO_BTF_ID) {
17695 eq_branch_regs = NULL;
17696 switch (opcode) {
17697 case BPF_JEQ:
17698 eq_branch_regs = other_branch_regs;
17699 break;
17700 case BPF_JNE:
17701 eq_branch_regs = regs;
17702 break;
17703 default:
17704 /* do nothing */
17705 break;
17706 }
17707 if (eq_branch_regs) {
17708 if (type_may_be_null(src_reg->type))
17709 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
17710 else
17711 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
17712 }
17713 }
17714
17715 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
17716 * NOTE: these optimizations below are related with pointer comparison
17717 * which will never be JMP32.
17718 */
17719 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
17720 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
17721 type_may_be_null(dst_reg->type)) {
17722 /* Mark all identical registers in each branch as either
17723 * safe or unknown depending R == 0 or R != 0 conditional.
17724 */
17725 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
17726 opcode == BPF_JNE);
17727 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
17728 opcode == BPF_JEQ);
17729 } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg],
17730 this_branch, other_branch) &&
17731 is_pointer_value(env, insn->dst_reg)) {
17732 verbose(env, "R%d pointer comparison prohibited\n",
17733 insn->dst_reg);
17734 return -EACCES;
17735 }
17736 if (env->log.level & BPF_LOG_LEVEL)
17737 print_insn_state(env, this_branch, this_branch->curframe);
17738 return 0;
17739 }
17740
17741 /* verify BPF_LD_IMM64 instruction */
check_ld_imm(struct bpf_verifier_env * env,struct bpf_insn * insn)17742 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17743 {
17744 struct bpf_insn_aux_data *aux = cur_aux(env);
17745 struct bpf_reg_state *regs = cur_regs(env);
17746 struct bpf_reg_state *dst_reg;
17747 struct bpf_map *map;
17748 int err;
17749
17750 if (BPF_SIZE(insn->code) != BPF_DW) {
17751 verbose(env, "invalid BPF_LD_IMM insn\n");
17752 return -EINVAL;
17753 }
17754 if (insn->off != 0) {
17755 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17756 return -EINVAL;
17757 }
17758
17759 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17760 if (err)
17761 return err;
17762
17763 dst_reg = ®s[insn->dst_reg];
17764 if (insn->src_reg == 0) {
17765 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
17766
17767 dst_reg->type = SCALAR_VALUE;
17768 __mark_reg_known(®s[insn->dst_reg], imm);
17769 return 0;
17770 }
17771
17772 /* All special src_reg cases are listed below. From this point onwards
17773 * we either succeed and assign a corresponding dst_reg->type after
17774 * zeroing the offset, or fail and reject the program.
17775 */
17776 mark_reg_known_zero(env, regs, insn->dst_reg);
17777
17778 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
17779 dst_reg->type = aux->btf_var.reg_type;
17780 switch (base_type(dst_reg->type)) {
17781 case PTR_TO_MEM:
17782 dst_reg->mem_size = aux->btf_var.mem_size;
17783 break;
17784 case PTR_TO_BTF_ID:
17785 dst_reg->btf = aux->btf_var.btf;
17786 dst_reg->btf_id = aux->btf_var.btf_id;
17787 break;
17788 default:
17789 verifier_bug(env, "pseudo btf id: unexpected dst reg type");
17790 return -EFAULT;
17791 }
17792 return 0;
17793 }
17794
17795 if (insn->src_reg == BPF_PSEUDO_FUNC) {
17796 struct bpf_prog_aux *aux = env->prog->aux;
17797 u32 subprogno = find_subprog(env,
17798 env->insn_idx + insn->imm + 1);
17799
17800 if (!aux->func_info) {
17801 verbose(env, "missing btf func_info\n");
17802 return -EINVAL;
17803 }
17804 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
17805 verbose(env, "callback function not static\n");
17806 return -EINVAL;
17807 }
17808
17809 dst_reg->type = PTR_TO_FUNC;
17810 dst_reg->subprogno = subprogno;
17811 return 0;
17812 }
17813
17814 map = env->used_maps[aux->map_index];
17815 dst_reg->map_ptr = map;
17816
17817 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
17818 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
17819 if (map->map_type == BPF_MAP_TYPE_ARENA) {
17820 __mark_reg_unknown(env, dst_reg);
17821 return 0;
17822 }
17823 dst_reg->type = PTR_TO_MAP_VALUE;
17824 dst_reg->off = aux->map_off;
17825 WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
17826 map->max_entries != 1);
17827 /* We want reg->id to be same (0) as map_value is not distinct */
17828 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
17829 insn->src_reg == BPF_PSEUDO_MAP_IDX) {
17830 dst_reg->type = CONST_PTR_TO_MAP;
17831 } else {
17832 verifier_bug(env, "unexpected src reg value for ldimm64");
17833 return -EFAULT;
17834 }
17835
17836 return 0;
17837 }
17838
may_access_skb(enum bpf_prog_type type)17839 static bool may_access_skb(enum bpf_prog_type type)
17840 {
17841 switch (type) {
17842 case BPF_PROG_TYPE_SOCKET_FILTER:
17843 case BPF_PROG_TYPE_SCHED_CLS:
17844 case BPF_PROG_TYPE_SCHED_ACT:
17845 return true;
17846 default:
17847 return false;
17848 }
17849 }
17850
17851 /* verify safety of LD_ABS|LD_IND instructions:
17852 * - they can only appear in the programs where ctx == skb
17853 * - since they are wrappers of function calls, they scratch R1-R5 registers,
17854 * preserve R6-R9, and store return value into R0
17855 *
17856 * Implicit input:
17857 * ctx == skb == R6 == CTX
17858 *
17859 * Explicit input:
17860 * SRC == any register
17861 * IMM == 32-bit immediate
17862 *
17863 * Output:
17864 * R0 - 8/16/32-bit skb data converted to cpu endianness
17865 */
check_ld_abs(struct bpf_verifier_env * env,struct bpf_insn * insn)17866 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
17867 {
17868 struct bpf_reg_state *regs = cur_regs(env);
17869 static const int ctx_reg = BPF_REG_6;
17870 u8 mode = BPF_MODE(insn->code);
17871 int i, err;
17872
17873 if (!may_access_skb(resolve_prog_type(env->prog))) {
17874 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
17875 return -EINVAL;
17876 }
17877
17878 if (!env->ops->gen_ld_abs) {
17879 verifier_bug(env, "gen_ld_abs is null");
17880 return -EFAULT;
17881 }
17882
17883 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
17884 BPF_SIZE(insn->code) == BPF_DW ||
17885 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
17886 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
17887 return -EINVAL;
17888 }
17889
17890 /* check whether implicit source operand (register R6) is readable */
17891 err = check_reg_arg(env, ctx_reg, SRC_OP);
17892 if (err)
17893 return err;
17894
17895 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
17896 * gen_ld_abs() may terminate the program at runtime, leading to
17897 * reference leak.
17898 */
17899 err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
17900 if (err)
17901 return err;
17902
17903 if (regs[ctx_reg].type != PTR_TO_CTX) {
17904 verbose(env,
17905 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
17906 return -EINVAL;
17907 }
17908
17909 if (mode == BPF_IND) {
17910 /* check explicit source operand */
17911 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17912 if (err)
17913 return err;
17914 }
17915
17916 err = check_ptr_off_reg(env, ®s[ctx_reg], ctx_reg);
17917 if (err < 0)
17918 return err;
17919
17920 /* reset caller saved regs to unreadable */
17921 for (i = 0; i < CALLER_SAVED_REGS; i++) {
17922 mark_reg_not_init(env, regs, caller_saved[i]);
17923 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
17924 }
17925
17926 /* mark destination R0 register as readable, since it contains
17927 * the value fetched from the packet.
17928 * Already marked as written above.
17929 */
17930 mark_reg_unknown(env, regs, BPF_REG_0);
17931 /* ld_abs load up to 32-bit skb data. */
17932 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
17933 return 0;
17934 }
17935
check_return_code(struct bpf_verifier_env * env,int regno,const char * reg_name)17936 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
17937 {
17938 const char *exit_ctx = "At program exit";
17939 struct tnum enforce_attach_type_range = tnum_unknown;
17940 const struct bpf_prog *prog = env->prog;
17941 struct bpf_reg_state *reg = reg_state(env, regno);
17942 struct bpf_retval_range range = retval_range(0, 1);
17943 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
17944 int err;
17945 struct bpf_func_state *frame = env->cur_state->frame[0];
17946 const bool is_subprog = frame->subprogno;
17947 bool return_32bit = false;
17948 const struct btf_type *reg_type, *ret_type = NULL;
17949
17950 /* LSM and struct_ops func-ptr's return type could be "void" */
17951 if (!is_subprog || frame->in_exception_callback_fn) {
17952 switch (prog_type) {
17953 case BPF_PROG_TYPE_LSM:
17954 if (prog->expected_attach_type == BPF_LSM_CGROUP)
17955 /* See below, can be 0 or 0-1 depending on hook. */
17956 break;
17957 if (!prog->aux->attach_func_proto->type)
17958 return 0;
17959 break;
17960 case BPF_PROG_TYPE_STRUCT_OPS:
17961 if (!prog->aux->attach_func_proto->type)
17962 return 0;
17963
17964 if (frame->in_exception_callback_fn)
17965 break;
17966
17967 /* Allow a struct_ops program to return a referenced kptr if it
17968 * matches the operator's return type and is in its unmodified
17969 * form. A scalar zero (i.e., a null pointer) is also allowed.
17970 */
17971 reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
17972 ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
17973 prog->aux->attach_func_proto->type,
17974 NULL);
17975 if (ret_type && ret_type == reg_type && reg->ref_obj_id)
17976 return __check_ptr_off_reg(env, reg, regno, false);
17977 break;
17978 default:
17979 break;
17980 }
17981 }
17982
17983 /* eBPF calling convention is such that R0 is used
17984 * to return the value from eBPF program.
17985 * Make sure that it's readable at this time
17986 * of bpf_exit, which means that program wrote
17987 * something into it earlier
17988 */
17989 err = check_reg_arg(env, regno, SRC_OP);
17990 if (err)
17991 return err;
17992
17993 if (is_pointer_value(env, regno)) {
17994 verbose(env, "R%d leaks addr as return value\n", regno);
17995 return -EACCES;
17996 }
17997
17998 if (frame->in_async_callback_fn) {
17999 exit_ctx = "At async callback return";
18000 range = frame->callback_ret_range;
18001 goto enforce_retval;
18002 }
18003
18004 if (is_subprog && !frame->in_exception_callback_fn) {
18005 if (reg->type != SCALAR_VALUE) {
18006 verbose(env, "At subprogram exit the register R%d is not a scalar value (%s)\n",
18007 regno, reg_type_str(env, reg->type));
18008 return -EINVAL;
18009 }
18010 return 0;
18011 }
18012
18013 switch (prog_type) {
18014 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
18015 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
18016 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
18017 env->prog->expected_attach_type == BPF_CGROUP_UNIX_RECVMSG ||
18018 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
18019 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
18020 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETPEERNAME ||
18021 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
18022 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
18023 env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
18024 range = retval_range(1, 1);
18025 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
18026 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
18027 range = retval_range(0, 3);
18028 break;
18029 case BPF_PROG_TYPE_CGROUP_SKB:
18030 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
18031 range = retval_range(0, 3);
18032 enforce_attach_type_range = tnum_range(2, 3);
18033 }
18034 break;
18035 case BPF_PROG_TYPE_CGROUP_SOCK:
18036 case BPF_PROG_TYPE_SOCK_OPS:
18037 case BPF_PROG_TYPE_CGROUP_DEVICE:
18038 case BPF_PROG_TYPE_CGROUP_SYSCTL:
18039 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
18040 break;
18041 case BPF_PROG_TYPE_RAW_TRACEPOINT:
18042 if (!env->prog->aux->attach_btf_id)
18043 return 0;
18044 range = retval_range(0, 0);
18045 break;
18046 case BPF_PROG_TYPE_TRACING:
18047 switch (env->prog->expected_attach_type) {
18048 case BPF_TRACE_FENTRY:
18049 case BPF_TRACE_FEXIT:
18050 case BPF_TRACE_FSESSION:
18051 range = retval_range(0, 0);
18052 break;
18053 case BPF_TRACE_RAW_TP:
18054 case BPF_MODIFY_RETURN:
18055 return 0;
18056 case BPF_TRACE_ITER:
18057 break;
18058 default:
18059 return -ENOTSUPP;
18060 }
18061 break;
18062 case BPF_PROG_TYPE_KPROBE:
18063 switch (env->prog->expected_attach_type) {
18064 case BPF_TRACE_KPROBE_SESSION:
18065 case BPF_TRACE_UPROBE_SESSION:
18066 range = retval_range(0, 1);
18067 break;
18068 default:
18069 return 0;
18070 }
18071 break;
18072 case BPF_PROG_TYPE_SK_LOOKUP:
18073 range = retval_range(SK_DROP, SK_PASS);
18074 break;
18075
18076 case BPF_PROG_TYPE_LSM:
18077 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
18078 /* no range found, any return value is allowed */
18079 if (!get_func_retval_range(env->prog, &range))
18080 return 0;
18081 /* no restricted range, any return value is allowed */
18082 if (range.minval == S32_MIN && range.maxval == S32_MAX)
18083 return 0;
18084 return_32bit = true;
18085 } else if (!env->prog->aux->attach_func_proto->type) {
18086 /* Make sure programs that attach to void
18087 * hooks don't try to modify return value.
18088 */
18089 range = retval_range(1, 1);
18090 }
18091 break;
18092
18093 case BPF_PROG_TYPE_NETFILTER:
18094 range = retval_range(NF_DROP, NF_ACCEPT);
18095 break;
18096 case BPF_PROG_TYPE_STRUCT_OPS:
18097 if (!ret_type)
18098 return 0;
18099 range = retval_range(0, 0);
18100 break;
18101 case BPF_PROG_TYPE_EXT:
18102 /* freplace program can return anything as its return value
18103 * depends on the to-be-replaced kernel func or bpf program.
18104 */
18105 default:
18106 return 0;
18107 }
18108
18109 enforce_retval:
18110 if (reg->type != SCALAR_VALUE) {
18111 verbose(env, "%s the register R%d is not a known value (%s)\n",
18112 exit_ctx, regno, reg_type_str(env, reg->type));
18113 return -EINVAL;
18114 }
18115
18116 err = mark_chain_precision(env, regno);
18117 if (err)
18118 return err;
18119
18120 if (!retval_range_within(range, reg, return_32bit)) {
18121 verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
18122 if (!is_subprog &&
18123 prog->expected_attach_type == BPF_LSM_CGROUP &&
18124 prog_type == BPF_PROG_TYPE_LSM &&
18125 !prog->aux->attach_func_proto->type)
18126 verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
18127 return -EINVAL;
18128 }
18129
18130 if (!tnum_is_unknown(enforce_attach_type_range) &&
18131 tnum_in(enforce_attach_type_range, reg->var_off))
18132 env->prog->enforce_expected_attach_type = 1;
18133 return 0;
18134 }
18135
mark_subprog_changes_pkt_data(struct bpf_verifier_env * env,int off)18136 static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
18137 {
18138 struct bpf_subprog_info *subprog;
18139
18140 subprog = bpf_find_containing_subprog(env, off);
18141 subprog->changes_pkt_data = true;
18142 }
18143
mark_subprog_might_sleep(struct bpf_verifier_env * env,int off)18144 static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
18145 {
18146 struct bpf_subprog_info *subprog;
18147
18148 subprog = bpf_find_containing_subprog(env, off);
18149 subprog->might_sleep = true;
18150 }
18151
18152 /* 't' is an index of a call-site.
18153 * 'w' is a callee entry point.
18154 * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
18155 * Rely on DFS traversal order and absence of recursive calls to guarantee that
18156 * callee's change_pkt_data marks would be correct at that moment.
18157 */
merge_callee_effects(struct bpf_verifier_env * env,int t,int w)18158 static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
18159 {
18160 struct bpf_subprog_info *caller, *callee;
18161
18162 caller = bpf_find_containing_subprog(env, t);
18163 callee = bpf_find_containing_subprog(env, w);
18164 caller->changes_pkt_data |= callee->changes_pkt_data;
18165 caller->might_sleep |= callee->might_sleep;
18166 }
18167
18168 /* non-recursive DFS pseudo code
18169 * 1 procedure DFS-iterative(G,v):
18170 * 2 label v as discovered
18171 * 3 let S be a stack
18172 * 4 S.push(v)
18173 * 5 while S is not empty
18174 * 6 t <- S.peek()
18175 * 7 if t is what we're looking for:
18176 * 8 return t
18177 * 9 for all edges e in G.adjacentEdges(t) do
18178 * 10 if edge e is already labelled
18179 * 11 continue with the next edge
18180 * 12 w <- G.adjacentVertex(t,e)
18181 * 13 if vertex w is not discovered and not explored
18182 * 14 label e as tree-edge
18183 * 15 label w as discovered
18184 * 16 S.push(w)
18185 * 17 continue at 5
18186 * 18 else if vertex w is discovered
18187 * 19 label e as back-edge
18188 * 20 else
18189 * 21 // vertex w is explored
18190 * 22 label e as forward- or cross-edge
18191 * 23 label t as explored
18192 * 24 S.pop()
18193 *
18194 * convention:
18195 * 0x10 - discovered
18196 * 0x11 - discovered and fall-through edge labelled
18197 * 0x12 - discovered and fall-through and branch edges labelled
18198 * 0x20 - explored
18199 */
18200
/* Per-instruction DFS state: a vertex mark in the high nibble, optionally
 * OR-ed with the label of the last outgoing edge that has been followed.
 */
enum {
	/* edge labels */
	FALLTHROUGH = 1,
	BRANCH = 2,
	/* vertex marks */
	DISCOVERED = 0x10,
	EXPLORED = 0x20,
};
18207
mark_prune_point(struct bpf_verifier_env * env,int idx)18208 static void mark_prune_point(struct bpf_verifier_env *env, int idx)
18209 {
18210 env->insn_aux_data[idx].prune_point = true;
18211 }
18212
is_prune_point(struct bpf_verifier_env * env,int insn_idx)18213 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
18214 {
18215 return env->insn_aux_data[insn_idx].prune_point;
18216 }
18217
mark_force_checkpoint(struct bpf_verifier_env * env,int idx)18218 static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
18219 {
18220 env->insn_aux_data[idx].force_checkpoint = true;
18221 }
18222
is_force_checkpoint(struct bpf_verifier_env * env,int insn_idx)18223 static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
18224 {
18225 return env->insn_aux_data[insn_idx].force_checkpoint;
18226 }
18227
mark_calls_callback(struct bpf_verifier_env * env,int idx)18228 static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
18229 {
18230 env->insn_aux_data[idx].calls_callback = true;
18231 }
18232
bpf_calls_callback(struct bpf_verifier_env * env,int insn_idx)18233 bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx)
18234 {
18235 return env->insn_aux_data[insn_idx].calls_callback;
18236 }
18237
/* Non-error results of push_insn() / visit_func_call_insn(). */
enum {
	DONE_EXPLORING = 0,	/* no new instruction was pushed */
	KEEP_EXPLORING,		/* == 1: a new instruction was pushed on the stack */
};
18242
18243 /* t, w, e - match pseudo-code above:
18244 * t - index of current instruction
18245 * w - next instruction
18246 * e - edge
18247 */
push_insn(int t,int w,int e,struct bpf_verifier_env * env)18248 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
18249 {
18250 int *insn_stack = env->cfg.insn_stack;
18251 int *insn_state = env->cfg.insn_state;
18252
18253 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
18254 return DONE_EXPLORING;
18255
18256 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
18257 return DONE_EXPLORING;
18258
18259 if (w < 0 || w >= env->prog->len) {
18260 verbose_linfo(env, t, "%d: ", t);
18261 verbose(env, "jump out of range from insn %d to %d\n", t, w);
18262 return -EINVAL;
18263 }
18264
18265 if (e == BRANCH) {
18266 /* mark branch target for state pruning */
18267 mark_prune_point(env, w);
18268 mark_jmp_point(env, w);
18269 }
18270
18271 if (insn_state[w] == 0) {
18272 /* tree-edge */
18273 insn_state[t] = DISCOVERED | e;
18274 insn_state[w] = DISCOVERED;
18275 if (env->cfg.cur_stack >= env->prog->len)
18276 return -E2BIG;
18277 insn_stack[env->cfg.cur_stack++] = w;
18278 return KEEP_EXPLORING;
18279 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
18280 if (env->bpf_capable)
18281 return DONE_EXPLORING;
18282 verbose_linfo(env, t, "%d: ", t);
18283 verbose_linfo(env, w, "%d: ", w);
18284 verbose(env, "back-edge from insn %d to %d\n", t, w);
18285 return -EINVAL;
18286 } else if (insn_state[w] == EXPLORED) {
18287 /* forward- or cross-edge */
18288 insn_state[t] = DISCOVERED | e;
18289 } else {
18290 verifier_bug(env, "insn state internal bug");
18291 return -EFAULT;
18292 }
18293 return DONE_EXPLORING;
18294 }
18295
/* Visit the call instruction at index @t: push its fall-through successor
 * and, when @visit_callee is set, the callee entry point as well.
 *
 * Returns the push_insn() result: DONE_EXPLORING, KEEP_EXPLORING or a
 * negative errno.
 */
static int visit_func_call_insn(int t, struct bpf_insn *insns,
				struct bpf_verifier_env *env,
				bool visit_callee)
{
	/* an ldimm64 occupies two instruction slots */
	int next = t + (bpf_is_ldimm64(&insns[t]) ? 2 : 1);
	int callee, ret;

	ret = push_insn(t, next, FALLTHROUGH, env);
	if (ret)
		return ret;

	mark_prune_point(env, next);
	/* when we exit from subprog, we need to record non-linear history */
	mark_jmp_point(env, next);

	if (!visit_callee)
		return ret;

	callee = t + insns[t].imm + 1;
	mark_prune_point(env, t);
	merge_callee_effects(env, t, callee);
	return push_insn(t, callee, BRANCH, env);
}
18320
18321 /* Bitmask with 1s for all caller saved registers */
18322 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
18323
18324 /* True if do_misc_fixups() replaces calls to helper number 'imm',
18325 * replacement patch is presumed to follow bpf_fastcall contract
18326 * (see mark_fastcall_pattern_for_call() below).
18327 */
verifier_inlines_helper_call(struct bpf_verifier_env * env,s32 imm)18328 static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
18329 {
18330 switch (imm) {
18331 #ifdef CONFIG_X86_64
18332 case BPF_FUNC_get_smp_processor_id:
18333 #ifdef CONFIG_SMP
18334 case BPF_FUNC_get_current_task_btf:
18335 case BPF_FUNC_get_current_task:
18336 #endif
18337 return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
18338 #endif
18339 default:
18340 return false;
18341 }
18342 }
18343
/* Digest of a helper/kfunc prototype, filled in by get_call_summary(). */
struct call_summary {
	/* number of parameters in the prototype */
	u8 num_params;
	/* true when the function returns void */
	bool is_void;
	/* true when the call is treated as bpf_fastcall */
	bool fastcall;
};
18349
18350 /* If @call is a kfunc or helper call, fills @cs and returns true,
18351 * otherwise returns false.
18352 */
get_call_summary(struct bpf_verifier_env * env,struct bpf_insn * call,struct call_summary * cs)18353 static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
18354 struct call_summary *cs)
18355 {
18356 struct bpf_kfunc_call_arg_meta meta;
18357 const struct bpf_func_proto *fn;
18358 int i;
18359
18360 if (bpf_helper_call(call)) {
18361
18362 if (get_helper_proto(env, call->imm, &fn) < 0)
18363 /* error would be reported later */
18364 return false;
18365 cs->fastcall = fn->allow_fastcall &&
18366 (verifier_inlines_helper_call(env, call->imm) ||
18367 bpf_jit_inlines_helper_call(call->imm));
18368 cs->is_void = fn->ret_type == RET_VOID;
18369 cs->num_params = 0;
18370 for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
18371 if (fn->arg_type[i] == ARG_DONTCARE)
18372 break;
18373 cs->num_params++;
18374 }
18375 return true;
18376 }
18377
18378 if (bpf_pseudo_kfunc_call(call)) {
18379 int err;
18380
18381 err = fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
18382 if (err < 0)
18383 /* error would be reported later */
18384 return false;
18385 cs->num_params = btf_type_vlen(meta.func_proto);
18386 cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
18387 cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
18388 return true;
18389 }
18390
18391 return false;
18392 }
18393
/* LLVM defines a bpf_fastcall function attribute.
18395 * This attribute means that function scratches only some of
18396 * the caller saved registers defined by ABI.
18397 * For BPF the set of such registers could be defined as follows:
18398 * - R0 is scratched only if function is non-void;
18399 * - R1-R5 are scratched only if corresponding parameter type is defined
18400 * in the function prototype.
18401 *
18402 * The contract between kernel and clang allows to simultaneously use
18403 * such functions and maintain backwards compatibility with old
18404 * kernels that don't understand bpf_fastcall calls:
18405 *
18406 * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
18407 * registers are not scratched by the call;
18408 *
18409 * - as a post-processing step, clang visits each bpf_fastcall call and adds
18410 * spill/fill for every live r0-r5;
18411 *
18412 * - stack offsets used for the spill/fill are allocated as lowest
18413 * stack offsets in whole function and are not used for any other
18414 * purposes;
18415 *
18416 * - when kernel loads a program, it looks for such patterns
18417 * (bpf_fastcall function surrounded by spills/fills) and checks if
18418 * spill/fill stack offsets are used exclusively in fastcall patterns;
18419 *
18420 * - if so, and if verifier or current JIT inlines the call to the
18421 * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
18422 * spill/fill pairs;
18423 *
18424 * - when old kernel loads a program, presence of spill/fill pairs
18425 * keeps BPF program valid, albeit slightly less efficient.
18426 *
18427 * For example:
18428 *
18429 * r1 = 1;
18430 * r2 = 2;
18431 * *(u64 *)(r10 - 8) = r1; r1 = 1;
18432 * *(u64 *)(r10 - 16) = r2; r2 = 2;
18433 * call %[to_be_inlined] --> call %[to_be_inlined]
18434 * r2 = *(u64 *)(r10 - 16); r0 = r1;
18435 * r1 = *(u64 *)(r10 - 8); r0 += r2;
18436 * r0 = r1; exit;
18437 * r0 += r2;
18438 * exit;
18439 *
18440 * The purpose of mark_fastcall_pattern_for_call is to:
18441 * - look for such patterns;
18442 * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
 * - set env->insn_aux_data[*].fastcall_spills_num for call instruction;
18444 * - update env->subprog_info[*]->fastcall_stack_off to find an offset
18445 * at which bpf_fastcall spill/fill stack slots start;
18446 * - update env->subprog_info[*]->keep_fastcall_stack.
18447 *
18448 * The .fastcall_pattern and .fastcall_stack_off are used by
18449 * check_fastcall_stack_contract() to check if every stack access to
18450 * fastcall spill/fill stack slot originates from spill/fill
18451 * instructions, members of fastcall patterns.
18452 *
18453 * If such condition holds true for a subprogram, fastcall patterns could
18454 * be rewritten by remove_fastcall_spills_fills().
18455 * Otherwise bpf_fastcall patterns are not changed in the subprogram
18456 * (code, presumably, generated by an older clang version).
18457 *
18458 * For example, it is *not* safe to remove spill/fill below:
18459 *
18460 * r1 = 1;
18461 * *(u64 *)(r10 - 8) = r1; r1 = 1;
18462 * call %[to_be_inlined] --> call %[to_be_inlined]
18463 * r1 = *(u64 *)(r10 - 8); r0 = *(u64 *)(r10 - 8); <---- wrong !!!
18464 * r0 = *(u64 *)(r10 - 8); r0 += r1;
18465 * r0 += r1; exit;
18466 * exit;
18467 */
/* Look for a bpf_fastcall spill/fill pattern around the call at @insn_idx
 * inside @subprog; @lowest_off is the stack offset at which the first
 * spill/fill pair is expected.
 */
static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
					   struct bpf_subprog_info *subprog,
					   int insn_idx, s16 lowest_off)
{
	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
	struct bpf_insn *call = &insns[insn_idx];
	u32 clobbered, expected;
	struct call_summary cs;
	s16 off;
	int i;

	if (!get_call_summary(env, call, &cs))
		return;

	/* Caller saved registers clobbered by the helper/kfunc *as if* it
	 * follows the bpf_fastcall contract:
	 * - R0 when the function is non-void;
	 * - R1-R5 when the corresponding parameter is described in the
	 *   function prototype.
	 */
	clobbered = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
	expected = ALL_CALLER_SAVED_REGS & ~clobbered;

	/* Walk outwards from the call, matching pairs of the form:
	 *
	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
	 * ...
	 * call %[to_be_inlined]
	 * ...
	 * rX = *(u64 *)(r10 - Y)
	 */
	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
			break;
		stx = &insns[insn_idx - i];
		ldx = &insns[insn_idx + i];
		/* both must be 64-bit accesses relative to r10 */
		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
		    stx->dst_reg != BPF_REG_10 ||
		    ldx->src_reg != BPF_REG_10)
			break;
		/* the fill must restore the register that was spilled */
		if (stx->src_reg != ldx->dst_reg)
			break;
		/* the register must still be expected, i.e. not seen before */
		if ((BIT(stx->src_reg) & expected) == 0)
			break;
		/* Both accesses must use the expected offset. Alignment
		 * needs no check: BPF_DW stack access is always 8-byte
		 * aligned.
		 */
		if (stx->off != off || ldx->off != off)
			break;
		expected &= ~BIT(stx->src_reg);
		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
	}
	/* not a single pair matched */
	if (i == 1)
		return;

	/* Conditionally set 'fastcall_spills_num' to allow forward
	 * compatibility when more helper functions are marked as
	 * bpf_fastcall at compile time than current kernel supports, e.g:
	 *
	 * 1: *(u64 *)(r10 - 8) = r1
	 * 2: call A                  ;; assume A is bpf_fastcall for current kernel
	 * 3: r1 = *(u64 *)(r10 - 8)
	 * 4: *(u64 *)(r10 - 8) = r1
	 * 5: call B                  ;; assume B is not bpf_fastcall for current kernel
	 * 6: r1 = *(u64 *)(r10 - 8)
	 *
	 * There is no need to block bpf_fastcall rewrite for such program.
	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
	 * does not remove spill/fill pair {4,6}.
	 */
	if (cs.fastcall)
		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
	else
		subprog->keep_fastcall_stack = 1;
	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
}
18554
mark_fastcall_patterns(struct bpf_verifier_env * env)18555 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
18556 {
18557 struct bpf_subprog_info *subprog = env->subprog_info;
18558 struct bpf_insn *insn;
18559 s16 lowest_off;
18560 int s, i;
18561
18562 for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
18563 /* find lowest stack spill offset used in this subprog */
18564 lowest_off = 0;
18565 for (i = subprog->start; i < (subprog + 1)->start; ++i) {
18566 insn = env->prog->insnsi + i;
18567 if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
18568 insn->dst_reg != BPF_REG_10)
18569 continue;
18570 lowest_off = min(lowest_off, insn->off);
18571 }
18572 /* use this offset to find fastcall patterns */
18573 for (i = subprog->start; i < (subprog + 1)->start; ++i) {
18574 insn = env->prog->insnsi + i;
18575 if (insn->code != (BPF_JMP | BPF_CALL))
18576 continue;
18577 mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
18578 }
18579 }
18580 return 0;
18581 }
18582
/*
 * Resize @old (which may be NULL) so that it has room for @n_elem items and
 * store @n_elem in ->cnt.
 *
 * Returns the resized array, or NULL if the allocation failed. On failure
 * @old is freed here, so the caller must not use it afterwards.
 */
static struct bpf_iarray *iarray_realloc(struct bpf_iarray *old, size_t n_elem)
{
	struct bpf_iarray *resized;
	size_t bytes;

	bytes = sizeof(struct bpf_iarray) + n_elem * sizeof(old->items[0]);
	resized = kvrealloc(old, bytes, GFP_KERNEL_ACCOUNT);
	if (resized) {
		resized->cnt = n_elem;
		return resized;
	}

	/* callers always want the old array gone on failure */
	kvfree(old);
	return NULL;
}
18598
/*
 * Copy the xlated_off of map elements with keys @start..@end (inclusive)
 * into @items[0..end - start].
 *
 * Returns 0 on success, the error encoded in the lookup result if the lookup
 * returned an error pointer, or -EINVAL if the lookup returned NULL.
 */
static int copy_insn_array(struct bpf_map *map, u32 start, u32 end, u32 *items)
{
	struct bpf_insn_array_value *elem;
	u32 *dst = items;
	u32 key;

	for (key = start; key <= end; key++) {
		elem = map->ops->map_lookup_elem(map, &key);
		/*
		 * An array map lookup is not expected to fail, but the result
		 * is checked anyway to keep static analysers quiet.
		 */
		if (!elem)
			return -EINVAL;
		if (IS_ERR(elem))
			return PTR_ERR(elem);
		*dst++ = elem->xlated_off;
	}
	return 0;
}
18618
/*
 * Three-way comparator for sort() over an array of u32.
 *
 * Returns a negative value, zero or a positive value when *a is less than,
 * equal to or greater than *b.
 *
 * The values are compared explicitly instead of being subtracted: a u32
 * difference converted to int has the wrong sign whenever the two values are
 * more than INT_MAX apart (e.g. 0 vs 0xffffffff).
 */
static int cmp_ptr_to_u32(const void *a, const void *b)
{
	const u32 lhs = *(const u32 *)a;
	const u32 rhs = *(const u32 *)b;

	return (lhs > rhs) - (lhs < rhs);
}
18623
/*
 * Sort @items[0..cnt) in ascending order and squash duplicates in place.
 *
 * Returns the number of unique values, which end up at the front of @items.
 * An empty (or negative-sized) array yields 0 rather than 1, so that callers
 * testing for "no elements" never treat items[0] as valid.
 */
static int sort_insn_array_uniq(u32 *items, int cnt)
{
	int unique = 1;
	int i;

	if (cnt <= 0)
		return 0;

	sort(items, cnt, sizeof(items[0]), cmp_ptr_to_u32, NULL);

	/* items[unique - 1] is always the last distinct value kept so far */
	for (i = 1; i < cnt; i++) {
		if (items[i] != items[unique - 1])
			items[unique++] = items[i];
	}

	return unique;
}
18637
18638 /*
18639 * sort_unique({map[start], ..., map[end]}) into off
18640 */
copy_insn_array_uniq(struct bpf_map * map,u32 start,u32 end,u32 * off)18641 static int copy_insn_array_uniq(struct bpf_map *map, u32 start, u32 end, u32 *off)
18642 {
18643 u32 n = end - start + 1;
18644 int err;
18645
18646 err = copy_insn_array(map, start, end, off);
18647 if (err)
18648 return err;
18649
18650 return sort_insn_array_uniq(off, n);
18651 }
18652
18653 /*
18654 * Copy all unique offsets from the map
18655 */
jt_from_map(struct bpf_map * map)18656 static struct bpf_iarray *jt_from_map(struct bpf_map *map)
18657 {
18658 struct bpf_iarray *jt;
18659 int err;
18660 int n;
18661
18662 jt = iarray_realloc(NULL, map->max_entries);
18663 if (!jt)
18664 return ERR_PTR(-ENOMEM);
18665
18666 n = copy_insn_array_uniq(map, 0, map->max_entries - 1, jt->items);
18667 if (n < 0) {
18668 err = n;
18669 goto err_free;
18670 }
18671 if (n == 0) {
18672 err = -EINVAL;
18673 goto err_free;
18674 }
18675 jt->cnt = n;
18676 return jt;
18677
18678 err_free:
18679 kvfree(jt);
18680 return ERR_PTR(err);
18681 }
18682
18683 /*
18684 * Find and collect all maps which fit in the subprog. Return the result as one
18685 * combined jump table in jt->items (allocated with kvcalloc)
18686 */
jt_from_subprog(struct bpf_verifier_env * env,int subprog_start,int subprog_end)18687 static struct bpf_iarray *jt_from_subprog(struct bpf_verifier_env *env,
18688 int subprog_start, int subprog_end)
18689 {
18690 struct bpf_iarray *jt = NULL;
18691 struct bpf_map *map;
18692 struct bpf_iarray *jt_cur;
18693 int i;
18694
18695 for (i = 0; i < env->insn_array_map_cnt; i++) {
18696 /*
18697 * TODO (when needed): collect only jump tables, not static keys
18698 * or maps for indirect calls
18699 */
18700 map = env->insn_array_maps[i];
18701
18702 jt_cur = jt_from_map(map);
18703 if (IS_ERR(jt_cur)) {
18704 kvfree(jt);
18705 return jt_cur;
18706 }
18707
18708 /*
18709 * This is enough to check one element. The full table is
18710 * checked to fit inside the subprog later in create_jt()
18711 */
18712 if (jt_cur->items[0] >= subprog_start && jt_cur->items[0] < subprog_end) {
18713 u32 old_cnt = jt ? jt->cnt : 0;
18714 jt = iarray_realloc(jt, old_cnt + jt_cur->cnt);
18715 if (!jt) {
18716 kvfree(jt_cur);
18717 return ERR_PTR(-ENOMEM);
18718 }
18719 memcpy(jt->items + old_cnt, jt_cur->items, jt_cur->cnt << 2);
18720 }
18721
18722 kvfree(jt_cur);
18723 }
18724
18725 if (!jt) {
18726 verbose(env, "no jump tables found for subprog starting at %u\n", subprog_start);
18727 return ERR_PTR(-EINVAL);
18728 }
18729
18730 jt->cnt = sort_insn_array_uniq(jt->items, jt->cnt);
18731 return jt;
18732 }
18733
18734 static struct bpf_iarray *
create_jt(int t,struct bpf_verifier_env * env)18735 create_jt(int t, struct bpf_verifier_env *env)
18736 {
18737 static struct bpf_subprog_info *subprog;
18738 int subprog_start, subprog_end;
18739 struct bpf_iarray *jt;
18740 int i;
18741
18742 subprog = bpf_find_containing_subprog(env, t);
18743 subprog_start = subprog->start;
18744 subprog_end = (subprog + 1)->start;
18745 jt = jt_from_subprog(env, subprog_start, subprog_end);
18746 if (IS_ERR(jt))
18747 return jt;
18748
18749 /* Check that the every element of the jump table fits within the given subprogram */
18750 for (i = 0; i < jt->cnt; i++) {
18751 if (jt->items[i] < subprog_start || jt->items[i] >= subprog_end) {
18752 verbose(env, "jump table for insn %d points outside of the subprog [%u,%u]\n",
18753 t, subprog_start, subprog_end);
18754 kvfree(jt);
18755 return ERR_PTR(-EINVAL);
18756 }
18757 }
18758
18759 return jt;
18760 }
18761
18762 /* "conditional jump with N edges" */
visit_gotox_insn(int t,struct bpf_verifier_env * env)18763 static int visit_gotox_insn(int t, struct bpf_verifier_env *env)
18764 {
18765 int *insn_stack = env->cfg.insn_stack;
18766 int *insn_state = env->cfg.insn_state;
18767 bool keep_exploring = false;
18768 struct bpf_iarray *jt;
18769 int i, w;
18770
18771 jt = env->insn_aux_data[t].jt;
18772 if (!jt) {
18773 jt = create_jt(t, env);
18774 if (IS_ERR(jt))
18775 return PTR_ERR(jt);
18776
18777 env->insn_aux_data[t].jt = jt;
18778 }
18779
18780 mark_prune_point(env, t);
18781 for (i = 0; i < jt->cnt; i++) {
18782 w = jt->items[i];
18783 if (w < 0 || w >= env->prog->len) {
18784 verbose(env, "indirect jump out of range from insn %d to %d\n", t, w);
18785 return -EINVAL;
18786 }
18787
18788 mark_jmp_point(env, w);
18789
18790 /* EXPLORED || DISCOVERED */
18791 if (insn_state[w])
18792 continue;
18793
18794 if (env->cfg.cur_stack >= env->prog->len)
18795 return -E2BIG;
18796
18797 insn_stack[env->cfg.cur_stack++] = w;
18798 insn_state[w] |= DISCOVERED;
18799 keep_exploring = true;
18800 }
18801
18802 return keep_exploring ? KEEP_EXPLORING : DONE_EXPLORING;
18803 }
18804
visit_tailcall_insn(struct bpf_verifier_env * env,int t)18805 static int visit_tailcall_insn(struct bpf_verifier_env *env, int t)
18806 {
18807 static struct bpf_subprog_info *subprog;
18808 struct bpf_iarray *jt;
18809
18810 if (env->insn_aux_data[t].jt)
18811 return 0;
18812
18813 jt = iarray_realloc(NULL, 2);
18814 if (!jt)
18815 return -ENOMEM;
18816
18817 subprog = bpf_find_containing_subprog(env, t);
18818 jt->items[0] = t + 1;
18819 jt->items[1] = subprog->exit_idx;
18820 env->insn_aux_data[t].jt = jt;
18821 return 0;
18822 }
18823
18824 /* Visits the instruction at index t and returns one of the following:
18825 * < 0 - an error occurred
18826 * DONE_EXPLORING - the instruction was fully explored
18827 * KEEP_EXPLORING - there is still work to be done before it is fully explored
18828 */
visit_insn(int t,struct bpf_verifier_env * env)18829 static int visit_insn(int t, struct bpf_verifier_env *env)
18830 {
18831 struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
18832 int ret, off, insn_sz;
18833
18834 if (bpf_pseudo_func(insn))
18835 return visit_func_call_insn(t, insns, env, true);
18836
18837 /* All non-branch instructions have a single fall-through edge. */
18838 if (BPF_CLASS(insn->code) != BPF_JMP &&
18839 BPF_CLASS(insn->code) != BPF_JMP32) {
18840 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
18841 return push_insn(t, t + insn_sz, FALLTHROUGH, env);
18842 }
18843
18844 switch (BPF_OP(insn->code)) {
18845 case BPF_EXIT:
18846 return DONE_EXPLORING;
18847
18848 case BPF_CALL:
18849 if (is_async_callback_calling_insn(insn))
18850 /* Mark this call insn as a prune point to trigger
18851 * is_state_visited() check before call itself is
18852 * processed by __check_func_call(). Otherwise new
18853 * async state will be pushed for further exploration.
18854 */
18855 mark_prune_point(env, t);
18856 /* For functions that invoke callbacks it is not known how many times
18857 * callback would be called. Verifier models callback calling functions
18858 * by repeatedly visiting callback bodies and returning to origin call
18859 * instruction.
18860 * In order to stop such iteration verifier needs to identify when a
18861 * state identical some state from a previous iteration is reached.
18862 * Check below forces creation of checkpoint before callback calling
18863 * instruction to allow search for such identical states.
18864 */
18865 if (is_sync_callback_calling_insn(insn)) {
18866 mark_calls_callback(env, t);
18867 mark_force_checkpoint(env, t);
18868 mark_prune_point(env, t);
18869 mark_jmp_point(env, t);
18870 }
18871 if (bpf_helper_call(insn)) {
18872 const struct bpf_func_proto *fp;
18873
18874 ret = get_helper_proto(env, insn->imm, &fp);
18875 /* If called in a non-sleepable context program will be
18876 * rejected anyway, so we should end up with precise
18877 * sleepable marks on subprogs, except for dead code
18878 * elimination.
18879 */
18880 if (ret == 0 && fp->might_sleep)
18881 mark_subprog_might_sleep(env, t);
18882 if (bpf_helper_changes_pkt_data(insn->imm))
18883 mark_subprog_changes_pkt_data(env, t);
18884 if (insn->imm == BPF_FUNC_tail_call)
18885 visit_tailcall_insn(env, t);
18886 } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
18887 struct bpf_kfunc_call_arg_meta meta;
18888
18889 ret = fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
18890 if (ret == 0 && is_iter_next_kfunc(&meta)) {
18891 mark_prune_point(env, t);
18892 /* Checking and saving state checkpoints at iter_next() call
18893 * is crucial for fast convergence of open-coded iterator loop
18894 * logic, so we need to force it. If we don't do that,
18895 * is_state_visited() might skip saving a checkpoint, causing
18896 * unnecessarily long sequence of not checkpointed
18897 * instructions and jumps, leading to exhaustion of jump
18898 * history buffer, and potentially other undesired outcomes.
18899 * It is expected that with correct open-coded iterators
18900 * convergence will happen quickly, so we don't run a risk of
18901 * exhausting memory.
18902 */
18903 mark_force_checkpoint(env, t);
18904 }
18905 /* Same as helpers, if called in a non-sleepable context
18906 * program will be rejected anyway, so we should end up
18907 * with precise sleepable marks on subprogs, except for
18908 * dead code elimination.
18909 */
18910 if (ret == 0 && is_kfunc_sleepable(&meta))
18911 mark_subprog_might_sleep(env, t);
18912 if (ret == 0 && is_kfunc_pkt_changing(&meta))
18913 mark_subprog_changes_pkt_data(env, t);
18914 }
18915 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
18916
18917 case BPF_JA:
18918 if (BPF_SRC(insn->code) == BPF_X)
18919 return visit_gotox_insn(t, env);
18920
18921 if (BPF_CLASS(insn->code) == BPF_JMP)
18922 off = insn->off;
18923 else
18924 off = insn->imm;
18925
18926 /* unconditional jump with single edge */
18927 ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
18928 if (ret)
18929 return ret;
18930
18931 mark_prune_point(env, t + off + 1);
18932 mark_jmp_point(env, t + off + 1);
18933
18934 return ret;
18935
18936 default:
18937 /* conditional jump with two edges */
18938 mark_prune_point(env, t);
18939 if (is_may_goto_insn(insn))
18940 mark_force_checkpoint(env, t);
18941
18942 ret = push_insn(t, t + 1, FALLTHROUGH, env);
18943 if (ret)
18944 return ret;
18945
18946 return push_insn(t, t + insn->off + 1, BRANCH, env);
18947 }
18948 }
18949
18950 /* non-recursive depth-first-search to detect loops in BPF program
18951 * loop == back-edge in directed graph
18952 */
check_cfg(struct bpf_verifier_env * env)18953 static int check_cfg(struct bpf_verifier_env *env)
18954 {
18955 int insn_cnt = env->prog->len;
18956 int *insn_stack, *insn_state;
18957 int ex_insn_beg, i, ret = 0;
18958
18959 insn_state = env->cfg.insn_state = kvzalloc_objs(int, insn_cnt,
18960 GFP_KERNEL_ACCOUNT);
18961 if (!insn_state)
18962 return -ENOMEM;
18963
18964 insn_stack = env->cfg.insn_stack = kvzalloc_objs(int, insn_cnt,
18965 GFP_KERNEL_ACCOUNT);
18966 if (!insn_stack) {
18967 kvfree(insn_state);
18968 return -ENOMEM;
18969 }
18970
18971 ex_insn_beg = env->exception_callback_subprog
18972 ? env->subprog_info[env->exception_callback_subprog].start
18973 : 0;
18974
18975 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
18976 insn_stack[0] = 0; /* 0 is the first instruction */
18977 env->cfg.cur_stack = 1;
18978
18979 walk_cfg:
18980 while (env->cfg.cur_stack > 0) {
18981 int t = insn_stack[env->cfg.cur_stack - 1];
18982
18983 ret = visit_insn(t, env);
18984 switch (ret) {
18985 case DONE_EXPLORING:
18986 insn_state[t] = EXPLORED;
18987 env->cfg.cur_stack--;
18988 break;
18989 case KEEP_EXPLORING:
18990 break;
18991 default:
18992 if (ret > 0) {
18993 verifier_bug(env, "visit_insn internal bug");
18994 ret = -EFAULT;
18995 }
18996 goto err_free;
18997 }
18998 }
18999
19000 if (env->cfg.cur_stack < 0) {
19001 verifier_bug(env, "pop stack internal bug");
19002 ret = -EFAULT;
19003 goto err_free;
19004 }
19005
19006 if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) {
19007 insn_state[ex_insn_beg] = DISCOVERED;
19008 insn_stack[0] = ex_insn_beg;
19009 env->cfg.cur_stack = 1;
19010 goto walk_cfg;
19011 }
19012
19013 for (i = 0; i < insn_cnt; i++) {
19014 struct bpf_insn *insn = &env->prog->insnsi[i];
19015
19016 if (insn_state[i] != EXPLORED) {
19017 verbose(env, "unreachable insn %d\n", i);
19018 ret = -EINVAL;
19019 goto err_free;
19020 }
19021 if (bpf_is_ldimm64(insn)) {
19022 if (insn_state[i + 1] != 0) {
19023 verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
19024 ret = -EINVAL;
19025 goto err_free;
19026 }
19027 i++; /* skip second half of ldimm64 */
19028 }
19029 }
19030 ret = 0; /* cfg looks good */
19031 env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
19032 env->prog->aux->might_sleep = env->subprog_info[0].might_sleep;
19033
19034 err_free:
19035 kvfree(insn_state);
19036 kvfree(insn_stack);
19037 env->cfg.insn_state = env->cfg.insn_stack = NULL;
19038 return ret;
19039 }
19040
19041 /*
19042 * For each subprogram 'i' fill array env->cfg.insn_subprogram sub-range
19043 * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
19044 * with indices of 'i' instructions in postorder.
19045 */
compute_postorder(struct bpf_verifier_env * env)19046 static int compute_postorder(struct bpf_verifier_env *env)
19047 {
19048 u32 cur_postorder, i, top, stack_sz, s;
19049 int *stack = NULL, *postorder = NULL, *state = NULL;
19050 struct bpf_iarray *succ;
19051
19052 postorder = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
19053 state = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
19054 stack = kvzalloc_objs(int, env->prog->len, GFP_KERNEL_ACCOUNT);
19055 if (!postorder || !state || !stack) {
19056 kvfree(postorder);
19057 kvfree(state);
19058 kvfree(stack);
19059 return -ENOMEM;
19060 }
19061 cur_postorder = 0;
19062 for (i = 0; i < env->subprog_cnt; i++) {
19063 env->subprog_info[i].postorder_start = cur_postorder;
19064 stack[0] = env->subprog_info[i].start;
19065 stack_sz = 1;
19066 do {
19067 top = stack[stack_sz - 1];
19068 state[top] |= DISCOVERED;
19069 if (state[top] & EXPLORED) {
19070 postorder[cur_postorder++] = top;
19071 stack_sz--;
19072 continue;
19073 }
19074 succ = bpf_insn_successors(env, top);
19075 for (s = 0; s < succ->cnt; ++s) {
19076 if (!state[succ->items[s]]) {
19077 stack[stack_sz++] = succ->items[s];
19078 state[succ->items[s]] |= DISCOVERED;
19079 }
19080 }
19081 state[top] |= EXPLORED;
19082 } while (stack_sz);
19083 }
19084 env->subprog_info[i].postorder_start = cur_postorder;
19085 env->cfg.insn_postorder = postorder;
19086 env->cfg.cur_postorder = cur_postorder;
19087 kvfree(stack);
19088 kvfree(state);
19089 return 0;
19090 }
19091
check_abnormal_return(struct bpf_verifier_env * env)19092 static int check_abnormal_return(struct bpf_verifier_env *env)
19093 {
19094 int i;
19095
19096 for (i = 1; i < env->subprog_cnt; i++) {
19097 if (env->subprog_info[i].has_ld_abs) {
19098 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
19099 return -EINVAL;
19100 }
19101 if (env->subprog_info[i].has_tail_call) {
19102 verbose(env, "tail_call is not allowed in subprogs without BTF\n");
19103 return -EINVAL;
19104 }
19105 }
19106 return 0;
19107 }
19108
19109 /* The minimum supported BTF func info size */
19110 #define MIN_BPF_FUNCINFO_SIZE 8
19111 #define MAX_FUNCINFO_REC_SIZE 252
19112
/*
 * Copy the func_info records described by @attr into a kernel array and
 * validate them: record size, zeroed tail, strictly increasing insn_off
 * starting at 0, and a type_id that refers to a BTF func with a func_proto.
 *
 * On success the array is stored in prog->aux->func_info (with
 * func_info_cnt) and 0 is returned. With no func info supplied, only
 * check_abnormal_return() is consulted. On failure a negative error is
 * returned and the array is freed.
 */
static int check_btf_func_early(struct bpf_verifier_env *env,
				const union bpf_attr *attr,
				bpfptr_t uattr)
{
	u32 krec_size = sizeof(struct bpf_func_info);
	u32 nfuncs = attr->func_info_cnt;
	const struct btf_type *type, *func_proto;
	struct bpf_prog *prog = env->prog;
	struct bpf_func_info *krecord;
	u32 i, urec_size, min_size;
	const struct btf *btf;
	u32 prev_offset = 0;
	bpfptr_t urecord;
	int ret = -ENOMEM;

	if (!nfuncs)
		return check_abnormal_return(env) ? -EINVAL : 0;

	urec_size = attr->func_info_rec_size;
	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
	    urec_size > MAX_FUNCINFO_REC_SIZE ||
	    urec_size % sizeof(u32)) {
		verbose(env, "invalid func info rec size %u\n", urec_size);
		return -EINVAL;
	}

	btf = prog->aux->btf;
	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
	/* copy no more than what both sides know about */
	min_size = min_t(u32, krec_size, urec_size);

	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (!krecord)
		return -ENOMEM;

	for (i = 0; i < nfuncs; i++) {
		struct bpf_func_info *rec = &krecord[i];

		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
		if (ret == -E2BIG) {
			verbose(env, "nonzero tailing record in func info");
			/* report the record size back so that the loader can
			 * zero out the rest of the record.
			 */
			if (copy_to_bpfptr_offset(uattr,
						  offsetof(union bpf_attr, func_info_rec_size),
						  &min_size, sizeof(min_size)))
				ret = -EFAULT;
		}
		if (ret)
			goto err_free;

		if (copy_from_bpfptr(rec, urecord, min_size)) {
			ret = -EFAULT;
			goto err_free;
		}

		/* every rejection below is a malformed record */
		ret = -EINVAL;

		/* insn_off: 0 for the first record, strictly increasing after */
		if (i == 0 && rec->insn_off) {
			verbose(env,
				"nonzero insn_off %u for the first func info record",
				rec->insn_off);
			goto err_free;
		}
		if (i != 0 && rec->insn_off <= prev_offset) {
			verbose(env,
				"same or smaller insn offset (%u) than previous func info record (%u)",
				rec->insn_off, prev_offset);
			goto err_free;
		}

		/* type_id must name a BTF func */
		type = btf_type_by_id(btf, rec->type_id);
		if (!type || !btf_type_is_func(type)) {
			verbose(env, "invalid type id %d in func info",
				rec->type_id);
			goto err_free;
		}

		func_proto = btf_type_by_id(btf, type->type);
		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
			/* btf_func_check() already verified it during BTF load */
			goto err_free;

		prev_offset = rec->insn_off;
		bpfptr_add(&urecord, urec_size);
	}

	prog->aux->func_info = krecord;
	prog->aux->func_info_cnt = nfuncs;
	return 0;

err_free:
	kvfree(krecord);
	return ret;
}
19214
/*
 * Second stage of func_info validation.
 *
 * Requires one func_info record per subprog, with each record's insn_off
 * equal to the start of the subprog at the same index. Records the BTF
 * linkage of each function and rejects LD_ABS / tail_call in non-main
 * subprogs whose return type is neither a small int nor an enum.
 *
 * Reads the records from prog->aux->func_info. On success the per-function
 * aux array is stored in prog->aux->func_info_aux and 0 is returned; on
 * failure it is freed and a negative error is returned.
 */
static int check_btf_func(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  bpfptr_t uattr)
{
	const struct btf_type *type, *func_proto, *ret_type;
	struct bpf_func_info_aux *info_aux = NULL;
	struct bpf_prog *prog = env->prog;
	u32 nfuncs = attr->func_info_cnt;
	struct bpf_func_info *krecord;
	const struct btf *btf;
	bpfptr_t urecord;
	bool scalar_return;
	u32 i, urec_size;
	int ret = -ENOMEM;

	if (!nfuncs)
		return check_abnormal_return(env) ? -EINVAL : 0;

	if (nfuncs != env->subprog_cnt) {
		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
		return -EINVAL;
	}

	urec_size = attr->func_info_rec_size;
	btf = prog->aux->btf;
	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
	krecord = prog->aux->func_info;

	info_aux = kzalloc_objs(*info_aux, nfuncs,
				GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (!info_aux)
		return -ENOMEM;

	for (i = 0; i < nfuncs; i++) {
		/* every rejection in this loop is -EINVAL */
		ret = -EINVAL;

		/* insn_off has to line up with the subprog layout */
		if (krecord[i].insn_off != env->subprog_info[i].start) {
			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
			goto err_free;
		}

		/* type_id and func_proto were already checked */
		type = btf_type_by_id(btf, krecord[i].type_id);
		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
		func_proto = btf_type_by_id(btf, type->type);

		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
		scalar_return = btf_type_is_small_int(ret_type) ||
				btf_is_any_enum(ret_type);

		/* the restrictions below do not apply to the main prog (i == 0) */
		if (i && !scalar_return) {
			if (env->subprog_info[i].has_ld_abs) {
				verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
				goto err_free;
			}
			if (env->subprog_info[i].has_tail_call) {
				verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
				goto err_free;
			}
		}

		bpfptr_add(&urecord, urec_size);
	}

	prog->aux->func_info_aux = info_aux;
	return 0;

err_free:
	kfree(info_aux);
	return ret;
}
19290
adjust_btf_func(struct bpf_verifier_env * env)19291 static void adjust_btf_func(struct bpf_verifier_env *env)
19292 {
19293 struct bpf_prog_aux *aux = env->prog->aux;
19294 int i;
19295
19296 if (!aux->func_info)
19297 return;
19298
19299 /* func_info is not available for hidden subprogs */
19300 for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
19301 aux->func_info[i].insn_off = env->subprog_info[i].start;
19302 }
19303
19304 #define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
19305 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
19306
/*
 * Copy the line_info records described by @attr into a kernel array and
 * validate them against the program and its subprog layout.
 *
 * Each record must have a strictly increasing insn_off below prog->len,
 * point at an instruction with a non-zero opcode, and carry line_off /
 * file_name_off values that resolve in the program's BTF. Every subprog
 * must have a record whose insn_off equals the subprog start; the index of
 * that record is saved in the subprog's linfo_idx.
 *
 * On success the array is stored in prog->aux->linfo (with nr_linfo) and 0
 * is returned; on failure it is freed and a negative error is returned.
 */
static int check_btf_line(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  bpfptr_t uattr)
{
	u32 expected_size = sizeof(struct bpf_line_info);
	u32 nr_linfo = attr->line_info_cnt;
	struct bpf_subprog_info *sub = env->subprog_info;
	struct bpf_prog *prog = env->prog;
	u32 i, ncopy, rec_size, prev_offset = 0;
	struct bpf_line_info *linfo;
	const struct btf *btf;
	bpfptr_t ulinfo;
	u32 s = 0;
	int err;

	if (!nr_linfo)
		return 0;
	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
		return -EINVAL;

	rec_size = attr->line_info_rec_size;
	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
	    rec_size > MAX_LINEINFO_REC_SIZE ||
	    rec_size & (sizeof(u32) - 1))
		return -EINVAL;

	/* Zeroed allocation: userspace may pass a smaller bpf_line_info
	 * object than the kernel's, leaving the tail of each entry uncopied.
	 */
	linfo = kvzalloc_objs(struct bpf_line_info, nr_linfo,
			      GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (!linfo)
		return -ENOMEM;

	btf = prog->aux->btf;
	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
	ncopy = min_t(u32, expected_size, rec_size);

	for (i = 0; i < nr_linfo; i++) {
		struct bpf_line_info *cur = &linfo[i];

		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
		if (err == -E2BIG) {
			verbose(env, "nonzero tailing record in line_info");
			if (copy_to_bpfptr_offset(uattr,
						  offsetof(union bpf_attr, line_info_rec_size),
						  &expected_size, sizeof(expected_size)))
				err = -EFAULT;
		}
		if (err)
			goto err_free;

		if (copy_from_bpfptr(cur, ulinfo, ncopy)) {
			err = -EFAULT;
			goto err_free;
		}

		/* every rejection below is a malformed record */
		err = -EINVAL;

		/*
		 * insn_off has to be
		 * 1) strictly increasing AND
		 * 2) bounded by prog->len
		 *
		 * linfo[0].insn_off == 0 is not tested here: it is covered by
		 * the "missing bpf_line_info for func..." case below, since
		 * the first record must match the first sub and the first
		 * sub must have subprog_info[0].start == 0.
		 */
		if ((i && cur->insn_off <= prev_offset) ||
		    cur->insn_off >= prog->len) {
			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
				i, cur->insn_off, prev_offset,
				prog->len);
			goto err_free;
		}

		if (!prog->insnsi[cur->insn_off].code) {
			verbose(env,
				"Invalid insn code at line_info[%u].insn_off\n",
				i);
			goto err_free;
		}

		if (!btf_name_by_offset(btf, cur->line_off) ||
		    !btf_name_by_offset(btf, cur->file_name_off)) {
			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
			goto err_free;
		}

		/* match records against subprog starts, in order */
		if (s != env->subprog_cnt) {
			if (cur->insn_off == sub[s].start) {
				sub[s].linfo_idx = i;
				s++;
			} else if (sub[s].start < cur->insn_off) {
				verbose(env, "missing bpf_line_info for func#%u\n", s);
				goto err_free;
			}
		}

		prev_offset = cur->insn_off;
		bpfptr_add(&ulinfo, rec_size);
	}

	if (s != env->subprog_cnt) {
		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
			env->subprog_cnt - s, s);
		err = -EINVAL;
		goto err_free;
	}

	prog->aux->linfo = linfo;
	prog->aux->nr_linfo = nr_linfo;

	return 0;

err_free:
	kvfree(linfo);
	return err;
}
19431
19432 #define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
19433 #define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
19434
/*
 * Validate and apply the CO-RE relocation records described by @attr.
 *
 * Unlike func_info and line_info, the records are not kept: each one is
 * copied into a local, checked (insn_off must be a multiple of 8 and index
 * an instruction inside the program) and handed to bpf_core_apply().
 *
 * Returns 0 on success or when no records were supplied, otherwise the
 * first error encountered.
 */
static int check_core_relo(struct bpf_verifier_env *env,
			   const union bpf_attr *attr,
			   bpfptr_t uattr)
{
	u32 expected_size = sizeof(struct bpf_core_relo);
	u32 nr_core_relo = attr->core_relo_cnt;
	struct bpf_core_relo core_relo = {};
	struct bpf_prog *prog = env->prog;
	const struct btf *btf = prog->aux->btf;
	struct bpf_core_ctx ctx = {
		.log = &env->log,
		.btf = btf,
	};
	u32 i, ncopy, rec_size;
	bpfptr_t u_core_relo;
	int err;

	if (!nr_core_relo)
		return 0;
	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
		return -EINVAL;

	rec_size = attr->core_relo_rec_size;
	if (rec_size < MIN_CORE_RELO_SIZE ||
	    rec_size > MAX_CORE_RELO_SIZE ||
	    rec_size % sizeof(u32))
		return -EINVAL;

	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
	ncopy = min_t(u32, expected_size, rec_size);

	for (i = 0; i < nr_core_relo; i++) {
		/* future proofing when sizeof(bpf_core_relo) changes */
		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
		if (err == -E2BIG) {
			verbose(env, "nonzero tailing record in core_relo");
			if (copy_to_bpfptr_offset(uattr,
						  offsetof(union bpf_attr, core_relo_rec_size),
						  &expected_size, sizeof(expected_size)))
				err = -EFAULT;
		}
		if (err)
			return err;

		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy))
			return -EFAULT;

		/* insn_off is a byte offset; instructions are 8 bytes each */
		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
				i, core_relo.insn_off, prog->len);
			return -EINVAL;
		}

		err = bpf_core_apply(&ctx, &core_relo, i,
				     &prog->insnsi[core_relo.insn_off / 8]);
		if (err)
			return err;

		bpfptr_add(&u_core_relo, rec_size);
	}
	return err;
}
19503
/*
 * Early BTF setup for program load.
 *
 * Without func info and line info only check_abnormal_return() applies.
 * Otherwise the BTF object named by attr->prog_btf_fd is looked up, rejected
 * with -EACCES if it is kernel BTF, stored in prog->aux->btf and then
 * check_btf_func_early() is run.
 *
 * Returns 0 on success or a negative error.
 */
static int check_btf_info_early(struct bpf_verifier_env *env,
				const union bpf_attr *attr,
				bpfptr_t uattr)
{
	struct btf *btf;

	if (!attr->func_info_cnt && !attr->line_info_cnt)
		return check_abnormal_return(env) ? -EINVAL : 0;

	btf = btf_get_by_fd(attr->prog_btf_fd);
	if (IS_ERR(btf))
		return PTR_ERR(btf);

	if (btf_is_kernel(btf)) {
		/* drop the reference taken by btf_get_by_fd() */
		btf_put(btf);
		return -EACCES;
	}
	env->prog->aux->btf = btf;

	return check_btf_func_early(env, attr, uattr);
}
19531
/*
 * Run the BTF-related checks in order: func info, line info, CO-RE
 * relocations. The first failure stops the sequence.
 *
 * Without func info and line info only check_abnormal_return() applies.
 *
 * Returns 0 on success or the first negative error.
 */
static int check_btf_info(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  bpfptr_t uattr)
{
	int err;

	if (!attr->func_info_cnt && !attr->line_info_cnt)
		return check_abnormal_return(env) ? -EINVAL : 0;

	err = check_btf_func(env, attr, uattr);
	if (!err)
		err = check_btf_line(env, attr, uattr);
	if (!err)
		err = check_core_relo(env, attr, uattr);

	return err;
}
19558
19559 /* check %cur's range satisfies %old's */
range_within(const struct bpf_reg_state * old,const struct bpf_reg_state * cur)19560 static bool range_within(const struct bpf_reg_state *old,
19561 const struct bpf_reg_state *cur)
19562 {
19563 return old->umin_value <= cur->umin_value &&
19564 old->umax_value >= cur->umax_value &&
19565 old->smin_value <= cur->smin_value &&
19566 old->smax_value >= cur->smax_value &&
19567 old->u32_min_value <= cur->u32_min_value &&
19568 old->u32_max_value >= cur->u32_max_value &&
19569 old->s32_min_value <= cur->s32_min_value &&
19570 old->s32_max_value >= cur->s32_max_value;
19571 }
19572
19573 /* If in the old state two registers had the same id, then they need to have
19574 * the same id in the new state as well. But that id could be different from
19575 * the old state, so we need to track the mapping from old to new ids.
19576 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
19577 * regs with old id 5 must also have new id 9 for the new state to be safe. But
19578 * regs with a different old id could still have new id 9, we don't care about
19579 * that.
19580 * So we look through our idmap to see if this old id has been seen before. If
19581 * so, we require the new id to match; otherwise, we add the id pair to the map.
19582 */
check_ids(u32 old_id,u32 cur_id,struct bpf_idmap * idmap)19583 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
19584 {
19585 struct bpf_id_pair *map = idmap->map;
19586 unsigned int i;
19587
19588 /* either both IDs should be set or both should be zero */
19589 if (!!old_id != !!cur_id)
19590 return false;
19591
19592 if (old_id == 0) /* cur_id == 0 as well */
19593 return true;
19594
19595 for (i = 0; i < idmap->cnt; i++) {
19596 if (map[i].old == old_id)
19597 return map[i].cur == cur_id;
19598 if (map[i].cur == cur_id)
19599 return false;
19600 }
19601
19602 /* Reached the end of known mappings; haven't seen this id before */
19603 if (idmap->cnt < BPF_ID_MAP_SIZE) {
19604 map[idmap->cnt].old = old_id;
19605 map[idmap->cnt].cur = cur_id;
19606 idmap->cnt++;
19607 return true;
19608 }
19609
19610 /* We ran out of idmap slots, which should be impossible */
19611 WARN_ON_ONCE(1);
19612 return false;
19613 }
19614
19615 /*
19616 * Compare scalar register IDs for state equivalence.
19617 *
19618 * When old_id == 0, the old register is independent - not linked to any
19619 * other register. Any linking in the current state only adds constraints,
19620 * making it more restrictive. Since the old state didn't rely on any ID
19621 * relationships for this register, it's always safe to accept cur regardless
19622 * of its ID. Hence, return true immediately.
19623 *
19624 * When old_id != 0 but cur_id == 0, we need to ensure that different
19625 * independent registers in cur don't incorrectly satisfy the ID matching
19626 * requirements of linked registers in old.
19627 *
19628 * Example: if old has r6.id=X and r7.id=X (linked), but cur has r6.id=0
19629 * and r7.id=0 (both independent), without temp IDs both would map old_id=X
19630 * to cur_id=0 and pass. With temp IDs: r6 maps X->temp1, r7 tries to map
19631 * X->temp2, but X is already mapped to temp1, so the check fails correctly.
19632 */
check_scalar_ids(u32 old_id,u32 cur_id,struct bpf_idmap * idmap)19633 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
19634 {
19635 if (!old_id)
19636 return true;
19637
19638 cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
19639
19640 return check_ids(old_id, cur_id, idmap);
19641 }
19642
/* Reset everything in frame @st that is dead at instruction @ip.
 *
 * Registers r0..r(BPF_REG_FP - 1) whose bit is clear in the instruction's
 * live_regs_before mask are marked NOT_INIT.  Stack slots for which
 * bpf_stack_slot_alive() returns false get their spilled register marked
 * NOT_INIT and every byte of the slot set to STACK_INVALID.
 */
static void clean_func_state(struct bpf_verifier_env *env,
			     struct bpf_func_state *st,
			     u32 ip)
{
	u16 live = env->insn_aux_data[ip].live_regs_before;
	struct bpf_stack_state *slot;
	int regno, spi, byte;

	for (regno = 0; regno < BPF_REG_FP; regno++) {
		if (live & BIT(regno))
			continue;
		/* the register is unused from here on: clear its state to
		 * make further comparison simpler
		 */
		__mark_reg_not_init(env, &st->regs[regno]);
	}

	for (spi = 0; spi < st->allocated_stack / BPF_REG_SIZE; spi++) {
		if (bpf_stack_slot_alive(env, st->frameno, spi))
			continue;

		slot = &st->stack[spi];
		__mark_reg_not_init(env, &slot->spilled_ptr);
		for (byte = 0; byte < BPF_REG_SIZE; byte++)
			slot->slot_type[byte] = STACK_INVALID;
	}
}
19667
clean_verifier_state(struct bpf_verifier_env * env,struct bpf_verifier_state * st)19668 static void clean_verifier_state(struct bpf_verifier_env *env,
19669 struct bpf_verifier_state *st)
19670 {
19671 int i, ip;
19672
19673 bpf_live_stack_query_init(env, st);
19674 st->cleaned = true;
19675 for (i = 0; i <= st->curframe; i++) {
19676 ip = frame_insn_idx(st, i);
19677 clean_func_state(env, st->frame[i], ip);
19678 }
19679 }
19680
19681 /* the parentage chains form a tree.
19682 * the verifier states are added to state lists at given insn and
19683 * pushed into state stack for future exploration.
19684 * when the verifier reaches bpf_exit insn some of the verifier states
19685 * stored in the state lists have their final liveness state already,
19686 * but a lot of states will get revised from liveness point of view when
19687 * the verifier explores other branches.
19688 * Example:
19689 * 1: *(u64)(r10 - 8) = 1
19690 * 2: if r1 == 100 goto pc+1
19691 * 3: *(u64)(r10 - 8) = 2
19692 * 4: r0 = *(u64)(r10 - 8)
19693 * 5: exit
19694 * when the verifier reaches exit insn the stack slot -8 in the state list of
19695 * insn 2 is not yet marked alive. Then the verifier pops the other_branch
19696 * of insn 2 and goes exploring further. After the insn 4 read, liveness
19697 * analysis would propagate read mark for -8 at insn 2.
19698 *
19699 * Since the verifier pushes the branch states as it sees them while exploring
19700 * the program the condition of walking the branch instruction for the second
19701 * time means that all states below this branch were already explored and
19702 * their final liveness marks are already propagated.
19703 * Hence when the verifier completes the search of state list in is_state_visited()
 * we can call this clean_live_states() function to clear the dead registers and stack
 * slots to simplify state merging.
19706 *
19707 * Important note here that walking the same branch instruction in the callee
 * doesn't mean that the states are DONE. The verifier has to compare
 * the callsites.
19710 */
19711
19712 /* Find id in idset and increment its count, or add new entry */
idset_cnt_inc(struct bpf_idset * idset,u32 id)19713 static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
19714 {
19715 u32 i;
19716
19717 for (i = 0; i < idset->num_ids; i++) {
19718 if (idset->entries[i].id == id) {
19719 idset->entries[i].cnt++;
19720 return;
19721 }
19722 }
19723 /* New id */
19724 if (idset->num_ids < BPF_ID_MAP_SIZE) {
19725 idset->entries[idset->num_ids].id = id;
19726 idset->entries[idset->num_ids].cnt = 1;
19727 idset->num_ids++;
19728 }
19729 }
19730
19731 /* Find id in idset and return its count, or 0 if not found */
idset_cnt_get(struct bpf_idset * idset,u32 id)19732 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
19733 {
19734 u32 i;
19735
19736 for (i = 0; i < idset->num_ids; i++) {
19737 if (idset->entries[i].id == id)
19738 return idset->entries[i].cnt;
19739 }
19740 return 0;
19741 }
19742
19743 /*
19744 * Clear singular scalar ids in a state.
19745 * A register with a non-zero id is called singular if no other register shares
19746 * the same base id. Such registers can be treated as independent (id=0).
19747 */
clear_singular_ids(struct bpf_verifier_env * env,struct bpf_verifier_state * st)19748 static void clear_singular_ids(struct bpf_verifier_env *env,
19749 struct bpf_verifier_state *st)
19750 {
19751 struct bpf_idset *idset = &env->idset_scratch;
19752 struct bpf_func_state *func;
19753 struct bpf_reg_state *reg;
19754
19755 idset->num_ids = 0;
19756
19757 bpf_for_each_reg_in_vstate(st, func, reg, ({
19758 if (reg->type != SCALAR_VALUE)
19759 continue;
19760 if (!reg->id)
19761 continue;
19762 idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
19763 }));
19764
19765 bpf_for_each_reg_in_vstate(st, func, reg, ({
19766 if (reg->type != SCALAR_VALUE)
19767 continue;
19768 if (!reg->id)
19769 continue;
19770 if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1) {
19771 reg->id = 0;
19772 reg->off = 0;
19773 }
19774 }));
19775 }
19776
clean_live_states(struct bpf_verifier_env * env,int insn,struct bpf_verifier_state * cur)19777 static void clean_live_states(struct bpf_verifier_env *env, int insn,
19778 struct bpf_verifier_state *cur)
19779 {
19780 struct bpf_verifier_state_list *sl;
19781 struct list_head *pos, *head;
19782
19783 head = explored_state(env, insn);
19784 list_for_each(pos, head) {
19785 sl = container_of(pos, struct bpf_verifier_state_list, node);
19786 if (sl->state.branches)
19787 continue;
19788 if (sl->state.insn_idx != insn ||
19789 !same_callsites(&sl->state, cur))
19790 continue;
19791 if (sl->state.cleaned)
19792 /* all regs in this state in all frames were already marked */
19793 continue;
19794 if (incomplete_read_marks(env, &sl->state))
19795 continue;
19796 clean_verifier_state(env, &sl->state);
19797 }
19798 }
19799
regs_exact(const struct bpf_reg_state * rold,const struct bpf_reg_state * rcur,struct bpf_idmap * idmap)19800 static bool regs_exact(const struct bpf_reg_state *rold,
19801 const struct bpf_reg_state *rcur,
19802 struct bpf_idmap *idmap)
19803 {
19804 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
19805 check_ids(rold->id, rcur->id, idmap) &&
19806 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
19807 }
19808
/* Strictness of an old-vs-current state comparison, as consumed by
 * regsafe() and stacksafe():
 *
 * NOT_EXACT    - an old scalar that is not marked precise is accepted
 *                without looking at ids or ranges.
 * EXACT        - registers are compared with regs_exact() and stack slot
 *                types have to be identical byte for byte.
 * RANGE_WITHIN - like NOT_EXACT, except that scalars are always subject to
 *                the id and range checks, whether or not they are precise.
 */
enum exact_level {
	NOT_EXACT,
	EXACT,
	RANGE_WITHIN
};
19814
/* Returns true if (rold safe implies rcur safe)
 *
 * @env:   verifier environment; only ->explore_alu_limits is read here
 * @rold:  register taken from the already explored (old) state
 * @rcur:  the corresponding register of the state being checked
 * @idmap: old->cur id mapping shared by the whole state comparison; the
 *         check_ids()/check_scalar_ids() calls below may add entries to it,
 *         so the order in which registers are compared matters
 * @exact: comparison strictness, see enum exact_level
 */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap,
		    enum exact_level exact)
{
	/* EXACT bypasses every relaxation below, including the NOT_INIT one */
	if (exact == EXACT)
		return regs_exact(rold, rcur, idmap);

	if (rold->type == NOT_INIT)
		/* explored state can't have used this */
		return true;

	/* Enforce that register types have to match exactly, including their
	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
	 * rule.
	 *
	 * One can make a point that using a pointer register as unbounded
	 * SCALAR would be technically acceptable, but this could lead to
	 * pointer leaks because scalars are allowed to leak while pointers
	 * are not. We could make this safe in special cases if root is
	 * calling us, but it's probably not worth the hassle.
	 *
	 * Also, register types that are *not* MAYBE_NULL could technically be
	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
	 * to the same map).
	 * However, if the old MAYBE_NULL register then got NULL checked,
	 * doing so could have affected others with the same id, and we can't
	 * check for that because we lost the id when we converted to
	 * a non-MAYBE_NULL variant.
	 * So, as a general rule we don't allow mixing MAYBE_NULL and
	 * non-MAYBE_NULL registers as well.
	 */
	if (rold->type != rcur->type)
		return false;

	switch (base_type(rold->type)) {
	case SCALAR_VALUE:
		if (env->explore_alu_limits) {
			/* explore_alu_limits disables tnum_in() and range_within()
			 * logic and requires everything to be strict
			 */
			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
			       check_scalar_ids(rold->id, rcur->id, idmap);
		}
		/* an imprecise old scalar matches any scalar unless the caller
		 * asked for RANGE_WITHIN
		 */
		if (!rold->precise && exact == NOT_EXACT)
			return true;
		/*
		 * Linked register tracking uses rold->id to detect relationships.
		 * When rold->id == 0, the register is independent and any linking
		 * in rcur only adds constraints. When rold->id != 0, we must verify
		 * id mapping and (for BPF_ADD_CONST) offset consistency.
		 *
		 * +------------------+-----------+------------------+---------------+
		 * |                  | rold->id  | rold + ADD_CONST | rold->id == 0 |
		 * |------------------+-----------+------------------+---------------|
		 * | rcur->id         | range,ids | false            | range         |
		 * | rcur + ADD_CONST | false     | range,ids,off    | range         |
		 * | rcur->id == 0    | range,ids | false            | range         |
		 * +------------------+-----------+------------------+---------------+
		 *
		 * Why check_ids() for scalar registers?
		 *
		 * Consider the following BPF code:
		 *   1: r6 = ... unbound scalar, ID=a ...
		 *   2: r7 = ... unbound scalar, ID=b ...
		 *   3: if (r6 > r7) goto +1
		 *   4: r6 = r7
		 *   5: if (r6 > X) goto ...
		 *   6: ... memory operation using r7 ...
		 *
		 * First verification path is [1-6]:
		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
		 * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
		 *   r7 <= X, because r6 and r7 share same id.
		 * Next verification path is [1-4, 6].
		 *
		 * Instruction (6) would be reached in two states:
		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
		 *
		 * Use check_ids() to distinguish these states.
		 * ---
		 * Also verify that new value satisfies old value range knowledge.
		 */

		/*
		 * ADD_CONST flags must match exactly: BPF_ADD_CONST32 and
		 * BPF_ADD_CONST64 have different linking semantics in
		 * sync_linked_regs() (alu32 zero-extends, alu64 does not),
		 * so pruning across different flag types is unsafe.
		 */
		if (rold->id &&
		    (rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST))
			return false;

		/* Both have offset linkage: offsets must match */
		if ((rold->id & BPF_ADD_CONST) && rold->off != rcur->off)
			return false;

		if (!check_scalar_ids(rold->id, rcur->id, idmap))
			return false;

		return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_BUF:
	case PTR_TO_TP_BUFFER:
		/* If the new min/max/var_off satisfy the old ones and
		 * everything else matches, we are OK.
		 */
		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
		       range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off) &&
		       check_ids(rold->id, rcur->id, idmap) &&
		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET:
		/* We must have at least as much range as the old ptr
		 * did, so that any accesses which were safe before are
		 * still safe.  This is true even if old range < old off,
		 * since someone could have accessed through (ptr - k), or
		 * even done ptr -= k in a register, to get a safe access.
		 */
		if (rold->range < 0 || rcur->range < 0) {
			/* special case for [BEYOND|AT]_PKT_END */
			if (rold->range != rcur->range)
				return false;
		} else if (rold->range > rcur->range) {
			return false;
		}
		/* If the offsets don't match, we can't trust our alignment;
		 * nor can we be sure that we won't fall out of range.
		 */
		if (rold->off != rcur->off)
			return false;
		/* id relations must be preserved */
		if (!check_ids(rold->id, rcur->id, idmap))
			return false;
		/* new val must satisfy old val knowledge */
		return range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_STACK:
		/* two stack pointers are equal only if they're pointing to
		 * the same stack frame, since fp-8 in foo != fp-8 in bar
		 */
		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
	case PTR_TO_ARENA:
		/* types already matched above; nothing else is compared */
		return true;
	case PTR_TO_INSN:
		/* same as the map/mem case above but without any id checks,
		 * plus an explicit ->off comparison
		 */
		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
		       rold->off == rcur->off && range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	default:
		/* every other register type has to match exactly */
		return regs_exact(rold, rcur, idmap);
	}
}
19973
/* Shared stand-in register returned by scalar_reg_for_stack() for a stack
 * slot that is_stack_all_misc() accepts, so that such a slot can be fed to
 * regsafe() like a spilled scalar.
 */
static struct bpf_reg_state unbound_reg;

/* One-time setup of unbound_reg via __mark_reg_unknown_imprecise();
 * registered as a late initcall below. Always returns 0.
 */
static __init int unbound_reg_init(void)
{
	__mark_reg_unknown_imprecise(&unbound_reg);
	return 0;
}
late_initcall(unbound_reg_init);
19982
is_stack_all_misc(struct bpf_verifier_env * env,struct bpf_stack_state * stack)19983 static bool is_stack_all_misc(struct bpf_verifier_env *env,
19984 struct bpf_stack_state *stack)
19985 {
19986 u32 i;
19987
19988 for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) {
19989 if ((stack->slot_type[i] == STACK_MISC) ||
19990 (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
19991 continue;
19992 return false;
19993 }
19994
19995 return true;
19996 }
19997
scalar_reg_for_stack(struct bpf_verifier_env * env,struct bpf_stack_state * stack)19998 static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
19999 struct bpf_stack_state *stack)
20000 {
20001 if (is_spilled_scalar_reg64(stack))
20002 return &stack->spilled_ptr;
20003
20004 if (is_stack_all_misc(env, stack))
20005 return &unbound_reg;
20006
20007 return NULL;
20008 }
20009
/* Compare the stack of explored frame @old against the stack of @cur.
 *
 * The loop walks @old byte by byte (i) while spi selects the 8-byte slot the
 * byte belongs to.  Slot-type checks are done per byte; the per-slot payload
 * (spilled register, dynptr, iterator, irq flag) is compared once, when the
 * last byte of the slot is reached.
 *
 * @idmap is the id mapping shared with the register comparison; regsafe()
 * and check_ids() may add entries to it.
 * @exact selects the strictness, see enum exact_level.
 *
 * Returns true when @cur's stack is acceptable wherever @old's was.
 */
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
		      struct bpf_func_state *cur, struct bpf_idmap *idmap,
		      enum exact_level exact)
{
	int i, spi;

	/* walk slots of the explored stack and ignore any additional
	 * slots in the current stack, since explored(safe) state
	 * didn't use them
	 */
	for (i = 0; i < old->allocated_stack; i++) {
		struct bpf_reg_state *old_reg, *cur_reg;

		spi = i / BPF_REG_SIZE;

		/* in EXACT mode cur must have this byte allocated and the
		 * slot types must be identical
		 */
		if (exact == EXACT &&
		    (i >= cur->allocated_stack ||
		     old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		     cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
			return false;

		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
			continue;

		if (env->allow_uninit_stack &&
		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
			continue;

		/* explored stack has more populated slots than current stack
		 * and these slots were used
		 */
		if (i >= cur->allocated_stack)
			return false;

		/* 64-bit scalar spill vs all slots MISC and vice versa.
		 * Load from all slots MISC produces unbound scalar.
		 * Construct a fake register for such stack and call
		 * regsafe() to ensure scalar ids are compared.
		 */
		old_reg = scalar_reg_for_stack(env, &old->stack[spi]);
		cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]);
		if (old_reg && cur_reg) {
			if (!regsafe(env, old_reg, cur_reg, idmap, exact))
				return false;
			/* the whole slot has been compared: skip its
			 * remaining bytes
			 */
			i += BPF_REG_SIZE - 1;
			continue;
		}

		/* if old state was safe with misc data in the stack
		 * it will be safe with zero-initialized stack.
		 * The opposite is not true
		 */
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
			continue;
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			/* Ex: old explored (safe) state has STACK_SPILL in
			 * this stack slot, but current has STACK_MISC ->
			 * these verifier states are not equivalent,
			 * return false to continue verification of this path
			 */
			return false;
		/* the per-slot payload is compared only at the slot's last byte */
		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
			continue;
		/* Both old and cur are having same slot_type */
		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
		case STACK_SPILL:
			/* when explored and current stack slot are both storing
			 * spilled registers, check that stored pointers types
			 * are the same as well.
			 * Ex: explored safe path could have stored
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
			 * but current path has stored:
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
			 * such verifier states are not equivalent.
			 * return false to continue verification of this path
			 */
			if (!regsafe(env, &old->stack[spi].spilled_ptr,
				     &cur->stack[spi].spilled_ptr, idmap, exact))
				return false;
			break;
		case STACK_DYNPTR:
			/* dynptr type, first_slot flag and reference id must agree */
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
				return false;
			break;
		case STACK_ITER:
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			/* iter.depth is not compared between states as it
			 * doesn't matter for correctness and would otherwise
			 * prevent convergence; we maintain it only to prevent
			 * infinite loop check triggering, see
			 * iter_active_depths_differ()
			 */
			if (old_reg->iter.btf != cur_reg->iter.btf ||
			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
			    old_reg->iter.state != cur_reg->iter.state ||
			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
				return false;
			break;
		case STACK_IRQ_FLAG:
			/* reference id and kfunc class of the saved irq flag must agree */
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
			    old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class)
				return false;
			break;
		case STACK_MISC:
		case STACK_ZERO:
		case STACK_INVALID:
			/* no payload to compare for these slot types */
			continue;
		/* Ensure that new unhandled slot types return false by default */
		default:
			return false;
		}
	}
	return true;
}
20134
refsafe(struct bpf_verifier_state * old,struct bpf_verifier_state * cur,struct bpf_idmap * idmap)20135 static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur,
20136 struct bpf_idmap *idmap)
20137 {
20138 int i;
20139
20140 if (old->acquired_refs != cur->acquired_refs)
20141 return false;
20142
20143 if (old->active_locks != cur->active_locks)
20144 return false;
20145
20146 if (old->active_preempt_locks != cur->active_preempt_locks)
20147 return false;
20148
20149 if (old->active_rcu_locks != cur->active_rcu_locks)
20150 return false;
20151
20152 if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
20153 return false;
20154
20155 if (!check_ids(old->active_lock_id, cur->active_lock_id, idmap) ||
20156 old->active_lock_ptr != cur->active_lock_ptr)
20157 return false;
20158
20159 for (i = 0; i < old->acquired_refs; i++) {
20160 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) ||
20161 old->refs[i].type != cur->refs[i].type)
20162 return false;
20163 switch (old->refs[i].type) {
20164 case REF_TYPE_PTR:
20165 case REF_TYPE_IRQ:
20166 break;
20167 case REF_TYPE_LOCK:
20168 case REF_TYPE_RES_LOCK:
20169 case REF_TYPE_RES_LOCK_IRQ:
20170 if (old->refs[i].ptr != cur->refs[i].ptr)
20171 return false;
20172 break;
20173 default:
20174 WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type);
20175 return false;
20176 }
20177 }
20178
20179 return true;
20180 }
20181
20182 /* compare two verifier states
20183 *
20184 * all states stored in state_list are known to be valid, since
20185 * verifier reached 'bpf_exit' instruction through them
20186 *
20187 * this function is called when verifier exploring different branches of
20188 * execution popped from the state stack. If it sees an old state that has
20189 * more strict register state and more strict stack state then this execution
20190 * branch doesn't need to be explored further, since verifier already
20191 * concluded that more strict state leads to valid finish.
20192 *
20193 * Therefore two states are equivalent if register state is more conservative
20194 * and explored stack state is more conservative than the current one.
20195 * Example:
20196 * explored current
20197 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
20198 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
20199 *
20200 * In other words if current stack state (one being explored) has more
20201 * valid slots than old one that already passed validation, it means
20202 * the verifier can stop exploring and conclude that current state is valid too
20203 *
20204 * Similarly with registers. If explored state has register type as invalid
20205 * whereas register type in current state is meaningful, it means that
20206 * the current state will reach 'bpf_exit' instruction safely
20207 */
func_states_equal(struct bpf_verifier_env * env,struct bpf_func_state * old,struct bpf_func_state * cur,u32 insn_idx,enum exact_level exact)20208 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
20209 struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
20210 {
20211 u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
20212 u16 i;
20213
20214 if (old->callback_depth > cur->callback_depth)
20215 return false;
20216
20217 for (i = 0; i < MAX_BPF_REG; i++)
20218 if (((1 << i) & live_regs) &&
20219 !regsafe(env, &old->regs[i], &cur->regs[i],
20220 &env->idmap_scratch, exact))
20221 return false;
20222
20223 if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
20224 return false;
20225
20226 return true;
20227 }
20228
reset_idmap_scratch(struct bpf_verifier_env * env)20229 static void reset_idmap_scratch(struct bpf_verifier_env *env)
20230 {
20231 struct bpf_idmap *idmap = &env->idmap_scratch;
20232
20233 idmap->tmp_id_gen = env->id_gen;
20234 idmap->cnt = 0;
20235 }
20236
states_equal(struct bpf_verifier_env * env,struct bpf_verifier_state * old,struct bpf_verifier_state * cur,enum exact_level exact)20237 static bool states_equal(struct bpf_verifier_env *env,
20238 struct bpf_verifier_state *old,
20239 struct bpf_verifier_state *cur,
20240 enum exact_level exact)
20241 {
20242 u32 insn_idx;
20243 int i;
20244
20245 if (old->curframe != cur->curframe)
20246 return false;
20247
20248 reset_idmap_scratch(env);
20249
20250 /* Verification state from speculative execution simulation
20251 * must never prune a non-speculative execution one.
20252 */
20253 if (old->speculative && !cur->speculative)
20254 return false;
20255
20256 if (old->in_sleepable != cur->in_sleepable)
20257 return false;
20258
20259 if (!refsafe(old, cur, &env->idmap_scratch))
20260 return false;
20261
20262 /* for states to be equal callsites have to be the same
20263 * and all frame states need to be equivalent
20264 */
20265 for (i = 0; i <= old->curframe; i++) {
20266 insn_idx = frame_insn_idx(old, i);
20267 if (old->frame[i]->callsite != cur->frame[i]->callsite)
20268 return false;
20269 if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
20270 return false;
20271 }
20272 return true;
20273 }
20274
20275 /* find precise scalars in the previous equivalent state and
20276 * propagate them into the current state
20277 */
propagate_precision(struct bpf_verifier_env * env,const struct bpf_verifier_state * old,struct bpf_verifier_state * cur,bool * changed)20278 static int propagate_precision(struct bpf_verifier_env *env,
20279 const struct bpf_verifier_state *old,
20280 struct bpf_verifier_state *cur,
20281 bool *changed)
20282 {
20283 struct bpf_reg_state *state_reg;
20284 struct bpf_func_state *state;
20285 int i, err = 0, fr;
20286 bool first;
20287
20288 for (fr = old->curframe; fr >= 0; fr--) {
20289 state = old->frame[fr];
20290 state_reg = state->regs;
20291 first = true;
20292 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
20293 if (state_reg->type != SCALAR_VALUE ||
20294 !state_reg->precise)
20295 continue;
20296 if (env->log.level & BPF_LOG_LEVEL2) {
20297 if (first)
20298 verbose(env, "frame %d: propagating r%d", fr, i);
20299 else
20300 verbose(env, ",r%d", i);
20301 }
20302 bt_set_frame_reg(&env->bt, fr, i);
20303 first = false;
20304 }
20305
20306 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
20307 if (!is_spilled_reg(&state->stack[i]))
20308 continue;
20309 state_reg = &state->stack[i].spilled_ptr;
20310 if (state_reg->type != SCALAR_VALUE ||
20311 !state_reg->precise)
20312 continue;
20313 if (env->log.level & BPF_LOG_LEVEL2) {
20314 if (first)
20315 verbose(env, "frame %d: propagating fp%d",
20316 fr, (-i - 1) * BPF_REG_SIZE);
20317 else
20318 verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
20319 }
20320 bt_set_frame_slot(&env->bt, fr, i);
20321 first = false;
20322 }
20323 if (!first && (env->log.level & BPF_LOG_LEVEL2))
20324 verbose(env, "\n");
20325 }
20326
20327 err = __mark_chain_precision(env, cur, -1, changed);
20328 if (err < 0)
20329 return err;
20330
20331 return 0;
20332 }
20333
20334 #define MAX_BACKEDGE_ITERS 64
20335
20336 /* Propagate read and precision marks from visit->backedges[*].state->equal_state
20337 * to corresponding parent states of visit->backedges[*].state until fixed point is reached,
20338 * then free visit->backedges.
20339 * After execution of this function incomplete_read_marks() will return false
20340 * for all states corresponding to @visit->callchain.
20341 */
propagate_backedges(struct bpf_verifier_env * env,struct bpf_scc_visit * visit)20342 static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
20343 {
20344 struct bpf_scc_backedge *backedge;
20345 struct bpf_verifier_state *st;
20346 bool changed;
20347 int i, err;
20348
20349 i = 0;
20350 do {
20351 if (i++ > MAX_BACKEDGE_ITERS) {
20352 if (env->log.level & BPF_LOG_LEVEL2)
20353 verbose(env, "%s: too many iterations\n", __func__);
20354 for (backedge = visit->backedges; backedge; backedge = backedge->next)
20355 mark_all_scalars_precise(env, &backedge->state);
20356 break;
20357 }
20358 changed = false;
20359 for (backedge = visit->backedges; backedge; backedge = backedge->next) {
20360 st = &backedge->state;
20361 err = propagate_precision(env, st->equal_state, st, &changed);
20362 if (err)
20363 return err;
20364 }
20365 } while (changed);
20366
20367 free_backedges(visit);
20368 return 0;
20369 }
20370
states_maybe_looping(struct bpf_verifier_state * old,struct bpf_verifier_state * cur)20371 static bool states_maybe_looping(struct bpf_verifier_state *old,
20372 struct bpf_verifier_state *cur)
20373 {
20374 struct bpf_func_state *fold, *fcur;
20375 int i, fr = cur->curframe;
20376
20377 if (old->curframe != fr)
20378 return false;
20379
20380 fold = old->frame[fr];
20381 fcur = cur->frame[fr];
20382 for (i = 0; i < MAX_BPF_REG; i++)
20383 if (memcmp(&fold->regs[i], &fcur->regs[i],
20384 offsetof(struct bpf_reg_state, frameno)))
20385 return false;
20386 return true;
20387 }
20388
/* Return the is_iter_next flag stored in the per-instruction aux data of
 * instruction @insn_idx.
 */
static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
{
	return env->insn_aux_data[insn_idx].is_iter_next;
}
20393
20394 /* is_state_visited() handles iter_next() (see process_iter_next_call() for
20395 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
20396 * states to match, which otherwise would look like an infinite loop. So while
20397 * iter_next() calls are taken care of, we still need to be careful and
20398 * prevent erroneous and too eager declaration of "infinite loop", when
20399 * iterators are involved.
20400 *
20401 * Here's a situation in pseudo-BPF assembly form:
20402 *
20403 * 0: again: ; set up iter_next() call args
20404 * 1: r1 = &it ; <CHECKPOINT HERE>
20405 * 2: call bpf_iter_num_next ; this is iter_next() call
20406 * 3: if r0 == 0 goto done
20407 * 4: ... something useful here ...
20408 * 5: goto again ; another iteration
20409 * 6: done:
20410 * 7: r1 = &it
20411 * 8: call bpf_iter_num_destroy ; clean up iter state
20412 * 9: exit
20413 *
20414 * This is a typical loop. Let's assume that we have a prune point at 1:,
20415 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
20416 * again`, assuming other heuristics don't get in a way).
20417 *
20418 * When we first time come to 1:, let's say we have some state X. We proceed
20419 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
20420 * Now we come back to validate that forked ACTIVE state. We proceed through
20421 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
20422 * are converging. But the problem is that we don't know that yet, as this
20423 * convergence has to happen at iter_next() call site only. So if nothing is
20424 * done, at 1: verifier will use bounded loop logic and declare infinite
20425 * looping (and would be *technically* correct, if not for iterator's
20426 * "eventual sticky NULL" contract, see process_iter_next_call()). But we
20427 * don't want that. So what we do in process_iter_next_call() when we go on
20428 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
20429 * a different iteration. So when we suspect an infinite loop, we additionally
20430 * check if any of the *ACTIVE* iterator states depths differ. If yes, we
20431 * pretend we are not looping and wait for next iter_next() call.
20432 *
20433 * This only applies to ACTIVE state. In DRAINED state we don't expect to
20434 * loop, because that would actually mean infinite loop, as DRAINED state is
20435 * "sticky", and so we'll keep returning into the same instruction with the
20436 * same state (at least in one of possible code paths).
20437 *
20438 * This approach allows to keep infinite loop heuristic even in the face of
20439 * active iterator. E.g., C snippet below is and will be detected as
20440 * infinitely looping:
20441 *
20442 * struct bpf_iter_num it;
20443 * int *p, x;
20444 *
20445 * bpf_iter_num_new(&it, 0, 10);
 *   while ((p = bpf_iter_num_next(&it))) {
 *       x = *p;
20448 * while (x--) {} // <<-- infinite loop here
20449 * }
20450 *
20451 */
iter_active_depths_differ(struct bpf_verifier_state * old,struct bpf_verifier_state * cur)20452 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
20453 {
20454 struct bpf_reg_state *slot, *cur_slot;
20455 struct bpf_func_state *state;
20456 int i, fr;
20457
20458 for (fr = old->curframe; fr >= 0; fr--) {
20459 state = old->frame[fr];
20460 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
20461 if (state->stack[i].slot_type[0] != STACK_ITER)
20462 continue;
20463
20464 slot = &state->stack[i].spilled_ptr;
20465 if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
20466 continue;
20467
20468 cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
20469 if (cur_slot->iter.depth != slot->iter.depth)
20470 return true;
20471 }
20472 }
20473 return false;
20474 }
20475
/* Check whether the current verifier state (env->cur_state), arriving at
 * instruction @insn_idx, is already covered by a state recorded earlier for
 * the same instruction, and optionally record the current state as a new
 * checkpoint.
 *
 * Return:
 *   1        - an equivalent recorded state was found (the "hit" path)
 *   0        - no equivalent state was found; depending on the heuristics
 *              below the current state may or may not have been saved
 *   -EINVAL  - an infinite loop was detected
 *   -ENOMEM  - allocation failure
 *   other negative values are propagated unchanged from the helpers called
 */
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl;
	struct bpf_verifier_state *cur = env->cur_state, *new;
	bool force_new_state, add_new_state, loop;
	int n, err, states_cnt = 0;
	struct list_head *pos, *tmp, *head;

	force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
			  /* Avoid accumulating infinitely long jmp history */
			  cur->jmp_history_cnt > 40;

	/* bpf progs typically have pruning point every 4 instructions
	 * http://vger.kernel.org/bpfconf2019.html#session-1
	 * Do not add new state for future pruning if the verifier hasn't seen
	 * at least 2 jumps and at least 8 instructions.
	 * This heuristic helps decrease 'total_states' and 'peak_states' metric.
	 * In tests that amounts to up to 50% reduction into total verifier
	 * memory consumption and 20% verifier time speedup.
	 */
	add_new_state = force_new_state;
	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
	    env->insn_processed - env->prev_insn_processed >= 8)
		add_new_state = true;

	clean_live_states(env, insn_idx, cur);

	loop = false;
	head = explored_state(env, insn_idx);
	/* _safe iteration: the entry being visited may be unlinked from this
	 * list at the bottom of the loop body.
	 */
	list_for_each_safe(pos, tmp, head) {
		sl = container_of(pos, struct bpf_verifier_state_list, node);
		states_cnt++;
		if (sl->state.insn_idx != insn_idx)
			continue;

		/* recorded state still has branches left to explore */
		if (sl->state.branches) {
			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];

			if (frame->in_async_callback_fn &&
			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
				/* Different async_entry_cnt means that the verifier is
				 * processing another entry into async callback.
				 * Seeing the same state is not an indication of infinite
				 * loop or infinite recursion.
				 * But finding the same state doesn't mean that it's safe
				 * to stop processing the current state. The previous state
				 * hasn't yet reached bpf_exit, since state.branches > 0.
				 * Checking in_async_callback_fn alone is not enough either.
				 * Since the verifier still needs to catch infinite loops
				 * inside async callbacks.
				 */
				goto skip_inf_loop_check;
			}
			/* BPF open-coded iterators loop detection is special.
			 * states_maybe_looping() logic is too simplistic in detecting
			 * states that *might* be equivalent, because it doesn't know
			 * about ID remapping, so don't even perform it.
			 * See process_iter_next_call() and iter_active_depths_differ()
			 * for overview of the logic. When current and one of parent
			 * states are detected as equivalent, it's a good thing: we prove
			 * convergence and can stop simulating further iterations.
			 * It's safe to assume that iterator loop will finish, taking into
			 * account iter_next() contract of eventually returning
			 * sticky NULL result.
			 *
			 * Note, that states have to be compared exactly in this case because
			 * read and precision marks might not be finalized inside the loop.
			 * E.g. as in the program below:
			 *
			 *     1. r7 = -16
			 *     2. r6 = bpf_get_prandom_u32()
			 *     3. while (bpf_iter_num_next(&fp[-8])) {
			 *     4.   if (r6 != 42) {
			 *     5.     r7 = -32
			 *     6.     r6 = bpf_get_prandom_u32()
			 *     7.     continue
			 *     8.   }
			 *     9.   r0 = r10
			 *    10.   r0 += r7
			 *    11.   r8 = *(u64 *)(r0 + 0)
			 *    12.   r6 = bpf_get_prandom_u32()
			 *    13. }
			 *
			 * Here verifier would first visit path 1-3, create a checkpoint at 3
			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
			 * not have read or precision mark for r7 yet, thus inexact states
			 * comparison would discard current state with r7=-32
			 * => unsafe memory access at 11 would not be caught.
			 */
			if (is_iter_next_insn(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					struct bpf_func_state *cur_frame;
					struct bpf_reg_state *iter_state, *iter_reg;
					int spi;

					cur_frame = cur->frame[cur->curframe];
					/* btf_check_iter_kfuncs() enforces that
					 * iter state pointer is always the first arg
					 */
					iter_reg = &cur_frame->regs[BPF_REG_1];
					/* current state is valid due to states_equal(),
					 * so we can assume valid iter and reg state,
					 * no need for extra (re-)validations
					 */
					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
						loop = true;
						goto hit;
					}
				}
				goto skip_inf_loop_check;
			}
			if (is_may_goto_insn_at(env, insn_idx)) {
				if (sl->state.may_goto_depth != cur->may_goto_depth &&
				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					loop = true;
					goto hit;
				}
			}
			if (bpf_calls_callback(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
					loop = true;
					goto hit;
				}
				goto skip_inf_loop_check;
			}
			/* attempt to detect infinite loop to avoid unnecessary doomed work */
			if (states_maybe_looping(&sl->state, cur) &&
			    states_equal(env, &sl->state, cur, EXACT) &&
			    !iter_active_depths_differ(&sl->state, cur) &&
			    sl->state.may_goto_depth == cur->may_goto_depth &&
			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
				verbose_linfo(env, insn_idx, "; ");
				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
				verbose(env, "cur state:");
				print_verifier_state(env, cur, cur->curframe, true);
				verbose(env, "old state:");
				print_verifier_state(env, &sl->state, cur->curframe, true);
				return -EINVAL;
			}
			/* if the verifier is processing a loop, avoid adding new state
			 * too often, since different loop iterations have distinct
			 * states and may not help future pruning.
			 * This threshold shouldn't be too low to make sure that
			 * a loop with large bound will be rejected quickly.
			 * The most abusive loop will be:
			 * r1 += 1
			 * if r1 < 1000000 goto pc-2
			 * 1M insn_processed limit / 100 == 10k peak states.
			 * This threshold shouldn't be too high either, since states
			 * at the end of the loop are likely to be useful in pruning.
			 */
skip_inf_loop_check:
			if (!force_new_state &&
			    env->jmps_processed - env->prev_jmps_processed < 20 &&
			    env->insn_processed - env->prev_insn_processed < 100)
				add_new_state = false;
			goto miss;
		}
		/* See comments for mark_all_regs_read_and_precise() */
		loop = incomplete_read_marks(env, &sl->state);
		if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
hit:
			sl->hit_cnt++;

			/* if previous state reached the exit with precision and
			 * current state is equivalent to it (except precision marks)
			 * the precision needs to be propagated back in
			 * the current state.
			 */
			err = 0;
			if (is_jmp_point(env, env->insn_idx))
				err = push_jmp_history(env, cur, 0, 0);
			err = err ? : propagate_precision(env, &sl->state, cur, NULL);
			if (err)
				return err;
			/* When processing iterator based loops above propagate_liveness and
			 * propagate_precision calls are not sufficient to transfer all relevant
			 * read and precision marks. E.g. consider the following case:
			 *
			 *  .-> A --.  Assume the states are visited in the order A, B, C.
			 *  |   |   |  Assume that state B reaches a state equivalent to state A.
			 *  |   v   v  At this point, state C is not processed yet, so state A
			 *  '-- B   C  has not received any read or precision marks from C.
			 *             Thus, marks propagated from A to B are incomplete.
			 *
			 * The verifier mitigates this by performing the following steps:
			 *
			 * - Prior to the main verification pass, strongly connected components
			 *   (SCCs) are computed over the program's control flow graph,
			 *   intraprocedurally.
			 *
			 * - During the main verification pass, `maybe_enter_scc()` checks
			 *   whether the current verifier state is entering an SCC. If so, an
			 *   instance of a `bpf_scc_visit` object is created, and the state
			 *   entering the SCC is recorded as the entry state.
			 *
			 * - This instance is associated not with the SCC itself, but with a
			 *   `bpf_scc_callchain`: a tuple consisting of the call sites leading to
			 *   the SCC and the SCC id. See `compute_scc_callchain()`.
			 *
			 * - When a verification path encounters a `states_equal(...,
			 *   RANGE_WITHIN)` condition, there exists a call chain describing the
			 *   current state and a corresponding `bpf_scc_visit` instance. A copy
			 *   of the current state is created and added to
			 *   `bpf_scc_visit->backedges`.
			 *
			 * - When a verification path terminates, `maybe_exit_scc()` is called
			 *   from `update_branch_counts()`. For states with `branches == 0`, it
			 *   checks whether the state is the entry state of any `bpf_scc_visit`
			 *   instance. If it is, this indicates that all paths originating from
			 *   this SCC visit have been explored. `propagate_backedges()` is then
			 *   called, which propagates read and precision marks through the
			 *   backedges until a fixed point is reached.
			 *   (In the earlier example, this would propagate marks from A to B,
			 *    from C to A, and then again from A to B.)
			 *
			 * A note on callchains
			 * --------------------
			 *
			 * Consider the following example:
			 *
			 *     void foo() { loop { ... SCC#1 ... } }
			 *     void main() {
			 *       A: foo();
			 *       B: ...
			 *       C: foo();
			 *     }
			 *
			 * Here, there are two distinct callchains leading to SCC#1:
			 * - (A, SCC#1)
			 * - (C, SCC#1)
			 *
			 * Each callchain identifies a separate `bpf_scc_visit` instance that
			 * accumulates backedge states. The `propagate_{liveness,precision}()`
			 * functions traverse the parent state of each backedge state, which
			 * means these parent states must remain valid (i.e., not freed) while
			 * the corresponding `bpf_scc_visit` instance exists.
			 *
			 * Associating `bpf_scc_visit` instances directly with SCCs instead of
			 * callchains would break this invariant:
			 * - States explored during `C: foo()` would contribute backedges to
			 *   SCC#1, but SCC#1 would only be exited once the exploration of
			 *   `A: foo()` completes.
			 * - By that time, the states explored between `A: foo()` and `C: foo()`
			 *   (i.e., `B: ...`) may have already been freed, causing the parent
			 *   links for states from `C: foo()` to become invalid.
			 */
			if (loop) {
				struct bpf_scc_backedge *backedge;

				backedge = kzalloc_obj(*backedge,
						       GFP_KERNEL_ACCOUNT);
				if (!backedge)
					return -ENOMEM;
				err = copy_verifier_state(&backedge->state, cur);
				backedge->state.equal_state = &sl->state;
				backedge->state.insn_idx = insn_idx;
				err = err ?: add_scc_backedge(env, &sl->state, backedge);
				if (err) {
					free_verifier_state(&backedge->state, false);
					kfree(backedge);
					return err;
				}
			}
			/* equivalent state found */
			return 1;
		}
miss:
		/* when new state is not going to be added do not increase miss count.
		 * Otherwise several loop iterations will remove the state
		 * recorded earlier. The goal of these heuristics is to have
		 * states from some iterations of the loop (some in the beginning
		 * and some at the end) to help pruning.
		 */
		if (add_new_state)
			sl->miss_cnt++;
		/* heuristic to determine whether this state is beneficial
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
		 * 'n' controls how many times state could miss before eviction.
		 * Use bigger 'n' for checkpoints because evicting checkpoint states
		 * too early would hinder iterator convergence.
		 */
		n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
		if (sl->miss_cnt > sl->hit_cnt * n + n) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
			sl->in_free_list = true;
			list_del(&sl->node);
			list_add(&sl->node, &env->free_list);
			env->free_list_size++;
			env->explored_states_size--;
			maybe_free_verifier_state(env, sl);
		}
	}

	if (env->max_states_per_insn < states_cnt)
		env->max_states_per_insn = states_cnt;

	/* without bpf_capable, do not record yet another state once more than
	 * BPF_COMPLEXITY_LIMIT_STATES list entries were walked for this insn
	 */
	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return 0;

	if (!add_new_state)
		return 0;

	/* There were no equivalent states, remember the current one.
	 * Technically the current state is not proven to be safe yet,
	 * but it will either reach outer most bpf_exit (which means it's safe)
	 * or it will be rejected. When there are no loops the verifier won't be
	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
	 * again on the way to bpf_exit.
	 * When looping the sl->state.branches will be > 0 and this state
	 * will not be considered for equivalence until branches == 0.
	 */
	new_sl = kzalloc_obj(struct bpf_verifier_state_list, GFP_KERNEL_ACCOUNT);
	if (!new_sl)
		return -ENOMEM;
	env->total_states++;
	env->explored_states_size++;
	update_peak_states(env);
	/* restart the "jumps/insns since last checkpoint" counters used by the
	 * add_new_state heuristics at the top of this function
	 */
	env->prev_jmps_processed = env->jmps_processed;
	env->prev_insn_processed = env->insn_processed;

	/* forget precise markings we inherited, see __mark_chain_precision */
	if (env->bpf_capable)
		mark_all_scalars_imprecise(env, cur);

	clear_singular_ids(env, cur);

	/* add new state to the head of linked list */
	new = &new_sl->state;
	err = copy_verifier_state(new, cur);
	if (err) {
		free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}
	new->insn_idx = insn_idx;
	verifier_bug_if(new->branches != 1, env,
			"%s:branches_to_explore=%d insn %d",
			__func__, new->branches, insn_idx);
	err = maybe_enter_scc(env, new);
	if (err) {
		free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}

	/* the saved copy becomes the parent of the state being explored, and
	 * the current state's jump history starts over from this checkpoint
	 */
	cur->parent = new;
	cur->first_insn_idx = insn_idx;
	cur->dfs_depth = new->dfs_depth + 1;
	clear_jmp_history(cur);
	list_add(&new_sl->node, head);
	return 0;
}
20835
20836 /* Return true if it's OK to have the same insn return a different type. */
reg_type_mismatch_ok(enum bpf_reg_type type)20837 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
20838 {
20839 switch (base_type(type)) {
20840 case PTR_TO_CTX:
20841 case PTR_TO_SOCKET:
20842 case PTR_TO_SOCK_COMMON:
20843 case PTR_TO_TCP_SOCK:
20844 case PTR_TO_XDP_SOCK:
20845 case PTR_TO_BTF_ID:
20846 case PTR_TO_ARENA:
20847 return false;
20848 default:
20849 return true;
20850 }
20851 }
20852
20853 /* If an instruction was previously used with particular pointer types, then we
20854 * need to be careful to avoid cases such as the below, where it may be ok
20855 * for one branch accessing the pointer, but not ok for the other branch:
20856 *
20857 * R1 = sock_ptr
20858 * goto X;
20859 * ...
20860 * R1 = some_other_valid_ptr;
20861 * goto X;
20862 * ...
20863 * R2 = *(u32 *)(R1 + 0);
20864 */
reg_type_mismatch(enum bpf_reg_type src,enum bpf_reg_type prev)20865 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
20866 {
20867 return src != prev && (!reg_type_mismatch_ok(src) ||
20868 !reg_type_mismatch_ok(prev));
20869 }
20870
is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)20871 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
20872 {
20873 switch (base_type(type)) {
20874 case PTR_TO_MEM:
20875 case PTR_TO_BTF_ID:
20876 return true;
20877 default:
20878 return false;
20879 }
20880 }
20881
is_ptr_to_mem(enum bpf_reg_type type)20882 static bool is_ptr_to_mem(enum bpf_reg_type type)
20883 {
20884 return base_type(type) == PTR_TO_MEM;
20885 }
20886
/* Record in insn_aux_data the pointer type the current instruction
 * (env->insn_idx) is used with, and validate it against the type recorded by
 * an earlier path through the same instruction.
 *
 * @type:                 pointer type seen on the current path
 * @allow_trust_mismatch: when true, two differing types that are both
 *                        PTR_TO_MEM/PTR_TO_BTF_ID based are merged instead of
 *                        being rejected
 *
 * Return: 0 on success, -EINVAL when the instruction is used with
 * incompatible pointer types.
 */
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_mismatch)
{
	enum bpf_reg_type *slot = &env->insn_aux_data[env->insn_idx].ptr_type;
	enum bpf_reg_type seen = *slot;
	enum bpf_reg_type merged;

	if (seen == NOT_INIT) {
		/* First valid visit of an insn such as
		 *	dst_reg = *(u32 *)(src_reg + off)
		 * remember the type so that intersecting paths can be checked.
		 */
		*slot = type;
		return 0;
	}

	if (!reg_type_mismatch(type, seen))
		return 0;

	/* The same insn is reached with different pointer types, e.g.
	 * src_reg == ctx in one branch and src_reg == stack|map in another.
	 * That is rejected unless the trusted/untrusted merge below applies.
	 */
	if (!allow_trust_mismatch ||
	    !is_ptr_to_mem_or_btf_id(type) ||
	    !is_ptr_to_mem_or_btf_id(seen)) {
		verbose(env, "same insn cannot be used with different pointers\n");
		return -EINVAL;
	}

	/* One path may yield a TRUSTED pointer while another yields an
	 * UNTRUSTED one. Fall back to UNTRUSTED so that
	 * BPF_PROBE_MEM/BPF_PROBE_MEMSX is generated. MEM_RDONLY is handled
	 * the same way: it sticks if either path had it.
	 */
	merged = (is_ptr_to_mem(type) || is_ptr_to_mem(seen)) ?
		 PTR_TO_MEM : PTR_TO_BTF_ID;
	if ((type | seen) & PTR_UNTRUSTED)
		merged |= PTR_UNTRUSTED;
	if ((type | seen) & MEM_RDONLY)
		merged |= MEM_RDONLY;
	*slot = merged;

	return 0;
}
20934
/* Positive, non-error status code. process_bpf_exit_full() returns it for an
 * exception exit and for an exit from the outermost frame, as opposed to 0
 * for a return from a nested function.
 */
enum {
	PROCESS_BPF_EXIT = 1
};
20938
/* Handle the end of a program path, either a BPF_EXIT instruction or an
 * exception exit (@exception_exit == true).
 *
 * Return: a negative errno on failure, 0 after returning from a nested
 * function (with *@do_print_state set to true), or PROCESS_BPF_EXIT for an
 * exception exit or an exit from the outermost frame.
 */
static int process_bpf_exit_full(struct bpf_verifier_env *env,
				 bool *do_print_state,
				 bool exception_exit)
{
	int err;

	/* check_resource_leak() has to run before prepare_func_exit(): when
	 * state->curframe > 0 this may be a callback function, whose
	 * reference_state must match the caller's reference state on exit.
	 */
	err = check_resource_leak(env, exception_exit,
				  exception_exit || !env->cur_state->curframe,
				  exception_exit ? "bpf_throw" :
						   "BPF_EXIT instruction in main prog");
	if (err)
		return err;

	/* For an exceptional exit prepare_func_exit() is skipped, and with it
	 * the freeing of bpf_func_state that it does as a side effect.
	 * Normally this function is only hit with the outermost exit;
	 * copy_verifier_state() in pop_stack() frees any bpf_func_state left
	 * over from nested function exits that were not processed. Return
	 * code checks are skipped too, as exceptional exits do not need them.
	 */
	if (exception_exit)
		return PROCESS_BPF_EXIT;

	if (!env->cur_state->curframe) {
		/* exit from the outermost frame */
		err = check_return_code(env, BPF_REG_0, "R0");
		return err ? err : PROCESS_BPF_EXIT;
	}

	/* exit from nested function */
	err = prepare_func_exit(env, &env->insn_idx);
	if (err)
		return err;

	*do_print_state = true;
	return 0;
}
20982
/* Compute the range of jump table indices that register @regno may refer to.
 *
 * The register's umin_value/umax_value are each added to its off, checked
 * against overflow and an upper limit of U32_MAX * 8, and then divided by 8
 * (presumably the size in bytes of one table entry - TODO confirm).
 *
 * Return: 0 with *@pmin_index / *@pmax_index filled in, -ERANGE when a sum
 * overflows or is too large, -EINVAL when the largest index is not below
 * map->max_entries.
 */
static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
				       int regno,
				       struct bpf_map *map,
				       u32 *pmin_index, u32 *pmax_index)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	const u32 entry_size = 8;
	const u64 max_sum = (u64) U32_MAX * entry_size;
	u64 lo, hi;

	if (check_add_overflow(reg->umin_value, reg->off, &lo) || lo > max_sum) {
		verbose(env, "the sum of R%u umin_value %llu and off %u is too big\n",
			regno, reg->umin_value, reg->off);
		return -ERANGE;
	}

	if (check_add_overflow(reg->umax_value, reg->off, &hi) || hi > max_sum) {
		verbose(env, "the sum of R%u umax_value %llu and off %u is too big\n",
			regno, reg->umax_value, reg->off);
		return -ERANGE;
	}

	/* turn the two sums into table indices */
	lo /= entry_size;
	hi /= entry_size;

	if (hi >= map->max_entries) {
		verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n",
			regno, lo, hi, map->max_entries);
		return -EINVAL;
	}

	*pmin_index = lo;
	*pmax_index = hi;

	return 0;
}
21018
21019 /* gotox *dst_reg */
check_indirect_jump(struct bpf_verifier_env * env,struct bpf_insn * insn)21020 static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
21021 {
21022 struct bpf_verifier_state *other_branch;
21023 struct bpf_reg_state *dst_reg;
21024 struct bpf_map *map;
21025 u32 min_index, max_index;
21026 int err = 0;
21027 int n;
21028 int i;
21029
21030 dst_reg = reg_state(env, insn->dst_reg);
21031 if (dst_reg->type != PTR_TO_INSN) {
21032 verbose(env, "R%d has type %s, expected PTR_TO_INSN\n",
21033 insn->dst_reg, reg_type_str(env, dst_reg->type));
21034 return -EINVAL;
21035 }
21036
21037 map = dst_reg->map_ptr;
21038 if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg))
21039 return -EFAULT;
21040
21041 if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env,
21042 "R%d has incorrect map type %d", insn->dst_reg, map->map_type))
21043 return -EFAULT;
21044
21045 err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index);
21046 if (err)
21047 return err;
21048
21049 /* Ensure that the buffer is large enough */
21050 if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
21051 env->gotox_tmp_buf = iarray_realloc(env->gotox_tmp_buf,
21052 max_index - min_index + 1);
21053 if (!env->gotox_tmp_buf)
21054 return -ENOMEM;
21055 }
21056
21057 n = copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
21058 if (n < 0)
21059 return n;
21060 if (n == 0) {
21061 verbose(env, "register R%d doesn't point to any offset in map id=%d\n",
21062 insn->dst_reg, map->id);
21063 return -EINVAL;
21064 }
21065
21066 for (i = 0; i < n - 1; i++) {
21067 other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
21068 env->insn_idx, env->cur_state->speculative);
21069 if (IS_ERR(other_branch))
21070 return PTR_ERR(other_branch);
21071 }
21072 env->insn_idx = env->gotox_tmp_buf->items[n-1];
21073 return 0;
21074 }
21075
/* Verify the single instruction at env->insn_idx, dispatching on its class
 * (ALU/ALU64, LDX, STX, ST, JMP/JMP32, LD), and move env->insn_idx to the
 * instruction to be processed next.
 *
 * @do_print_state is only passed through to process_bpf_exit_full().
 *
 * Return: 0 on success, a negative errno on failure (-EINVAL for reserved
 * field misuse or an unknown class/mode, otherwise whatever the per-class
 * checker returned). For BPF_EXIT and for a bpf_throw kfunc call the result
 * of process_bpf_exit_full() is returned as is, which may be the positive
 * PROCESS_BPF_EXIT.
 */
static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
{
	int err;
	struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
	u8 class = BPF_CLASS(insn->code);

	if (class == BPF_ALU || class == BPF_ALU64) {
		err = check_alu_op(env, insn);
		if (err)
			return err;

	} else if (class == BPF_LDX) {
		bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;

		/* Check for reserved fields is already done in
		 * resolve_pseudo_ldimm64().
		 */
		err = check_load_mem(env, insn, false, is_ldsx, true, "ldx");
		if (err)
			return err;
	} else if (class == BPF_STX) {
		if (BPF_MODE(insn->code) == BPF_ATOMIC) {
			err = check_atomic(env, insn);
			if (err)
				return err;
			/* atomic ops are done here: step to the next insn and
			 * skip the plain store checks below
			 */
			env->insn_idx++;
			return 0;
		}

		if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
			verbose(env, "BPF_STX uses reserved fields\n");
			return -EINVAL;
		}

		err = check_store_reg(env, insn, false);
		if (err)
			return err;
	} else if (class == BPF_ST) {
		enum bpf_reg_type dst_reg_type;

		if (BPF_MODE(insn->code) != BPF_MEM ||
		    insn->src_reg != BPF_REG_0) {
			verbose(env, "BPF_ST uses reserved fields\n");
			return -EINVAL;
		}
		/* check src operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		/* sample the type before check_mem_access() runs; it is
		 * handed to save_aux_ptr_type() afterwards
		 */
		dst_reg_type = cur_regs(env)[insn->dst_reg].type;

		/* check that memory (dst_reg + off) is writeable */
		err = check_mem_access(env, env->insn_idx, insn->dst_reg,
				       insn->off, BPF_SIZE(insn->code),
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;

		err = save_aux_ptr_type(env, dst_reg_type, false);
		if (err)
			return err;
	} else if (class == BPF_JMP || class == BPF_JMP32) {
		u8 opcode = BPF_OP(insn->code);

		env->jmps_processed++;
		if (opcode == BPF_CALL) {
			/* BPF_CALL must be BPF_K in class BPF_JMP with
			 * dst_reg == 0; off may be non-zero only for kfunc
			 * calls; src_reg selects helper (0), bpf-to-bpf call
			 * or kfunc call
			 */
			if (BPF_SRC(insn->code) != BPF_K ||
			    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL &&
			     insn->off != 0) ||
			    (insn->src_reg != BPF_REG_0 &&
			     insn->src_reg != BPF_PSEUDO_CALL &&
			     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
			    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
				verbose(env, "BPF_CALL uses reserved fields\n");
				return -EINVAL;
			}

			/* while a lock is held only the spin_unlock helper and
			 * kfuncs accepted by kfunc_spin_allowed() (with
			 * off == 0) may be called
			 */
			if (env->cur_state->active_locks) {
				if ((insn->src_reg == BPF_REG_0 &&
				     insn->imm != BPF_FUNC_spin_unlock) ||
				    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
				     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
					verbose(env,
						"function calls are not allowed while holding a lock\n");
					return -EINVAL;
				}
			}
			if (insn->src_reg == BPF_PSEUDO_CALL) {
				err = check_func_call(env, insn, &env->insn_idx);
			} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
				err = check_kfunc_call(env, insn, &env->insn_idx);
				/* a successful bpf_throw call is handled as an
				 * exception exit
				 */
				if (!err && is_bpf_throw_kfunc(insn))
					return process_bpf_exit_full(env, do_print_state, true);
			} else {
				err = check_helper_call(env, insn, &env->insn_idx);
			}
			if (err)
				return err;

			mark_reg_scratched(env, BPF_REG_0);
		} else if (opcode == BPF_JA) {
			/* BPF_JA|BPF_X is the indirect jump (gotox) form */
			if (BPF_SRC(insn->code) == BPF_X) {
				if (insn->src_reg != BPF_REG_0 ||
				    insn->imm != 0 || insn->off != 0) {
					verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
					return -EINVAL;
				}
				return check_indirect_jump(env, insn);
			}

			if (BPF_SRC(insn->code) != BPF_K ||
			    insn->src_reg != BPF_REG_0 ||
			    insn->dst_reg != BPF_REG_0 ||
			    (class == BPF_JMP && insn->imm != 0) ||
			    (class == BPF_JMP32 && insn->off != 0)) {
				verbose(env, "BPF_JA uses reserved fields\n");
				return -EINVAL;
			}

			/* the displacement lives in off for BPF_JMP and in imm
			 * for BPF_JMP32; the other field was checked to be 0
			 */
			if (class == BPF_JMP)
				env->insn_idx += insn->off + 1;
			else
				env->insn_idx += insn->imm + 1;
			return 0;
		} else if (opcode == BPF_EXIT) {
			if (BPF_SRC(insn->code) != BPF_K ||
			    insn->imm != 0 ||
			    insn->src_reg != BPF_REG_0 ||
			    insn->dst_reg != BPF_REG_0 ||
			    class == BPF_JMP32) {
				verbose(env, "BPF_EXIT uses reserved fields\n");
				return -EINVAL;
			}
			return process_bpf_exit_full(env, do_print_state, false);
		} else {
			err = check_cond_jmp_op(env, insn, &env->insn_idx);
			if (err)
				return err;
		}
	} else if (class == BPF_LD) {
		u8 mode = BPF_MODE(insn->code);

		if (mode == BPF_ABS || mode == BPF_IND) {
			err = check_ld_abs(env, insn);
			if (err)
				return err;

		} else if (mode == BPF_IMM) {
			err = check_ld_imm(env, insn);
			if (err)
				return err;

			/* NOTE(review): extra step, on top of the common
			 * increment at the end of this function; presumably
			 * skips the second slot of a two-slot ld_imm64 and
			 * marks it as seen - confirm
			 */
			env->insn_idx++;
			sanitize_mark_insn_seen(env);
		} else {
			verbose(env, "invalid BPF_LD mode\n");
			return -EINVAL;
		}
	} else {
		verbose(env, "unknown insn class %d\n", class);
		return -EINVAL;
	}

	/* common case: fall through to the next instruction */
	env->insn_idx++;
	return 0;
}
21243
/*
 * Main verification loop.
 *
 * Each iteration handles the instruction at env->insn_idx: it bounds-checks the
 * index, accounts for the processed instruction, optionally prunes the path
 * via is_state_visited(), logs the instruction, and hands it to
 * do_check_insn(). When a path ends (do_check_insn() returned
 * PROCESS_BPF_EXIT, the state was pruned, or a speculative path is cut at a
 * nospec), the code at the process_bpf_exit label pops the next pending state
 * with pop_stack(); the loop finishes when pop_stack() reports -ENOENT.
 *
 * Returns 0 when the loop finishes that way, otherwise a negative error:
 * -EFAULT for an out-of-range instruction index or a verifier_bug_if() hit,
 * -E2BIG when more than BPF_COMPLEXITY_LIMIT_INSNS instructions were
 * processed, -EAGAIN when a signal is pending, or the error returned by one
 * of the helpers called below.
 */
static int do_check(struct bpf_verifier_env *env)
{
	/* passed to pop_stack(); true unless BPF_LOG_LEVEL2 logging is enabled */
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	/* set after a successful pop_stack() so the "from X to Y" header is printed */
	bool do_print_state = false;
	int prev_insn_idx = -1;

	for (;;) {
		struct bpf_insn *insn;
		struct bpf_insn_aux_data *insn_aux;
		int err, marks_err;

		/* reset current history entry on each new instruction */
		env->cur_hist_ent = NULL;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		insn_aux = &env->insn_aux_data[env->insn_idx];

		/* every loop iteration counts against the complexity limit */
		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		state->last_insn_idx = env->prev_insn_idx;
		state->insn_idx = env->insn_idx;

		if (is_prune_point(env, env->insn_idx)) {
			err = is_state_visited(env, env->insn_idx);
			if (err < 0)
				return err;
			if (err == 1) {
				/* found equivalent state, can prune the search */
				if (env->log.level & BPF_LOG_LEVEL) {
					if (do_print_state)
						verbose(env, "\nfrom %d to %d%s: safe\n",
							env->prev_insn_idx, env->insn_idx,
							env->cur_state->speculative ?
							" (speculative execution)" : "");
					else
						verbose(env, "%d: safe\n", env->insn_idx);
				}
				goto process_bpf_exit;
			}
		}

		if (is_jmp_point(env, env->insn_idx)) {
			err = push_jmp_history(env, state, 0, 0);
			if (err)
				return err;
		}

		/* give up with -EAGAIN if the current task has a signal pending */
		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		/* header for a state that was just popped; printed once per pop */
		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
				env->prev_insn_idx, env->insn_idx,
				env->cur_state->speculative ?
				" (speculative execution)" : "");
			print_verifier_state(env, state, state->curframe, true);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			if (verifier_state_scratched(env))
				print_insn_state(env, state, state->curframe);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			verbose_insn(env, insn);
			/* remember how many log bytes the "<idx>: <insn>" text took */
			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
			env->prev_log_pos = env->log.end_pos;
		}

		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		sanitize_mark_insn_seen(env);
		prev_insn_idx = env->insn_idx;

		/* Reduce verification complexity by stopping speculative path
		 * verification when a nospec is encountered.
		 */
		if (state->speculative && insn_aux->nospec)
			goto process_bpf_exit;

		err = bpf_reset_stack_write_marks(env, env->insn_idx);
		if (err)
			return err;
		err = do_check_insn(env, &do_print_state);
		/* commit the stack write marks on success and on errors that are
		 * recoverable with a nospec; other errors skip the commit
		 */
		if (err >= 0 || error_recoverable_with_nospec(err)) {
			marks_err = bpf_commit_stack_write_marks(env);
			if (marks_err)
				return marks_err;
		}
		if (error_recoverable_with_nospec(err) && state->speculative) {
			/* Prevent this speculative path from ever reaching the
			 * insn that would have been unsafe to execute.
			 */
			insn_aux->nospec = true;
			/* If it was an ADD/SUB insn, potentially remove any
			 * markings for alu sanitization.
			 */
			insn_aux->alu_state = 0;
			goto process_bpf_exit;
		} else if (err < 0) {
			return err;
		} else if (err == PROCESS_BPF_EXIT) {
			goto process_bpf_exit;
		}
		/* only err == 0 is expected to reach this point */
		WARN_ON_ONCE(err);

		if (state->speculative && insn_aux->nospec_result) {
			/* If we are on a path that performed a jump-op, this
			 * may skip a nospec patched-in after the jump. This can
			 * currently never happen because nospec_result is only
			 * used for the write-ops
			 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
			 * calls. These must never skip the following insn
			 * (i.e., bpf_insn_successors()'s opcode_info.can_jump
			 * is false). Still, add a warning to document this in
			 * case nospec_result is used elsewhere in the future.
			 *
			 * All non-branch instructions have a single
			 * fall-through edge. For these, nospec_result should
			 * already work.
			 */
			if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
					     BPF_CLASS(insn->code) == BPF_JMP32) &&
					    BPF_OP(insn->code) != BPF_CALL, env,
					    "speculation barrier after jump instruction may not have the desired effect"))
				return -EFAULT;
			/* End of the current path. Reached either by falling
			 * through from the check above or via one of the
			 * 'goto process_bpf_exit' statements earlier in the
			 * loop body.
			 */
process_bpf_exit:
			mark_verifier_state_scratched(env);
			err = update_branch_counts(env, env->cur_state);
			if (err)
				return err;
			err = bpf_update_live_stack(env);
			if (err)
				return err;
			/* pop_stack() fills in prev_insn_idx and env->insn_idx
			 * for the next state to verify
			 */
			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
					pop_log);
			if (err < 0) {
				/* -ENOENT ends the loop normally; anything
				 * else is a real error
				 */
				if (err != -ENOENT)
					return err;
				break;
			} else {
				do_print_state = true;
				continue;
			}
		}
	}

	return 0;
}
21418
find_btf_percpu_datasec(struct btf * btf)21419 static int find_btf_percpu_datasec(struct btf *btf)
21420 {
21421 const struct btf_type *t;
21422 const char *tname;
21423 int i, n;
21424
21425 /*
21426 * Both vmlinux and module each have their own ".data..percpu"
21427 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
21428 * types to look at only module's own BTF types.
21429 */
21430 n = btf_nr_types(btf);
21431 for (i = btf_named_start_id(btf, true); i < n; i++) {
21432 t = btf_type_by_id(btf, i);
21433 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
21434 continue;
21435
21436 tname = btf_name_by_offset(btf, t->name_off);
21437 if (!strcmp(tname, ".data..percpu"))
21438 return i;
21439 }
21440
21441 return -ENOENT;
21442 }
21443
21444 /*
21445 * Add btf to the env->used_btfs array. If needed, refcount the
21446 * corresponding kernel module. To simplify caller's logic
21447 * in case of error or if btf was added before the function
21448 * decreases the btf refcount.
21449 */
__add_used_btf(struct bpf_verifier_env * env,struct btf * btf)21450 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
21451 {
21452 struct btf_mod_pair *btf_mod;
21453 int ret = 0;
21454 int i;
21455
21456 /* check whether we recorded this BTF (and maybe module) already */
21457 for (i = 0; i < env->used_btf_cnt; i++)
21458 if (env->used_btfs[i].btf == btf)
21459 goto ret_put;
21460
21461 if (env->used_btf_cnt >= MAX_USED_BTFS) {
21462 verbose(env, "The total number of btfs per program has reached the limit of %u\n",
21463 MAX_USED_BTFS);
21464 ret = -E2BIG;
21465 goto ret_put;
21466 }
21467
21468 btf_mod = &env->used_btfs[env->used_btf_cnt];
21469 btf_mod->btf = btf;
21470 btf_mod->module = NULL;
21471
21472 /* if we reference variables from kernel module, bump its refcount */
21473 if (btf_is_module(btf)) {
21474 btf_mod->module = btf_try_get_module(btf);
21475 if (!btf_mod->module) {
21476 ret = -ENXIO;
21477 goto ret_put;
21478 }
21479 }
21480
21481 env->used_btf_cnt++;
21482 return 0;
21483
21484 ret_put:
21485 /* Either error or this BTF was already added */
21486 btf_put(btf);
21487 return ret;
21488 }
21489
21490 /* replace pseudo btf_id with kernel symbol address */
__check_pseudo_btf_id(struct bpf_verifier_env * env,struct bpf_insn * insn,struct bpf_insn_aux_data * aux,struct btf * btf)21491 static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
21492 struct bpf_insn *insn,
21493 struct bpf_insn_aux_data *aux,
21494 struct btf *btf)
21495 {
21496 const struct btf_var_secinfo *vsi;
21497 const struct btf_type *datasec;
21498 const struct btf_type *t;
21499 const char *sym_name;
21500 bool percpu = false;
21501 u32 type, id = insn->imm;
21502 s32 datasec_id;
21503 u64 addr;
21504 int i;
21505
21506 t = btf_type_by_id(btf, id);
21507 if (!t) {
21508 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
21509 return -ENOENT;
21510 }
21511
21512 if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
21513 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
21514 return -EINVAL;
21515 }
21516
21517 sym_name = btf_name_by_offset(btf, t->name_off);
21518 addr = kallsyms_lookup_name(sym_name);
21519 if (!addr) {
21520 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
21521 sym_name);
21522 return -ENOENT;
21523 }
21524 insn[0].imm = (u32)addr;
21525 insn[1].imm = addr >> 32;
21526
21527 if (btf_type_is_func(t)) {
21528 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
21529 aux->btf_var.mem_size = 0;
21530 return 0;
21531 }
21532
21533 datasec_id = find_btf_percpu_datasec(btf);
21534 if (datasec_id > 0) {
21535 datasec = btf_type_by_id(btf, datasec_id);
21536 for_each_vsi(i, datasec, vsi) {
21537 if (vsi->type == id) {
21538 percpu = true;
21539 break;
21540 }
21541 }
21542 }
21543
21544 type = t->type;
21545 t = btf_type_skip_modifiers(btf, type, NULL);
21546 if (percpu) {
21547 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
21548 aux->btf_var.btf = btf;
21549 aux->btf_var.btf_id = type;
21550 } else if (!btf_type_is_struct(t)) {
21551 const struct btf_type *ret;
21552 const char *tname;
21553 u32 tsize;
21554
21555 /* resolve the type size of ksym. */
21556 ret = btf_resolve_size(btf, t, &tsize);
21557 if (IS_ERR(ret)) {
21558 tname = btf_name_by_offset(btf, t->name_off);
21559 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
21560 tname, PTR_ERR(ret));
21561 return -EINVAL;
21562 }
21563 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
21564 aux->btf_var.mem_size = tsize;
21565 } else {
21566 aux->btf_var.reg_type = PTR_TO_BTF_ID;
21567 aux->btf_var.btf = btf;
21568 aux->btf_var.btf_id = type;
21569 }
21570
21571 return 0;
21572 }
21573
check_pseudo_btf_id(struct bpf_verifier_env * env,struct bpf_insn * insn,struct bpf_insn_aux_data * aux)21574 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
21575 struct bpf_insn *insn,
21576 struct bpf_insn_aux_data *aux)
21577 {
21578 struct btf *btf;
21579 int btf_fd;
21580 int err;
21581
21582 btf_fd = insn[1].imm;
21583 if (btf_fd) {
21584 btf = btf_get_by_fd(btf_fd);
21585 if (IS_ERR(btf)) {
21586 verbose(env, "invalid module BTF object FD specified.\n");
21587 return -EINVAL;
21588 }
21589 } else {
21590 if (!btf_vmlinux) {
21591 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
21592 return -EINVAL;
21593 }
21594 btf_get(btf_vmlinux);
21595 btf = btf_vmlinux;
21596 }
21597
21598 err = __check_pseudo_btf_id(env, insn, aux, btf);
21599 if (err) {
21600 btf_put(btf);
21601 return err;
21602 }
21603
21604 return __add_used_btf(env, btf);
21605 }
21606
is_tracing_prog_type(enum bpf_prog_type type)21607 static bool is_tracing_prog_type(enum bpf_prog_type type)
21608 {
21609 switch (type) {
21610 case BPF_PROG_TYPE_KPROBE:
21611 case BPF_PROG_TYPE_TRACEPOINT:
21612 case BPF_PROG_TYPE_PERF_EVENT:
21613 case BPF_PROG_TYPE_RAW_TRACEPOINT:
21614 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
21615 return true;
21616 default:
21617 return false;
21618 }
21619 }
21620
bpf_map_is_cgroup_storage(struct bpf_map * map)21621 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
21622 {
21623 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
21624 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
21625 }
21626
check_map_prog_compatibility(struct bpf_verifier_env * env,struct bpf_map * map,struct bpf_prog * prog)21627 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
21628 struct bpf_map *map,
21629 struct bpf_prog *prog)
21630
21631 {
21632 enum bpf_prog_type prog_type = resolve_prog_type(prog);
21633
21634 if (map->excl_prog_sha &&
21635 memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
21636 verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
21637 return -EACCES;
21638 }
21639
21640 if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
21641 btf_record_has_field(map->record, BPF_RB_ROOT)) {
21642 if (is_tracing_prog_type(prog_type)) {
21643 verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
21644 return -EINVAL;
21645 }
21646 }
21647
21648 if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
21649 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
21650 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
21651 return -EINVAL;
21652 }
21653
21654 if (is_tracing_prog_type(prog_type)) {
21655 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
21656 return -EINVAL;
21657 }
21658 }
21659
21660 if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
21661 !bpf_offload_prog_map_match(prog, map)) {
21662 verbose(env, "offload device mismatch between prog and map\n");
21663 return -EINVAL;
21664 }
21665
21666 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
21667 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
21668 return -EINVAL;
21669 }
21670
21671 if (prog->sleepable)
21672 switch (map->map_type) {
21673 case BPF_MAP_TYPE_HASH:
21674 case BPF_MAP_TYPE_LRU_HASH:
21675 case BPF_MAP_TYPE_ARRAY:
21676 case BPF_MAP_TYPE_PERCPU_HASH:
21677 case BPF_MAP_TYPE_PERCPU_ARRAY:
21678 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
21679 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
21680 case BPF_MAP_TYPE_HASH_OF_MAPS:
21681 case BPF_MAP_TYPE_RINGBUF:
21682 case BPF_MAP_TYPE_USER_RINGBUF:
21683 case BPF_MAP_TYPE_INODE_STORAGE:
21684 case BPF_MAP_TYPE_SK_STORAGE:
21685 case BPF_MAP_TYPE_TASK_STORAGE:
21686 case BPF_MAP_TYPE_CGRP_STORAGE:
21687 case BPF_MAP_TYPE_QUEUE:
21688 case BPF_MAP_TYPE_STACK:
21689 case BPF_MAP_TYPE_ARENA:
21690 case BPF_MAP_TYPE_INSN_ARRAY:
21691 case BPF_MAP_TYPE_PROG_ARRAY:
21692 break;
21693 default:
21694 verbose(env,
21695 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
21696 return -EINVAL;
21697 }
21698
21699 if (bpf_map_is_cgroup_storage(map) &&
21700 bpf_cgroup_storage_assign(env->prog->aux, map)) {
21701 verbose(env, "only one cgroup storage of each type is allowed\n");
21702 return -EBUSY;
21703 }
21704
21705 if (map->map_type == BPF_MAP_TYPE_ARENA) {
21706 if (env->prog->aux->arena) {
21707 verbose(env, "Only one arena per program\n");
21708 return -EBUSY;
21709 }
21710 if (!env->allow_ptr_leaks || !env->bpf_capable) {
21711 verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
21712 return -EPERM;
21713 }
21714 if (!env->prog->jit_requested) {
21715 verbose(env, "JIT is required to use arena\n");
21716 return -EOPNOTSUPP;
21717 }
21718 if (!bpf_jit_supports_arena()) {
21719 verbose(env, "JIT doesn't support arena\n");
21720 return -EOPNOTSUPP;
21721 }
21722 env->prog->aux->arena = (void *)map;
21723 if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
21724 verbose(env, "arena's user address must be set via map_extra or mmap()\n");
21725 return -EINVAL;
21726 }
21727 }
21728
21729 return 0;
21730 }
21731
__add_used_map(struct bpf_verifier_env * env,struct bpf_map * map)21732 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
21733 {
21734 int i, err;
21735
21736 /* check whether we recorded this map already */
21737 for (i = 0; i < env->used_map_cnt; i++)
21738 if (env->used_maps[i] == map)
21739 return i;
21740
21741 if (env->used_map_cnt >= MAX_USED_MAPS) {
21742 verbose(env, "The total number of maps per program has reached the limit of %u\n",
21743 MAX_USED_MAPS);
21744 return -E2BIG;
21745 }
21746
21747 err = check_map_prog_compatibility(env, map, env->prog);
21748 if (err)
21749 return err;
21750
21751 if (env->prog->sleepable)
21752 atomic64_inc(&map->sleepable_refcnt);
21753
21754 /* hold the map. If the program is rejected by verifier,
21755 * the map will be released by release_maps() or it
21756 * will be used by the valid program until it's unloaded
21757 * and all maps are released in bpf_free_used_maps()
21758 */
21759 bpf_map_inc(map);
21760
21761 env->used_maps[env->used_map_cnt++] = map;
21762
21763 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
21764 err = bpf_insn_array_init(map, env->prog);
21765 if (err) {
21766 verbose(env, "Failed to properly initialize insn array\n");
21767 return err;
21768 }
21769 env->insn_array_maps[env->insn_array_map_cnt++] = map;
21770 }
21771
21772 return env->used_map_cnt - 1;
21773 }
21774
21775 /* Add map behind fd to used maps list, if it's not already there, and return
21776 * its index.
21777 * Returns <0 on error, or >= 0 index, on success.
21778 */
add_used_map(struct bpf_verifier_env * env,int fd)21779 static int add_used_map(struct bpf_verifier_env *env, int fd)
21780 {
21781 struct bpf_map *map;
21782 CLASS(fd, f)(fd);
21783
21784 map = __bpf_map_get(f);
21785 if (IS_ERR(map)) {
21786 verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
21787 return PTR_ERR(map);
21788 }
21789
21790 return __add_used_map(env, map);
21791 }
21792
/* find and rewrite pseudo imm in ld_imm64 instructions:
 *
 * 1. if it accesses map FD, replace it with actual map pointer.
 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
 *
 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
 *
 * The pass walks every instruction of env->prog once. Along the way it also
 * rejects BPF_LDX instructions that use reserved fields, malformed two-slot
 * ld_imm64 instructions, and opcodes that bpf_opcode_in_insntable() does not
 * know. The program tag is computed with bpf_prog_calc_tag() before the walk.
 *
 * Returns 0 on success or a negative error (-EINVAL for malformed
 * instructions, -EPROTO / -EFAULT for fd_array problems, or the error of a
 * helper called below).
 */
static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, err;

	err = bpf_prog_calc_tag(env->prog);
	if (err)
		return err;

	for (i = 0; i < insn_cnt; i++, insn++) {
		/* LDX must use MEM or MEMSX mode and have imm == 0 */
		if (BPF_CLASS(insn->code) == BPF_LDX &&
		    ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
		    insn->imm != 0)) {
			verbose(env, "BPF_LDX uses reserved fields\n");
			return -EINVAL;
		}

		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
			struct bpf_insn_aux_data *aux;
			struct bpf_map *map;
			int map_idx;
			u64 addr;
			u32 fd;

			/* ld_imm64 occupies two slots: it must not be the last
			 * insn, and the second slot may only carry an imm
			 */
			if (i == insn_cnt - 1 || insn[1].code != 0 ||
			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
			    insn[1].off != 0) {
				verbose(env, "invalid bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			if (insn[0].src_reg == 0)
				/* valid generic load 64-bit imm */
				goto next_insn;

			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
				aux = &env->insn_aux_data[i];
				err = check_pseudo_btf_id(env, insn, aux);
				if (err)
					return err;
				goto next_insn;
			}

			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
				aux = &env->insn_aux_data[i];
				aux->ptr_type = PTR_TO_FUNC;
				goto next_insn;
			}

			/* In final convert_pseudo_ld_imm64() step, this is
			 * converted into regular 64-bit imm load insn.
			 */
			/* Only the four map pseudo kinds remain valid here. The
			 * MAP_FD / MAP_IDX forms additionally require
			 * insn[1].imm == 0; the *_VALUE forms use it as an
			 * offset (see below).
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_VALUE:
			case BPF_PSEUDO_MAP_IDX_VALUE:
				break;
			case BPF_PSEUDO_MAP_FD:
			case BPF_PSEUDO_MAP_IDX:
				if (insn[1].imm == 0)
					break;
				fallthrough;
			default:
				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
				return -EINVAL;
			}

			/* Determine the map fd: the IDX forms treat insn[0].imm
			 * as an index into env->fd_array, the other forms carry
			 * the fd directly in insn[0].imm.
			 */
			switch (insn[0].src_reg) {
			case BPF_PSEUDO_MAP_IDX_VALUE:
			case BPF_PSEUDO_MAP_IDX:
				if (bpfptr_is_null(env->fd_array)) {
					verbose(env, "fd_idx without fd_array is invalid\n");
					return -EPROTO;
				}
				if (copy_from_bpfptr_offset(&fd, env->fd_array,
							    insn[0].imm * sizeof(fd),
							    sizeof(fd)))
					return -EFAULT;
				break;
			default:
				fd = insn[0].imm;
				break;
			}

			map_idx = add_used_map(env, fd);
			if (map_idx < 0)
				return map_idx;
			map = env->used_maps[map_idx];

			aux = &env->insn_aux_data[i];
			aux->map_index = map_idx;

			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
				/* load the map pointer itself */
				addr = (unsigned long)map;
			} else {
				/* load the address of the map value at offset
				 * insn[1].imm; needs map_direct_value_addr()
				 */
				u32 off = insn[1].imm;

				if (!map->ops->map_direct_value_addr) {
					verbose(env, "no direct value access support for this map type\n");
					return -EINVAL;
				}

				err = map->ops->map_direct_value_addr(map, &addr, off);
				if (err) {
					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
						map->value_size, off);
					return err;
				}

				aux->map_off = off;
				addr += off;
			}

			/* store the 64-bit address in the two immediates */
			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;

			/* Step over the second slot of the ld_imm64; together
			 * with the loop increment this advances by two insns.
			 * The 'continue' also skips the opcode check below.
			 */
next_insn:
			insn++;
			i++;
			continue;
		}

		/* Basic sanity check before we invest more work here. */
		if (!bpf_opcode_in_insntable(insn->code)) {
			verbose(env, "unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}
	}

	/* now all pseudo BPF_LD_IMM64 instructions load valid
	 * 'struct bpf_map *' into a register instead of user map_fd.
	 * These pointers will be used later by verifier to validate map access.
	 */
	return 0;
}
21936
21937 /* drop refcnt of maps used by the rejected program */
release_maps(struct bpf_verifier_env * env)21938 static void release_maps(struct bpf_verifier_env *env)
21939 {
21940 __bpf_free_used_maps(env->prog->aux, env->used_maps,
21941 env->used_map_cnt);
21942 }
21943
21944 /* drop refcnt of maps used by the rejected program */
release_btfs(struct bpf_verifier_env * env)21945 static void release_btfs(struct bpf_verifier_env *env)
21946 {
21947 __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
21948 }
21949
21950 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
convert_pseudo_ld_imm64(struct bpf_verifier_env * env)21951 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
21952 {
21953 struct bpf_insn *insn = env->prog->insnsi;
21954 int insn_cnt = env->prog->len;
21955 int i;
21956
21957 for (i = 0; i < insn_cnt; i++, insn++) {
21958 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
21959 continue;
21960 if (insn->src_reg == BPF_PSEUDO_FUNC)
21961 continue;
21962 insn->src_reg = 0;
21963 }
21964 }
21965
21966 /* single env->prog->insni[off] instruction was replaced with the range
21967 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
21968 * [0, off) and [off, end) to new locations, so the patched range stays zero
21969 */
adjust_insn_aux_data(struct bpf_verifier_env * env,struct bpf_prog * new_prog,u32 off,u32 cnt)21970 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
21971 struct bpf_prog *new_prog, u32 off, u32 cnt)
21972 {
21973 struct bpf_insn_aux_data *data = env->insn_aux_data;
21974 struct bpf_insn *insn = new_prog->insnsi;
21975 u32 old_seen = data[off].seen;
21976 u32 prog_len;
21977 int i;
21978
21979 /* aux info at OFF always needs adjustment, no matter fast path
21980 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
21981 * original insn at old prog.
21982 */
21983 data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
21984
21985 if (cnt == 1)
21986 return;
21987 prog_len = new_prog->len;
21988
21989 memmove(data + off + cnt - 1, data + off,
21990 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
21991 memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
21992 for (i = off; i < off + cnt - 1; i++) {
21993 /* Expand insni[off]'s seen count to the patched range. */
21994 data[i].seen = old_seen;
21995 data[i].zext_dst = insn_has_def32(insn + i);
21996 }
21997 }
21998
/*
 * After the instruction at @off was replaced by @len instructions, move the
 * start of every subprog that begins after @off forward by len - 1.
 */
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int idx;

	if (len == 1)
		return;

	/* '<=' on purpose: the fake 'exit' subprog is adjusted too */
	for (idx = 0; idx <= env->subprog_cnt; idx++) {
		if (env->subprog_info[idx].start > off)
			env->subprog_info[idx].start += len - 1;
	}
}
22012
release_insn_arrays(struct bpf_verifier_env * env)22013 static void release_insn_arrays(struct bpf_verifier_env *env)
22014 {
22015 int i;
22016
22017 for (i = 0; i < env->insn_array_map_cnt; i++)
22018 bpf_insn_array_release(env->insn_array_maps[i]);
22019 }
22020
/*
 * Propagate a patch (instruction at @off replaced by @len instructions) to
 * every recorded instruction-array map. Nothing to do when len == 1.
 */
static void adjust_insn_arrays(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int idx = 0;

	if (len == 1)
		return;

	while (idx < env->insn_array_map_cnt) {
		bpf_insn_array_adjust(env->insn_array_maps[idx], off, len);
		idx++;
	}
}
22031
/*
 * Propagate the removal of @len instructions starting at @off to every
 * recorded instruction-array map.
 */
static void adjust_insn_arrays_after_remove(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int idx = 0;

	while (idx < env->insn_array_map_cnt) {
		bpf_insn_array_adjust_after_remove(env->insn_array_maps[idx], off, len);
		idx++;
	}
}
22039
/*
 * After the instruction at @off was replaced by @len instructions, move the
 * insn_idx of every poke descriptor that lies after @off forward by len - 1.
 */
static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
{
	struct bpf_jit_poke_descriptor *descs = prog->aux->poke_tab;
	int nr_descs = prog->aux->size_poke_tab;
	int idx;

	for (idx = 0; idx < nr_descs; idx++) {
		if (descs[idx].insn_idx > off)
			descs[idx].insn_idx += len - 1;
	}
}
22053
/*
 * Replace the instruction at @off with the @len instructions in @patch and
 * keep the verifier's side tables in sync: insn_aux_data, subprog starts,
 * instruction-array maps and poke descriptors.
 *
 * When the program grows (len > 1) env->insn_aux_data is reallocated first,
 * so that a later patching failure leaves a larger-than-needed but valid
 * array behind.
 *
 * Returns the patched program, or NULL on allocation or patching failure.
 * A patching failure of -ERANGE is additionally reported in the log.
 */
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
					    const struct bpf_insn *patch, u32 len)
{
	struct bpf_prog *patched;

	if (len > 1) {
		struct bpf_insn_aux_data *grown;
		size_t bytes;

		bytes = array_size(env->prog->len + len - 1,
				   sizeof(struct bpf_insn_aux_data));
		grown = vrealloc(env->insn_aux_data, bytes,
				 GFP_KERNEL_ACCOUNT | __GFP_ZERO);
		if (!grown)
			return NULL;

		env->insn_aux_data = grown;
	}

	patched = bpf_patch_insn_single(env->prog, off, patch, len);
	if (!IS_ERR(patched)) {
		adjust_insn_aux_data(env, patched, off, len);
		adjust_subprog_starts(env, off, len);
		adjust_insn_arrays(env, off, len);
		adjust_poke_descs(patched, off, len);
		return patched;
	}

	if (PTR_ERR(patched) == -ERANGE)
		verbose(env,
			"insn %d cannot be patched due to 16-bit range\n",
			env->insn_aux_data[off].orig_idx);
	return NULL;
}
22085
22086 /*
22087 * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
22088 * jump offset by 'delta'.
22089 */
adjust_jmp_off(struct bpf_prog * prog,u32 tgt_idx,u32 delta)22090 static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
22091 {
22092 struct bpf_insn *insn = prog->insnsi;
22093 u32 insn_cnt = prog->len, i;
22094 s32 imm;
22095 s16 off;
22096
22097 for (i = 0; i < insn_cnt; i++, insn++) {
22098 u8 code = insn->code;
22099
22100 if (tgt_idx <= i && i < tgt_idx + delta)
22101 continue;
22102
22103 if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
22104 BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
22105 continue;
22106
22107 if (insn->code == (BPF_JMP32 | BPF_JA)) {
22108 if (i + 1 + insn->imm != tgt_idx)
22109 continue;
22110 if (check_add_overflow(insn->imm, delta, &imm))
22111 return -ERANGE;
22112 insn->imm = imm;
22113 } else {
22114 if (i + 1 + insn->off != tgt_idx)
22115 continue;
22116 if (check_add_overflow(insn->off, delta, &off))
22117 return -ERANGE;
22118 insn->off = off;
22119 }
22120 }
22121 return 0;
22122 }
22123
/*
 * Update env->subprog_info (and the program's func_info, if present) after
 * the instructions [off, off + cnt) were removed.
 *
 * Subprogs in the index range [i, j) computed below are dropped from both
 * arrays. Remaining subprogs from the first one to adjust onwards, including
 * the fake 'exit' subprog, have their start moved back by cnt.
 *
 * Always returns 0.
 */
static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
					      u32 off, u32 cnt)
{
	int i, j;

	/* find first prog starting at or after off (first to remove) */
	for (i = 0; i < env->subprog_cnt; i++)
		if (env->subprog_info[i].start >= off)
			break;
	/* find first prog starting at or after off + cnt (first to stay) */
	for (j = i; j < env->subprog_cnt; j++)
		if (env->subprog_info[j].start >= off + cnt)
			break;
	/* if j doesn't start exactly at off + cnt, we are just removing
	 * the front of previous prog
	 */
	if (env->subprog_info[j].start != off + cnt)
		j--;

	if (j > i) {
		struct bpf_prog_aux *aux = env->prog->aux;
		int move;

		/* move fake 'exit' subprog as well */
		move = env->subprog_cnt + 1 - j;

		/* entries [j, subprog_cnt] slide down to index i */
		memmove(env->subprog_info + i,
			env->subprog_info + j,
			sizeof(*env->subprog_info) * move);
		env->subprog_cnt -= j - i;

		/* remove func_info */
		if (aux->func_info) {
			move = aux->func_info_cnt - j;

			memmove(aux->func_info + i,
				aux->func_info + j,
				sizeof(*aux->func_info) * move);
			aux->func_info_cnt -= j - i;
			/* func_info->insn_off is set after all code rewrites,
			 * in adjust_btf_func() - no need to adjust
			 */
		}
	} else {
		/* convert i from "first prog to remove" to "first to adjust" */
		if (env->subprog_info[i].start == off)
			i++;
	}

	/* update fake 'exit' subprog as well */
	for (; i <= env->subprog_cnt; i++)
		env->subprog_info[i].start -= cnt;

	return 0;
}
22179
/*
 * Update the program's line info after the instructions [off, off + cnt)
 * were removed: drop line info entries that referred to removed
 * instructions, pull the insn_off of the remaining later entries in by cnt,
 * and fix up each subprog's linfo_idx accordingly.
 *
 * Expects env->prog to have been shrunk already (prog->len is compared
 * against off below). Does nothing if the program has no line info.
 *
 * Always returns 0.
 */
static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
				      u32 cnt)
{
	struct bpf_prog *prog = env->prog;
	u32 i, l_off, l_cnt, nr_linfo;
	struct bpf_line_info *linfo;

	nr_linfo = prog->aux->nr_linfo;
	if (!nr_linfo)
		return 0;

	linfo = prog->aux->linfo;

	/* find first line info to remove, count lines to be removed */
	for (i = 0; i < nr_linfo; i++)
		if (linfo[i].insn_off >= off)
			break;

	/* l_off: index of the first entry to remove; l_cnt: how many */
	l_off = i;
	l_cnt = 0;
	for (; i < nr_linfo; i++)
		if (linfo[i].insn_off < off + cnt)
			l_cnt++;
		else
			break;

	/* First live insn doesn't match first live linfo, it needs to "inherit"
	 * last removed linfo. prog is already modified, so prog->len == off
	 * means no live instructions after (tail of the program was removed).
	 */
	if (prog->len != off && l_cnt &&
	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
		l_cnt--;
		linfo[--i].insn_off = off + cnt;
	}

	/* remove the line info which refer to the removed instructions */
	if (l_cnt) {
		memmove(linfo + l_off, linfo + i,
			sizeof(*linfo) * (nr_linfo - i));

		prog->aux->nr_linfo -= l_cnt;
		nr_linfo = prog->aux->nr_linfo;
	}

	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
	for (i = l_off; i < nr_linfo; i++)
		linfo[i].insn_off -= cnt;

	/* fix up all subprogs (incl. 'exit') which start >= off */
	for (i = 0; i <= env->subprog_cnt; i++)
		if (env->subprog_info[i].linfo_idx > l_off) {
			/* program may have started in the removed region but
			 * may not be fully removed
			 */
			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
				env->subprog_info[i].linfo_idx -= l_cnt;
			else
				env->subprog_info[i].linfo_idx = l_off;
		}

	return 0;
}
22243
22244 /*
22245 * Clean up dynamically allocated fields of aux data for instructions [start, ...]
22246 */
clear_insn_aux_data(struct bpf_verifier_env * env,int start,int len)22247 static void clear_insn_aux_data(struct bpf_verifier_env *env, int start, int len)
22248 {
22249 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
22250 struct bpf_insn *insns = env->prog->insnsi;
22251 int end = start + len;
22252 int i;
22253
22254 for (i = start; i < end; i++) {
22255 if (aux_data[i].jt) {
22256 kvfree(aux_data[i].jt);
22257 aux_data[i].jt = NULL;
22258 }
22259
22260 if (bpf_is_ldimm64(&insns[i]))
22261 i++;
22262 }
22263 }
22264
/*
 * Remove @cnt instructions starting at @off from env->prog and bring the
 * verifier's bookkeeping in line: offload state, per-insn aux data, subprog
 * starts, line info and instruction-array maps.
 *
 * Returns 0 on success or the first error reported by a helper; in that
 * case the aux data array is not compacted.
 */
static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	unsigned int len_before = env->prog->len;
	int ret;

	if (bpf_prog_is_offloaded(env->prog->aux))
		bpf_prog_offload_remove_insns(env, off, cnt);

	/* must precede bpf_remove_insns(): it looks at prog->insnsi */
	clear_insn_aux_data(env, off, cnt);

	ret = bpf_remove_insns(env->prog, off, cnt);
	if (!ret)
		ret = adjust_subprog_starts_after_remove(env, off, cnt);
	if (!ret)
		ret = bpf_adj_linfo_after_remove(env, off, cnt);
	if (ret)
		return ret;

	adjust_insn_arrays_after_remove(env, off, cnt);

	/* close the gap in the aux data left by the removed instructions */
	memmove(aux + off, aux + off + cnt,
		sizeof(*aux) * (len_before - off - cnt));

	return 0;
}
22296
22297 /* The verifier does more data flow analysis than llvm and will not
22298 * explore branches that are dead at run time. Malicious programs can
22299 * have dead code too. Therefore replace all dead at-run-time code
22300 * with 'ja -1'.
22301 *
22302 * Just nops are not optimal, e.g. if they would sit at the end of the
22303 * program and through another bug we would manage to jump there, then
22304 * we'd execute beyond program memory otherwise. Returning exception
22305 * code also wouldn't work since we can have subprogs where the dead
22306 * code could be located.
22307 */
sanitize_dead_code(struct bpf_verifier_env * env)22308 static void sanitize_dead_code(struct bpf_verifier_env *env)
22309 {
22310 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
22311 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
22312 struct bpf_insn *insn = env->prog->insnsi;
22313 const int insn_cnt = env->prog->len;
22314 int i;
22315
22316 for (i = 0; i < insn_cnt; i++) {
22317 if (aux_data[i].seen)
22318 continue;
22319 memcpy(insn + i, &trap, sizeof(trap));
22320 aux_data[i].zext_dst = false;
22321 }
22322 }
22323
insn_is_cond_jump(u8 code)22324 static bool insn_is_cond_jump(u8 code)
22325 {
22326 u8 op;
22327
22328 op = BPF_OP(code);
22329 if (BPF_CLASS(code) == BPF_JMP32)
22330 return op != BPF_JA;
22331
22332 if (BPF_CLASS(code) != BPF_JMP)
22333 return false;
22334
22335 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
22336 }
22337
opt_hard_wire_dead_code_branches(struct bpf_verifier_env * env)22338 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
22339 {
22340 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
22341 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
22342 struct bpf_insn *insn = env->prog->insnsi;
22343 const int insn_cnt = env->prog->len;
22344 int i;
22345
22346 for (i = 0; i < insn_cnt; i++, insn++) {
22347 if (!insn_is_cond_jump(insn->code))
22348 continue;
22349
22350 if (!aux_data[i + 1].seen)
22351 ja.off = insn->off;
22352 else if (!aux_data[i + 1 + insn->off].seen)
22353 ja.off = 0;
22354 else
22355 continue;
22356
22357 if (bpf_prog_is_offloaded(env->prog->aux))
22358 bpf_prog_offload_replace_insn(env, i, &ja);
22359
22360 memcpy(insn, &ja, sizeof(ja));
22361 }
22362 }
22363
opt_remove_dead_code(struct bpf_verifier_env * env)22364 static int opt_remove_dead_code(struct bpf_verifier_env *env)
22365 {
22366 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
22367 int insn_cnt = env->prog->len;
22368 int i, err;
22369
22370 for (i = 0; i < insn_cnt; i++) {
22371 int j;
22372
22373 j = 0;
22374 while (i + j < insn_cnt && !aux_data[i + j].seen)
22375 j++;
22376 if (!j)
22377 continue;
22378
22379 err = verifier_remove_insns(env, i, j);
22380 if (err)
22381 return err;
22382 insn_cnt = env->prog->len;
22383 }
22384
22385 return 0;
22386 }
22387
/* Instruction encodings that opt_remove_nops() compares against with memcmp() */
static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0);
22390
opt_remove_nops(struct bpf_verifier_env * env)22391 static int opt_remove_nops(struct bpf_verifier_env *env)
22392 {
22393 struct bpf_insn *insn = env->prog->insnsi;
22394 int insn_cnt = env->prog->len;
22395 bool is_may_goto_0, is_ja;
22396 int i, err;
22397
22398 for (i = 0; i < insn_cnt; i++) {
22399 is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0));
22400 is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP));
22401
22402 if (!is_may_goto_0 && !is_ja)
22403 continue;
22404
22405 err = verifier_remove_insns(env, i, 1);
22406 if (err)
22407 return err;
22408 insn_cnt--;
22409 /* Go back one insn to catch may_goto +1; may_goto +0 sequence */
22410 i -= (is_may_goto_0 && i > 0) ? 2 : 1;
22411 }
22412
22413 return 0;
22414 }
22415
/*
 * Walk all instructions of env->prog and, where required, patch extra
 * instructions in right after the instruction that defines a register:
 *
 *  - if the insn's aux data has zext_dst set and either the JIT asks for
 *    explicit zero-extension (bpf_jit_needs_zext()) or the insn is a CMPXCHG,
 *    a zero-extension of the defined register is appended;
 *  - if zext_dst is not set and BPF_F_TEST_RND_HI32 is set in
 *    attr->prog_flags, a random 32-bit value is OR-ed into the upper half of
 *    the defined register (via BPF_REG_AX), unless is_reg64() reports the
 *    definition as 64-bit or the insn is an LDX from PTR_TO_CTX.
 *
 * Returns 0 on success, -ENOMEM when patching fails and -EFAULT when
 * zext_dst is set for an insn that defines no register.
 */
static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
					 const union bpf_attr *attr)
{
	struct bpf_insn *patch;
	/* use env->insn_buf as two independent buffers */
	struct bpf_insn *zext_patch = env->insn_buf;
	struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	/* delta: number of insns added so far; i indexes the unpatched program */
	int i, patch_len, delta = 0, len = env->prog->len;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_prog *new_prog;
	bool rnd_hi32;

	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
	/* Slot 0 of each patch is filled per insn with the original insn; the
	 * remaining slots are templates whose registers/imm are fixed up below.
	 */
	zext_patch[1] = BPF_ZEXT_REG(0);
	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
	for (i = 0; i < len; i++) {
		int adj_idx = i + delta;
		struct bpf_insn insn;
		int load_reg;

		insn = insns[adj_idx];
		load_reg = insn_def_regno(&insn);
		if (!aux[adj_idx].zext_dst) {
			u8 code, class;
			u32 imm_rnd;

			if (!rnd_hi32)
				continue;

			code = insn.code;
			class = BPF_CLASS(code);
			if (load_reg == -1)
				continue;

			/* NOTE: arg "reg" (passed as NULL below) is only used
			 *       for BPF_STX + SRC_OP, so it is safe to pass
			 *       NULL here.
			 */
			if (is_reg64(&insn, load_reg, NULL, DST_OP)) {
				/* BPF_LD | BPF_IMM: also step over the next
				 * slot (presumably the second half of a 64-bit
				 * immediate load - TODO confirm)
				 */
				if (class == BPF_LD &&
				    BPF_MODE(code) == BPF_IMM)
					i++;
				continue;
			}

			/* ctx load could be transformed into wider load. */
			if (class == BPF_LDX &&
			    aux[adj_idx].ptr_type == PTR_TO_CTX)
				continue;

			imm_rnd = get_random_u32();
			rnd_hi32_patch[0] = insn;
			rnd_hi32_patch[1].imm = imm_rnd;
			rnd_hi32_patch[3].dst_reg = load_reg;
			patch = rnd_hi32_patch;
			patch_len = 4;
			goto apply_patch_buffer;
		}

		/* Add in a zero-extend instruction if a) the JIT has requested
		 * it or b) it's a CMPXCHG.
		 *
		 * The latter is because: BPF_CMPXCHG always loads a value into
		 * R0, therefore always zero-extends. However some archs'
		 * equivalent instruction only does this load when the
		 * comparison is successful. This detail of CMPXCHG is
		 * orthogonal to the general zero-extension behaviour of the
		 * CPU, so it's treated independently of bpf_jit_needs_zext.
		 */
		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
			continue;

		/* Zero-extension is done by the caller. */
		if (bpf_pseudo_kfunc_call(&insn))
			continue;

		if (verifier_bug_if(load_reg == -1, env,
				    "zext_dst is set, but no reg is defined"))
			return -EFAULT;

		zext_patch[0] = insn;
		zext_patch[1].dst_reg = load_reg;
		zext_patch[1].src_reg = load_reg;
		patch = zext_patch;
		patch_len = 2;
apply_patch_buffer:
		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
		if (!new_prog)
			return -ENOMEM;
		/* re-read everything that is cached from env after patching */
		env->prog = new_prog;
		insns = new_prog->insnsi;
		aux = env->insn_aux_data;
		delta += patch_len - 1;
	}

	return 0;
}
22516
/* convert load instructions that access fields of a context type into a
 * sequence of instructions that access fields of the underlying structure:
 *     struct __sk_buff    -> struct sk_buff
 *     struct bpf_sock_ops -> struct sock
 *
 * Besides that, this pass:
 *  - patches in the prologue/epilogue produced by ops->gen_prologue() and
 *    ops->gen_epilogue(), if any;
 *  - inserts BPF_ST_NOSPEC() before insns flagged 'nospec' and after stores
 *    flagged 'nospec_result';
 *  - rewrites the mode of loads/stores through BTF, untrusted and arena
 *    pointers to the BPF_PROBE_* variants and counts them in
 *    prog->aux->num_exentries.
 *
 * Returns 0 on success, -ENOMEM when patching fails, -EFAULT on an internal
 * inconsistency, -EOPNOTSUPP for an unsupported sign-extending arena load,
 * or a negative error from add_kfunc_in_insns().
 */
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprogs = env->subprog_info;
	const struct bpf_verifier_ops *ops = env->ops;
	/* delta: number of insns added so far; i indexes the unpatched program */
	int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
	const int insn_cnt = env->prog->len;
	struct bpf_insn *epilogue_buf = env->epilogue_buf;
	struct bpf_insn *insn_buf = env->insn_buf;
	struct bpf_insn *insn;
	u32 target_size, size_default, off;
	struct bpf_prog *new_prog;
	enum bpf_access_type type;
	bool is_narrower_load;
	/* index of the first emitted epilogue; 0 means none emitted yet */
	int epilogue_idx = 0;

	if (ops->gen_epilogue) {
		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
						 -(subprogs[0].stack_depth + 8));
		if (epilogue_cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "epilogue is too long");
			return -EFAULT;
		} else if (epilogue_cnt) {
			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
			cnt = 0;
			subprogs[0].stack_depth += 8;
			insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
						      -subprogs[0].stack_depth);
			insn_buf[cnt++] = env->prog->insnsi[0];
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;
			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	if (ops->gen_prologue || env->seen_direct_write) {
		if (!ops->gen_prologue) {
			verifier_bug(env, "gen_prologue is null");
			return -EFAULT;
		}
		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
					env->prog);
		if (cnt >= INSN_BUF_SIZE) {
			verifier_bug(env, "prologue is too long");
			return -EFAULT;
		} else if (cnt) {
			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			env->prog = new_prog;
			delta += cnt - 1;

			ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
			if (ret < 0)
				return ret;
		}
	}

	if (delta)
		WARN_ON(adjust_jmp_off(env->prog, 0, delta));

	if (bpf_prog_is_offloaded(env->prog->aux))
		return 0;

	/* start at the insn that was at index 0 before the patching above */
	insn = env->prog->insnsi + delta;

	for (i = 0; i < insn_cnt; i++, insn++) {
		bpf_convert_ctx_access_t convert_ctx_access;
		u8 mode;

		if (env->insn_aux_data[i + delta].nospec) {
			WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
			struct bpf_insn *patch = insn_buf;

			*patch++ = BPF_ST_NOSPEC();
			*patch++ = *insn;
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			/* This can not be easily merged with the
			 * nospec_result-case, because an insn may require a
			 * nospec before and after itself. Therefore also do not
			 * 'continue' here but potentially apply further
			 * patching to insn. *insn should equal patch[1] now.
			 */
		}

		/* classify the insn; anything not handled below is skipped */
		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
		    insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
			type = BPF_READ;
		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
			type = BPF_WRITE;
		} else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
			    insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
			   env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
			/* atomic on an arena pointer: switch to the probe variant */
			insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
			env->prog->aux->num_exentries++;
			continue;
		} else if (insn->code == (BPF_JMP | BPF_EXIT) &&
			   epilogue_cnt &&
			   i + delta < subprogs[1].start) {
			/* Generate epilogue for the main prog */
			if (epilogue_idx) {
				/* jump back to the earlier generated epilogue */
				insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
				cnt = 1;
			} else {
				memcpy(insn_buf, epilogue_buf,
				       epilogue_cnt * sizeof(*epilogue_buf));
				cnt = epilogue_cnt;
				/* epilogue_idx cannot be 0. It must have at
				 * least one ctx ptr saving insn before the
				 * epilogue.
				 */
				epilogue_idx = i + delta;
			}
			goto patch_insn_buf;
		} else {
			continue;
		}

		if (type == BPF_WRITE &&
		    env->insn_aux_data[i + delta].nospec_result) {
			/* nospec_result is only used to mitigate Spectre v4 and
			 * to limit verification-time for Spectre v1.
			 */
			struct bpf_insn *patch = insn_buf;

			*patch++ = *insn;
			*patch++ = BPF_ST_NOSPEC();
			cnt = patch - insn_buf;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			continue;
		}

		/* choose the converter by the pointer type recorded for the insn */
		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
		case PTR_TO_CTX:
			if (!ops->convert_ctx_access)
				continue;
			convert_ctx_access = ops->convert_ctx_access;
			break;
		case PTR_TO_SOCKET:
		case PTR_TO_SOCK_COMMON:
			convert_ctx_access = bpf_sock_convert_ctx_access;
			break;
		case PTR_TO_TCP_SOCK:
			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
			break;
		case PTR_TO_XDP_SOCK:
			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
			break;
		case PTR_TO_BTF_ID:
		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
		 * any faults for loads into such types. BPF_WRITE is disallowed
		 * for this case.
		 */
		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
			if (type == BPF_READ) {
				if (BPF_MODE(insn->code) == BPF_MEM)
					insn->code = BPF_LDX | BPF_PROBE_MEM |
						     BPF_SIZE((insn)->code);
				else
					insn->code = BPF_LDX | BPF_PROBE_MEMSX |
						     BPF_SIZE((insn)->code);
				env->prog->aux->num_exentries++;
			}
			continue;
		case PTR_TO_ARENA:
			if (BPF_MODE(insn->code) == BPF_MEMSX) {
				if (!bpf_jit_supports_insn(insn, true)) {
					verbose(env, "sign extending loads from arena are not supported yet\n");
					return -EOPNOTSUPP;
				}
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
			} else {
				insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
			}
			env->prog->aux->num_exentries++;
			continue;
		default:
			continue;
		}

		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
		size = BPF_LDST_BYTES(insn);
		/* remember the mode now: insn->code may be rewritten below */
		mode = BPF_MODE(insn->code);

		/* If the read access is a narrower load of the field,
		 * convert to a 4/8-byte load, to minimize program type specific
		 * convert_ctx_access changes. If conversion is successful,
		 * we will apply proper mask to the result.
		 */
		is_narrower_load = size < ctx_field_size;
		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
		off = insn->off;
		if (is_narrower_load) {
			u8 size_code;

			if (type == BPF_WRITE) {
				verifier_bug(env, "narrow ctx access misconfigured");
				return -EFAULT;
			}

			size_code = BPF_H;
			if (ctx_field_size == 4)
				size_code = BPF_W;
			else if (ctx_field_size == 8)
				size_code = BPF_DW;

			/* widen the load: offset aligned down to size_default */
			insn->off = off & ~(size_default - 1);
			insn->code = BPF_LDX | BPF_MEM | size_code;
		}

		target_size = 0;
		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
					 &target_size);
		if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
		    (ctx_field_size && !target_size)) {
			verifier_bug(env, "error during ctx access conversion (%d)", cnt);
			return -EFAULT;
		}

		if (is_narrower_load && size < target_size) {
			/* shift the requested bytes down, then mask to 'size' bytes */
			u8 shift = bpf_ctx_narrow_access_offset(
				off, size, size_default) * 8;
			if (shift && cnt + 1 >= INSN_BUF_SIZE) {
				verifier_bug(env, "narrow ctx load misconfigured");
				return -EFAULT;
			}
			if (ctx_field_size <= 4) {
				if (shift)
					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1 << size * 8) - 1);
			} else {
				if (shift)
					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
									insn->dst_reg,
									shift);
				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
								(1ULL << size * 8) - 1);
			}
		}
		/* original load was BPF_MEMSX: append a mov with off = size * 8 */
		if (mode == BPF_MEMSX)
			insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
						       insn->dst_reg, insn->dst_reg,
						       size * 8, 0);

patch_insn_buf:
		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
		if (!new_prog)
			return -ENOMEM;

		delta += cnt - 1;

		/* keep walking new program and skip insns we just inserted */
		env->prog = new_prog;
		insn = new_prog->insnsi + i + delta;
	}

	return 0;
}
22821
/*
 * JIT every subprog of env->prog as a separate bpf_prog.
 *
 * The work is done in passes:
 *  1. stash each pseudo call/func target's subprog id in insn->off and the
 *     original imm in insn_aux_data[].call_imm;
 *  2. allocate one bpf_prog per subprog, copy its insns and run the JIT;
 *  3. patch the now known addresses into the calls and run the JIT again;
 *  4. lock the images, add kallsyms for the subprogs and restore the main
 *     prog's insns for later dumps.
 *
 * Returns 0 on success or when the program has no subprogs beyond the main
 * one, -EFAULT when a call target has no subprog (hard reject), otherwise a
 * negative error after the main prog's call insns have been restored and
 * jit_requested has been cleared.
 */
static int jit_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog, **func, *tmp;
	int i, j, subprog_start, subprog_end = 0, len, subprog;
	struct bpf_map *map_ptr;
	struct bpf_insn *insn;
	void *old_bpf_func;
	int err, num_exentries;
	int old_len, subprog_start_adjustment = 0;

	if (env->subprog_cnt <= 1)
		return 0;

	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
			continue;

		/* Upon error here we cannot fall back to interpreter but
		 * need a hard reject of the program. Thus -EFAULT is
		 * propagated in any case.
		 */
		subprog = find_subprog(env, i + insn->imm + 1);
		if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
				    i + insn->imm + 1))
			return -EFAULT;
		/* temporarily remember subprog id inside insn instead of
		 * aux_data, since next loop will split up all insns into funcs
		 */
		insn->off = subprog;
		/* remember original imm in case JIT fails and fallback
		 * to interpreter will be needed
		 */
		env->insn_aux_data[i].call_imm = insn->imm;
		/* point imm to __bpf_call_base+1 from JITs point of view */
		insn->imm = 1;
		if (bpf_pseudo_func(insn)) {
#if defined(MODULES_VADDR)
			u64 addr = MODULES_VADDR;
#else
			u64 addr = VMALLOC_START;
#endif
			/* jit (e.g. x86_64) may emit fewer instructions
			 * if it learns a u32 imm is the same as a u64 imm.
			 * Set close enough to possible prog address.
			 */
			insn[0].imm = (u32)addr;
			insn[1].imm = addr >> 32;
		}
	}

	err = bpf_prog_alloc_jited_linfo(prog);
	if (err)
		goto out_undo_insn;

	/* default error for the allocation failures below */
	err = -ENOMEM;
	func = kzalloc_objs(prog, env->subprog_cnt);
	if (!func)
		goto out_undo_insn;

	for (i = 0; i < env->subprog_cnt; i++) {
		subprog_start = subprog_end;
		subprog_end = env->subprog_info[i + 1].start;

		len = subprog_end - subprog_start;
		/* bpf_prog_run() doesn't call subprogs directly,
		 * hence main prog stats include the runtime of subprogs.
		 * subprogs don't have IDs and not reachable via prog_get_next_id
		 * func[i]->stats will never be accessed and stays NULL
		 */
		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
		if (!func[i])
			goto out_free;
		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
		       len * sizeof(struct bpf_insn));
		func[i]->type = prog->type;
		func[i]->len = len;
		if (bpf_prog_calc_tag(func[i]))
			goto out_free;
		func[i]->is_func = 1;
		func[i]->sleepable = prog->sleepable;
		func[i]->aux->func_idx = i;
		/* Below members will be freed only at prog->aux */
		func[i]->aux->btf = prog->aux->btf;
		func[i]->aux->subprog_start = subprog_start + subprog_start_adjustment;
		func[i]->aux->func_info = prog->aux->func_info;
		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
		func[i]->aux->poke_tab = prog->aux->poke_tab;
		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
		func[i]->aux->main_prog_aux = prog->aux;

		/* hand poke descriptors inside this subprog's range to its aux */
		for (j = 0; j < prog->aux->size_poke_tab; j++) {
			struct bpf_jit_poke_descriptor *poke;

			poke = &prog->aux->poke_tab[j];
			if (poke->insn_idx < subprog_end &&
			    poke->insn_idx >= subprog_start)
				poke->aux = func[i]->aux;
		}

		func[i]->aux->name[0] = 'F';
		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
			func[i]->aux->jits_use_priv_stack = true;

		func[i]->jit_requested = 1;
		func[i]->blinding_requested = prog->blinding_requested;
		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
		func[i]->aux->linfo = prog->aux->linfo;
		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
		func[i]->aux->arena = prog->aux->arena;
		func[i]->aux->used_maps = env->used_maps;
		func[i]->aux->used_map_cnt = env->used_map_cnt;
		/* count the BPF_PROBE_* insns of this subprog */
		num_exentries = 0;
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (BPF_CLASS(insn->code) == BPF_LDX &&
			    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
			     BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
				num_exentries++;
			if ((BPF_CLASS(insn->code) == BPF_STX ||
			     BPF_CLASS(insn->code) == BPF_ST) &&
			     BPF_MODE(insn->code) == BPF_PROBE_MEM32)
				num_exentries++;
			if (BPF_CLASS(insn->code) == BPF_STX &&
			     BPF_MODE(insn->code) == BPF_PROBE_ATOMIC)
				num_exentries++;
		}
		func[i]->aux->num_exentries = num_exentries;
		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
		func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
		func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
		func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
		if (!i)
			func[i]->aux->exception_boundary = env->seen_exception;

		/*
		 * To properly pass the absolute subprog start to jit
		 * all instruction adjustments should be accumulated
		 */
		old_len = func[i]->len;
		func[i] = bpf_int_jit_compile(func[i]);
		subprog_start_adjustment += func[i]->len - old_len;

		if (!func[i]->jited) {
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/* at this point all bpf functions were successfully JITed
	 * now populate all bpf_calls with correct addresses and
	 * run last pass of JIT
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		insn = func[i]->insnsi;
		for (j = 0; j < func[i]->len; j++, insn++) {
			if (bpf_pseudo_func(insn)) {
				subprog = insn->off;
				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
				continue;
			}
			if (!bpf_pseudo_call(insn))
				continue;
			subprog = insn->off;
			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
		}

		/* we use the aux data to keep a list of the start addresses
		 * of the JITed images for each function in the program
		 *
		 * for some architectures, such as powerpc64, the imm field
		 * might not be large enough to hold the offset of the start
		 * address of the callee's JITed image from __bpf_call_base
		 *
		 * in such cases, we can lookup the start address of a callee
		 * by using its subprog id, available from the off field of
		 * the call instruction, as an index for this list
		 */
		func[i]->aux->func = func;
		func[i]->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
		func[i]->aux->real_func_cnt = env->subprog_cnt;
	}
	for (i = 0; i < env->subprog_cnt; i++) {
		old_bpf_func = func[i]->bpf_func;
		tmp = bpf_int_jit_compile(func[i]);
		/* the final pass must not return a different prog or image address */
		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
			err = -ENOTSUPP;
			goto out_free;
		}
		cond_resched();
	}

	/*
	 * Cleanup func[i]->aux fields which aren't required
	 * or can become invalid in future
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		func[i]->aux->used_maps = NULL;
		func[i]->aux->used_map_cnt = 0;
	}

	/* finally lock prog and jit images for all functions and
	 * populate kallsyms. Begin at the first subprogram, since
	 * bpf_prog_load will add the kallsyms for the main program.
	 */
	for (i = 1; i < env->subprog_cnt; i++) {
		err = bpf_prog_lock_ro(func[i]);
		if (err)
			goto out_free;
	}

	for (i = 1; i < env->subprog_cnt; i++)
		bpf_prog_kallsyms_add(func[i]);

	/* Last step: make now unused interpreter insns from main
	 * prog consistent for later dump requests, so they can
	 * later look the same as if they were interpreted only.
	 */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (bpf_pseudo_func(insn)) {
			insn[0].imm = env->insn_aux_data[i].call_imm;
			insn[1].imm = insn->off;
			insn->off = 0;
			continue;
		}
		if (!bpf_pseudo_call(insn))
			continue;
		insn->off = env->insn_aux_data[i].call_imm;
		subprog = find_subprog(env, i + insn->off + 1);
		insn->imm = subprog;
	}

	/* publish func[0]'s image as the main prog's image */
	prog->jited = 1;
	prog->bpf_func = func[0]->bpf_func;
	prog->jited_len = func[0]->jited_len;
	prog->aux->extable = func[0]->aux->extable;
	prog->aux->num_exentries = func[0]->aux->num_exentries;
	prog->aux->func = func;
	prog->aux->func_cnt = env->subprog_cnt - env->hidden_subprog_cnt;
	prog->aux->real_func_cnt = env->subprog_cnt;
	prog->aux->bpf_exception_cb = (void *)func[env->exception_callback_subprog]->bpf_func;
	prog->aux->exception_boundary = func[0]->aux->exception_boundary;
	bpf_prog_jit_attempt_done(prog);
	return 0;
out_free:
	/* We failed JIT'ing, so at this point we need to unregister poke
	 * descriptors from subprogs, so that kernel is not attempting to
	 * patch it anymore as we're freeing the subprog JIT memory.
	 */
	for (i = 0; i < prog->aux->size_poke_tab; i++) {
		map_ptr = prog->aux->poke_tab[i].tail_call.map;
		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
	}
	/* At this point we're guaranteed that poke descriptors are not
	 * live anymore. We can just unlink its descriptor table as it's
	 * released with the main prog.
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		/* entries past the point of failure were never allocated */
		if (!func[i])
			continue;
		func[i]->aux->poke_tab = NULL;
		bpf_jit_free(func[i]);
	}
	kfree(func);
out_undo_insn:
	/* cleanup main prog to be interpreted */
	prog->jit_requested = 0;
	prog->blinding_requested = 0;
	/* only pseudo calls are restored here; pseudo funcs are left as is */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		if (!bpf_pseudo_call(insn))
			continue;
		insn->off = 0;
		insn->imm = env->insn_aux_data[i].call_imm;
	}
	bpf_prog_jit_attempt_done(prog);
	return err;
}
23107
fixup_call_args(struct bpf_verifier_env * env)23108 static int fixup_call_args(struct bpf_verifier_env *env)
23109 {
23110 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
23111 struct bpf_prog *prog = env->prog;
23112 struct bpf_insn *insn = prog->insnsi;
23113 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
23114 int i, depth;
23115 #endif
23116 int err = 0;
23117
23118 if (env->prog->jit_requested &&
23119 !bpf_prog_is_offloaded(env->prog->aux)) {
23120 err = jit_subprogs(env);
23121 if (err == 0)
23122 return 0;
23123 if (err == -EFAULT)
23124 return err;
23125 }
23126 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
23127 if (has_kfunc_call) {
23128 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
23129 return -EINVAL;
23130 }
23131 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
23132 /* When JIT fails the progs with bpf2bpf calls and tail_calls
23133 * have to be rejected, since interpreter doesn't support them yet.
23134 */
23135 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
23136 return -EINVAL;
23137 }
23138 for (i = 0; i < prog->len; i++, insn++) {
23139 if (bpf_pseudo_func(insn)) {
23140 /* When JIT fails the progs with callback calls
23141 * have to be rejected, since interpreter doesn't support them yet.
23142 */
23143 verbose(env, "callbacks are not allowed in non-JITed programs\n");
23144 return -EINVAL;
23145 }
23146
23147 if (!bpf_pseudo_call(insn))
23148 continue;
23149 depth = get_callee_stack_depth(env, insn, i);
23150 if (depth < 0)
23151 return depth;
23152 bpf_patch_call_args(insn, depth);
23153 }
23154 err = 0;
23155 #endif
23156 return err;
23157 }
23158
23159 /* replace a generic kfunc with a specialized version if necessary */
specialize_kfunc(struct bpf_verifier_env * env,struct bpf_kfunc_desc * desc,int insn_idx)23160 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
23161 {
23162 struct bpf_prog *prog = env->prog;
23163 bool seen_direct_write;
23164 void *xdp_kfunc;
23165 bool is_rdonly;
23166 u32 func_id = desc->func_id;
23167 u16 offset = desc->offset;
23168 unsigned long addr = desc->addr;
23169
23170 if (offset) /* return if module BTF is used */
23171 return 0;
23172
23173 if (bpf_dev_bound_kfunc_id(func_id)) {
23174 xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
23175 if (xdp_kfunc)
23176 addr = (unsigned long)xdp_kfunc;
23177 /* fallback to default kfunc when not supported by netdev */
23178 } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
23179 seen_direct_write = env->seen_direct_write;
23180 is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
23181
23182 if (is_rdonly)
23183 addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
23184
23185 /* restore env->seen_direct_write to its original value, since
23186 * may_access_direct_pkt_data mutates it
23187 */
23188 env->seen_direct_write = seen_direct_write;
23189 } else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
23190 if (bpf_lsm_has_d_inode_locked(prog))
23191 addr = (unsigned long)bpf_set_dentry_xattr_locked;
23192 } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
23193 if (bpf_lsm_has_d_inode_locked(prog))
23194 addr = (unsigned long)bpf_remove_dentry_xattr_locked;
23195 } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
23196 if (!env->insn_aux_data[insn_idx].non_sleepable)
23197 addr = (unsigned long)bpf_dynptr_from_file_sleepable;
23198 } else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
23199 if (env->insn_aux_data[insn_idx].non_sleepable)
23200 addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
23201 } else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
23202 if (env->insn_aux_data[insn_idx].non_sleepable)
23203 addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
23204 }
23205 desc->addr = addr;
23206 return 0;
23207 }
23208
/*
 * Emit the replacement sequence for a collection-insert kfunc call into
 * @insn_buf:
 *
 *   struct_meta_reg = insn_aux->kptr_struct_meta   (64-bit load, two insns)
 *   node_offset_reg = insn_aux->insert_off
 *   <the original call instruction, *insn>
 *
 * *cnt receives the number of instructions written (4).
 */
static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
					    u16 struct_meta_reg,
					    u16 node_offset_reg,
					    struct bpf_insn *insn,
					    struct bpf_insn *insn_buf,
					    int *cnt)
{
	/* BPF_LD_IMM64 expands to a pair of instructions */
	struct bpf_insn ld_meta[2] = {
		BPF_LD_IMM64(struct_meta_reg, (long)insn_aux->kptr_struct_meta)
	};
	struct bpf_insn *out = insn_buf;

	*out++ = ld_meta[0];
	*out++ = ld_meta[1];
	*out++ = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
	*out++ = *insn;

	*cnt = out - insn_buf;
}
23225
fixup_kfunc_call(struct bpf_verifier_env * env,struct bpf_insn * insn,struct bpf_insn * insn_buf,int insn_idx,int * cnt)23226 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
23227 struct bpf_insn *insn_buf, int insn_idx, int *cnt)
23228 {
23229 struct bpf_kfunc_desc *desc;
23230 int err;
23231
23232 if (!insn->imm) {
23233 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
23234 return -EINVAL;
23235 }
23236
23237 *cnt = 0;
23238
23239 /* insn->imm has the btf func_id. Replace it with an offset relative to
23240 * __bpf_call_base, unless the JIT needs to call functions that are
23241 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
23242 */
23243 desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
23244 if (!desc) {
23245 verifier_bug(env, "kernel function descriptor not found for func_id %u",
23246 insn->imm);
23247 return -EFAULT;
23248 }
23249
23250 err = specialize_kfunc(env, desc, insn_idx);
23251 if (err)
23252 return err;
23253
23254 if (!bpf_jit_supports_far_kfunc_call())
23255 insn->imm = BPF_CALL_IMM(desc->addr);
23256
23257 if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
23258 desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
23259 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
23260 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
23261 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
23262
23263 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
23264 verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
23265 insn_idx);
23266 return -EFAULT;
23267 }
23268
23269 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
23270 insn_buf[1] = addr[0];
23271 insn_buf[2] = addr[1];
23272 insn_buf[3] = *insn;
23273 *cnt = 4;
23274 } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
23275 desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
23276 desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
23277 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
23278 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
23279
23280 if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
23281 verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
23282 insn_idx);
23283 return -EFAULT;
23284 }
23285
23286 if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
23287 !kptr_struct_meta) {
23288 verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
23289 insn_idx);
23290 return -EFAULT;
23291 }
23292
23293 insn_buf[0] = addr[0];
23294 insn_buf[1] = addr[1];
23295 insn_buf[2] = *insn;
23296 *cnt = 3;
23297 } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
23298 desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
23299 desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
23300 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
23301 int struct_meta_reg = BPF_REG_3;
23302 int node_offset_reg = BPF_REG_4;
23303
23304 /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
23305 if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
23306 struct_meta_reg = BPF_REG_4;
23307 node_offset_reg = BPF_REG_5;
23308 }
23309
23310 if (!kptr_struct_meta) {
23311 verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
23312 insn_idx);
23313 return -EFAULT;
23314 }
23315
23316 __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
23317 node_offset_reg, insn, insn_buf, cnt);
23318 } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
23319 desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
23320 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
23321 *cnt = 1;
23322 } else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
23323 env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
23324 /*
23325 * inline the bpf_session_is_return() for fsession:
23326 * bool bpf_session_is_return(void *ctx)
23327 * {
23328 * return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
23329 * }
23330 */
23331 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
23332 insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
23333 insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
23334 *cnt = 3;
23335 } else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
23336 env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
23337 /*
23338 * inline bpf_session_cookie() for fsession:
23339 * __u64 *bpf_session_cookie(void *ctx)
23340 * {
23341 * u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
23342 * return &((u64 *)ctx)[-off];
23343 * }
23344 */
23345 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
23346 insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
23347 insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
23348 insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
23349 insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
23350 insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
23351 *cnt = 6;
23352 }
23353
23354 if (env->insn_aux_data[insn_idx].arg_prog) {
23355 u32 regno = env->insn_aux_data[insn_idx].arg_prog;
23356 struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
23357 int idx = *cnt;
23358
23359 insn_buf[idx++] = ld_addrs[0];
23360 insn_buf[idx++] = ld_addrs[1];
23361 insn_buf[idx++] = *insn;
23362 *cnt = idx;
23363 }
23364 return 0;
23365 }
23366
23367 /* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
add_hidden_subprog(struct bpf_verifier_env * env,struct bpf_insn * patch,int len)23368 static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *patch, int len)
23369 {
23370 struct bpf_subprog_info *info = env->subprog_info;
23371 int cnt = env->subprog_cnt;
23372 struct bpf_prog *prog;
23373
23374 /* We only reserve one slot for hidden subprogs in subprog_info. */
23375 if (env->hidden_subprog_cnt) {
23376 verifier_bug(env, "only one hidden subprog supported");
23377 return -EFAULT;
23378 }
23379 /* We're not patching any existing instruction, just appending the new
23380 * ones for the hidden subprog. Hence all of the adjustment operations
23381 * in bpf_patch_insn_data are no-ops.
23382 */
23383 prog = bpf_patch_insn_data(env, env->prog->len - 1, patch, len);
23384 if (!prog)
23385 return -ENOMEM;
23386 env->prog = prog;
23387 info[cnt + 1].start = info[cnt].start;
23388 info[cnt].start = prog->len - len + 1;
23389 env->subprog_cnt++;
23390 env->hidden_subprog_cnt++;
23391 return 0;
23392 }
23393
23394 /* Do various post-verification rewrites in a single program pass.
23395 * These rewrites simplify JIT and interpreter implementations.
23396 */
do_misc_fixups(struct bpf_verifier_env * env)23397 static int do_misc_fixups(struct bpf_verifier_env *env)
23398 {
23399 struct bpf_prog *prog = env->prog;
23400 enum bpf_attach_type eatype = prog->expected_attach_type;
23401 enum bpf_prog_type prog_type = resolve_prog_type(prog);
23402 struct bpf_insn *insn = prog->insnsi;
23403 const struct bpf_func_proto *fn;
23404 const int insn_cnt = prog->len;
23405 const struct bpf_map_ops *ops;
23406 struct bpf_insn_aux_data *aux;
23407 struct bpf_insn *insn_buf = env->insn_buf;
23408 struct bpf_prog *new_prog;
23409 struct bpf_map *map_ptr;
23410 int i, ret, cnt, delta = 0, cur_subprog = 0;
23411 struct bpf_subprog_info *subprogs = env->subprog_info;
23412 u16 stack_depth = subprogs[cur_subprog].stack_depth;
23413 u16 stack_depth_extra = 0;
23414
23415 if (env->seen_exception && !env->exception_callback_subprog) {
23416 struct bpf_insn *patch = insn_buf;
23417
23418 *patch++ = env->prog->insnsi[insn_cnt - 1];
23419 *patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
23420 *patch++ = BPF_EXIT_INSN();
23421 ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
23422 if (ret < 0)
23423 return ret;
23424 prog = env->prog;
23425 insn = prog->insnsi;
23426
23427 env->exception_callback_subprog = env->subprog_cnt - 1;
23428 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
23429 mark_subprog_exc_cb(env, env->exception_callback_subprog);
23430 }
23431
23432 for (i = 0; i < insn_cnt;) {
23433 if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
23434 if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
23435 (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
23436 /* convert to 32-bit mov that clears upper 32-bit */
23437 insn->code = BPF_ALU | BPF_MOV | BPF_X;
23438 /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
23439 insn->off = 0;
23440 insn->imm = 0;
23441 } /* cast from as(0) to as(1) should be handled by JIT */
23442 goto next_insn;
23443 }
23444
23445 if (env->insn_aux_data[i + delta].needs_zext)
23446 /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
23447 insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
23448
23449 /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
23450 if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
23451 insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
23452 insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
23453 insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
23454 insn->off == 1 && insn->imm == -1) {
23455 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
23456 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
23457 struct bpf_insn *patch = insn_buf;
23458
23459 if (isdiv)
23460 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
23461 BPF_NEG | BPF_K, insn->dst_reg,
23462 0, 0, 0);
23463 else
23464 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
23465
23466 cnt = patch - insn_buf;
23467
23468 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23469 if (!new_prog)
23470 return -ENOMEM;
23471
23472 delta += cnt - 1;
23473 env->prog = prog = new_prog;
23474 insn = new_prog->insnsi + i + delta;
23475 goto next_insn;
23476 }
23477
23478 /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
23479 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
23480 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
23481 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
23482 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
23483 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
23484 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
23485 bool is_sdiv = isdiv && insn->off == 1;
23486 bool is_smod = !isdiv && insn->off == 1;
23487 struct bpf_insn *patch = insn_buf;
23488
23489 if (is_sdiv) {
23490 /* [R,W]x sdiv 0 -> 0
23491 * LLONG_MIN sdiv -1 -> LLONG_MIN
23492 * INT_MIN sdiv -1 -> INT_MIN
23493 */
23494 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
23495 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
23496 BPF_ADD | BPF_K, BPF_REG_AX,
23497 0, 0, 1);
23498 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
23499 BPF_JGT | BPF_K, BPF_REG_AX,
23500 0, 4, 1);
23501 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
23502 BPF_JEQ | BPF_K, BPF_REG_AX,
23503 0, 1, 0);
23504 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
23505 BPF_MOV | BPF_K, insn->dst_reg,
23506 0, 0, 0);
23507 /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
23508 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
23509 BPF_NEG | BPF_K, insn->dst_reg,
23510 0, 0, 0);
23511 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
23512 *patch++ = *insn;
23513 cnt = patch - insn_buf;
23514 } else if (is_smod) {
23515 /* [R,W]x mod 0 -> [R,W]x */
23516 /* [R,W]x mod -1 -> 0 */
23517 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
23518 *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
23519 BPF_ADD | BPF_K, BPF_REG_AX,
23520 0, 0, 1);
23521 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
23522 BPF_JGT | BPF_K, BPF_REG_AX,
23523 0, 3, 1);
23524 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
23525 BPF_JEQ | BPF_K, BPF_REG_AX,
23526 0, 3 + (is64 ? 0 : 1), 1);
23527 *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
23528 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
23529 *patch++ = *insn;
23530
23531 if (!is64) {
23532 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
23533 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
23534 }
23535 cnt = patch - insn_buf;
23536 } else if (isdiv) {
23537 /* [R,W]x div 0 -> 0 */
23538 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
23539 BPF_JNE | BPF_K, insn->src_reg,
23540 0, 2, 0);
23541 *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
23542 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
23543 *patch++ = *insn;
23544 cnt = patch - insn_buf;
23545 } else {
23546 /* [R,W]x mod 0 -> [R,W]x */
23547 *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
23548 BPF_JEQ | BPF_K, insn->src_reg,
23549 0, 1 + (is64 ? 0 : 1), 0);
23550 *patch++ = *insn;
23551
23552 if (!is64) {
23553 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
23554 *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
23555 }
23556 cnt = patch - insn_buf;
23557 }
23558
23559 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23560 if (!new_prog)
23561 return -ENOMEM;
23562
23563 delta += cnt - 1;
23564 env->prog = prog = new_prog;
23565 insn = new_prog->insnsi + i + delta;
23566 goto next_insn;
23567 }
23568
23569 /* Make it impossible to de-reference a userspace address */
23570 if (BPF_CLASS(insn->code) == BPF_LDX &&
23571 (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
23572 BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
23573 struct bpf_insn *patch = insn_buf;
23574 u64 uaddress_limit = bpf_arch_uaddress_limit();
23575
23576 if (!uaddress_limit)
23577 goto next_insn;
23578
23579 *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
23580 if (insn->off)
23581 *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
23582 *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
23583 *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
23584 *patch++ = *insn;
23585 *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
23586 *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
23587
23588 cnt = patch - insn_buf;
23589 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23590 if (!new_prog)
23591 return -ENOMEM;
23592
23593 delta += cnt - 1;
23594 env->prog = prog = new_prog;
23595 insn = new_prog->insnsi + i + delta;
23596 goto next_insn;
23597 }
23598
23599 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
23600 if (BPF_CLASS(insn->code) == BPF_LD &&
23601 (BPF_MODE(insn->code) == BPF_ABS ||
23602 BPF_MODE(insn->code) == BPF_IND)) {
23603 cnt = env->ops->gen_ld_abs(insn, insn_buf);
23604 if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
23605 verifier_bug(env, "%d insns generated for ld_abs", cnt);
23606 return -EFAULT;
23607 }
23608
23609 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23610 if (!new_prog)
23611 return -ENOMEM;
23612
23613 delta += cnt - 1;
23614 env->prog = prog = new_prog;
23615 insn = new_prog->insnsi + i + delta;
23616 goto next_insn;
23617 }
23618
23619 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
23620 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
23621 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
23622 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
23623 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
23624 struct bpf_insn *patch = insn_buf;
23625 bool issrc, isneg, isimm;
23626 u32 off_reg;
23627
23628 aux = &env->insn_aux_data[i + delta];
23629 if (!aux->alu_state ||
23630 aux->alu_state == BPF_ALU_NON_POINTER)
23631 goto next_insn;
23632
23633 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
23634 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
23635 BPF_ALU_SANITIZE_SRC;
23636 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
23637
23638 off_reg = issrc ? insn->src_reg : insn->dst_reg;
23639 if (isimm) {
23640 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
23641 } else {
23642 if (isneg)
23643 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
23644 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
23645 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
23646 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
23647 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
23648 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
23649 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
23650 }
23651 if (!issrc)
23652 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
23653 insn->src_reg = BPF_REG_AX;
23654 if (isneg)
23655 insn->code = insn->code == code_add ?
23656 code_sub : code_add;
23657 *patch++ = *insn;
23658 if (issrc && isneg && !isimm)
23659 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
23660 cnt = patch - insn_buf;
23661
23662 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23663 if (!new_prog)
23664 return -ENOMEM;
23665
23666 delta += cnt - 1;
23667 env->prog = prog = new_prog;
23668 insn = new_prog->insnsi + i + delta;
23669 goto next_insn;
23670 }
23671
23672 if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
23673 int stack_off_cnt = -stack_depth - 16;
23674
23675 /*
23676 * Two 8 byte slots, depth-16 stores the count, and
23677 * depth-8 stores the start timestamp of the loop.
23678 *
23679 * The starting value of count is BPF_MAX_TIMED_LOOPS
23680 * (0xffff). Every iteration loads it and subs it by 1,
23681 * until the value becomes 0 in AX (thus, 1 in stack),
23682 * after which we call arch_bpf_timed_may_goto, which
23683 * either sets AX to 0xffff to keep looping, or to 0
23684 * upon timeout. AX is then stored into the stack. In
23685 * the next iteration, we either see 0 and break out, or
23686 * continue iterating until the next time value is 0
23687 * after subtraction, rinse and repeat.
23688 */
23689 stack_depth_extra = 16;
23690 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
23691 if (insn->off >= 0)
23692 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
23693 else
23694 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
23695 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
23696 insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
23697 /*
23698 * AX is used as an argument to pass in stack_off_cnt
23699 * (to add to r10/fp), and also as the return value of
23700 * the call to arch_bpf_timed_may_goto.
23701 */
23702 insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
23703 insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
23704 insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
23705 cnt = 7;
23706
23707 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23708 if (!new_prog)
23709 return -ENOMEM;
23710
23711 delta += cnt - 1;
23712 env->prog = prog = new_prog;
23713 insn = new_prog->insnsi + i + delta;
23714 goto next_insn;
23715 } else if (is_may_goto_insn(insn)) {
23716 int stack_off = -stack_depth - 8;
23717
23718 stack_depth_extra = 8;
23719 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
23720 if (insn->off >= 0)
23721 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
23722 else
23723 insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
23724 insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
23725 insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
23726 cnt = 4;
23727
23728 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23729 if (!new_prog)
23730 return -ENOMEM;
23731
23732 delta += cnt - 1;
23733 env->prog = prog = new_prog;
23734 insn = new_prog->insnsi + i + delta;
23735 goto next_insn;
23736 }
23737
23738 if (insn->code != (BPF_JMP | BPF_CALL))
23739 goto next_insn;
23740 if (insn->src_reg == BPF_PSEUDO_CALL)
23741 goto next_insn;
23742 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
23743 ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
23744 if (ret)
23745 return ret;
23746 if (cnt == 0)
23747 goto next_insn;
23748
23749 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23750 if (!new_prog)
23751 return -ENOMEM;
23752
23753 delta += cnt - 1;
23754 env->prog = prog = new_prog;
23755 insn = new_prog->insnsi + i + delta;
23756 goto next_insn;
23757 }
23758
23759 /* Skip inlining the helper call if the JIT does it. */
23760 if (bpf_jit_inlines_helper_call(insn->imm))
23761 goto next_insn;
23762
23763 if (insn->imm == BPF_FUNC_get_route_realm)
23764 prog->dst_needed = 1;
23765 if (insn->imm == BPF_FUNC_get_prandom_u32)
23766 bpf_user_rnd_init_once();
23767 if (insn->imm == BPF_FUNC_override_return)
23768 prog->kprobe_override = 1;
23769 if (insn->imm == BPF_FUNC_tail_call) {
23770 /* If we tail call into other programs, we
23771 * cannot make any assumptions since they can
23772 * be replaced dynamically during runtime in
23773 * the program array.
23774 */
23775 prog->cb_access = 1;
23776 if (!allow_tail_call_in_subprogs(env))
23777 prog->aux->stack_depth = MAX_BPF_STACK;
23778 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
23779
23780 /* mark bpf_tail_call as different opcode to avoid
23781 * conditional branch in the interpreter for every normal
23782 * call and to prevent accidental JITing by JIT compiler
23783 * that doesn't support bpf_tail_call yet
23784 */
23785 insn->imm = 0;
23786 insn->code = BPF_JMP | BPF_TAIL_CALL;
23787
23788 aux = &env->insn_aux_data[i + delta];
23789 if (env->bpf_capable && !prog->blinding_requested &&
23790 prog->jit_requested &&
23791 !bpf_map_key_poisoned(aux) &&
23792 !bpf_map_ptr_poisoned(aux) &&
23793 !bpf_map_ptr_unpriv(aux)) {
23794 struct bpf_jit_poke_descriptor desc = {
23795 .reason = BPF_POKE_REASON_TAIL_CALL,
23796 .tail_call.map = aux->map_ptr_state.map_ptr,
23797 .tail_call.key = bpf_map_key_immediate(aux),
23798 .insn_idx = i + delta,
23799 };
23800
23801 ret = bpf_jit_add_poke_descriptor(prog, &desc);
23802 if (ret < 0) {
23803 verbose(env, "adding tail call poke descriptor failed\n");
23804 return ret;
23805 }
23806
23807 insn->imm = ret + 1;
23808 goto next_insn;
23809 }
23810
23811 if (!bpf_map_ptr_unpriv(aux))
23812 goto next_insn;
23813
23814 /* instead of changing every JIT dealing with tail_call
23815 * emit two extra insns:
23816 * if (index >= max_entries) goto out;
23817 * index &= array->index_mask;
23818 * to avoid out-of-bounds cpu speculation
23819 */
23820 if (bpf_map_ptr_poisoned(aux)) {
23821 verbose(env, "tail_call abusing map_ptr\n");
23822 return -EINVAL;
23823 }
23824
23825 map_ptr = aux->map_ptr_state.map_ptr;
23826 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
23827 map_ptr->max_entries, 2);
23828 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
23829 container_of(map_ptr,
23830 struct bpf_array,
23831 map)->index_mask);
23832 insn_buf[2] = *insn;
23833 cnt = 3;
23834 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23835 if (!new_prog)
23836 return -ENOMEM;
23837
23838 delta += cnt - 1;
23839 env->prog = prog = new_prog;
23840 insn = new_prog->insnsi + i + delta;
23841 goto next_insn;
23842 }
23843
23844 if (insn->imm == BPF_FUNC_timer_set_callback) {
23845 /* The verifier will process callback_fn as many times as necessary
23846 * with different maps and the register states prepared by
23847 * set_timer_callback_state will be accurate.
23848 *
23849 * The following use case is valid:
23850 * map1 is shared by prog1, prog2, prog3.
23851 * prog1 calls bpf_timer_init for some map1 elements
23852 * prog2 calls bpf_timer_set_callback for some map1 elements.
23853 * Those that were not bpf_timer_init-ed will return -EINVAL.
23854 * prog3 calls bpf_timer_start for some map1 elements.
23855 * Those that were not both bpf_timer_init-ed and
23856 * bpf_timer_set_callback-ed will return -EINVAL.
23857 */
23858 struct bpf_insn ld_addrs[2] = {
23859 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
23860 };
23861
23862 insn_buf[0] = ld_addrs[0];
23863 insn_buf[1] = ld_addrs[1];
23864 insn_buf[2] = *insn;
23865 cnt = 3;
23866
23867 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23868 if (!new_prog)
23869 return -ENOMEM;
23870
23871 delta += cnt - 1;
23872 env->prog = prog = new_prog;
23873 insn = new_prog->insnsi + i + delta;
23874 goto patch_call_imm;
23875 }
23876
23877 if (is_storage_get_function(insn->imm)) {
23878 if (env->insn_aux_data[i + delta].non_sleepable)
23879 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
23880 else
23881 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
23882 insn_buf[1] = *insn;
23883 cnt = 2;
23884
23885 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23886 if (!new_prog)
23887 return -ENOMEM;
23888
23889 delta += cnt - 1;
23890 env->prog = prog = new_prog;
23891 insn = new_prog->insnsi + i + delta;
23892 goto patch_call_imm;
23893 }
23894
23895 /* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
23896 if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
23897 /* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
23898 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
23899 */
23900 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
23901 insn_buf[1] = *insn;
23902 cnt = 2;
23903
23904 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
23905 if (!new_prog)
23906 return -ENOMEM;
23907
23908 delta += cnt - 1;
23909 env->prog = prog = new_prog;
23910 insn = new_prog->insnsi + i + delta;
23911 goto patch_call_imm;
23912 }
23913
23914 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
23915 * and other inlining handlers are currently limited to 64 bit
23916 * only.
23917 */
23918 if (prog->jit_requested && BITS_PER_LONG == 64 &&
23919 (insn->imm == BPF_FUNC_map_lookup_elem ||
23920 insn->imm == BPF_FUNC_map_update_elem ||
23921 insn->imm == BPF_FUNC_map_delete_elem ||
23922 insn->imm == BPF_FUNC_map_push_elem ||
23923 insn->imm == BPF_FUNC_map_pop_elem ||
23924 insn->imm == BPF_FUNC_map_peek_elem ||
23925 insn->imm == BPF_FUNC_redirect_map ||
23926 insn->imm == BPF_FUNC_for_each_map_elem ||
23927 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
23928 aux = &env->insn_aux_data[i + delta];
23929 if (bpf_map_ptr_poisoned(aux))
23930 goto patch_call_imm;
23931
23932 map_ptr = aux->map_ptr_state.map_ptr;
23933 ops = map_ptr->ops;
23934 if (insn->imm == BPF_FUNC_map_lookup_elem &&
23935 ops->map_gen_lookup) {
23936 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
23937 if (cnt == -EOPNOTSUPP)
23938 goto patch_map_ops_generic;
23939 if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
23940 verifier_bug(env, "%d insns generated for map lookup", cnt);
23941 return -EFAULT;
23942 }
23943
23944 new_prog = bpf_patch_insn_data(env, i + delta,
23945 insn_buf, cnt);
23946 if (!new_prog)
23947 return -ENOMEM;
23948
23949 delta += cnt - 1;
23950 env->prog = prog = new_prog;
23951 insn = new_prog->insnsi + i + delta;
23952 goto next_insn;
23953 }
23954
23955 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
23956 (void *(*)(struct bpf_map *map, void *key))NULL));
23957 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
23958 (long (*)(struct bpf_map *map, void *key))NULL));
23959 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
23960 (long (*)(struct bpf_map *map, void *key, void *value,
23961 u64 flags))NULL));
23962 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
23963 (long (*)(struct bpf_map *map, void *value,
23964 u64 flags))NULL));
23965 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
23966 (long (*)(struct bpf_map *map, void *value))NULL));
23967 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
23968 (long (*)(struct bpf_map *map, void *value))NULL));
23969 BUILD_BUG_ON(!__same_type(ops->map_redirect,
23970 (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
23971 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
23972 (long (*)(struct bpf_map *map,
23973 bpf_callback_t callback_fn,
23974 void *callback_ctx,
23975 u64 flags))NULL));
23976 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
23977 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
23978
23979 patch_map_ops_generic:
23980 switch (insn->imm) {
23981 case BPF_FUNC_map_lookup_elem:
23982 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
23983 goto next_insn;
23984 case BPF_FUNC_map_update_elem:
23985 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
23986 goto next_insn;
23987 case BPF_FUNC_map_delete_elem:
23988 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
23989 goto next_insn;
23990 case BPF_FUNC_map_push_elem:
23991 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
23992 goto next_insn;
23993 case BPF_FUNC_map_pop_elem:
23994 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
23995 goto next_insn;
23996 case BPF_FUNC_map_peek_elem:
23997 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
23998 goto next_insn;
23999 case BPF_FUNC_redirect_map:
24000 insn->imm = BPF_CALL_IMM(ops->map_redirect);
24001 goto next_insn;
24002 case BPF_FUNC_for_each_map_elem:
24003 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
24004 goto next_insn;
24005 case BPF_FUNC_map_lookup_percpu_elem:
24006 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
24007 goto next_insn;
24008 }
24009
24010 goto patch_call_imm;
24011 }
24012
24013 /* Implement bpf_jiffies64 inline. */
24014 if (prog->jit_requested && BITS_PER_LONG == 64 &&
24015 insn->imm == BPF_FUNC_jiffies64) {
24016 struct bpf_insn ld_jiffies_addr[2] = {
24017 BPF_LD_IMM64(BPF_REG_0,
24018 (unsigned long)&jiffies),
24019 };
24020
24021 insn_buf[0] = ld_jiffies_addr[0];
24022 insn_buf[1] = ld_jiffies_addr[1];
24023 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
24024 BPF_REG_0, 0);
24025 cnt = 3;
24026
24027 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
24028 cnt);
24029 if (!new_prog)
24030 return -ENOMEM;
24031
24032 delta += cnt - 1;
24033 env->prog = prog = new_prog;
24034 insn = new_prog->insnsi + i + delta;
24035 goto next_insn;
24036 }
24037
24038 #if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
24039 /* Implement bpf_get_smp_processor_id() inline. */
24040 if (insn->imm == BPF_FUNC_get_smp_processor_id &&
24041 verifier_inlines_helper_call(env, insn->imm)) {
24042 /* BPF_FUNC_get_smp_processor_id inlining is an
24043 * optimization, so if cpu_number is ever
24044 * changed in some incompatible and hard to support
24045 * way, it's fine to back out this inlining logic
24046 */
24047 #ifdef CONFIG_SMP
24048 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)&cpu_number);
24049 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
24050 insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0);
24051 cnt = 3;
24052 #else
24053 insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
24054 cnt = 1;
24055 #endif
24056 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
24057 if (!new_prog)
24058 return -ENOMEM;
24059
24060 delta += cnt - 1;
24061 env->prog = prog = new_prog;
24062 insn = new_prog->insnsi + i + delta;
24063 goto next_insn;
24064 }
24065
24066 /* Implement bpf_get_current_task() and bpf_get_current_task_btf() inline. */
24067 if ((insn->imm == BPF_FUNC_get_current_task || insn->imm == BPF_FUNC_get_current_task_btf) &&
24068 verifier_inlines_helper_call(env, insn->imm)) {
24069 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, (u32)(unsigned long)¤t_task);
24070 insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
24071 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
24072 cnt = 3;
24073
24074 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
24075 if (!new_prog)
24076 return -ENOMEM;
24077
24078 delta += cnt - 1;
24079 env->prog = prog = new_prog;
24080 insn = new_prog->insnsi + i + delta;
24081 goto next_insn;
24082 }
24083 #endif
24084 /* Implement bpf_get_func_arg inline. */
24085 if (prog_type == BPF_PROG_TYPE_TRACING &&
24086 insn->imm == BPF_FUNC_get_func_arg) {
24087 if (eatype == BPF_TRACE_RAW_TP) {
24088 int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
24089
24090 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
24091 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
24092 cnt = 1;
24093 } else {
24094 /* Load nr_args from ctx - 8 */
24095 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
24096 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
24097 cnt = 2;
24098 }
24099 insn_buf[cnt++] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
24100 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
24101 insn_buf[cnt++] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
24102 insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
24103 insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
24104 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, 0);
24105 insn_buf[cnt++] = BPF_JMP_A(1);
24106 insn_buf[cnt++] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
24107
24108 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
24109 if (!new_prog)
24110 return -ENOMEM;
24111
24112 delta += cnt - 1;
24113 env->prog = prog = new_prog;
24114 insn = new_prog->insnsi + i + delta;
24115 goto next_insn;
24116 }
24117
24118 /* Implement bpf_get_func_ret inline. */
24119 if (prog_type == BPF_PROG_TYPE_TRACING &&
24120 insn->imm == BPF_FUNC_get_func_ret) {
24121 if (eatype == BPF_TRACE_FEXIT ||
24122 eatype == BPF_TRACE_FSESSION ||
24123 eatype == BPF_MODIFY_RETURN) {
24124 /* Load nr_args from ctx - 8 */
24125 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
24126 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
24127 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
24128 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
24129 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
24130 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
24131 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
24132 cnt = 7;
24133 } else {
24134 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
24135 cnt = 1;
24136 }
24137
24138 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
24139 if (!new_prog)
24140 return -ENOMEM;
24141
24142 delta += cnt - 1;
24143 env->prog = prog = new_prog;
24144 insn = new_prog->insnsi + i + delta;
24145 goto next_insn;
24146 }
24147
24148 /* Implement get_func_arg_cnt inline. */
24149 if (prog_type == BPF_PROG_TYPE_TRACING &&
24150 insn->imm == BPF_FUNC_get_func_arg_cnt) {
24151 if (eatype == BPF_TRACE_RAW_TP) {
24152 int nr_args = btf_type_vlen(prog->aux->attach_func_proto);
24153
24154 /* skip 'void *__data' in btf_trace_##name() and save to reg0 */
24155 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, nr_args - 1);
24156 cnt = 1;
24157 } else {
24158 /* Load nr_args from ctx - 8 */
24159 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
24160 insn_buf[1] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
24161 cnt = 2;
24162 }
24163
24164 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
24165 if (!new_prog)
24166 return -ENOMEM;
24167
24168 delta += cnt - 1;
24169 env->prog = prog = new_prog;
24170 insn = new_prog->insnsi + i + delta;
24171 goto next_insn;
24172 }
24173
24174 /* Implement bpf_get_func_ip inline. */
24175 if (prog_type == BPF_PROG_TYPE_TRACING &&
24176 insn->imm == BPF_FUNC_get_func_ip) {
24177 /* Load IP address from ctx - 16 */
24178 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
24179
24180 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
24181 if (!new_prog)
24182 return -ENOMEM;
24183
24184 env->prog = prog = new_prog;
24185 insn = new_prog->insnsi + i + delta;
24186 goto next_insn;
24187 }
24188
24189 /* Implement bpf_get_branch_snapshot inline. */
24190 if (IS_ENABLED(CONFIG_PERF_EVENTS) &&
24191 prog->jit_requested && BITS_PER_LONG == 64 &&
24192 insn->imm == BPF_FUNC_get_branch_snapshot) {
24193 /* We are dealing with the following func protos:
24194 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
24195 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
24196 */
24197 const u32 br_entry_size = sizeof(struct perf_branch_entry);
24198
24199 /* struct perf_branch_entry is part of UAPI and is
24200 * used as an array element, so extremely unlikely to
24201 * ever grow or shrink
24202 */
24203 BUILD_BUG_ON(br_entry_size != 24);
24204
24205 /* if (unlikely(flags)) return -EINVAL */
24206 insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7);
24207
24208 /* Transform size (bytes) into number of entries (cnt = size / 24).
24209 * But to avoid expensive division instruction, we implement
24210 * divide-by-3 through multiplication, followed by further
24211 * division by 8 through 3-bit right shift.
24212 * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr.,
24213 * p. 227, chapter "Unsigned Division by 3" for details and proofs.
24214 *
24215 * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab.
24216 */
24217 insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab);
24218 insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0);
24219 insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36);
24220
24221 /* call perf_snapshot_branch_stack implementation */
24222 insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
24223 /* if (entry_cnt == 0) return -ENOENT */
24224 insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
24225 /* return entry_cnt * sizeof(struct perf_branch_entry) */
24226 insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
24227 insn_buf[7] = BPF_JMP_A(3);
24228 /* return -EINVAL; */
24229 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
24230 insn_buf[9] = BPF_JMP_A(1);
24231 /* return -ENOENT; */
24232 insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
24233 cnt = 11;
24234
24235 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
24236 if (!new_prog)
24237 return -ENOMEM;
24238
24239 delta += cnt - 1;
24240 env->prog = prog = new_prog;
24241 insn = new_prog->insnsi + i + delta;
24242 goto next_insn;
24243 }
24244
24245 /* Implement bpf_kptr_xchg inline */
24246 if (prog->jit_requested && BITS_PER_LONG == 64 &&
24247 insn->imm == BPF_FUNC_kptr_xchg &&
24248 bpf_jit_supports_ptr_xchg()) {
24249 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
24250 insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
24251 cnt = 2;
24252
24253 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
24254 if (!new_prog)
24255 return -ENOMEM;
24256
24257 delta += cnt - 1;
24258 env->prog = prog = new_prog;
24259 insn = new_prog->insnsi + i + delta;
24260 goto next_insn;
24261 }
24262 patch_call_imm:
24263 fn = env->ops->get_func_proto(insn->imm, env->prog);
24264 /* all functions that have prototype and verifier allowed
24265 * programs to call them, must be real in-kernel functions
24266 */
24267 if (!fn->func) {
24268 verifier_bug(env,
24269 "not inlined functions %s#%d is missing func",
24270 func_id_name(insn->imm), insn->imm);
24271 return -EFAULT;
24272 }
24273 insn->imm = fn->func - __bpf_call_base;
24274 next_insn:
24275 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
24276 subprogs[cur_subprog].stack_depth += stack_depth_extra;
24277 subprogs[cur_subprog].stack_extra = stack_depth_extra;
24278
24279 stack_depth = subprogs[cur_subprog].stack_depth;
24280 if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
24281 verbose(env, "stack size %d(extra %d) is too large\n",
24282 stack_depth, stack_depth_extra);
24283 return -EINVAL;
24284 }
24285 cur_subprog++;
24286 stack_depth = subprogs[cur_subprog].stack_depth;
24287 stack_depth_extra = 0;
24288 }
24289 i++;
24290 insn++;
24291 }
24292
24293 env->prog->aux->stack_depth = subprogs[0].stack_depth;
24294 for (i = 0; i < env->subprog_cnt; i++) {
24295 int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
24296 int subprog_start = subprogs[i].start;
24297 int stack_slots = subprogs[i].stack_extra / 8;
24298 int slots = delta, cnt = 0;
24299
24300 if (!stack_slots)
24301 continue;
24302 /* We need two slots in case timed may_goto is supported. */
24303 if (stack_slots > slots) {
24304 verifier_bug(env, "stack_slots supports may_goto only");
24305 return -EFAULT;
24306 }
24307
24308 stack_depth = subprogs[i].stack_depth;
24309 if (bpf_jit_supports_timed_may_goto()) {
24310 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
24311 BPF_MAX_TIMED_LOOPS);
24312 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
24313 } else {
24314 /* Add ST insn to subprog prologue to init extra stack */
24315 insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
24316 BPF_MAX_LOOPS);
24317 }
24318 /* Copy first actual insn to preserve it */
24319 insn_buf[cnt++] = env->prog->insnsi[subprog_start];
24320
24321 new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
24322 if (!new_prog)
24323 return -ENOMEM;
24324 env->prog = prog = new_prog;
24325 /*
24326 * If may_goto is a first insn of a prog there could be a jmp
24327 * insn that points to it, hence adjust all such jmps to point
24328 * to insn after BPF_ST that inits may_goto count.
24329 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
24330 */
24331 WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
24332 }
24333
24334 /* Since poke tab is now finalized, publish aux to tracker. */
24335 for (i = 0; i < prog->aux->size_poke_tab; i++) {
24336 map_ptr = prog->aux->poke_tab[i].tail_call.map;
24337 if (!map_ptr->ops->map_poke_track ||
24338 !map_ptr->ops->map_poke_untrack ||
24339 !map_ptr->ops->map_poke_run) {
24340 verifier_bug(env, "poke tab is misconfigured");
24341 return -EFAULT;
24342 }
24343
24344 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
24345 if (ret < 0) {
24346 verbose(env, "tracking tail call prog failed\n");
24347 return ret;
24348 }
24349 }
24350
24351 ret = sort_kfunc_descs_by_imm_off(env);
24352 if (ret)
24353 return ret;
24354
24355 return 0;
24356 }
24357
/* Emit the inlined equivalent of a bpf_loop() helper call and patch it into
 * the program in place of the instruction at @position.
 *
 * @env:                verifier environment; env->insn_buf is used as scratch
 * @position:           index of the instruction being replaced
 * @stack_base:         frame-pointer-relative offset of the first of three
 *                      consecutive BPF_REG_SIZE slots used to spill R6..R8
 * @callback_subprogno: index into env->subprog_info of the loop callback
 * @total_cnt:          out: number of emitted instructions; stored before
 *                      patching, so it is written even if patching fails
 *
 * Returns the patched program, or NULL when bpf_patch_insn_data() fails.
 *
 * This mirrors bpf_iter.c:bpf_loop, keep the two in sync.
 */
static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
					int position,
					s32 stack_base,
					u32 callback_subprogno,
					u32 *total_cnt)
{
	/* spill slots for R6, R7 and R8, laid out back to back */
	const s32 spill_r6 = stack_base;
	const s32 spill_r7 = stack_base + 1 * BPF_REG_SIZE;
	const s32 spill_r8 = stack_base + 2 * BPF_REG_SIZE;
	/* roles played by the borrowed registers while the loop runs */
	const int r_max = BPF_REG_6;
	const int r_idx = BPF_REG_7;
	const int r_ctx = BPF_REG_8;

	struct bpf_insn *buf = env->insn_buf;
	struct bpf_prog *patched;
	u32 cb_start, call_idx, call_pos;
	s32 cb_rel;
	u32 n = 0;

	/* Too many requested iterations: R0 = -E2BIG and skip the
	 * remainder of the patch.
	 */
	buf[n++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
	buf[n++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
	buf[n++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);

	/* save R6, R7, R8 so they can hold the loop variables */
	buf[n++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, spill_r6);
	buf[n++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, spill_r7);
	buf[n++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, spill_r8);

	/* loop bound from R1, counter starts at 0, callback ctx from R3 */
	buf[n++] = BPF_MOV64_REG(r_max, BPF_REG_1);
	buf[n++] = BPF_MOV32_IMM(r_idx, 0);
	buf[n++] = BPF_MOV64_REG(r_ctx, BPF_REG_3);

	/* loop header: leave the loop once counter >= bound */
	buf[n++] = BPF_JMP_REG(BPF_JGE, r_idx, r_max, 5);

	/* callback(counter, ctx); the relative call target is filled in
	 * after patching, so remember where the call insn lives
	 */
	buf[n++] = BPF_MOV64_REG(BPF_REG_1, r_idx);
	buf[n++] = BPF_MOV64_REG(BPF_REG_2, r_ctx);
	call_idx = n;
	buf[n++] = BPF_CALL_REL(0);

	/* counter++ */
	buf[n++] = BPF_ALU64_IMM(BPF_ADD, r_idx, 1);
	/* callback returned 0: go back to the loop header */
	buf[n++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);

	/* bpf_loop() result: number of iterations performed */
	buf[n++] = BPF_MOV64_REG(BPF_REG_0, r_idx);

	/* put the caller's R6, R7, R8 back */
	buf[n++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, spill_r6);
	buf[n++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, spill_r7);
	buf[n++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, spill_r8);

	*total_cnt = n;
	patched = bpf_patch_insn_data(env, position, buf, n);
	if (!patched)
		return NULL;

	/* the callback's start index is only read after patching */
	cb_start = env->subprog_info[callback_subprogno].start;
	call_pos = position + call_idx;
	cb_rel = cb_start - call_pos - 1;
	patched->insnsi[call_pos].imm = cb_rel;

	return patched;
}
24433
is_bpf_loop_call(struct bpf_insn * insn)24434 static bool is_bpf_loop_call(struct bpf_insn *insn)
24435 {
24436 return insn->code == (BPF_JMP | BPF_CALL) &&
24437 insn->src_reg == 0 &&
24438 insn->imm == BPF_FUNC_loop;
24439 }
24440
24441 /* For all sub-programs in the program (including main) check
24442 * insn_aux_data to see if there are bpf_loop calls that require
24443 * inlining. If such calls are found the calls are replaced with a
24444 * sequence of instructions produced by `inline_bpf_loop` function and
24445 * subprog stack_depth is increased by the size of 3 registers.
24446 * This stack space is used to spill values of the R6, R7, R8. These
24447 * registers are used to store the loop bound, counter and context
24448 * variables.
24449 */
optimize_bpf_loop(struct bpf_verifier_env * env)24450 static int optimize_bpf_loop(struct bpf_verifier_env *env)
24451 {
24452 struct bpf_subprog_info *subprogs = env->subprog_info;
24453 int i, cur_subprog = 0, cnt, delta = 0;
24454 struct bpf_insn *insn = env->prog->insnsi;
24455 int insn_cnt = env->prog->len;
24456 u16 stack_depth = subprogs[cur_subprog].stack_depth;
24457 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
24458 u16 stack_depth_extra = 0;
24459
24460 for (i = 0; i < insn_cnt; i++, insn++) {
24461 struct bpf_loop_inline_state *inline_state =
24462 &env->insn_aux_data[i + delta].loop_inline_state;
24463
24464 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
24465 struct bpf_prog *new_prog;
24466
24467 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
24468 new_prog = inline_bpf_loop(env,
24469 i + delta,
24470 -(stack_depth + stack_depth_extra),
24471 inline_state->callback_subprogno,
24472 &cnt);
24473 if (!new_prog)
24474 return -ENOMEM;
24475
24476 delta += cnt - 1;
24477 env->prog = new_prog;
24478 insn = new_prog->insnsi + i + delta;
24479 }
24480
24481 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
24482 subprogs[cur_subprog].stack_depth += stack_depth_extra;
24483 cur_subprog++;
24484 stack_depth = subprogs[cur_subprog].stack_depth;
24485 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
24486 stack_depth_extra = 0;
24487 }
24488 }
24489
24490 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
24491
24492 return 0;
24493 }
24494
24495 /* Remove unnecessary spill/fill pairs, members of fastcall pattern,
24496 * adjust subprograms stack depth when possible.
24497 */
remove_fastcall_spills_fills(struct bpf_verifier_env * env)24498 static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
24499 {
24500 struct bpf_subprog_info *subprog = env->subprog_info;
24501 struct bpf_insn_aux_data *aux = env->insn_aux_data;
24502 struct bpf_insn *insn = env->prog->insnsi;
24503 int insn_cnt = env->prog->len;
24504 u32 spills_num;
24505 bool modified = false;
24506 int i, j;
24507
24508 for (i = 0; i < insn_cnt; i++, insn++) {
24509 if (aux[i].fastcall_spills_num > 0) {
24510 spills_num = aux[i].fastcall_spills_num;
24511 /* NOPs would be removed by opt_remove_nops() */
24512 for (j = 1; j <= spills_num; ++j) {
24513 *(insn - j) = NOP;
24514 *(insn + j) = NOP;
24515 }
24516 modified = true;
24517 }
24518 if ((subprog + 1)->start == i + 1) {
24519 if (modified && !subprog->keep_fastcall_stack)
24520 subprog->stack_depth = -subprog->fastcall_stack_off;
24521 subprog++;
24522 modified = false;
24523 }
24524 }
24525
24526 return 0;
24527 }
24528
free_states(struct bpf_verifier_env * env)24529 static void free_states(struct bpf_verifier_env *env)
24530 {
24531 struct bpf_verifier_state_list *sl;
24532 struct list_head *head, *pos, *tmp;
24533 struct bpf_scc_info *info;
24534 int i, j;
24535
24536 free_verifier_state(env->cur_state, true);
24537 env->cur_state = NULL;
24538 while (!pop_stack(env, NULL, NULL, false));
24539
24540 list_for_each_safe(pos, tmp, &env->free_list) {
24541 sl = container_of(pos, struct bpf_verifier_state_list, node);
24542 free_verifier_state(&sl->state, false);
24543 kfree(sl);
24544 }
24545 INIT_LIST_HEAD(&env->free_list);
24546
24547 for (i = 0; i < env->scc_cnt; ++i) {
24548 info = env->scc_info[i];
24549 if (!info)
24550 continue;
24551 for (j = 0; j < info->num_visits; j++)
24552 free_backedges(&info->visits[j]);
24553 kvfree(info);
24554 env->scc_info[i] = NULL;
24555 }
24556
24557 if (!env->explored_states)
24558 return;
24559
24560 for (i = 0; i < state_htab_size(env); i++) {
24561 head = &env->explored_states[i];
24562
24563 list_for_each_safe(pos, tmp, head) {
24564 sl = container_of(pos, struct bpf_verifier_state_list, node);
24565 free_verifier_state(&sl->state, false);
24566 kfree(sl);
24567 }
24568 INIT_LIST_HEAD(&env->explored_states[i]);
24569 }
24570 }
24571
do_check_common(struct bpf_verifier_env * env,int subprog)24572 static int do_check_common(struct bpf_verifier_env *env, int subprog)
24573 {
24574 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
24575 struct bpf_subprog_info *sub = subprog_info(env, subprog);
24576 struct bpf_prog_aux *aux = env->prog->aux;
24577 struct bpf_verifier_state *state;
24578 struct bpf_reg_state *regs;
24579 int ret, i;
24580
24581 env->prev_linfo = NULL;
24582 env->pass_cnt++;
24583
24584 state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT);
24585 if (!state)
24586 return -ENOMEM;
24587 state->curframe = 0;
24588 state->speculative = false;
24589 state->branches = 1;
24590 state->in_sleepable = env->prog->sleepable;
24591 state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT);
24592 if (!state->frame[0]) {
24593 kfree(state);
24594 return -ENOMEM;
24595 }
24596 env->cur_state = state;
24597 init_func_state(env, state->frame[0],
24598 BPF_MAIN_FUNC /* callsite */,
24599 0 /* frameno */,
24600 subprog);
24601 state->first_insn_idx = env->subprog_info[subprog].start;
24602 state->last_insn_idx = -1;
24603
24604 regs = state->frame[state->curframe]->regs;
24605 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
24606 const char *sub_name = subprog_name(env, subprog);
24607 struct bpf_subprog_arg_info *arg;
24608 struct bpf_reg_state *reg;
24609
24610 if (env->log.level & BPF_LOG_LEVEL)
24611 verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
24612 ret = btf_prepare_func_args(env, subprog);
24613 if (ret)
24614 goto out;
24615
24616 if (subprog_is_exc_cb(env, subprog)) {
24617 state->frame[0]->in_exception_callback_fn = true;
24618 /* We have already ensured that the callback returns an integer, just
24619 * like all global subprogs. We need to determine it only has a single
24620 * scalar argument.
24621 */
24622 if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
24623 verbose(env, "exception cb only supports single integer argument\n");
24624 ret = -EINVAL;
24625 goto out;
24626 }
24627 }
24628 for (i = BPF_REG_1; i <= sub->arg_cnt; i++) {
24629 arg = &sub->args[i - BPF_REG_1];
24630 reg = ®s[i];
24631
24632 if (arg->arg_type == ARG_PTR_TO_CTX) {
24633 reg->type = PTR_TO_CTX;
24634 mark_reg_known_zero(env, regs, i);
24635 } else if (arg->arg_type == ARG_ANYTHING) {
24636 reg->type = SCALAR_VALUE;
24637 mark_reg_unknown(env, regs, i);
24638 } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
24639 /* assume unspecial LOCAL dynptr type */
24640 __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen);
24641 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
24642 reg->type = PTR_TO_MEM;
24643 reg->type |= arg->arg_type &
24644 (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
24645 mark_reg_known_zero(env, regs, i);
24646 reg->mem_size = arg->mem_size;
24647 if (arg->arg_type & PTR_MAYBE_NULL)
24648 reg->id = ++env->id_gen;
24649 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
24650 reg->type = PTR_TO_BTF_ID;
24651 if (arg->arg_type & PTR_MAYBE_NULL)
24652 reg->type |= PTR_MAYBE_NULL;
24653 if (arg->arg_type & PTR_UNTRUSTED)
24654 reg->type |= PTR_UNTRUSTED;
24655 if (arg->arg_type & PTR_TRUSTED)
24656 reg->type |= PTR_TRUSTED;
24657 mark_reg_known_zero(env, regs, i);
24658 reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
24659 reg->btf_id = arg->btf_id;
24660 reg->id = ++env->id_gen;
24661 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
24662 /* caller can pass either PTR_TO_ARENA or SCALAR */
24663 mark_reg_unknown(env, regs, i);
24664 } else {
24665 verifier_bug(env, "unhandled arg#%d type %d",
24666 i - BPF_REG_1, arg->arg_type);
24667 ret = -EFAULT;
24668 goto out;
24669 }
24670 }
24671 } else {
24672 /* if main BPF program has associated BTF info, validate that
24673 * it's matching expected signature, and otherwise mark BTF
24674 * info for main program as unreliable
24675 */
24676 if (env->prog->aux->func_info_aux) {
24677 ret = btf_prepare_func_args(env, 0);
24678 if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX)
24679 env->prog->aux->func_info_aux[0].unreliable = true;
24680 }
24681
24682 /* 1st arg to a function */
24683 regs[BPF_REG_1].type = PTR_TO_CTX;
24684 mark_reg_known_zero(env, regs, BPF_REG_1);
24685 }
24686
24687 /* Acquire references for struct_ops program arguments tagged with "__ref" */
24688 if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
24689 for (i = 0; i < aux->ctx_arg_info_size; i++)
24690 aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
24691 acquire_reference(env, 0) : 0;
24692 }
24693
24694 ret = do_check(env);
24695 out:
24696 if (!ret && pop_log)
24697 bpf_vlog_reset(&env->log, 0);
24698 free_states(env);
24699 return ret;
24700 }
24701
24702 /* Lazily verify all global functions based on their BTF, if they are called
24703 * from main BPF program or any of subprograms transitively.
24704 * BPF global subprogs called from dead code are not validated.
24705 * All callable global functions must pass verification.
24706 * Otherwise the whole program is rejected.
24707 * Consider:
24708 * int bar(int);
24709 * int foo(int f)
24710 * {
24711 * return bar(f);
24712 * }
24713 * int bar(int b)
24714 * {
24715 * ...
24716 * }
24717 * foo() will be verified first for R1=any_scalar_value. During verification it
24718 * will be assumed that bar() already verified successfully and call to bar()
24719 * from foo() will be checked for type match only. Later bar() will be verified
24720 * independently to check that it's safe for R1=any_scalar_value.
24721 */
do_check_subprogs(struct bpf_verifier_env * env)24722 static int do_check_subprogs(struct bpf_verifier_env *env)
24723 {
24724 struct bpf_prog_aux *aux = env->prog->aux;
24725 struct bpf_func_info_aux *sub_aux;
24726 int i, ret, new_cnt;
24727
24728 if (!aux->func_info)
24729 return 0;
24730
24731 /* exception callback is presumed to be always called */
24732 if (env->exception_callback_subprog)
24733 subprog_aux(env, env->exception_callback_subprog)->called = true;
24734
24735 again:
24736 new_cnt = 0;
24737 for (i = 1; i < env->subprog_cnt; i++) {
24738 if (!subprog_is_global(env, i))
24739 continue;
24740
24741 sub_aux = subprog_aux(env, i);
24742 if (!sub_aux->called || sub_aux->verified)
24743 continue;
24744
24745 env->insn_idx = env->subprog_info[i].start;
24746 WARN_ON_ONCE(env->insn_idx == 0);
24747 ret = do_check_common(env, i);
24748 if (ret) {
24749 return ret;
24750 } else if (env->log.level & BPF_LOG_LEVEL) {
24751 verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
24752 i, subprog_name(env, i));
24753 }
24754
24755 /* We verified new global subprog, it might have called some
24756 * more global subprogs that we haven't verified yet, so we
24757 * need to do another pass over subprogs to verify those.
24758 */
24759 sub_aux->verified = true;
24760 new_cnt++;
24761 }
24762
24763 /* We can't loop forever as we verify at least one global subprog on
24764 * each pass.
24765 */
24766 if (new_cnt)
24767 goto again;
24768
24769 return 0;
24770 }
24771
do_check_main(struct bpf_verifier_env * env)24772 static int do_check_main(struct bpf_verifier_env *env)
24773 {
24774 int ret;
24775
24776 env->insn_idx = 0;
24777 ret = do_check_common(env, 0);
24778 if (!ret)
24779 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
24780 return ret;
24781 }
24782
24783
print_verification_stats(struct bpf_verifier_env * env)24784 static void print_verification_stats(struct bpf_verifier_env *env)
24785 {
24786 int i;
24787
24788 if (env->log.level & BPF_LOG_STATS) {
24789 verbose(env, "verification time %lld usec\n",
24790 div_u64(env->verification_time, 1000));
24791 verbose(env, "stack depth ");
24792 for (i = 0; i < env->subprog_cnt; i++) {
24793 u32 depth = env->subprog_info[i].stack_depth;
24794
24795 verbose(env, "%d", depth);
24796 if (i + 1 < env->subprog_cnt)
24797 verbose(env, "+");
24798 }
24799 verbose(env, "\n");
24800 }
24801 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
24802 "total_states %d peak_states %d mark_read %d\n",
24803 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
24804 env->max_states_per_insn, env->total_states,
24805 env->peak_states, env->longest_mark_read_walk);
24806 }
24807
/* Store a private copy of the @cnt entries of @info in
 * prog->aux->ctx_arg_info and record @cnt in prog->aux->ctx_arg_info_size.
 *
 * Returns 0 on success, -ENOMEM if the copy could not be allocated. The
 * size field is written in both cases.
 */
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
			       const struct bpf_ctx_arg_aux *info, u32 cnt)
{
	struct bpf_prog_aux *aux = prog->aux;

	aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
	aux->ctx_arg_info_size = cnt;
	if (!aux->ctx_arg_info)
		return -ENOMEM;

	return 0;
}
24816
check_struct_ops_btf_id(struct bpf_verifier_env * env)24817 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
24818 {
24819 const struct btf_type *t, *func_proto;
24820 const struct bpf_struct_ops_desc *st_ops_desc;
24821 const struct bpf_struct_ops *st_ops;
24822 const struct btf_member *member;
24823 struct bpf_prog *prog = env->prog;
24824 bool has_refcounted_arg = false;
24825 u32 btf_id, member_idx, member_off;
24826 struct btf *btf;
24827 const char *mname;
24828 int i, err;
24829
24830 if (!prog->gpl_compatible) {
24831 verbose(env, "struct ops programs must have a GPL compatible license\n");
24832 return -EINVAL;
24833 }
24834
24835 if (!prog->aux->attach_btf_id)
24836 return -ENOTSUPP;
24837
24838 btf = prog->aux->attach_btf;
24839 if (btf_is_module(btf)) {
24840 /* Make sure st_ops is valid through the lifetime of env */
24841 env->attach_btf_mod = btf_try_get_module(btf);
24842 if (!env->attach_btf_mod) {
24843 verbose(env, "struct_ops module %s is not found\n",
24844 btf_get_name(btf));
24845 return -ENOTSUPP;
24846 }
24847 }
24848
24849 btf_id = prog->aux->attach_btf_id;
24850 st_ops_desc = bpf_struct_ops_find(btf, btf_id);
24851 if (!st_ops_desc) {
24852 verbose(env, "attach_btf_id %u is not a supported struct\n",
24853 btf_id);
24854 return -ENOTSUPP;
24855 }
24856 st_ops = st_ops_desc->st_ops;
24857
24858 t = st_ops_desc->type;
24859 member_idx = prog->expected_attach_type;
24860 if (member_idx >= btf_type_vlen(t)) {
24861 verbose(env, "attach to invalid member idx %u of struct %s\n",
24862 member_idx, st_ops->name);
24863 return -EINVAL;
24864 }
24865
24866 member = &btf_type_member(t)[member_idx];
24867 mname = btf_name_by_offset(btf, member->name_off);
24868 func_proto = btf_type_resolve_func_ptr(btf, member->type,
24869 NULL);
24870 if (!func_proto) {
24871 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
24872 mname, member_idx, st_ops->name);
24873 return -EINVAL;
24874 }
24875
24876 member_off = __btf_member_bit_offset(t, member) / 8;
24877 err = bpf_struct_ops_supported(st_ops, member_off);
24878 if (err) {
24879 verbose(env, "attach to unsupported member %s of struct %s\n",
24880 mname, st_ops->name);
24881 return err;
24882 }
24883
24884 if (st_ops->check_member) {
24885 err = st_ops->check_member(t, member, prog);
24886
24887 if (err) {
24888 verbose(env, "attach to unsupported member %s of struct %s\n",
24889 mname, st_ops->name);
24890 return err;
24891 }
24892 }
24893
24894 if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
24895 verbose(env, "Private stack not supported by jit\n");
24896 return -EACCES;
24897 }
24898
24899 for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
24900 if (st_ops_desc->arg_info[member_idx].info->refcounted) {
24901 has_refcounted_arg = true;
24902 break;
24903 }
24904 }
24905
24906 /* Tail call is not allowed for programs with refcounted arguments since we
24907 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
24908 */
24909 for (i = 0; i < env->subprog_cnt; i++) {
24910 if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
24911 verbose(env, "program with __ref argument cannot tail call\n");
24912 return -EINVAL;
24913 }
24914 }
24915
24916 prog->aux->st_ops = st_ops;
24917 prog->aux->attach_st_ops_member_off = member_off;
24918
24919 prog->aux->attach_func_proto = func_proto;
24920 prog->aux->attach_func_name = mname;
24921 env->ops = st_ops->verifier_ops;
24922
24923 return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
24924 st_ops_desc->arg_info[member_idx].cnt);
24925 }
24926 #define SECURITY_PREFIX "security_"
24927
check_attach_modify_return(unsigned long addr,const char * func_name)24928 static int check_attach_modify_return(unsigned long addr, const char *func_name)
24929 {
24930 if (within_error_injection_list(addr) ||
24931 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
24932 return 0;
24933
24934 return -EINVAL;
24935 }
24936
/* BTF ID set of functions that are on the ALLOW_ERROR_INJECTION list but
 * are nevertheless treated as non-sleepable.
 */
BTF_SET_START(btf_non_sleepable_error_inject)
/* The functions below can be called from both sleepable and non-sleepable
 * context. Assume non-sleepable from bpf safety point of view.
 * Two of the three entries are only present when the matching fault
 * injection config option is enabled.
 */
BTF_ID(func, __filemap_add_folio)
#ifdef CONFIG_FAIL_PAGE_ALLOC
BTF_ID(func, should_fail_alloc_page)
#endif
#ifdef CONFIG_FAILSLAB
BTF_ID(func, should_failslab)
#endif
BTF_SET_END(btf_non_sleepable_error_inject)
24952
24953 static int check_non_sleepable_error_inject(u32 btf_id)
24954 {
24955 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
24956 }
24957
/* Validate the attach target of a tracing, LSM or extension program.
 *
 * @log:      verifier log receiving the reason for a rejection
 * @prog:     program that is going to be attached
 * @tgt_prog: BPF program to attach to, or NULL when the target is described
 *            by prog->aux->attach_btf (kernel or module BTF)
 * @btf_id:   BTF type id of the target in the selected BTF; must be non-zero
 * @tgt_info: filled in on success only
 *
 * The BTF object is tgt_prog->aux->btf when @tgt_prog is given and
 * prog->aux->attach_btf otherwise. What is checked afterwards depends on
 * prog->expected_attach_type:
 *  - BPF_TRACE_RAW_TP: @btf_id must be a "btf_trace_"-prefixed typedef; the
 *    function prototype is taken from the tracepoint's bpf_func when its BTF
 *    can be found, otherwise from the typedef itself.
 *  - BPF_TRACE_ITER: @btf_id must be a function; its model is distilled
 *    into tgt_info->fmodel.
 *  - fentry/fexit/fsession/fmod_ret/LSM (and any attach type of an extension
 *    program): @btf_id must be a function, its address is resolved, and the
 *    sleepable / modify-return restrictions are enforced.
 *
 * On success tgt_info->tgt_addr, ->tgt_name, ->tgt_type and ->tgt_mod are
 * set. tgt_addr stays 0 for raw tracepoints and iterators, tgt_type is NULL
 * when attaching to an unreliable ("conservative") subprog of @tgt_prog, and
 * tgt_mod is the module returned by btf_try_get_module() for module BTF
 * targets (NULL otherwise). On error paths taken after the module was
 * obtained, it is released again with module_put().
 *
 * Returns 0 on success and a negative errno otherwise.
 */
int bpf_check_attach_target(struct bpf_verifier_log *log,
			    const struct bpf_prog *prog,
			    const struct bpf_prog *tgt_prog,
			    u32 btf_id,
			    struct bpf_attach_target_info *tgt_info)
{
	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
	char trace_symbol[KSYM_SYMBOL_LEN];
	const char prefix[] = "btf_trace_";
	struct bpf_raw_event_map *btp;
	int ret = 0, subprog = -1, i;
	const struct btf_type *t;
	bool conservative = true;
	const char *tname, *fname;
	struct btf *btf;
	long addr = 0;
	struct module *mod = NULL;

	if (!btf_id) {
		bpf_log(log, "Tracing programs must provide btf_id\n");
		return -EINVAL;
	}
	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
	if (!btf) {
		bpf_log(log,
			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
		return -EINVAL;
	}
	t = btf_type_by_id(btf, btf_id);
	if (!t) {
		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
		return -EINVAL;
	}
	tname = btf_name_by_offset(btf, t->name_off);
	if (!tname) {
		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
		return -EINVAL;
	}
	if (tgt_prog) {
		struct bpf_prog_aux *aux = tgt_prog->aux;
		bool tgt_changes_pkt_data;
		bool tgt_might_sleep;

		if (bpf_prog_is_dev_bound(prog->aux) &&
		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
			bpf_log(log, "Target program bound device mismatch\n");
			return -EINVAL;
		}

		/* find the subprog of the target whose func_info carries btf_id */
		for (i = 0; i < aux->func_info_cnt; i++)
			if (aux->func_info[i].type_id == btf_id) {
				subprog = i;
				break;
			}
		if (subprog == -1) {
			bpf_log(log, "Subprog %s doesn't exist\n", tname);
			return -EINVAL;
		}
		if (aux->func && aux->func[subprog]->aux->exception_cb) {
			bpf_log(log,
				"%s programs cannot attach to exception callback\n",
				prog_extension ? "Extension" : "FENTRY/FEXIT");
			return -EINVAL;
		}
		conservative = aux->func_info_aux[subprog].unreliable;
		if (prog_extension) {
			if (conservative) {
				bpf_log(log,
					"Cannot replace static functions\n");
				return -EINVAL;
			}
			if (!prog->jit_requested) {
				bpf_log(log,
					"Extension programs should be JITed\n");
				return -EINVAL;
			}
			/* use the per-subprog flag when the target has
			 * separate subprog programs, the program-wide
			 * flag otherwise
			 */
			tgt_changes_pkt_data = aux->func
					       ? aux->func[subprog]->aux->changes_pkt_data
					       : aux->changes_pkt_data;
			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
				bpf_log(log,
					"Extension program changes packet data, while original does not\n");
				return -EINVAL;
			}

			tgt_might_sleep = aux->func
					  ? aux->func[subprog]->aux->might_sleep
					  : aux->might_sleep;
			if (prog->aux->might_sleep && !tgt_might_sleep) {
				bpf_log(log,
					"Extension program may sleep, while original does not\n");
				return -EINVAL;
			}
		}
		if (!tgt_prog->jited) {
			bpf_log(log, "Can attach to only JITed progs\n");
			return -EINVAL;
		}
		if (prog_tracing) {
			if (aux->attach_tracing_prog) {
				/*
				 * Target program is an fentry/fexit which is already attached
				 * to another tracing program. More levels of nesting
				 * attachment are not allowed.
				 */
				bpf_log(log, "Cannot nest tracing program attach more than once\n");
				return -EINVAL;
			}
		} else if (tgt_prog->type == prog->type) {
			/*
			 * To avoid potential call chain cycles, prevent attaching of a
			 * program extension to another extension. It's ok to attach
			 * fentry/fexit to extension program.
			 */
			bpf_log(log, "Cannot recursively attach\n");
			return -EINVAL;
		}
		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
		    prog_extension &&
		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
			/* Program extensions can extend all program types
			 * except fentry/fexit. The reason is the following.
			 * The fentry/fexit programs are used for performance
			 * analysis, stats and can be attached to any program
			 * type. When extension program is replacing XDP function
			 * it is necessary to allow performance analysis of all
			 * functions. Both original XDP program and its program
			 * extension. Hence attaching fentry/fexit to
			 * BPF_PROG_TYPE_EXT is allowed. If extending of
			 * fentry/fexit was allowed it would be possible to create
			 * long call chain fentry->extension->fentry->extension
			 * beyond reasonable stack size. Hence extending fentry
			 * is not allowed.
			 */
			bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
			return -EINVAL;
		}
	} else {
		if (prog_extension) {
			bpf_log(log, "Cannot replace kernel functions\n");
			return -EINVAL;
		}
	}

	switch (prog->expected_attach_type) {
	case BPF_TRACE_RAW_TP:
		if (tgt_prog) {
			bpf_log(log,
				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
			return -EINVAL;
		}
		if (!btf_type_is_typedef(t)) {
			bpf_log(log, "attach_btf_id %u is not a typedef\n",
				btf_id);
			return -EINVAL;
		}
		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
				btf_id, tname);
			return -EINVAL;
		}
		/* from here on tname is the tracepoint name without the prefix */
		tname += sizeof(prefix) - 1;

		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
		 * names. Thus using bpf_raw_event_map to get argument names.
		 */
		btp = bpf_get_raw_tracepoint(tname);
		if (!btp)
			return -EINVAL;
		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
					trace_symbol);
		bpf_put_raw_tracepoint(btp);

		if (fname)
			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);

		if (!fname || ret < 0) {
			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
				prefix, tname);
			t = btf_type_by_id(btf, t->type);
			if (!btf_type_is_ptr(t))
				/* should never happen in valid vmlinux build */
				return -EINVAL;
		} else {
			t = btf_type_by_id(btf, ret);
			if (!btf_type_is_func(t))
				/* should never happen in valid vmlinux build */
				return -EINVAL;
		}

		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			/* should never happen in valid vmlinux build */
			return -EINVAL;

		break;
	case BPF_TRACE_ITER:
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;
		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret)
			return ret;
		break;
	default:
		/* any other attach type is only accepted for extension programs */
		if (!prog_extension)
			return -EINVAL;
		fallthrough;
	case BPF_MODIFY_RETURN:
	case BPF_LSM_MAC:
	case BPF_LSM_CGROUP:
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
	case BPF_TRACE_FSESSION:
		if (prog->expected_attach_type == BPF_TRACE_FSESSION &&
		    !bpf_jit_supports_fsession()) {
			bpf_log(log, "JIT does not support fsession\n");
			return -EOPNOTSUPP;
		}
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		if (prog_extension &&
		    btf_check_type_match(log, prog, btf, t))
			return -EINVAL;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;

		/* a previously recorded target must match the current one */
		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
			return -EINVAL;

		if (tgt_prog && conservative)
			t = NULL;

		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret < 0)
			return ret;

		if (tgt_prog) {
			if (subprog == 0)
				addr = (long) tgt_prog->bpf_func;
			else
				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
		} else {
			if (btf_is_module(btf)) {
				mod = btf_try_get_module(btf);
				if (mod)
					addr = find_kallsyms_symbol_value(mod, tname);
				else
					addr = 0;
			} else {
				addr = kallsyms_lookup_name(tname);
			}
			if (!addr) {
				module_put(mod);
				bpf_log(log,
					"The address of function %s cannot be found\n",
					tname);
				return -ENOENT;
			}
		}

		if (prog->sleepable) {
			/* rejected unless one of the cases below clears ret */
			ret = -EINVAL;
			switch (prog->type) {
			case BPF_PROG_TYPE_TRACING:

				/* fentry/fexit/fmod_ret progs can be sleepable if they are
				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
				 */
				if (!check_non_sleepable_error_inject(btf_id) &&
				    within_error_injection_list(addr))
					ret = 0;
				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
				 * in the fmodret id set with the KF_SLEEPABLE flag.
				 */
				else {
					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
										prog);

					if (flags && (*flags & KF_SLEEPABLE))
						ret = 0;
				}
				break;
			case BPF_PROG_TYPE_LSM:
				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
				 * Only some of them are sleepable.
				 */
				if (bpf_lsm_is_sleepable_hook(btf_id))
					ret = 0;
				break;
			default:
				break;
			}
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s is not sleepable\n", tname);
				return ret;
			}
		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
			if (tgt_prog) {
				module_put(mod);
				bpf_log(log, "can't modify return codes of BPF programs\n");
				return -EINVAL;
			}
			ret = -EINVAL;
			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
			    !check_attach_modify_return(addr, tname))
				ret = 0;
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s() is not modifiable\n", tname);
				return ret;
			}
		}

		break;
	}
	tgt_info->tgt_addr = addr;
	tgt_info->tgt_name = tname;
	tgt_info->tgt_type = t;
	tgt_info->tgt_mod = mod;
	return 0;
}
25295
/* Functions that BPF_PROG_TYPE_TRACING programs must not attach to:
 * check_attach_btf_id() rejects a tracing program whose attach_btf_id is
 * found in this set. Most entries depend on the kernel configuration.
 */
BTF_SET_START(btf_id_deny)
BTF_ID_UNUSED
#ifdef CONFIG_SMP
BTF_ID(func, ___migrate_enable)
BTF_ID(func, migrate_disable)
BTF_ID(func, migrate_enable)
#endif
#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
BTF_ID(func, rcu_read_unlock_strict)
#endif
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
BTF_ID(func, preempt_count_add)
BTF_ID(func, preempt_count_sub)
#endif
#ifdef CONFIG_PREEMPT_RCU
BTF_ID(func, __rcu_read_lock)
BTF_ID(func, __rcu_read_unlock)
#endif
BTF_SET_END(btf_id_deny)
25315
/* fexit, fsession and fmod_ret can't be used to attach to __noreturn
 * functions; check_attach_btf_id() rejects such programs when the target
 * is in this set.
 * Currently, we must manually list all __noreturn functions here. Once a more
 * robust solution is implemented, this workaround can be removed.
 */
BTF_SET_START(noreturn_deny)
#ifdef CONFIG_IA32_EMULATION
BTF_ID(func, __ia32_sys_exit)
BTF_ID(func, __ia32_sys_exit_group)
#endif
#ifdef CONFIG_KUNIT
BTF_ID(func, __kunit_abort)
BTF_ID(func, kunit_try_catch_throw)
#endif
#ifdef CONFIG_MODULES
BTF_ID(func, __module_put_and_kthread_exit)
#endif
#ifdef CONFIG_X86_64
BTF_ID(func, __x64_sys_exit)
BTF_ID(func, __x64_sys_exit_group)
#endif
BTF_ID(func, do_exit)
BTF_ID(func, do_group_exit)
BTF_ID(func, kthread_complete_and_exit)
BTF_ID(func, make_task_dead)
BTF_SET_END(noreturn_deny)
25341
25342 static bool can_be_sleepable(struct bpf_prog *prog)
25343 {
25344 if (prog->type == BPF_PROG_TYPE_TRACING) {
25345 switch (prog->expected_attach_type) {
25346 case BPF_TRACE_FENTRY:
25347 case BPF_TRACE_FEXIT:
25348 case BPF_MODIFY_RETURN:
25349 case BPF_TRACE_ITER:
25350 case BPF_TRACE_FSESSION:
25351 return true;
25352 default:
25353 return false;
25354 }
25355 }
25356 return prog->type == BPF_PROG_TYPE_LSM ||
25357 prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
25358 prog->type == BPF_PROG_TYPE_STRUCT_OPS;
25359 }
25360
check_attach_btf_id(struct bpf_verifier_env * env)25361 static int check_attach_btf_id(struct bpf_verifier_env *env)
25362 {
25363 struct bpf_prog *prog = env->prog;
25364 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
25365 struct bpf_attach_target_info tgt_info = {};
25366 u32 btf_id = prog->aux->attach_btf_id;
25367 struct bpf_trampoline *tr;
25368 int ret;
25369 u64 key;
25370
25371 if (prog->type == BPF_PROG_TYPE_SYSCALL) {
25372 if (prog->sleepable)
25373 /* attach_btf_id checked to be zero already */
25374 return 0;
25375 verbose(env, "Syscall programs can only be sleepable\n");
25376 return -EINVAL;
25377 }
25378
25379 if (prog->sleepable && !can_be_sleepable(prog)) {
25380 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
25381 return -EINVAL;
25382 }
25383
25384 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
25385 return check_struct_ops_btf_id(env);
25386
25387 if (prog->type != BPF_PROG_TYPE_TRACING &&
25388 prog->type != BPF_PROG_TYPE_LSM &&
25389 prog->type != BPF_PROG_TYPE_EXT)
25390 return 0;
25391
25392 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
25393 if (ret)
25394 return ret;
25395
25396 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
25397 /* to make freplace equivalent to their targets, they need to
25398 * inherit env->ops and expected_attach_type for the rest of the
25399 * verification
25400 */
25401 env->ops = bpf_verifier_ops[tgt_prog->type];
25402 prog->expected_attach_type = tgt_prog->expected_attach_type;
25403 }
25404
25405 /* store info about the attachment target that will be used later */
25406 prog->aux->attach_func_proto = tgt_info.tgt_type;
25407 prog->aux->attach_func_name = tgt_info.tgt_name;
25408 prog->aux->mod = tgt_info.tgt_mod;
25409
25410 if (tgt_prog) {
25411 prog->aux->saved_dst_prog_type = tgt_prog->type;
25412 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
25413 }
25414
25415 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
25416 prog->aux->attach_btf_trace = true;
25417 return 0;
25418 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
25419 return bpf_iter_prog_supported(prog);
25420 }
25421
25422 if (prog->type == BPF_PROG_TYPE_LSM) {
25423 ret = bpf_lsm_verify_prog(&env->log, prog);
25424 if (ret < 0)
25425 return ret;
25426 } else if (prog->type == BPF_PROG_TYPE_TRACING &&
25427 btf_id_set_contains(&btf_id_deny, btf_id)) {
25428 verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
25429 tgt_info.tgt_name);
25430 return -EINVAL;
25431 } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
25432 prog->expected_attach_type == BPF_TRACE_FSESSION ||
25433 prog->expected_attach_type == BPF_MODIFY_RETURN) &&
25434 btf_id_set_contains(&noreturn_deny, btf_id)) {
25435 verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
25436 tgt_info.tgt_name);
25437 return -EINVAL;
25438 }
25439
25440 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
25441 tr = bpf_trampoline_get(key, &tgt_info);
25442 if (!tr)
25443 return -ENOMEM;
25444
25445 if (tgt_prog && tgt_prog->aux->tail_call_reachable)
25446 tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
25447
25448 prog->aux->dst_trampoline = tr;
25449 return 0;
25450 }
25451
bpf_get_btf_vmlinux(void)25452 struct btf *bpf_get_btf_vmlinux(void)
25453 {
25454 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
25455 mutex_lock(&bpf_verifier_lock);
25456 if (!btf_vmlinux)
25457 btf_vmlinux = btf_parse_vmlinux();
25458 mutex_unlock(&bpf_verifier_lock);
25459 }
25460 return btf_vmlinux;
25461 }
25462
25463 /*
25464 * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
25465 * this case expect that every file descriptor in the array is either a map or
25466 * a BTF. Everything else is considered to be trash.
25467 */
add_fd_from_fd_array(struct bpf_verifier_env * env,int fd)25468 static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
25469 {
25470 struct bpf_map *map;
25471 struct btf *btf;
25472 CLASS(fd, f)(fd);
25473 int err;
25474
25475 map = __bpf_map_get(f);
25476 if (!IS_ERR(map)) {
25477 err = __add_used_map(env, map);
25478 if (err < 0)
25479 return err;
25480 return 0;
25481 }
25482
25483 btf = __btf_get_by_fd(f);
25484 if (!IS_ERR(btf)) {
25485 btf_get(btf);
25486 return __add_used_btf(env, btf);
25487 }
25488
25489 verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
25490 return PTR_ERR(map);
25491 }
25492
/* Remember attr->fd_array in env->fd_array and, when attr->fd_array_cnt is
 * non-zero, feed every descriptor of the array to add_fd_from_fd_array().
 *
 * Returns 0 on success, -EINVAL when fd_array_cnt is too big, -EFAULT when
 * an element cannot be copied, or the first non-zero value returned by
 * add_fd_from_fd_array().
 */
static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
{
	const size_t fd_size = sizeof(int);
	u32 idx = 0;
	int err;
	int fd;

	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);

	/*
	 * Old API (no fd_array_cnt given): nothing more to do here. With the
	 * new API the fd_array is expected to be continuous and is scanned
	 * for map fds right away.
	 */
	if (attr->fd_array_cnt == 0)
		return 0;

	/* Check for integer overflow */
	if (attr->fd_array_cnt >= (U32_MAX / fd_size)) {
		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
		return -EINVAL;
	}

	while (idx < attr->fd_array_cnt) {
		if (copy_from_bpfptr_offset(&fd, env->fd_array, idx * fd_size, fd_size))
			return -EFAULT;

		err = add_fd_from_fd_array(env, fd);
		if (err)
			return err;

		idx++;
	}

	return 0;
}
25527
/* Per-instruction state of the live registers analysis.
 * Each field is a register bitmask: bit N stands for register N, as the
 * masks are built with BIT(<register number>).
 */
struct insn_live_regs {
	u16 use;	/* registers read by instruction */
	u16 def;	/* registers written by instruction */
	u16 in;		/* registers that may be alive before instruction */
	u16 out;	/* registers that may be alive after instruction */
};
25535
/* Bitmask with 1s for all caller saved registers, i.e. the lowest
 * CALLER_SAVED_REGS bits (bits 0 .. CALLER_SAVED_REGS - 1) are set.
 */
#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
25538
25539 /* Compute info->{use,def} fields for the instruction */
compute_insn_live_regs(struct bpf_verifier_env * env,struct bpf_insn * insn,struct insn_live_regs * info)25540 static void compute_insn_live_regs(struct bpf_verifier_env *env,
25541 struct bpf_insn *insn,
25542 struct insn_live_regs *info)
25543 {
25544 struct call_summary cs;
25545 u8 class = BPF_CLASS(insn->code);
25546 u8 code = BPF_OP(insn->code);
25547 u8 mode = BPF_MODE(insn->code);
25548 u16 src = BIT(insn->src_reg);
25549 u16 dst = BIT(insn->dst_reg);
25550 u16 r0 = BIT(0);
25551 u16 def = 0;
25552 u16 use = 0xffff;
25553
25554 switch (class) {
25555 case BPF_LD:
25556 switch (mode) {
25557 case BPF_IMM:
25558 if (BPF_SIZE(insn->code) == BPF_DW) {
25559 def = dst;
25560 use = 0;
25561 }
25562 break;
25563 case BPF_LD | BPF_ABS:
25564 case BPF_LD | BPF_IND:
25565 /* stick with defaults */
25566 break;
25567 }
25568 break;
25569 case BPF_LDX:
25570 switch (mode) {
25571 case BPF_MEM:
25572 case BPF_MEMSX:
25573 def = dst;
25574 use = src;
25575 break;
25576 }
25577 break;
25578 case BPF_ST:
25579 switch (mode) {
25580 case BPF_MEM:
25581 def = 0;
25582 use = dst;
25583 break;
25584 }
25585 break;
25586 case BPF_STX:
25587 switch (mode) {
25588 case BPF_MEM:
25589 def = 0;
25590 use = dst | src;
25591 break;
25592 case BPF_ATOMIC:
25593 switch (insn->imm) {
25594 case BPF_CMPXCHG:
25595 use = r0 | dst | src;
25596 def = r0;
25597 break;
25598 case BPF_LOAD_ACQ:
25599 def = dst;
25600 use = src;
25601 break;
25602 case BPF_STORE_REL:
25603 def = 0;
25604 use = dst | src;
25605 break;
25606 default:
25607 use = dst | src;
25608 if (insn->imm & BPF_FETCH)
25609 def = src;
25610 else
25611 def = 0;
25612 }
25613 break;
25614 }
25615 break;
25616 case BPF_ALU:
25617 case BPF_ALU64:
25618 switch (code) {
25619 case BPF_END:
25620 use = dst;
25621 def = dst;
25622 break;
25623 case BPF_MOV:
25624 def = dst;
25625 if (BPF_SRC(insn->code) == BPF_K)
25626 use = 0;
25627 else
25628 use = src;
25629 break;
25630 default:
25631 def = dst;
25632 if (BPF_SRC(insn->code) == BPF_K)
25633 use = dst;
25634 else
25635 use = dst | src;
25636 }
25637 break;
25638 case BPF_JMP:
25639 case BPF_JMP32:
25640 switch (code) {
25641 case BPF_JA:
25642 def = 0;
25643 if (BPF_SRC(insn->code) == BPF_X)
25644 use = dst;
25645 else
25646 use = 0;
25647 break;
25648 case BPF_JCOND:
25649 def = 0;
25650 use = 0;
25651 break;
25652 case BPF_EXIT:
25653 def = 0;
25654 use = r0;
25655 break;
25656 case BPF_CALL:
25657 def = ALL_CALLER_SAVED_REGS;
25658 use = def & ~BIT(BPF_REG_0);
25659 if (get_call_summary(env, insn, &cs))
25660 use = GENMASK(cs.num_params, 1);
25661 break;
25662 default:
25663 def = 0;
25664 if (BPF_SRC(insn->code) == BPF_K)
25665 use = dst;
25666 else
25667 use = dst | src;
25668 }
25669 break;
25670 }
25671
25672 info->def = def;
25673 info->use = use;
25674 }
25675
25676 /* Compute may-live registers after each instruction in the program.
25677 * The register is live after the instruction I if it is read by some
25678 * instruction S following I during program execution and is not
25679 * overwritten between I and S.
25680 *
25681 * Store result in env->insn_aux_data[i].live_regs.
25682 */
compute_live_registers(struct bpf_verifier_env * env)25683 static int compute_live_registers(struct bpf_verifier_env *env)
25684 {
25685 struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
25686 struct bpf_insn *insns = env->prog->insnsi;
25687 struct insn_live_regs *state;
25688 int insn_cnt = env->prog->len;
25689 int err = 0, i, j;
25690 bool changed;
25691
25692 /* Use the following algorithm:
25693 * - define the following:
25694 * - I.use : a set of all registers read by instruction I;
25695 * - I.def : a set of all registers written by instruction I;
25696 * - I.in : a set of all registers that may be alive before I execution;
25697 * - I.out : a set of all registers that may be alive after I execution;
25698 * - insn_successors(I): a set of instructions S that might immediately
25699 * follow I for some program execution;
25700 * - associate separate empty sets 'I.in' and 'I.out' with each instruction;
25701 * - visit each instruction in a postorder and update
25702 * state[i].in, state[i].out as follows:
25703 *
25704 * state[i].out = U [state[s].in for S in insn_successors(i)]
25705 * state[i].in = (state[i].out / state[i].def) U state[i].use
25706 *
25707 * (where U stands for set union, / stands for set difference)
25708 * - repeat the computation while {in,out} fields changes for
25709 * any instruction.
25710 */
25711 state = kvzalloc_objs(*state, insn_cnt, GFP_KERNEL_ACCOUNT);
25712 if (!state) {
25713 err = -ENOMEM;
25714 goto out;
25715 }
25716
25717 for (i = 0; i < insn_cnt; ++i)
25718 compute_insn_live_regs(env, &insns[i], &state[i]);
25719
25720 changed = true;
25721 while (changed) {
25722 changed = false;
25723 for (i = 0; i < env->cfg.cur_postorder; ++i) {
25724 int insn_idx = env->cfg.insn_postorder[i];
25725 struct insn_live_regs *live = &state[insn_idx];
25726 struct bpf_iarray *succ;
25727 u16 new_out = 0;
25728 u16 new_in = 0;
25729
25730 succ = bpf_insn_successors(env, insn_idx);
25731 for (int s = 0; s < succ->cnt; ++s)
25732 new_out |= state[succ->items[s]].in;
25733 new_in = (new_out & ~live->def) | live->use;
25734 if (new_out != live->out || new_in != live->in) {
25735 live->in = new_in;
25736 live->out = new_out;
25737 changed = true;
25738 }
25739 }
25740 }
25741
25742 for (i = 0; i < insn_cnt; ++i)
25743 insn_aux[i].live_regs_before = state[i].in;
25744
25745 if (env->log.level & BPF_LOG_LEVEL2) {
25746 verbose(env, "Live regs before insn:\n");
25747 for (i = 0; i < insn_cnt; ++i) {
25748 if (env->insn_aux_data[i].scc)
25749 verbose(env, "%3d ", env->insn_aux_data[i].scc);
25750 else
25751 verbose(env, " ");
25752 verbose(env, "%3d: ", i);
25753 for (j = BPF_REG_0; j < BPF_REG_10; ++j)
25754 if (insn_aux[i].live_regs_before & BIT(j))
25755 verbose(env, "%d", j);
25756 else
25757 verbose(env, ".");
25758 verbose(env, " ");
25759 verbose_insn(env, &insns[i]);
25760 if (bpf_is_ldimm64(&insns[i]))
25761 i++;
25762 }
25763 }
25764
25765 out:
25766 kvfree(state);
25767 return err;
25768 }
25769
/*
 * Compute strongly connected components (SCCs) on the CFG.
 * Assign an SCC number to each instruction, recorded in
 * env->insn_aux_data[*].scc.
 * If instruction is a sole member of its SCC and there are no self edges,
 * assign it SCC number of zero.
 * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
 *
 * On success env->scc_info is allocated with one entry per SCC id handed
 * out plus the unused id zero, and env->scc_cnt is set to that count.
 *
 * Returns 0 on success and -ENOMEM if any allocation fails.
 */
static int compute_scc(struct bpf_verifier_env *env)
{
	/* value stored in low[] for vertices already popped from 'stack' */
	const u32 NOT_ON_STACK = U32_MAX;

	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	const u32 insn_cnt = env->prog->len;
	int stack_sz, dfs_sz, err = 0;
	u32 *stack, *pre, *low, *dfs;
	u32 i, j, t, w;
	u32 next_preorder_num;
	u32 next_scc_id;
	bool assign_scc;
	struct bpf_iarray *succ;

	/* both counters start at 1: zero means "not visited" / "no SCC" */
	next_preorder_num = 1;
	next_scc_id = 1;
	/*
	 * - 'stack' accumulates vertices in DFS order, see invariant comment below;
	 * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
	 * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
	 * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
	 */
	stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
	pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
	low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
	dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT);
	if (!stack || !pre || !low || !dfs) {
		err = -ENOMEM;
		goto exit;
	}
	/*
	 * References:
	 * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
	 * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
	 *
	 * The algorithm maintains the following invariant:
	 * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
	 * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
	 *
	 * Consequently:
	 * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
	 *   such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
	 *   and thus there is an SCC (loop) containing both 'u' and 'v'.
	 * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
	 *   and 'v' can be considered the root of some SCC.
	 *
	 * Here is a pseudo-code for an explicitly recursive version of the algorithm:
	 *
	 *    NOT_ON_STACK = insn_cnt + 1
	 *    pre = [0] * insn_cnt
	 *    low = [0] * insn_cnt
	 *    scc = [0] * insn_cnt
	 *    stack = []
	 *
	 *    next_preorder_num = 1
	 *    next_scc_id = 1
	 *
	 *    def recur(w):
	 *        nonlocal next_preorder_num
	 *        nonlocal next_scc_id
	 *
	 *        pre[w] = next_preorder_num
	 *        low[w] = next_preorder_num
	 *        next_preorder_num += 1
	 *        stack.append(w)
	 *        for s in successors(w):
	 *            # Note: for classic algorithm the block below should look as:
	 *            #
	 *            # if pre[s] == 0:
	 *            #     recur(s)
	 *            #     low[w] = min(low[w], low[s])
	 *            # elif low[s] != NOT_ON_STACK:
	 *            #     low[w] = min(low[w], pre[s])
	 *            #
	 *            # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
	 *            # does not break the invariant and makes iterative version of the algorithm
	 *            # simpler. See 'Algorithm #3' from [2].
	 *
	 *            # 's' not yet visited
	 *            if pre[s] == 0:
	 *                recur(s)
	 *            # if 's' is on stack, pick lowest reachable preorder number from it;
	 *            # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
	 *            # so 'min' would be a noop.
	 *            low[w] = min(low[w], low[s])
	 *
	 *        if low[w] == pre[w]:
	 *            # 'w' is the root of an SCC, pop all vertices
	 *            # below 'w' on stack and assign same SCC to them.
	 *            while True:
	 *                t = stack.pop()
	 *                low[t] = NOT_ON_STACK
	 *                scc[t] = next_scc_id
	 *                if t == w:
	 *                    break
	 *            next_scc_id += 1
	 *
	 *    for i in range(0, insn_cnt):
	 *        if pre[i] == 0:
	 *            recur(i)
	 *
	 * Below implementation replaces explicit recursion with array 'dfs'.
	 * (It uses U32_MAX rather than 'insn_cnt + 1' for NOT_ON_STACK.)
	 */
	for (i = 0; i < insn_cnt; i++) {
		/* already visited while exploring an earlier instruction */
		if (pre[i])
			continue;
		stack_sz = 0;
		dfs_sz = 1;
		dfs[0] = i;
dfs_continue:
		while (dfs_sz) {
			w = dfs[dfs_sz - 1];
			/* first time 'w' is seen: number it and push on 'stack' */
			if (pre[w] == 0) {
				low[w] = next_preorder_num;
				pre[w] = next_preorder_num;
				next_preorder_num++;
				stack[stack_sz++] = w;
			}
			/* Visit 'w' successors */
			succ = bpf_insn_successors(env, w);
			for (j = 0; j < succ->cnt; ++j) {
				if (pre[succ->items[j]]) {
					low[w] = min(low[w], low[succ->items[j]]);
				} else {
					/*
					 * Emulate the recursive call: descend into the
					 * unvisited successor; 'w' stays on 'dfs' and its
					 * successors are rescanned once we come back.
					 */
					dfs[dfs_sz++] = succ->items[j];
					goto dfs_continue;
				}
			}
			/*
			 * Preserve the invariant: if some vertex above in the stack
			 * is reachable from 'w', keep 'w' on the stack.
			 */
			if (low[w] < pre[w]) {
				dfs_sz--;
				goto dfs_continue;
			}
			/*
			 * Assign SCC number only if component has two or more elements,
			 * or if component has a self reference, or if instruction is a
			 * callback calling function (implicit loop).
			 */
			assign_scc = stack[stack_sz - 1] != w;	/* two or more elements? */
			for (j = 0; j < succ->cnt; ++j) {	/* self reference? */
				if (succ->items[j] == w) {
					assign_scc = true;
					break;
				}
			}
			if (bpf_calls_callback(env, w)) /* implicit loop? */
				assign_scc = true;
			/* Pop component elements from stack */
			do {
				t = stack[--stack_sz];
				low[t] = NOT_ON_STACK;
				if (assign_scc)
					aux[t].scc = next_scc_id;
			} while (t != w);
			if (assign_scc)
				next_scc_id++;
			dfs_sz--;
		}
	}
	/* SCC ids start at 1, so next_scc_id also covers the unused slot 0 */
	env->scc_info = kvzalloc_objs(*env->scc_info, next_scc_id,
				      GFP_KERNEL_ACCOUNT);
	if (!env->scc_info) {
		err = -ENOMEM;
		goto exit;
	}
	env->scc_cnt = next_scc_id;
exit:
	/* reached on both success and failure; the scratch arrays are always freed */
	kvfree(stack);
	kvfree(pre);
	kvfree(low);
	kvfree(dfs);
	return err;
}
25953
/*
 * bpf_check() - run the verifier over a BPF program.
 * @prog:       in/out. *prog is the program to verify; before returning
 *              through the common exit path, *prog is overwritten with
 *              env->prog.
 * @attr:       load attributes; log_level, log_buf, log_size and prog_flags
 *              are read directly here, the rest is handed to helpers.
 * @uattr:      pointer to the caller's copy of the attributes; used to write
 *              back log_true_size.
 * @uattr_size: size of the caller's attribute structure; log_true_size is
 *              only written back when the structure is large enough to
 *              contain it.
 *
 * Allocates a per-call 'struct bpf_verifier_env', runs the analysis passes
 * in order (each failure jumps to skip_full_check), then, only while
 * ret == 0, the post-verification rewrite passes. On success the used maps
 * and BTFs collected in env are copied into env->prog->aux.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	int i, len, ret = -EINVAL, err;
	u32 log_true_size;
	bool is_priv;

	BTF_TYPE_EMIT(enum bpf_features);

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
	if (!env)
		return -ENOMEM;

	env->bt.env = env;

	len = (*prog)->len;
	env->insn_aux_data =
		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	/* remember each instruction's index in the program as loaded */
	for (i = 0; i < len; i++)
		env->insn_aux_data[i].orig_idx = i;
	env->succ = iarray_realloc(NULL, 2);
	if (!env->succ)
		/* env->prog is not set yet, so err_unlock (which dereferences
		 * it) must not be used; err_free_env releases insn_aux_data.
		 */
		goto err_free_env;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];

	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);

	bpf_get_btf_vmlinux();

	/* grab the mutex to protect few globals used by verifier */
	if (!is_priv)
		mutex_lock(&bpf_verifier_lock);

	/* user could have requested verbose verifier output
	 * and supplied buffer to store the verification trace
	 */
	ret = bpf_vlog_init(&env->log, attr->log_level,
			    (char __user *) (unsigned long) attr->log_buf,
			    attr->log_size);
	if (ret)
		goto err_unlock;

	ret = process_fd_array(env, attr, uattr);
	if (ret)
		goto skip_full_check;

	mark_verifier_state_clean(env);

	if (IS_ERR(btf_vmlinux)) {
		/* Either gcc or pahole or kernel are broken. */
		verbose(env, "in-kernel BTF is malformed\n");
		ret = PTR_ERR(btf_vmlinux);
		goto skip_full_check;
	}

	/* Alignment policy, later assignments win: the caller's
	 * BPF_F_STRICT_ALIGNMENT request, forced on when the architecture
	 * lacks efficient unaligned access, and finally forced off by
	 * BPF_F_ANY_ALIGNMENT.
	 */
	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;
	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
		env->strict_alignment = false;

	/* test_state_freq is honoured only for privileged loads;
	 * test_reg_invariants is taken from prog_flags unconditionally.
	 */
	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;

	env->explored_states = kvzalloc_objs(struct list_head,
					     state_htab_size(env),
					     GFP_KERNEL_ACCOUNT);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	for (i = 0; i < state_htab_size(env); i++)
		INIT_LIST_HEAD(&env->explored_states[i]);
	INIT_LIST_HEAD(&env->free_list);

	ret = check_btf_info_early(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = add_subprog_and_kfunc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_subprogs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = resolve_pseudo_ldimm64(env);
	if (ret < 0)
		goto skip_full_check;

	if (bpf_prog_is_offloaded(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	ret = check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	ret = compute_postorder(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_stack_liveness_init(env);
	if (ret)
		goto skip_full_check;

	ret = check_attach_btf_id(env);
	if (ret)
		goto skip_full_check;

	ret = compute_scc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = compute_live_registers(env);
	if (ret < 0)
		goto skip_full_check;

	ret = mark_fastcall_patterns(env);
	if (ret < 0)
		goto skip_full_check;

	/* subprograms are checked only if the main program passed */
	ret = do_check_main(env);
	ret = ret ?: do_check_subprogs(env);

	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

skip_full_check:
	kvfree(env->explored_states);

	/* Every pass below is gated on ret == 0, so a failure above (or in
	 * an earlier pass here) skips all remaining passes.
	 */

	/* might decrease stack depth, keep it before passes that
	 * allocate additional slots.
	 */
	if (ret == 0)
		ret = remove_fastcall_spills_fills(env);

	if (ret == 0)
		ret = check_max_stack_depth(env);

	/* instruction rewrites happen after this point */
	if (ret == 0)
		ret = optimize_bpf_loop(env);

	if (is_priv) {
		if (ret == 0)
			opt_hard_wire_dead_code_branches(env);
		if (ret == 0)
			ret = opt_remove_dead_code(env);
		if (ret == 0)
			ret = opt_remove_nops(env);
	} else {
		if (ret == 0)
			sanitize_dead_code(env);
	}

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = convert_ctx_accesses(env);

	if (ret == 0)
		ret = do_misc_fixups(env);

	/* do 32-bit optimization after insn patching has done so those patched
	 * insns could be handled correctly.
	 */
	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
								     : false;
	}

	if (ret == 0)
		ret = fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);
	env->prog->aux->verified_insns = env->insn_processed;

	/* a log finalization error replaces ret; otherwise ret is kept */
	err = bpf_vlog_finalize(&env->log, &log_true_size);
	if (err)
		ret = err;

	/* report log_true_size only if the caller's attr struct has the field */
	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
				  &log_true_size, sizeof(log_true_size))) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret)
		goto err_release_maps;

	if (env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0],
							 env->used_map_cnt,
							 GFP_KERNEL_ACCOUNT);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;
	}
	if (env->used_btf_cnt) {
		/* if program passed verifier, update used_btfs in bpf_prog_aux */
		env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0],
							 env->used_btf_cnt,
							 GFP_KERNEL_ACCOUNT);
		if (!env->prog->aux->used_btfs) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_btfs, env->used_btfs,
		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
	}
	if (env->used_map_cnt || env->used_btf_cnt) {
		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

	adjust_btf_func(env);

err_release_maps:
	if (ret)
		release_insn_arrays(env);
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);
	if (!env->prog->aux->used_btfs)
		release_btfs(env);

	/* extension progs temporarily inherit the attach_type of their targets
	 * for verification purposes, so set it back to zero before returning
	 */
	if (env->prog->type == BPF_PROG_TYPE_EXT)
		env->prog->expected_attach_type = 0;

	*prog = env->prog;

	module_put(env->attach_btf_mod);
err_unlock:
	if (!is_priv)
		mutex_unlock(&bpf_verifier_lock);
	clear_insn_aux_data(env, 0, env->prog->len);
err_free_env:
	/* Freed below the label so the early env->succ allocation failure
	 * does not leak insn_aux_data; vfree(NULL) is a no-op when the
	 * insn_aux_data allocation itself failed.
	 */
	vfree(env->insn_aux_data);
	bpf_stack_liveness_free(env);
	kvfree(env->cfg.insn_postorder);
	kvfree(env->scc_info);
	kvfree(env->succ);
	kvfree(env->gotox_tmp_buf);
	kvfree(env);
	return ret;
}
26244