xref: /linux/kernel/bpf/verifier.c (revision 32e940f2bd3b16551f23ea44be47f6f5d1746d64)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
29 #include <linux/bpf_mem_alloc.h>
30 #include <net/xdp.h>
31 #include <linux/trace_events.h>
32 #include <linux/kallsyms.h>
33 
34 #include "disasm.h"
35 
/* Table mapping each bpf_prog_type to its verifier ops, generated by
 * expanding BPF_PROG_TYPE() entries from <linux/bpf_types.h>. The map
 * and link macros are defined empty so only program types contribute.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
46 
/* Feature bits reported to user space; values are ABI and must not change. */
enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
	BPF_FEAT_STREAMS	     = 1,
	__MAX_BPF_FEAT,
};
52 
/* Global per-cpu allocator; lazily initialized (see bpf_global_percpu_ma_set,
 * guarded elsewhere by bpf_percpu_ma_lock).
 */
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
55 
56 /* bpf_check() is a static code analyzer that walks eBPF program
57  * instruction by instruction and updates register/stack state.
58  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
59  *
60  * The first pass is depth-first-search to check that the program is a DAG.
61  * It rejects the following programs:
62  * - larger than BPF_MAXINSNS insns
63  * - if loop is present (detected via back-edge)
64  * - unreachable insns exist (shouldn't be a forest. program = one function)
65  * - out of bounds or malformed jumps
66  * The second pass is all possible path descent from the 1st insn.
67  * Since it's analyzing all paths through the program, the length of the
68  * analysis is limited to 64k insn, which may be hit even if total number of
69  * insn is less then 4K, but there are too many branches that change stack/regs.
70  * Number of 'branches to be analyzed' is limited to 1k
71  *
72  * On entry to each instruction, each register has a type, and the instruction
73  * changes the types of the registers depending on instruction semantics.
74  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
75  * copied to R1.
76  *
77  * All registers are 64-bit.
78  * R0 - return register
79  * R1-R5 argument passing registers
80  * R6-R9 callee saved registers
81  * R10 - frame pointer read-only
82  *
83  * At the start of BPF program the register R1 contains a pointer to bpf_context
84  * and has type PTR_TO_CTX.
85  *
86  * Verifier tracks arithmetic operations on pointers in case:
87  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
88  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
89  * 1st insn copies R10 (which has FRAME_PTR) type into R1
90  * and 2nd arithmetic instruction is pattern matched to recognize
91  * that it wants to construct a pointer to some element within stack.
92  * So after 2nd insn, the register R1 has type PTR_TO_STACK
93  * (and -20 constant is saved for further stack bounds checking).
94  * Meaning that this reg is a pointer to stack plus known immediate constant.
95  *
96  * Most of the time the registers have SCALAR_VALUE type, which
97  * means the register has some value, but it's not a valid pointer.
98  * (like pointer plus pointer becomes SCALAR_VALUE type)
99  *
100  * When verifier sees load or store instructions the type of base register
101  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
102  * four pointer types recognized by check_mem_access() function.
103  *
104  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
105  * and the range of [ptr, ptr + map's value_size) is accessible.
106  *
107  * registers used to pass values to function calls are checked against
108  * function argument constraints.
109  *
110  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
111  * It means that the register type passed to this function must be
112  * PTR_TO_STACK and it will be used inside the function as
113  * 'pointer to map element key'
114  *
115  * For example the argument constraints for bpf_map_lookup_elem():
116  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
117  *   .arg1_type = ARG_CONST_MAP_PTR,
118  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
119  *
120  * ret_type says that this function returns 'pointer to map elem value or null'
121  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
122  * 2nd argument should be a pointer to stack, which will be used inside
123  * the helper function as a pointer to map element key.
124  *
125  * On the kernel side the helper function looks like:
126  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
127  * {
128  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
129  *    void *key = (void *) (unsigned long) r2;
130  *    void *value;
131  *
132  *    here kernel can access 'key' and 'map' pointers safely, knowing that
133  *    [key, key + map->key_size) bytes are valid and were initialized on
134  *    the stack of eBPF program.
135  * }
136  *
137  * Corresponding eBPF program may look like:
138  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
139  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
140  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
141  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
142  * here verifier looks at prototype of map_lookup_elem() and sees:
143  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
144  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
145  *
146  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
147  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
148  * and were initialized prior to this call.
149  * If it's ok, then verifier allows this BPF_CALL insn and looks at
150  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
151  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
152  * returns either pointer to map value or NULL.
153  *
154  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
155  * insn, the register holding that pointer in the true branch changes state to
156  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
157  * branch. See check_cond_jmp_op().
158  *
159  * After the call R0 is set to return type of the function and registers R1-R5
160  * are set to NOT_INIT to indicate that they are no longer readable.
161  *
162  * The following reference types represent a potential reference to a kernel
163  * resource which, after first being allocated, must be checked and freed by
164  * the BPF program:
165  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
166  *
167  * When the verifier sees a helper call return a reference type, it allocates a
168  * pointer id for the reference and stores it in the current function state.
169  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
170  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
171  * passes through a NULL-check conditional. For the branch wherein the state is
172  * changed to CONST_IMM, the verifier releases the reference.
173  *
174  * For each helper function that allocates a reference, such as
175  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
176  * bpf_sk_release(). When a reference type passes into the release function,
177  * the verifier also releases the reference. If any unchecked or unreleased
178  * reference remains at the end of the program, the verifier rejects it.
179  */
180 
181 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* singly linked list forming the exploration stack */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
194 
195 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
196 #define BPF_COMPLEXITY_LIMIT_STATES	64
197 
198 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512
199 
200 #define BPF_PRIV_STACK_MIN_SIZE		64
201 
202 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx);
203 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id);
204 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
205 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
206 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
207 static int ref_set_non_owning(struct bpf_verifier_env *env,
208 			      struct bpf_reg_state *reg);
209 static bool is_trusted_reg(const struct bpf_reg_state *reg);
210 static inline bool in_sleepable_context(struct bpf_verifier_env *env);
211 static const char *non_sleepable_context_description(struct bpf_verifier_env *env);
212 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
213 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg);
214 
/* Record the map pointer seen at this insn. The unpriv flag is sticky:
 * once an insn has been marked unpriv it stays unpriv.
 */
static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      struct bpf_map *map,
			      bool unpriv, bool poison)
{
	bool sticky_unpriv = unpriv || bpf_map_ptr_unpriv(aux);

	aux->map_ptr_state.unpriv = sticky_unpriv;
	aux->map_ptr_state.poison = poison;
	aux->map_ptr_state.map_ptr = map;
}
224 
/* Record the map key state for this insn, preserving any existing poison
 * marker and noting that a key has been seen.
 */
static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	u64 poison_bit = bpf_map_key_poisoned(aux) ? BPF_MAP_KEY_POISON : 0ULL;

	aux->map_key_state = state | BPF_MAP_KEY_SEEN | poison_bit;
}
232 
/* Scratch state accumulated while verifying the arguments of a single
 * helper call (field semantics are established by the check_* code later
 * in this file; notes below reflect only what is visible here).
 */
struct bpf_call_arg_meta {
	struct bpf_map_desc map;
	bool raw_mode;
	bool pkt_access;
	u8 release_regno;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int ref_obj_id;
	int dynptr_id;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct btf_field *kptr_field;
	/* presumably a known-constant map key value; confirm against users */
	s64 const_map_key;
};
253 
/* Identity of a kfunc being verified: its BTF, prototype, name, flags and
 * BTF id.
 */
struct bpf_kfunc_meta {
	struct btf *btf;
	const struct btf_type *proto;
	const char *name;
	const u32 *flags;
	s32 id;
};
261 
262 struct btf *btf_vmlinux;
263 
/* Resolve a BTF type id to its name string. */
static const char *btf_type_name(const struct btf *btf, u32 id)
{
	const struct btf_type *t = btf_type_by_id(btf, id);

	return btf_name_by_offset(btf, t->name_off);
}
268 
269 static DEFINE_MUTEX(bpf_verifier_lock);
270 static DEFINE_MUTEX(bpf_percpu_ma_lock);
271 
/* Emit a printf-style message into the verifier log, if one was requested.
 * private_data is the bpf_verifier_env (matches bpf_verifier_log callbacks).
 */
__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
284 
verbose_invalid_scalar(struct bpf_verifier_env * env,struct bpf_reg_state * reg,struct bpf_retval_range range,const char * ctx,const char * reg_name)285 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
286 				   struct bpf_reg_state *reg,
287 				   struct bpf_retval_range range, const char *ctx,
288 				   const char *reg_name)
289 {
290 	bool unknown = true;
291 
292 	verbose(env, "%s the register %s has", ctx, reg_name);
293 	if (reg->smin_value > S64_MIN) {
294 		verbose(env, " smin=%lld", reg->smin_value);
295 		unknown = false;
296 	}
297 	if (reg->smax_value < S64_MAX) {
298 		verbose(env, " smax=%lld", reg->smax_value);
299 		unknown = false;
300 	}
301 	if (unknown)
302 		verbose(env, " unknown scalar value");
303 	verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
304 }
305 
reg_not_null(const struct bpf_reg_state * reg)306 static bool reg_not_null(const struct bpf_reg_state *reg)
307 {
308 	enum bpf_reg_type type;
309 
310 	type = reg->type;
311 	if (type_may_be_null(type))
312 		return false;
313 
314 	type = base_type(type);
315 	return type == PTR_TO_SOCKET ||
316 		type == PTR_TO_TCP_SOCK ||
317 		type == PTR_TO_MAP_VALUE ||
318 		type == PTR_TO_MAP_KEY ||
319 		type == PTR_TO_SOCK_COMMON ||
320 		(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
321 		(type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
322 		type == CONST_PTR_TO_MAP;
323 }
324 
reg_btf_record(const struct bpf_reg_state * reg)325 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
326 {
327 	struct btf_record *rec = NULL;
328 	struct btf_struct_meta *meta;
329 
330 	if (reg->type == PTR_TO_MAP_VALUE) {
331 		rec = reg->map_ptr->record;
332 	} else if (type_is_ptr_alloc_obj(reg->type)) {
333 		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
334 		if (meta)
335 			rec = meta->record;
336 	}
337 	return rec;
338 }
339 
bpf_subprog_is_global(const struct bpf_verifier_env * env,int subprog)340 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog)
341 {
342 	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
343 
344 	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
345 }
346 
/* True if the subprog's BTF prototype declares a void return type.
 * Returns false on any BTF lookup failure (and flags a verifier bug if the
 * func type id recorded in func_info cannot be resolved at all).
 */
static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog)
{
	const struct btf_type *type, *func, *func_proto;
	const struct btf *btf = env->prog->aux->btf;
	u32 btf_id;

	btf_id = env->prog->aux->func_info[subprog].type_id;

	func = btf_type_by_id(btf, btf_id);
	if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id))
		return false;

	/* FUNC -> FUNC_PROTO -> return type, skipping modifiers */
	func_proto = btf_type_by_id(btf, func->type);
	if (!func_proto)
		return false;

	type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
	if (!type)
		return false;

	return btf_type_is_void(type);
}
369 
subprog_name(const struct bpf_verifier_env * env,int subprog)370 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog)
371 {
372 	struct bpf_func_info *info;
373 
374 	if (!env->prog->aux->func_info)
375 		return "";
376 
377 	info = &env->prog->aux->func_info[subprog];
378 	return btf_type_name(env->prog->aux->btf, info->type_id);
379 }
380 
bpf_mark_subprog_exc_cb(struct bpf_verifier_env * env,int subprog)381 void bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
382 {
383 	struct bpf_subprog_info *info = subprog_info(env, subprog);
384 
385 	info->is_cb = true;
386 	info->is_async_cb = true;
387 	info->is_exception_cb = true;
388 }
389 
/* True if the subprog was marked as the exception callback. */
static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
{
	return subprog_info(env, subprog)->is_exception_cb;
}
394 
/* True if the pointed-to object contains a (resilient) spin lock field. */
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK);
}
399 
type_is_rdonly_mem(u32 type)400 static bool type_is_rdonly_mem(u32 type)
401 {
402 	return type & MEM_RDONLY;
403 }
404 
is_acquire_function(enum bpf_func_id func_id,const struct bpf_map * map)405 static bool is_acquire_function(enum bpf_func_id func_id,
406 				const struct bpf_map *map)
407 {
408 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
409 
410 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
411 	    func_id == BPF_FUNC_sk_lookup_udp ||
412 	    func_id == BPF_FUNC_skc_lookup_tcp ||
413 	    func_id == BPF_FUNC_ringbuf_reserve ||
414 	    func_id == BPF_FUNC_kptr_xchg)
415 		return true;
416 
417 	if (func_id == BPF_FUNC_map_lookup_elem &&
418 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
419 	     map_type == BPF_MAP_TYPE_SOCKHASH))
420 		return true;
421 
422 	return false;
423 }
424 
is_ptr_cast_function(enum bpf_func_id func_id)425 static bool is_ptr_cast_function(enum bpf_func_id func_id)
426 {
427 	return func_id == BPF_FUNC_tcp_sock ||
428 		func_id == BPF_FUNC_sk_fullsock ||
429 		func_id == BPF_FUNC_skc_to_tcp_sock ||
430 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
431 		func_id == BPF_FUNC_skc_to_udp6_sock ||
432 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
433 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
434 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
435 }
436 
/* True if the helper returns a slice referencing a dynptr's memory. */
static bool is_dynptr_ref_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_dynptr_data;
}
441 
442 static bool is_sync_callback_calling_kfunc(u32 btf_id);
443 static bool is_async_callback_calling_kfunc(u32 btf_id);
444 static bool is_callback_calling_kfunc(u32 btf_id);
445 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
446 
447 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id);
448 static bool is_task_work_add_kfunc(u32 func_id);
449 
is_sync_callback_calling_function(enum bpf_func_id func_id)450 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
451 {
452 	return func_id == BPF_FUNC_for_each_map_elem ||
453 	       func_id == BPF_FUNC_find_vma ||
454 	       func_id == BPF_FUNC_loop ||
455 	       func_id == BPF_FUNC_user_ringbuf_drain;
456 }
457 
/* Helpers whose callback runs asynchronously, after the helper returns. */
static bool is_async_callback_calling_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_timer_set_callback;
}
462 
is_callback_calling_function(enum bpf_func_id func_id)463 static bool is_callback_calling_function(enum bpf_func_id func_id)
464 {
465 	return is_sync_callback_calling_function(func_id) ||
466 	       is_async_callback_calling_function(func_id);
467 }
468 
bpf_is_sync_callback_calling_insn(struct bpf_insn * insn)469 bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn)
470 {
471 	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
472 	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
473 }
474 
bpf_is_async_callback_calling_insn(struct bpf_insn * insn)475 bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn)
476 {
477 	return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
478 	       (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
479 }
480 
/* Decide whether the async callback installed by this insn may sleep.
 * Every async-callback-installing insn must be classified here; anything
 * unrecognized is a verifier bug.
 */
static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	/* bpf_timer callbacks are never sleepable. */
	if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback)
		return false;

	/* bpf_wq and bpf_task_work callbacks are always sleepable. */
	if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
	    (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm)))
		return true;

	verifier_bug(env, "unhandled async callback in is_async_cb_sleepable");
	return false;
}
495 
/* True if the insn is a may_goto conditional jump (BPF_JCOND/BPF_MAY_GOTO). */
bool bpf_is_may_goto_insn(struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO;
}
500 
helper_multiple_ref_obj_use(enum bpf_func_id func_id,const struct bpf_map * map)501 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
502 					const struct bpf_map *map)
503 {
504 	int ref_obj_uses = 0;
505 
506 	if (is_ptr_cast_function(func_id))
507 		ref_obj_uses++;
508 	if (is_acquire_function(func_id, map))
509 		ref_obj_uses++;
510 	if (is_dynptr_ref_function(func_id))
511 		ref_obj_uses++;
512 
513 	return ref_obj_uses > 1;
514 }
515 
516 
is_spi_bounds_valid(struct bpf_func_state * state,int spi,int nr_slots)517 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
518 {
519        int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
520 
521        /* We need to check that slots between [spi - nr_slots + 1, spi] are
522 	* within [0, allocated_stack).
523 	*
524 	* Please note that the spi grows downwards. For example, a dynptr
525 	* takes the size of two stack slots; the first slot will be at
526 	* spi and the second slot will be at spi - 1.
527 	*/
528        return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
529 }
530 
/* Resolve a PTR_TO_STACK register to the stack slot index (spi) of an
 * on-stack object (dynptr/iter/irq_flag) occupying nr_slots slots.
 *
 * Returns the spi of the object's first (highest) slot, -EINVAL when the
 * pointer is not at a constant, slot-aligned offset or the object would
 * extend past the top of the stack, or -ERANGE when the slots fall outside
 * the currently allocated stack.
 */
static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			          const char *obj_kind, int nr_slots)
{
	int off, spi;

	if (!tnum_is_const(reg->var_off)) {
		verbose(env, "%s has to be at a constant offset\n", obj_kind);
		return -EINVAL;
	}

	off = reg->var_off.value;
	/* object slots must be BPF_REG_SIZE aligned */
	if (off % BPF_REG_SIZE) {
		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
		return -EINVAL;
	}

	spi = bpf_get_spi(off);
	/* the object needs nr_slots slots at and below spi */
	if (spi + 1 < nr_slots) {
		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
		return -EINVAL;
	}

	if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots))
		return -ERANGE;
	return spi;
}
557 
/* spi of an on-stack dynptr (occupies BPF_DYNPTR_NR_SLOTS slots). */
static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
}
562 
/* spi of an on-stack iterator occupying nr_slots slots. */
static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
}
567 
/* spi of an on-stack irq flag (single slot). */
static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	return stack_slot_obj_get_spi(env, reg, "irq_flag", 1);
}
572 
/* Map a helper argument's DYNPTR_TYPE_* flag to the corresponding dynptr
 * type; BPF_DYNPTR_TYPE_INVALID when no (single) known flag is set.
 */
static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
{
	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
	case DYNPTR_TYPE_LOCAL:
		return BPF_DYNPTR_TYPE_LOCAL;
	case DYNPTR_TYPE_RINGBUF:
		return BPF_DYNPTR_TYPE_RINGBUF;
	case DYNPTR_TYPE_SKB:
		return BPF_DYNPTR_TYPE_SKB;
	case DYNPTR_TYPE_XDP:
		return BPF_DYNPTR_TYPE_XDP;
	case DYNPTR_TYPE_SKB_META:
		return BPF_DYNPTR_TYPE_SKB_META;
	case DYNPTR_TYPE_FILE:
		return BPF_DYNPTR_TYPE_FILE;
	default:
		return BPF_DYNPTR_TYPE_INVALID;
	}
}
592 
/* Inverse of arg_to_dynptr_type(): map a dynptr type back to its
 * DYNPTR_TYPE_* flag, or 0 for an unknown type.
 */
static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
{
	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
		return DYNPTR_TYPE_LOCAL;
	case BPF_DYNPTR_TYPE_RINGBUF:
		return DYNPTR_TYPE_RINGBUF;
	case BPF_DYNPTR_TYPE_SKB:
		return DYNPTR_TYPE_SKB;
	case BPF_DYNPTR_TYPE_XDP:
		return DYNPTR_TYPE_XDP;
	case BPF_DYNPTR_TYPE_SKB_META:
		return DYNPTR_TYPE_SKB_META;
	case BPF_DYNPTR_TYPE_FILE:
		return DYNPTR_TYPE_FILE;
	default:
		return 0;
	}
}
612 
dynptr_type_refcounted(enum bpf_dynptr_type type)613 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
614 {
615 	return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE;
616 }
617 
618 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
619 			      enum bpf_dynptr_type type,
620 			      bool first_slot, int dynptr_id);
621 
622 
mark_dynptr_stack_regs(struct bpf_verifier_env * env,struct bpf_reg_state * sreg1,struct bpf_reg_state * sreg2,enum bpf_dynptr_type type)623 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
624 				   struct bpf_reg_state *sreg1,
625 				   struct bpf_reg_state *sreg2,
626 				   enum bpf_dynptr_type type)
627 {
628 	int id = ++env->id_gen;
629 
630 	__mark_dynptr_reg(sreg1, type, true, id);
631 	__mark_dynptr_reg(sreg2, type, false, id);
632 }
633 
/* Mark a callback argument register as a dynptr with a fresh id; a single
 * register, so it is always the "first slot".
 */
static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg,
			       enum bpf_dynptr_type type)
{
	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
}
640 
641 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
642 				        struct bpf_func_state *state, int spi);
643 
/* Initialize the two stack slots backing a new dynptr pointed to by reg.
 * Destroys any dynptrs previously overlapping those slots, marks the slots
 * STACK_DYNPTR, records the dynptr type, and for refcounted dynptr types
 * either acquires a new reference at insn_idx or shares clone_ref_obj_id.
 * Returns 0 on success or a negative error.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	enum bpf_dynptr_type type;
	int spi, i, err;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 *	[d1][d1][d2][d2]
	 * spi    3   2   1   0
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	/* STACK_DYNPTR is always set for all bytes of both slots */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type);

	if (dynptr_type_refcounted(type)) {
		/* The id is used to track proper releasing */
		int id;

		/* a clone shares its parent's reference instead of acquiring */
		if (clone_ref_obj_id)
			id = clone_ref_obj_id;
		else
			id = acquire_reference(env, insn_idx);

		if (id < 0)
			return id;

		state->stack[spi].spilled_ptr.ref_obj_id = id;
		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
	}

	return 0;
}
701 
invalidate_dynptr(struct bpf_verifier_env * env,struct bpf_func_state * state,int spi)702 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
703 {
704 	int i;
705 
706 	for (i = 0; i < BPF_REG_SIZE; i++) {
707 		state->stack[spi].slot_type[i] = STACK_INVALID;
708 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
709 	}
710 
711 	bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
712 	bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
713 }
714 
/* Release the dynptr pointed to by reg (e.g. via a dynptr release helper).
 * For refcounted dynptrs this also drops the reference, invalidates any
 * derived slices, and invalidates all clones sharing the same ref_obj_id.
 * Returns 0 on success or a negative error.
 */
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, ref_obj_id, i;

	/*
	 * This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
	 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
	 * is safe to do directly.
	 */
	if (reg->type == CONST_PTR_TO_DYNPTR) {
		verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
		return -EFAULT;
	}
	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* unreferenced dynptrs need no release bookkeeping */
	if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		invalidate_dynptr(env, state, spi);
		return 0;
	}

	ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;

	/* If the dynptr has a ref_obj_id, then we need to invalidate
	 * two things:
	 *
	 * 1) Any dynptrs with a matching ref_obj_id (clones)
	 * 2) Any slices derived from this dynptr.
	 */

	/* Invalidate any slices associated with this dynptr */
	WARN_ON_ONCE(release_reference(env, ref_obj_id));

	/* Invalidate any dynptr clones */
	for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
			continue;

		/* it should always be the case that if the ref obj id
		 * matches then the stack slot also belongs to a
		 * dynptr
		 */
		if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
			verifier_bug(env, "misconfigured ref_obj_id");
			return -EFAULT;
		}
		if (state->stack[i].spilled_ptr.dynptr.first_slot)
			invalidate_dynptr(env, state, i);
	}

	return 0;
}
769 
770 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
771 			       struct bpf_reg_state *reg);
772 
mark_reg_invalid(const struct bpf_verifier_env * env,struct bpf_reg_state * reg)773 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
774 {
775 	if (!env->allow_ptr_leaks)
776 		bpf_mark_reg_not_init(env, reg);
777 	else
778 		__mark_reg_unknown(env, reg);
779 }
780 
/* If the slot at spi belongs to a dynptr, destroy that dynptr (both slots)
 * and invalidate any slices derived from it. Overwriting a referenced
 * dynptr is only allowed when a clone with the same ref_obj_id remains,
 * so the reference can still be released later. Returns 0 on success
 * (including when the slot holds no dynptr) or -EINVAL.
 */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
				        struct bpf_func_state *state, int spi)
{
	struct bpf_func_state *fstate;
	struct bpf_reg_state *dreg;
	int i, dynptr_id;

	/* We always ensure that STACK_DYNPTR is never set partially,
	 * hence just checking for slot_type[0] is enough. This is
	 * different for STACK_SPILL, where it may be only set for
	 * 1 byte, so code has to use is_spilled_reg.
	 */
	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
		return 0;

	/* Reposition spi to first slot */
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		spi = spi + 1;

	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		int ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
		int ref_cnt = 0;

		/*
		 * A referenced dynptr can be overwritten only if there is at
		 * least one other dynptr sharing the same ref_obj_id,
		 * ensuring the reference can still be properly released.
		 */
		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			if (state->stack[i].slot_type[0] != STACK_DYNPTR)
				continue;
			if (!state->stack[i].spilled_ptr.dynptr.first_slot)
				continue;
			if (state->stack[i].spilled_ptr.ref_obj_id == ref_obj_id)
				ref_cnt++;
		}

		/* ref_cnt counts this dynptr itself, so <= 1 means no clone */
		if (ref_cnt <= 1) {
			verbose(env, "cannot overwrite referenced dynptr\n");
			return -EINVAL;
		}
	}

	mark_stack_slot_scratched(env, spi);
	mark_stack_slot_scratched(env, spi - 1);

	/* Writing partially to one dynptr stack slot destroys both. */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_INVALID;
		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
	}

	dynptr_id = state->stack[spi].spilled_ptr.id;
	/* Invalidate any slices associated with this dynptr */
	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
		/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
		if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
			continue;
		if (dreg->dynptr_id == dynptr_id)
			mark_reg_invalid(env, dreg);
	}));

	/* Do not release reference state, we are destroying dynptr on stack,
	 * not using some helper to release it. Just reset register.
	 */
	bpf_mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
	bpf_mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);

	return 0;
}
851 
is_dynptr_reg_valid_uninit(struct bpf_verifier_env * env,struct bpf_reg_state * reg)852 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
853 {
854 	int spi;
855 
856 	if (reg->type == CONST_PTR_TO_DYNPTR)
857 		return false;
858 
859 	spi = dynptr_get_spi(env, reg);
860 
861 	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
862 	 * error because this just means the stack state hasn't been updated yet.
863 	 * We will do check_mem_access to check and update stack bounds later.
864 	 */
865 	if (spi < 0 && spi != -ERANGE)
866 		return false;
867 
868 	/* We don't need to check if the stack slots are marked by previous
869 	 * dynptr initializations because we allow overwriting existing unreferenced
870 	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
871 	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
872 	 * touching are completely destructed before we reinitialize them for a new
873 	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
874 	 * instead of delaying it until the end where the user will get "Unreleased
875 	 * reference" error.
876 	 */
877 	return true;
878 }
879 
is_dynptr_reg_valid_init(struct bpf_verifier_env * env,struct bpf_reg_state * reg)880 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
881 {
882 	struct bpf_func_state *state = bpf_func(env, reg);
883 	int i, spi;
884 
885 	/* This already represents first slot of initialized bpf_dynptr.
886 	 *
887 	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
888 	 * check_func_arg_reg_off's logic, so we don't need to check its
889 	 * offset and alignment.
890 	 */
891 	if (reg->type == CONST_PTR_TO_DYNPTR)
892 		return true;
893 
894 	spi = dynptr_get_spi(env, reg);
895 	if (spi < 0)
896 		return false;
897 	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
898 		return false;
899 
900 	for (i = 0; i < BPF_REG_SIZE; i++) {
901 		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
902 		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
903 			return false;
904 	}
905 
906 	return true;
907 }
908 
is_dynptr_type_expected(struct bpf_verifier_env * env,struct bpf_reg_state * reg,enum bpf_arg_type arg_type)909 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
910 				    enum bpf_arg_type arg_type)
911 {
912 	struct bpf_func_state *state = bpf_func(env, reg);
913 	enum bpf_dynptr_type dynptr_type;
914 	int spi;
915 
916 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
917 	if (arg_type == ARG_PTR_TO_DYNPTR)
918 		return true;
919 
920 	dynptr_type = arg_to_dynptr_type(arg_type);
921 	if (reg->type == CONST_PTR_TO_DYNPTR) {
922 		return reg->dynptr.type == dynptr_type;
923 	} else {
924 		spi = dynptr_get_spi(env, reg);
925 		if (spi < 0)
926 			return false;
927 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
928 	}
929 }
930 
931 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
932 
933 static bool in_rcu_cs(struct bpf_verifier_env *env);
934 
935 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
936 
/* Initialize @nr_slots consecutive stack slots as an iterator object of
 * BTF type (@btf, @btf_id), acquiring a reference for it at @insn_idx.
 * Returns 0 on success or a negative error from slot/reference lookup.
 */
static int mark_stack_slots_iter(struct bpf_verifier_env *env,
				 struct bpf_kfunc_call_arg_meta *meta,
				 struct bpf_reg_state *reg, int insn_idx,
				 struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j, id;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	id = acquire_reference(env, insn_idx);
	if (id < 0)
		return id;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		__mark_reg_known_zero(st);
		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
		/* RCU-protected iterators are trusted (MEM_RCU) only while
		 * inside an RCU critical section; otherwise the slot is
		 * tagged untrusted and later rejected with -EPROTO by
		 * is_iter_reg_valid_init.
		 */
		if (is_kfunc_rcu_protected(meta)) {
			if (in_rcu_cs(env))
				st->type |= MEM_RCU;
			else
				st->type |= PTR_UNTRUSTED;
		}
		/* only the main (i == 0) slot carries the reference id */
		st->ref_obj_id = i == 0 ? id : 0;
		st->iter.btf = btf;
		st->iter.btf_id = btf_id;
		st->iter.state = BPF_ITER_STATE_ACTIVE;
		st->iter.depth = 0;

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_ITER;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}
979 
/* Tear down an iterator occupying @nr_slots stack slots: release the
 * reference held by the main (i == 0) slot, reset the slot registers and
 * mark every byte STACK_INVALID. Returns 0 or a negative spi error.
 */
static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		/* only the main slot holds ref_obj_id; a failed release
		 * here indicates a verifier-internal inconsistency
		 */
		if (i == 0)
			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));

		bpf_mark_reg_not_init(env, st);

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_INVALID;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}
1007 
is_iter_reg_valid_uninit(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int nr_slots)1008 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1009 				     struct bpf_reg_state *reg, int nr_slots)
1010 {
1011 	struct bpf_func_state *state = bpf_func(env, reg);
1012 	int spi, i, j;
1013 
1014 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1015 	 * will do check_mem_access to check and update stack bounds later, so
1016 	 * return true for that case.
1017 	 */
1018 	spi = iter_get_spi(env, reg, nr_slots);
1019 	if (spi == -ERANGE)
1020 		return true;
1021 	if (spi < 0)
1022 		return false;
1023 
1024 	for (i = 0; i < nr_slots; i++) {
1025 		struct bpf_stack_state *slot = &state->stack[spi - i];
1026 
1027 		for (j = 0; j < BPF_REG_SIZE; j++)
1028 			if (slot->slot_type[j] == STACK_ITER)
1029 				return false;
1030 	}
1031 
1032 	return true;
1033 }
1034 
/* Check that @reg points to a fully initialized iterator of type
 * (@btf, @btf_id) spanning @nr_slots slots. Returns 0 when valid,
 * -EPROTO when the slot was tagged untrusted at init time (see
 * mark_stack_slots_iter), -EINVAL for any other mismatch.
 */
static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return -EINVAL;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (st->type & PTR_UNTRUSTED)
			return -EPROTO;
		/* only main (first) slot has ref_obj_id set */
		if (i == 0 && !st->ref_obj_id)
			return -EINVAL;
		if (i != 0 && st->ref_obj_id)
			return -EINVAL;
		/* iterator must be of the exact expected BTF type */
		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
			return -EINVAL;

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] != STACK_ITER)
				return -EINVAL;
	}

	return 0;
}
1066 
1067 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
1068 static int release_irq_state(struct bpf_verifier_state *state, int id);
1069 
/* Mark the stack slot that @reg points to as holding saved IRQ flags:
 * acquire an IRQ reference state at @insn_idx, remember which kfunc class
 * saved the flags (so restore must use the same class), and type the slot
 * STACK_IRQ_FLAG. Returns 0 or a negative error.
 */
static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
				     struct bpf_kfunc_call_arg_meta *meta,
				     struct bpf_reg_state *reg, int insn_idx,
				     int kfunc_class)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	struct bpf_stack_state *slot;
	struct bpf_reg_state *st;
	int spi, i, id;

	spi = irq_flag_get_spi(env, reg);
	if (spi < 0)
		return spi;

	id = acquire_irq_state(env, insn_idx);
	if (id < 0)
		return id;

	slot = &state->stack[spi];
	st = &slot->spilled_ptr;

	__mark_reg_known_zero(st);
	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
	st->ref_obj_id = id;
	st->irq.kfunc_class = kfunc_class;

	for (i = 0; i < BPF_REG_SIZE; i++)
		slot->slot_type[i] = STACK_IRQ_FLAG;

	mark_stack_slot_scratched(env, spi);
	return 0;
}
1102 
unmark_stack_slot_irq_flag(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int kfunc_class)1103 static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1104 				      int kfunc_class)
1105 {
1106 	struct bpf_func_state *state = bpf_func(env, reg);
1107 	struct bpf_stack_state *slot;
1108 	struct bpf_reg_state *st;
1109 	int spi, i, err;
1110 
1111 	spi = irq_flag_get_spi(env, reg);
1112 	if (spi < 0)
1113 		return spi;
1114 
1115 	slot = &state->stack[spi];
1116 	st = &slot->spilled_ptr;
1117 
1118 	if (st->irq.kfunc_class != kfunc_class) {
1119 		const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1120 		const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
1121 
1122 		verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
1123 			flag_kfunc, used_kfunc);
1124 		return -EINVAL;
1125 	}
1126 
1127 	err = release_irq_state(env->cur_state, st->ref_obj_id);
1128 	WARN_ON_ONCE(err && err != -EACCES);
1129 	if (err) {
1130 		int insn_idx = 0;
1131 
1132 		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
1133 			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
1134 				insn_idx = env->cur_state->refs[i].insn_idx;
1135 				break;
1136 			}
1137 		}
1138 
1139 		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
1140 			env->cur_state->active_irq_id, insn_idx);
1141 		return err;
1142 	}
1143 
1144 	bpf_mark_reg_not_init(env, st);
1145 
1146 	for (i = 0; i < BPF_REG_SIZE; i++)
1147 		slot->slot_type[i] = STACK_INVALID;
1148 
1149 	mark_stack_slot_scratched(env, spi);
1150 	return 0;
1151 }
1152 
is_irq_flag_reg_valid_uninit(struct bpf_verifier_env * env,struct bpf_reg_state * reg)1153 static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1154 {
1155 	struct bpf_func_state *state = bpf_func(env, reg);
1156 	struct bpf_stack_state *slot;
1157 	int spi, i;
1158 
1159 	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1160 	 * will do check_mem_access to check and update stack bounds later, so
1161 	 * return true for that case.
1162 	 */
1163 	spi = irq_flag_get_spi(env, reg);
1164 	if (spi == -ERANGE)
1165 		return true;
1166 	if (spi < 0)
1167 		return false;
1168 
1169 	slot = &state->stack[spi];
1170 
1171 	for (i = 0; i < BPF_REG_SIZE; i++)
1172 		if (slot->slot_type[i] == STACK_IRQ_FLAG)
1173 			return false;
1174 	return true;
1175 }
1176 
is_irq_flag_reg_valid_init(struct bpf_verifier_env * env,struct bpf_reg_state * reg)1177 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1178 {
1179 	struct bpf_func_state *state = bpf_func(env, reg);
1180 	struct bpf_stack_state *slot;
1181 	struct bpf_reg_state *st;
1182 	int spi, i;
1183 
1184 	spi = irq_flag_get_spi(env, reg);
1185 	if (spi < 0)
1186 		return -EINVAL;
1187 
1188 	slot = &state->stack[spi];
1189 	st = &slot->spilled_ptr;
1190 
1191 	if (!st->ref_obj_id)
1192 		return -EINVAL;
1193 
1194 	for (i = 0; i < BPF_REG_SIZE; i++)
1195 		if (slot->slot_type[i] != STACK_IRQ_FLAG)
1196 			return -EINVAL;
1197 	return 0;
1198 }
1199 
1200 /* Check if given stack slot is "special":
1201  *   - spilled register state (STACK_SPILL);
1202  *   - dynptr state (STACK_DYNPTR);
1203  *   - iter state (STACK_ITER).
1204  *   - irq flag state (STACK_IRQ_FLAG)
1205  */
is_stack_slot_special(const struct bpf_stack_state * stack)1206 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1207 {
1208 	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1209 
1210 	switch (type) {
1211 	case STACK_SPILL:
1212 	case STACK_DYNPTR:
1213 	case STACK_ITER:
1214 	case STACK_IRQ_FLAG:
1215 		return true;
1216 	case STACK_INVALID:
1217 	case STACK_POISON:
1218 	case STACK_MISC:
1219 	case STACK_ZERO:
1220 		return false;
1221 	default:
1222 		WARN_ONCE(1, "unknown stack slot type %d\n", type);
1223 		return true;
1224 	}
1225 }
1226 
1227 /* The reg state of a pointer or a bounded scalar was saved when
1228  * it was spilled to the stack.
1229  */
1230 
1231 /*
1232  * Mark stack slot as STACK_MISC, unless it is already:
1233  * - STACK_INVALID, in which case they are equivalent.
1234  * - STACK_ZERO, in which case we preserve more precise STACK_ZERO.
1235  * - STACK_POISON, which truly forbids access to the slot.
1236  * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
1237  * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
1238  * unnecessary as both are considered equivalent when loading data and pruning,
1239  * in case of unprivileged mode it will be incorrect to allow reads of invalid
1240  * slots.
1241  */
static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
{
	switch (*stype) {
	case STACK_ZERO:
	case STACK_INVALID:
	case STACK_POISON:
		/* keep the more precise / more restrictive marking */
		return;
	default:
		*stype = STACK_MISC;
	}
}
1250 
/* Downgrade a spilled slot byte to MISC; POISON and INVALID stay as-is. */
static void scrub_spilled_slot(u8 *stype)
{
	if (*stype == STACK_INVALID || *stype == STACK_POISON)
		return;
	*stype = STACK_MISC;
}
1256 
1257 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1258  * small to hold src. This is different from krealloc since we don't want to preserve
1259  * the contents of dst.
1260  *
1261  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1262  * not be allocated.
1263  */
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
	size_t alloc_bytes;
	void *orig = dst;
	size_t bytes;

	if (ZERO_OR_NULL_PTR(src))
		goto out;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	/* Reuse dst's current allocation when it is already big enough;
	 * ksize() reports the usable size of that allocation.
	 */
	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
	dst = krealloc(orig, alloc_bytes, flags);
	if (!dst) {
		/* krealloc leaves the original buffer allocated on failure;
		 * free it here so callers only need to check for NULL.
		 */
		kfree(orig);
		return NULL;
	}

	memcpy(dst, src, bytes);
out:
	/* never return NULL for the "nothing to copy" case */
	return dst ? dst : ZERO_SIZE_PTR;
}
1287 
1288 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1289  * small to hold new_n items. new items are zeroed out if the array grows.
1290  *
1291  * Contrary to krealloc_array, does not free arr if new_n is zero.
1292  */
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
	size_t alloc_size;
	void *new_arr;

	/* No work when the count doesn't change; new_n == 0 keeps the old
	 * array rather than freeing it (contrary to krealloc_array).
	 */
	if (!new_n || old_n == new_n)
		goto out;

	/* size_mul() saturates on overflow, so an oversized request simply
	 * makes the krealloc below fail instead of wrapping around.
	 */
	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
	new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
	if (!new_arr) {
		/* krealloc keeps the old buffer on failure; free it here */
		kfree(arr);
		return NULL;
	}
	arr = new_arr;

	/* zero-initialize the newly appended items */
	if (new_n > old_n)
		memset(arr + old_n * size, 0, (new_n - old_n) * size);

out:
	return arr ? arr : ZERO_SIZE_PTR;
}
1315 
/* Copy all acquired-reference bookkeeping from @src to @dst, growing
 * dst->refs as needed. Returns -ENOMEM when the refs array cannot be
 * (re)allocated (copy_array has then already freed and NULLed dst->refs).
 */
static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
{
	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
			       sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
	if (!dst->refs)
		return -ENOMEM;

	dst->acquired_refs = src->acquired_refs;
	dst->active_locks = src->active_locks;
	dst->active_preempt_locks = src->active_preempt_locks;
	dst->active_rcu_locks = src->active_rcu_locks;
	dst->active_irq_id = src->active_irq_id;
	dst->active_lock_id = src->active_lock_id;
	dst->active_lock_ptr = src->active_lock_ptr;
	return 0;
}
1332 
copy_stack_state(struct bpf_func_state * dst,const struct bpf_func_state * src)1333 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1334 {
1335 	size_t n = src->allocated_stack / BPF_REG_SIZE;
1336 
1337 	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1338 				GFP_KERNEL_ACCOUNT);
1339 	if (!dst->stack)
1340 		return -ENOMEM;
1341 
1342 	dst->allocated_stack = src->allocated_stack;
1343 	return 0;
1344 }
1345 
/* Resize state->refs to hold @n entries, updating acquired_refs to match.
 * Returns 0, or -ENOMEM on failure (state->refs is then NULL).
 */
static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
{
	state->refs = realloc_array(state->refs, state->acquired_refs, n,
				    sizeof(struct bpf_reference_state));
	if (state->refs == NULL)
		return -ENOMEM;

	state->acquired_refs = n;
	return 0;
}
1356 
1357 /* Possibly update state->allocated_stack to be at least size bytes. Also
1358  * possibly update the function's high-water mark in its bpf_subprog_info.
1359  */
static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
{
	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;

	/* The stack size is always a multiple of BPF_REG_SIZE. */
	size = round_up(size, BPF_REG_SIZE);
	n = size / BPF_REG_SIZE;

	if (old_n >= n)
		return 0;

	/* realloc_array zero-fills the newly added slots and frees the old
	 * array on failure, so state->stack stays consistent either way.
	 */
	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
	if (!state->stack)
		return -ENOMEM;

	state->allocated_stack = size;

	/* update known max for given subprogram */
	if (env->subprog_info[state->subprogno].stack_depth < size)
		env->subprog_info[state->subprogno].stack_depth = size;

	return 0;
}
1383 
/* Acquire a new slot in state->refs for a reference acquired at insn_idx.
 * On success, returns a pointer to the new bpf_reference_state entry with
 * insn_idx recorded; the caller fills in its type and id.
 * On failure, returns NULL (out of memory).
 */
acquire_reference_state(struct bpf_verifier_env * env,int insn_idx)1389 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1390 {
1391 	struct bpf_verifier_state *state = env->cur_state;
1392 	int new_ofs = state->acquired_refs;
1393 	int err;
1394 
1395 	err = resize_reference_state(state, state->acquired_refs + 1);
1396 	if (err)
1397 		return NULL;
1398 	state->refs[new_ofs].insn_idx = insn_idx;
1399 
1400 	return &state->refs[new_ofs];
1401 }
1402 
acquire_reference(struct bpf_verifier_env * env,int insn_idx)1403 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
1404 {
1405 	struct bpf_reference_state *s;
1406 
1407 	s = acquire_reference_state(env, insn_idx);
1408 	if (!s)
1409 		return -ENOMEM;
1410 	s->type = REF_TYPE_PTR;
1411 	s->id = ++env->id_gen;
1412 	return s->id;
1413 }
1414 
/* Record acquisition of a lock-type reference with caller-supplied
 * @type/@id/@ptr, and make it the currently active lock.
 * Returns 0, or -ENOMEM.
 */
static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type,
			      int id, void *ptr)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reference_state *s;

	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = type;
	s->id = id;
	s->ptr = ptr;

	state->active_locks++;
	state->active_lock_id = id;
	state->active_lock_ptr = ptr;
	return 0;
}
1433 
/* Record acquisition of an IRQ-flag reference at @insn_idx and make it the
 * active one. Returns the freshly generated reference id, or -ENOMEM.
 */
static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_reference_state *s;

	s = acquire_reference_state(env, insn_idx);
	if (!s)
		return -ENOMEM;
	s->type = REF_TYPE_IRQ;
	s->id = ++env->id_gen;

	state->active_irq_id = s->id;
	return s->id;
}
1448 
release_reference_state(struct bpf_verifier_state * state,int idx)1449 static void release_reference_state(struct bpf_verifier_state *state, int idx)
1450 {
1451 	int last_idx;
1452 	size_t rem;
1453 
1454 	/* IRQ state requires the relative ordering of elements remaining the
1455 	 * same, since it relies on the refs array to behave as a stack, so that
1456 	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
1457 	 * the array instead of swapping the final element into the deleted idx.
1458 	 */
1459 	last_idx = state->acquired_refs - 1;
1460 	rem = state->acquired_refs - idx - 1;
1461 	if (last_idx && idx != last_idx)
1462 		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
1463 	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1464 	state->acquired_refs--;
1465 	return;
1466 }
1467 
find_reference_state(struct bpf_verifier_state * state,int ptr_id)1468 static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
1469 {
1470 	int i;
1471 
1472 	for (i = 0; i < state->acquired_refs; i++)
1473 		if (state->refs[i].id == ptr_id)
1474 			return true;
1475 
1476 	return false;
1477 }
1478 
/* Release the lock reference matching (@type, @id, @ptr). While scanning,
 * remember the most recent lock-type entry seen before the match, so that
 * after removal the "active lock" (id, ptr) can be reassigned to the
 * previous outstanding lock — or cleared to (0, NULL) when none precedes
 * it. Returns 0 on success, -EINVAL when no matching reference exists.
 */
static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
{
	void *prev_ptr = NULL;
	u32 prev_id = 0;
	int i;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type == type && state->refs[i].id == id &&
		    state->refs[i].ptr == ptr) {
			release_reference_state(state, i);
			state->active_locks--;
			/* Reassign active lock (id, ptr). */
			state->active_lock_id = prev_id;
			state->active_lock_ptr = prev_ptr;
			return 0;
		}
		if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
			prev_id = state->refs[i].id;
			prev_ptr = state->refs[i].ptr;
		}
	}
	return -EINVAL;
}
1502 
/* Release the IRQ reference with @id. Only the most recently acquired IRQ
 * state (state->active_irq_id) may be released; out-of-order restore is
 * rejected with -EACCES. On success the previously acquired IRQ id (if
 * any) becomes active again. Returns -EINVAL when no matching IRQ
 * reference is found.
 */
static int release_irq_state(struct bpf_verifier_state *state, int id)
{
	u32 prev_id = 0;
	int i;

	if (id != state->active_irq_id)
		return -EACCES;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type != REF_TYPE_IRQ)
			continue;
		if (state->refs[i].id == id) {
			release_reference_state(state, i);
			state->active_irq_id = prev_id;
			return 0;
		} else {
			/* track the IRQ ref preceding the released one */
			prev_id = state->refs[i].id;
		}
	}
	return -EINVAL;
}
1524 
find_lock_state(struct bpf_verifier_state * state,enum ref_state_type type,int id,void * ptr)1525 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type,
1526 						   int id, void *ptr)
1527 {
1528 	int i;
1529 
1530 	for (i = 0; i < state->acquired_refs; i++) {
1531 		struct bpf_reference_state *s = &state->refs[i];
1532 
1533 		if (!(s->type & type))
1534 			continue;
1535 
1536 		if (s->id == id && s->ptr == ptr)
1537 			return s;
1538 	}
1539 	return NULL;
1540 }
1541 
free_func_state(struct bpf_func_state * state)1542 static void free_func_state(struct bpf_func_state *state)
1543 {
1544 	if (!state)
1545 		return;
1546 	kfree(state->stack);
1547 	kfree(state);
1548 }
1549 
/* Drop the recorded jump history of @state and reset its count. */
void bpf_clear_jmp_history(struct bpf_verifier_state *state)
{
	kfree(state->jmp_history);
	state->jmp_history = NULL;
	state->jmp_history_cnt = 0;
}
1556 
/* Free everything owned by @state: each frame up to curframe, the
 * acquired-references array and the jump history. When @free_self is true,
 * the bpf_verifier_state struct itself is freed as well.
 */
void bpf_free_verifier_state(struct bpf_verifier_state *state,
			    bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	kfree(state->refs);
	bpf_clear_jmp_history(state);
	if (free_self)
		kfree(state);
}
1571 
1572 /* copy verifier state from src to dst growing dst stack space
1573  * when necessary to accommodate larger src stack
1574  */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	/* Bulk-copy every member located before 'stack'; the stack array
	 * itself needs a deep copy with possible reallocation.
	 */
	memcpy(dst, src, offsetof(struct bpf_func_state, stack));
	return copy_stack_state(dst, src);
}
1581 
/* Deep-copy verifier state @src into @dst_state: jump history, reference
 * state, bookkeeping scalars and every frame (allocating frames dst lacks,
 * freeing frames dst has beyond src->curframe). Returns 0 or -ENOMEM.
 */
int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state,
			   const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					  src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
					  GFP_KERNEL_ACCOUNT);
	if (!dst_state->jmp_history)
		return -ENOMEM;
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames then src frame, free them, this is also
	 * necessary in case of exceptional exits using bpf_throw.
	 */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	err = copy_reference_state(dst_state, src);
	if (err)
		return err;
	dst_state->speculative = src->speculative;
	dst_state->in_sleepable = src->in_sleepable;
	dst_state->curframe = src->curframe;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	dst_state->dfs_depth = src->dfs_depth;
	dst_state->callback_unroll_depth = src->callback_unroll_depth;
	dst_state->may_goto_depth = src->may_goto_depth;
	dst_state->equal_state = src->equal_state;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			/* dst lacks this frame; allocate it zeroed */
			dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}
1630 
state_htab_size(struct bpf_verifier_env * env)1631 static u32 state_htab_size(struct bpf_verifier_env *env)
1632 {
1633 	return env->prog->len;
1634 }
1635 
bpf_explored_state(struct bpf_verifier_env * env,int idx)1636 struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx)
1637 {
1638 	struct bpf_verifier_state *cur = env->cur_state;
1639 	struct bpf_func_state *state = cur->frame[cur->curframe];
1640 
1641 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1642 }
1643 
same_callsites(struct bpf_verifier_state * a,struct bpf_verifier_state * b)1644 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1645 {
1646 	int fr;
1647 
1648 	if (a->curframe != b->curframe)
1649 		return false;
1650 
1651 	for (fr = a->curframe; fr >= 0; fr--)
1652 		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1653 			return false;
1654 
1655 	return true;
1656 }
1657 
1658 
bpf_free_backedges(struct bpf_scc_visit * visit)1659 void bpf_free_backedges(struct bpf_scc_visit *visit)
1660 {
1661 	struct bpf_scc_backedge *backedge, *next;
1662 
1663 	for (backedge = visit->backedges; backedge; backedge = next) {
1664 		bpf_free_verifier_state(&backedge->state, false);
1665 		next = backedge->next;
1666 		kfree(backedge);
1667 	}
1668 	visit->backedges = NULL;
1669 }
1670 
/* Pop the top element of the verification stack: restore its saved state
 * into env->cur_state (when one exists), optionally rewind the verifier
 * log to the position recorded at push time, report the element's branch
 * instruction indices, then free it. Returns 0, -ENOENT on an empty
 * stack, or a copy error.
 */
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx, bool pop_log)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = bpf_copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (pop_log)
		bpf_vlog_reset(&env->log, head->log_pos);
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	/* unlink and free the popped element */
	elem = head->next;
	bpf_free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}
1699 
error_recoverable_with_nospec(int err)1700 static bool error_recoverable_with_nospec(int err)
1701 {
1702 	/* Should only return true for non-fatal errors that are allowed to
1703 	 * occur during speculative verification. For these we can insert a
1704 	 * nospec and the program might still be accepted. Do not include
1705 	 * something like ENOMEM because it is likely to re-occur for the next
1706 	 * architectural path once it has been recovered-from in all speculative
1707 	 * paths.
1708 	 */
1709 	return err == -EPERM || err == -EACCES || err == -EINVAL;
1710 }
1711 
/* Push a copy of the current verifier state onto the verification stack so
 * the other arm of a branch at @insn_idx can be explored later. Returns a
 * pointer to the pushed state, ERR_PTR(-ENOMEM) on allocation failure, or
 * ERR_PTR(-E2BIG) when the jump sequence exceeds the complexity limit.
 */
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
	if (!elem)
		return ERR_PTR(-ENOMEM);

	/* Link elem before copying the state: on the error paths below it is
	 * already reachable via env->head (presumably cleaned up by the
	 * generic stack teardown — not visible in this function).
	 */
	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	env->head = elem;
	env->stack_size++;
	err = bpf_copy_verifier_state(&elem->st, cur);
	if (err)
		return ERR_PTR(-ENOMEM);
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		return ERR_PTR(-E2BIG);
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 * instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 * a new state for a sequence of branches and all such current
		 * and cloned states will be pointing to a single parent state
		 * which might have large 'branches' count.
		 */
	}
	return &elem->st;
}
1753 
/* Registers that are caller-saved in the BPF calling convention and are
 * therefore marked unknown across calls (R0-R5).
 */
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
1757 
1758 /* This helper doesn't clear reg->id */
___mark_reg_known(struct bpf_reg_state * reg,u64 imm)1759 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1760 {
1761 	reg->var_off = tnum_const(imm);
1762 	reg->smin_value = (s64)imm;
1763 	reg->smax_value = (s64)imm;
1764 	reg->umin_value = imm;
1765 	reg->umax_value = imm;
1766 
1767 	reg->s32_min_value = (s32)imm;
1768 	reg->s32_max_value = (s32)imm;
1769 	reg->u32_min_value = (u32)imm;
1770 	reg->u32_max_value = (u32)imm;
1771 }
1772 
/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear off and union(map_ptr, range): zero everything between the
	 * end of 'type' and the start of 'var_off', leaving 'type' intact.
	 */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	/* id/ref_obj_id are not covered by the memset above, clear explicitly */
	reg->id = 0;
	reg->ref_obj_id = 0;
	___mark_reg_known(reg, imm);
}
1785 
/* Pin only the lower 32 bits of @reg to the constant @imm.  The upper half
 * of var_off and the 64-bit bounds are left untouched.
 */
static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
	u32 imm32 = (u32)imm;

	reg->var_off = tnum_const_subreg(reg->var_off, imm);
	reg->s32_min_value = reg->s32_max_value = (s32)imm32;
	reg->u32_min_value = reg->u32_max_value = imm32;
}
1794 
/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}
1802 
/* Mark @reg as the scalar constant zero. */
static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
	/* all scalars are assumed imprecise initially (unless unprivileged,
	 * in which case everything is forced to be precise)
	 */
	reg->precise = !env->bpf_capable;
}
1812 
/* Convenience wrapper: mark register @regno in @regs as known-zero. */
static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	__mark_reg_known_zero(&regs[regno]);
}
1818 
/* Initialize @reg as a dynptr register of the given @type.
 * @first_slot: whether this register corresponds to the first stack slot of
 *              the dynptr.
 * @dynptr_id:  unique id used to associate derived slices with this dynptr.
 */
static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
			      bool first_slot, int dynptr_id)
{
	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
	 */
	__mark_reg_known_zero(reg);
	reg->type = CONST_PTR_TO_DYNPTR;
	/* Give each dynptr a unique id to uniquely associate slices to it. */
	reg->id = dynptr_id;
	reg->dynptr.type = type;
	reg->dynptr.first_slot = first_slot;
}
1833 
mark_ptr_not_null_reg(struct bpf_reg_state * reg)1834 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1835 {
1836 	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1837 		const struct bpf_map *map = reg->map_ptr;
1838 
1839 		if (map->inner_map_meta) {
1840 			reg->type = CONST_PTR_TO_MAP;
1841 			reg->map_ptr = map->inner_map_meta;
1842 			/* transfer reg's id which is unique for every map_lookup_elem
1843 			 * as UID of the inner map.
1844 			 */
1845 			if (btf_record_has_field(map->inner_map_meta->record,
1846 						 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
1847 				reg->map_uid = reg->id;
1848 			}
1849 		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1850 			reg->type = PTR_TO_XDP_SOCK;
1851 		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1852 			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1853 			reg->type = PTR_TO_SOCKET;
1854 		} else {
1855 			reg->type = PTR_TO_MAP_VALUE;
1856 		}
1857 		return;
1858 	}
1859 
1860 	reg->type &= ~PTR_MAYBE_NULL;
1861 }
1862 
/* Mark register @regno as a pointer to the BTF-described node of the graph
 * data structure rooted at @ds_head (offset set to the node offset).
 */
static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
				struct btf_field_graph_root *ds_head)
{
	struct bpf_reg_state *reg = &regs[regno];

	__mark_reg_known(reg, ds_head->node_offset);
	reg->type = PTR_TO_BTF_ID | MEM_ALLOC;
	reg->btf = ds_head->btf;
	reg->btf_id = ds_head->value_btf_id;
}
1871 
/* True when @reg holds a packet pointer (PTR_TO_PACKET family). */
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}
1876 
reg_is_pkt_pointer_any(const struct bpf_reg_state * reg)1877 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1878 {
1879 	return reg_is_pkt_pointer(reg) ||
1880 	       reg->type == PTR_TO_PACKET_END;
1881 }
1882 
reg_is_dynptr_slice_pkt(const struct bpf_reg_state * reg)1883 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1884 {
1885 	return base_type(reg->type) == PTR_TO_MEM &&
1886 	       (reg->type &
1887 		(DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
1888 }
1889 
1890 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
reg_is_init_pkt_pointer(const struct bpf_reg_state * reg,enum bpf_reg_type which)1891 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1892 				    enum bpf_reg_type which)
1893 {
1894 	/* The register can already have a range from prior markings.
1895 	 * This is fine as long as it hasn't been advanced from its
1896 	 * origin.
1897 	 */
1898 	return reg->type == which &&
1899 	       reg->id == 0 &&
1900 	       tnum_equals_const(reg->var_off, 0);
1901 }
1902 
/* Reset the min/max bounds of a register to the full 64-bit and 32-bit
 * value ranges (i.e. "we know nothing about its value").
 */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	/* full 64-bit register */
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;

	/* 32-bit subregister */
	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}
1916 
__mark_reg64_unbounded(struct bpf_reg_state * reg)1917 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1918 {
1919 	reg->smin_value = S64_MIN;
1920 	reg->smax_value = S64_MAX;
1921 	reg->umin_value = 0;
1922 	reg->umax_value = U64_MAX;
1923 }
1924 
__mark_reg32_unbounded(struct bpf_reg_state * reg)1925 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1926 {
1927 	reg->s32_min_value = S32_MIN;
1928 	reg->s32_max_value = S32_MAX;
1929 	reg->u32_min_value = 0;
1930 	reg->u32_max_value = U32_MAX;
1931 }
1932 
reset_reg64_and_tnum(struct bpf_reg_state * reg)1933 static void reset_reg64_and_tnum(struct bpf_reg_state *reg)
1934 {
1935 	__mark_reg64_unbounded(reg);
1936 	reg->var_off = tnum_unknown;
1937 }
1938 
reset_reg32_and_tnum(struct bpf_reg_state * reg)1939 static void reset_reg32_and_tnum(struct bpf_reg_state *reg)
1940 {
1941 	__mark_reg32_unbounded(reg);
1942 	reg->var_off = tnum_unknown;
1943 }
1944 
/* Tighten the 32-bit subregister min/max bounds using the known bits in the
 * 32-bit portion of var_off.  Bounds only ever shrink here (max of mins,
 * min of maxes).
 */
static void __update_reg32_bounds(struct bpf_reg_state *reg)
{
	struct tnum var32_off = tnum_subreg(reg->var_off);

	/* min signed is max(sign bit) | min(other bits) */
	reg->s32_min_value = max_t(s32, reg->s32_min_value,
			var32_off.value | (var32_off.mask & S32_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->s32_max_value = min_t(s32, reg->s32_max_value,
			var32_off.value | (var32_off.mask & S32_MAX));
	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
	reg->u32_max_value = min(reg->u32_max_value,
				 (u32)(var32_off.value | var32_off.mask));
}
1959 
/* Tighten the 64-bit min/max bounds using the known bits in var_off, then
 * detect the cases where the u64 range and the set of values representable
 * by the tnum intersect in exactly one value, in which case the register
 * collapses to a known constant.
 */
static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
	u64 tnum_next, tmax;
	bool umin_in_tnum;

	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);

	/* Check if u64 and tnum overlap in a single value.
	 * tnum_next is the smallest tnum-representable value > umin;
	 * umin_in_tnum tells whether umin itself is representable;
	 * tmax is the largest tnum-representable value.
	 */
	tnum_next = tnum_step(reg->var_off, reg->umin_value);
	umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value;
	tmax = reg->var_off.value | reg->var_off.mask;
	if (umin_in_tnum && tnum_next > reg->umax_value) {
		/* The u64 range and the tnum only overlap in umin.
		 * u64:  ---[xxxxxx]-----
		 * tnum: --xx----------x-
		 */
		___mark_reg_known(reg, reg->umin_value);
	} else if (!umin_in_tnum && tnum_next == tmax) {
		/* The u64 range and the tnum only overlap in the maximum value
		 * represented by the tnum, called tmax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx-----x--------
		 */
		___mark_reg_known(reg, tmax);
	} else if (!umin_in_tnum && tnum_next <= reg->umax_value &&
		   tnum_step(reg->var_off, tnum_next) > reg->umax_value) {
		/* The u64 range and the tnum only overlap in between umin
		 * (excluded) and umax.
		 * u64:  ---[xxxxxx]-----
		 * tnum: xx----x-------x-
		 */
		___mark_reg_known(reg, tnum_next);
	}
}
2002 
/* Tighten both the 32-bit and 64-bit bounds from the known bits in var_off. */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	__update_reg32_bounds(reg);
	__update_reg64_bounds(reg);
}
2008 
/* Uses signed min/max values to inform unsigned, and vice-versa */
/* Derive tighter 32-bit subregister bounds from the 64-bit bounds. */
static void deduce_bounds_32_from_64(struct bpf_reg_state *reg)
{
	/* If upper 32 bits of u64/s64 range don't change, we can use lower 32
	 * bits to improve our u32/s32 boundaries.
	 *
	 * E.g., the case where we have upper 32 bits as zero ([10, 20] in
	 * u64) is pretty trivial, it's obvious that in u32 we'll also have
	 * [10, 20] range. But this property holds for any 64-bit range as
	 * long as upper 32 bits in that entire range of values stay the same.
	 *
	 * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311]
	 * in decimal) has the same upper 32 bits throughout all the values in
	 * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15])
	 * range.
	 *
	 * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32,
	 * following the rules outlined below about u64/s64 correspondence
	 * (which equally applies to u32 vs s32 correspondence). In general it
	 * depends on actual hexadecimal values of 32-bit range. They can form
	 * only valid u32, or only valid s32 ranges in some cases.
	 *
	 * So we use all these insights to derive bounds for subregisters here.
	 */
	if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) {
		/* u64 to u32 casting preserves validity of low 32 bits as
		 * a range, if upper 32 bits are the same
		 */
		reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value);
		reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value);

		/* the low 32 bits must additionally form a valid s32 range */
		if ((s32)reg->umin_value <= (s32)reg->umax_value) {
			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
		}
	}
	if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) {
		/* low 32 bits should form a proper u32 range */
		if ((u32)reg->smin_value <= (u32)reg->smax_value) {
			reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value);
			reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value);
		}
		/* low 32 bits should form a proper s32 range */
		if ((s32)reg->smin_value <= (s32)reg->smax_value) {
			reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
			reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
		}
	}
	/* Special case where upper bits form a small sequence of two
	 * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
	 * 0x00000000 is also valid), while lower bits form a proper s32 range
	 * going from negative numbers to positive numbers. E.g., let's say we
	 * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]).
	 * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff,
	 * 0x0000000000000000, 0x00000000000001}). Ignoring upper 32 bits,
	 * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]).
	 * Note that it doesn't have to be 0xffffffff going to 0x00000000 in
	 * upper 32 bits. As a random example, s64 range
	 * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range
	 * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister.
	 */
	if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) &&
	    (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value);
	}
	if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) &&
	    (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value);
	}
}
2081 
/* Cross-derive s32 bounds from u32 bounds and vice versa, within the 32-bit
 * subregister domain.
 */
static void deduce_bounds_32_from_32(struct bpf_reg_state *reg)
{
	/* if u32 range forms a valid s32 range (due to matching sign bit),
	 * try to learn from that
	 */
	if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) {
		reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value);
	}
	/* If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
		reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
	} else {
		/* the s32 range wraps across the sign boundary; learn only if
		 * the u32 range overlaps with a single end of it
		 */
		if (reg->u32_max_value < (u32)reg->s32_min_value) {
			/* See __reg64_deduce_bounds() for detailed explanation.
			 * Refine ranges in the following situation:
			 *
			 * 0                                                   U32_MAX
			 * |  [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]              |
			 * |----------------------------|----------------------------|
			 * |xxxxx s32 range xxxxxxxxx]                       [xxxxxxx|
			 * 0                     S32_MAX S32_MIN                    -1
			 */
			reg->s32_min_value = (s32)reg->u32_min_value;
			reg->u32_max_value = min_t(u32, reg->u32_max_value, reg->s32_max_value);
		} else if ((u32)reg->s32_max_value < reg->u32_min_value) {
			/*
			 * 0                                                   U32_MAX
			 * |              [xxxxxxxxxxxxxx u32 range xxxxxxxxxxxxxx]  |
			 * |----------------------------|----------------------------|
			 * |xxxxxxxxx]                       [xxxxxxxxxxxx s32 range |
			 * 0                     S32_MAX S32_MIN                    -1
			 */
			reg->s32_max_value = (s32)reg->u32_max_value;
			reg->u32_min_value = max_t(u32, reg->u32_min_value, reg->s32_min_value);
		}
	}
}
2124 
/* Cross-derive s64 bounds from u64 bounds and vice versa. */
static void deduce_bounds_64_from_64(struct bpf_reg_state *reg)
{
	/* If u64 range forms a valid s64 range (due to matching sign bit),
	 * try to learn from that. Let's do a bit of ASCII art to see when
	 * this is happening. Let's take u64 range first:
	 *
	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
	 * |-------------------------------|--------------------------------|
	 *
	 * Valid u64 range is formed when umin and umax are anywhere in the
	 * range [0, U64_MAX], and umin <= umax. u64 case is simple and
	 * straightforward. Let's see how s64 range maps onto the same range
	 * of values, annotated below the line for comparison:
	 *
	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
	 * |-------------------------------|--------------------------------|
	 * 0                        S64_MAX S64_MIN                        -1
	 *
	 * So s64 values basically start in the middle and they are logically
	 * contiguous to the right of it, wrapping around from -1 to 0, and
	 * then finishing as S64_MAX (0x7fffffffffffffff) right before
	 * S64_MIN. We can try drawing the continuity of u64 vs s64 values
	 * more visually as mapped to sign-agnostic range of hex values.
	 *
	 *  u64 start                                               u64 end
	 *  _______________________________________________________________
	 * /                                                               \
	 * 0             0x7fffffffffffffff 0x8000000000000000        U64_MAX
	 * |-------------------------------|--------------------------------|
	 * 0                        S64_MAX S64_MIN                        -1
	 *                                / \
	 * >------------------------------   ------------------------------->
	 * s64 continues...        s64 end   s64 start          s64 "midpoint"
	 *
	 * What this means is that, in general, we can't always derive
	 * something new about u64 from any random s64 range, and vice versa.
	 *
	 * But we can do that in two particular cases. One is when entire
	 * u64/s64 range is *entirely* contained within left half of the above
	 * diagram or when it is *entirely* contained in the right half. I.e.:
	 *
	 * |-------------------------------|--------------------------------|
	 *     ^                   ^            ^                 ^
	 *     A                   B            C                 D
	 *
	 * [A, B] and [C, D] are contained entirely in their respective halves
	 * and form valid contiguous ranges as both u64 and s64 values. [A, B]
	 * will be non-negative both as u64 and s64 (and in fact it will be
	 * identical ranges no matter the signedness). [C, D] treated as s64
	 * will be a range of negative values, while in u64 it will be
	 * non-negative range of values larger than 0x8000000000000000.
	 *
	 * Now, any other range here can't be represented in both u64 and s64
	 * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid
	 * contiguous u64 ranges, but they are discontinuous in s64. [B, C]
	 * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX],
	 * for example. Similarly, valid s64 range [D, A] (going from negative
	 * to positive values), would be two separate [D, U64_MAX] and [0, A]
	 * ranges as u64. Currently reg_state can't represent two segments per
	 * numeric domain, so in such situations we can only derive maximal
	 * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64).
	 *
	 * So we use these facts to derive umin/umax from smin/smax and vice
	 * versa only if they stay within the same "half". This is equivalent
	 * to checking sign bit: lower half will have sign bit as zero, upper
	 * half have sign bit 1. Below in code we simplify this by just
	 * casting umin/umax as smin/smax and checking if they form valid
	 * range, and vice versa. Those are equivalent checks.
	 */
	if ((s64)reg->umin_value <= (s64)reg->umax_value) {
		reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value);
		reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value);
	}
	/* If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if ((u64)reg->smin_value <= (u64)reg->smax_value) {
		reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
		reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
	} else {
		/* If the s64 range crosses the sign boundary, then it's split
		 * between the beginning and end of the U64 domain. In that
		 * case, we can derive new bounds if the u64 range overlaps
		 * with only one end of the s64 range.
		 *
		 * In the following example, the u64 range overlaps only with
		 * positive portion of the s64 range.
		 *
		 * 0                                                   U64_MAX
		 * |  [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx]              |
		 * |----------------------------|----------------------------|
		 * |xxxxx s64 range xxxxxxxxx]                       [xxxxxxx|
		 * 0                     S64_MAX S64_MIN                    -1
		 *
		 * We can thus derive the following new s64 and u64 ranges.
		 *
		 * 0                                                   U64_MAX
		 * |  [xxxxxx u64 range xxxxx]                               |
		 * |----------------------------|----------------------------|
		 * |  [xxxxxx s64 range xxxxx]                               |
		 * 0                     S64_MAX S64_MIN                    -1
		 *
		 * If they overlap in two places, we can't derive anything
		 * because reg_state can't represent two ranges per numeric
		 * domain.
		 *
		 * 0                                                   U64_MAX
		 * |  [xxxxxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxxxxx]        |
		 * |----------------------------|----------------------------|
		 * |xxxxx s64 range xxxxxxxxx]                    [xxxxxxxxxx|
		 * 0                     S64_MAX S64_MIN                    -1
		 *
		 * The first condition below corresponds to the first diagram
		 * above.
		 */
		if (reg->umax_value < (u64)reg->smin_value) {
			reg->smin_value = (s64)reg->umin_value;
			reg->umax_value = min_t(u64, reg->umax_value, reg->smax_value);
		} else if ((u64)reg->smax_value < reg->umin_value) {
			/* This second condition considers the case where the u64 range
			 * overlaps with the negative portion of the s64 range:
			 *
			 * 0                                                   U64_MAX
			 * |              [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx]  |
			 * |----------------------------|----------------------------|
			 * |xxxxxxxxx]                       [xxxxxxxxxxxx s64 range |
			 * 0                     S64_MAX S64_MIN                    -1
			 */
			reg->smax_value = (s64)reg->umax_value;
			reg->umin_value = max_t(u64, reg->umin_value, reg->smin_value);
		}
	}
}
2259 
/* Derive tighter 64-bit bounds from the 32-bit subregister bounds. */
static void deduce_bounds_64_from_32(struct bpf_reg_state *reg)
{
	/* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit
	 * values on both sides of 64-bit range in hope to have tighter range.
	 * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from
	 * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff].
	 * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound
	 * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of
	 * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a
	 * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff].
	 * We just need to make sure that derived bounds we are intersecting
	 * with are well-formed ranges in respective s64 or u64 domain, just
	 * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments.
	 */
	__u64 new_umin, new_umax;
	__s64 new_smin, new_smax;

	/* u32 -> u64 tightening, it's always well-formed */
	new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value;
	new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value;
	reg->umin_value = max_t(u64, reg->umin_value, new_umin);
	reg->umax_value = min_t(u64, reg->umax_value, new_umax);
	/* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */
	new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value;
	new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value;
	reg->smin_value = max_t(s64, reg->smin_value, new_smin);
	reg->smax_value = min_t(s64, reg->smax_value, new_smax);

	/* Here we would like to handle a special case after sign extending load,
	 * when upper bits for a 64-bit range are all 1s or all 0s.
	 *
	 * Upper bits are all 1s when register is in a range:
	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
	 * Upper bits are all 0s when register is in a range:
	 *   [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
	 * Together this forms are continuous range:
	 *   [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
	 *
	 * Now, suppose that register range is in fact tighter:
	 *   [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
	 * Also suppose that it's 32-bit range is positive,
	 * meaning that lower 32-bits of the full 64-bit register
	 * are in the range:
	 *   [0x0000_0000, 0x7fff_ffff] (W)
	 *
	 * If this happens, then any value in a range:
	 *   [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
	 * is smaller than a lowest bound of the range (R):
	 *   0xffff_ffff_8000_0000
	 * which means that upper bits of the full 64-bit register
	 * can't be all 1s, when lower bits are in range (W).
	 *
	 * Note that:
	 *  - 0xffff_ffff_8000_0000 == (s64)S32_MIN
	 *  - 0x0000_0000_7fff_ffff == (s64)S32_MAX
	 * These relations are used in the conditions below.
	 */
	if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
		reg->smin_value = reg->s32_min_value;
		reg->smax_value = reg->s32_max_value;
		reg->umin_value = reg->s32_min_value;
		reg->umax_value = reg->s32_max_value;
		reg->var_off = tnum_intersect(reg->var_off,
					      tnum_range(reg->smin_value, reg->smax_value));
	}
}
2326 
/* Run all cross-domain bound deductions (64<->64, 64->32, 32<->32, 32->64). */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	deduce_bounds_64_from_64(reg);
	deduce_bounds_32_from_64(reg);
	deduce_bounds_32_from_32(reg);
	deduce_bounds_64_from_32(reg);
}
2334 
/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	/* intersect the 64-bit tnum with the tnum implied by [umin, umax] */
	struct tnum var64_off = tnum_intersect(reg->var_off,
					       tnum_range(reg->umin_value,
							  reg->umax_value));
	/* likewise for the 32-bit subregister and [u32_min, u32_max] */
	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
					       tnum_range(reg->u32_min_value,
							  reg->u32_max_value));

	/* recombine: upper half from var64_off, lower half from var32_off */
	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}
2347 
2348 static bool range_bounds_violation(struct bpf_reg_state *reg);
2349 
/* Propagate information between var_off and the numeric min/max bounds of
 * @reg until they are mutually consistent.
 */
static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	/* If the input reg_state is invalid, we can exit early */
	if (range_bounds_violation(reg))
		return;
	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(reg);
	/* We might have learned something about the sign bit. */
	__reg_deduce_bounds(reg);
	/* NOTE(review): deduction is run twice on purpose, presumably so the
	 * second pass can exploit bounds the first pass just tightened —
	 * confirm before changing.
	 */
	__reg_deduce_bounds(reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
2368 
range_bounds_violation(struct bpf_reg_state * reg)2369 static bool range_bounds_violation(struct bpf_reg_state *reg)
2370 {
2371 	return (reg->umin_value > reg->umax_value || reg->smin_value > reg->smax_value ||
2372 		reg->u32_min_value > reg->u32_max_value ||
2373 		reg->s32_min_value > reg->s32_max_value);
2374 }
2375 
const_tnum_range_mismatch(struct bpf_reg_state * reg)2376 static bool const_tnum_range_mismatch(struct bpf_reg_state *reg)
2377 {
2378 	u64 uval = reg->var_off.value;
2379 	s64 sval = (s64)uval;
2380 
2381 	if (!tnum_is_const(reg->var_off))
2382 		return false;
2383 
2384 	return reg->umin_value != uval || reg->umax_value != uval ||
2385 	       reg->smin_value != sval || reg->smax_value != sval;
2386 }
2387 
const_tnum_range_mismatch_32(struct bpf_reg_state * reg)2388 static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg)
2389 {
2390 	u32 uval32 = tnum_subreg(reg->var_off).value;
2391 	s32 sval32 = (s32)uval32;
2392 
2393 	if (!tnum_subreg_is_const(reg->var_off))
2394 		return false;
2395 
2396 	return reg->u32_min_value != uval32 || reg->u32_max_value != uval32 ||
2397 	       reg->s32_min_value != sval32 || reg->s32_max_value != sval32;
2398 }
2399 
/* Verify that @reg's bounds and tnum satisfy the verifier's invariants.
 * On violation, report a verifier bug (including @ctx for context); if the
 * env is configured to test invariants, fail with -EFAULT, otherwise recover
 * by widening the register to fully unbounded.  Returns 0 on success or
 * recovery, -EFAULT only in test mode.
 */
static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, const char *ctx)
{
	const char *msg;

	if (range_bounds_violation(reg)) {
		msg = "range bounds violation";
		goto out;
	}

	if (const_tnum_range_mismatch(reg)) {
		msg = "const tnum out of sync with range bounds";
		goto out;
	}

	if (const_tnum_range_mismatch_32(reg)) {
		msg = "const subreg tnum out of sync with range bounds";
		goto out;
	}

	return 0;
out:
	verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
		     "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)",
		     ctx, msg, reg->umin_value, reg->umax_value,
		     reg->smin_value, reg->smax_value,
		     reg->u32_min_value, reg->u32_max_value,
		     reg->s32_min_value, reg->s32_max_value,
		     reg->var_off.value, reg->var_off.mask);
	if (env->test_reg_invariants)
		return -EFAULT;
	__mark_reg_unbounded(reg);
	return 0;
}
2434 
__reg32_bound_s64(s32 a)2435 static bool __reg32_bound_s64(s32 a)
2436 {
2437 	return a >= 0 && a <= S32_MAX;
2438 }
2439 
__reg_assign_32_into_64(struct bpf_reg_state * reg)2440 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2441 {
2442 	reg->umin_value = reg->u32_min_value;
2443 	reg->umax_value = reg->u32_max_value;
2444 
2445 	/* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
2446 	 * be positive otherwise set to worse case bounds and refine later
2447 	 * from tnum.
2448 	 */
2449 	if (__reg32_bound_s64(reg->s32_min_value) &&
2450 	    __reg32_bound_s64(reg->s32_max_value)) {
2451 		reg->smin_value = reg->s32_min_value;
2452 		reg->smax_value = reg->s32_max_value;
2453 	} else {
2454 		reg->smin_value = 0;
2455 		reg->smax_value = U32_MAX;
2456 	}
2457 }
2458 
/* Mark a register as having a completely unknown (scalar) value. */
void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
{
	/*
	 * Clear type, off, and union(map_ptr, range) and
	 * padding between 'type' and union.  Fields laid out at or after
	 * var_off are reset explicitly below instead.
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->id = 0;
	reg->ref_obj_id = 0;
	/* all bits unknown ... */
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	reg->precise = false;
	/* ... and widest possible u64/s64/u32/s32 ranges */
	__mark_reg_unbounded(reg);
}
2475 
2476 /* Mark a register as having a completely unknown (scalar) value,
2477  * initialize .precise as true when not bpf capable.
2478  */
__mark_reg_unknown(const struct bpf_verifier_env * env,struct bpf_reg_state * reg)2479 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2480 			       struct bpf_reg_state *reg)
2481 {
2482 	bpf_mark_reg_unknown_imprecise(reg);
2483 	reg->precise = !env->bpf_capable;
2484 }
2485 
/* Convenience wrapper: mark regs[regno] as an unknown scalar. */
static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	__mark_reg_unknown(env, &regs[regno]);
}
2491 
/* Intersect regs[regno]'s signed ranges with [s32_min, s32_max], then
 * re-derive the remaining bounds and sanity-check the result.
 */
static int __mark_reg_s32_range(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs,
				u32 regno,
				s32 s32_min,
				s32 s32_max)
{
	struct bpf_reg_state *reg = &regs[regno];

	/* tighten the 32-bit signed bounds ... */
	if (reg->s32_min_value < s32_min)
		reg->s32_min_value = s32_min;
	if (reg->s32_max_value > s32_max)
		reg->s32_max_value = s32_max;

	/* ... and the 64-bit signed bounds with the same interval */
	if (reg->smin_value < s32_min)
		reg->smin_value = s32_min;
	if (reg->smax_value > s32_max)
		reg->smax_value = s32_max;

	reg_bounds_sync(reg);

	return reg_bounds_sanity_check(env, reg, "s32_range");
}
2510 
/* Mark a register as uninitialized.  First wipe all tracked state via
 * __mark_reg_unknown() (which sets type to SCALAR_VALUE), then override
 * the type; the order of these two statements is essential.
 */
void bpf_mark_reg_not_init(const struct bpf_verifier_env *env,
			   struct bpf_reg_state *reg)
{
	__mark_reg_unknown(env, reg);
	reg->type = NOT_INIT;
}
2517 
/* Set up regs[regno] as the destination of a BTF-typed load.
 * Returns 0 on success, -EFAULT for an unexpected reg_type.
 */
static int mark_btf_ld_reg(struct bpf_verifier_env *env,
			   struct bpf_reg_state *regs, u32 regno,
			   enum bpf_reg_type reg_type,
			   struct btf *btf, u32 btf_id,
			   enum bpf_type_flag flag)
{
	switch (reg_type) {
	case SCALAR_VALUE:
		/* plain data load: result is an unknown scalar */
		mark_reg_unknown(env, regs, regno);
		return 0;
	case PTR_TO_BTF_ID:
		/* pointer with known BTF type; offset starts at 0 */
		mark_reg_known_zero(env, regs, regno);
		regs[regno].type = PTR_TO_BTF_ID | flag;
		regs[regno].btf = btf;
		regs[regno].btf_id = btf_id;
		/* give maybe-NULL pointers a fresh id — NOTE(review):
		 * presumably so later NULL checks can be correlated; confirm
		 * against the mark_ptr_or_null_reg() machinery.
		 */
		if (type_may_be_null(flag))
			regs[regno].id = ++env->id_gen;
		return 0;
	case PTR_TO_MEM:
		mark_reg_known_zero(env, regs, regno);
		regs[regno].type = PTR_TO_MEM | flag;
		regs[regno].mem_size = 0;
		return 0;
	default:
		verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
		return -EFAULT;
	}
}
2546 
/* subreg_def value meaning "no pending 32-bit subregister definition" */
#define DEF_NOT_SUBREG	(0)
/* Initialize all registers of a function frame: everything NOT_INIT
 * except the frame pointer, which is a known-zero PTR_TO_STACK tagged
 * with the frame number.
 */
static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		bpf_mark_reg_not_init(env, &regs[i]);
		regs[i].subreg_def = DEF_NOT_SUBREG;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;
}
2564 
/* Build a [minval, maxval] return-value range. */
static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
{
	/* return_32bit starts out false; callers that need 32-bit
	 * semantics flip it explicitly.
	 */
	struct bpf_retval_range range = { minval, maxval, false };

	return range;
}
2573 
/* Initialize a bpf_func_state for frame 'frameno' of subprog
 * 'subprogno', called from instruction 'callsite'.  state->frameno must
 * be set before init_reg_state(), which copies it into BPF_REG_FP.
 */
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	/* default callback return range; widened by specific callbacks */
	state->callback_ret_range = retval_range(0, 0);
	init_reg_state(env, state);
	mark_verifier_state_scratched(env);
}
2585 
/* Similar to push_stack(), but for async callbacks.  Queues a fresh
 * verification state for 'subprog' starting at 'insn_idx'; unlike a
 * normal branch it does not inherit the caller's register/stack state.
 * Returns the new state or an ERR_PTR on failure.
 */
static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
						int insn_idx, int prev_insn_idx,
						int subprog, bool is_sleepable)
{
	struct bpf_verifier_stack_elem *elem;
	struct bpf_func_state *frame;

	elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT);
	if (!elem)
		return ERR_PTR(-ENOMEM);

	/* link onto the exploration stack before the limit check; on the
	 * -E2BIG path below elem is already reachable via env->head, so it
	 * is presumably released by the caller's stack teardown — no
	 * kfree() here.
	 */
	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	env->head = elem;
	env->stack_size++;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env,
			"The sequence of %d jumps is too complex for async cb.\n",
			env->stack_size);
		return ERR_PTR(-E2BIG);
	}
	/* Unlike push_stack() do not bpf_copy_verifier_state().
	 * The caller state doesn't matter.
	 * This is async callback. It starts in a fresh stack.
	 * Initialize it similar to do_check_common().
	 */
	elem->st.branches = 1;
	elem->st.in_sleepable = is_sleepable;
	frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT);
	if (!frame)
		return ERR_PTR(-ENOMEM);
	init_func_state(env, frame,
			BPF_MAIN_FUNC /* callsite */,
			0 /* frameno within this callchain */,
			subprog /* subprog number within this prog */);
	elem->st.frame[0] = frame;
	return &elem->st;
}
2627 
2628 
cmp_subprogs(const void * a,const void * b)2629 static int cmp_subprogs(const void *a, const void *b)
2630 {
2631 	return ((struct bpf_subprog_info *)a)->start -
2632 	       ((struct bpf_subprog_info *)b)->start;
2633 }
2634 
2635 /* Find subprogram that contains instruction at 'off' */
bpf_find_containing_subprog(struct bpf_verifier_env * env,int off)2636 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
2637 {
2638 	struct bpf_subprog_info *vals = env->subprog_info;
2639 	int l, r, m;
2640 
2641 	if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0)
2642 		return NULL;
2643 
2644 	l = 0;
2645 	r = env->subprog_cnt - 1;
2646 	while (l < r) {
2647 		m = l + (r - l + 1) / 2;
2648 		if (vals[m].start <= off)
2649 			l = m;
2650 		else
2651 			r = m - 1;
2652 	}
2653 	return &vals[l];
2654 }
2655 
2656 /* Find subprogram that starts exactly at 'off' */
bpf_find_subprog(struct bpf_verifier_env * env,int off)2657 int bpf_find_subprog(struct bpf_verifier_env *env, int off)
2658 {
2659 	struct bpf_subprog_info *p;
2660 
2661 	p = bpf_find_containing_subprog(env, off);
2662 	if (!p || p->start != off)
2663 		return -ENOENT;
2664 	return p - env->subprog_info;
2665 }
2666 
add_subprog(struct bpf_verifier_env * env,int off)2667 static int add_subprog(struct bpf_verifier_env *env, int off)
2668 {
2669 	int insn_cnt = env->prog->len;
2670 	int ret;
2671 
2672 	if (off >= insn_cnt || off < 0) {
2673 		verbose(env, "call to invalid destination\n");
2674 		return -EINVAL;
2675 	}
2676 	ret = bpf_find_subprog(env, off);
2677 	if (ret >= 0)
2678 		return ret;
2679 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2680 		verbose(env, "too many subprograms\n");
2681 		return -E2BIG;
2682 	}
2683 	/* determine subprog starts. The end is one before the next starts */
2684 	env->subprog_info[env->subprog_cnt++].start = off;
2685 	sort(env->subprog_info, env->subprog_cnt,
2686 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2687 	return env->subprog_cnt - 1;
2688 }
2689 
/* Locate the exception callback declared via the
 * "exception_callback:<name>" BTF decl tag on the main subprog.
 * Returns the callback's insn_off (> 0), 0 when no tag is present, or a
 * negative error.
 */
static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	struct btf *btf = aux->btf;
	const struct btf_type *t;
	u32 main_btf_id, id;
	const char *name;
	int ret, i;

	/* Non-zero func_info_cnt implies valid btf */
	if (!aux->func_info_cnt)
		return 0;
	main_btf_id = aux->func_info[0].type_id;

	t = btf_type_by_id(btf, main_btf_id);
	if (!t) {
		verbose(env, "invalid btf id for main subprog in func_info\n");
		return -EINVAL;
	}

	/* the tag's value names the callback function */
	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
	if (IS_ERR(name)) {
		ret = PTR_ERR(name);
		/* If there is no tag present, there is no exception callback */
		if (ret == -ENOENT)
			ret = 0;
		else if (ret == -EEXIST)
			verbose(env, "multiple exception callback tags for main subprog\n");
		return ret;
	}

	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
	if (ret < 0) {
		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
		return ret;
	}
	id = ret;
	t = btf_type_by_id(btf, id);
	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
		verbose(env, "exception callback '%s' must have global linkage\n", name);
		return -EINVAL;
	}
	/* map the callback's BTF id back to an instruction offset */
	ret = 0;
	for (i = 0; i < aux->func_info_cnt; i++) {
		if (aux->func_info[i].type_id != id)
			continue;
		ret = aux->func_info[i].insn_off;
		/* Further func_info and subprog checks will also happen
		 * later, so assume this is the right insn_off for now.
		 */
		if (!ret) {
			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
			ret = -EINVAL;
		}
	}
	/* ret still 0 means no func_info entry matched the callback's id */
	if (!ret) {
		verbose(env, "exception callback type id not found in func_info\n");
		ret = -EINVAL;
	}
	return ret;
}
2751 
/* Max distinct module BTFs one program may reference via kfunc calls */
#define MAX_KFUNC_BTFS	256

/* One module BTF resolved from a kfunc call's insn->off (fd_array index) */
struct bpf_kfunc_btf {
	struct btf *btf;	/* ref held via btf_get_by_fd() */
	struct module *module;	/* pinned with btf_try_get_module() */
	u16 offset;		/* fd_array index this BTF came from */
};

struct bpf_kfunc_btf_tab {
	/* first nr_descs entries valid, kept sorted by 'offset' */
	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
	u32 nr_descs;
};
2764 
kfunc_desc_cmp_by_id_off(const void * a,const void * b)2765 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2766 {
2767 	const struct bpf_kfunc_desc *d0 = a;
2768 	const struct bpf_kfunc_desc *d1 = b;
2769 
2770 	/* func_id is not greater than BTF_MAX_TYPE */
2771 	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2772 }
2773 
kfunc_btf_cmp_by_off(const void * a,const void * b)2774 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2775 {
2776 	const struct bpf_kfunc_btf *d0 = a;
2777 	const struct bpf_kfunc_btf *d1 = b;
2778 
2779 	return d0->offset - d1->offset;
2780 }
2781 
2782 static struct bpf_kfunc_desc *
find_kfunc_desc(const struct bpf_prog * prog,u32 func_id,u16 offset)2783 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2784 {
2785 	struct bpf_kfunc_desc desc = {
2786 		.func_id = func_id,
2787 		.offset = offset,
2788 	};
2789 	struct bpf_kfunc_desc_tab *tab;
2790 
2791 	tab = prog->aux->kfunc_tab;
2792 	return bsearch(&desc, tab->descs, tab->nr_descs,
2793 		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2794 }
2795 
/* Resolve a kfunc call to its kernel address via the descriptor table.
 * Returns 0 and stores the address in *func_addr, or -EFAULT when the
 * (func_id, btf_fd_idx) pair was never registered.
 */
int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
		       u16 btf_fd_idx, u8 **func_addr)
{
	const struct bpf_kfunc_desc *desc = find_kfunc_desc(prog, func_id, btf_fd_idx);

	if (!desc)
		return -EFAULT;
	*func_addr = (u8 *)desc->addr;
	return 0;
}
2808 
/* Resolve the module BTF for fd_array[offset], caching it in
 * kfunc_btf_tab.  On first use the BTF fd is read from userspace's
 * fd_array, validated as a module BTF, and pinned together with its
 * module.  Returns the btf or an ERR_PTR.
 */
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
					 s16 offset)
{
	struct bpf_kfunc_btf kf_btf = { .offset = offset };
	struct bpf_kfunc_btf_tab *tab;
	struct bpf_kfunc_btf *b;
	struct module *mod;
	struct btf *btf;
	int btf_fd;

	/* fast path: already cached for this offset */
	tab = env->prog->aux->kfunc_btf_tab;
	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
	if (!b) {
		if (tab->nr_descs == MAX_KFUNC_BTFS) {
			verbose(env, "too many different module BTFs\n");
			return ERR_PTR(-E2BIG);
		}

		if (bpfptr_is_null(env->fd_array)) {
			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
			return ERR_PTR(-EPROTO);
		}

		/* fetch the BTF fd from userspace's fd_array[offset] */
		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
					    offset * sizeof(btf_fd),
					    sizeof(btf_fd)))
			return ERR_PTR(-EFAULT);

		btf = btf_get_by_fd(btf_fd);
		if (IS_ERR(btf)) {
			verbose(env, "invalid module BTF fd specified\n");
			return btf;
		}

		if (!btf_is_module(btf)) {
			verbose(env, "BTF fd for kfunc is not a module BTF\n");
			btf_put(btf);
			return ERR_PTR(-EINVAL);
		}

		/* pin the owning module; both refs are dropped in
		 * bpf_free_kfunc_btf_tab()
		 */
		mod = btf_try_get_module(btf);
		if (!mod) {
			btf_put(btf);
			return ERR_PTR(-ENXIO);
		}

		b = &tab->descs[tab->nr_descs++];
		b->btf = btf;
		b->module = mod;
		b->offset = offset;

		/* sort() reorders entries by value, so b may no longer point
		 * to the right entry after this
		 */
		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
		     kfunc_btf_cmp_by_off, NULL);
	} else {
		btf = b->btf;
	}

	return btf;
}
2872 
bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab * tab)2873 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2874 {
2875 	if (!tab)
2876 		return;
2877 
2878 	while (tab->nr_descs--) {
2879 		module_put(tab->descs[tab->nr_descs].module);
2880 		btf_put(tab->descs[tab->nr_descs].btf);
2881 	}
2882 	kfree(tab);
2883 }
2884 
/* Select the BTF a kfunc call refers to: offset == 0 means vmlinux BTF,
 * a positive offset indexes a module BTF via fd_array.
 */
static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
{
	if (!offset)
		return btf_vmlinux ?: ERR_PTR(-ENOENT);

	if (offset < 0) {
		/* In the future, this can be allowed to increase limit
		 * of fd index into fd_array, interpreted as u16.
		 */
		verbose(env, "negative offset disallowed for kernel module function call\n");
		return ERR_PTR(-EINVAL);
	}

	return __find_kfunc_desc_btf(env, offset);
}
2900 
2901 #define KF_IMPL_SUFFIX "_impl"
2902 
find_kfunc_impl_proto(struct bpf_verifier_env * env,struct btf * btf,const char * func_name)2903 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env,
2904 						    struct btf *btf,
2905 						    const char *func_name)
2906 {
2907 	char *buf = env->tmp_str_buf;
2908 	const struct btf_type *func;
2909 	s32 impl_id;
2910 	int len;
2911 
2912 	len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX);
2913 	if (len < 0 || len >= TMP_STR_BUF_LEN) {
2914 		verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX);
2915 		return NULL;
2916 	}
2917 
2918 	impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC);
2919 	if (impl_id <= 0) {
2920 		verbose(env, "cannot find function %s in BTF\n", buf);
2921 		return NULL;
2922 	}
2923 
2924 	func = btf_type_by_id(btf, impl_id);
2925 
2926 	return btf_type_by_id(btf, func->type);
2927 }
2928 
/* Populate *kfunc with the BTF, name, prototype and flags of the kfunc
 * identified by (func_id, offset).  Returns 0 on success or a negative
 * error; kfunc->flags may legitimately be NULL (see comment below).
 */
static int fetch_kfunc_meta(struct bpf_verifier_env *env,
			    s32 func_id,
			    s16 offset,
			    struct bpf_kfunc_meta *kfunc)
{
	const struct btf_type *func, *func_proto;
	const char *func_name;
	u32 *kfunc_flags;
	struct btf *btf;

	if (func_id <= 0) {
		verbose(env, "invalid kernel function btf_id %d\n", func_id);
		return -EINVAL;
	}

	/* vmlinux BTF for offset == 0, module BTF otherwise */
	btf = find_kfunc_desc_btf(env, offset);
	if (IS_ERR(btf)) {
		verbose(env, "failed to find BTF for kernel function\n");
		return PTR_ERR(btf);
	}

	/*
	 * Note that kfunc_flags may be NULL at this point, which
	 * means that we couldn't find func_id in any relevant
	 * kfunc_id_set. This most likely indicates an invalid kfunc
	 * call.  However we don't fail with an error here,
	 * and let the caller decide what to do with NULL kfunc->flags.
	 */
	kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog);

	func = btf_type_by_id(btf, func_id);
	if (!func || !btf_type_is_func(func)) {
		verbose(env, "kernel btf_id %d is not a function\n", func_id);
		return -EINVAL;
	}

	func_name = btf_name_by_offset(btf, func->name_off);

	/*
	 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag
	 * can be found through the counterpart _impl kfunc.
	 */
	if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS))
		func_proto = find_kfunc_impl_proto(env, btf, func_name);
	else
		func_proto = btf_type_by_id(btf, func->type);

	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
		verbose(env, "kernel function btf_id %d does not have a valid func_proto\n",
			func_id);
		return -EINVAL;
	}

	/* all lookups succeeded — fill in the output struct */
	memset(kfunc, 0, sizeof(*kfunc));
	kfunc->btf = btf;
	kfunc->id = func_id;
	kfunc->name = func_name;
	kfunc->proto = func_proto;
	kfunc->flags = kfunc_flags;

	return 0;
}
2991 
/* Register a kfunc call site: allocate the per-program kfunc tables on
 * first use, resolve the kfunc's metadata, kernel address and function
 * model, and insert a descriptor sorted by (func_id, offset).
 * Idempotent for already-known (func_id, offset) pairs.
 */
int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset)
{
	struct bpf_kfunc_btf_tab *btf_tab;
	struct btf_func_model func_model;
	struct bpf_kfunc_desc_tab *tab;
	struct bpf_prog_aux *prog_aux;
	struct bpf_kfunc_meta kfunc;
	struct bpf_kfunc_desc *desc;
	unsigned long addr;
	int err;

	prog_aux = env->prog->aux;
	tab = prog_aux->kfunc_tab;
	btf_tab = prog_aux->kfunc_btf_tab;
	if (!tab) {
		/* first kfunc call: check global prerequisites once and
		 * allocate the descriptor table
		 */
		if (!btf_vmlinux) {
			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
			return -ENOTSUPP;
		}

		if (!env->prog->jit_requested) {
			verbose(env, "JIT is required for calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!bpf_jit_supports_kfunc_call()) {
			verbose(env, "JIT does not support calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!env->prog->gpl_compatible) {
			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
			return -EINVAL;
		}

		tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT);
		if (!tab)
			return -ENOMEM;
		prog_aux->kfunc_tab = tab;
	}

	/* func_id == 0 is always invalid, but instead of returning an error, be
	 * conservative and wait until the code elimination pass before returning
	 * error, so that invalid calls that get pruned out can be in BPF programs
	 * loaded from userspace.  It is also required that offset be untouched
	 * for such calls.
	 */
	if (!func_id && !offset)
		return 0;

	/* module kfunc (offset != 0): lazily allocate the module-BTF table */
	if (!btf_tab && offset) {
		btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT);
		if (!btf_tab)
			return -ENOMEM;
		prog_aux->kfunc_btf_tab = btf_tab;
	}

	if (find_kfunc_desc(env->prog, func_id, offset))
		return 0;

	if (tab->nr_descs == MAX_KFUNC_DESCS) {
		verbose(env, "too many different kernel function calls\n");
		return -E2BIG;
	}

	err = fetch_kfunc_meta(env, func_id, offset, &kfunc);
	if (err)
		return err;

	addr = kallsyms_lookup_name(kfunc.name);
	if (!addr) {
		verbose(env, "cannot find address for kernel function %s\n", kfunc.name);
		return -EINVAL;
	}

	if (bpf_dev_bound_kfunc_id(func_id)) {
		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
		if (err)
			return err;
	}

	/* derive the calling-convention model the JIT will use */
	err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model);
	if (err)
		return err;

	desc = &tab->descs[tab->nr_descs++];
	desc->func_id = func_id;
	desc->offset = offset;
	desc->addr = addr;
	desc->func_model = func_model;
	/* keep descs[] sorted so find_kfunc_desc() can bsearch() */
	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
	     kfunc_desc_cmp_by_id_off, NULL);
	return 0;
}
3086 
bpf_prog_has_kfunc_call(const struct bpf_prog * prog)3087 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
3088 {
3089 	return !!prog->aux->kfunc_tab;
3090 }
3091 
/* First discovery pass: scan every instruction for bpf-to-bpf calls,
 * callback references and kfunc calls, registering subprog entry points
 * and kfunc descriptors; also wires up the BTF-declared exception
 * callback and appends the sentinel "exit" subprog.
 */
static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprog = env->subprog_info;
	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
	struct bpf_insn *insn = env->prog->insnsi;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret)
		return ret;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
		    !bpf_pseudo_kfunc_call(insn))
			continue;

		if (!env->bpf_capable) {
			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
			return -EPERM;
		}

		/* call/func targets are relative to the next insn */
		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
			ret = add_subprog(env, i + insn->imm + 1);
		else
			ret = bpf_add_kfunc_call(env, insn->imm, insn->off);

		if (ret < 0)
			return ret;
	}

	ret = bpf_find_exception_callback_insn_off(env);
	if (ret < 0)
		return ret;
	ex_cb_insn = ret;

	/* If ex_cb_insn > 0, this means that the main program has a subprog
	 * marked using BTF decl tag to serve as the exception callback.
	 */
	if (ex_cb_insn) {
		ret = add_subprog(env, ex_cb_insn);
		if (ret < 0)
			return ret;
		/* find the subprog index the callback landed on (sorting
		 * may have moved it) and mark it
		 */
		for (i = 1; i < env->subprog_cnt; i++) {
			if (env->subprog_info[i].start != ex_cb_insn)
				continue;
			env->exception_callback_subprog = i;
			bpf_mark_subprog_exc_cb(env, i);
			break;
		}
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	return 0;
}
3154 
/* Validate subprog boundaries: every jump must land inside its own
 * subprog, the last insn of each subprog must be an exit or an
 * unconditional jump, and per-subprog flags (tail call, ld_abs,
 * exit_idx) are recorded along the way.
 */
static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		/* helper call to bpf_tail_call() (src_reg == 0 rules out
		 * pseudo and kfunc calls)
		 */
		if (code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == 0 &&
		    insn[i].imm == BPF_FUNC_tail_call) {
			subprog[cur_subprog].has_tail_call = true;
			subprog[cur_subprog].tail_call_reachable = true;
		}
		if (BPF_CLASS(code) == BPF_LD &&
		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
			subprog[cur_subprog].has_ld_abs = true;
		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
			goto next;
		if (BPF_OP(code) == BPF_CALL)
			goto next;
		if (BPF_OP(code) == BPF_EXIT) {
			subprog[cur_subprog].exit_idx = i;
			goto next;
		}
		/* remaining jmp ops carry an in-subprog target */
		off = i + bpf_jmp_offset(&insn[i]) + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back or bpf_throw call
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP32 | BPF_JA) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			/* advance to the next subprog's window */
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}
3210 
/*
 * Sort subprogs in topological order so that leaf subprogs come first and
 * their callers come later. This is a DFS post-order traversal of the call
 * graph. Scan only reachable instructions (those in the computed postorder) of
 * the current subprog to discover callees (direct subprogs and sync
 * callbacks).  Rejects recursion (a back edge via a direct call); a back
 * edge via bpf_pseudo_func (callback reference) is tolerated.
 */
static int sort_subprogs_topo(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *si = env->subprog_info;
	int *insn_postorder = env->cfg.insn_postorder;
	struct bpf_insn *insn = env->prog->insnsi;
	int cnt = env->subprog_cnt;
	int *dfs_stack = NULL;
	int top = 0, order = 0;
	int i, ret = 0;
	u8 *color = NULL;

	color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT);
	dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT);
	if (!color || !dfs_stack) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * DFS post-order traversal.
	 * Color values: 0 = unvisited, 1 = on stack, 2 = done.
	 */
	for (i = 0; i < cnt; i++) {
		if (color[i])
			continue;
		color[i] = 1;
		dfs_stack[top++] = i;

		while (top > 0) {
			int cur = dfs_stack[top - 1];
			/* range of 'cur's reachable insns in insn_postorder */
			int po_start = si[cur].postorder_start;
			int po_end = si[cur + 1].postorder_start;
			bool pushed = false;
			int j;

			for (j = po_start; j < po_end; j++) {
				int idx = insn_postorder[j];
				int callee;

				if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx]))
					continue;
				/* call target must be a known subprog start */
				callee = bpf_find_subprog(env, idx + insn[idx].imm + 1);
				if (callee < 0) {
					ret = -EFAULT;
					goto out;
				}
				if (color[callee] == 2)
					continue;
				if (color[callee] == 1) {
					/* callback reference to an ancestor is
					 * fine; a direct call is recursion
					 */
					if (bpf_pseudo_func(&insn[idx]))
						continue;
					verbose(env, "recursive call from %s() to %s()\n",
						subprog_name(env, cur),
						subprog_name(env, callee));
					ret = -EINVAL;
					goto out;
				}
				/* descend into the unvisited callee first */
				color[callee] = 1;
				dfs_stack[top++] = callee;
				pushed = true;
				break;
			}

			/* all callees done: emit 'cur' in post-order */
			if (!pushed) {
				color[cur] = 2;
				env->subprog_topo_order[order++] = cur;
				top--;
			}
		}
	}

	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < cnt; i++)
			verbose(env, "topo_order[%d] = %s\n",
				i, subprog_name(env, env->subprog_topo_order[i]));
out:
	kvfree(dfs_stack);
	kvfree(color);
	return ret;
}
3298 
/* Mark nr_slots stack slots as scratched, counting downwards from spi
 * (slots spi, spi-1, ..., spi-nr_slots+1).  Always succeeds.
 */
static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				    int spi, int nr_slots)
{
	int slot;

	for (slot = spi; slot > spi - nr_slots; slot--)
		mark_stack_slot_scratched(env, slot);
	return 0;
}
3308 
mark_dynptr_read(struct bpf_verifier_env * env,struct bpf_reg_state * reg)3309 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3310 {
3311 	int spi;
3312 
3313 	/* For CONST_PTR_TO_DYNPTR, it must have already been done by
3314 	 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3315 	 * check_kfunc_call.
3316 	 */
3317 	if (reg->type == CONST_PTR_TO_DYNPTR)
3318 		return 0;
3319 	spi = dynptr_get_spi(env, reg);
3320 	if (spi < 0)
3321 		return spi;
3322 	/* Caller ensures dynptr is valid and initialized, which means spi is in
3323 	 * bounds and spi is the first dynptr slot. Simply mark stack slot as
3324 	 * read.
3325 	 */
3326 	return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS);
3327 }
3328 
/* Mark an iterator's stack slots as read; same bookkeeping as any other
 * stack-resident object.
 */
static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			  int spi, int nr_slots)
{
	return mark_stack_slot_obj_read(env, reg, spi, nr_slots);
}
3334 
/* Mark the single stack slot holding saved IRQ flags as read. */
static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi = irq_flag_get_spi(env, reg);

	return spi < 0 ? spi : mark_stack_slot_obj_read(env, reg, spi, 1);
}
3344 
3345 /* This function is supposed to be used by the following 32-bit optimization
3346  * code only. It returns TRUE if the source or destination register operates
3347  * on 64-bit, otherwise return FALSE.
3348  */
bpf_is_reg64(struct bpf_insn * insn,u32 regno,struct bpf_reg_state * reg,enum bpf_reg_arg_type t)3349 bool bpf_is_reg64(struct bpf_insn *insn,
3350 	      u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t)
3351 {
3352 	u8 code, class, op;
3353 
3354 	code = insn->code;
3355 	class = BPF_CLASS(code);
3356 	op = BPF_OP(code);
3357 	if (class == BPF_JMP) {
3358 		/* BPF_EXIT for "main" will reach here. Return TRUE
3359 		 * conservatively.
3360 		 */
3361 		if (op == BPF_EXIT)
3362 			return true;
3363 		if (op == BPF_CALL) {
3364 			/* BPF to BPF call will reach here because of marking
3365 			 * caller saved clobber with DST_OP_NO_MARK for which we
3366 			 * don't care the register def because they are anyway
3367 			 * marked as NOT_INIT already.
3368 			 */
3369 			if (insn->src_reg == BPF_PSEUDO_CALL)
3370 				return false;
3371 			/* Helper call will reach here because of arg type
3372 			 * check, conservatively return TRUE.
3373 			 */
3374 			if (t == SRC_OP)
3375 				return true;
3376 
3377 			return false;
3378 		}
3379 	}
3380 
3381 	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3382 		return false;
3383 
3384 	if (class == BPF_ALU64 || class == BPF_JMP ||
3385 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3386 		return true;
3387 
3388 	if (class == BPF_ALU || class == BPF_JMP32)
3389 		return false;
3390 
3391 	if (class == BPF_LDX) {
3392 		if (t != SRC_OP)
3393 			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
3394 		/* LDX source must be ptr. */
3395 		return true;
3396 	}
3397 
3398 	if (class == BPF_STX) {
3399 		/* BPF_STX (including atomic variants) has one or more source
3400 		 * operands, one of which is a ptr. Check whether the caller is
3401 		 * asking about it.
3402 		 */
3403 		if (t == SRC_OP && reg->type != SCALAR_VALUE)
3404 			return true;
3405 		return BPF_SIZE(code) == BPF_DW;
3406 	}
3407 
3408 	if (class == BPF_LD) {
3409 		u8 mode = BPF_MODE(code);
3410 
3411 		/* LD_IMM64 */
3412 		if (mode == BPF_IMM)
3413 			return true;
3414 
3415 		/* Both LD_IND and LD_ABS return 32-bit data. */
3416 		if (t != SRC_OP)
3417 			return  false;
3418 
3419 		/* Implicit ctx ptr. */
3420 		if (regno == BPF_REG_6)
3421 			return true;
3422 
3423 		/* Explicit source could be any width. */
3424 		return true;
3425 	}
3426 
3427 	if (class == BPF_ST)
3428 		/* The only source register for BPF_ST is a ptr. */
3429 		return true;
3430 
3431 	/* Conservatively return true at default. */
3432 	return true;
3433 }
3434 
mark_insn_zext(struct bpf_verifier_env * env,struct bpf_reg_state * reg)3435 static void mark_insn_zext(struct bpf_verifier_env *env,
3436 			   struct bpf_reg_state *reg)
3437 {
3438 	s32 def_idx = reg->subreg_def;
3439 
3440 	if (def_idx == DEF_NOT_SUBREG)
3441 		return;
3442 
3443 	env->insn_aux_data[def_idx - 1].zext_dst = true;
3444 	/* The dst will be zero extended, so won't be sub-register anymore. */
3445 	reg->subreg_def = DEF_NOT_SUBREG;
3446 }
3447 
/* Core read/write bookkeeping for a register operand of the current insn.
 *
 * For SRC_OP: verify the register is initialized and, when the use is
 * 64-bit wide, request zero-extension at its defining insn.
 * For DST_OP / DST_OP_NO_MARK: reject writes to the frame pointer and
 * record subreg_def for the 32-bit zext optimization; DST_OP additionally
 * marks the register's value as unknown.
 *
 * Returns 0 on success, -EACCES on an invalid register use.
 */
static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
			   enum bpf_reg_arg_type t)
{
	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
	struct bpf_reg_state *reg;
	bool rw64;

	mark_reg_scratched(env, regno);

	reg = &regs[regno];
	rw64 = bpf_is_reg64(insn, regno, reg, t);
	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (reg->type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno == BPF_REG_FP)
			return 0;

		if (rw64)
			mark_insn_zext(env, reg);

		return 0;
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		/* For a 32-bit def, remember the defining insn (stored as
		 * insn_idx + 1 so 0 can mean DEF_NOT_SUBREG).
		 */
		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}
3485 
/* Convenience wrapper for __check_reg_arg() operating on the register file
 * of the currently-executing frame.
 */
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum bpf_reg_arg_type t)
{
	struct bpf_func_state *frame;

	frame = env->cur_state->frame[env->cur_state->curframe];
	return __check_reg_arg(env, frame->regs, regno, t);
}
3494 
insn_stack_access_flags(int frameno,int spi)3495 static int insn_stack_access_flags(int frameno, int spi)
3496 {
3497 	return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
3498 }
3499 
/* Bit-layout parameters for packing linked register/slot descriptors into
 * a single u64 (see linked_regs_pack()/linked_regs_unpack() below): each
 * entry takes LR_ENTRY_BITS bits, and the entry count sits in the low
 * LR_SIZE_BITS bits of the word.
 */
#define LR_FRAMENO_BITS	3
#define LR_SPI_BITS	6
#define LR_ENTRY_BITS	(LR_SPI_BITS + LR_FRAMENO_BITS + 1)
#define LR_SIZE_BITS	4
#define LR_FRAMENO_MASK	((1ull << LR_FRAMENO_BITS) - 1)
#define LR_SPI_MASK	((1ull << LR_SPI_BITS)     - 1)
#define LR_SIZE_MASK	((1ull << LR_SIZE_BITS)    - 1)
#define LR_SPI_OFF	LR_FRAMENO_BITS
#define LR_IS_REG_OFF	(LR_SPI_BITS + LR_FRAMENO_BITS)
#define LINKED_REGS_MAX	6

/* One register or stack slot participating in a group of linked registers */
struct linked_reg {
	u8 frameno;	/* frame number the register/slot belongs to */
	union {
		u8 spi;		/* stack slot index, valid when !is_reg */
		u8 regno;	/* register number, valid when is_reg */
	};
	bool is_reg;	/* discriminator for the union above */
};

/* Fixed-capacity collection of linked_reg entries */
struct linked_regs {
	int cnt;	/* number of valid entries[] elements */
	struct linked_reg entries[LINKED_REGS_MAX];
};
3524 
linked_regs_push(struct linked_regs * s)3525 static struct linked_reg *linked_regs_push(struct linked_regs *s)
3526 {
3527 	if (s->cnt < LINKED_REGS_MAX)
3528 		return &s->entries[s->cnt++];
3529 
3530 	return NULL;
3531 }
3532 
3533 /* Use u64 as a vector of 6 10-bit values, use first 4-bits to track
3534  * number of elements currently in stack.
3535  * Pack one history entry for linked registers as 10 bits in the following format:
3536  * - 3-bits frameno
3537  * - 6-bits spi_or_reg
3538  * - 1-bit  is_reg
3539  */
linked_regs_pack(struct linked_regs * s)3540 static u64 linked_regs_pack(struct linked_regs *s)
3541 {
3542 	u64 val = 0;
3543 	int i;
3544 
3545 	for (i = 0; i < s->cnt; ++i) {
3546 		struct linked_reg *e = &s->entries[i];
3547 		u64 tmp = 0;
3548 
3549 		tmp |= e->frameno;
3550 		tmp |= e->spi << LR_SPI_OFF;
3551 		tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
3552 
3553 		val <<= LR_ENTRY_BITS;
3554 		val |= tmp;
3555 	}
3556 	val <<= LR_SIZE_BITS;
3557 	val |= s->cnt;
3558 	return val;
3559 }
3560 
/* Inverse of linked_regs_pack(): decode @val into @s. */
static void linked_regs_unpack(u64 val, struct linked_regs *s)
{
	int idx;

	s->cnt = val & LR_SIZE_MASK;
	val >>= LR_SIZE_BITS;

	for (idx = 0; idx < s->cnt; ++idx) {
		struct linked_reg *entry = &s->entries[idx];

		entry->frameno = val & LR_FRAMENO_MASK;
		entry->spi     = (val >> LR_SPI_OFF) & LR_SPI_MASK;
		entry->is_reg  = (val >> LR_IS_REG_OFF) & 0x1;
		val >>= LR_ENTRY_BITS;
	}
}
3577 
/* Disassembler callback (.cb_call): resolve the BTF name of the kfunc
 * invoked by @insn. Returns NULL when @insn is not a kfunc call, or the
 * literal "<error>" when the kfunc's BTF cannot be located. @data is the
 * bpf_verifier_env passed via cbs.private_data.
 */
static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
{
	const struct btf_type *func;
	struct btf *desc_btf;

	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
		return NULL;

	desc_btf = find_kfunc_desc_btf(data, insn->off);
	if (IS_ERR(desc_btf))
		return "<error>";

	func = btf_type_by_id(desc_btf, insn->imm);
	return btf_name_by_offset(desc_btf, func->name_off);
}
3593 
/* Print @insn to the verifier log, resolving kfunc call targets via
 * disasm_kfunc_name(). Raw (unmasked) pointers are only printed when
 * env->allow_ptr_leaks is set.
 */
void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	const struct bpf_insn_cbs cbs = {
		.cb_call	= disasm_kfunc_name,
		.cb_print	= verbose,
		.private_data	= env,
	};

	print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
}
3604 
/* If any register R in hist->linked_regs is marked as precise in bt,
 * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
 *
 * Precision marks must propagate across linked registers: if one member of
 * a linked group needs precise tracking, all members do. A no-op when
 * @hist is absent or carries no linked registers.
 */
void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
{
	struct linked_regs linked_regs;
	bool some_precise = false;
	int i;

	if (!hist || hist->linked_regs == 0)
		return;

	linked_regs_unpack(hist->linked_regs, &linked_regs);
	/* First pass: is any member of the group already tracked? */
	for (i = 0; i < linked_regs.cnt; ++i) {
		struct linked_reg *e = &linked_regs.entries[i];

		if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
		    (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
			some_precise = true;
			break;
		}
	}

	if (!some_precise)
		return;

	/* Second pass: mark the whole group. */
	for (i = 0; i < linked_regs.cnt; ++i) {
		struct linked_reg *e = &linked_regs.entries[i];

		if (e->is_reg)
			bpf_bt_set_frame_reg(bt, e->frameno, e->regno);
		else
			bpf_bt_set_frame_slot(bt, e->frameno, e->spi);
	}
}
3640 
/* Convenience wrapper around bpf_mark_chain_precision() for a single
 * register @regno in the current verifier state.
 */
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
	return bpf_mark_chain_precision(env, env->cur_state, regno, NULL);
}
3645 
/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
 * desired reg and stack masks across all relevant frames
 * (regno == -1 signals bpf_mark_chain_precision() to use those masks
 * instead of a single register).
 */
static int mark_chain_precision_batch(struct bpf_verifier_env *env,
				      struct bpf_verifier_state *starting_state)
{
	return bpf_mark_chain_precision(env, starting_state, -1, NULL);
}
3654 
/* Return true if a register of the given (base) type may be spilled to the
 * stack and later filled back, i.e. its full state is worth preserving
 * across a spill. Modifier flags are stripped via base_type() first.
 */
static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (base_type(type)) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case CONST_PTR_TO_MAP:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_XDP_SOCK:
	case PTR_TO_BTF_ID:
	case PTR_TO_BUF:
	case PTR_TO_MEM:
	case PTR_TO_FUNC:
	case PTR_TO_MAP_KEY:
	case PTR_TO_ARENA:
		return true;
	default:
		return false;
	}
}
3681 
3682 
3683 /* check if register is a constant scalar value */
is_reg_const(struct bpf_reg_state * reg,bool subreg32)3684 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32)
3685 {
3686 	return reg->type == SCALAR_VALUE &&
3687 	       tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off);
3688 }
3689 
3690 /* assuming is_reg_const() is true, return constant value of a register */
reg_const_value(struct bpf_reg_state * reg,bool subreg32)3691 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
3692 {
3693 	return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
3694 }
3695 
/* A register is treated as a leakable pointer value only for programs that
 * are not allowed to leak pointers; anything non-scalar qualifies then.
 */
static bool __is_pointer_value(bool allow_ptr_leaks,
			       const struct bpf_reg_state *reg)
{
	return !allow_ptr_leaks && reg->type != SCALAR_VALUE;
}
3704 
/* Drop a scalar register's linked-register identity: clear both the id
 * used by sync_linked_regs() and the associated constant delta.
 */
static void clear_scalar_id(struct bpf_reg_state *reg)
{
	reg->id = 0;
	reg->delta = 0;
}
3710 
/* Prepare @src_reg for a register-to-register move so that min/max range
 * propagation via linked registers works afterwards. No-op for non-scalars.
 */
static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
					struct bpf_reg_state *src_reg)
{
	if (src_reg->type != SCALAR_VALUE)
		return;
	/*
	 * The verifier is processing rX = rY insn and
	 * rY->id has special linked register already.
	 * Cleared it, since multiple rX += const are not supported.
	 */
	if (src_reg->id & BPF_ADD_CONST)
		clear_scalar_id(src_reg);
	/*
	 * Ensure that src_reg has a valid ID that will be copied to
	 * dst_reg and then will be used by sync_linked_regs() to
	 * propagate min/max range.
	 */
	if (!src_reg->id && !tnum_is_const(src_reg->var_off))
		src_reg->id = ++env->id_gen;
}
3731 
/* Copy the full register state from src to dst.
 *
 * NOTE(review): this is a plain struct assignment, so no fields of dst
 * survive the copy. The previous comment claimed dst->parent and dst->live
 * are preserved, which does not match this code — presumably a leftover
 * from the old liveness tracking; confirm against bpf_reg_state's current
 * layout.
 */
static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
{
	*dst = *src;
}
3737 
/* Spill @reg into stack slot @spi of @state. The low @size bytes of the
 * slot become STACK_SPILL; for narrow (< 8 byte) spills the remaining
 * bytes are marked misc.
 */
static void save_register_state(struct bpf_verifier_env *env,
				struct bpf_func_state *state,
				int spi, struct bpf_reg_state *reg,
				int size)
{
	int i;

	copy_register_state(&state->stack[spi].spilled_ptr, reg);

	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
		state->stack[spi].slot_type[i - 1] = STACK_SPILL;

	/* size < 8 bytes spill */
	for (; i; i--)
		mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
}
3754 
is_bpf_st_mem(struct bpf_insn * insn)3755 static bool is_bpf_st_mem(struct bpf_insn *insn)
3756 {
3757 	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3758 }
3759 
/* Number of bits needed to represent the register's maximum unsigned
 * value (fls64() == position of the most significant set bit; 0 for 0).
 */
static int get_reg_width(struct bpf_reg_state *reg)
{
	return fls64(reg->umax_value);
}
3764 
/* See comment for mark_fastcall_pattern_for_call().
 *
 * Called on every stack access at offset @off: if something outside a
 * recognized fastcall pattern touches the reserved fastcall stack region,
 * permanently disable fastcall rewrites for the subprogram containing
 * @insn_idx and clear the per-insn fastcall aux flags.
 */
static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
					  struct bpf_func_state *state, int insn_idx, int off)
{
	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	int i;

	if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
		return;
	/* access to the region [max_stack_depth .. fastcall_stack_off)
	 * from something that is not a part of the fastcall pattern,
	 * disable fastcall rewrites for current subprogram by setting
	 * fastcall_stack_off to a value smaller than any possible offset.
	 */
	subprog->fastcall_stack_off = S16_MIN;
	/* reset fastcall aux flags within subprogram,
	 * happens at most once per subprogram
	 */
	for (i = subprog->start; i < (subprog + 1)->start; ++i) {
		aux[i].fastcall_spills_num = 0;
		aux[i].fastcall_pattern = 0;
	}
}
3789 
/* Invalidate any spilled pointer / dynptr / iter state held in stack slot
 * @spi before a plain data write lands there.
 */
static void scrub_special_slot(struct bpf_func_state *state, int spi)
{
	int i;

	/* regular write of data into stack destroys any spilled ptr */
	state->stack[spi].spilled_ptr.type = NOT_INIT;
	/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
	if (is_stack_slot_special(&state->stack[spi]))
		for (i = 0; i < BPF_REG_SIZE; i++)
			scrub_spilled_slot(&state->stack[spi].slot_type[i]);
}
3801 
3802 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
3803  * stack boundary and alignment are checked in check_mem_access()
3804  */
/* Verify and model a fixed-offset stack write of @size bytes at @off.
 * @value_regno is the source register, or -1 when the written value does
 * not come from a register (e.g. BPF_ST immediate). Depending on what is
 * written, the target slot becomes a register spill, STACK_ZERO, or
 * STACK_MISC. Returns 0 or a negative error.
 */
static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
				       /* stack frame we're writing to */
				       struct bpf_func_state *state,
				       int off, int size, int value_regno,
				       int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	struct bpf_reg_state *reg = NULL;
	int insn_flags = insn_stack_access_flags(state->frameno, spi);

	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    bpf_is_spilled_reg(&state->stack[spi]) &&
	    !bpf_is_spilled_scalar_reg(&state->stack[spi]) &&
	    size != BPF_REG_SIZE) {
		/* Unprivileged programs may not partially overwrite a
		 * spilled pointer.
		 */
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0)
		reg = &cur->regs[value_regno];
	if (!env->bypass_spec_v4) {
		/* Spectre v4 mitigation: flag this store for sanitization if
		 * it may overwrite a pointer or slots that are not plain data.
		 */
		bool sanitize = reg && is_spillable_regtype(reg->type);

		for (i = 0; i < size; i++) {
			u8 type = state->stack[spi].slot_type[i];

			if (type != STACK_MISC && type != STACK_ZERO) {
				sanitize = true;
				break;
			}
		}

		if (sanitize)
			env->insn_aux_data[insn_idx].nospec_result = true;
	}

	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;

	check_fastcall_stack_contract(env, state, insn_idx, off);
	mark_stack_slot_scratched(env, spi);
	if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
		/* Aligned scalar write: track it as a (possibly narrow)
		 * register spill.
		 */
		bool reg_value_fits;

		reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
		/* Make sure that reg had an ID to build a relation on spill. */
		if (reg_value_fits)
			assign_scalar_id_before_mov(env, reg);
		save_register_state(env, state, spi, reg, size);
		/* Break the relation on a narrowing spill. */
		if (!reg_value_fits)
			state->stack[spi].spilled_ptr.id = 0;
	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
		   env->bpf_capable) {
		/* Aligned BPF_ST immediate: model it as spilling a known
		 * constant scalar built in a scratch register.
		 */
		struct bpf_reg_state *tmp_reg = &env->fake_reg[0];

		memset(tmp_reg, 0, sizeof(*tmp_reg));
		__mark_reg_known(tmp_reg, insn->imm);
		tmp_reg->type = SCALAR_VALUE;
		save_register_state(env, state, spi, tmp_reg, size);
	} else if (reg && is_spillable_regtype(reg->type)) {
		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose_linfo(env, insn_idx, "; ");
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		if (state != cur && reg->type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}
		save_register_state(env, state, spi, reg, size);
	} else {
		u8 type = STACK_MISC;

		scrub_special_slot(state, spi);

		/* when we zero initialize stack slots mark them as such */
		if ((reg && bpf_register_is_null(reg)) ||
		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
			/* STACK_ZERO case happened because register spill
			 * wasn't properly aligned at the stack slot boundary,
			 * so it's not a register spill anymore; force
			 * originating register to be precise to make
			 * STACK_ZERO correct for subsequent states
			 */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
			type = STACK_ZERO;
		}

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
		insn_flags = 0; /* not a register spill */
	}

	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
	return 0;
}
3914 
3915 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
3916  * known to contain a variable offset.
3917  * This function checks whether the write is permitted and conservatively
3918  * tracks the effects of the write, considering that each stack slot in the
3919  * dynamic range is potentially written to.
3920  *
3921  * 'value_regno' can be -1, meaning that an unknown value is being written to
3922  * the stack.
3923  *
3924  * Spilled pointers in range are not marked as written because we don't know
3925  * what's going to be actually written. This means that read propagation for
3926  * future reads cannot be terminated by this write.
3927  *
3928  * For privileged programs, uninitialized stack slots are considered
3929  * initialized by this write (even though we don't know exactly what offsets
3930  * are going to be written to). The idea is that we don't want the verifier to
3931  * reject future reads that access slots written to through variable offsets.
3932  */
static int check_stack_write_var_off(struct bpf_verifier_env *env,
				     /* func where register points to */
				     struct bpf_func_state *state,
				     int ptr_regno, int off, int size,
				     int value_regno, int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int min_off, max_off;
	int i, err;
	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	bool writing_zero = false;
	/* set if the fact that we're writing a zero is used to let any
	 * stack slots remain STACK_ZERO
	 */
	bool zero_used = false;

	cur = env->cur_state->frame[env->cur_state->curframe];
	ptr_reg = &cur->regs[ptr_regno];
	/* The write may land anywhere in [smin + off, smax + off + size). */
	min_off = ptr_reg->smin_value + off;
	max_off = ptr_reg->smax_value + off + size;
	if (value_regno >= 0)
		value_reg = &cur->regs[value_regno];
	if ((value_reg && bpf_register_is_null(value_reg)) ||
	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
		writing_zero = true;

	/* Any dynptr whose slots fall in the write range must be destroyed
	 * before modeling the write.
	 */
	for (i = min_off; i < max_off; i++) {
		int spi;

		spi = bpf_get_spi(i);
		err = destroy_if_dynptr_stack_slot(env, state, spi);
		if (err)
			return err;
	}

	check_fastcall_stack_contract(env, state, insn_idx, min_off);
	/* Variable offset writes destroy any spilled pointers in range. */
	for (i = min_off; i < max_off; i++) {
		u8 new_type, *stype;
		int slot, spi;

		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		mark_stack_slot_scratched(env, spi);

		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
			/* Reject the write if range we may write to has not
			 * been initialized beforehand. If we didn't reject
			 * here, the ptr status would be erased below (even
			 * though not all slots are actually overwritten),
			 * possibly opening the door to leaks.
			 *
			 * We do however catch STACK_INVALID case below, and
			 * only allow reading possibly uninitialized memory
			 * later for CAP_PERFMON, as the write may not happen to
			 * that slot.
			 */
			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
				insn_idx, i);
			return -EINVAL;
		}

		/* If writing_zero and the spi slot contains a spill of value 0,
		 * maintain the spill type.
		 */
		if (writing_zero && *stype == STACK_SPILL &&
		    bpf_is_spilled_scalar_reg(&state->stack[spi])) {
			struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr;

			if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) {
				zero_used = true;
				continue;
			}
		}

		/*
		 * Scrub slots if variable-offset stack write goes over spilled pointers.
		 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT
		 * and valid program is rejected by check_stack_read_fixed_off()
		 * with obscure "invalid size of register fill" message.
		 */
		scrub_special_slot(state, spi);

		/* Update the slot type. */
		new_type = STACK_MISC;
		if (writing_zero && *stype == STACK_ZERO) {
			new_type = STACK_ZERO;
			zero_used = true;
		}
		/* If the slot is STACK_INVALID, we check whether it's OK to
		 * pretend that it will be initialized by this write. The slot
		 * might not actually be written to, and so if we mark it as
		 * initialized future reads might leak uninitialized memory.
		 * For privileged programs, we will accept such reads to slots
		 * that may or may not be written because, if we're reject
		 * them, the error would be too confusing.
		 * Conservatively, treat STACK_POISON in a similar way.
		 */
		if ((*stype == STACK_INVALID || *stype == STACK_POISON) &&
		    !env->allow_uninit_stack) {
			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
					insn_idx, i);
			return -EINVAL;
		}
		*stype = new_type;
	}
	if (zero_used) {
		/* backtracking doesn't work for STACK_ZERO yet. */
		err = mark_chain_precision(env, value_regno);
		if (err)
			return err;
	}
	return 0;
}
4049 
4050 /* When register 'dst_regno' is assigned some values from stack[min_off,
4051  * max_off), we set the register's type according to the types of the
4052  * respective stack slots. If all the stack values are known to be zeros, then
4053  * so is the destination reg. Otherwise, the register is considered to be
4054  * SCALAR. This function does not deal with register filling; the caller must
4055  * ensure that all spilled registers in the stack range have been marked as
4056  * read.
4057  */
static void mark_reg_stack_read(struct bpf_verifier_env *env,
				/* func where src register points to */
				struct bpf_func_state *ptr_state,
				int min_off, int max_off, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot, spi;
	u8 *stype;
	int zeros = 0;

	/* Count how many of the read bytes are known-zero; stop at the
	 * first byte that is not STACK_ZERO.
	 */
	for (i = min_off; i < max_off; i++) {
		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		mark_stack_slot_scratched(env, spi);
		stype = ptr_state->stack[spi].slot_type;
		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
			break;
		zeros++;
	}
	if (zeros == max_off - min_off) {
		/* Any access_size read into register is zero extended,
		 * so the whole register == const_zero.
		 */
		__mark_reg_const_zero(env, &state->regs[dst_regno]);
	} else {
		/* have read misc data from the stack */
		mark_reg_unknown(env, state->regs, dst_regno);
	}
}
4088 
4089 /* Read the stack at 'off' and put the results into the register indicated by
4090  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
4091  * spilled reg.
4092  *
4093  * 'dst_regno' can be -1, meaning that the read value is not going to a
4094  * register.
4095  *
4096  * The access is assumed to be within the current stack bounds.
4097  */
static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
				      /* func where src register points to */
				      struct bpf_func_state *reg_state,
				      int off, int size, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	struct bpf_reg_state *reg;
	u8 *stype, type;
	int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);

	stype = reg_state->stack[spi].slot_type;
	reg = &reg_state->stack[spi].spilled_ptr;

	mark_stack_slot_scratched(env, spi);
	check_fastcall_stack_contract(env, state, env->insn_idx, off);

	if (bpf_is_spilled_reg(&reg_state->stack[spi])) {
		/* Determine how many bytes of the slot actually hold the
		 * spill (narrow spills mark fewer than BPF_REG_SIZE bytes).
		 */
		u8 spill_size = 1;

		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
			spill_size++;

		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
			/* Partial (narrow) fill of a spilled register. */
			if (reg->type != SCALAR_VALUE) {
				verbose_linfo(env, env->insn_idx, "; ");
				verbose(env, "invalid size of register fill\n");
				return -EACCES;
			}

			if (dst_regno < 0)
				return 0;

			if (size <= spill_size &&
			    bpf_stack_narrow_access_ok(off, size, spill_size)) {
				/* The earlier check_reg_arg() has decided the
				 * subreg_def for this insn.  Save it first.
				 */
				s32 subreg_def = state->regs[dst_regno].subreg_def;

				if (env->bpf_capable && size == 4 && spill_size == 4 &&
				    get_reg_width(reg) <= 32)
					/* Ensure stack slot has an ID to build a relation
					 * with the destination register on fill.
					 */
					assign_scalar_id_before_mov(env, reg);
				copy_register_state(&state->regs[dst_regno], reg);
				state->regs[dst_regno].subreg_def = subreg_def;

				/* Break the relation on a narrowing fill.
				 * coerce_reg_to_size will adjust the boundaries.
				 */
				if (get_reg_width(reg) > size * BITS_PER_BYTE)
					clear_scalar_id(&state->regs[dst_regno]);
			} else {
				/* Mixed-content read: classify every byte. */
				int spill_cnt = 0, zero_cnt = 0;

				for (i = 0; i < size; i++) {
					type = stype[(slot - i) % BPF_REG_SIZE];
					if (type == STACK_SPILL) {
						spill_cnt++;
						continue;
					}
					if (type == STACK_MISC)
						continue;
					if (type == STACK_ZERO) {
						zero_cnt++;
						continue;
					}
					if (type == STACK_INVALID && env->allow_uninit_stack)
						continue;
					if (type == STACK_POISON) {
						verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
							off, i, size);
					} else {
						verbose(env, "invalid read from stack off %d+%d size %d\n",
							off, i, size);
					}
					return -EACCES;
				}

				if (spill_cnt == size &&
				    tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					/* this IS register fill, so keep insn_flags */
				} else if (zero_cnt == size) {
					/* similarly to mark_reg_stack_read(), preserve zeroes */
					__mark_reg_const_zero(env, &state->regs[dst_regno]);
					insn_flags = 0; /* not restoring original register state */
				} else {
					mark_reg_unknown(env, state->regs, dst_regno);
					insn_flags = 0; /* not restoring original register state */
				}
			}
		} else if (dst_regno >= 0) {
			/* restore register state from stack */
			if (env->bpf_capable)
				/* Ensure stack slot has an ID to build a relation
				 * with the destination register on fill.
				 */
				assign_scalar_id_before_mov(env, reg);
			copy_register_state(&state->regs[dst_regno], reg);
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 */
		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
			/* If dst_regno==-1, the caller is asking us whether
			 * it is acceptable to use this value as a SCALAR_VALUE
			 * (e.g. for XADD).
			 * We must not allow unprivileged callers to do that
			 * with spilled pointers.
			 */
			verbose(env, "leaking pointer from stack off %d\n",
				off);
			return -EACCES;
		}
	} else {
		/* Slot holds plain data, not a spilled register: validate
		 * every byte and derive the destination's type from the mix.
		 */
		for (i = 0; i < size; i++) {
			type = stype[(slot - i) % BPF_REG_SIZE];
			if (type == STACK_MISC)
				continue;
			if (type == STACK_ZERO)
				continue;
			if (type == STACK_INVALID && env->allow_uninit_stack)
				continue;
			if (type == STACK_POISON) {
				verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n",
					off, i, size);
			} else {
				verbose(env, "invalid read from stack off %d+%d size %d\n",
					off, i, size);
			}
			return -EACCES;
		}
		if (dst_regno >= 0)
			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
		insn_flags = 0; /* we are not restoring spilled register */
	}
	if (insn_flags)
		return bpf_push_jmp_history(env, env->cur_state, insn_flags, 0);
	return 0;
}
4242 
/* Origin of a memory/stack access; some checks differ depending on whether
 * the access comes straight from a program instruction or is performed on
 * the program's behalf by a helper.
 */
enum bpf_access_src {
	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
	ACCESS_HELPER = 2,  /* the access is performed by a helper */
};

/* Forward declaration: defined further down, but needed by the variable
 * offset stack read path below.
 */
static int check_stack_range_initialized(struct bpf_verifier_env *env,
					 int regno, int off, int access_size,
					 bool zero_size_allowed,
					 enum bpf_access_type type,
					 struct bpf_call_arg_meta *meta);
4253 
reg_state(struct bpf_verifier_env * env,int regno)4254 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
4255 {
4256 	return cur_regs(env) + regno;
4257 }
4258 
/* Read the stack at 'ptr_regno + off' and put the result into the register
 * 'dst_regno'.
 * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
 * but not its variable offset.
 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
 *
 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
 * filling registers (i.e. reads of spilled register cannot be detected when
 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
 * offset; for a fixed offset check_stack_read_fixed_off should be used
 * instead.
 *
 * Returns 0 on success, a negative errno otherwise.
 */
static int check_stack_read_var_off(struct bpf_verifier_env *env,
				    int ptr_regno, int off, int size, int dst_regno)
{
	/* The state of the source register. */
	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
	struct bpf_func_state *ptr_state = bpf_func(env, reg);
	int err;
	int min_off, max_off;

	/* Note that we pass a NULL meta, so raw access will not be permitted.
	 */
	err = check_stack_range_initialized(env, ptr_regno, off, size,
					    false, BPF_READ, NULL);
	if (err)
		return err;

	/* Any slot in [min_off, max_off + size) may have been read, so mark
	 * the destination conservatively over the whole possible range.
	 */
	min_off = reg->smin_value + off;
	max_off = reg->smax_value + off;
	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
	check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
	return 0;
}
4294 
4295 /* check_stack_read dispatches to check_stack_read_fixed_off or
4296  * check_stack_read_var_off.
4297  *
4298  * The caller must ensure that the offset falls within the allocated stack
4299  * bounds.
4300  *
4301  * 'dst_regno' is a register which will receive the value from the stack. It
4302  * can be -1, meaning that the read value is not going to a register.
4303  */
check_stack_read(struct bpf_verifier_env * env,int ptr_regno,int off,int size,int dst_regno)4304 static int check_stack_read(struct bpf_verifier_env *env,
4305 			    int ptr_regno, int off, int size,
4306 			    int dst_regno)
4307 {
4308 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4309 	struct bpf_func_state *state = bpf_func(env, reg);
4310 	int err;
4311 	/* Some accesses are only permitted with a static offset. */
4312 	bool var_off = !tnum_is_const(reg->var_off);
4313 
4314 	/* The offset is required to be static when reads don't go to a
4315 	 * register, in order to not leak pointers (see
4316 	 * check_stack_read_fixed_off).
4317 	 */
4318 	if (dst_regno < 0 && var_off) {
4319 		char tn_buf[48];
4320 
4321 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4322 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
4323 			tn_buf, off, size);
4324 		return -EACCES;
4325 	}
4326 	/* Variable offset is prohibited for unprivileged mode for simplicity
4327 	 * since it requires corresponding support in Spectre masking for stack
4328 	 * ALU. See also retrieve_ptr_limit(). The check in
4329 	 * check_stack_access_for_ptr_arithmetic() called by
4330 	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
4331 	 * with variable offsets, therefore no check is required here. Further,
4332 	 * just checking it here would be insufficient as speculative stack
4333 	 * writes could still lead to unsafe speculative behaviour.
4334 	 */
4335 	if (!var_off) {
4336 		off += reg->var_off.value;
4337 		err = check_stack_read_fixed_off(env, state, off, size,
4338 						 dst_regno);
4339 	} else {
4340 		/* Variable offset stack reads need more conservative handling
4341 		 * than fixed offset ones. Note that dst_regno >= 0 on this
4342 		 * branch.
4343 		 */
4344 		err = check_stack_read_var_off(env, ptr_regno, off, size,
4345 					       dst_regno);
4346 	}
4347 	return err;
4348 }
4349 
4350 
4351 /* check_stack_write dispatches to check_stack_write_fixed_off or
4352  * check_stack_write_var_off.
4353  *
4354  * 'ptr_regno' is the register used as a pointer into the stack.
4355  * 'value_regno' is the register whose value we're writing to the stack. It can
4356  * be -1, meaning that we're not writing from a register.
4357  *
4358  * The caller must ensure that the offset falls within the maximum stack size.
4359  */
check_stack_write(struct bpf_verifier_env * env,int ptr_regno,int off,int size,int value_regno,int insn_idx)4360 static int check_stack_write(struct bpf_verifier_env *env,
4361 			     int ptr_regno, int off, int size,
4362 			     int value_regno, int insn_idx)
4363 {
4364 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4365 	struct bpf_func_state *state = bpf_func(env, reg);
4366 	int err;
4367 
4368 	if (tnum_is_const(reg->var_off)) {
4369 		off += reg->var_off.value;
4370 		err = check_stack_write_fixed_off(env, state, off, size,
4371 						  value_regno, insn_idx);
4372 	} else {
4373 		/* Variable offset stack reads need more conservative handling
4374 		 * than fixed offset ones.
4375 		 */
4376 		err = check_stack_write_var_off(env, state,
4377 						ptr_regno, off, size,
4378 						value_regno, insn_idx);
4379 	}
4380 	return err;
4381 }
4382 
/* Reject reads/writes that the map's flags (rdonly/wronly) forbid. */
static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
				 int off, int size, enum bpf_access_type type)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_map *map = reg->map_ptr;
	u32 cap = bpf_map_flags_to_cap(map);
	bool can_write = cap & BPF_MAP_CAN_WRITE;
	bool can_read = cap & BPF_MAP_CAN_READ;

	if (type == BPF_WRITE && !can_write) {
		verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n",
			map->value_size, reg->smin_value + off, size);
		return -EACCES;
	}

	if (type == BPF_READ && !can_read) {
		verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n",
			map->value_size, reg->smin_value + off, size);
		return -EACCES;
	}

	return 0;
}
4404 
/* check read/write into memory region (e.g., map value, ringbuf sample, etc)
 *
 * Accepts the access iff [off, off + size) lies within [0, mem_size); on
 * rejection prints a message tailored to the pointer type of R'regno'.
 */
static int __check_mem_access(struct bpf_verifier_env *env, int regno,
			      int off, int size, u32 mem_size,
			      bool zero_size_allowed)
{
	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
	struct bpf_reg_state *reg;

	/* The u64 cast keeps 'off + size' from overflowing for any
	 * non-negative 'off'.
	 */
	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
		return 0;

	reg = &cur_regs(env)[regno];
	switch (reg->type) {
	case PTR_TO_MAP_KEY:
		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_MAP_VALUE:
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
			off, size, regno, reg->id, off, mem_size);
		break;
	case PTR_TO_CTX:
		verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_MEM:
	default:
		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
			mem_size, off, size);
	}

	return -EACCES;
}
4444 
/* check read/write into a memory region with possible variable offset
 *
 * Both the smallest and largest possible effective offsets of R'regno'
 * are bounds-checked via __check_mem_access(); either one failing rejects
 * the program.
 */
static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
				   int off, int size, u32 mem_size,
				   bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register pointing to memory region, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 *
	 * The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * index'es we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0 &&
	    (reg->smin_value == S64_MIN ||
	     /* middle clause: reject when 'off + smin' does not fit in s32 */
	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
	      reg->smin_value + off < 0)) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->smin_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->umax_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d max value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	return 0;
}
4500 
/* Pointers of this kind may only be used in their original, unmodified
 * form: the offset must be a known constant, non-negative, and — unless
 * 'fixed_off_ok' — exactly zero.
 */
static int __check_ptr_off_reg(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int regno,
			       bool fixed_off_ok)
{
	char tn_buf[48];

	if (!tnum_is_const(reg->var_off)) {
		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable %s access var_off=%s disallowed\n",
			reg_type_str(env, reg->type), tn_buf);
		return -EACCES;
	}

	if (reg->smin_value < 0) {
		verbose(env, "negative offset %s ptr R%d off=%lld disallowed\n",
			reg_type_str(env, reg->type), regno, reg->var_off.value);
		return -EACCES;
	}

	if (reg->var_off.value != 0 && !fixed_off_ok) {
		verbose(env, "dereference of modified %s ptr R%d off=%lld disallowed\n",
			reg_type_str(env, reg->type), regno, reg->var_off.value);
		return -EACCES;
	}

	return 0;
}
4532 
check_ptr_off_reg(struct bpf_verifier_env * env,const struct bpf_reg_state * reg,int regno)4533 static int check_ptr_off_reg(struct bpf_verifier_env *env,
4534 		             const struct bpf_reg_state *reg, int regno)
4535 {
4536 	return __check_ptr_off_reg(env, reg, regno, false);
4537 }
4538 
/* Check that register R'regno', being stored into a kptr field, carries a
 * PTR_TO_BTF_ID whose BTF type and type flags are compatible with the
 * field's declared pointee type. Returns 0 on match, negative errno
 * otherwise (with a verbose explanation of the mismatch).
 */
static int map_kptr_match_type(struct bpf_verifier_env *env,
			       struct btf_field *kptr_field,
			       struct bpf_reg_state *reg, u32 regno)
{
	const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
	int perm_flags;
	const char *reg_name = "";

	/* Build the set of type flags the stored pointer is allowed to
	 * carry; anything outside this set fails the match below.
	 */
	if (btf_is_kernel(reg->btf)) {
		perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;

		/* Only unreferenced case accepts untrusted pointers */
		if (kptr_field->type == BPF_KPTR_UNREF)
			perm_flags |= PTR_UNTRUSTED;
	} else {
		perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
		if (kptr_field->type == BPF_KPTR_PERCPU)
			perm_flags |= MEM_PERCPU;
	}

	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
		goto bad_type;

	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
	reg_name = btf_type_name(reg->btf, reg->btf_id);

	/* For ref_ptr case, release function check should ensure we get one
	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
	 * normal store of unreferenced kptr, we must ensure var_off is zero.
	 * Since ref_ptr cannot be accessed directly by BPF insns, check for
	 * reg->ref_obj_id is not needed here.
	 */
	if (__check_ptr_off_reg(env, reg, regno, true))
		return -EACCES;

	/* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
	 * we also need to take into account the reg->var_off.
	 *
	 * We want to support cases like:
	 *
	 * struct foo {
	 *         struct bar br;
	 *         struct baz bz;
	 * };
	 *
	 * struct foo *v;
	 * v = func();	      // PTR_TO_BTF_ID
	 * val->foo = v;      // reg->var_off is zero, btf and btf_id match type
	 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with
	 *                    // first member type of struct after comparison fails
	 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked
	 *                    // to match type
	 *
	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off
	 * is zero. We must also ensure that btf_struct_ids_match does not walk
	 * the struct to match type against first member of struct, i.e. reject
	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
	 * strict mode to true for type match.
	 */
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value,
				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				  kptr_field->type != BPF_KPTR_UNREF))
		goto bad_type;
	return 0;
bad_type:
	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
		reg_type_str(env, reg->type), reg_name);
	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
	if (kptr_field->type == BPF_KPTR_UNREF)
		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
			targ_name);
	else
		verbose(env, "\n");
	return -EINVAL;
}
4614 
/* True when the current verification state is in a sleepable context. */
static bool in_sleepable(struct bpf_verifier_env *env)
{
	return env->cur_state->in_sleepable;
}
4619 
/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
 * can dereference RCU protected pointers and result is PTR_TRUSTED.
 *
 * Holding a bpf_spin_lock (active_locks) also counts here — NOTE(review):
 * presumably because spinlock sections cannot sleep; confirm against the
 * lock-tracking code.
 */
static bool in_rcu_cs(struct bpf_verifier_env *env)
{
	return env->cur_state->active_rcu_locks ||
	       env->cur_state->active_locks ||
	       !in_sleepable(env);
}
4629 
/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check.
 *
 * Kernel struct types listed here are treated as RCU protected by
 * rcu_protected_object() below.
 */
BTF_SET_START(rcu_protected_types)
#ifdef CONFIG_NET
BTF_ID(struct, prog_test_ref_kfunc)
#endif
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
#endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct)
#ifdef CONFIG_CRYPTO
BTF_ID(struct, bpf_crypto_ctx)
#endif
BTF_SET_END(rcu_protected_types)
4646 
4647 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4648 {
4649 	if (!btf_is_kernel(btf))
4650 		return true;
4651 	return btf_id_set_contains(&rcu_protected_types, btf_id);
4652 }
4653 
kptr_pointee_btf_record(struct btf_field * kptr_field)4654 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
4655 {
4656 	struct btf_struct_meta *meta;
4657 
4658 	if (btf_is_kernel(kptr_field->kptr.btf))
4659 		return NULL;
4660 
4661 	meta = btf_find_struct_meta(kptr_field->kptr.btf,
4662 				    kptr_field->kptr.btf_id);
4663 
4664 	return meta ? meta->record : NULL;
4665 }
4666 
rcu_safe_kptr(const struct btf_field * field)4667 static bool rcu_safe_kptr(const struct btf_field *field)
4668 {
4669 	const struct btf_field_kptr *kptr = &field->kptr;
4670 
4671 	return field->type == BPF_KPTR_PERCPU ||
4672 	       (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
4673 }
4674 
/* Compute the register type flags for the result of loading a kptr field:
 * always PTR_MAYBE_NULL; plus MEM_RCU (and MEM_PERCPU/MEM_ALLOC as
 * appropriate) when the load is under RCU protection of an RCU-safe kptr,
 * otherwise PTR_UNTRUSTED.
 */
static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
{
	struct btf_record *rec;
	u32 ret;

	ret = PTR_MAYBE_NULL;
	if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
		ret |= MEM_RCU;
		if (kptr_field->type == BPF_KPTR_PERCPU)
			ret |= MEM_PERCPU;
		else if (!btf_is_kernel(kptr_field->kptr.btf))
			ret |= MEM_ALLOC;

		/* Pointees containing graph (list/rbtree) nodes are marked
		 * as non-owning references.
		 */
		rec = kptr_pointee_btf_record(kptr_field);
		if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
			ret |= NON_OWN_REF;
	} else {
		ret |= PTR_UNTRUSTED;
	}

	return ret;
}
4697 
/* Type the destination register of a uptr load: PTR_TO_MEM | PTR_MAYBE_NULL
 * spanning the pointee type's size, with a fresh id for NULL-check tracking.
 */
static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
			    struct btf_field *field)
{
	struct bpf_reg_state *reg;
	const struct btf_type *t;

	t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id);
	mark_reg_known_zero(env, cur_regs(env), regno);
	reg = reg_state(env, regno);
	reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
	reg->mem_size = t->size;
	reg->id = ++env->id_gen;

	return 0;
}
4713 
/* Verify a direct load/store that hits a kptr/uptr field in a map value,
 * and for loads type the destination register accordingly. The caller has
 * already established that the access exactly covers the field (see the
 * comment below).
 */
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
				 int value_regno, int insn_idx,
				 struct btf_field *kptr_field)
{
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	int class = BPF_CLASS(insn->code);
	struct bpf_reg_state *val_reg;
	int ret;

	/* Things we already checked for in check_map_access and caller:
	 *  - Reject cases where variable offset may touch kptr
	 *  - size of access (must be BPF_DW)
	 *  - tnum_is_const(reg->var_off)
	 *  - kptr_field->offset == off + reg->var_off.value
	 */
	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
	if (BPF_MODE(insn->code) != BPF_MEM) {
		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
		return -EACCES;
	}

	/* We only allow loading referenced kptr, since it will be marked as
	 * untrusted, similar to unreferenced kptr.
	 */
	if (class != BPF_LDX &&
	    (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
		verbose(env, "store to referenced kptr disallowed\n");
		return -EACCES;
	}
	if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
		verbose(env, "store to uptr disallowed\n");
		return -EACCES;
	}

	if (class == BPF_LDX) {
		if (kptr_field->type == BPF_UPTR)
			return mark_uptr_ld_reg(env, value_regno, kptr_field);

		/* We can simply mark the value_regno receiving the pointer
		 * value from map as PTR_TO_BTF_ID, with the correct type.
		 */
		ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
				      kptr_field->kptr.btf, kptr_field->kptr.btf_id,
				      btf_ld_kptr_type(env, kptr_field));
		if (ret < 0)
			return ret;
	} else if (class == BPF_STX) {
		/* Storing NULL is always allowed; otherwise the source
		 * register's type must match the field's pointee type.
		 */
		val_reg = reg_state(env, value_regno);
		if (!bpf_register_is_null(val_reg) &&
		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
			return -EACCES;
	} else if (class == BPF_ST) {
		if (insn->imm) {
			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
				kptr_field->offset);
			return -EACCES;
		}
	} else {
		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
		return -EACCES;
	}
	return 0;
}
4777 
4778 /*
4779  * Return the size of the memory region accessible from a pointer to map value.
4780  * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible.
4781  */
map_mem_size(const struct bpf_map * map)4782 static u32 map_mem_size(const struct bpf_map *map)
4783 {
4784 	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
4785 		return map->max_entries * sizeof(long);
4786 
4787 	return map->value_size;
4788 }
4789 
/* check read/write into a map element with possible variable offset
 *
 * First bounds-checks the access against the map's accessible region, then
 * rejects any access that could overlap a BTF-declared special field,
 * except for exact BPF_DW accesses to kptr/uptr fields.
 */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
			    int off, int size, bool zero_size_allowed,
			    enum bpf_access_src src)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	struct bpf_map *map = reg->map_ptr;
	u32 mem_size = map_mem_size(map);
	struct btf_record *rec;
	int err, i;

	err = check_mem_region_access(env, regno, off, size, mem_size, zero_size_allowed);
	if (err)
		return err;

	/* Maps without special fields need no further checks. */
	if (IS_ERR_OR_NULL(map->record))
		return 0;
	rec = map->record;
	for (i = 0; i < rec->cnt; i++) {
		struct btf_field *field = &rec->fields[i];
		u32 p = field->offset;

		/* If any part of a field can be touched by load/store, reject
		 * this program. To check that [x1, x2) overlaps with [y1, y2),
		 * it is sufficient to check x1 < y2 && y1 < x2.
		 */
		if (reg->smin_value + off < p + field->size &&
		    p < reg->umax_value + off + size) {
			switch (field->type) {
			case BPF_KPTR_UNREF:
			case BPF_KPTR_REF:
			case BPF_KPTR_PERCPU:
			case BPF_UPTR:
				/* kptr/uptr fields may be accessed, but only
				 * directly, at a constant offset exactly on
				 * the field, with BPF_DW size.
				 */
				if (src != ACCESS_DIRECT) {
					verbose(env, "%s cannot be accessed indirectly by helper\n",
						btf_field_type_name(field->type));
					return -EACCES;
				}
				if (!tnum_is_const(reg->var_off)) {
					verbose(env, "%s access cannot have variable offset\n",
						btf_field_type_name(field->type));
					return -EACCES;
				}
				if (p != off + reg->var_off.value) {
					verbose(env, "%s access misaligned expected=%u off=%llu\n",
						btf_field_type_name(field->type),
						p, off + reg->var_off.value);
					return -EACCES;
				}
				if (size != bpf_size_to_bytes(BPF_DW)) {
					verbose(env, "%s access size must be BPF_DW\n",
						btf_field_type_name(field->type));
					return -EACCES;
				}
				break;
			default:
				verbose(env, "%s cannot be accessed directly by load/store\n",
					btf_field_type_name(field->type));
				return -EACCES;
			}
		}
	}
	return 0;
}
4856 
/* Decide whether the current program type may access packet data directly
 * through packet pointers. 'meta' is non-NULL when the check is on behalf
 * of a helper argument, in which case the helper's declared pkt_access
 * decides for read+write program types. env->seen_direct_write is set when
 * a (potential) direct write is permitted.
 */
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
			       const struct bpf_call_arg_meta *meta,
			       enum bpf_access_type t)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);

	switch (prog_type) {
	/* Program types only with direct read access go here! */
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (t == BPF_WRITE)
			return false;
		fallthrough;

	/* Program types with direct read + write access go here! */
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		if (meta)
			return meta->pkt_access;

		env->seen_direct_write = true;
		return true;

	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		if (t == BPF_WRITE)
			env->seen_direct_write = true;

		return true;

	default:
		return false;
	}
}
4898 
/* Check [off, off + size) access through a packet pointer whose verified
 * range is reg->range, and record the maximum packet offset accessed by
 * the program.
 */
static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
			       int size, bool zero_size_allowed)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	int err;

	if (reg->range < 0) {
		verbose(env, "R%d offset is outside of the packet\n", regno);
		return -EINVAL;
	}

	err = check_mem_region_access(env, regno, off, size, reg->range, zero_size_allowed);
	if (err)
		return err;

	/* __check_mem_access has made sure "off + size - 1" is within u16.
	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
	 * otherwise find_good_pkt_pointers would have refused to set range info
	 * that __check_mem_access would have rejected this pkt access.
	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
	 */
	env->prog->aux->max_pkt_offset =
		max_t(u32, env->prog->aux->max_pkt_offset,
		      off + reg->umax_value + size - 1);

	return 0;
}
4926 
is_var_ctx_off_allowed(struct bpf_prog * prog)4927 static bool is_var_ctx_off_allowed(struct bpf_prog *prog)
4928 {
4929 	return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL;
4930 }
4931 
/* check access to 'struct bpf_context' fields.  Supports fixed offsets only.
 *
 * Delegates the per-prog-type validity decision to ops->is_valid_access()
 * and records the ctx_field_size / max_ctx_offset bookkeeping on success.
 */
static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
			      enum bpf_access_type t, struct bpf_insn_access_aux *info)
{
	if (env->ops->is_valid_access &&
	    env->ops->is_valid_access(off, size, t, env->prog, info)) {
		/* A non zero info.ctx_field_size indicates that this field is a
		 * candidate for later verifier transformation to load the whole
		 * field and then apply a mask when accessed with a narrower
		 * access than actual ctx access size. A zero info.ctx_field_size
		 * will only allow for whole field access and rejects any other
		 * type of narrower access.
		 */
		if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
			/* The ctx field yields a referenced pointer; it must
			 * still be live in the current state.
			 */
			if (info->ref_obj_id &&
			    !find_reference_state(env->cur_state, info->ref_obj_id)) {
				verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
					off);
				return -EACCES;
			}
		} else {
			env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
		}
		/* remember the offset of last byte accessed in ctx */
		if (env->prog->aux->max_ctx_offset < off + size)
			env->prog->aux->max_ctx_offset = off + size;
		return 0;
	}

	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
	return -EACCES;
}
4964 
/* Validate a ctx access through R'regno', then check the (worst case)
 * effective offset against the ctx layout via __check_ctx_access().
 */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
			    int off, int access_size, enum bpf_access_type t,
			    struct bpf_insn_access_aux *info)
{
	/*
	 * Program types that don't rewrite ctx accesses can safely
	 * dereference ctx pointers with fixed offsets.
	 */
	bool var_off_ok = is_var_ctx_off_allowed(env->prog);
	bool fixed_off_ok = !env->ops->convert_ctx_access;
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = regs + regno;
	int err;

	/* Syscall progs get a plain bounds check; everyone else requires a
	 * constant pointer offset (zero unless no ctx rewriting happens).
	 */
	if (var_off_ok)
		err = check_mem_region_access(env, regno, off, access_size, U16_MAX, false);
	else
		err = __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
	if (err)
		return err;
	/* Check against the largest possible effective offset. */
	off += reg->umax_value;

	err = __check_ctx_access(env, insn_idx, off, access_size, t, info);
	if (err)
		verbose_linfo(env, insn_idx, "; ");
	return err;
}
4992 
check_flow_keys_access(struct bpf_verifier_env * env,int off,int size)4993 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
4994 				  int size)
4995 {
4996 	if (size < 0 || off < 0 ||
4997 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
4998 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
4999 			off, size);
5000 		return -EACCES;
5001 	}
5002 	return 0;
5003 }
5004 
/* Check an access through a socket pointer, dispatching to the validity
 * helper matching the pointer's concrete socket type.
 */
static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
			     u32 regno, int off, int size,
			     enum bpf_access_type t)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_insn_access_aux info = {};
	bool valid;

	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}

	switch (reg->type) {
	case PTR_TO_SOCK_COMMON:
		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_SOCKET:
		valid = bpf_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_TCP_SOCK:
		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
		break;
	case PTR_TO_XDP_SOCK:
		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
		break;
	default:
		valid = false;
	}

	if (valid) {
		/* Record the field size for possible narrow-load rewriting. */
		env->insn_aux_data[insn_idx].ctx_field_size =
			info.ctx_field_size;
		return 0;
	}

	verbose(env, "R%d invalid %s access off=%d size=%d\n",
		regno, reg_type_str(env, reg->type), off, size);

	return -EACCES;
}
5048 
is_pointer_value(struct bpf_verifier_env * env,int regno)5049 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
5050 {
5051 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
5052 }
5053 
is_ctx_reg(struct bpf_verifier_env * env,int regno)5054 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
5055 {
5056 	const struct bpf_reg_state *reg = reg_state(env, regno);
5057 
5058 	return reg->type == PTR_TO_CTX;
5059 }
5060 
is_sk_reg(struct bpf_verifier_env * env,int regno)5061 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
5062 {
5063 	const struct bpf_reg_state *reg = reg_state(env, regno);
5064 
5065 	return type_is_sk_pointer(reg->type);
5066 }
5067 
is_pkt_reg(struct bpf_verifier_env * env,int regno)5068 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
5069 {
5070 	const struct bpf_reg_state *reg = reg_state(env, regno);
5071 
5072 	return type_is_pkt_pointer(reg->type);
5073 }
5074 
is_flow_key_reg(struct bpf_verifier_env * env,int regno)5075 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
5076 {
5077 	const struct bpf_reg_state *reg = reg_state(env, regno);
5078 
5079 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
5080 	return reg->type == PTR_TO_FLOW_KEYS;
5081 }
5082 
is_arena_reg(struct bpf_verifier_env * env,int regno)5083 static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
5084 {
5085 	const struct bpf_reg_state *reg = reg_state(env, regno);
5086 
5087 	return reg->type == PTR_TO_ARENA;
5088 }
5089 
5090 /* Return false if @regno contains a pointer whose type isn't supported for
5091  * atomic instruction @insn.
5092  */
atomic_ptr_type_ok(struct bpf_verifier_env * env,int regno,struct bpf_insn * insn)5093 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
5094 			       struct bpf_insn *insn)
5095 {
5096 	if (is_ctx_reg(env, regno))
5097 		return false;
5098 	if (is_pkt_reg(env, regno))
5099 		return false;
5100 	if (is_flow_key_reg(env, regno))
5101 		return false;
5102 	if (is_sk_reg(env, regno))
5103 		return false;
5104 	if (is_arena_reg(env, regno))
5105 		return bpf_jit_supports_insn(insn, true);
5106 
5107 	return true;
5108 }
5109 
/* Map base register types to the BTF id of the kernel struct they point at.
 * A non-NULL entry means the register type has a BTF-described kernel type
 * (consulted e.g. by is_trusted_reg() below).
 */
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
};
5118 
/* Decide whether @reg can be treated as a trusted pointer, i.e. one the
 * kernel guarantees points at a valid object for the duration of the access.
 */
static bool is_trusted_reg(const struct bpf_reg_state *reg)
{
	/* A referenced register is always trusted. */
	if (reg->ref_obj_id)
		return true;

	/* Types listed in the reg2btf_ids are always trusted */
	if (reg2btf_ids[base_type(reg->type)] &&
	    !bpf_type_has_unsafe_modifiers(reg->type))
		return true;

	/* If a register is not referenced, it is trusted if it has the
	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
	 * other type modifiers may be safe, but we elect to take an opt-in
	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
	 * not.
	 *
	 * Eventually, we should make PTR_TRUSTED the single source of truth
	 * for whether a register is trusted.
	 */
	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
	       !bpf_type_has_unsafe_modifiers(reg->type);
}
5142 
is_rcu_reg(const struct bpf_reg_state * reg)5143 static bool is_rcu_reg(const struct bpf_reg_state *reg)
5144 {
5145 	return reg->type & MEM_RCU;
5146 }
5147 
/* Strip all trust-conveying modifiers (including MEM_RCU) from @flag. */
static void clear_trusted_flags(enum bpf_type_flag *flag)
{
	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
}
5152 
/* Check alignment of a packet pointer access: the register's variable
 * offset plus the constant offset plus the emulated NET_IP_ALIGN must be
 * a multiple of the access size. Only enforced in strict mode and only
 * for multi-byte accesses.
 */
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg,
				   int off, int size, bool strict)
{
	struct tnum reg_off;
	int ip_align;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	/* For platforms that do not have a Kconfig enabling
	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
	 * to this code only in strict mode where we want to emulate
	 * the NET_IP_ALIGN==2 checking.  Therefore use an
	 * unconditional IP align value of '2'.
	 */
	ip_align = 2;

	/* Combine the known bits of the variable offset with the constants. */
	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"misaligned packet access off %d+%s+%d size %d\n",
			ip_align, tn_buf, off, size);
		return -EACCES;
	}

	return 0;
}
5187 
/* Generic (non-packet) pointer alignment check: the register's variable
 * offset plus the constant offset must be a multiple of the access size.
 * @pointer_desc names the pointer kind in the error message.
 */
static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
				       const struct bpf_reg_state *reg,
				       const char *pointer_desc,
				       int off, int size, bool strict)
{
	struct tnum reg_off;

	/* Byte size accesses are always allowed. */
	if (!strict || size == 1)
		return 0;

	reg_off = tnum_add(reg->var_off, tnum_const(off));
	if (!tnum_is_aligned(reg_off, size)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "misaligned %saccess off %s+%d size %d\n",
			pointer_desc, tn_buf, off, size);
		return -EACCES;
	}

	return 0;
}
5211 
/* Dispatch the alignment check appropriate for @reg's pointer type.
 * Some pointer types force strict checking regardless of env settings;
 * PTR_TO_ARENA is exempt entirely.
 */
static int check_ptr_alignment(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int off,
			       int size, bool strict_alignment_once)
{
	bool strict = env->strict_alignment || strict_alignment_once;
	const char *pointer_desc = "";

	switch (reg->type) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		/* Special case, because of NET_IP_ALIGN. Given metadata sits
		 * right in front, treat it the very same way.
		 */
		return check_pkt_ptr_alignment(env, reg, off, size, strict);
	case PTR_TO_FLOW_KEYS:
		pointer_desc = "flow keys ";
		break;
	case PTR_TO_MAP_KEY:
		pointer_desc = "key ";
		break;
	case PTR_TO_MAP_VALUE:
		pointer_desc = "value ";
		/* Insn array values are accessed with fixed element size;
		 * always enforce alignment for them.
		 */
		if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY)
			strict = true;
		break;
	case PTR_TO_CTX:
		pointer_desc = "context ";
		break;
	case PTR_TO_STACK:
		pointer_desc = "stack ";
		/* The stack spill tracking logic in check_stack_write_fixed_off()
		 * and check_stack_read_fixed_off() relies on stack accesses being
		 * aligned.
		 */
		strict = true;
		break;
	case PTR_TO_SOCKET:
		pointer_desc = "sock ";
		break;
	case PTR_TO_SOCK_COMMON:
		pointer_desc = "sock_common ";
		break;
	case PTR_TO_TCP_SOCK:
		pointer_desc = "tcp_sock ";
		break;
	case PTR_TO_XDP_SOCK:
		pointer_desc = "xdp_sock ";
		break;
	case PTR_TO_ARENA:
		/* Arena accesses are never alignment-checked. */
		return 0;
	default:
		break;
	}
	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
					   strict);
}
5268 
/* Decide whether @prog may use a private (per-prog) stack. Requires JIT
 * support; enabled for tracing-style program types, and for trampoline-based
 * types when explicitly requested or when recursion is possible.
 */
static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
{
	if (!bpf_jit_supports_private_stack())
		return NO_PRIV_STACK;

	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
	 * explicitly.
	 */
	switch (prog->type) {
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
		return PRIV_STACK_ADAPTIVE;
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_STRUCT_OPS:
		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
			return PRIV_STACK_ADAPTIVE;
		fallthrough;
	default:
		break;
	}

	return NO_PRIV_STACK;
}
5296 
round_up_stack_depth(struct bpf_verifier_env * env,int stack_depth)5297 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
5298 {
5299 	if (env->prog->jit_requested)
5300 		return round_up(stack_depth, 16);
5301 
5302 	/* round up to 32-bytes, since this is granularity
5303 	 * of interpreter stack size
5304 	 */
5305 	return round_up(max_t(u32, stack_depth, 1), 32);
5306 }
5307 
/* temporary state used for call frame depth calculation; one entry per
 * subprog, indexed by subprog number, forming an explicit call stack for
 * the iterative walk in check_max_stack_depth_subprog().
 */
struct bpf_subprog_call_depth_info {
	int ret_insn; /* caller instruction where we return to. */
	int caller; /* caller subprogram idx */
	int frame; /* # of consecutive static call stack frames on top of stack */
};
5314 
/* starting from main bpf function walk all instructions of the function
 * and recursively walk all callees that given function can call.
 * Ignore jump and exit insns.
 * Returns 0 on success; -EACCES/-E2BIG/-EINVAL for limit violations,
 * -EFAULT for verifier-internal inconsistencies.
 */
static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
					 struct bpf_subprog_call_depth_info *dinfo,
					 bool priv_stack_supported)
{
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
	bool tail_call_reachable = false;
	int total;
	int tmp;

	/* no caller idx */
	dinfo[idx].caller = -1;

	i = subprog[idx].start;
	if (!priv_stack_supported)
		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
/* Entered each time we "descend" into a callee subprog. */
process_func:
	/* protect against potential stack overflow that might happen when
	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
	 * depth for such case down to 256 so that the worst case scenario
	 * would result in 8k stack size (32 which is tailcall limit * 256 =
	 * 8k).
	 *
	 * To get the idea what might happen, see an example:
	 * func1 -> sub rsp, 128
	 *  subfunc1 -> sub rsp, 256
	 *  tailcall1 -> add rsp, 256
	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
	 *   subfunc2 -> sub rsp, 64
	 *   subfunc22 -> sub rsp, 128
	 *   tailcall2 -> add rsp, 128
	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
	 *
	 * tailcall will unwind the current stack frame but it will not get rid
	 * of caller's stack as shown on the example above.
	 */
	if (idx && subprog[idx].has_tail_call && depth >= 256) {
		verbose(env,
			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
			depth);
		return -EACCES;
	}

	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
	if (priv_stack_supported) {
		/* Request private stack support only if the subprog stack
		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
		 * avoid jit penalty if the stack usage is small.
		 */
		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
	}

	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
		/* Private-stack subprogs are checked individually, not as
		 * part of the combined call-chain depth.
		 */
		if (subprog_depth > MAX_BPF_STACK) {
			verbose(env, "stack size of subprog %d is %d. Too large\n",
				idx, subprog_depth);
			return -EACCES;
		}
	} else {
		depth += subprog_depth;
		if (depth > MAX_BPF_STACK) {
			/* Count the frames in the failing chain for the error. */
			total = 0;
			for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller)
				total++;

			verbose(env, "combined stack size of %d calls is %d. Too large\n",
				total, depth);
			return -EACCES;
		}
	}
/* Entered when resuming the scan of a caller after a callee finished. */
continue_func:
	subprog_end = subprog[idx + 1].start;
	for (; i < subprog_end; i++) {
		int next_insn, sidx;

		if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) {
			bool err = false;

			if (!is_bpf_throw_kfunc(insn + i))
				continue;
			/* bpf_throw is not allowed anywhere inside a callback
			 * subtree; walk the caller chain looking for one.
			 */
			for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) {
				if (subprog[tmp].is_cb) {
					err = true;
					break;
				}
			}
			if (!err)
				continue;
			verbose(env,
				"bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
				i, idx);
			return -EINVAL;
		}

		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
			continue;
		/* remember insn and function to return to */

		/* find the callee */
		next_insn = i + insn[i].imm + 1;
		sidx = bpf_find_subprog(env, next_insn);
		if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
			return -EFAULT;
		if (subprog[sidx].is_async_cb) {
			if (subprog[sidx].has_tail_call) {
				verifier_bug(env, "subprog has tail_call and async cb");
				return -EFAULT;
			}
			/* async callbacks don't increase bpf prog stack size unless called directly */
			if (!bpf_pseudo_call(insn + i))
				continue;
			if (subprog[sidx].is_exception_cb) {
				verbose(env, "insn %d cannot call exception cb directly", i);
				return -EINVAL;
			}
		}

		/* store caller info for after we return from callee */
		dinfo[idx].frame = frame;
		dinfo[idx].ret_insn = i + 1;

		/* push caller idx into callee's dinfo */
		dinfo[sidx].caller = idx;

		i = next_insn;

		idx = sidx;
		if (!priv_stack_supported)
			subprog[idx].priv_stack_mode = NO_PRIV_STACK;

		if (subprog[idx].has_tail_call)
			tail_call_reachable = true;

		/* global subprogs get a fresh frame budget */
		frame = bpf_subprog_is_global(env, idx) ? 0 : frame + 1;
		if (frame >= MAX_CALL_FRAMES) {
			verbose(env, "the call stack of %d frames is too deep !\n",
				frame);
			return -E2BIG;
		}
		goto process_func;
	}
	/* if tail call got detected across bpf2bpf calls then mark each of the
	 * currently present subprog frames as tail call reachable subprogs;
	 * this info will be utilized by JIT so that we will be preserving the
	 * tail call counter throughout bpf2bpf calls combined with tailcalls
	 */
	if (tail_call_reachable)
		for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) {
			if (subprog[tmp].is_exception_cb) {
				verbose(env, "cannot tail call within exception cb\n");
				return -EINVAL;
			}
			subprog[tmp].tail_call_reachable = true;
		}
	if (subprog[0].tail_call_reachable)
		env->prog->aux->tail_call_reachable = true;

	/* end of for() loop means the last insn of the 'subprog'
	 * was reached. Doesn't matter whether it was JA or EXIT
	 */
	if (frame == 0 && dinfo[idx].caller < 0)
		return 0;
	/* private-stack subprogs never contributed to 'depth' above */
	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);

	/* pop caller idx from callee */
	idx = dinfo[idx].caller;

	/* retrieve caller state from its frame */
	frame = dinfo[idx].frame;
	i = dinfo[idx].ret_insn;

	goto continue_func;
}
5496 
/* Verify total stack usage across all subprogs and call chains, deciding
 * along the way which subprogs may use a private stack. Walks the main
 * prog subtree and every async callback subtree independently.
 */
static int check_max_stack_depth(struct bpf_verifier_env *env)
{
	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
	struct bpf_subprog_call_depth_info *dinfo;
	struct bpf_subprog_info *si = env->subprog_info;
	bool priv_stack_supported;
	int ret;

	dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT);
	if (!dinfo)
		return -ENOMEM;

	/* Any tail call in the prog rules out private stacks entirely. */
	for (int i = 0; i < env->subprog_cnt; i++) {
		if (si[i].has_tail_call) {
			priv_stack_mode = NO_PRIV_STACK;
			break;
		}
	}

	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
		priv_stack_mode = bpf_enable_priv_stack(env->prog);

	/* All async_cb subprogs use normal kernel stack. If a particular
	 * subprog appears in both main prog and async_cb subtree, that
	 * subprog will use normal kernel stack to avoid potential nesting.
	 * The reverse subprog traversal ensures when main prog subtree is
	 * checked, the subprogs appearing in async_cb subtrees are already
	 * marked as using normal kernel stack, so stack size checking can
	 * be done properly.
	 */
	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
		if (!i || si[i].is_async_cb) {
			/* only the main prog (i == 0) subtree may use priv stack */
			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
			ret = check_max_stack_depth_subprog(env, i, dinfo,
					priv_stack_supported);
			if (ret < 0) {
				kvfree(dinfo);
				return ret;
			}
		}
	}

	/* Record whether any subprog ended up on a private stack for the JIT. */
	for (int i = 0; i < env->subprog_cnt; i++) {
		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
			env->prog->aux->jits_use_priv_stack = true;
			break;
		}
	}

	kvfree(dinfo);

	return 0;
}
5550 
/* Common checks for buffer-style accesses: the offset must be non-negative
 * and the register's variable offset must be a known constant. @buf_info
 * names the buffer kind in error messages.
 */
static int __check_buffer_access(struct bpf_verifier_env *env,
				 const char *buf_info,
				 const struct bpf_reg_state *reg,
				 int regno, int off, int size)
{
	if (off < 0) {
		verbose(env,
			"R%d invalid %s buffer access: off=%d, size=%d\n",
			regno, buf_info, off, size);
		return -EACCES;
	}
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
			regno, off, tn_buf);
		return -EACCES;
	}

	return 0;
}
5574 
/* Validate a raw tracepoint buffer access and track the high-water mark of
 * accessed bytes in prog->aux->max_tp_access.
 */
static int check_tp_buffer_access(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  int regno, int off, int size)
{
	int err;

	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
	if (err)
		return err;

	/* var_off is known constant here (checked above), so this is the
	 * exact end offset of the access.
	 */
	env->prog->aux->max_tp_access = max(reg->var_off.value + off + size,
					    env->prog->aux->max_tp_access);

	return 0;
}
5590 
/* Validate an access to a read-only or read-write buffer and update
 * @max_access with the largest end offset seen so far.
 */
static int check_buffer_access(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg,
			       int regno, int off, int size,
			       bool zero_size_allowed,
			       u32 *max_access)
{
	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
	int err;

	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
	if (err)
		return err;

	*max_access = max(reg->var_off.value + off + size, *max_access);

	return 0;
}
5608 
/* BPF architecture zero extends alu32 ops into 64-bit registers */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
	/* Keep only the low 32 bits of the tnum, then propagate the 32-bit
	 * bounds into the 64-bit ones.
	 */
	reg->var_off = tnum_subreg(reg->var_off);
	__reg_assign_32_into_64(reg);
}
5615 
/* truncate register to smaller size (in bytes)
 * must be called with size < BPF_REG_SIZE
 */
static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
{
	u64 mask;

	/* clear high bits in bit representation */
	reg->var_off = tnum_cast(reg->var_off, size);

	/* fix arithmetic bounds */
	mask = ((u64)1 << (size * 8)) - 1;
	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
		/* High bits agree, so truncation keeps the range ordered. */
		reg->umin_value &= mask;
		reg->umax_value &= mask;
	} else {
		/* Truncation may wrap the range; fall back to full range. */
		reg->umin_value = 0;
		reg->umax_value = mask;
	}
	/* Truncated values are non-negative, so signed == unsigned bounds. */
	reg->smin_value = reg->umin_value;
	reg->smax_value = reg->umax_value;

	/* If size is smaller than 32bit register the 32bit register
	 * values are also truncated so we push 64-bit bounds into
	 * 32-bit bounds. Above were truncated < 32-bits already.
	 */
	if (size < 4)
		__mark_reg32_unbounded(reg);

	reg_bounds_sync(reg);
}
5647 
set_sext64_default_val(struct bpf_reg_state * reg,int size)5648 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
5649 {
5650 	if (size == 1) {
5651 		reg->smin_value = reg->s32_min_value = S8_MIN;
5652 		reg->smax_value = reg->s32_max_value = S8_MAX;
5653 	} else if (size == 2) {
5654 		reg->smin_value = reg->s32_min_value = S16_MIN;
5655 		reg->smax_value = reg->s32_max_value = S16_MAX;
5656 	} else {
5657 		/* size == 4 */
5658 		reg->smin_value = reg->s32_min_value = S32_MIN;
5659 		reg->smax_value = reg->s32_max_value = S32_MAX;
5660 	}
5661 	reg->umin_value = reg->u32_min_value = 0;
5662 	reg->umax_value = U64_MAX;
5663 	reg->u32_max_value = U32_MAX;
5664 	reg->var_off = tnum_unknown;
5665 }
5666 
/* Recompute register bounds after a sign-extending load/mov of @size
 * (1, 2 or 4) bytes into a 64-bit register. Falls back to the widest
 * sign-extended range when nothing precise can be derived.
 */
static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
	u64 top_smax_value, top_smin_value;
	u64 num_bits = size * 8;

	/* A known constant sign-extends to another known constant. */
	if (tnum_is_const(reg->var_off)) {
		u64_cval = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u64_cval);
		else if (size == 2)
			reg->var_off = tnum_const((s16)u64_cval);
		else
			/* size == 4 */
			reg->var_off = tnum_const((s32)u64_cval);

		u64_cval = reg->var_off.value;
		reg->smax_value = reg->smin_value = u64_cval;
		reg->umax_value = reg->umin_value = u64_cval;
		reg->s32_max_value = reg->s32_min_value = u64_cval;
		reg->u32_max_value = reg->u32_min_value = u64_cval;
		return;
	}

	/* Bits above the truncation point must agree between smin and smax,
	 * otherwise the truncated range is not contiguous.
	 */
	top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
	top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;

	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s64_max and s64_min after sign extension */
	if (size == 1) {
		init_s64_max = (s8)reg->smax_value;
		init_s64_min = (s8)reg->smin_value;
	} else if (size == 2) {
		init_s64_max = (s16)reg->smax_value;
		init_s64_min = (s16)reg->smin_value;
	} else {
		init_s64_max = (s32)reg->smax_value;
		init_s64_min = (s32)reg->smin_value;
	}

	/* Sign extension may reorder min/max. */
	s64_max = max(init_s64_max, init_s64_min);
	s64_min = min(init_s64_max, init_s64_min);

	/* both of s64_max/s64_min positive or negative */
	if ((s64_max >= 0) == (s64_min >= 0)) {
		reg->s32_min_value = reg->smin_value = s64_min;
		reg->s32_max_value = reg->smax_value = s64_max;
		reg->u32_min_value = reg->umin_value = s64_min;
		reg->u32_max_value = reg->umax_value = s64_max;
		reg->var_off = tnum_range(s64_min, s64_max);
		return;
	}

out:
	set_sext64_default_val(reg, size);
}
5725 
set_sext32_default_val(struct bpf_reg_state * reg,int size)5726 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
5727 {
5728 	if (size == 1) {
5729 		reg->s32_min_value = S8_MIN;
5730 		reg->s32_max_value = S8_MAX;
5731 	} else {
5732 		/* size == 2 */
5733 		reg->s32_min_value = S16_MIN;
5734 		reg->s32_max_value = S16_MAX;
5735 	}
5736 	reg->u32_min_value = 0;
5737 	reg->u32_max_value = U32_MAX;
5738 	reg->var_off = tnum_subreg(tnum_unknown);
5739 }
5740 
/* 32-bit counterpart of coerce_reg_to_size_sx(): recompute 32-bit subreg
 * bounds after sign-extending @size (1 or 2) bytes into 32 bits.
 */
static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
{
	s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
	u32 top_smax_value, top_smin_value;
	u32 num_bits = size * 8;

	/* A known constant sign-extends to another known constant. */
	if (tnum_is_const(reg->var_off)) {
		u32_val = reg->var_off.value;
		if (size == 1)
			reg->var_off = tnum_const((s8)u32_val);
		else
			reg->var_off = tnum_const((s16)u32_val);

		u32_val = reg->var_off.value;
		reg->s32_min_value = reg->s32_max_value = u32_val;
		reg->u32_min_value = reg->u32_max_value = u32_val;
		return;
	}

	/* Bits above the truncation point must agree between the signed
	 * bounds, otherwise the truncated range is not contiguous.
	 */
	top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
	top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;

	if (top_smax_value != top_smin_value)
		goto out;

	/* find the s32_max and s32_min after sign extension */
	if (size == 1) {
		init_s32_max = (s8)reg->s32_max_value;
		init_s32_min = (s8)reg->s32_min_value;
	} else {
		/* size == 2 */
		init_s32_max = (s16)reg->s32_max_value;
		init_s32_min = (s16)reg->s32_min_value;
	}
	/* Sign extension may reorder min/max. */
	s32_max = max(init_s32_max, init_s32_min);
	s32_min = min(init_s32_max, init_s32_min);

	/* both bounds positive or both negative */
	if ((s32_min >= 0) == (s32_max >= 0)) {
		reg->s32_min_value = s32_min;
		reg->s32_max_value = s32_max;
		reg->u32_min_value = (u32)s32_min;
		reg->u32_max_value = (u32)s32_max;
		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
		return;
	}

out:
	set_sext32_default_val(reg, size);
}
5790 
/* Return true if @map's contents can be treated as immutable constants. */
bool bpf_map_is_rdonly(const struct bpf_map *map)
{
	/* A map is considered read-only if the following condition are true:
	 *
	 * 1) BPF program side cannot change any of the map content. The
	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
	 *    and was set at map creation time.
	 * 2) The map value(s) have been initialized from user space by a
	 *    loader and then "frozen", such that no new map update/delete
	 *    operations from syscall side are possible for the rest of
	 *    the map's lifetime from that point onwards.
	 * 3) Any parallel/pending map update/delete operations from syscall
	 *    side have been completed. Only after that point, it's safe to
	 *    assume that map value(s) are immutable.
	 */
	return (map->map_flags & BPF_F_RDONLY_PROG) &&
	       READ_ONCE(map->frozen) &&
	       !bpf_map_write_active(map);
}
5810 
/* Read @size (1/2/4/8) bytes of map value data at @off into *@val,
 * sign-extending for loads marked @is_ldsx (except 8-byte loads, which
 * have nothing to extend). Returns 0 on success or a negative error from
 * the map's map_direct_value_addr() callback / -EINVAL for a bad size.
 */
int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
			bool is_ldsx)
{
	void *ptr;
	u64 addr;
	int err;

	err = map->ops->map_direct_value_addr(map, &addr, off);
	if (err)
		return err;
	ptr = (void *)(long)addr + off;

	switch (size) {
	case sizeof(u8):
		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
		break;
	case sizeof(u16):
		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
		break;
	case sizeof(u32):
		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
		break;
	case sizeof(u64):
		*val = *(u64 *)ptr;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
5841 
/* Name-mangling macros producing shadow struct tags (e.g.
 * "task_struct__safe_rcu"). The shadow structs below list the fields that
 * are allow-listed for each safety class; they are pulled into kernel BTF
 * via BTF_TYPE_EMIT() in the type_is_*() helpers further down.
 */
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)
#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type)  __PASTE(__type, __safe_trusted_or_null)

/*
 * Allow list few fields as RCU trusted or full trusted.
 * This logic doesn't allow mix tagging and will be removed once GCC supports
 * btf_type_tag.
 */

/* RCU trusted: these fields are trusted in RCU CS and never NULL */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};

/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};

/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};
5923 
/* Return true if the field named 'field_name' in BTF type 'btf_id' is
 * annotated __safe_rcu, i.e. it stays trusted (MEM_RCU) while the
 * program is inside an RCU read-side critical section.
 */
static bool type_is_rcu(struct bpf_verifier_env *env,
			struct bpf_reg_state *reg,
			const char *field_name, u32 btf_id)
{
	/* Force BTF for the __safe_rcu annotated structs to be emitted so
	 * btf_nested_type_is_trusted() can look up their fields.
	 */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
}
5935 
/* Return true if the field is annotated __safe_rcu_or_null: trusted in
 * an RCU CS but possibly NULL (yields MEM_RCU | PTR_MAYBE_NULL).
 */
static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
				struct bpf_reg_state *reg,
				const char *field_name, u32 btf_id)
{
	/* Emit BTF for the __safe_rcu_or_null annotated structs. */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
}
5946 
/* Return true if the field is annotated __safe_trusted: the resulting
 * pointer is PTR_TRUSTED regardless of RCU CS and is not NULL-marked.
 */
static bool type_is_trusted(struct bpf_verifier_env *env,
			    struct bpf_reg_state *reg,
			    const char *field_name, u32 btf_id)
{
	/* Emit BTF for the __safe_trusted annotated structs. */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}
5958 
/* Return true if the field is annotated __safe_trusted_or_null: trusted
 * outside of RCU CS but possibly NULL (yields PTR_TRUSTED | PTR_MAYBE_NULL).
 */
static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
				    struct bpf_reg_state *reg,
				    const char *field_name, u32 btf_id)
{
	/* Emit BTF for the __safe_trusted_or_null annotated structs. */
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
					  "__safe_trusted_or_null");
}
5970 
/* Validate a BPF_READ/BPF_WRITE of 'size' bytes at offset 'off' through
 * the PTR_TO_BTF_ID register 'regno'.  On a successful read into
 * 'value_regno' (>= 0), the destination register is marked with the BTF
 * type of the accessed field plus a trust flag (PTR_TRUSTED, MEM_RCU,
 * PTR_UNTRUSTED, optionally PTR_MAYBE_NULL) derived from the parent
 * pointer's trust state and the field's __safe_* annotation.
 * Returns 0 on success or a negative error.
 */
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
	const char *field_name = NULL;
	enum bpf_type_flag flag = 0;
	u32 btf_id = 0;
	int ret;

	/* Raw BTF pointer walking is a privileged operation. */
	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}
	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
		verbose(env,
			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
			tname);
		return -EINVAL;
	}

	/* The access offset must be a compile-time constant so the target
	 * field can be resolved in BTF.
	 */
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
			regno, tname, off, tn_buf);
		return -EACCES;
	}

	/* Fold the (constant) register offset into the insn offset. */
	off += reg->var_off.value;

	if (off < 0) {
		verbose(env,
			"R%d is ptr_%s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	/* Userspace and per-cpu memory cannot be dereferenced directly. */
	if (reg->type & MEM_USER) {
		verbose(env,
			"R%d is ptr_%s access user memory: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_PERCPU) {
		verbose(env,
			"R%d is ptr_%s access percpu memory: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	/* Writes to kernel (non program-allocated) objects go through the
	 * program-type specific callback when one exists.
	 */
	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
		if (!btf_is_kernel(reg->btf)) {
			verifier_bug(env, "reg->btf must be kernel btf");
			return -EFAULT;
		}
		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
	} else {
		/* Writes are permitted with default btf_struct_access for
		 * program allocated objects (which always have ref_obj_id > 0),
		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
		 */
		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
			verbose(env, "only read is supported\n");
			return -EACCES;
		}

		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
		    !(reg->type & MEM_RCU) && !reg->ref_obj_id) {
			verifier_bug(env, "ref_obj_id for allocated object must be non-zero");
			return -EFAULT;
		}

		/* Resolve the field; fills btf_id/flag/field_name and
		 * returns the resulting register type (or < 0 on error).
		 */
		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
	}

	if (ret < 0)
		return ret;

	/* Derive the trust flags of the loaded value from the parent
	 * pointer's trust state and the field's annotation.
	 */
	if (ret != PTR_TO_BTF_ID) {
		/* just mark; */

	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
		/* If this is an untrusted pointer, all pointers formed by walking it
		 * also inherit the untrusted flag.
		 */
		flag = PTR_UNTRUSTED;

	} else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
		if (type_is_trusted(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED;
		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, field_name, btf_id)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU ||
				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
				/* __rcu tagged pointers can be NULL */
				flag |= MEM_RCU | PTR_MAYBE_NULL;

				/* We always trust them */
				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
				    flag & PTR_UNTRUSTED)
					flag &= ~PTR_UNTRUSTED;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
				clear_trusted_flags(&flag);
			}
		} else {
			/*
			 * If not in RCU CS or MEM_RCU pointer can be NULL then
			 * aggressively mark as untrusted otherwise such
			 * pointers will be plain PTR_TO_BTF_ID without flags
			 * and will be allowed to be passed into helpers for
			 * compat reasons.
			 */
			flag = PTR_UNTRUSTED;
		}
	} else {
		/* Old compat. Deprecated */
		clear_trusted_flags(&flag);
	}

	/* Record the resolved type into the destination register on reads. */
	if (atype == BPF_READ && value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}
6127 
/* Handle an access through a CONST_PTR_TO_MAP register, i.e. direct
 * access to the kernel's map struct behind the map pointer.  Only reads
 * are allowed, the map type must expose a BTF id, and vmlinux BTF must
 * be available.  The access is simulated as a PTR_TO_BTF_ID walk of the
 * map's BTF type, and 'value_regno' (if >= 0) is marked accordingly.
 */
static int check_ptr_to_map_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs,
				   int regno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_reg_state *reg = regs + regno;
	struct bpf_map *map = reg->map_ptr;
	struct bpf_reg_state map_reg;
	enum bpf_type_flag flag = 0;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;
	int ret;

	if (!btf_vmlinux) {
		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
		return -ENOTSUPP;
	}

	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
		verbose(env, "map_ptr access not supported for map type %d\n",
			map->map_type);
		return -ENOTSUPP;
	}

	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
	tname = btf_name_by_offset(btf_vmlinux, t->name_off);

	/* Peeking into kernel structs is a privileged operation. */
	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}

	if (off < 0) {
		verbose(env, "R%d is %s invalid negative access: off=%d\n",
			regno, tname, off);
		return -EACCES;
	}

	if (atype != BPF_READ) {
		verbose(env, "only read from %s is supported\n", tname);
		return -EACCES;
	}

	/* Simulate access to a PTR_TO_BTF_ID */
	memset(&map_reg, 0, sizeof(map_reg));
	ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
			      btf_vmlinux, *map->ops->map_btf_id, 0);
	if (ret < 0)
		return ret;
	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
	if (ret < 0)
		return ret;

	/* Propagate the resolved field type to the destination register. */
	if (value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}
6193 
6194 /* Check that the stack access at the given offset is within bounds. The
6195  * maximum valid offset is -1.
6196  *
6197  * The minimum valid offset is -MAX_BPF_STACK for writes, and
6198  * -state->allocated_stack for reads.
6199  */
check_stack_slot_within_bounds(struct bpf_verifier_env * env,s64 off,struct bpf_func_state * state,enum bpf_access_type t)6200 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
6201                                           s64 off,
6202                                           struct bpf_func_state *state,
6203                                           enum bpf_access_type t)
6204 {
6205 	int min_valid_off;
6206 
6207 	if (t == BPF_WRITE || env->allow_uninit_stack)
6208 		min_valid_off = -MAX_BPF_STACK;
6209 	else
6210 		min_valid_off = -state->allocated_stack;
6211 
6212 	if (off < min_valid_off || off > -1)
6213 		return -EACCES;
6214 	return 0;
6215 }
6216 
/* Check that the stack access at 'regno + off' falls within the maximum stack
 * bounds.
 *
 * 'off' includes `regno->offset`, but not its dynamic part (if any).
 * On success, grows the tracked stack size to cover the access.
 */
static int check_stack_access_within_bounds(
		struct bpf_verifier_env *env,
		int regno, int off, int access_size,
		enum bpf_access_type type)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_func_state *state = bpf_func(env, reg);
	s64 min_off, max_off;
	int err;
	char *err_extra;

	if (type == BPF_READ)
		err_extra = " read from";
	else
		err_extra = " write to";

	/* Compute the lowest and one-past-highest offsets touched.  For a
	 * variable offset, use the register's signed bounds.
	 */
	if (tnum_is_const(reg->var_off)) {
		min_off = (s64)reg->var_off.value + off;
		max_off = min_off + access_size;
	} else {
		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
				err_extra, regno);
			return -EACCES;
		}
		min_off = reg->smin_value + off;
		max_off = reg->smax_value + off + access_size;
	}

	err = check_stack_slot_within_bounds(env, min_off, state, type);
	if (!err && max_off > 0)
		err = -EINVAL; /* out of stack access into non-negative offsets */
	if (!err && access_size < 0)
		/* access_size should not be negative (or overflow an int); others checks
		 * along the way should have prevented such an access.
		 */
		err = -EFAULT; /* invalid negative access size; integer overflow? */

	if (err) {
		if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid%s stack R%d off=%lld size=%d\n",
				err_extra, regno, min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
				err_extra, regno, tn_buf, off, access_size);
		}
		return err;
	}

	/* Note that there is no stack access with offset zero, so the needed stack
	 * size is -min_off, not -min_off+1.
	 */
	return grow_stack_state(env, state, -min_off /* size */);
}
6280 
get_func_retval_range(struct bpf_prog * prog,struct bpf_retval_range * range)6281 static bool get_func_retval_range(struct bpf_prog *prog,
6282 				  struct bpf_retval_range *range)
6283 {
6284 	if (prog->type == BPF_PROG_TYPE_LSM &&
6285 		prog->expected_attach_type == BPF_LSM_MAC &&
6286 		!bpf_lsm_get_retval_range(prog, range)) {
6287 		return true;
6288 	}
6289 	return false;
6290 }
6291 
/* Add the constant 'val' to scalar register state 'dst_reg', updating
 * its 32-bit bounds, 64-bit bounds and known-bits tnum, then
 * re-synchronizing the derived bounds.
 */
static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val)
{
	struct bpf_reg_state const_reg;

	/* Adding zero leaves the register state unchanged. */
	if (val == 0)
		return;

	const_reg.type = SCALAR_VALUE;
	__mark_reg_known(&const_reg, val);

	/* Fold the known constant into every bounds representation. */
	scalar32_min_max_add(dst_reg, &const_reg);
	scalar_min_max_add(dst_reg, &const_reg);
	dst_reg->var_off = tnum_add(dst_reg->var_off, const_reg.var_off);

	reg_bounds_sync(dst_reg);
}
6308 
/* check whether memory at (regno + off) is accessible for t = (read | write)
 * if t==write, value_regno is a register which value is stored into memory
 * if t==read, value_regno is a register which will receive the value from memory
 * if t==write && value_regno==-1, some unknown value is stored into memory
 * if t==read && value_regno==-1, don't care what we read from memory
 *
 * Dispatches on the base type of the pointer register 'regno' and applies
 * the matching bounds/permission checks, then marks the destination
 * register for reads.
 */
static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
			    int off, int bpf_size, enum bpf_access_type t,
			    int value_regno, bool strict_alignment_once, bool is_ldsx)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = regs + regno;
	int size, err = 0;

	size = bpf_size_to_bytes(bpf_size);
	if (size < 0)
		return size;

	/* alignment checks first; they apply to all pointer types */
	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
	if (err)
		return err;

	if (reg->type == PTR_TO_MAP_KEY) {
		/* map keys are read-only from the program's perspective */
		if (t == BPF_WRITE) {
			verbose(env, "write to change key R%d not allowed\n", regno);
			return -EACCES;
		}

		err = check_mem_region_access(env, regno, off, size,
					      reg->map_ptr->key_size, false);
		if (err)
			return err;
		if (value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_MAP_VALUE) {
		struct btf_field *kptr_field = NULL;

		/* storing a pointer into a map would leak a kernel address */
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into map\n", value_regno);
			return -EACCES;
		}
		err = check_map_access_type(env, regno, off, size, t);
		if (err)
			return err;
		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
		if (err)
			return err;
		/* a constant offset may land on a kptr/uptr field, which has
		 * dedicated access rules
		 */
		if (tnum_is_const(reg->var_off))
			kptr_field = btf_record_find(reg->map_ptr->record,
						     off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
		if (kptr_field) {
			err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
		} else if (t == BPF_READ && value_regno >= 0) {
			struct bpf_map *map = reg->map_ptr;

			/*
			 * If map is read-only, track its contents as scalars,
			 * unless it is an insn array (see the special case below)
			 */
			if (tnum_is_const(reg->var_off) &&
			    bpf_map_is_rdonly(map) &&
			    map->ops->map_direct_value_addr &&
			    map->map_type != BPF_MAP_TYPE_INSN_ARRAY) {
				int map_off = off + reg->var_off.value;
				u64 val = 0;

				/* read the constant now and mark the register
				 * as a known scalar
				 */
				err = bpf_map_direct_read(map, map_off, size,
							  &val, is_ldsx);
				if (err)
					return err;

				regs[value_regno].type = SCALAR_VALUE;
				__mark_reg_known(&regs[value_regno], val);
			} else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
				/* insn_array loads yield PTR_TO_INSN and must
				 * be full 8-byte reads
				 */
				if (bpf_size != BPF_DW) {
					verbose(env, "Invalid read of %d bytes from insn_array\n",
						     size);
					return -EACCES;
				}
				copy_register_state(&regs[value_regno], reg);
				add_scalar_to_reg(&regs[value_regno], off);
				regs[value_regno].type = PTR_TO_INSN;
			} else {
				mark_reg_unknown(env, regs, value_regno);
			}
		}
	} else if (base_type(reg->type) == PTR_TO_MEM) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);
		bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);

		if (type_may_be_null(reg->type)) {
			verbose(env, "R%d invalid mem access '%s'\n", regno,
				reg_type_str(env, reg->type));
			return -EACCES;
		}

		if (t == BPF_WRITE && rdonly_mem) {
			verbose(env, "R%d cannot write into %s\n",
				regno, reg_type_str(env, reg->type));
			return -EACCES;
		}

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into mem\n", value_regno);
			return -EACCES;
		}

		/*
		 * Accesses to untrusted PTR_TO_MEM are done through probe
		 * instructions, hence no need to check bounds in that case.
		 */
		if (!rdonly_untrusted)
			err = check_mem_region_access(env, regno, off, size,
						      reg->mem_size, false);
		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_CTX) {
		struct bpf_insn_access_aux info = {
			.reg_type = SCALAR_VALUE,
			.is_ldsx = is_ldsx,
			.log = &env->log,
		};
		struct bpf_retval_range range;

		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into ctx\n", value_regno);
			return -EACCES;
		}

		err = check_ctx_access(env, insn_idx, regno, off, size, t, &info);
		if (!err && t == BPF_READ && value_regno >= 0) {
			/* ctx access returns either a scalar, or a
			 * PTR_TO_PACKET[_META,_END]. In the latter
			 * case, we know the offset is zero.
			 */
			if (info.reg_type == SCALAR_VALUE) {
				/* LSM retval fields carry a known s32 range */
				if (info.is_retval && get_func_retval_range(env->prog, &range)) {
					err = __mark_reg_s32_range(env, regs, value_regno,
								   range.minval, range.maxval);
					if (err)
						return err;
				} else {
					mark_reg_unknown(env, regs, value_regno);
				}
			} else {
				mark_reg_known_zero(env, regs,
						    value_regno);
				if (type_may_be_null(info.reg_type))
					regs[value_regno].id = ++env->id_gen;
				/* A load of ctx field could have different
				 * actual load size with the one encoded in the
				 * insn. When the dst is PTR, it is for sure not
				 * a sub-register.
				 */
				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
				if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
					regs[value_regno].btf = info.btf;
					regs[value_regno].btf_id = info.btf_id;
					regs[value_regno].ref_obj_id = info.ref_obj_id;
				}
			}
			regs[value_regno].type = info.reg_type;
		}

	} else if (reg->type == PTR_TO_STACK) {
		/* Basic bounds checks. */
		err = check_stack_access_within_bounds(env, regno, off, size, t);
		if (err)
			return err;

		if (t == BPF_READ)
			err = check_stack_read(env, regno, off, size,
					       value_regno);
		else
			err = check_stack_write(env, regno, off, size,
						value_regno, insn_idx);
	} else if (reg_is_pkt_pointer(reg)) {
		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
			verbose(env, "cannot write into packet\n");
			return -EACCES;
		}
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into packet\n",
				value_regno);
			return -EACCES;
		}
		err = check_packet_access(env, regno, off, size, false);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_FLOW_KEYS) {
		if (t == BPF_WRITE && value_regno >= 0 &&
		    is_pointer_value(env, value_regno)) {
			verbose(env, "R%d leaks addr into flow keys\n",
				value_regno);
			return -EACCES;
		}

		err = check_flow_keys_access(env, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (type_is_sk_pointer(reg->type)) {
		if (t == BPF_WRITE) {
			verbose(env, "R%d cannot write into %s\n",
				regno, reg_type_str(env, reg->type));
			return -EACCES;
		}
		err = check_sock_access(env, insn_idx, regno, off, size, t);
		if (!err && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_TP_BUFFER) {
		err = check_tp_buffer_access(env, reg, regno, off, size);
		if (!err && t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
		   !type_may_be_null(reg->type)) {
		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
					      value_regno);
	} else if (reg->type == CONST_PTR_TO_MAP) {
		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
					      value_regno);
	} else if (base_type(reg->type) == PTR_TO_BUF &&
		   !type_may_be_null(reg->type)) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);
		u32 *max_access;

		/* track the high-water mark separately for ro and rw buffers */
		if (rdonly_mem) {
			if (t == BPF_WRITE) {
				verbose(env, "R%d cannot write into %s\n",
					regno, reg_type_str(env, reg->type));
				return -EACCES;
			}
			max_access = &env->prog->aux->max_rdonly_access;
		} else {
			max_access = &env->prog->aux->max_rdwr_access;
		}

		err = check_buffer_access(env, reg, regno, off, size, false,
					  max_access);

		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
			mark_reg_unknown(env, regs, value_regno);
	} else if (reg->type == PTR_TO_ARENA) {
		/* arena accesses are bounds-checked at runtime */
		if (t == BPF_READ && value_regno >= 0)
			mark_reg_unknown(env, regs, value_regno);
	} else {
		verbose(env, "R%d invalid mem access '%s'\n", regno,
			reg_type_str(env, reg->type));
		return -EACCES;
	}

	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
	    regs[value_regno].type == SCALAR_VALUE) {
		if (!is_ldsx)
			/* b/h/w load zero-extends, mark upper bits as known 0 */
			coerce_reg_to_size(&regs[value_regno], size);
		else
			coerce_reg_to_size_sx(&regs[value_regno], size);
	}
	return err;
}
6573 
6574 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
6575 			     bool allow_trust_mismatch);
6576 
/* Verify a load instruction: validate both operands, check the memory at
 * (src_reg + off), record the source pointer type for later rewrites,
 * and sanity-check the resulting dst_reg bounds.  'ctx' names the caller
 * for diagnostics.
 */
static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
			  bool strict_alignment_once, bool is_ldsx,
			  bool allow_trust_mismatch, const char *ctx)
{
	struct bpf_reg_state *regs = cur_regs(env);
	enum bpf_reg_type src_reg_type;
	int err;

	/* the source pointer must be a readable register */
	err = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (err)
		return err;

	/* the destination must be writable; don't mark it yet */
	err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
	if (err)
		return err;

	src_reg_type = regs[insn->src_reg].type;

	/* Check if (src_reg + off) is readable. The state of dst_reg will be
	 * updated by this call.
	 */
	err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
			       strict_alignment_once, is_ldsx);
	if (err)
		return err;

	err = save_aux_ptr_type(env, src_reg_type, allow_trust_mismatch);
	if (err)
		return err;

	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
}
6609 
/* Verify a register store: validate both operands, check the write to
 * (dst_reg + off), and record the destination pointer type for later
 * instruction rewrites.
 */
static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
			   bool strict_alignment_once)
{
	struct bpf_reg_state *regs = cur_regs(env);
	enum bpf_reg_type dst_reg_type;
	int err;

	/* the stored value (src1) must be readable */
	err = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (err)
		return err;

	/* the destination pointer (src2) must be readable */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	dst_reg_type = regs[insn->dst_reg].type;

	/* Check if (dst_reg + off) is writeable. */
	err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
			       strict_alignment_once, false);
	if (err)
		return err;

	return save_aux_ptr_type(env, dst_reg_type, false);
}
6637 
/* Verify an atomic read-modify-write instruction (BPF_ADD/AND/OR/XOR with
 * optional BPF_FETCH, BPF_XCHG, BPF_CMPXCHG) targeting (dst_reg + off).
 * The access is modeled as a read (plus a register fill for the fetched
 * old value, if any) followed by a write to the same location.
 */
static int check_atomic_rmw(struct bpf_verifier_env *env,
			    struct bpf_insn *insn)
{
	int load_reg;
	int err;

	/* only 32- and 64-bit atomics exist */
	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
		verbose(env, "invalid atomic operand size\n");
		return -EINVAL;
	}

	/* check src1 operand */
	err = check_reg_arg(env, insn->src_reg, SRC_OP);
	if (err)
		return err;

	/* check src2 operand */
	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
	if (err)
		return err;

	if (insn->imm == BPF_CMPXCHG) {
		/* Check comparison of R0 with memory location */
		const u32 aux_reg = BPF_REG_0;

		err = check_reg_arg(env, aux_reg, SRC_OP);
		if (err)
			return err;

		/* R0 is implicitly stored on failure; it must not leak a pointer */
		if (is_pointer_value(env, aux_reg)) {
			verbose(env, "R%d leaks addr into mem\n", aux_reg);
			return -EACCES;
		}
	}

	if (is_pointer_value(env, insn->src_reg)) {
		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
		return -EACCES;
	}

	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
			insn->dst_reg,
			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
		return -EACCES;
	}

	if (insn->imm & BPF_FETCH) {
		/* CMPXCHG writes the old value to R0, others to src_reg */
		if (insn->imm == BPF_CMPXCHG)
			load_reg = BPF_REG_0;
		else
			load_reg = insn->src_reg;

		/* check and record load of old value */
		err = check_reg_arg(env, load_reg, DST_OP);
		if (err)
			return err;
	} else {
		/* This instruction accesses a memory location but doesn't
		 * actually load it into a register.
		 */
		load_reg = -1;
	}

	/* Check whether we can read the memory, with second call for fetch
	 * case to simulate the register fill.
	 */
	err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_READ, -1, true, false);
	if (!err && load_reg >= 0)
		err = check_mem_access(env, env->insn_idx, insn->dst_reg,
				       insn->off, BPF_SIZE(insn->code),
				       BPF_READ, load_reg, true, false);
	if (err)
		return err;

	/* record the pointer type for later arena instruction rewrites */
	if (is_arena_reg(env, insn->dst_reg)) {
		err = save_aux_ptr_type(env, PTR_TO_ARENA, false);
		if (err)
			return err;
	}
	/* Check whether we can write into the same memory. */
	err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
			       BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
	if (err)
		return err;
	return 0;
}
6726 
check_atomic_load(struct bpf_verifier_env * env,struct bpf_insn * insn)6727 static int check_atomic_load(struct bpf_verifier_env *env,
6728 			     struct bpf_insn *insn)
6729 {
6730 	int err;
6731 
6732 	err = check_load_mem(env, insn, true, false, false, "atomic_load");
6733 	if (err)
6734 		return err;
6735 
6736 	if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
6737 		verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
6738 			insn->src_reg,
6739 			reg_type_str(env, reg_state(env, insn->src_reg)->type));
6740 		return -EACCES;
6741 	}
6742 
6743 	return 0;
6744 }
6745 
check_atomic_store(struct bpf_verifier_env * env,struct bpf_insn * insn)6746 static int check_atomic_store(struct bpf_verifier_env *env,
6747 			      struct bpf_insn *insn)
6748 {
6749 	int err;
6750 
6751 	err = check_store_reg(env, insn, true);
6752 	if (err)
6753 		return err;
6754 
6755 	if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
6756 		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6757 			insn->dst_reg,
6758 			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6759 		return -EACCES;
6760 	}
6761 
6762 	return 0;
6763 }
6764 
check_atomic(struct bpf_verifier_env * env,struct bpf_insn * insn)6765 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
6766 {
6767 	switch (insn->imm) {
6768 	case BPF_ADD:
6769 	case BPF_ADD | BPF_FETCH:
6770 	case BPF_AND:
6771 	case BPF_AND | BPF_FETCH:
6772 	case BPF_OR:
6773 	case BPF_OR | BPF_FETCH:
6774 	case BPF_XOR:
6775 	case BPF_XOR | BPF_FETCH:
6776 	case BPF_XCHG:
6777 	case BPF_CMPXCHG:
6778 		return check_atomic_rmw(env, insn);
6779 	case BPF_LOAD_ACQ:
6780 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6781 			verbose(env,
6782 				"64-bit load-acquires are only supported on 64-bit arches\n");
6783 			return -EOPNOTSUPP;
6784 		}
6785 		return check_atomic_load(env, insn);
6786 	case BPF_STORE_REL:
6787 		if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
6788 			verbose(env,
6789 				"64-bit store-releases are only supported on 64-bit arches\n");
6790 			return -EOPNOTSUPP;
6791 		}
6792 		return check_atomic_store(env, insn);
6793 	default:
6794 		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
6795 			insn->imm);
6796 		return -EINVAL;
6797 	}
6798 }
6799 
6800 /* When register 'regno' is used to read the stack (either directly or through
6801  * a helper function) make sure that it's within stack boundary and, depending
6802  * on the access type and privileges, that all elements of the stack are
6803  * initialized.
6804  *
6805  * All registers that have been spilled on the stack in the slots within the
6806  * read offsets are marked as read.
6807  */
static int check_stack_range_initialized(
		struct bpf_verifier_env *env, int regno, int off,
		int access_size, bool zero_size_allowed,
		enum bpf_access_type type, struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_func_state *state = bpf_func(env, reg);
	int err, min_off, max_off, i, j, slot, spi;
	/* Some accesses can write anything into the stack, others are
	 * read-only.
	 */
	bool clobber = type == BPF_WRITE;
	/*
	 * Negative access_size signals global subprog/kfunc arg check where
	 * STACK_POISON slots are acceptable. static stack liveness
	 * might have determined that subprog doesn't read them,
	 * but BTF based global subprog validation isn't accurate enough.
	 */
	bool allow_poison = access_size < 0 || clobber;

	access_size = abs(access_size);

	if (access_size == 0 && !zero_size_allowed) {
		verbose(env, "invalid zero-sized read\n");
		return -EACCES;
	}

	err = check_stack_access_within_bounds(env, regno, off, access_size, type);
	if (err)
		return err;


	if (tnum_is_const(reg->var_off)) {
		/* Known pointer: the access covers exactly one byte range. */
		min_off = max_off = reg->var_off.value + off;
	} else {
		/* Variable offset is prohibited for unprivileged mode for
		 * simplicity since it requires corresponding support in
		 * Spectre masking for stack ALU.
		 * See also retrieve_ptr_limit().
		 */
		if (!env->bypass_spec_v1) {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
				regno, tn_buf);
			return -EACCES;
		}
		/* Only initialized buffer on stack is allowed to be accessed
		 * with variable offset. With uninitialized buffer it's hard to
		 * guarantee that whole memory is marked as initialized on
		 * helper return since specific bounds are unknown what may
		 * cause uninitialized stack leaking.
		 */
		if (meta && meta->raw_mode)
			meta = NULL;

		min_off = reg->smin_value + off;
		max_off = reg->smax_value + off;
	}

	if (meta && meta->raw_mode) {
		/* Ensure we won't be overwriting dynptrs when simulating byte
		 * by byte access in check_helper_call using meta.access_size.
		 * This would be a problem if we have a helper in the future
		 * which takes:
		 *
		 *	helper(uninit_mem, len, dynptr)
		 *
		 * Now, uninint_mem may overlap with dynptr pointer. Hence, it
		 * may end up writing to dynptr itself when touching memory from
		 * arg 1. This can be relaxed on a case by case basis for known
		 * safe cases, but reject due to the possibilitiy of aliasing by
		 * default.
		 */
		for (i = min_off; i < max_off + access_size; i++) {
			int stack_off = -i - 1;

			spi = bpf_get_spi(i);
			/* raw_mode may write past allocated_stack */
			if (state->allocated_stack <= stack_off)
				continue;
			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
				return -EACCES;
			}
		}
		/* Defer the actual byte-by-byte simulation to the caller. */
		meta->access_size = access_size;
		meta->regno = regno;
		return 0;
	}

	/* Walk every byte the access may touch and validate its slot type. */
	for (i = min_off; i < max_off + access_size; i++) {
		u8 *stype;

		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		if (state->allocated_stack <= slot) {
			verbose(env, "allocated_stack too small\n");
			return -EFAULT;
		}

		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		if (*stype == STACK_MISC)
			goto mark;
		if ((*stype == STACK_ZERO) ||
		    (*stype == STACK_INVALID && env->allow_uninit_stack)) {
			if (clobber) {
				/* helper can write anything into the stack */
				*stype = STACK_MISC;
			}
			goto mark;
		}

		/* Spilled scalars (or any spill with ptr-leak privilege) are
		 * acceptable; a clobbering access scrubs the spill.
		 */
		if (bpf_is_spilled_reg(&state->stack[spi]) &&
		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
		     env->allow_ptr_leaks)) {
			if (clobber) {
				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
				for (j = 0; j < BPF_REG_SIZE; j++)
					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
			}
			goto mark;
		}

		if (*stype == STACK_POISON) {
			if (allow_poison)
				goto mark;
			verbose(env, "reading from stack R%d off %d+%d size %d, slot poisoned by dead code elimination\n",
				regno, min_off, i - min_off, access_size);
		} else if (tnum_is_const(reg->var_off)) {
			verbose(env, "invalid read from stack R%d off %d+%d size %d\n",
				regno, min_off, i - min_off, access_size);
		} else {
			char tn_buf[48];

			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
			verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n",
				regno, tn_buf, i - min_off, access_size);
		}
		return -EACCES;
mark:
		/* Byte is acceptable; continue with the next one. */
		;
	}
	return 0;
}
6954 
/* Verify that helper argument register @regno (a pointer) may be used to
 * access @access_size bytes with @access_type. The concrete check depends
 * on the base type of the pointer register.
 */
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
				   int access_size, enum bpf_access_type access_type,
				   bool zero_size_allowed,
				   struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	u32 *max_access;

	switch (base_type(reg->type)) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		return check_packet_access(env, regno, 0, access_size,
					   zero_size_allowed);
	case PTR_TO_MAP_KEY:
		/* Map keys are never writable through helpers. */
		if (access_type == BPF_WRITE) {
			verbose(env, "R%d cannot write into %s\n", regno,
				reg_type_str(env, reg->type));
			return -EACCES;
		}
		return check_mem_region_access(env, regno, 0, access_size,
					       reg->map_ptr->key_size, false);
	case PTR_TO_MAP_VALUE:
		if (check_map_access_type(env, regno, 0, access_size, access_type))
			return -EACCES;
		return check_map_access(env, regno, 0, access_size,
					zero_size_allowed, ACCESS_HELPER);
	case PTR_TO_MEM:
		/* Reject writes through read-only memory. */
		if (type_is_rdonly_mem(reg->type)) {
			if (access_type == BPF_WRITE) {
				verbose(env, "R%d cannot write into %s\n", regno,
					reg_type_str(env, reg->type));
				return -EACCES;
			}
		}
		return check_mem_region_access(env, regno, 0,
					       access_size, reg->mem_size,
					       zero_size_allowed);
	case PTR_TO_BUF:
		/* Track the largest access per buffer kind in prog->aux. */
		if (type_is_rdonly_mem(reg->type)) {
			if (access_type == BPF_WRITE) {
				verbose(env, "R%d cannot write into %s\n", regno,
					reg_type_str(env, reg->type));
				return -EACCES;
			}

			max_access = &env->prog->aux->max_rdonly_access;
		} else {
			max_access = &env->prog->aux->max_rdwr_access;
		}
		return check_buffer_access(env, reg, regno, 0,
					   access_size, zero_size_allowed,
					   max_access);
	case PTR_TO_STACK:
		return check_stack_range_initialized(
				env,
				regno, 0, access_size,
				zero_size_allowed, access_type, meta);
	case PTR_TO_BTF_ID:
		return check_ptr_to_btf_access(env, regs, regno, 0,
					       access_size, BPF_READ, -1);
	case PTR_TO_CTX:
		/* Only permit reading or writing syscall context using helper calls. */
		if (is_var_ctx_off_allowed(env->prog)) {
			int err = check_mem_region_access(env, regno, 0, access_size, U16_MAX,
							  zero_size_allowed);
			if (err)
				return err;
			/* Grow the recorded maximum ctx offset if needed. */
			if (env->prog->aux->max_ctx_offset < reg->umax_value + access_size)
				env->prog->aux->max_ctx_offset = reg->umax_value + access_size;
			return 0;
		}
		fallthrough;
	default: /* scalar_value or invalid ptr */
		/* Allow zero-byte read from NULL, regardless of pointer type */
		if (zero_size_allowed && access_size == 0 &&
		    bpf_register_is_null(reg))
			return 0;

		verbose(env, "R%d type=%s ", regno,
			reg_type_str(env, reg->type));
		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
		return -EACCES;
	}
}
7039 
7040 /* verify arguments to helpers or kfuncs consisting of a pointer and an access
7041  * size.
7042  *
7043  * @regno is the register containing the access size. regno-1 is the register
7044  * containing the pointer.
7045  */
/* Verify a (pointer, size) argument pair where the SCALAR_VALUE register
 * @regno holds the size and register regno - 1 holds the pointer. The
 * size register's value range drives the memory check.
 */
static int check_mem_size_reg(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg, u32 regno,
			      enum bpf_access_type access_type,
			      bool zero_size_allowed,
			      struct bpf_call_arg_meta *meta)
{
	int err;

	/* This is used to refine r0 return value bounds for helpers
	 * that enforce this value as an upper bound on return values.
	 * See do_refine_retval_range() for helpers that can refine
	 * the return value. C type of helper is u32 so we pull register
	 * bound from umax_value however, if negative verifier errors
	 * out. Only upper bounds can be learned because retval is an
	 * int type and negative retvals are allowed.
	 */
	meta->msize_max_value = reg->umax_value;

	/* The register is SCALAR_VALUE; the access check happens using
	 * its boundaries. For unprivileged variable accesses, disable
	 * raw mode so that the program is required to initialize all
	 * the memory that the helper could just partially fill up.
	 */
	if (!tnum_is_const(reg->var_off))
		meta = NULL;

	/* A possibly-negative size is always rejected. */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
			regno);
		return -EACCES;
	}

	if (reg->umin_value == 0 && !zero_size_allowed) {
		verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n",
			regno, reg->umin_value, reg->umax_value);
		return -EACCES;
	}

	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
			regno);
		return -EACCES;
	}
	/* Check the pointer against the worst-case (largest) size; the
	 * size register is then marked precise since safety depends on it.
	 */
	err = check_helper_mem_access(env, regno - 1, reg->umax_value,
				      access_type, zero_size_allowed, meta);
	if (!err)
		err = mark_chain_precision(env, regno);
	return err;
}
7095 
/* Check that @regno may be used to read and write @mem_size bytes. The
 * register's state is saved and restored around the check so a temporary
 * maybe-NULL -> not-NULL conversion stays invisible to the caller.
 */
static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			 u32 regno, u32 mem_size)
{
	bool null_allowed = type_may_be_null(reg->type);
	struct bpf_reg_state backup;
	int size, err;

	if (bpf_register_is_null(reg))
		return 0;

	if (null_allowed) {
		backup = *reg;
		mark_ptr_not_null_reg(reg);
	}

	/* A negative size signals check_stack_range_initialized() that
	 * poisoned stack slots are acceptable.
	 */
	if (base_type(reg->type) == PTR_TO_STACK)
		size = -(int)mem_size;
	else
		size = mem_size;

	err = check_helper_mem_access(env, regno, size, BPF_READ, true, NULL);
	if (!err)
		err = check_helper_mem_access(env, regno, size, BPF_WRITE, true, NULL);

	if (null_allowed)
		*reg = backup;

	return err;
}
7125 
/* Kfunc variant of check_mem_size_reg(): @regno holds the size, the
 * pointer lives in regno - 1 and may be NULL-able, in which case its
 * state is temporarily marked not-NULL and restored afterwards.
 */
static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				    u32 regno)
{
	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
	bool null_allowed = type_may_be_null(mem_reg->type);
	struct bpf_reg_state backup;
	struct bpf_call_arg_meta meta;
	int err;

	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);

	memset(&meta, 0, sizeof(meta));

	if (null_allowed) {
		backup = *mem_reg;
		mark_ptr_not_null_reg(mem_reg);
	}

	err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta);
	if (!err)
		err = check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta);

	if (null_allowed)
		*mem_reg = backup;

	return err;
}
7152 
/* Flag bits passed to process_spin_lock() describing the operation being
 * verified.
 */
enum {
	PROCESS_SPIN_LOCK = (1 << 0),	/* lock (set) vs unlock (clear) */
	PROCESS_RES_LOCK  = (1 << 1),	/* resilient bpf_res_spin_lock variant */
	PROCESS_LOCK_IRQ  = (1 << 2),	/* IRQ-saving variant of res lock */
};
7158 
7159 /* Implementation details:
7160  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
7161  * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
7162  * Two bpf_map_lookups (even with the same key) will have different reg->id.
7163  * Two separate bpf_obj_new will also have different reg->id.
7164  * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
7165  * clears reg->id after value_or_null->value transition, since the verifier only
7166  * cares about the range of access to valid map value pointer and doesn't care
7167  * about actual address of the map element.
7168  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
7169  * reg->id > 0 after value_or_null->value transition. By doing so
7170  * two bpf_map_lookups will be considered two different pointers that
7171  * point to different bpf_spin_locks. Likewise for pointers to allocated objects
7172  * returned from bpf_obj_new.
7173  * The verifier allows taking only one bpf_spin_lock at a time to avoid
7174  * dead-locks.
7175  * Since only one bpf_spin_lock is allowed the checks are simpler than
7176  * reg_is_refcounted() logic. The verifier needs to remember only
7177  * one spin_lock instead of array of acquired_refs.
7178  * env->cur_state->active_locks remembers which map value element or allocated
7179  * object got locked and clears it after bpf_spin_unlock.
7180  */
/* Validate a bpf_spin_lock / bpf_res_spin_lock argument at @regno and
 * update the verifier's lock tracking accordingly; @flags is a mask of
 * PROCESS_* bits selecting lock vs unlock and the lock flavor.
 */
static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
{
	bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK;
	const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin";
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_verifier_state *cur = env->cur_state;
	bool is_const = tnum_is_const(reg->var_off);
	bool is_irq = flags & PROCESS_LOCK_IRQ;
	u64 val = reg->var_off.value;
	struct bpf_map *map = NULL;
	struct btf *btf = NULL;
	struct btf_record *rec;
	u32 spin_lock_off;
	int err;

	/* The lock field must sit at a statically-known offset. */
	if (!is_const) {
		verbose(env,
			"R%d doesn't have constant offset. %s_lock has to be at the constant offset\n",
			regno, lock_str);
		return -EINVAL;
	}
	if (reg->type == PTR_TO_MAP_VALUE) {
		map = reg->map_ptr;
		if (!map->btf) {
			verbose(env,
				"map '%s' has to have BTF in order to use %s_lock\n",
				map->name, lock_str);
			return -EINVAL;
		}
	} else {
		btf = reg->btf;
	}

	rec = reg_btf_record(reg);
	if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) {
		verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local",
			map ? map->name : "kptr", lock_str);
		return -EINVAL;
	}
	/* The constant offset must point exactly at the lock field. */
	spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off;
	if (spin_lock_off != val) {
		verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n",
			val, lock_str, spin_lock_off);
		return -EINVAL;
	}
	if (is_lock) {
		void *ptr;
		int type;

		/* A lock is identified by (reg->id, containing map or btf). */
		if (map)
			ptr = map;
		else
			ptr = btf;

		if (!is_res_lock && cur->active_locks) {
			/* Classic spin locks don't nest at all. */
			if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) {
				verbose(env,
					"Locking two bpf_spin_locks are not allowed\n");
				return -EINVAL;
			}
		} else if (is_res_lock && cur->active_locks) {
			/* Res locks may nest, but re-acquiring the same lock
			 * is an AA deadlock.
			 */
			if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) {
				verbose(env, "Acquiring the same lock again, AA deadlock detected\n");
				return -EINVAL;
			}
		}

		if (is_res_lock && is_irq)
			type = REF_TYPE_RES_LOCK_IRQ;
		else if (is_res_lock)
			type = REF_TYPE_RES_LOCK;
		else
			type = REF_TYPE_LOCK;
		err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr);
		if (err < 0) {
			verbose(env, "Failed to acquire lock state\n");
			return err;
		}
	} else {
		void *ptr;
		int type;

		if (map)
			ptr = map;
		else
			ptr = btf;

		if (!cur->active_locks) {
			verbose(env, "%s_unlock without taking a lock\n", lock_str);
			return -EINVAL;
		}

		if (is_res_lock && is_irq)
			type = REF_TYPE_RES_LOCK_IRQ;
		else if (is_res_lock)
			type = REF_TYPE_RES_LOCK;
		else
			type = REF_TYPE_LOCK;
		if (!find_lock_state(cur, type, reg->id, ptr)) {
			verbose(env, "%s_unlock of different lock\n", lock_str);
			return -EINVAL;
		}
		/* Unlocks must happen in reverse order of acquisition. */
		if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) {
			verbose(env, "%s_unlock cannot be out of order\n", lock_str);
			return -EINVAL;
		}
		if (release_lock_state(cur, type, reg->id, ptr)) {
			verbose(env, "%s_unlock of different lock\n", lock_str);
			return -EINVAL;
		}

		invalidate_non_owning_refs(env);
	}
	return 0;
}
7296 
7297 /* Check if @regno is a pointer to a specific field in a map value */
/* Validate that register @regno points exactly at the BTF-described
 * @field_type field (timer, task_work or workqueue) inside a map value,
 * and record the owning map in @map_desc.
 */
static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
				   enum btf_field_type field_type,
				   struct bpf_map_desc *map_desc)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	bool is_const = tnum_is_const(reg->var_off);
	struct bpf_map *map = reg->map_ptr;
	u64 val = reg->var_off.value;
	const char *struct_name = btf_field_type_name(field_type);
	int field_off = -1;

	/* The field must be addressed at a statically-known offset. */
	if (!is_const) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, struct_name);
		return -EINVAL;
	}
	if (!map->btf) {
		verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
			struct_name);
		return -EINVAL;
	}
	if (!btf_record_has_field(map->record, field_type)) {
		verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
		return -EINVAL;
	}
	/* Look up where the record says this field type lives. */
	switch (field_type) {
	case BPF_TIMER:
		field_off = map->record->timer_off;
		break;
	case BPF_TASK_WORK:
		field_off = map->record->task_work_off;
		break;
	case BPF_WORKQUEUE:
		field_off = map->record->wq_off;
		break;
	default:
		verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
		return -EINVAL;
	}
	if (field_off != val) {
		verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
			val, struct_name, field_off);
		return -EINVAL;
	}
	/* Only one such map pointer may be recorded per helper call. */
	if (map_desc->ptr) {
		verifier_bug(env, "Two map pointers in a %s helper", struct_name);
		return -EFAULT;
	}
	map_desc->uid = reg->map_uid;
	map_desc->ptr = map;
	return 0;
}
7351 
process_timer_func(struct bpf_verifier_env * env,int regno,struct bpf_map_desc * map)7352 static int process_timer_func(struct bpf_verifier_env *env, int regno,
7353 			      struct bpf_map_desc *map)
7354 {
7355 	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
7356 		verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
7357 		return -EOPNOTSUPP;
7358 	}
7359 	return check_map_field_pointer(env, regno, BPF_TIMER, map);
7360 }
7361 
process_timer_helper(struct bpf_verifier_env * env,int regno,struct bpf_call_arg_meta * meta)7362 static int process_timer_helper(struct bpf_verifier_env *env, int regno,
7363 				struct bpf_call_arg_meta *meta)
7364 {
7365 	return process_timer_func(env, regno, &meta->map);
7366 }
7367 
process_timer_kfunc(struct bpf_verifier_env * env,int regno,struct bpf_kfunc_call_arg_meta * meta)7368 static int process_timer_kfunc(struct bpf_verifier_env *env, int regno,
7369 			       struct bpf_kfunc_call_arg_meta *meta)
7370 {
7371 	return process_timer_func(env, regno, &meta->map);
7372 }
7373 
/* Validate an argument at @regno that must point at a kptr field (used by
 * bpf_kptr_xchg); on success the field is stashed in @meta->kptr_field.
 */
static int process_kptr_func(struct bpf_verifier_env *env, int regno,
			     struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct btf_field *kptr_field;
	struct bpf_map *map_ptr;
	struct btf_record *rec;
	u32 kptr_off;

	/* The kptr may live in an allocated object or in a map value. */
	if (type_is_ptr_alloc_obj(reg->type)) {
		rec = reg_btf_record(reg);
	} else { /* PTR_TO_MAP_VALUE */
		map_ptr = reg->map_ptr;
		if (!map_ptr->btf) {
			verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
				map_ptr->name);
			return -EINVAL;
		}
		rec = map_ptr->record;
		meta->map.ptr = map_ptr;
	}

	/* The kptr field must be addressed at a statically-known offset. */
	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
			regno);
		return -EINVAL;
	}

	if (!btf_record_has_field(rec, BPF_KPTR)) {
		verbose(env, "R%d has no valid kptr\n", regno);
		return -EINVAL;
	}

	kptr_off = reg->var_off.value;
	kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
	if (!kptr_field) {
		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
		return -EACCES;
	}
	/* Only referenced (possibly per-CPU) kptrs are accepted here. */
	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
		return -EACCES;
	}
	meta->kptr_field = kptr_field;
	return 0;
}
7421 
7422 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7423  * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7424  *
7425  * In both cases we deal with the first 8 bytes, but need to mark the next 8
7426  * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7427  * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7428  *
7429  * Mutability of bpf_dynptr is at two levels, one is at the level of struct
7430  * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
7431  * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
7432  * mutate the view of the dynptr and also possibly destroy it. In the latter
7433  * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
7434  * memory that dynptr points to.
7435  *
7436  * The verifier will keep track both levels of mutation (bpf_dynptr's in
7437  * reg->type and the memory's in reg->dynptr.type), but there is no support for
7438  * readonly dynptr view yet, hence only the first case is tracked and checked.
7439  *
7440  * This is consistent with how C applies the const modifier to a struct object,
7441  * where the pointer itself inside bpf_dynptr becomes const but not what it
7442  * points to.
7443  *
7444  * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
7445  * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
7446  */
/* Validate a dynptr argument at @regno (see the block comment above for
 * the PTR_TO_STACK vs CONST_PTR_TO_DYNPTR and mutability conventions).
 */
static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
			       enum bpf_arg_type arg_type, int clone_ref_obj_id)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	int err;

	if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) {
		verbose(env,
			"arg#%d expected pointer to stack or const struct bpf_dynptr\n",
			regno - 1);
		return -EINVAL;
	}

	/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
	 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
	 */
	if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
		verifier_bug(env, "misconfigured dynptr helper type flags");
		return -EFAULT;
	}

	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
	 *		 constructing a mutable bpf_dynptr object.
	 *
	 *		 Currently, this is only possible with PTR_TO_STACK
	 *		 pointing to a region of at least 16 bytes which doesn't
	 *		 contain an existing bpf_dynptr.
	 *
	 *  MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
	 *		 mutated or destroyed. However, the memory it points to
	 *		 may be mutated.
	 *
	 *  None       - Points to a initialized dynptr that can be mutated and
	 *		 destroyed, including mutation of the memory it points
	 *		 to.
	 */
	if (arg_type & MEM_UNINIT) {
		int i;

		if (!is_dynptr_reg_valid_uninit(env, reg)) {
			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
			return -EINVAL;
		}

		/* we write BPF_DW bits (8 bytes) at a time */
		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
			err = check_mem_access(env, insn_idx, regno,
					       i, BPF_DW, BPF_WRITE, -1, false, false);
			if (err)
				return err;
		}

		/* Mark the stack slots as holding a (possibly cloned) dynptr. */
		err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
	} else /* MEM_RDONLY and None case from above */ {
		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
		if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
			verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
			return -EINVAL;
		}

		if (!is_dynptr_reg_valid_init(env, reg)) {
			verbose(env,
				"Expected an initialized dynptr as arg #%d\n",
				regno - 1);
			return -EINVAL;
		}

		/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
			verbose(env,
				"Expected a dynptr of type %s as arg #%d\n",
				dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1);
			return -EINVAL;
		}

		err = mark_dynptr_read(env, reg);
	}
	return err;
}
7526 
iter_ref_obj_id(struct bpf_verifier_env * env,struct bpf_reg_state * reg,int spi)7527 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
7528 {
7529 	struct bpf_func_state *state = bpf_func(env, reg);
7530 
7531 	return state->stack[spi].spilled_ptr.ref_obj_id;
7532 }
7533 
is_iter_kfunc(struct bpf_kfunc_call_arg_meta * meta)7534 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7535 {
7536 	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7537 }
7538 
is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta * meta)7539 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7540 {
7541 	return meta->kfunc_flags & KF_ITER_NEW;
7542 }
7543 
7544 
is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta * meta)7545 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7546 {
7547 	return meta->kfunc_flags & KF_ITER_DESTROY;
7548 }
7549 
is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta * meta,int arg_idx,const struct btf_param * arg)7550 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
7551 			      const struct btf_param *arg)
7552 {
7553 	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
7554 	 * kfunc is iter state pointer
7555 	 */
7556 	if (is_iter_kfunc(meta))
7557 		return arg_idx == 0;
7558 
7559 	/* iter passed as an argument to a generic kfunc */
7560 	return btf_param_match_suffix(meta->btf, arg, "__iter");
7561 }
7562 
/* Validate an iterator-state argument at @regno of an iter kfunc (or a
 * generic kfunc taking an "__iter" argument) and update the stack slots
 * that back the iterator accordingly.
 */
static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
			    struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	const struct btf_type *t;
	int spi, err, i, nr_slots, btf_id;

	/* Iterator state always lives on the BPF stack. */
	if (reg->type != PTR_TO_STACK) {
		verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1);
		return -EINVAL;
	}

	/* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
	 * ensures struct convention, so we wouldn't need to do any BTF
	 * validation here. But given iter state can be passed as a parameter
	 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more
	 * conservative here.
	 */
	btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
	if (btf_id < 0) {
		verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1);
		return -EINVAL;
	}
	t = btf_type_by_id(meta->btf, btf_id);
	/* Number of stack slots the iterator struct occupies. */
	nr_slots = t->size / BPF_REG_SIZE;

	if (is_iter_new_kfunc(meta)) {
		/* bpf_iter_<type>_new() expects pointer to uninit iter state */
		if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
			verbose(env, "expected uninitialized iter_%s as arg #%d\n",
				iter_type_str(meta->btf, btf_id), regno - 1);
			return -EINVAL;
		}

		/* Simulate 8-byte writes to mark the slots as written. */
		for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
			err = check_mem_access(env, insn_idx, regno,
					       i, BPF_DW, BPF_WRITE, -1, false, false);
			if (err)
				return err;
		}

		err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
		if (err)
			return err;
	} else {
		/* iter_next() or iter_destroy(), as well as any kfunc
		 * accepting iter argument, expect initialized iter state
		 */
		err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
		switch (err) {
		case 0:
			break;
		case -EINVAL:
			verbose(env, "expected an initialized iter_%s as arg #%d\n",
				iter_type_str(meta->btf, btf_id), regno - 1);
			return err;
		case -EPROTO:
			verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
			return err;
		default:
			return err;
		}

		spi = iter_get_spi(env, reg, nr_slots);
		if (spi < 0)
			return spi;

		err = mark_iter_read(env, reg, spi, nr_slots);
		if (err)
			return err;

		/* remember meta->iter info for process_iter_next_call() */
		meta->iter.spi = spi;
		meta->iter.frameno = reg->frameno;
		meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);

		if (is_iter_destroy_kfunc(meta)) {
			/* Destruction releases the iterator's stack slots. */
			err = unmark_stack_slots_iter(env, reg, nr_slots);
			if (err)
				return err;
		}
	}

	return 0;
}
7648 
7649 /* Look for a previous loop entry at insn_idx: nearest parent state
7650  * stopped at insn_idx with callsites matching those in cur->frame.
7651  */
find_prev_entry(struct bpf_verifier_env * env,struct bpf_verifier_state * cur,int insn_idx)7652 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7653 						  struct bpf_verifier_state *cur,
7654 						  int insn_idx)
7655 {
7656 	struct bpf_verifier_state_list *sl;
7657 	struct bpf_verifier_state *st;
7658 	struct list_head *pos, *head;
7659 
7660 	/* Explored states are pushed in stack order, most recent states come first */
7661 	head = bpf_explored_state(env, insn_idx);
7662 	list_for_each(pos, head) {
7663 		sl = container_of(pos, struct bpf_verifier_state_list, node);
7664 		/* If st->branches != 0 state is a part of current DFS verification path,
7665 		 * hence cur & st for a loop.
7666 		 */
7667 		st = &sl->state;
7668 		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7669 		    st->dfs_depth < cur->dfs_depth)
7670 			return st;
7671 	}
7672 
7673 	return NULL;
7674 }
7675 
7676 /*
7677  * Check if scalar registers are exact for the purpose of not widening.
7678  * More lenient than regs_exact()
7679  */
scalars_exact_for_widen(const struct bpf_reg_state * rold,const struct bpf_reg_state * rcur)7680 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold,
7681 				    const struct bpf_reg_state *rcur)
7682 {
7683 	return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id));
7684 }
7685 
maybe_widen_reg(struct bpf_verifier_env * env,struct bpf_reg_state * rold,struct bpf_reg_state * rcur)7686 static void maybe_widen_reg(struct bpf_verifier_env *env,
7687 			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur)
7688 {
7689 	if (rold->type != SCALAR_VALUE)
7690 		return;
7691 	if (rold->type != rcur->type)
7692 		return;
7693 	if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur))
7694 		return;
7695 	__mark_reg_unknown(env, rcur);
7696 }
7697 
widen_imprecise_scalars(struct bpf_verifier_env * env,struct bpf_verifier_state * old,struct bpf_verifier_state * cur)7698 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7699 				   struct bpf_verifier_state *old,
7700 				   struct bpf_verifier_state *cur)
7701 {
7702 	struct bpf_func_state *fold, *fcur;
7703 	int i, fr, num_slots;
7704 
7705 	for (fr = old->curframe; fr >= 0; fr--) {
7706 		fold = old->frame[fr];
7707 		fcur = cur->frame[fr];
7708 
7709 		for (i = 0; i < MAX_BPF_REG; i++)
7710 			maybe_widen_reg(env,
7711 					&fold->regs[i],
7712 					&fcur->regs[i]);
7713 
7714 		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
7715 				fcur->allocated_stack / BPF_REG_SIZE);
7716 		for (i = 0; i < num_slots; i++) {
7717 			if (!bpf_is_spilled_reg(&fold->stack[i]) ||
7718 			    !bpf_is_spilled_reg(&fcur->stack[i]))
7719 				continue;
7720 
7721 			maybe_widen_reg(env,
7722 					&fold->stack[i].spilled_ptr,
7723 					&fcur->stack[i].spilled_ptr);
7724 		}
7725 	}
7726 	return 0;
7727 }
7728 
get_iter_from_state(struct bpf_verifier_state * cur_st,struct bpf_kfunc_call_arg_meta * meta)7729 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
7730 						 struct bpf_kfunc_call_arg_meta *meta)
7731 {
7732 	int iter_frameno = meta->iter.frameno;
7733 	int iter_spi = meta->iter.spi;
7734 
7735 	return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7736 }
7737 
7738 /* process_iter_next_call() is called when verifier gets to iterator's next
7739  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7740  * to it as just "iter_next()" in comments below.
7741  *
7742  * BPF verifier relies on a crucial contract for any iter_next()
7743  * implementation: it should *eventually* return NULL, and once that happens
7744  * it should keep returning NULL. That is, once iterator exhausts elements to
7745  * iterate, it should never reset or spuriously return new elements.
7746  *
7747  * With the assumption of such contract, process_iter_next_call() simulates
7748  * a fork in the verifier state to validate loop logic correctness and safety
7749  * without having to simulate infinite amount of iterations.
7750  *
7751  * In current state, we first assume that iter_next() returned NULL and
7752  * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7753  * conditions we should not form an infinite loop and should eventually reach
7754  * exit.
7755  *
7756  * Besides that, we also fork current state and enqueue it for later
7757  * verification. In a forked state we keep iterator state as ACTIVE
7758  * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7759  * also bump iteration depth to prevent erroneous infinite loop detection
7760  * later on (see iter_active_depths_differ() comment for details). In this
7761  * state we assume that we'll eventually loop back to another iter_next()
7762  * calls (it could be in exactly same location or in some other instruction,
7763  * it doesn't matter, we don't make any unnecessary assumptions about this,
7764  * everything revolves around iterator state in a stack slot, not which
7765  * instruction is calling iter_next()). When that happens, we either will come
7766  * to iter_next() with equivalent state and can conclude that next iteration
7767  * will proceed in exactly the same way as we just verified, so it's safe to
7768  * assume that loop converges. If not, we'll go on another iteration
7769  * simulation with a different input state, until all possible starting states
7770  * are validated or we reach maximum number of instructions limit.
7771  *
7772  * This way, we will either exhaustively discover all possible input states
7773  * that iterator loop can start with and eventually will converge, or we'll
7774  * effectively regress into bounded loop simulation logic and either reach
7775  * maximum number of instructions if loop is not provably convergent, or there
7776  * is some statically known limit on number of iterations (e.g., if there is
7777  * an explicit `if n > 100 then break;` statement somewhere in the loop).
7778  *
7779  * Iteration convergence logic in is_state_visited() relies on exact
7780  * states comparison, which ignores read and precision marks.
7781  * This is necessary because read and precision marks are not finalized
7782  * while in the loop. Exact comparison might preclude convergence for
7783  * simple programs like below:
7784  *
7785  *     i = 0;
7786  *     while(iter_next(&it))
7787  *       i++;
7788  *
7789  * At each iteration step i++ would produce a new distinct state and
7790  * eventually instruction processing limit would be reached.
7791  *
7792  * To avoid such behavior speculatively forget (widen) range for
7793  * imprecise scalar registers, if those registers were not precise at the
7794  * end of the previous iteration and do not match exactly.
7795  *
7796  * This is a conservative heuristic that allows to verify wide range of programs,
7797  * however it precludes verification of programs that conjure an
7798  * imprecise value on the first loop iteration and use it as precise on a second.
7799  * For example, the following safe program would fail to verify:
7800  *
7801  *     struct bpf_num_iter it;
7802  *     int arr[10];
7803  *     int i = 0, a = 0;
7804  *     bpf_iter_num_new(&it, 0, 10);
7805  *     while (bpf_iter_num_next(&it)) {
7806  *       if (a == 0) {
7807  *         a = 1;
7808  *         i = 7; // Because i changed verifier would forget
7809 	 *                // its range on second loop entry.
7810  *       } else {
7811  *         arr[i] = 42; // This would fail to verify.
7812  *       }
7813  *     }
7814  *     bpf_iter_num_destroy(&it);
7815  */
process_iter_next_call(struct bpf_verifier_env * env,int insn_idx,struct bpf_kfunc_call_arg_meta * meta)7816 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
7817 				  struct bpf_kfunc_call_arg_meta *meta)
7818 {
	/* See the block comment above: fork the state at iter_next(). The
	 * current state proceeds assuming NULL return (DRAINED); a queued
	 * copy assumes non-NULL return (ACTIVE) with bumped depth.
	 */
7819 	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
7820 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
7821 	struct bpf_reg_state *cur_iter, *queued_iter;
7822 
7823 	BTF_TYPE_EMIT(struct bpf_iter);
7824 
7825 	cur_iter = get_iter_from_state(cur_st, meta);
7826 
	/* only ACTIVE or DRAINED iterators may reach iter_next() */
7827 	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
7828 	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
7829 		verifier_bug(env, "unexpected iterator state %d (%s)",
7830 			     cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
7831 		return -EFAULT;
7832 	}
7833 
7834 	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
7835 		/* Because iter_next() call is a checkpoint is_state_visited()
7836 		 * should guarantee parent state with same call sites and insn_idx.
7837 		 */
7838 		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
7839 		    !same_callsites(cur_st->parent, cur_st)) {
7840 			verifier_bug(env, "bad parent state for iter next call");
7841 			return -EFAULT;
7842 		}
7843 		/* Note cur_st->parent in the call below, it is necessary to skip
7844 		 * checkpoint created for cur_st by is_state_visited()
7845 		 * right at this instruction.
7846 		 */
7847 		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
7848 		/* branch out active iter state */
7849 		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
7850 		if (IS_ERR(queued_st))
7851 			return PTR_ERR(queued_st);
7852 
7853 		queued_iter = get_iter_from_state(queued_st, meta);
7854 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
7855 		queued_iter->iter.depth++;
		/* widen against the previous loop entry, if one exists, to
		 * help the forked state converge (see comment above)
		 */
7856 		if (prev_st)
7857 			widen_imprecise_scalars(env, prev_st, queued_st);
7858 
		/* in the forked state iter_next() returned non-NULL */
7859 		queued_fr = queued_st->frame[queued_st->curframe];
7860 		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
7861 	}
7862 
7863 	/* switch to DRAINED state, but keep the depth unchanged */
7864 	/* mark current iter state as drained and assume returned NULL */
7865 	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
7866 	__mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
7867 
7868 	return 0;
7869 }
7870 
arg_type_is_mem_size(enum bpf_arg_type type)7871 static bool arg_type_is_mem_size(enum bpf_arg_type type)
7872 {
7873 	return type == ARG_CONST_SIZE ||
7874 	       type == ARG_CONST_SIZE_OR_ZERO;
7875 }
7876 
arg_type_is_raw_mem(enum bpf_arg_type type)7877 static bool arg_type_is_raw_mem(enum bpf_arg_type type)
7878 {
7879 	return base_type(type) == ARG_PTR_TO_MEM &&
7880 	       type & MEM_UNINIT;
7881 }
7882 
arg_type_is_release(enum bpf_arg_type type)7883 static bool arg_type_is_release(enum bpf_arg_type type)
7884 {
7885 	return type & OBJ_RELEASE;
7886 }
7887 
arg_type_is_dynptr(enum bpf_arg_type type)7888 static bool arg_type_is_dynptr(enum bpf_arg_type type)
7889 {
7890 	return base_type(type) == ARG_PTR_TO_DYNPTR;
7891 }
7892 
resolve_map_arg_type(struct bpf_verifier_env * env,const struct bpf_call_arg_meta * meta,enum bpf_arg_type * arg_type)7893 static int resolve_map_arg_type(struct bpf_verifier_env *env,
7894 				 const struct bpf_call_arg_meta *meta,
7895 				 enum bpf_arg_type *arg_type)
7896 {
7897 	if (!meta->map.ptr) {
7898 		/* kernel subsystem misconfigured verifier */
7899 		verifier_bug(env, "invalid map_ptr to access map->type");
7900 		return -EFAULT;
7901 	}
7902 
7903 	switch (meta->map.ptr->map_type) {
7904 	case BPF_MAP_TYPE_SOCKMAP:
7905 	case BPF_MAP_TYPE_SOCKHASH:
7906 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
7907 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
7908 		} else {
7909 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
7910 			return -EINVAL;
7911 		}
7912 		break;
7913 	case BPF_MAP_TYPE_BLOOM_FILTER:
7914 		if (meta->func_id == BPF_FUNC_map_peek_elem)
7915 			*arg_type = ARG_PTR_TO_MAP_VALUE;
7916 		break;
7917 	default:
7918 		break;
7919 	}
7920 	return 0;
7921 }
7922 
7923 struct bpf_reg_types {
	/* Acceptable register types; unused trailing entries are NOT_INIT
	 * and check_reg_type() stops scanning at the first one.
	 */
7924 	const enum bpf_reg_type types[10];
	/* Optional required BTF type ID for PTR_TO_BTF_ID entries. */
7925 	u32 *btf_id;
7926 };
7927 
7928 static const struct bpf_reg_types sock_types = {
7929 	.types = {
7930 		PTR_TO_SOCK_COMMON,
7931 		PTR_TO_SOCKET,
7932 		PTR_TO_TCP_SOCK,
7933 		PTR_TO_XDP_SOCK,
7934 	},
7935 };
7936 
7937 #ifdef CONFIG_NET
7938 static const struct bpf_reg_types btf_id_sock_common_types = {
7939 	.types = {
7940 		PTR_TO_SOCK_COMMON,
7941 		PTR_TO_SOCKET,
7942 		PTR_TO_TCP_SOCK,
7943 		PTR_TO_XDP_SOCK,
7944 		PTR_TO_BTF_ID,
7945 		PTR_TO_BTF_ID | PTR_TRUSTED,
7946 	},
7947 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
7948 };
7949 #endif
7950 
7951 static const struct bpf_reg_types mem_types = {
7952 	.types = {
7953 		PTR_TO_STACK,
7954 		PTR_TO_PACKET,
7955 		PTR_TO_PACKET_META,
7956 		PTR_TO_MAP_KEY,
7957 		PTR_TO_MAP_VALUE,
7958 		PTR_TO_MEM,
7959 		PTR_TO_MEM | MEM_RINGBUF,
7960 		PTR_TO_BUF,
7961 		PTR_TO_BTF_ID | PTR_TRUSTED,
7962 		PTR_TO_CTX,
7963 	},
7964 };
7965 
7966 static const struct bpf_reg_types spin_lock_types = {
7967 	.types = {
7968 		PTR_TO_MAP_VALUE,
7969 		PTR_TO_BTF_ID | MEM_ALLOC,
7970 	}
7971 };
7972 
7973 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
7974 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
7975 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
7976 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
7977 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
7978 static const struct bpf_reg_types btf_ptr_types = {
7979 	.types = {
7980 		PTR_TO_BTF_ID,
7981 		PTR_TO_BTF_ID | PTR_TRUSTED,
7982 		PTR_TO_BTF_ID | MEM_RCU,
7983 	},
7984 };
7985 static const struct bpf_reg_types percpu_btf_ptr_types = {
7986 	.types = {
7987 		PTR_TO_BTF_ID | MEM_PERCPU,
7988 		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
7989 		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
7990 	}
7991 };
7992 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
7993 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
7994 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
7995 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
7996 static const struct bpf_reg_types kptr_xchg_dest_types = {
7997 	.types = {
7998 		PTR_TO_MAP_VALUE,
7999 		PTR_TO_BTF_ID | MEM_ALLOC,
8000 		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF,
8001 		PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU,
8002 	}
8003 };
8004 static const struct bpf_reg_types dynptr_types = {
8005 	.types = {
8006 		PTR_TO_STACK,
8007 		CONST_PTR_TO_DYNPTR,
8008 	}
8009 };
8010 
/* Map each base helper-argument type to the set of register types it
 * accepts; consulted by check_reg_type(). A NULL entry means the arg
 * type is unsupported there.
 */
8011 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
8012 	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
8013 	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
8014 	[ARG_CONST_SIZE]		= &scalar_types,
8015 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
8016 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
8017 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
8018 	[ARG_PTR_TO_CTX]		= &context_types,
8019 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
8020 #ifdef CONFIG_NET
8021 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
8022 #endif
8023 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
8024 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
8025 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
8026 	[ARG_PTR_TO_MEM]		= &mem_types,
8027 	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
8028 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
8029 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
8030 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
8031 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
8032 	[ARG_PTR_TO_TIMER]		= &timer_types,
8033 	[ARG_KPTR_XCHG_DEST]		= &kptr_xchg_dest_types,
8034 	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
8035 };
8036 
check_reg_type(struct bpf_verifier_env * env,u32 regno,enum bpf_arg_type arg_type,const u32 * arg_btf_id,struct bpf_call_arg_meta * meta)8037 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
8038 			  enum bpf_arg_type arg_type,
8039 			  const u32 *arg_btf_id,
8040 			  struct bpf_call_arg_meta *meta)
8041 {
	/* Check that register 'regno' has a register type acceptable for
	 * helper argument 'arg_type', per the compatible_reg_types table;
	 * for PTR_TO_BTF_ID registers additionally match the BTF type.
	 * Returns 0 if compatible, -EACCES/-EFAULT otherwise.
	 */
8042 	struct bpf_reg_state *reg = reg_state(env, regno);
8043 	enum bpf_reg_type expected, type = reg->type;
8044 	const struct bpf_reg_types *compatible;
8045 	int i, j, err;
8046 
8047 	compatible = compatible_reg_types[base_type(arg_type)];
8048 	if (!compatible) {
8049 		verifier_bug(env, "unsupported arg type %d", arg_type);
8050 		return -EFAULT;
8051 	}
8052 
8053 	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
8054 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
8055 	 *
8056 	 * Same for MAYBE_NULL:
8057 	 *
8058 	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
8059 	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
8060 	 *
8061 	 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
8062 	 *
8063 	 * Therefore we fold these flags depending on the arg_type before comparison.
8064 	 */
8065 	if (arg_type & MEM_RDONLY)
8066 		type &= ~MEM_RDONLY;
8067 	if (arg_type & PTR_MAYBE_NULL)
8068 		type &= ~PTR_MAYBE_NULL;
8069 	if (base_type(arg_type) == ARG_PTR_TO_MEM)
8070 		type &= ~DYNPTR_TYPE_FLAG_MASK;
8071 
8072 	/* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
8073 	if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
8074 		type &= ~MEM_ALLOC;
8075 		type &= ~MEM_PERCPU;
8076 	}
8077 
	/* scan candidate types; unused trailing entries are NOT_INIT */
8078 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
8079 		expected = compatible->types[i];
8080 		if (expected == NOT_INIT)
8081 			break;
8082 
8083 		if (type == expected)
8084 			goto found;
8085 	}
8086 
	/* no match: report the full list of acceptable types (i of them) */
8087 	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
8088 	for (j = 0; j + 1 < i; j++)
8089 		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
8090 	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
8091 	return -EACCES;
8092 
8093 found:
	/* non-BTF pointers need no further BTF type matching */
8094 	if (base_type(reg->type) != PTR_TO_BTF_ID)
8095 		return 0;
8096 
8097 	if (compatible == &mem_types) {
8098 		if (!(arg_type & MEM_RDONLY)) {
8099 			verbose(env,
8100 				"%s() may write into memory pointed by R%d type=%s\n",
8101 				func_id_name(meta->func_id),
8102 				regno, reg_type_str(env, reg->type));
8103 			return -EACCES;
8104 		}
8105 		return 0;
8106 	}
8107 
8108 	switch ((int)reg->type) {
8109 	case PTR_TO_BTF_ID:
8110 	case PTR_TO_BTF_ID | PTR_TRUSTED:
8111 	case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL:
8112 	case PTR_TO_BTF_ID | MEM_RCU:
8113 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
8114 	case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
8115 	{
8116 		/* For bpf_sk_release, it needs to match against first member
8117 		 * 'struct sock_common', hence make an exception for it. This
8118 		 * allows bpf_sk_release to work for multiple socket types.
8119 		 */
8120 		bool strict_type_match = arg_type_is_release(arg_type) &&
8121 					 meta->func_id != BPF_FUNC_sk_release;
8122 
		/* a possibly-NULL pointer may only go to a MAYBE_NULL,
		 * non-release argument
		 */
8123 		if (type_may_be_null(reg->type) &&
8124 		    (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
8125 			verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
8126 			return -EACCES;
8127 		}
8128 
		/* fall back to the table's BTF ID when the helper proto
		 * doesn't pin one for this argument
		 */
8129 		if (!arg_btf_id) {
8130 			if (!compatible->btf_id) {
8131 				verifier_bug(env, "missing arg compatible BTF ID");
8132 				return -EFAULT;
8133 			}
8134 			arg_btf_id = compatible->btf_id;
8135 		}
8136 
8137 		if (meta->func_id == BPF_FUNC_kptr_xchg) {
8138 			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8139 				return -EACCES;
8140 		} else {
8141 			if (arg_btf_id == BPF_PTR_POISON) {
8142 				verbose(env, "verifier internal error:");
8143 				verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
8144 					regno);
8145 				return -EACCES;
8146 			}
8147 
8148 			err = __check_ptr_off_reg(env, reg, regno, true);
8149 			if (err)
8150 				return err;
8151 
8152 			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id,
8153 						  reg->var_off.value, btf_vmlinux, *arg_btf_id,
8154 						  strict_type_match)) {
8155 				verbose(env, "R%d is of type %s but %s is expected\n",
8156 					regno, btf_type_name(reg->btf, reg->btf_id),
8157 					btf_type_name(btf_vmlinux, *arg_btf_id));
8158 				return -EACCES;
8159 			}
8160 		}
8161 		break;
8162 	}
8163 	case PTR_TO_BTF_ID | MEM_ALLOC:
8164 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
8165 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8166 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8167 		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
8168 		    meta->func_id != BPF_FUNC_kptr_xchg) {
8169 			verifier_bug(env, "unimplemented handling of MEM_ALLOC");
8170 			return -EFAULT;
8171 		}
8172 		/* Check if local kptr in src arg matches kptr in dst arg */
8173 		if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
8174 			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8175 				return -EACCES;
8176 		}
8177 		break;
8178 	case PTR_TO_BTF_ID | MEM_PERCPU:
8179 	case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU:
8180 	case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
8181 		/* Handled by helper specific checks */
8182 		break;
8183 	default:
8184 		verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
8185 		return -EFAULT;
8186 	}
8187 	return 0;
8188 }
8189 
8190 static struct btf_field *
reg_find_field_offset(const struct bpf_reg_state * reg,s32 off,u32 fields)8191 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
8192 {
8193 	struct btf_field *field;
8194 	struct btf_record *rec;
8195 
8196 	rec = reg_btf_record(reg);
8197 	if (!rec)
8198 		return NULL;
8199 
8200 	field = btf_record_find(rec, off, fields);
8201 	if (!field)
8202 		return NULL;
8203 
8204 	return field;
8205 }
8206 
check_func_arg_reg_off(struct bpf_verifier_env * env,const struct bpf_reg_state * reg,int regno,enum bpf_arg_type arg_type)8207 static int check_func_arg_reg_off(struct bpf_verifier_env *env,
8208 				  const struct bpf_reg_state *reg, int regno,
8209 				  enum bpf_arg_type arg_type)
8210 {
	/* Validate the (fixed + variable) offset of an argument register
	 * against what its register type and the argument type allow.
	 * Returns 0 if acceptable, negative error otherwise.
	 */
8211 	u32 type = reg->type;
8212 
8213 	/* When referenced register is passed to release function, its fixed
8214 	 * offset must be 0.
8215 	 *
8216 	 * We will check arg_type_is_release reg has ref_obj_id when storing
8217 	 * meta->release_regno.
8218 	 */
8219 	if (arg_type_is_release(arg_type)) {
8220 		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
8221 		 * may not directly point to the object being released, but to
8222 		 * dynptr pointing to such object, which might be at some offset
8223 		 * on the stack. In that case, we simply fall back to the
8224 		 * default handling.
8225 		 */
8226 		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
8227 			return 0;
8228 
8229 		/* Doing check_ptr_off_reg check for the offset will catch this
8230 		 * because fixed_off_ok is false, but checking here allows us
8231 		 * to give the user a better error message.
8232 		 */
8233 		if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) {
8234 			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
8235 				regno);
8236 			return -EINVAL;
8237 		}
8238 	}
8239 
8240 	switch (type) {
8241 	/* Pointer types where both fixed and variable offset is explicitly allowed: */
8242 	case PTR_TO_STACK:
8243 	case PTR_TO_PACKET:
8244 	case PTR_TO_PACKET_META:
8245 	case PTR_TO_MAP_KEY:
8246 	case PTR_TO_MAP_VALUE:
8247 	case PTR_TO_MEM:
8248 	case PTR_TO_MEM | MEM_RDONLY:
8249 	case PTR_TO_MEM | MEM_RINGBUF:
8250 	case PTR_TO_BUF:
8251 	case PTR_TO_BUF | MEM_RDONLY:
8252 	case PTR_TO_ARENA:
8253 	case SCALAR_VALUE:
8254 		return 0;
8255 	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
8256 	 * fixed offset.
8257 	 */
8258 	case PTR_TO_BTF_ID:
8259 	case PTR_TO_BTF_ID | MEM_ALLOC:
8260 	case PTR_TO_BTF_ID | PTR_TRUSTED:
8261 	case PTR_TO_BTF_ID | MEM_RCU:
8262 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8263 	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8264 		/* When referenced PTR_TO_BTF_ID is passed to release function,
8265 		 * its fixed offset must be 0. In the other cases, fixed offset
8266 		 * can be non-zero. This was already checked above. So pass
8267 		 * fixed_off_ok as true to allow fixed offset for all other
8268 		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8269 		 * still need to do checks instead of returning.
8270 		 */
8271 		return __check_ptr_off_reg(env, reg, regno, true);
8272 	case PTR_TO_CTX:
8273 		/*
8274 		 * Allow fixed and variable offsets for syscall context, but
8275 		 * only when the argument is passed as memory, not ctx,
8276 		 * otherwise we may get modified ctx in tail called programs and
8277 		 * global subprogs (that may act as extension prog hooks).
8278 		 */
8279 		if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog))
8280 			return 0;
8281 		fallthrough;
8282 	default:
8283 		return __check_ptr_off_reg(env, reg, regno, false);
8284 	}
8285 }
8286 
get_dynptr_arg_reg(struct bpf_verifier_env * env,const struct bpf_func_proto * fn,struct bpf_reg_state * regs)8287 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
8288 						const struct bpf_func_proto *fn,
8289 						struct bpf_reg_state *regs)
8290 {
8291 	struct bpf_reg_state *state = NULL;
8292 	int i;
8293 
8294 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
8295 		if (arg_type_is_dynptr(fn->arg_type[i])) {
8296 			if (state) {
8297 				verbose(env, "verifier internal error: multiple dynptr args\n");
8298 				return NULL;
8299 			}
8300 			state = &regs[BPF_REG_1 + i];
8301 		}
8302 
8303 	if (!state)
8304 		verbose(env, "verifier internal error: no dynptr arg found\n");
8305 
8306 	return state;
8307 }
8308 
dynptr_id(struct bpf_verifier_env * env,struct bpf_reg_state * reg)8309 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8310 {
8311 	struct bpf_func_state *state = bpf_func(env, reg);
8312 	int spi;
8313 
8314 	if (reg->type == CONST_PTR_TO_DYNPTR)
8315 		return reg->id;
8316 	spi = dynptr_get_spi(env, reg);
8317 	if (spi < 0)
8318 		return spi;
8319 	return state->stack[spi].spilled_ptr.id;
8320 }
8321 
dynptr_ref_obj_id(struct bpf_verifier_env * env,struct bpf_reg_state * reg)8322 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8323 {
8324 	struct bpf_func_state *state = bpf_func(env, reg);
8325 	int spi;
8326 
8327 	if (reg->type == CONST_PTR_TO_DYNPTR)
8328 		return reg->ref_obj_id;
8329 	spi = dynptr_get_spi(env, reg);
8330 	if (spi < 0)
8331 		return spi;
8332 	return state->stack[spi].spilled_ptr.ref_obj_id;
8333 }
8334 
dynptr_get_type(struct bpf_verifier_env * env,struct bpf_reg_state * reg)8335 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
8336 					    struct bpf_reg_state *reg)
8337 {
8338 	struct bpf_func_state *state = bpf_func(env, reg);
8339 	int spi;
8340 
8341 	if (reg->type == CONST_PTR_TO_DYNPTR)
8342 		return reg->dynptr.type;
8343 
8344 	spi = bpf_get_spi(reg->var_off.value);
8345 	if (spi < 0) {
8346 		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
8347 		return BPF_DYNPTR_TYPE_INVALID;
8348 	}
8349 
8350 	return state->stack[spi].spilled_ptr.dynptr.type;
8351 }
8352 
/* Validate that R[regno] points at a NUL-terminated constant string:
 * the register must be a constant-offset pointer into a read-only map
 * that supports direct value access, the access must be within map
 * bounds, and a NUL byte must exist before the end of the value.
 * Returns 0 on success, negative error otherwise.
 */
static int check_reg_const_str(struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg, u32 regno)
{
	struct bpf_map *map = reg->map_ptr;
	int err;
	int map_off;
	u64 map_addr;
	char *str_ptr;

	if (reg->type != PTR_TO_MAP_VALUE)
		return -EINVAL;

	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
		verbose(env, "R%d points to insn_array map which cannot be used as const string\n", regno);
		return -EACCES;
	}

	if (!bpf_map_is_rdonly(map)) {
		/* fixed: dropped stray trailing apostrophe from message */
		verbose(env, "R%d does not point to a readonly map\n", regno);
		return -EACCES;
	}

	if (!tnum_is_const(reg->var_off)) {
		/* fixed: dropped stray trailing apostrophe from message */
		verbose(env, "R%d is not a constant address\n", regno);
		return -EACCES;
	}

	if (!map->ops->map_direct_value_addr) {
		verbose(env, "no direct value access support for this map type\n");
		return -EACCES;
	}

	/* bounds-check the whole [off, value_size) tail being accessed */
	err = check_map_access(env, regno, 0,
			       map->value_size - reg->var_off.value, false,
			       ACCESS_HELPER);
	if (err)
		return err;

	map_off = reg->var_off.value;
	err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
	if (err) {
		verbose(env, "direct value access on string failed\n");
		return err;
	}

	str_ptr = (char *)(long)(map_addr);
	/* the string must terminate before the end of the map value */
	if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
		verbose(env, "string is not zero-terminated\n");
		return -EINVAL;
	}
	return 0;
}
8405 
8406 /* Returns constant key value in `value` if possible, else negative error */
get_constant_map_key(struct bpf_verifier_env * env,struct bpf_reg_state * key,u32 key_size,s64 * value)8407 static int get_constant_map_key(struct bpf_verifier_env *env,
8408 				struct bpf_reg_state *key,
8409 				u32 key_size,
8410 				s64 *value)
8411 {
	/* Try to recover a statically-known map key from the stack: either a
	 * run of STACK_ZERO bytes or a constant scalar spill of exactly
	 * key_size bytes. On success stores the key in *value and marks the
	 * slot precise; otherwise returns a negative error (-EOPNOTSUPP when
	 * the key simply isn't statically known).
	 */
8412 	struct bpf_func_state *state = bpf_func(env, key);
8413 	struct bpf_reg_state *reg;
8414 	int slot, spi, off;
8415 	int spill_size = 0;
8416 	int zero_size = 0;
8417 	int stack_off;
8418 	int i, err;
8419 	u8 *stype;
8420 
8421 	if (!env->bpf_capable)
8422 		return -EOPNOTSUPP;
	/* key must be a constant-offset pointer into the stack */
8423 	if (key->type != PTR_TO_STACK)
8424 		return -EOPNOTSUPP;
8425 	if (!tnum_is_const(key->var_off))
8426 		return -EOPNOTSUPP;
8427 
	/* translate the (negative) stack offset into slot index + byte offset */
8428 	stack_off = key->var_off.value;
8429 	slot = -stack_off - 1;
8430 	spi = slot / BPF_REG_SIZE;
8431 	off = slot % BPF_REG_SIZE;
8432 	stype = state->stack[spi].slot_type;
8433 
8434 	/* First handle precisely tracked STACK_ZERO */
8435 	for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
8436 		zero_size++;
8437 	if (zero_size >= key_size) {
8438 		*value = 0;
8439 		return 0;
8440 	}
8441 
8442 	/* Check that stack contains a scalar spill of expected size */
8443 	if (!bpf_is_spilled_scalar_reg(&state->stack[spi]))
8444 		return -EOPNOTSUPP;
8445 	for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--)
8446 		spill_size++;
8447 	if (spill_size != key_size)
8448 		return -EOPNOTSUPP;
8449 
8450 	reg = &state->stack[spi].spilled_ptr;
8451 	if (!tnum_is_const(reg->var_off))
8452 		/* Stack value not statically known */
8453 		return -EOPNOTSUPP;
8454 
8455 	/* We are relying on a constant value. So mark as precise
8456 	 * to prevent pruning on it.
8457 	 */
8458 	bpf_bt_set_frame_slot(&env->bt, key->frameno, spi);
8459 	err = mark_chain_precision_batch(env, env->cur_state);
8460 	if (err < 0)
8461 		return err;
8462 
8463 	*value = reg->var_off.value;
8464 	return 0;
8465 }
8466 
/* Forward declaration: defined later; used by the ARG_PTR_TO_MAP_KEY
 * handling in check_func_arg() below.
 */
8467 static bool can_elide_value_nullness(enum bpf_map_type type);
8468 
/* Verify a single helper-call argument (register R1 + @arg) against the
 * helper's declared arg_type from its bpf_func_proto:
 *  - type-check the register (skipped for a known-NULL register passed
 *    to a *_OR_NULL argument),
 *  - record release/reference-ownership bookkeeping in @meta,
 *  - run per-base-type processing (map ptr/key/value, spin locks,
 *    timers, dynptrs, mem + size pairs, kptr xchg, ...).
 * Returns 0 on success or a negative error.
 */
check_func_arg(struct bpf_verifier_env * env,u32 arg,struct bpf_call_arg_meta * meta,const struct bpf_func_proto * fn,int insn_idx)8469 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8470 			  struct bpf_call_arg_meta *meta,
8471 			  const struct bpf_func_proto *fn,
8472 			  int insn_idx)
8473 {
8474 	u32 regno = BPF_REG_1 + arg;
8475 	struct bpf_reg_state *reg = reg_state(env, regno);
8476 	enum bpf_arg_type arg_type = fn->arg_type[arg];
8477 	enum bpf_reg_type type = reg->type;
8478 	u32 *arg_btf_id = NULL;
8479 	u32 key_size;
8480 	int err = 0;
8481 
8482 	if (arg_type == ARG_DONTCARE)
8483 		return 0;
8484 
8485 	err = check_reg_arg(env, regno, SRC_OP);
8486 	if (err)
8487 		return err;
8488 
	/* ARG_ANYTHING accepts any scalar but must not leak pointers
	 * to unprivileged users.
	 */
8489 	if (arg_type == ARG_ANYTHING) {
8490 		if (is_pointer_value(env, regno)) {
8491 			verbose(env, "R%d leaks addr into helper function\n",
8492 				regno);
8493 			return -EACCES;
8494 		}
8495 		return 0;
8496 	}
8497 
8498 	if (type_is_pkt_pointer(type) &&
8499 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8500 		verbose(env, "helper access to the packet is not allowed\n");
8501 		return -EACCES;
8502 	}
8503 
8504 	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8505 		err = resolve_map_arg_type(env, meta, &arg_type);
8506 		if (err)
8507 			return err;
8508 	}
8509 
8510 	if (bpf_register_is_null(reg) && type_may_be_null(arg_type))
8511 		/* A NULL register has a SCALAR_VALUE type, so skip
8512 		 * type checking.
8513 		 */
8514 		goto skip_type_check;
8515 
8516 	/* arg_btf_id and arg_size are in a union. */
8517 	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8518 	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8519 		arg_btf_id = fn->arg_btf_id[arg];
8520 
8521 	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
8522 	if (err)
8523 		return err;
8524 
8525 	err = check_func_arg_reg_off(env, reg, regno, arg_type);
8526 	if (err)
8527 		return err;
8528 
	/* Release/reference bookkeeping below still applies even when the
	 * type check was skipped for a known-NULL register.
	 */
8529 skip_type_check:
8530 	if (arg_type_is_release(arg_type)) {
8531 		if (arg_type_is_dynptr(arg_type)) {
8532 			struct bpf_func_state *state = bpf_func(env, reg);
8533 			int spi;
8534 
8535 			/* Only dynptr created on stack can be released, thus
8536 			 * the get_spi and stack state checks for spilled_ptr
8537 			 * should only be done before process_dynptr_func for
8538 			 * PTR_TO_STACK.
8539 			 */
8540 			if (reg->type == PTR_TO_STACK) {
8541 				spi = dynptr_get_spi(env, reg);
8542 				if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
8543 					verbose(env, "arg %d is an unacquired reference\n", regno);
8544 					return -EINVAL;
8545 				}
8546 			} else {
8547 				verbose(env, "cannot release unowned const bpf_dynptr\n");
8548 				return -EINVAL;
8549 			}
8550 		} else if (!reg->ref_obj_id && !bpf_register_is_null(reg)) {
8551 			verbose(env, "R%d must be referenced when passed to release function\n",
8552 				regno);
8553 			return -EINVAL;
8554 		}
8555 		if (meta->release_regno) {
8556 			verifier_bug(env, "more than one release argument");
8557 			return -EFAULT;
8558 		}
8559 		meta->release_regno = regno;
8560 	}
8561 
	/* At most one argument may carry a ref_obj_id, so the helper's
	 * acquire/release semantics stay unambiguous.
	 */
8562 	if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
8563 		if (meta->ref_obj_id) {
8564 			verbose(env, "more than one arg with ref_obj_id R%d %u %u",
8565 				regno, reg->ref_obj_id,
8566 				meta->ref_obj_id);
8567 			return -EACCES;
8568 		}
8569 		meta->ref_obj_id = reg->ref_obj_id;
8570 	}
8571 
8572 	switch (base_type(arg_type)) {
8573 	case ARG_CONST_MAP_PTR:
8574 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8575 		if (meta->map.ptr) {
8576 			/* Use map_uid (which is unique id of inner map) to reject:
8577 			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8578 			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8579 			 * if (inner_map1 && inner_map2) {
8580 			 *     timer = bpf_map_lookup_elem(inner_map1);
8581 			 *     if (timer)
8582 			 *         // mismatch would have been allowed
8583 			 *         bpf_timer_init(timer, inner_map2);
8584 			 * }
8585 			 *
8586 			 * Comparing map_ptr is enough to distinguish normal and outer maps.
8587 			 */
8588 			if (meta->map.ptr != reg->map_ptr ||
8589 			    meta->map.uid != reg->map_uid) {
8590 				verbose(env,
8591 					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8592 					meta->map.uid, reg->map_uid);
8593 				return -EINVAL;
8594 			}
8595 		}
8596 		meta->map.ptr = reg->map_ptr;
8597 		meta->map.uid = reg->map_uid;
8598 		break;
8599 	case ARG_PTR_TO_MAP_KEY:
8600 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
8601 		 * check that [key, key + map->key_size) are within
8602 		 * stack limits and initialized
8603 		 */
8604 		if (!meta->map.ptr) {
8605 			/* in function declaration map_ptr must come before
8606 			 * map_key, so that it's verified and known before
8607 			 * we have to check map_key here. Otherwise it means
8608 			 * that kernel subsystem misconfigured verifier
8609 			 */
8610 			verifier_bug(env, "invalid map_ptr to access map->key");
8611 			return -EFAULT;
8612 		}
8613 		key_size = meta->map.ptr->key_size;
8614 		err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
8615 		if (err)
8616 			return err;
		/* For maps where lookups with a constant in-bounds key can
		 * never return NULL, try to resolve the key now; -EOPNOTSUPP
		 * just means "not constant" and is not an error.
		 */
8617 		if (can_elide_value_nullness(meta->map.ptr->map_type)) {
8618 			err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
8619 			if (err < 0) {
8620 				meta->const_map_key = -1;
8621 				if (err == -EOPNOTSUPP)
8622 					err = 0;
8623 				else
8624 					return err;
8625 			}
8626 		}
8627 		break;
8628 	case ARG_PTR_TO_MAP_VALUE:
8629 		if (type_may_be_null(arg_type) && bpf_register_is_null(reg))
8630 			return 0;
8631 
8632 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
8633 		 * check [value, value + map->value_size) validity
8634 		 */
8635 		if (!meta->map.ptr) {
8636 			/* kernel subsystem misconfigured verifier */
8637 			verifier_bug(env, "invalid map_ptr to access map->value");
8638 			return -EFAULT;
8639 		}
8640 		meta->raw_mode = arg_type & MEM_UNINIT;
8641 		err = check_helper_mem_access(env, regno, meta->map.ptr->value_size,
8642 					      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8643 					      false, meta);
8644 		break;
8645 	case ARG_PTR_TO_PERCPU_BTF_ID:
8646 		if (!reg->btf_id) {
8647 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8648 			return -EACCES;
8649 		}
8650 		meta->ret_btf = reg->btf;
8651 		meta->ret_btf_id = reg->btf_id;
8652 		break;
8653 	case ARG_PTR_TO_SPIN_LOCK:
8654 		if (in_rbtree_lock_required_cb(env)) {
8655 			verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8656 			return -EACCES;
8657 		}
8658 		if (meta->func_id == BPF_FUNC_spin_lock) {
8659 			err = process_spin_lock(env, regno, PROCESS_SPIN_LOCK);
8660 			if (err)
8661 				return err;
8662 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
8663 			err = process_spin_lock(env, regno, 0);
8664 			if (err)
8665 				return err;
8666 		} else {
8667 			verifier_bug(env, "spin lock arg on unexpected helper");
8668 			return -EFAULT;
8669 		}
8670 		break;
8671 	case ARG_PTR_TO_TIMER:
8672 		err = process_timer_helper(env, regno, meta);
8673 		if (err)
8674 			return err;
8675 		break;
8676 	case ARG_PTR_TO_FUNC:
8677 		meta->subprogno = reg->subprogno;
8678 		break;
8679 	case ARG_PTR_TO_MEM:
8680 		/* The access to this pointer is only checked when we hit the
8681 		 * next is_mem_size argument below.
8682 		 */
8683 		meta->raw_mode = arg_type & MEM_UNINIT;
8684 		if (arg_type & MEM_FIXED_SIZE) {
8685 			err = check_helper_mem_access(env, regno, fn->arg_size[arg],
8686 						      arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ,
8687 						      false, meta);
8688 			if (err)
8689 				return err;
8690 			if (arg_type & MEM_ALIGNED)
8691 				err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
8692 		}
8693 		break;
8694 	case ARG_CONST_SIZE:
8695 		err = check_mem_size_reg(env, reg, regno,
8696 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8697 					 BPF_WRITE : BPF_READ,
8698 					 false, meta);
8699 		break;
8700 	case ARG_CONST_SIZE_OR_ZERO:
8701 		err = check_mem_size_reg(env, reg, regno,
8702 					 fn->arg_type[arg - 1] & MEM_WRITE ?
8703 					 BPF_WRITE : BPF_READ,
8704 					 true, meta);
8705 		break;
8706 	case ARG_PTR_TO_DYNPTR:
8707 		err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
8708 		if (err)
8709 			return err;
8710 		break;
8711 	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8712 		if (!tnum_is_const(reg->var_off)) {
8713 			verbose(env, "R%d is not a known constant'\n",
8714 				regno);
8715 			return -EACCES;
8716 		}
8717 		meta->mem_size = reg->var_off.value;
8718 		err = mark_chain_precision(env, regno);
8719 		if (err)
8720 			return err;
8721 		break;
8722 	case ARG_PTR_TO_CONST_STR:
8723 	{
8724 		err = check_reg_const_str(env, reg, regno);
8725 		if (err)
8726 			return err;
8727 		break;
8728 	}
8729 	case ARG_KPTR_XCHG_DEST:
8730 		err = process_kptr_func(env, regno, meta);
8731 		if (err)
8732 			return err;
8733 		break;
8734 	}
8735 
8736 	return err;
8737 }
8738 
may_update_sockmap(struct bpf_verifier_env * env,int func_id)8739 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8740 {
8741 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
8742 	enum bpf_prog_type type = resolve_prog_type(env->prog);
8743 
8744 	if (func_id != BPF_FUNC_map_update_elem &&
8745 	    func_id != BPF_FUNC_map_delete_elem)
8746 		return false;
8747 
8748 	/* It's not possible to get access to a locked struct sock in these
8749 	 * contexts, so updating is safe.
8750 	 */
8751 	switch (type) {
8752 	case BPF_PROG_TYPE_TRACING:
8753 		if (eatype == BPF_TRACE_ITER)
8754 			return true;
8755 		break;
8756 	case BPF_PROG_TYPE_SOCK_OPS:
8757 		/* map_update allowed only via dedicated helpers with event type checks */
8758 		if (func_id == BPF_FUNC_map_delete_elem)
8759 			return true;
8760 		break;
8761 	case BPF_PROG_TYPE_SOCKET_FILTER:
8762 	case BPF_PROG_TYPE_SCHED_CLS:
8763 	case BPF_PROG_TYPE_SCHED_ACT:
8764 	case BPF_PROG_TYPE_XDP:
8765 	case BPF_PROG_TYPE_SK_REUSEPORT:
8766 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
8767 	case BPF_PROG_TYPE_SK_LOOKUP:
8768 		return true;
8769 	default:
8770 		break;
8771 	}
8772 
8773 	verbose(env, "cannot update sockmap in this context\n");
8774 	return false;
8775 }
8776 
bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env * env)8777 bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8778 {
8779 	return env->prog->jit_requested &&
8780 	       bpf_jit_supports_subprog_tailcalls();
8781 }
8782 
/* Enforce the map <-> helper compatibility matrix in both directions:
 * first restrict which helpers a given map type may be used with, then
 * restrict which map types a given helper accepts.  Returns 0 when
 * compatible (or when no map is involved), -EINVAL otherwise.
 */
check_map_func_compatibility(struct bpf_verifier_env * env,struct bpf_map * map,int func_id)8783 static int check_map_func_compatibility(struct bpf_verifier_env *env,
8784 					struct bpf_map *map, int func_id)
8785 {
8786 	if (!map)
8787 		return 0;
8788 
8789 	/* We need a two way check, first is from map perspective ... */
8790 	switch (map->map_type) {
8791 	case BPF_MAP_TYPE_PROG_ARRAY:
8792 		if (func_id != BPF_FUNC_tail_call)
8793 			goto error;
8794 		break;
8795 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8796 		if (func_id != BPF_FUNC_perf_event_read &&
8797 		    func_id != BPF_FUNC_perf_event_output &&
8798 		    func_id != BPF_FUNC_skb_output &&
8799 		    func_id != BPF_FUNC_perf_event_read_value &&
8800 		    func_id != BPF_FUNC_xdp_output)
8801 			goto error;
8802 		break;
8803 	case BPF_MAP_TYPE_RINGBUF:
8804 		if (func_id != BPF_FUNC_ringbuf_output &&
8805 		    func_id != BPF_FUNC_ringbuf_reserve &&
8806 		    func_id != BPF_FUNC_ringbuf_query &&
8807 		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8808 		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8809 		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
8810 			goto error;
8811 		break;
8812 	case BPF_MAP_TYPE_USER_RINGBUF:
8813 		if (func_id != BPF_FUNC_user_ringbuf_drain)
8814 			goto error;
8815 		break;
8816 	case BPF_MAP_TYPE_STACK_TRACE:
8817 		if (func_id != BPF_FUNC_get_stackid)
8818 			goto error;
8819 		break;
8820 	case BPF_MAP_TYPE_CGROUP_ARRAY:
8821 		if (func_id != BPF_FUNC_skb_under_cgroup &&
8822 		    func_id != BPF_FUNC_current_task_under_cgroup)
8823 			goto error;
8824 		break;
8825 	case BPF_MAP_TYPE_CGROUP_STORAGE:
8826 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8827 		if (func_id != BPF_FUNC_get_local_storage)
8828 			goto error;
8829 		break;
8830 	case BPF_MAP_TYPE_DEVMAP:
8831 	case BPF_MAP_TYPE_DEVMAP_HASH:
8832 		if (func_id != BPF_FUNC_redirect_map &&
8833 		    func_id != BPF_FUNC_map_lookup_elem)
8834 			goto error;
8835 		break;
8836 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
8837 	 * appear.
8838 	 */
8839 	case BPF_MAP_TYPE_CPUMAP:
8840 		if (func_id != BPF_FUNC_redirect_map)
8841 			goto error;
8842 		break;
8843 	case BPF_MAP_TYPE_XSKMAP:
8844 		if (func_id != BPF_FUNC_redirect_map &&
8845 		    func_id != BPF_FUNC_map_lookup_elem)
8846 			goto error;
8847 		break;
8848 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8849 	case BPF_MAP_TYPE_HASH_OF_MAPS:
8850 		if (func_id != BPF_FUNC_map_lookup_elem)
8851 			goto error;
8852 		break;
8853 	case BPF_MAP_TYPE_SOCKMAP:
8854 		if (func_id != BPF_FUNC_sk_redirect_map &&
8855 		    func_id != BPF_FUNC_sock_map_update &&
8856 		    func_id != BPF_FUNC_msg_redirect_map &&
8857 		    func_id != BPF_FUNC_sk_select_reuseport &&
8858 		    func_id != BPF_FUNC_map_lookup_elem &&
8859 		    !may_update_sockmap(env, func_id))
8860 			goto error;
8861 		break;
8862 	case BPF_MAP_TYPE_SOCKHASH:
8863 		if (func_id != BPF_FUNC_sk_redirect_hash &&
8864 		    func_id != BPF_FUNC_sock_hash_update &&
8865 		    func_id != BPF_FUNC_msg_redirect_hash &&
8866 		    func_id != BPF_FUNC_sk_select_reuseport &&
8867 		    func_id != BPF_FUNC_map_lookup_elem &&
8868 		    !may_update_sockmap(env, func_id))
8869 			goto error;
8870 		break;
8871 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8872 		if (func_id != BPF_FUNC_sk_select_reuseport)
8873 			goto error;
8874 		break;
8875 	case BPF_MAP_TYPE_QUEUE:
8876 	case BPF_MAP_TYPE_STACK:
8877 		if (func_id != BPF_FUNC_map_peek_elem &&
8878 		    func_id != BPF_FUNC_map_pop_elem &&
8879 		    func_id != BPF_FUNC_map_push_elem)
8880 			goto error;
8881 		break;
8882 	case BPF_MAP_TYPE_SK_STORAGE:
8883 		if (func_id != BPF_FUNC_sk_storage_get &&
8884 		    func_id != BPF_FUNC_sk_storage_delete &&
8885 		    func_id != BPF_FUNC_kptr_xchg)
8886 			goto error;
8887 		break;
8888 	case BPF_MAP_TYPE_INODE_STORAGE:
8889 		if (func_id != BPF_FUNC_inode_storage_get &&
8890 		    func_id != BPF_FUNC_inode_storage_delete &&
8891 		    func_id != BPF_FUNC_kptr_xchg)
8892 			goto error;
8893 		break;
8894 	case BPF_MAP_TYPE_TASK_STORAGE:
8895 		if (func_id != BPF_FUNC_task_storage_get &&
8896 		    func_id != BPF_FUNC_task_storage_delete &&
8897 		    func_id != BPF_FUNC_kptr_xchg)
8898 			goto error;
8899 		break;
8900 	case BPF_MAP_TYPE_CGRP_STORAGE:
8901 		if (func_id != BPF_FUNC_cgrp_storage_get &&
8902 		    func_id != BPF_FUNC_cgrp_storage_delete &&
8903 		    func_id != BPF_FUNC_kptr_xchg)
8904 			goto error;
8905 		break;
8906 	case BPF_MAP_TYPE_BLOOM_FILTER:
8907 		if (func_id != BPF_FUNC_map_peek_elem &&
8908 		    func_id != BPF_FUNC_map_push_elem)
8909 			goto error;
8910 		break;
	/* Insn arrays are not usable with any helper. */
8911 	case BPF_MAP_TYPE_INSN_ARRAY:
8912 		goto error;
8913 	default:
8914 		break;
8915 	}
8916 
8917 	/* ... and second from the function itself. */
8918 	switch (func_id) {
8919 	case BPF_FUNC_tail_call:
8920 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
8921 			goto error;
8922 		if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) {
8923 			verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
8924 			return -EINVAL;
8925 		}
8926 		break;
8927 	case BPF_FUNC_perf_event_read:
8928 	case BPF_FUNC_perf_event_output:
8929 	case BPF_FUNC_perf_event_read_value:
8930 	case BPF_FUNC_skb_output:
8931 	case BPF_FUNC_xdp_output:
8932 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
8933 			goto error;
8934 		break;
8935 	case BPF_FUNC_ringbuf_output:
8936 	case BPF_FUNC_ringbuf_reserve:
8937 	case BPF_FUNC_ringbuf_query:
8938 	case BPF_FUNC_ringbuf_reserve_dynptr:
8939 	case BPF_FUNC_ringbuf_submit_dynptr:
8940 	case BPF_FUNC_ringbuf_discard_dynptr:
8941 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
8942 			goto error;
8943 		break;
8944 	case BPF_FUNC_user_ringbuf_drain:
8945 		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
8946 			goto error;
8947 		break;
8948 	case BPF_FUNC_get_stackid:
8949 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
8950 			goto error;
8951 		break;
8952 	case BPF_FUNC_current_task_under_cgroup:
8953 	case BPF_FUNC_skb_under_cgroup:
8954 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
8955 			goto error;
8956 		break;
8957 	case BPF_FUNC_redirect_map:
8958 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
8959 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
8960 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
8961 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
8962 			goto error;
8963 		break;
8964 	case BPF_FUNC_sk_redirect_map:
8965 	case BPF_FUNC_msg_redirect_map:
8966 	case BPF_FUNC_sock_map_update:
8967 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
8968 			goto error;
8969 		break;
8970 	case BPF_FUNC_sk_redirect_hash:
8971 	case BPF_FUNC_msg_redirect_hash:
8972 	case BPF_FUNC_sock_hash_update:
8973 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
8974 			goto error;
8975 		break;
8976 	case BPF_FUNC_get_local_storage:
8977 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
8978 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
8979 			goto error;
8980 		break;
8981 	case BPF_FUNC_sk_select_reuseport:
8982 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
8983 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
8984 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
8985 			goto error;
8986 		break;
8987 	case BPF_FUNC_map_pop_elem:
8988 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8989 		    map->map_type != BPF_MAP_TYPE_STACK)
8990 			goto error;
8991 		break;
8992 	case BPF_FUNC_map_peek_elem:
8993 	case BPF_FUNC_map_push_elem:
8994 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8995 		    map->map_type != BPF_MAP_TYPE_STACK &&
8996 		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
8997 			goto error;
8998 		break;
8999 	case BPF_FUNC_map_lookup_percpu_elem:
9000 		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
9001 		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
9002 		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
9003 			goto error;
9004 		break;
9005 	case BPF_FUNC_sk_storage_get:
9006 	case BPF_FUNC_sk_storage_delete:
9007 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
9008 			goto error;
9009 		break;
9010 	case BPF_FUNC_inode_storage_get:
9011 	case BPF_FUNC_inode_storage_delete:
9012 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
9013 			goto error;
9014 		break;
9015 	case BPF_FUNC_task_storage_get:
9016 	case BPF_FUNC_task_storage_delete:
9017 		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
9018 			goto error;
9019 		break;
9020 	case BPF_FUNC_cgrp_storage_get:
9021 	case BPF_FUNC_cgrp_storage_delete:
9022 		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
9023 			goto error;
9024 		break;
9025 	default:
9026 		break;
9027 	}
9028 
9029 	return 0;
9030 error:
9031 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
9032 		map->map_type, func_id_name(func_id), func_id);
9033 	return -EINVAL;
9034 }
9035 
check_raw_mode_ok(const struct bpf_func_proto * fn)9036 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
9037 {
9038 	int count = 0;
9039 
9040 	if (arg_type_is_raw_mem(fn->arg1_type))
9041 		count++;
9042 	if (arg_type_is_raw_mem(fn->arg2_type))
9043 		count++;
9044 	if (arg_type_is_raw_mem(fn->arg3_type))
9045 		count++;
9046 	if (arg_type_is_raw_mem(fn->arg4_type))
9047 		count++;
9048 	if (arg_type_is_raw_mem(fn->arg5_type))
9049 		count++;
9050 
9051 	/* We only support one arg being in raw mode at the moment,
9052 	 * which is sufficient for the helper functions we have
9053 	 * right now.
9054 	 */
9055 	return count <= 1;
9056 }
9057 
check_args_pair_invalid(const struct bpf_func_proto * fn,int arg)9058 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
9059 {
9060 	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
9061 	bool has_size = fn->arg_size[arg] != 0;
9062 	bool is_next_size = false;
9063 
9064 	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
9065 		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
9066 
9067 	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
9068 		return is_next_size;
9069 
9070 	return has_size == is_next_size || is_next_size == is_fixed;
9071 }
9072 
check_arg_pair_ok(const struct bpf_func_proto * fn)9073 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
9074 {
9075 	/* bpf_xxx(..., buf, len) call will access 'len'
9076 	 * bytes from memory 'buf'. Both arg types need
9077 	 * to be paired, so make sure there's no buggy
9078 	 * helper function specification.
9079 	 */
9080 	if (arg_type_is_mem_size(fn->arg1_type) ||
9081 	    check_args_pair_invalid(fn, 0) ||
9082 	    check_args_pair_invalid(fn, 1) ||
9083 	    check_args_pair_invalid(fn, 2) ||
9084 	    check_args_pair_invalid(fn, 3) ||
9085 	    check_args_pair_invalid(fn, 4))
9086 		return false;
9087 
9088 	return true;
9089 }
9090 
check_btf_id_ok(const struct bpf_func_proto * fn)9091 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
9092 {
9093 	int i;
9094 
9095 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
9096 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
9097 			return !!fn->arg_btf_id[i];
9098 		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
9099 			return fn->arg_btf_id[i] == BPF_PTR_POISON;
9100 		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
9101 		    /* arg_btf_id and arg_size are in a union. */
9102 		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
9103 		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
9104 			return false;
9105 	}
9106 
9107 	return true;
9108 }
9109 
check_mem_arg_rw_flag_ok(const struct bpf_func_proto * fn)9110 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn)
9111 {
9112 	int i;
9113 
9114 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
9115 		enum bpf_arg_type arg_type = fn->arg_type[i];
9116 
9117 		if (base_type(arg_type) != ARG_PTR_TO_MEM)
9118 			continue;
9119 		if (!(arg_type & (MEM_WRITE | MEM_RDONLY)))
9120 			return false;
9121 	}
9122 
9123 	return true;
9124 }
9125 
check_func_proto(const struct bpf_func_proto * fn)9126 static int check_func_proto(const struct bpf_func_proto *fn)
9127 {
9128 	return check_raw_mode_ok(fn) &&
9129 	       check_arg_pair_ok(fn) &&
9130 	       check_mem_arg_rw_flag_ok(fn) &&
9131 	       check_btf_id_ok(fn) ? 0 : -EINVAL;
9132 }
9133 
9134 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
9135  * are now invalid, so turn them into unknown SCALAR_VALUE.
9136  *
9137  * This also applies to dynptr slices belonging to skb and xdp dynptrs,
9138  * since these slices point to packet data.
9139  */
clear_all_pkt_pointers(struct bpf_verifier_env * env)9140 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
9141 {
9142 	struct bpf_func_state *state;
9143 	struct bpf_reg_state *reg;
9144 
	/* Walk every register and stack slot in every frame of the
	 * current verifier state.
	 */
9145 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9146 		if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
9147 			mark_reg_invalid(env, reg);
9148 	}));
9149 }
9150 
/* Special negative 'range' markers stored by mark_pkt_end():
 * AT_PKT_END:     pointer known to be exactly at pkt_end (pkt >= pkt_end)
 * BEYOND_PKT_END: pointer at least one byte past pkt_end (pkt > pkt_end)
 */
9151 enum {
9152 	AT_PKT_END = -1,
9153 	BEYOND_PKT_END = -2,
9154 };
9155 
/* Record, for packet pointer @regn, how it relates to pkt_end after a
 * 'pkt > pkt_end' (@range_open) or 'pkt >= pkt_end' (!@range_open)
 * comparison was taken.  In the open case the pointer is at least one
 * byte beyond pkt_end; in the closed case it is exactly at pkt_end.
 */
static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
{
	struct bpf_func_state *cur = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &cur->regs[regn];

	/* PTR_TO_PACKET_META is not supported yet */
	if (reg->type != PTR_TO_PACKET)
		return;

	reg->range = range_open ? BEYOND_PKT_END : AT_PKT_END;
}
9176 
release_reference_nomark(struct bpf_verifier_state * state,int ref_obj_id)9177 static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id)
9178 {
9179 	int i;
9180 
9181 	for (i = 0; i < state->acquired_refs; i++) {
9182 		if (state->refs[i].type != REF_TYPE_PTR)
9183 			continue;
9184 		if (state->refs[i].id == ref_obj_id) {
9185 			release_reference_state(state, i);
9186 			return 0;
9187 		}
9188 	}
9189 	return -EINVAL;
9190 }
9191 
9192 /* The pointer with the specified id has released its reference to kernel
9193  * resources. Identify all copies of the same pointer and clear the reference.
9194  *
9195  * This is the release function corresponding to acquire_reference(). Idempotent.
9196  */
release_reference(struct bpf_verifier_env * env,int ref_obj_id)9197 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
9198 {
9199 	struct bpf_verifier_state *vstate = env->cur_state;
9200 	struct bpf_func_state *state;
9201 	struct bpf_reg_state *reg;
9202 	int err;
9203 
	/* First remove the entry from the acquired-references table ... */
9204 	err = release_reference_nomark(vstate, ref_obj_id);
9205 	if (err)
9206 		return err;
9207 
	/* ... then invalidate every register copy carrying this id. */
9208 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
9209 		if (reg->ref_obj_id == ref_obj_id)
9210 			mark_reg_invalid(env, reg);
9211 	}));
9212 
9213 	return 0;
9214 }
9215 
/* Invalidate every register holding a non-owning reference across all
 * frames of the current state, e.g. when the lock protecting those
 * references is released.
 */
invalidate_non_owning_refs(struct bpf_verifier_env * env)9216 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
9217 {
9218 	struct bpf_func_state *unused;
9219 	struct bpf_reg_state *reg;
9220 
9221 	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9222 		if (type_is_non_owning_ref(reg->type))
9223 			mark_reg_invalid(env, reg);
9224 	}));
9225 }
9226 
clear_caller_saved_regs(struct bpf_verifier_env * env,struct bpf_reg_state * regs)9227 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9228 				    struct bpf_reg_state *regs)
9229 {
9230 	int i;
9231 
9232 	/* after the call registers r0 - r5 were scratched */
9233 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
9234 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
9235 		__check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9236 	}
9237 }
9238 
/* Callback used by setup_func_entry() to seed the callee frame's
 * registers from the caller's state at the given call site.
 */
9239 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
9240 				   struct bpf_func_state *caller,
9241 				   struct bpf_func_state *callee,
9242 				   int insn_idx);
9243 
/* Forward declaration: default set_callee_state_fn for direct subprog calls. */
9244 static int set_callee_state(struct bpf_verifier_env *env,
9245 			    struct bpf_func_state *caller,
9246 			    struct bpf_func_state *callee, int insn_idx);
9247 
/* Allocate and initialize a new callee frame for a subprog or callback
 * call at @callsite, attach it to @state, and let @set_callee_state_cb
 * seed the callee's registers from the caller's.  On success the state's
 * curframe is advanced to the new frame; on failure the frame is freed
 * and a negative error is returned.
 */
setup_func_entry(struct bpf_verifier_env * env,int subprog,int callsite,set_callee_state_fn set_callee_state_cb,struct bpf_verifier_state * state)9248 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
9249 			    set_callee_state_fn set_callee_state_cb,
9250 			    struct bpf_verifier_state *state)
9251 {
9252 	struct bpf_func_state *caller, *callee;
9253 	int err;
9254 
9255 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9256 		verbose(env, "the call stack of %d frames is too deep\n",
9257 			state->curframe + 2);
9258 		return -E2BIG;
9259 	}
9260 
9261 	if (state->frame[state->curframe + 1]) {
9262 		verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
9263 		return -EFAULT;
9264 	}
9265 
9266 	caller = state->frame[state->curframe];
9267 	callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT);
9268 	if (!callee)
9269 		return -ENOMEM;
9270 	state->frame[state->curframe + 1] = callee;
9271 
9272 	/* callee cannot access r0, r6 - r9 for reading and has to write
9273 	 * into its own stack before reading from it.
9274 	 * callee can read/write into caller's stack
9275 	 */
9276 	init_func_state(env, callee,
9277 			/* remember the callsite, it will be used by bpf_exit */
9278 			callsite,
9279 			state->curframe + 1 /* frameno within this callchain */,
9280 			subprog /* subprog number within this prog */);
9281 	err = set_callee_state_cb(env, caller, callee, callsite);
9282 	if (err)
9283 		goto err_out;
9284 
9285 	/* only increment it after check_reg_arg() finished */
9286 	state->curframe++;
9287 
9288 	return 0;
9289 
9290 err_out:
9291 	free_func_state(callee);
9292 	state->frame[state->curframe + 1] = NULL;
9293 	return err;
9294 }
9295 
/* Check that the actual register states of a subprog call's arguments
 * match the subprog's BTF-declared argument types (as prepared by
 * btf_prepare_func_args()).  Returns 0 on a full match, a negative
 * error otherwise.
 */
btf_check_func_arg_match(struct bpf_verifier_env * env,int subprog,const struct btf * btf,struct bpf_reg_state * regs)9296 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
9297 				    const struct btf *btf,
9298 				    struct bpf_reg_state *regs)
9299 {
9300 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
9301 	struct bpf_verifier_log *log = &env->log;
9302 	u32 i;
9303 	int ret;
9304 
9305 	ret = btf_prepare_func_args(env, subprog);
9306 	if (ret)
9307 		return ret;
9308 
9309 	/* check that BTF function arguments match actual types that the
9310 	 * verifier sees.
9311 	 */
9312 	for (i = 0; i < sub->arg_cnt; i++) {
9313 		u32 regno = i + 1;
9314 		struct bpf_reg_state *reg = &regs[regno];
9315 		struct bpf_subprog_arg_info *arg = &sub->args[i];
9316 
9317 		if (arg->arg_type == ARG_ANYTHING) {
9318 			if (reg->type != SCALAR_VALUE) {
9319 				bpf_log(log, "R%d is not a scalar\n", regno);
9320 				return -EINVAL;
9321 			}
9322 		} else if (arg->arg_type & PTR_UNTRUSTED) {
9323 			/*
9324 			 * Anything is allowed for untrusted arguments, as these are
9325 			 * read-only and probe read instructions would protect against
9326 			 * invalid memory access.
9327 			 */
9328 		} else if (arg->arg_type == ARG_PTR_TO_CTX) {
9329 			ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_CTX);
9330 			if (ret < 0)
9331 				return ret;
9332 			/* If function expects ctx type in BTF check that caller
9333 			 * is passing PTR_TO_CTX.
9334 			 */
9335 			if (reg->type != PTR_TO_CTX) {
9336 				bpf_log(log, "arg#%d expects pointer to ctx\n", i);
9337 				return -EINVAL;
9338 			}
9339 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
9340 			ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
9341 			if (ret < 0)
9342 				return ret;
9343 			if (check_mem_reg(env, reg, regno, arg->mem_size))
9344 				return -EINVAL;
9345 			if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) {
9346 				bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
9347 				return -EINVAL;
9348 			}
9349 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
9350 			/*
9351 			 * Can pass any value and the kernel won't crash, but
9352 			 * only PTR_TO_ARENA or SCALAR make sense. Everything
9353 			 * else is a bug in the bpf program. Point it out to
9354 			 * the user at the verification time instead of
9355 			 * run-time debug nightmare.
9356 			 */
9357 			if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
9358 				bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
9359 				return -EINVAL;
9360 			}
9361 		} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
9362 			ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR);
9363 			if (ret)
9364 				return ret;
9365 
9366 			ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
9367 			if (ret)
9368 				return ret;
9369 		} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
9370 			struct bpf_call_arg_meta meta;
9371 			int err;
9372 
9373 			if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type))
9374 				continue;
9375 
9376 			memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */
9377 			err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta);
9378 			err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type);
9379 			if (err)
9380 				return err;
9381 		} else {
9382 			verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type);
9383 			return -EFAULT;
9384 		}
9385 	}
9386 
9387 	return 0;
9388 }
9389 
9390 /* Compare BTF of a function call with given bpf_reg_state.
9391  * Returns:
9392  * EFAULT - there is a verifier bug. Abort verification.
9393  * EINVAL - there is a type mismatch or BTF is not available.
9394  * 0 - BTF matches with what bpf_reg_state expects.
9395  * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
9396  */
btf_check_subprog_call(struct bpf_verifier_env * env,int subprog,struct bpf_reg_state * regs)9397 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
9398 				  struct bpf_reg_state *regs)
9399 {
9400 	struct bpf_prog *prog = env->prog;
9401 	struct btf *btf = prog->aux->btf;
9402 	u32 btf_id;
9403 	int err;
9404 
9405 	if (!prog->aux->func_info)
9406 		return -EINVAL;
9407 
9408 	btf_id = prog->aux->func_info[subprog].type_id;
9409 	if (!btf_id)
9410 		return -EFAULT;
9411 
9412 	if (prog->aux->func_info_aux[subprog].unreliable)
9413 		return -EINVAL;
9414 
9415 	err = btf_check_func_arg_match(env, subprog, btf, regs);
9416 	/* Compiler optimizations can remove arguments from static functions
9417 	 * or mismatched type can be passed into a global function.
9418 	 * In such cases mark the function as unreliable from BTF point of view.
9419 	 */
9420 	if (err)
9421 		prog->aux->func_info_aux[subprog].unreliable = true;
9422 	return err;
9423 }
9424 
/* Model a call to a callback-calling helper or kfunc at insn_idx: verify the
 * instruction is actually marked as callback-calling, then either queue the
 * callback as an asynchronous verifier state (timer/workqueue style) or push
 * a synchronous callback frame to be explored next.
 * Returns 0 on success, -EFAULT on verifier bugs, other negatives on error.
 */
push_callback_call(struct bpf_verifier_env * env,struct bpf_insn * insn,int insn_idx,int subprog,set_callee_state_fn set_callee_state_cb)9425 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9426 			      int insn_idx, int subprog,
9427 			      set_callee_state_fn set_callee_state_cb)
9428 {
9429 	struct bpf_verifier_state *state = env->cur_state, *callback_state;
9430 	struct bpf_func_state *caller, *callee;
9431 	int err;
9432 
9433 	caller = state->frame[state->curframe];
	/* Only -EFAULT (a verifier bug) aborts here; a plain BTF mismatch
	 * merely marked the subprog's BTF unreliable inside the check.
	 */
9434 	err = btf_check_subprog_call(env, subprog, caller->regs);
9435 	if (err == -EFAULT)
9436 		return err;
9437 
9438 	/* set_callee_state is used for direct subprog calls, but we are
9439 	 * interested in validating only BPF helpers that can call subprogs as
9440 	 * callbacks
9441 	 */
9442 	env->subprog_info[subprog].is_cb = true;
9443 	if (bpf_pseudo_kfunc_call(insn) &&
9444 	    !is_callback_calling_kfunc(insn->imm)) {
9445 		verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
9446 			     func_id_name(insn->imm), insn->imm);
9447 		return -EFAULT;
9448 	} else if (!bpf_pseudo_kfunc_call(insn) &&
9449 		   !is_callback_calling_function(insn->imm)) { /* helper */
9450 		verifier_bug(env, "helper %s#%d not marked as callback-calling",
9451 			     func_id_name(insn->imm), insn->imm);
9452 		return -EFAULT;
9453 	}
9454 
9455 	if (bpf_is_async_callback_calling_insn(insn)) {
9456 		struct bpf_verifier_state *async_cb;
9457 
9458 		/* there is no real recursion here. timer and workqueue callbacks are async */
9459 		env->subprog_info[subprog].is_async_cb = true;
9460 		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
9461 					 insn_idx, subprog,
9462 					 is_async_cb_sleepable(env, insn));
9463 		if (IS_ERR(async_cb))
9464 			return PTR_ERR(async_cb);
9465 		callee = async_cb->frame[0];
9466 		callee->async_entry_cnt = caller->async_entry_cnt + 1;
9467 
9468 		/* Convert bpf_timer_set_callback() args into timer callback args */
9469 		err = set_callee_state_cb(env, caller, callee, insn_idx);
9470 		if (err)
9471 			return err;
9472 
9473 		return 0;
9474 	}
9475 
9476 	/* for callback functions enqueue entry to callback and
9477 	 * proceed with next instruction within current frame.
9478 	 */
9479 	callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
9480 	if (IS_ERR(callback_state))
9481 		return PTR_ERR(callback_state);
9482 
9483 	err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
9484 			       callback_state);
9485 	if (err)
9486 		return err;
9487 
	/* Account for how deep the callback iteration has been unrolled in
	 * the pushed state, and reset the caller's own counter.
	 */
9488 	callback_state->callback_unroll_depth++;
9489 	callback_state->frame[callback_state->curframe - 1]->callback_depth++;
9490 	caller->callback_depth = 0;
9491 	return 0;
9492 }
9493 
/* Verify a BPF-to-BPF call instruction (insn->imm is the pc-relative offset
 * to the callee). Global subprogs are verified separately, so the call is
 * treated as an opaque boundary with caller-saved regs clobbered; static
 * subprogs are entered by pushing a new verifier frame and continuing at the
 * callee's first instruction.
 */
check_func_call(struct bpf_verifier_env * env,struct bpf_insn * insn,int * insn_idx)9494 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9495 			   int *insn_idx)
9496 {
9497 	struct bpf_verifier_state *state = env->cur_state;
9498 	struct bpf_func_state *caller;
9499 	int err, subprog, target_insn;
9500 
9501 	target_insn = *insn_idx + insn->imm + 1;
9502 	subprog = bpf_find_subprog(env, target_insn);
9503 	if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
9504 			    target_insn))
9505 		return -EFAULT;
9506 
9507 	caller = state->frame[state->curframe];
	/* Only a verifier bug aborts here; for global subprogs a non-fatal
	 * BTF mismatch (err != 0) is reported below as invalid args.
	 */
9508 	err = btf_check_subprog_call(env, subprog, caller->regs);
9509 	if (err == -EFAULT)
9510 		return err;
9511 	if (bpf_subprog_is_global(env, subprog)) {
9512 		const char *sub_name = subprog_name(env, subprog);
9513 
9514 		if (env->cur_state->active_locks) {
9515 			verbose(env, "global function calls are not allowed while holding a lock,\n"
9516 				     "use static function instead\n");
9517 			return -EINVAL;
9518 		}
9519 
9520 		if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) {
9521 			verbose(env, "sleepable global function %s() called in %s\n",
9522 				sub_name, non_sleepable_context_description(env));
9523 			return -EINVAL;
9524 		}
9525 
9526 		if (err) {
9527 			verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
9528 				subprog, sub_name);
9529 			return err;
9530 		}
9531 
9532 		if (env->log.level & BPF_LOG_LEVEL)
9533 			verbose(env, "Func#%d ('%s') is global and assumed valid.\n",
9534 				subprog, sub_name);
9535 		if (env->subprog_info[subprog].changes_pkt_data)
9536 			clear_all_pkt_pointers(env);
9537 		/* mark global subprog for verifying after main prog */
9538 		subprog_aux(env, subprog)->called = true;
9539 		clear_caller_saved_regs(env, caller->regs);
9540 
9541 		/* All non-void global functions return a 64-bit SCALAR_VALUE. */
9542 		if (!subprog_returns_void(env, subprog)) {
9543 			mark_reg_unknown(env, caller->regs, BPF_REG_0);
9544 			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9545 		}
9546 
9547 		/* continue with next insn after call */
9548 		return 0;
9549 	}
9550 
9551 	/* for regular function entry setup new frame and continue
9552 	 * from that frame.
9553 	 */
9554 	err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
9555 	if (err)
9556 		return err;
9557 
9558 	clear_caller_saved_regs(env, caller->regs);
9559 
9560 	/* and go analyze first insn of the callee */
9561 	*insn_idx = env->subprog_info[subprog].start - 1;
9562 
9563 	if (env->log.level & BPF_LOG_LEVEL) {
9564 		verbose(env, "caller:\n");
9565 		print_verifier_state(env, state, caller->frameno, true);
9566 		verbose(env, "callee:\n");
9567 		print_verifier_state(env, state, state->curframe, true);
9568 	}
9569 
9570 	return 0;
9571 }
9572 
map_set_for_each_callback_args(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee)9573 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9574 				   struct bpf_func_state *caller,
9575 				   struct bpf_func_state *callee)
9576 {
9577 	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9578 	 *      void *callback_ctx, u64 flags);
9579 	 * callback_fn(struct bpf_map *map, void *key, void *value,
9580 	 *      void *callback_ctx);
9581 	 */
9582 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9583 
9584 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9585 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9586 	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9587 
9588 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9589 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9590 	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9591 
9592 	/* pointer to stack or null */
9593 	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9594 
9595 	/* unused */
9596 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9597 	return 0;
9598 }
9599 
set_callee_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9600 static int set_callee_state(struct bpf_verifier_env *env,
9601 			    struct bpf_func_state *caller,
9602 			    struct bpf_func_state *callee, int insn_idx)
9603 {
9604 	int i;
9605 
9606 	/* copy r1 - r5 args that callee can access.  The copy includes parent
9607 	 * pointers, which connects us up to the liveness chain
9608 	 */
9609 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9610 		callee->regs[i] = caller->regs[i];
9611 	return 0;
9612 }
9613 
set_map_elem_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9614 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9615 				       struct bpf_func_state *caller,
9616 				       struct bpf_func_state *callee,
9617 				       int insn_idx)
9618 {
9619 	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9620 	struct bpf_map *map;
9621 	int err;
9622 
9623 	/* valid map_ptr and poison value does not matter */
9624 	map = insn_aux->map_ptr_state.map_ptr;
9625 	if (!map->ops->map_set_for_each_callback_args ||
9626 	    !map->ops->map_for_each_callback) {
9627 		verbose(env, "callback function not allowed for map\n");
9628 		return -ENOTSUPP;
9629 	}
9630 
9631 	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9632 	if (err)
9633 		return err;
9634 
9635 	callee->in_callback_fn = true;
9636 	callee->callback_ret_range = retval_range(0, 1);
9637 	return 0;
9638 }
9639 
set_loop_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9640 static int set_loop_callback_state(struct bpf_verifier_env *env,
9641 				   struct bpf_func_state *caller,
9642 				   struct bpf_func_state *callee,
9643 				   int insn_idx)
9644 {
9645 	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9646 	 *	    u64 flags);
9647 	 * callback_fn(u64 index, void *callback_ctx);
9648 	 */
9649 	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9650 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9651 
9652 	/* unused */
9653 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9654 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9655 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9656 
9657 	callee->in_callback_fn = true;
9658 	callee->callback_ret_range = retval_range(0, 1);
9659 	return 0;
9660 }
9661 
set_timer_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9662 static int set_timer_callback_state(struct bpf_verifier_env *env,
9663 				    struct bpf_func_state *caller,
9664 				    struct bpf_func_state *callee,
9665 				    int insn_idx)
9666 {
9667 	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9668 
9669 	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9670 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9671 	 */
9672 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9673 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9674 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9675 
9676 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9677 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9678 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9679 
9680 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9681 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9682 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9683 
9684 	/* unused */
9685 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9686 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9687 	callee->in_async_callback_fn = true;
9688 	callee->callback_ret_range = retval_range(0, 0);
9689 	return 0;
9690 }
9691 
set_find_vma_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9692 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9693 				       struct bpf_func_state *caller,
9694 				       struct bpf_func_state *callee,
9695 				       int insn_idx)
9696 {
9697 	/* bpf_find_vma(struct task_struct *task, u64 addr,
9698 	 *               void *callback_fn, void *callback_ctx, u64 flags)
9699 	 * (callback_fn)(struct task_struct *task,
9700 	 *               struct vm_area_struct *vma, void *callback_ctx);
9701 	 */
9702 	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9703 
9704 	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9705 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9706 	callee->regs[BPF_REG_2].btf =  btf_vmlinux;
9707 	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
9708 
9709 	/* pointer to stack or null */
9710 	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9711 
9712 	/* unused */
9713 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9714 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9715 	callee->in_callback_fn = true;
9716 	callee->callback_ret_range = retval_range(0, 1);
9717 	return 0;
9718 }
9719 
set_user_ringbuf_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9720 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9721 					   struct bpf_func_state *caller,
9722 					   struct bpf_func_state *callee,
9723 					   int insn_idx)
9724 {
9725 	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
9726 	 *			  callback_ctx, u64 flags);
9727 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9728 	 */
9729 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9730 	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9731 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9732 
9733 	/* unused */
9734 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9735 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9736 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9737 
9738 	callee->in_callback_fn = true;
9739 	callee->callback_ret_range = retval_range(0, 1);
9740 	return 0;
9741 }
9742 
set_rbtree_add_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9743 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9744 					 struct bpf_func_state *caller,
9745 					 struct bpf_func_state *callee,
9746 					 int insn_idx)
9747 {
9748 	/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9749 	 *                     bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9750 	 *
9751 	 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9752 	 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9753 	 * by this point, so look at 'root'
9754 	 */
9755 	struct btf_field *field;
9756 
9757 	field = reg_find_field_offset(&caller->regs[BPF_REG_1],
9758 				      caller->regs[BPF_REG_1].var_off.value,
9759 				      BPF_RB_ROOT);
9760 	if (!field || !field->graph_root.value_btf_id)
9761 		return -EFAULT;
9762 
9763 	mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9764 	ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9765 	mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9766 	ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9767 
9768 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9769 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9770 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9771 	callee->in_callback_fn = true;
9772 	callee->callback_ret_range = retval_range(0, 1);
9773 	return 0;
9774 }
9775 
set_task_work_schedule_callback_state(struct bpf_verifier_env * env,struct bpf_func_state * caller,struct bpf_func_state * callee,int insn_idx)9776 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
9777 						 struct bpf_func_state *caller,
9778 						 struct bpf_func_state *callee,
9779 						 int insn_idx)
9780 {
9781 	struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
9782 
9783 	/*
9784 	 * callback_fn(struct bpf_map *map, void *key, void *value);
9785 	 */
9786 	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9787 	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9788 	callee->regs[BPF_REG_1].map_ptr = map_ptr;
9789 
9790 	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9791 	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9792 	callee->regs[BPF_REG_2].map_ptr = map_ptr;
9793 
9794 	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9795 	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9796 	callee->regs[BPF_REG_3].map_ptr = map_ptr;
9797 
9798 	/* unused */
9799 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9800 	bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9801 	callee->in_async_callback_fn = true;
9802 	callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
9803 	return 0;
9804 }
9805 
9806 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9807 
9808 /* Are we currently verifying the callback for a rbtree helper that must
9809  * be called with lock held? If so, no need to complain about unreleased
9810  * lock
9811  */
in_rbtree_lock_required_cb(struct bpf_verifier_env * env)9812 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9813 {
9814 	struct bpf_verifier_state *state = env->cur_state;
9815 	struct bpf_insn *insn = env->prog->insnsi;
9816 	struct bpf_func_state *callee;
9817 	int kfunc_btf_id;
9818 
9819 	if (!state->curframe)
9820 		return false;
9821 
9822 	callee = state->frame[state->curframe];
9823 
9824 	if (!callee->in_callback_fn)
9825 		return false;
9826 
9827 	kfunc_btf_id = insn[callee->callsite].imm;
9828 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9829 }
9830 
retval_range_within(struct bpf_retval_range range,const struct bpf_reg_state * reg)9831 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
9832 {
9833 	if (range.return_32bit)
9834 		return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
9835 	else
9836 		return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
9837 }
9838 
/* Handle BPF_EXIT from a callee frame: validate the return value, propagate
 * r0 to the caller (for direct subprog calls), pop the frame, and set the
 * instruction index at which verification resumes — the call site itself for
 * callbacks (so the call logic can reschedule another callback visit), or
 * the instruction after the call otherwise.
 */
prepare_func_exit(struct bpf_verifier_env * env,int * insn_idx)9839 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
9840 {
9841 	struct bpf_verifier_state *state = env->cur_state, *prev_st;
9842 	struct bpf_func_state *caller, *callee;
9843 	struct bpf_reg_state *r0;
9844 	bool in_callback_fn;
9845 	int err;
9846 
9847 	callee = state->frame[state->curframe];
9848 	r0 = &callee->regs[BPF_REG_0];
9849 	if (r0->type == PTR_TO_STACK) {
9850 		/* technically it's ok to return caller's stack pointer
9851 		 * (or caller's caller's pointer) back to the caller,
9852 		 * since these pointers are valid. Only current stack
9853 		 * pointer will be invalid as soon as function exits,
9854 		 * but let's be conservative
9855 		 */
9856 		verbose(env, "cannot return stack pointer to the caller\n");
9857 		return -EINVAL;
9858 	}
9859 
9860 	caller = state->frame[state->curframe - 1];
9861 	if (callee->in_callback_fn) {
9862 		if (r0->type != SCALAR_VALUE) {
9863 			verbose(env, "R0 not a scalar value\n");
9864 			return -EACCES;
9865 		}
9866 
9867 		/* we are going to rely on register's precise value */
9868 		err = mark_chain_precision(env, BPF_REG_0);
9869 		if (err)
9870 			return err;
9871 
9872 		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
9873 		if (!retval_range_within(callee->callback_ret_range, r0)) {
9874 			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
9875 					       "At callback return", "R0");
9876 			return -EINVAL;
9877 		}
9878 		if (!bpf_calls_callback(env, callee->callsite)) {
9879 			verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
9880 				     *insn_idx, callee->callsite);
9881 			return -EFAULT;
9882 		}
9883 	} else {
9884 		/* return to the caller whatever r0 had in the callee */
9885 		caller->regs[BPF_REG_0] = *r0;
9886 	}
9887 
9888 	/* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
9889 	 * there function call logic would reschedule callback visit. If iteration
9890 	 * converges is_state_visited() would prune that visit eventually.
9891 	 */
9892 	in_callback_fn = callee->in_callback_fn;
9893 	if (in_callback_fn)
9894 		*insn_idx = callee->callsite;
9895 	else
9896 		*insn_idx = callee->callsite + 1;
9897 
9898 	if (env->log.level & BPF_LOG_LEVEL) {
9899 		verbose(env, "returning from callee:\n");
9900 		print_verifier_state(env, state, callee->frameno, true);
9901 		verbose(env, "to caller at %d:\n", *insn_idx);
9902 		print_verifier_state(env, state, caller->frameno, true);
9903 	}
9904 	/* clear everything in the callee. In case of exceptional exits using
9905 	 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
9906 	free_func_state(callee);
9907 	state->frame[state->curframe--] = NULL;
9908 
9909 	/* for callbacks widen imprecise scalars to make programs like below verify:
9910 	 *
9911 	 *   struct ctx { int i; }
9912 	 *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
9913 	 *   ...
9914 	 *   struct ctx = { .i = 0; }
9915 	 *   bpf_loop(100, cb, &ctx, 0);
9916 	 *
9917 	 * This is similar to what is done in process_iter_next_call() for open
9918 	 * coded iterators.
9919 	 */
9920 	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
9921 	if (prev_st) {
9922 		err = widen_imprecise_scalars(env, prev_st, state);
9923 		if (err)
9924 			return err;
9925 	}
9926 	return 0;
9927 }
9928 
do_refine_retval_range(struct bpf_verifier_env * env,struct bpf_reg_state * regs,int ret_type,int func_id,struct bpf_call_arg_meta * meta)9929 static int do_refine_retval_range(struct bpf_verifier_env *env,
9930 				  struct bpf_reg_state *regs, int ret_type,
9931 				  int func_id,
9932 				  struct bpf_call_arg_meta *meta)
9933 {
9934 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9935 
9936 	if (ret_type != RET_INTEGER)
9937 		return 0;
9938 
9939 	switch (func_id) {
9940 	case BPF_FUNC_get_stack:
9941 	case BPF_FUNC_get_task_stack:
9942 	case BPF_FUNC_probe_read_str:
9943 	case BPF_FUNC_probe_read_kernel_str:
9944 	case BPF_FUNC_probe_read_user_str:
9945 		ret_reg->smax_value = meta->msize_max_value;
9946 		ret_reg->s32_max_value = meta->msize_max_value;
9947 		ret_reg->smin_value = -MAX_ERRNO;
9948 		ret_reg->s32_min_value = -MAX_ERRNO;
9949 		reg_bounds_sync(ret_reg);
9950 		break;
9951 	case BPF_FUNC_get_smp_processor_id:
9952 		ret_reg->umax_value = nr_cpu_ids - 1;
9953 		ret_reg->u32_max_value = nr_cpu_ids - 1;
9954 		ret_reg->smax_value = nr_cpu_ids - 1;
9955 		ret_reg->s32_max_value = nr_cpu_ids - 1;
9956 		ret_reg->umin_value = 0;
9957 		ret_reg->u32_min_value = 0;
9958 		ret_reg->smin_value = 0;
9959 		ret_reg->s32_min_value = 0;
9960 		reg_bounds_sync(ret_reg);
9961 		break;
9962 	}
9963 
9964 	return reg_bounds_sanity_check(env, ret_reg, "retval");
9965 }
9966 
9967 static int
record_func_map(struct bpf_verifier_env * env,struct bpf_call_arg_meta * meta,int func_id,int insn_idx)9968 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9969 		int func_id, int insn_idx)
9970 {
9971 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9972 	struct bpf_map *map = meta->map.ptr;
9973 
9974 	if (func_id != BPF_FUNC_tail_call &&
9975 	    func_id != BPF_FUNC_map_lookup_elem &&
9976 	    func_id != BPF_FUNC_map_update_elem &&
9977 	    func_id != BPF_FUNC_map_delete_elem &&
9978 	    func_id != BPF_FUNC_map_push_elem &&
9979 	    func_id != BPF_FUNC_map_pop_elem &&
9980 	    func_id != BPF_FUNC_map_peek_elem &&
9981 	    func_id != BPF_FUNC_for_each_map_elem &&
9982 	    func_id != BPF_FUNC_redirect_map &&
9983 	    func_id != BPF_FUNC_map_lookup_percpu_elem)
9984 		return 0;
9985 
9986 	if (map == NULL) {
9987 		verifier_bug(env, "expected map for helper call");
9988 		return -EFAULT;
9989 	}
9990 
9991 	/* In case of read-only, some additional restrictions
9992 	 * need to be applied in order to prevent altering the
9993 	 * state of the map from program side.
9994 	 */
9995 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9996 	    (func_id == BPF_FUNC_map_delete_elem ||
9997 	     func_id == BPF_FUNC_map_update_elem ||
9998 	     func_id == BPF_FUNC_map_push_elem ||
9999 	     func_id == BPF_FUNC_map_pop_elem)) {
10000 		verbose(env, "write into map forbidden\n");
10001 		return -EACCES;
10002 	}
10003 
10004 	if (!aux->map_ptr_state.map_ptr)
10005 		bpf_map_ptr_store(aux, meta->map.ptr,
10006 				  !meta->map.ptr->bypass_spec_v1, false);
10007 	else if (aux->map_ptr_state.map_ptr != meta->map.ptr)
10008 		bpf_map_ptr_store(aux, meta->map.ptr,
10009 				  !meta->map.ptr->bypass_spec_v1, true);
10010 	return 0;
10011 }
10012 
10013 static int
record_func_key(struct bpf_verifier_env * env,struct bpf_call_arg_meta * meta,int func_id,int insn_idx)10014 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
10015 		int func_id, int insn_idx)
10016 {
10017 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
10018 	struct bpf_reg_state *reg;
10019 	struct bpf_map *map = meta->map.ptr;
10020 	u64 val, max;
10021 	int err;
10022 
10023 	if (func_id != BPF_FUNC_tail_call)
10024 		return 0;
10025 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
10026 		verbose(env, "expected prog array map for tail call");
10027 		return -EINVAL;
10028 	}
10029 
10030 	reg = reg_state(env, BPF_REG_3);
10031 	val = reg->var_off.value;
10032 	max = map->max_entries;
10033 
10034 	if (!(is_reg_const(reg, false) && val < max)) {
10035 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10036 		return 0;
10037 	}
10038 
10039 	err = mark_chain_precision(env, BPF_REG_3);
10040 	if (err)
10041 		return err;
10042 	if (bpf_map_key_unseen(aux))
10043 		bpf_map_key_store(aux, val);
10044 	else if (!bpf_map_key_poisoned(aux) &&
10045 		  bpf_map_key_immediate(aux) != val)
10046 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
10047 	return 0;
10048 }
10049 
/* Report any acquired-but-unreleased pointer references. Only enforced when
 * leaving the main frame (or on an exceptional exit, which unwinds all
 * frames). Returns -EINVAL when a leak is found, 0 otherwise.
 */
static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
{
	struct bpf_verifier_state *state = env->cur_state;
	enum bpf_prog_type type = resolve_prog_type(env->prog);
	struct bpf_reg_state *r0 = reg_state(env, BPF_REG_0);
	bool leaked = false;
	int i;

	if (!exception_exit && cur_func(env)->frameno)
		return 0;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type != REF_TYPE_PTR)
			continue;
		/* Allow struct_ops programs to return a referenced kptr back to
		 * kernel. Type checks are performed later in check_return_code.
		 */
		if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
		    r0->ref_obj_id == state->refs[i].id)
			continue;
		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
			state->refs[i].id, state->refs[i].insn_idx);
		leaked = true;
	}
	return leaked ? -EINVAL : 0;
}
10076 
/* Reject the construct named by @prefix if it would leak resources:
 * unreleased references always, and — when @check_lock is set — also any
 * still-active spin lock, IRQ-save region, RCU read section or preemption-
 * disabled region. The individual checks are ordered so the diagnostics
 * keep their established precedence.
 */
check_resource_leak(struct bpf_verifier_env * env,bool exception_exit,bool check_lock,const char * prefix)10077 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix)
10078 {
10079 	int err;
10080 
10081 	if (check_lock && env->cur_state->active_locks) {
10082 		verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix);
10083 		return -EINVAL;
10084 	}
10085 
10086 	err = check_reference_leak(env, exception_exit);
10087 	if (err) {
10088 		verbose(env, "%s would lead to reference leak\n", prefix);
10089 		return err;
10090 	}
10091 
10092 	if (check_lock && env->cur_state->active_irq_id) {
10093 		verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix);
10094 		return -EINVAL;
10095 	}
10096 
10097 	if (check_lock && env->cur_state->active_rcu_locks) {
10098 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
10099 		return -EINVAL;
10100 	}
10101 
10102 	if (check_lock && env->cur_state->active_preempt_locks) {
10103 		verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix);
10104 		return -EINVAL;
10105 	}
10106 
10107 	return 0;
10108 }
10109 
/* Validate a bpf_snprintf() call at verification time. The format string
 * (R3) lives at a constant offset inside a map with a direct value address,
 * so it can be fetched here and its conversion specifiers checked against
 * the number of variadic u64 arguments (R5 length / 8).
 */
check_bpf_snprintf_call(struct bpf_verifier_env * env,struct bpf_reg_state * regs)10110 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
10111 				   struct bpf_reg_state *regs)
10112 {
10113 	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
10114 	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
10115 	struct bpf_map *fmt_map = fmt_reg->map_ptr;
10116 	struct bpf_bprintf_data data = {};
10117 	int err, fmt_map_off, num_args;
10118 	u64 fmt_addr;
10119 	char *fmt;
10120 
10121 	/* data must be an array of u64 */
10122 	if (data_len_reg->var_off.value % 8)
10123 		return -EINVAL;
10124 	num_args = data_len_reg->var_off.value / 8;
10125 
10126 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
10127 	 * and map_direct_value_addr is set.
10128 	 */
10129 	fmt_map_off = fmt_reg->var_off.value;
10130 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
10131 						  fmt_map_off);
10132 	if (err) {
10133 		verbose(env, "failed to retrieve map value address\n");
10134 		return -EFAULT;
10135 	}
10136 	fmt = (char *)(long)fmt_addr + fmt_map_off;
10137 
10138 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
10139 	 * can focus on validating the format specifiers.
10140 	 */
10141 	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
10142 	if (err < 0)
10143 		verbose(env, "Invalid format string\n");
10144 
10145 	return err;
10146 }
10147 
check_get_func_ip(struct bpf_verifier_env * env)10148 static int check_get_func_ip(struct bpf_verifier_env *env)
10149 {
10150 	enum bpf_prog_type type = resolve_prog_type(env->prog);
10151 	int func_id = BPF_FUNC_get_func_ip;
10152 
10153 	if (type == BPF_PROG_TYPE_TRACING) {
10154 		if (!bpf_prog_has_trampoline(env->prog)) {
10155 			verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n",
10156 				func_id_name(func_id), func_id);
10157 			return -ENOTSUPP;
10158 		}
10159 		return 0;
10160 	} else if (type == BPF_PROG_TYPE_KPROBE) {
10161 		return 0;
10162 	}
10163 
10164 	verbose(env, "func %s#%d not supported for program type %d\n",
10165 		func_id_name(func_id), func_id, type);
10166 	return -ENOTSUPP;
10167 }
10168 
cur_aux(const struct bpf_verifier_env * env)10169 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
10170 {
10171 	return &env->insn_aux_data[env->insn_idx];
10172 }
10173 
loop_flag_is_zero(struct bpf_verifier_env * env)10174 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
10175 {
10176 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_4);
10177 	bool reg_is_null = bpf_register_is_null(reg);
10178 
10179 	if (reg_is_null)
10180 		mark_chain_precision(env, BPF_REG_4);
10181 
10182 	return reg_is_null;
10183 }
10184 
/* Track, per bpf_loop() call site, whether the call remains eligible for
 * inlining: flags must be zero and the callback subprog must be the same on
 * every verified path through this instruction.
 */
static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
{
	struct bpf_loop_inline_state *st = &cur_aux(env)->loop_inline_state;

	if (!st->initialized) {
		/* First visit: record the candidate callback and flag state. */
		st->initialized = 1;
		st->fit_for_inline = loop_flag_is_zero(env);
		st->callback_subprogno = subprogno;
	} else if (st->fit_for_inline) {
		/* Subsequent visits can only demote eligibility, never restore it. */
		st->fit_for_inline = loop_flag_is_zero(env) &&
				     st->callback_subprogno == subprogno;
	}
}
10202 
10203 /* Returns whether or not the given map type can potentially elide
10204  * lookup return value nullness check. This is possible if the key
10205  * is statically known.
10206  */
can_elide_value_nullness(enum bpf_map_type type)10207 static bool can_elide_value_nullness(enum bpf_map_type type)
10208 {
10209 	switch (type) {
10210 	case BPF_MAP_TYPE_ARRAY:
10211 	case BPF_MAP_TYPE_PERCPU_ARRAY:
10212 		return true;
10213 	default:
10214 		return false;
10215 	}
10216 }
10217 
bpf_get_helper_proto(struct bpf_verifier_env * env,int func_id,const struct bpf_func_proto ** ptr)10218 int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id,
10219 			 const struct bpf_func_proto **ptr)
10220 {
10221 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
10222 		return -ERANGE;
10223 
10224 	if (!env->ops->get_func_proto)
10225 		return -EINVAL;
10226 
10227 	*ptr = env->ops->get_func_proto(func_id, env->prog);
10228 	return *ptr && (*ptr)->func ? 0 : -EINVAL;
10229 }
10230 
10231 /* Check if we're in a sleepable context. */
in_sleepable_context(struct bpf_verifier_env * env)10232 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
10233 {
10234 	return !env->cur_state->active_rcu_locks &&
10235 	       !env->cur_state->active_preempt_locks &&
10236 	       !env->cur_state->active_locks &&
10237 	       !env->cur_state->active_irq_id &&
10238 	       in_sleepable(env);
10239 }
10240 
non_sleepable_context_description(struct bpf_verifier_env * env)10241 static const char *non_sleepable_context_description(struct bpf_verifier_env *env)
10242 {
10243 	if (env->cur_state->active_rcu_locks)
10244 		return "rcu_read_lock region";
10245 	if (env->cur_state->active_preempt_locks)
10246 		return "non-preemptible region";
10247 	if (env->cur_state->active_irq_id)
10248 		return "IRQ-disabled region";
10249 	if (env->cur_state->active_locks)
10250 		return "lock region";
10251 	return "non-sleepable prog";
10252 }
10253 
/* Verify one call to a BPF helper (BPF_CALL insn whose imm is the helper id):
 * resolve the prototype, enforce GPL/context/argument constraints, model the
 * helper's effects on references and packet pointers, run helper-specific
 * checks, and finally set up R0 according to the declared return type.
 * Returns 0 on success or a negative error to reject the program.
 */
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			     int *insn_idx_p)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	bool returns_cpu_specific_alloc_ptr = false;
	const struct bpf_func_proto *fn = NULL;
	enum bpf_return_type ret_type;
	enum bpf_type_flag ret_flag;
	struct bpf_reg_state *regs;
	struct bpf_call_arg_meta meta;
	int insn_idx = *insn_idx_p;
	bool changes_data;
	int i, err, func_id;

	/* find function prototype */
	func_id = insn->imm;
	err = bpf_get_helper_proto(env, insn->imm, &fn);
	if (err == -ERANGE) {
		verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
		return -EINVAL;
	}

	if (err) {
		verbose(env, "program of this type cannot use helper %s#%d\n",
			func_id_name(func_id), func_id);
		return err;
	}

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	if (!env->prog->gpl_compatible && fn->gpl_only) {
		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
		return -EINVAL;
	}

	if (fn->allowed && !fn->allowed(env->prog)) {
		verbose(env, "helper call is not allowed in probe\n");
		return -EINVAL;
	}

	/* With LD_ABS/IND some JITs save/restore skb from r1. */
	changes_data = bpf_helper_changes_pkt_data(func_id);
	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
		verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
		return -EFAULT;
	}

	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	err = check_func_proto(fn);
	if (err) {
		verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
		return err;
	}

	/* Helpers that may sleep are only callable when nothing in the
	 * current state forbids sleeping (RCU/preempt/lock/IRQ sections).
	 */
	if (fn->might_sleep && !in_sleepable_context(env)) {
		verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id,
			non_sleepable_context_description(env));
		return -EINVAL;
	}

	/* Track non-sleepable context for helpers. */
	if (!in_sleepable_context(env))
		env->insn_aux_data[insn_idx].non_sleepable = true;

	meta.func_id = func_id;
	/* check args */
	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
		err = check_func_arg(env, i, &meta, fn, insn_idx);
		if (err)
			return err;
	}

	err = record_func_map(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	err = record_func_key(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;
	}

	regs = cur_regs(env);

	/* A release argument must carry a previously acquired reference
	 * (a dynptr, a kptr being exchanged, or a generic ref_obj_id); a
	 * known-NULL register is also accepted since there is nothing to
	 * release in that case.
	 */
	if (meta.release_regno) {
		err = -EINVAL;
		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
		} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
			u32 ref_obj_id = meta.ref_obj_id;
			bool in_rcu = in_rcu_cs(env);
			struct bpf_func_state *state;
			struct bpf_reg_state *reg;

			err = release_reference_nomark(env->cur_state, ref_obj_id);
			if (!err) {
				bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
					if (reg->ref_obj_id == ref_obj_id) {
						/* Inside an RCU section a per-CPU MEM_ALLOC
						 * pointer stays usable as MEM_RCU after the
						 * exchange; everywhere else it is invalidated.
						 */
						if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
							reg->ref_obj_id = 0;
							reg->type &= ~MEM_ALLOC;
							reg->type |= MEM_RCU;
						} else {
							mark_reg_invalid(env, reg);
						}
					}
				}));
			}
		} else if (meta.ref_obj_id) {
			err = release_reference(env, meta.ref_obj_id);
		} else if (bpf_register_is_null(&regs[meta.release_regno])) {
			/* meta.ref_obj_id can only be 0 if register that is meant to be
			 * released is NULL, which must be > R0.
			 */
			err = 0;
		}
		if (err) {
			verbose(env, "func %s#%d reference has not been acquired before\n",
				func_id_name(func_id), func_id);
			return err;
		}
	}

	/* Helper-specific checks; callback-taking helpers push the callback
	 * body for verification via push_callback_call().
	 */
	switch (func_id) {
	case BPF_FUNC_tail_call:
		err = check_resource_leak(env, false, true, "tail_call");
		if (err)
			return err;
		break;
	case BPF_FUNC_get_local_storage:
		/* check that flags argument in get_local_storage(map, flags) is 0,
		 * this is required because get_local_storage() can't return an error.
		 */
		if (!bpf_register_is_null(&regs[BPF_REG_2])) {
			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_for_each_map_elem:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_map_elem_callback_state);
		break;
	case BPF_FUNC_timer_set_callback:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_timer_callback_state);
		break;
	case BPF_FUNC_find_vma:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_find_vma_callback_state);
		break;
	case BPF_FUNC_snprintf:
		err = check_bpf_snprintf_call(env, regs);
		break;
	case BPF_FUNC_loop:
		update_loop_inline_state(env, meta.subprogno);
		/* Verifier relies on R1 value to determine if bpf_loop() iteration
		 * is finished, thus mark it precise.
		 */
		err = mark_chain_precision(env, BPF_REG_1);
		if (err)
			return err;
		if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
						 set_loop_callback_state);
		} else {
			cur_func(env)->callback_depth = 0;
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "frame%d bpf_loop iteration limit reached\n",
					env->cur_state->curframe);
		}
		break;
	case BPF_FUNC_dynptr_from_mem:
		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
				reg_type_str(env, regs[BPF_REG_1].type));
			return -EACCES;
		}
		break;
	case BPF_FUNC_set_retval:
		if (prog_type == BPF_PROG_TYPE_LSM &&
		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
			if (!env->prog->aux->attach_func_proto->type) {
				/* Make sure programs that attach to void
				 * hooks don't try to modify return value.
				 */
				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
				return -EINVAL;
			}
		}
		break;
	case BPF_FUNC_dynptr_data:
	{
		struct bpf_reg_state *reg;
		int id, ref_obj_id;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;


		if (meta.dynptr_id) {
			verifier_bug(env, "meta.dynptr_id already set");
			return -EFAULT;
		}
		if (meta.ref_obj_id) {
			verifier_bug(env, "meta.ref_obj_id already set");
			return -EFAULT;
		}

		id = dynptr_id(env, reg);
		if (id < 0) {
			verifier_bug(env, "failed to obtain dynptr id");
			return id;
		}

		ref_obj_id = dynptr_ref_obj_id(env, reg);
		if (ref_obj_id < 0) {
			verifier_bug(env, "failed to obtain dynptr ref_obj_id");
			return ref_obj_id;
		}

		/* Propagate the dynptr's identity into meta so R0 (the returned
		 * slice) can be tied back to it below.
		 */
		meta.dynptr_id = id;
		meta.ref_obj_id = ref_obj_id;

		break;
	}
	case BPF_FUNC_dynptr_write:
	{
		enum bpf_dynptr_type dynptr_type;
		struct bpf_reg_state *reg;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;

		dynptr_type = dynptr_get_type(env, reg);
		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
			return -EFAULT;

		if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
		    dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
			/* this will trigger clear_all_pkt_pointers(), which will
			 * invalidate all dynptr slices associated with the skb
			 */
			changes_data = true;

		break;
	}
	case BPF_FUNC_per_cpu_ptr:
	case BPF_FUNC_this_cpu_ptr:
	{
		struct bpf_reg_state *reg = &regs[BPF_REG_1];
		const struct btf_type *type;

		if (reg->type & MEM_RCU) {
			type = btf_type_by_id(reg->btf, reg->btf_id);
			if (!type || !btf_type_is_struct(type)) {
				verbose(env, "Helper has invalid btf/btf_id in R1\n");
				return -EFAULT;
			}
			returns_cpu_specific_alloc_ptr = true;
			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
		}
		break;
	}
	case BPF_FUNC_user_ringbuf_drain:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_user_ringbuf_callback_state);
		break;
	}

	if (err)
		return err;

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* helper call returns 64-bit value. */
	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;

	/* update return register (already marked as written above) */
	ret_type = fn->ret_type;
	ret_flag = type_flag(ret_type);

	/* Model R0 according to the helper's declared return type. */
	switch (base_type(ret_type)) {
	case RET_INTEGER:
		/* sets type to SCALAR_VALUE */
		mark_reg_unknown(env, regs, BPF_REG_0);
		break;
	case RET_VOID:
		regs[BPF_REG_0].type = NOT_INIT;
		break;
	case RET_PTR_TO_MAP_VALUE:
		/* There is no offset yet applied, variable or fixed */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		/* remember map_ptr, so that check_map_access()
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (meta.map.ptr == NULL) {
			verifier_bug(env, "unexpected null map_ptr");
			return -EFAULT;
		}

		/* Lookups into (per-CPU) arrays with an in-bounds constant key
		 * cannot fail, so the NULL check on the result can be elided.
		 */
		if (func_id == BPF_FUNC_map_lookup_elem &&
		    can_elide_value_nullness(meta.map.ptr->map_type) &&
		    meta.const_map_key >= 0 &&
		    meta.const_map_key < meta.map.ptr->max_entries)
			ret_flag &= ~PTR_MAYBE_NULL;

		regs[BPF_REG_0].map_ptr = meta.map.ptr;
		regs[BPF_REG_0].map_uid = meta.map.uid;
		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
		if (!type_may_be_null(ret_flag) &&
		    btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
			regs[BPF_REG_0].id = ++env->id_gen;
		}
		break;
	case RET_PTR_TO_SOCKET:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
		break;
	case RET_PTR_TO_SOCK_COMMON:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
		break;
	case RET_PTR_TO_TCP_SOCK:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
		break;
	case RET_PTR_TO_MEM:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
		regs[BPF_REG_0].mem_size = meta.mem_size;
		break;
	case RET_PTR_TO_MEM_OR_BTF_ID:
	{
		const struct btf_type *t;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
		if (!btf_type_is_struct(t)) {
			u32 tsize;
			const struct btf_type *ret;
			const char *tname;

			/* resolve the type size of ksym. */
			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
			if (IS_ERR(ret)) {
				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
				verbose(env, "unable to resolve the size of type '%s': %ld\n",
					tname, PTR_ERR(ret));
				return -EINVAL;
			}
			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
			regs[BPF_REG_0].mem_size = tsize;
		} else {
			if (returns_cpu_specific_alloc_ptr) {
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
			} else {
				/* MEM_RDONLY may be carried from ret_flag, but it
				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
				 * it will confuse the check of PTR_TO_BTF_ID in
				 * check_mem_access().
				 */
				ret_flag &= ~MEM_RDONLY;
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
			}

			regs[BPF_REG_0].btf = meta.ret_btf;
			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
		}
		break;
	}
	case RET_PTR_TO_BTF_ID:
	{
		struct btf *ret_btf;
		int ret_btf_id;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
		if (func_id == BPF_FUNC_kptr_xchg) {
			/* kptr_xchg returns the old kptr; its BTF identity comes
			 * from the kptr field, not from the helper prototype.
			 */
			ret_btf = meta.kptr_field->kptr.btf;
			ret_btf_id = meta.kptr_field->kptr.btf_id;
			if (!btf_is_kernel(ret_btf)) {
				regs[BPF_REG_0].type |= MEM_ALLOC;
				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
					regs[BPF_REG_0].type |= MEM_PERCPU;
			}
		} else {
			if (fn->ret_btf_id == BPF_PTR_POISON) {
				verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
					     func_id_name(func_id));
				return -EFAULT;
			}
			ret_btf = btf_vmlinux;
			ret_btf_id = *fn->ret_btf_id;
		}
		if (ret_btf_id == 0) {
			verbose(env, "invalid return type %u of func %s#%d\n",
				base_type(ret_type), func_id_name(func_id),
				func_id);
			return -EINVAL;
		}
		regs[BPF_REG_0].btf = ret_btf;
		regs[BPF_REG_0].btf_id = ret_btf_id;
		break;
	}
	default:
		verbose(env, "unknown return type %u of func %s#%d\n",
			base_type(ret_type), func_id_name(func_id), func_id);
		return -EINVAL;
	}

	if (type_may_be_null(regs[BPF_REG_0].type))
		regs[BPF_REG_0].id = ++env->id_gen;

	if (helper_multiple_ref_obj_use(func_id, meta.map.ptr)) {
		verifier_bug(env, "func %s#%d sets ref_obj_id more than once",
			     func_id_name(func_id), func_id);
		return -EFAULT;
	}

	if (is_dynptr_ref_function(func_id))
		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;

	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
	} else if (is_acquire_function(func_id, meta.map.ptr)) {
		int id = acquire_reference(env, insn_idx);

		if (id < 0)
			return id;
		/* For mark_ptr_or_null_reg() */
		regs[BPF_REG_0].id = id;
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = id;
	}

	err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta);
	if (err)
		return err;

	err = check_map_func_compatibility(env, meta.map.ptr, func_id);
	if (err)
		return err;

	/* Stack-capturing helpers need the perf callchain buffer; allocate it
	 * once per program, or reject when perf events are not built in.
	 */
	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
		err = get_callchain_buffers(sysctl_perf_event_max_stack);
		err_str = "cannot get callchain buffer for func %s#%d\n";
#else
		err = -ENOTSUPP;
		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
#endif
		if (err) {
			verbose(env, err_str, func_id_name(func_id), func_id);
			return err;
		}

		env->prog->has_callchain_buf = true;
	}

	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
		env->prog->call_get_stack = true;

	if (func_id == BPF_FUNC_get_func_ip) {
		if (check_get_func_ip(env))
			return -ENOTSUPP;
		env->prog->call_get_func_ip = true;
	}

	if (func_id == BPF_FUNC_tail_call) {
		/* tail_call inside a subprog: push the fall-through state
		 * (next insn in this frame) and continue verification on the
		 * frame-exit path, since a successful tail call never returns.
		 */
		if (env->cur_state->curframe) {
			struct bpf_verifier_state *branch;

			mark_reg_scratched(env, BPF_REG_0);
			branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
			if (IS_ERR(branch))
				return PTR_ERR(branch);
			clear_all_pkt_pointers(env);
			mark_reg_unknown(env, regs, BPF_REG_0);
			err = prepare_func_exit(env, &env->insn_idx);
			if (err)
				return err;
			env->insn_idx--;
		} else {
			changes_data = false;
		}
	}

	if (changes_data)
		clear_all_pkt_pointers(env);
	return 0;
}
10766 
10767 /* mark_btf_func_reg_size() is used when the reg size is determined by
10768  * the BTF func_proto's return value size and argument.
10769  */
__mark_btf_func_reg_size(struct bpf_verifier_env * env,struct bpf_reg_state * regs,u32 regno,size_t reg_size)10770 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs,
10771 				     u32 regno, size_t reg_size)
10772 {
10773 	struct bpf_reg_state *reg = &regs[regno];
10774 
10775 	if (regno == BPF_REG_0) {
10776 		/* Function return value */
10777 		reg->subreg_def = reg_size == sizeof(u64) ?
10778 			DEF_NOT_SUBREG : env->insn_idx + 1;
10779 	} else if (reg_size == sizeof(u64)) {
10780 		/* Function argument */
10781 		mark_insn_zext(env, reg);
10782 	}
10783 }
10784 
/* Convenience wrapper of __mark_btf_func_reg_size() operating on the
 * current frame's register file.
 */
static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
				   size_t reg_size)
{
	/* Plain call instead of 'return <void expr>;': returning a void
	 * expression from a void function is an ISO C constraint violation
	 * (accepted only as a GNU extension, warned by -Wpedantic).
	 */
	__mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size);
}
10790 
is_kfunc_acquire(struct bpf_kfunc_call_arg_meta * meta)10791 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10792 {
10793 	return meta->kfunc_flags & KF_ACQUIRE;
10794 }
10795 
is_kfunc_release(struct bpf_kfunc_call_arg_meta * meta)10796 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10797 {
10798 	return meta->kfunc_flags & KF_RELEASE;
10799 }
10800 
10801 
is_kfunc_destructive(struct bpf_kfunc_call_arg_meta * meta)10802 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10803 {
10804 	return meta->kfunc_flags & KF_DESTRUCTIVE;
10805 }
10806 
is_kfunc_rcu(struct bpf_kfunc_call_arg_meta * meta)10807 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10808 {
10809 	return meta->kfunc_flags & KF_RCU;
10810 }
10811 
is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta * meta)10812 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
10813 {
10814 	return meta->kfunc_flags & KF_RCU_PROTECTED;
10815 }
10816 
is_kfunc_arg_mem_size(const struct btf * btf,const struct btf_param * arg,const struct bpf_reg_state * reg)10817 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10818 				  const struct btf_param *arg,
10819 				  const struct bpf_reg_state *reg)
10820 {
10821 	const struct btf_type *t;
10822 
10823 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10824 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10825 		return false;
10826 
10827 	return btf_param_match_suffix(btf, arg, "__sz");
10828 }
10829 
is_kfunc_arg_const_mem_size(const struct btf * btf,const struct btf_param * arg,const struct bpf_reg_state * reg)10830 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10831 					const struct btf_param *arg,
10832 					const struct bpf_reg_state *reg)
10833 {
10834 	const struct btf_type *t;
10835 
10836 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10837 	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10838 		return false;
10839 
10840 	return btf_param_match_suffix(btf, arg, "__szk");
10841 }
10842 
/* True when the parameter name carries the "__k" suffix. */
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__k");
}
10847 
/* True when the parameter name carries the "__ign" suffix. */
static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__ign");
}
10852 
/* True when the parameter name carries the "__map" suffix. */
static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__map");
}
10857 
/* True when the parameter name carries the "__alloc" suffix. */
static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__alloc");
}
10862 
/* True when the parameter name carries the "__uninit" suffix. */
static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__uninit");
}
10867 
/* True when the parameter name carries the "__refcounted_kptr" suffix. */
static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__refcounted_kptr");
}
10872 
/* True when the parameter name carries the "__nullable" suffix. */
static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__nullable");
}
10877 
/* True when the parameter name carries the "__str" suffix. */
static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__str");
}
10882 
/* True when the parameter name carries the "__irq_flag" suffix. */
static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg)
{
	return btf_param_match_suffix(btf, arg, "__irq_flag");
}
10887 
is_kfunc_arg_scalar_with_name(const struct btf * btf,const struct btf_param * arg,const char * name)10888 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10889 					  const struct btf_param *arg,
10890 					  const char *name)
10891 {
10892 	int len, target_len = strlen(name);
10893 	const char *param_name;
10894 
10895 	param_name = btf_name_by_offset(btf, arg->name_off);
10896 	if (str_is_empty(param_name))
10897 		return false;
10898 	len = strlen(param_name);
10899 	if (len != target_len)
10900 		return false;
10901 	if (strcmp(param_name, name))
10902 		return false;
10903 
10904 	return true;
10905 }
10906 
/* Indices into kf_arg_btf_ids[]; order must match the BTF_ID_LIST entries
 * that follow.
 */
enum {
	KF_ARG_DYNPTR_ID,
	KF_ARG_LIST_HEAD_ID,
	KF_ARG_LIST_NODE_ID,
	KF_ARG_RB_ROOT_ID,
	KF_ARG_RB_NODE_ID,
	KF_ARG_WORKQUEUE_ID,
	KF_ARG_RES_SPIN_LOCK_ID,
	KF_ARG_TASK_WORK_ID,
	KF_ARG_PROG_AUX_ID,
	KF_ARG_TIMER_ID
};
10919 
/* BTF ids of the special kfunc argument struct types; entries must stay in
 * the same order as the KF_ARG_*_ID enum above.
 */
BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
BTF_ID(struct, bpf_rb_root)
BTF_ID(struct, bpf_rb_node)
BTF_ID(struct, bpf_wq)
BTF_ID(struct, bpf_res_spin_lock)
BTF_ID(struct, bpf_task_work)
BTF_ID(struct, bpf_prog_aux)
BTF_ID(struct, bpf_timer)
10931 
10932 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10933 				    const struct btf_param *arg, int type)
10934 {
10935 	const struct btf_type *t;
10936 	u32 res_id;
10937 
10938 	t = btf_type_skip_modifiers(btf, arg->type, NULL);
10939 	if (!t)
10940 		return false;
10941 	if (!btf_type_is_ptr(t))
10942 		return false;
10943 	t = btf_type_skip_modifiers(btf, t->type, &res_id);
10944 	if (!t)
10945 		return false;
10946 	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10947 }
10948 
/* True when the parameter is a pointer to struct bpf_dynptr. */
static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
}
10953 
/* True when the parameter is a pointer to struct bpf_list_head. */
static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
}
10958 
/* True when the parameter is a pointer to struct bpf_list_node. */
static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
}
10963 
/* True when the parameter is a pointer to struct bpf_rb_root. */
static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
}
10968 
/* True when the parameter is a pointer to struct bpf_rb_node. */
static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
}
10973 
/* True when the parameter is a pointer to struct bpf_timer. */
static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID);
}
10978 
/* True when the parameter is a pointer to struct bpf_wq. */
static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
}
10983 
/* True when the parameter is a pointer to struct bpf_task_work. */
static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
}
10988 
/* True when the parameter is a pointer to struct bpf_res_spin_lock. */
static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
}
10993 
/* True when @t is exactly the vmlinux struct bpf_rb_node type. */
static bool is_rbtree_node_type(const struct btf_type *t)
{
	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
}
10998 
/* True when @t is exactly the vmlinux struct bpf_list_node type. */
static bool is_list_node_type(const struct btf_type *t)
{
	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
}
11003 
is_kfunc_arg_callback(struct bpf_verifier_env * env,const struct btf * btf,const struct btf_param * arg)11004 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
11005 				  const struct btf_param *arg)
11006 {
11007 	const struct btf_type *t;
11008 
11009 	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
11010 	if (!t)
11011 		return false;
11012 
11013 	return true;
11014 }
11015 
/* True when the parameter is a pointer to struct bpf_prog_aux. */
static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID);
}
11020 
11021 /*
11022  * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF:
11023  *   - the _impl prototype with full arg list (meta->func_proto)
11024  *   - the BPF API prototype w/o implicit args (func->type in BTF)
11025  * To determine whether an argument is implicit, we compare its position
11026  * against the number of arguments in the prototype w/o implicit args.
11027  */
is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta * meta,u32 arg_idx)11028 static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx)
11029 {
11030 	const struct btf_type *func, *func_proto;
11031 	u32 argn;
11032 
11033 	if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS))
11034 		return false;
11035 
11036 	func = btf_type_by_id(meta->btf, meta->func_id);
11037 	func_proto = btf_type_by_id(meta->btf, func->type);
11038 	argn = btf_type_vlen(func_proto);
11039 
11040 	return argn <= arg_idx;
11041 }
11042 
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
					const struct btf *btf,
					const struct btf_type *t, int rec)
{
	const struct btf_type *member_type;
	const struct btf_member *member;
	u32 i;

	if (!btf_type_is_struct(t))
		return false;

	for_each_member(i, t, member) {
		const struct btf_array *array;

		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
		if (btf_type_is_struct(member_type)) {
			/* rec counts nesting levels below the top-level struct;
			 * rec >= 3 means recursing would enter a 5th level.
			 */
			if (rec >= 3) {
				verbose(env, "max struct nesting depth exceeded\n");
				return false;
			}
			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
				return false;
			continue;
		}
		if (btf_type_is_array(member_type)) {
			array = btf_array(member_type);
			/* Reject zero-sized (flexible) arrays. */
			if (!array->nelems)
				return false;
			/* Array elements must be plain scalars; nested structs
			 * or arrays-of-arrays are not accepted here.
			 */
			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
			if (!btf_type_is_scalar(member_type))
				return false;
			continue;
		}
		if (!btf_type_is_scalar(member_type))
			return false;
	}
	return true;
}
11082 
/*
 * Classification of a kfunc pointer argument, derived from its BTF type by
 * get_kfunc_ptr_arg_type() below. The caller then applies per-kind
 * register checks.
 */
enum kfunc_ptr_arg_type {
	KF_ARG_PTR_TO_CTX,
	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
	KF_ARG_PTR_TO_DYNPTR,
	KF_ARG_PTR_TO_ITER,
	KF_ARG_PTR_TO_LIST_HEAD,
	KF_ARG_PTR_TO_LIST_NODE,
	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
	KF_ARG_PTR_TO_MEM,
	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
	KF_ARG_PTR_TO_CALLBACK,
	KF_ARG_PTR_TO_RB_ROOT,
	KF_ARG_PTR_TO_RB_NODE,
	KF_ARG_PTR_TO_NULL,
	KF_ARG_PTR_TO_CONST_STR,
	KF_ARG_PTR_TO_MAP,
	KF_ARG_PTR_TO_TIMER,
	KF_ARG_PTR_TO_WORKQUEUE,
	KF_ARG_PTR_TO_IRQ_FLAG,
	KF_ARG_PTR_TO_RES_SPIN_LOCK,
	KF_ARG_PTR_TO_TASK_WORK,
};
11106 
/*
 * Indexes into special_kfunc_list below. The BTF_ID_LIST entries must be
 * kept in exactly this order, one entry per enumerator.
 */
enum special_kfunc_type {
	KF_bpf_obj_new_impl,
	KF_bpf_obj_new,
	KF_bpf_obj_drop_impl,
	KF_bpf_obj_drop,
	KF_bpf_refcount_acquire_impl,
	KF_bpf_refcount_acquire,
	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_front,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_push_back,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_list_front,
	KF_bpf_list_back,
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_add,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_from_skb_meta,
	KF_bpf_xdp_pull_data,
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,
	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_new,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_percpu_obj_drop,
	KF_bpf_throw,
	KF_bpf_wq_set_callback,
	KF_bpf_preempt_disable,
	KF_bpf_preempt_enable,
	KF_bpf_iter_css_task_new,
	KF_bpf_session_cookie,
	KF_bpf_get_kmem_cache,
	KF_bpf_local_irq_save,
	KF_bpf_local_irq_restore,
	KF_bpf_iter_num_new,
	KF_bpf_iter_num_next,
	KF_bpf_iter_num_destroy,
	KF_bpf_set_dentry_xattr,
	KF_bpf_remove_dentry_xattr,
	KF_bpf_res_spin_lock,
	KF_bpf_res_spin_unlock,
	KF_bpf_res_spin_lock_irqsave,
	KF_bpf_res_spin_unlock_irqrestore,
	KF_bpf_dynptr_from_file,
	KF_bpf_dynptr_file_discard,
	KF___bpf_trap,
	KF_bpf_task_work_schedule_signal,
	KF_bpf_task_work_schedule_resume,
	KF_bpf_arena_alloc_pages,
	KF_bpf_arena_free_pages,
	KF_bpf_arena_reserve_pages,
	KF_bpf_session_is_return,
	KF_bpf_stream_vprintk,
	KF_bpf_stream_print_stack,
};
11174 
/* Must stay in sync, entry for entry, with enum special_kfunc_type above.
 * BTF_ID_UNUSED keeps the slot (and thus the enum-based indexing) when a
 * kfunc is compiled out by the surrounding #ifdef.
 */
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_new)
BTF_ID(func, bpf_obj_drop_impl)
BTF_ID(func, bpf_obj_drop)
BTF_ID(func, bpf_refcount_acquire_impl)
BTF_ID(func, bpf_refcount_acquire)
BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_front)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_push_back)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_from_skb_meta)
BTF_ID(func, bpf_xdp_pull_data)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_new)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_percpu_obj_drop)
BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_get_kmem_cache)
BTF_ID(func, bpf_local_irq_save)
BTF_ID(func, bpf_local_irq_restore)
BTF_ID(func, bpf_iter_num_new)
BTF_ID(func, bpf_iter_num_next)
BTF_ID(func, bpf_iter_num_destroy)
#ifdef CONFIG_BPF_LSM
BTF_ID(func, bpf_set_dentry_xattr)
BTF_ID(func, bpf_remove_dentry_xattr)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, bpf_dynptr_from_file)
BTF_ID(func, bpf_dynptr_file_discard)
BTF_ID(func, __bpf_trap)
BTF_ID(func, bpf_task_work_schedule_signal)
BTF_ID(func, bpf_task_work_schedule_resume)
BTF_ID(func, bpf_arena_alloc_pages)
BTF_ID(func, bpf_arena_free_pages)
BTF_ID(func, bpf_arena_reserve_pages)
BTF_ID(func, bpf_session_is_return)
BTF_ID(func, bpf_stream_vprintk)
BTF_ID(func, bpf_stream_print_stack)
11261 
11262 static bool is_bpf_obj_new_kfunc(u32 func_id)
11263 {
11264 	return func_id == special_kfunc_list[KF_bpf_obj_new] ||
11265 	       func_id == special_kfunc_list[KF_bpf_obj_new_impl];
11266 }
11267 
is_bpf_percpu_obj_new_kfunc(u32 func_id)11268 static bool is_bpf_percpu_obj_new_kfunc(u32 func_id)
11269 {
11270 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] ||
11271 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl];
11272 }
11273 
is_bpf_obj_drop_kfunc(u32 func_id)11274 static bool is_bpf_obj_drop_kfunc(u32 func_id)
11275 {
11276 	return func_id == special_kfunc_list[KF_bpf_obj_drop] ||
11277 	       func_id == special_kfunc_list[KF_bpf_obj_drop_impl];
11278 }
11279 
is_bpf_percpu_obj_drop_kfunc(u32 func_id)11280 static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id)
11281 {
11282 	return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] ||
11283 	       func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl];
11284 }
11285 
is_bpf_refcount_acquire_kfunc(u32 func_id)11286 static bool is_bpf_refcount_acquire_kfunc(u32 func_id)
11287 {
11288 	return func_id == special_kfunc_list[KF_bpf_refcount_acquire] ||
11289 	       func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
11290 }
11291 
is_bpf_list_push_kfunc(u32 func_id)11292 static bool is_bpf_list_push_kfunc(u32 func_id)
11293 {
11294 	return func_id == special_kfunc_list[KF_bpf_list_push_front] ||
11295 	       func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11296 	       func_id == special_kfunc_list[KF_bpf_list_push_back] ||
11297 	       func_id == special_kfunc_list[KF_bpf_list_push_back_impl];
11298 }
11299 
is_bpf_rbtree_add_kfunc(u32 func_id)11300 static bool is_bpf_rbtree_add_kfunc(u32 func_id)
11301 {
11302 	return func_id == special_kfunc_list[KF_bpf_rbtree_add] ||
11303 	       func_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
11304 }
11305 
is_task_work_add_kfunc(u32 func_id)11306 static bool is_task_work_add_kfunc(u32 func_id)
11307 {
11308 	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
11309 	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
11310 }
11311 
is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta * meta)11312 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
11313 {
11314 	if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref)
11315 		return false;
11316 
11317 	return meta->kfunc_flags & KF_RET_NULL;
11318 }
11319 
/* Is this call bpf_rcu_read_lock()? */
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
}
11324 
/* Is this call bpf_rcu_read_unlock()? */
static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
}
11329 
/* Is this call bpf_preempt_disable()? */
static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable];
}
11334 
/* Is this call bpf_preempt_enable()? */
static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
}
11339 
/* True for kfuncs that may change packet data (currently only
 * bpf_xdp_pull_data); non-static so other verifier files can use it.
 */
bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
}
11344 
/*
 * Classify pointer argument @argno (register R(argno+1)) of the kfunc
 * described by @meta into a kfunc_ptr_arg_type based on the argument's
 * BTF (@t / @ref_t / @ref_tname).  The order of checks below is
 * significant: more specific kinds must be recognized before the
 * catch-all KF_ARG_PTR_TO_MEM.  Returns a negative errno (smuggled
 * through the enum return type; callers check for < 0) on rejection.
 */
static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
		       struct bpf_kfunc_call_arg_meta *meta,
		       const struct btf_type *t, const struct btf_type *ref_t,
		       const char *ref_tname, const struct btf_param *args,
		       int argno, int nargs)
{
	u32 regno = argno + 1;
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	bool arg_mem_size = false;

	/* These kfuncs take the program context regardless of declared type. */
	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
	    meta->func_id == special_kfunc_list[KF_bpf_session_is_return] ||
	    meta->func_id == special_kfunc_list[KF_bpf_session_cookie])
		return KF_ARG_PTR_TO_CTX;

	/* A following size argument turns this one into a (mem, size) pair. */
	if (argno + 1 < nargs &&
	    (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
	     is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
		arg_mem_size = true;

	/* In this function, we verify the kfunc's BTF as per the argument type,
	 * leaving the rest of the verification with respect to the register
	 * type to our caller. When a set of conditions hold in the BTF type of
	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
	 */
	if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
		return KF_ARG_PTR_TO_CTX;

	if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && bpf_register_is_null(reg) &&
	    !arg_mem_size)
		return KF_ARG_PTR_TO_NULL;

	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_ALLOC_BTF_ID;

	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;

	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_DYNPTR;

	if (is_kfunc_arg_iter(meta, argno, &args[argno]))
		return KF_ARG_PTR_TO_ITER;

	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_HEAD;

	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_NODE;

	if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_ROOT;

	if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_NODE;

	if (is_kfunc_arg_const_str(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_CONST_STR;

	if (is_kfunc_arg_map(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_MAP;

	if (is_kfunc_arg_wq(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_WORKQUEUE;

	if (is_kfunc_arg_timer(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_TIMER;

	if (is_kfunc_arg_task_work(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_TASK_WORK;

	if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_IRQ_FLAG;

	if (is_kfunc_arg_res_spin_lock(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RES_SPIN_LOCK;

	/* This case keys off the register's current type, not the BTF arg:
	 * PTR_TO_BTF_ID registers (or trusted types listed in reg2btf_ids)
	 * must point to a struct.
	 */
	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
		if (!btf_type_is_struct(ref_t)) {
			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
			return -EINVAL;
		}
		return KF_ARG_PTR_TO_BTF_ID;
	}

	if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_CALLBACK;

	/* This is the catch all argument type of register types supported by
	 * check_helper_mem_access. However, we only allow when argument type is
	 * pointer to scalar, or struct composed (recursively) of scalars. When
	 * arg_mem_size is true, the pointer can be void *.
	 */
	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
		return -EINVAL;
	}
	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
}
11449 
/*
 * Check that register @reg (kfunc argument @argno) type-matches the kfunc's
 * declared pointer-to-BTF-ID parameter described by @ref_t/@ref_tname/@ref_id.
 * Returns 0 on match, -EINVAL on mismatch.
 */
static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
					struct bpf_reg_state *reg,
					const struct btf_type *ref_t,
					const char *ref_tname, u32 ref_id,
					struct bpf_kfunc_call_arg_meta *meta,
					int argno)
{
	const struct btf_type *reg_ref_t;
	bool strict_type_match = false;
	const struct btf *reg_btf;
	const char *reg_ref_tname;
	bool taking_projection;
	bool struct_same;
	u32 reg_ref_id;

	/* Non-PTR_TO_BTF_ID registers map to a vmlinux type via reg2btf_ids. */
	if (base_type(reg->type) == PTR_TO_BTF_ID) {
		reg_btf = reg->btf;
		reg_ref_id = reg->btf_id;
	} else {
		reg_btf = btf_vmlinux;
		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
	}

	/* Enforce strict type matching for calls to kfuncs that are acquiring
	 * or releasing a reference, or are no-cast aliases. We do _not_
	 * enforce strict matching for kfuncs by default,
	 * as we want to enable BPF programs to pass types that are bitwise
	 * equivalent without forcing them to explicitly cast with something
	 * like bpf_cast_to_kern_ctx().
	 *
	 * For example, say we had a type like the following:
	 *
	 * struct bpf_cpumask {
	 *	cpumask_t cpumask;
	 *	refcount_t usage;
	 * };
	 *
	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
	 * to a struct cpumask, so it would be safe to pass a struct
	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
	 *
	 * The philosophy here is similar to how we allow scalars of different
	 * types to be passed to kfuncs as long as the size is the same. The
	 * only difference here is that we're simply allowing
	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
	 * resolve types.
	 */
	if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
		strict_type_match = true;

	WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off));

	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
	/* reg->var_off.value is the constant offset to start the walk at. */
	struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value,
					   meta->btf, ref_id, strict_type_match);
	/* If kfunc is accepting a projection type (ie. __sk_buff), it cannot
	 * actually use it -- it must cast to the underlying type. So we allow
	 * caller to pass in the underlying type.
	 */
	taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname);
	if (!taking_projection && !struct_same) {
		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
			btf_type_str(reg_ref_t), reg_ref_tname);
		return -EINVAL;
	}
	return 0;
}
11520 
/*
 * Verify the irq-flag stack argument (R@regno) of the bpf_local_irq_*() and
 * bpf_res_spin_{lock,unlock}_irq*() kfuncs.  Save-type kfuncs require an
 * uninitialized slot, which is written and marked as holding irq flags;
 * restore-type kfuncs require the matching initialized slot and unmark it.
 * Returns 0 on success, negative errno on failure.
 */
static int process_irq_flag(struct bpf_verifier_env *env, int regno,
			     struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	int err, kfunc_class = IRQ_NATIVE_KFUNC;
	bool irq_save;

	/* Decide save vs restore and whether the flags belong to a
	 * resilient-spin-lock kfunc (tracked as a separate class).
	 */
	if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] ||
	    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) {
		irq_save = true;
		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
			kfunc_class = IRQ_LOCK_KFUNC;
	} else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] ||
		   meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) {
		irq_save = false;
		if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
			kfunc_class = IRQ_LOCK_KFUNC;
	} else {
		verifier_bug(env, "unknown irq flags kfunc");
		return -EFAULT;
	}

	if (irq_save) {
		/* The slot must not already hold live irq flags. */
		if (!is_irq_flag_reg_valid_uninit(env, reg)) {
			verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1);
			return -EINVAL;
		}

		/* The kfunc stores the saved flags into the slot (8 bytes). */
		err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false);
		if (err)
			return err;

		err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class);
		if (err)
			return err;
	} else {
		err = is_irq_flag_reg_valid_init(env, reg);
		if (err) {
			verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1);
			return err;
		}

		err = mark_irq_flag_read(env, reg);
		if (err)
			return err;

		err = unmark_stack_slot_irq_flag(env, reg, kfunc_class);
		if (err)
			return err;
	}
	return 0;
}
11573 
11574 
ref_set_non_owning(struct bpf_verifier_env * env,struct bpf_reg_state * reg)11575 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11576 {
11577 	struct btf_record *rec = reg_btf_record(reg);
11578 
11579 	if (!env->cur_state->active_locks) {
11580 		verifier_bug(env, "%s w/o active lock", __func__);
11581 		return -EFAULT;
11582 	}
11583 
11584 	if (type_flag(reg->type) & NON_OWN_REF) {
11585 		verifier_bug(env, "NON_OWN_REF already set");
11586 		return -EFAULT;
11587 	}
11588 
11589 	reg->type |= NON_OWN_REF;
11590 	if (rec->refcount_off >= 0)
11591 		reg->type |= MEM_RCU;
11592 
11593 	return 0;
11594 }
11595 
/*
 * Convert every register currently holding the owning reference
 * @ref_obj_id into a non-owning reference.  ref_obj_id is cleared first so
 * a later release_reference() cannot clobber these registers.  Returns 0 on
 * success, -EFAULT on verifier-internal inconsistency (no such ref state).
 */
static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_func_state *unused;
	struct bpf_reg_state *reg;
	int i;

	if (!ref_obj_id) {
		verifier_bug(env, "ref_obj_id is zero for owning -> non-owning conversion");
		return -EFAULT;
	}

	/* The conversion only proceeds if @ref_obj_id is actually tracked. */
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id != ref_obj_id)
			continue;

		/* Clear ref_obj_id here so release_reference doesn't clobber
		 * the whole reg
		 */
		bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
			if (reg->ref_obj_id == ref_obj_id) {
				reg->ref_obj_id = 0;
				ref_set_non_owning(env, reg);
			}
		}));
		return 0;
	}

	verifier_bug(env, "ref state missing for ref_obj_id");
	return -EFAULT;
}
11627 
11628 /* Implementation details:
11629  *
11630  * Each register points to some region of memory, which we define as an
11631  * allocation. Each allocation may embed a bpf_spin_lock which protects any
11632  * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
11633  * allocation. The lock and the data it protects are colocated in the same
11634  * memory region.
11635  *
 * Hence, every time a register holds a pointer value pointing to such
11637  * allocation, the verifier preserves a unique reg->id for it.
11638  *
11639  * The verifier remembers the lock 'ptr' and the lock 'id' whenever
11640  * bpf_spin_lock is called.
11641  *
11642  * To enable this, lock state in the verifier captures two values:
11643  *	active_lock.ptr = Register's type specific pointer
11644  *	active_lock.id  = A unique ID for each register pointer value
11645  *
11646  * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
11647  * supported register types.
11648  *
11649  * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
11650  * allocated objects is the reg->btf pointer.
11651  *
11652  * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
11653  * can establish the provenance of the map value statically for each distinct
11654  * lookup into such maps. They always contain a single map value hence unique
11655  * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
11656  *
11657  * So, in case of global variables, they use array maps with max_entries = 1,
11658  * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
11659  * into the same map value as max_entries is 1, as described above).
11660  *
11661  * In case of inner map lookups, the inner map pointer has same map_ptr as the
11662  * outer map pointer (in verifier context), but each lookup into an inner map
11663  * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
11664  * maps from the same outer map share the same map_ptr as active_lock.ptr, they
11665  * will get different reg->id assigned to each lookup, hence different
11666  * active_lock.id.
11667  *
11668  * In case of allocated objects, active_lock.ptr is the reg->btf, and the
11669  * reg->id is a unique ID preserved after the NULL pointer check on the pointer
11670  * returned from bpf_obj_new. Each allocation receives a new reg->id.
11671  */
check_reg_allocation_locked(struct bpf_verifier_env * env,struct bpf_reg_state * reg)11672 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
11673 {
11674 	struct bpf_reference_state *s;
11675 	void *ptr;
11676 	u32 id;
11677 
11678 	switch ((int)reg->type) {
11679 	case PTR_TO_MAP_VALUE:
11680 		ptr = reg->map_ptr;
11681 		break;
11682 	case PTR_TO_BTF_ID | MEM_ALLOC:
11683 		ptr = reg->btf;
11684 		break;
11685 	default:
11686 		verifier_bug(env, "unknown reg type for lock check");
11687 		return -EFAULT;
11688 	}
11689 	id = reg->id;
11690 
11691 	if (!env->cur_state->active_locks)
11692 		return -EINVAL;
11693 	s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr);
11694 	if (!s) {
11695 		verbose(env, "held lock and object are not in the same allocation\n");
11696 		return -EINVAL;
11697 	}
11698 	return 0;
11699 }
11700 
is_bpf_list_api_kfunc(u32 btf_id)11701 static bool is_bpf_list_api_kfunc(u32 btf_id)
11702 {
11703 	return is_bpf_list_push_kfunc(btf_id) ||
11704 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11705 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
11706 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
11707 	       btf_id == special_kfunc_list[KF_bpf_list_back];
11708 }
11709 
is_bpf_rbtree_api_kfunc(u32 btf_id)11710 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
11711 {
11712 	return is_bpf_rbtree_add_kfunc(btf_id) ||
11713 	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11714 	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
11715 	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
11716 	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11717 	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
11718 }
11719 
is_bpf_iter_num_api_kfunc(u32 btf_id)11720 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
11721 {
11722 	return btf_id == special_kfunc_list[KF_bpf_iter_num_new] ||
11723 	       btf_id == special_kfunc_list[KF_bpf_iter_num_next] ||
11724 	       btf_id == special_kfunc_list[KF_bpf_iter_num_destroy];
11725 }
11726 
is_bpf_graph_api_kfunc(u32 btf_id)11727 static bool is_bpf_graph_api_kfunc(u32 btf_id)
11728 {
11729 	return is_bpf_list_api_kfunc(btf_id) ||
11730 	       is_bpf_rbtree_api_kfunc(btf_id) ||
11731 	       is_bpf_refcount_acquire_kfunc(btf_id);
11732 }
11733 
is_bpf_res_spin_lock_kfunc(u32 btf_id)11734 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
11735 {
11736 	return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
11737 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
11738 	       btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
11739 	       btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
11740 }
11741 
is_bpf_arena_kfunc(u32 btf_id)11742 static bool is_bpf_arena_kfunc(u32 btf_id)
11743 {
11744 	return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] ||
11745 	       btf_id == special_kfunc_list[KF_bpf_arena_free_pages] ||
11746 	       btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages];
11747 }
11748 
is_bpf_stream_kfunc(u32 btf_id)11749 static bool is_bpf_stream_kfunc(u32 btf_id)
11750 {
11751 	return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] ||
11752 	       btf_id == special_kfunc_list[KF_bpf_stream_print_stack];
11753 }
11754 
kfunc_spin_allowed(u32 btf_id)11755 static bool kfunc_spin_allowed(u32 btf_id)
11756 {
11757 	return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) ||
11758 	       is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) ||
11759 	       is_bpf_stream_kfunc(btf_id);
11760 }
11761 
/* Kfuncs that invoke their callback synchronously (currently rbtree_add). */
static bool is_sync_callback_calling_kfunc(u32 btf_id)
{
	return is_bpf_rbtree_add_kfunc(btf_id);
}
11766 
is_async_callback_calling_kfunc(u32 btf_id)11767 static bool is_async_callback_calling_kfunc(u32 btf_id)
11768 {
11769 	return is_bpf_wq_set_callback_kfunc(btf_id) ||
11770 	       is_task_work_add_kfunc(btf_id);
11771 }
11772 
is_bpf_throw_kfunc(struct bpf_insn * insn)11773 static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
11774 {
11775 	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
11776 	       insn->imm == special_kfunc_list[KF_bpf_throw];
11777 }
11778 
/* Is @btf_id the bpf_wq_set_callback kfunc? */
static bool is_bpf_wq_set_callback_kfunc(u32 btf_id)
{
	return btf_id == special_kfunc_list[KF_bpf_wq_set_callback];
}
11783 
is_callback_calling_kfunc(u32 btf_id)11784 static bool is_callback_calling_kfunc(u32 btf_id)
11785 {
11786 	return is_sync_callback_calling_kfunc(btf_id) ||
11787 	       is_async_callback_calling_kfunc(btf_id);
11788 }
11789 
/* All rbtree API kfuncs require the protecting lock to be held. */
static bool is_rbtree_lock_required_kfunc(u32 btf_id)
{
	return is_bpf_rbtree_api_kfunc(btf_id);
}
11794 
check_kfunc_is_graph_root_api(struct bpf_verifier_env * env,enum btf_field_type head_field_type,u32 kfunc_btf_id)11795 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
11796 					  enum btf_field_type head_field_type,
11797 					  u32 kfunc_btf_id)
11798 {
11799 	bool ret;
11800 
11801 	switch (head_field_type) {
11802 	case BPF_LIST_HEAD:
11803 		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
11804 		break;
11805 	case BPF_RB_ROOT:
11806 		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
11807 		break;
11808 	default:
11809 		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
11810 			btf_field_type_name(head_field_type));
11811 		return false;
11812 	}
11813 
11814 	if (!ret)
11815 		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
11816 			btf_field_type_name(head_field_type));
11817 	return ret;
11818 }
11819 
check_kfunc_is_graph_node_api(struct bpf_verifier_env * env,enum btf_field_type node_field_type,u32 kfunc_btf_id)11820 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
11821 					  enum btf_field_type node_field_type,
11822 					  u32 kfunc_btf_id)
11823 {
11824 	bool ret;
11825 
11826 	switch (node_field_type) {
11827 	case BPF_LIST_NODE:
11828 		ret = is_bpf_list_push_kfunc(kfunc_btf_id);
11829 		break;
11830 	case BPF_RB_NODE:
11831 		ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) ||
11832 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11833 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
11834 		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
11835 		break;
11836 	default:
11837 		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
11838 			btf_field_type_name(node_field_type));
11839 		return false;
11840 	}
11841 
11842 	if (!ret)
11843 		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
11844 			btf_field_type_name(node_field_type));
11845 	return ret;
11846 }
11847 
/*
 * Common validation for a kfunc argument that points at a graph root
 * (bpf_list_head or bpf_rb_root).  On success the matched btf_field is
 * recorded through @head_field for later use; at most one root argument
 * per call is allowed.
 *
 * Returns 0 on success, -EFAULT for verifier-internal inconsistencies,
 * -EINVAL for invalid program input.
 */
static int
__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, u32 regno,
				   struct bpf_kfunc_call_arg_meta *meta,
				   enum btf_field_type head_field_type,
				   struct btf_field **head_field)
{
	const char *head_type_name;
	struct btf_field *field;
	struct btf_record *rec;
	u32 head_off;

	/* Graph-API kfuncs are only expected from vmlinux BTF. */
	if (meta->btf != btf_vmlinux) {
		verifier_bug(env, "unexpected btf mismatch in kfunc call");
		return -EFAULT;
	}

	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
		return -EFAULT;

	head_type_name = btf_field_type_name(head_field_type);
	/* The root field lookup below needs an exact, constant register offset. */
	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, head_type_name);
		return -EINVAL;
	}

	rec = reg_btf_record(reg);
	head_off = reg->var_off.value;
	field = btf_record_find(rec, head_off, head_field_type);
	if (!field) {
		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
		return -EINVAL;
	}

	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
	if (check_reg_allocation_locked(env, reg)) {
		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
			rec->spin_lock_off, head_type_name);
		return -EINVAL;
	}

	/* Only a single graph root argument may be supplied per kfunc call. */
	if (*head_field) {
		verifier_bug(env, "repeating %s arg", head_type_name);
		return -EFAULT;
	}
	*head_field = field;
	return 0;
}
11898 
/* Validate a bpf_list_head kfunc argument; records the field in @meta. */
static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
					   struct bpf_reg_state *reg, u32 regno,
					   struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **fieldp = &meta->arg_list_head.field;

	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta,
						  BPF_LIST_HEAD, fieldp);
}
11906 
/* Validate a bpf_rb_root kfunc argument; records the field in @meta. */
static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
					     struct bpf_reg_state *reg, u32 regno,
					     struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **fieldp = &meta->arg_rbtree_root.field;

	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta,
						  BPF_RB_ROOT, fieldp);
}
11914 
/*
 * Common validation for a kfunc argument that points at a graph node
 * (bpf_list_node or bpf_rb_node).  Checks the argument register points to
 * a node field of the correct type, at the offset recorded in the already
 * processed graph root argument, and in the struct type the root expects.
 *
 * Returns 0 on success, -EFAULT for verifier-internal inconsistencies,
 * -EINVAL for invalid program input.
 */
static int
__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, u32 regno,
				   struct bpf_kfunc_call_arg_meta *meta,
				   enum btf_field_type head_field_type,
				   enum btf_field_type node_field_type,
				   struct btf_field **node_field)
{
	const char *node_type_name;
	const struct btf_type *et, *t;
	struct btf_field *field;
	u32 node_off;

	/* Graph-API kfuncs are only expected from vmlinux BTF. */
	if (meta->btf != btf_vmlinux) {
		verifier_bug(env, "unexpected btf mismatch in kfunc call");
		return -EFAULT;
	}

	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
		return -EFAULT;

	node_type_name = btf_field_type_name(node_field_type);
	/* The node field lookup below needs an exact, constant register offset. */
	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, node_type_name);
		return -EINVAL;
	}

	node_off = reg->var_off.value;
	field = reg_find_field_offset(reg, node_off, node_field_type);
	if (!field) {
		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
		return -EINVAL;
	}

	/* The lookup above only proves a node field exists at node_off.  The
	 * checks below intentionally compare against the root argument's
	 * graph_root info (value type and node offset), so switch @field to
	 * the field recorded by the earlier root-argument processing.
	 */
	field = *node_field;

	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
	t = btf_type_by_id(reg->btf, reg->btf_id);
	/* The node argument's struct type must match the value type the graph
	 * root was declared to contain.
	 */
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
				  field->graph_root.value_btf_id, true)) {
		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
			"in struct %s, but arg is at offset=%d in struct %s\n",
			btf_field_type_name(head_field_type),
			btf_field_type_name(node_field_type),
			field->graph_root.node_offset,
			btf_name_by_offset(field->graph_root.btf, et->name_off),
			node_off, btf_name_by_offset(reg->btf, t->name_off));
		return -EINVAL;
	}
	meta->arg_btf = reg->btf;
	meta->arg_btf_id = reg->btf_id;

	/* Within the matching struct, the node must also sit at the exact
	 * offset the root's declaration points at.
	 */
	if (node_off != field->graph_root.node_offset) {
		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
			node_off, btf_field_type_name(node_field_type),
			field->graph_root.node_offset,
			btf_name_by_offset(field->graph_root.btf, et->name_off));
		return -EINVAL;
	}

	return 0;
}
11979 
/* Validate a bpf_list_node argument against the recorded list head. */
static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
					   struct bpf_reg_state *reg, u32 regno,
					   struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **fieldp = &meta->arg_list_head.field;

	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
						  BPF_LIST_HEAD, BPF_LIST_NODE,
						  fieldp);
}
11988 
/* Validate a bpf_rb_node argument against the recorded rbtree root. */
static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
					     struct bpf_reg_state *reg, u32 regno,
					     struct bpf_kfunc_call_arg_meta *meta)
{
	struct btf_field **fieldp = &meta->arg_rbtree_root.field;

	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
						  BPF_RB_ROOT, BPF_RB_NODE,
						  fieldp);
}
11997 
/*
 * css_task iter allowlist is needed to avoid deadlocking on css_set_lock.
 * LSM hooks and iters (both sleepable and non-sleepable) are safe.
 * Any sleepable prog is also safe, since bpf_check_attach_target() enforces
 * that sleepable progs can only be attached to specific hook points.
 */
check_css_task_iter_allowlist(struct bpf_verifier_env * env)12004 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
12005 {
12006 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12007 
12008 	switch (prog_type) {
12009 	case BPF_PROG_TYPE_LSM:
12010 		return true;
12011 	case BPF_PROG_TYPE_TRACING:
12012 		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
12013 			return true;
12014 		fallthrough;
12015 	default:
12016 		return in_sleepable(env);
12017 	}
12018 }
12019 
/*
 * Validate every argument of a kfunc call at @insn_idx against the kfunc's
 * BTF prototype and the verifier's current register state, filling @meta
 * with per-argument info (constants, maps, graph fields, dynptr/iter state,
 * release register, ...) that the caller uses to finish call processing.
 *
 * Returns 0 on success, -EFAULT for verifier-internal inconsistencies,
 * a negative errno (-EINVAL/-EACCES/...) for invalid programs.
 */
static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
			    int insn_idx)
{
	const char *func_name = meta->func_name, *ref_tname;
	const struct btf *btf = meta->btf;
	const struct btf_param *args;
	struct btf_record *rec;
	u32 i, nargs;
	int ret;

	args = (const struct btf_param *)(meta->func_proto + 1);
	nargs = btf_type_vlen(meta->func_proto);
	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
			MAX_BPF_FUNC_REG_ARGS);
		return -EINVAL;
	}

	/* Check that BTF function arguments match actual types that the
	 * verifier sees.
	 */
	for (i = 0; i < nargs; i++) {
		/* Argument i corresponds to register R(i+1). */
		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
		const struct btf_type *t, *ref_t, *resolve_ret;
		enum bpf_arg_type arg_type = ARG_DONTCARE;
		u32 regno = i + 1, ref_id, type_size;
		bool is_ret_buf_sz = false;
		int kf_arg_type;

		if (is_kfunc_arg_prog_aux(btf, &args[i])) {
			/* Reject repeated use bpf_prog_aux */
			if (meta->arg_prog) {
				verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
				return -EFAULT;
			}
			meta->arg_prog = true;
			cur_aux(env)->arg_prog = regno;
			continue;
		}

		if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i))
			continue;

		t = btf_type_skip_modifiers(btf, args[i].type, NULL);

		if (btf_type_is_scalar(t)) {
			if (reg->type != SCALAR_VALUE) {
				verbose(env, "R%d is not a scalar\n", regno);
				return -EINVAL;
			}

			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
				if (meta->arg_constant.found) {
					verifier_bug(env, "only one constant argument permitted");
					return -EFAULT;
				}
				if (!tnum_is_const(reg->var_off)) {
					verbose(env, "R%d must be a known constant\n", regno);
					return -EINVAL;
				}
				/* The constant's value matters for correctness,
				 * so force precision tracking for it.
				 */
				ret = mark_chain_precision(env, regno);
				if (ret < 0)
					return ret;
				meta->arg_constant.found = true;
				meta->arg_constant.value = reg->var_off.value;
			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
				meta->r0_rdonly = true;
				is_ret_buf_sz = true;
			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
				is_ret_buf_sz = true;
			}

			if (is_ret_buf_sz) {
				if (meta->r0_size) {
					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
					return -EINVAL;
				}

				if (!tnum_is_const(reg->var_off)) {
					verbose(env, "R%d is not a const\n", regno);
					return -EINVAL;
				}

				meta->r0_size = reg->var_off.value;
				ret = mark_chain_precision(env, regno);
				if (ret)
					return ret;
			}
			continue;
		}

		if (!btf_type_is_ptr(t)) {
			verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
			return -EINVAL;
		}

		/* Pointer args are trusted (non-NULL) unless annotated nullable. */
		if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) &&
		    !is_kfunc_arg_nullable(meta->btf, &args[i])) {
			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
			return -EACCES;
		}

		if (reg->ref_obj_id) {
			/* A release kfunc can only release one reference. */
			if (is_kfunc_release(meta) && meta->ref_obj_id) {
				verifier_bug(env, "more than one arg with ref_obj_id R%d %u %u",
					     regno, reg->ref_obj_id,
					     meta->ref_obj_id);
				return -EFAULT;
			}
			meta->ref_obj_id = reg->ref_obj_id;
			if (is_kfunc_release(meta))
				meta->release_regno = regno;
		}

		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
		ref_tname = btf_name_by_offset(btf, ref_t->name_off);

		kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
		if (kf_arg_type < 0)
			return kf_arg_type;

		/* First switch: checks shared across several argument kinds
		 * (trustedness, map identity) before type-specific handling.
		 */
		switch (kf_arg_type) {
		case KF_ARG_PTR_TO_NULL:
			continue;
		case KF_ARG_PTR_TO_MAP:
			if (!reg->map_ptr) {
				verbose(env, "pointer in R%d isn't map pointer\n", regno);
				return -EINVAL;
			}
			if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
					      reg->map_ptr->record->task_work_off >= 0)) {
				/* Use map_uid (which is unique id of inner map) to reject:
				 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
				 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
				 * if (inner_map1 && inner_map2) {
				 *     wq = bpf_map_lookup_elem(inner_map1);
				 *     if (wq)
				 *         // mismatch would have been allowed
				 *         bpf_wq_init(wq, inner_map2);
				 * }
				 *
				 * Comparing map_ptr is enough to distinguish normal and outer maps.
				 */
				if (meta->map.ptr != reg->map_ptr ||
				    meta->map.uid != reg->map_uid) {
					if (reg->map_ptr->record->task_work_off >= 0) {
						verbose(env,
							"bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
							meta->map.uid, reg->map_uid);
						return -EINVAL;
					}
					verbose(env,
						"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
						meta->map.uid, reg->map_uid);
					return -EINVAL;
				}
			}
			meta->map.ptr = reg->map_ptr;
			meta->map.uid = reg->map_uid;
			fallthrough;
		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
		case KF_ARG_PTR_TO_BTF_ID:
			if (!is_trusted_reg(reg)) {
				if (!is_kfunc_rcu(meta)) {
					verbose(env, "R%d must be referenced or trusted\n", regno);
					return -EINVAL;
				}
				if (!is_rcu_reg(reg)) {
					verbose(env, "R%d must be a rcu pointer\n", regno);
					return -EINVAL;
				}
			}
			fallthrough;
		case KF_ARG_PTR_TO_DYNPTR:
		case KF_ARG_PTR_TO_ITER:
		case KF_ARG_PTR_TO_LIST_HEAD:
		case KF_ARG_PTR_TO_LIST_NODE:
		case KF_ARG_PTR_TO_RB_ROOT:
		case KF_ARG_PTR_TO_RB_NODE:
		case KF_ARG_PTR_TO_MEM:
		case KF_ARG_PTR_TO_MEM_SIZE:
		case KF_ARG_PTR_TO_CALLBACK:
		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
		case KF_ARG_PTR_TO_CONST_STR:
		case KF_ARG_PTR_TO_WORKQUEUE:
		case KF_ARG_PTR_TO_TIMER:
		case KF_ARG_PTR_TO_TASK_WORK:
		case KF_ARG_PTR_TO_IRQ_FLAG:
		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
			break;
		case KF_ARG_PTR_TO_CTX:
			arg_type = ARG_PTR_TO_CTX;
			break;
		default:
			verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
			return -EFAULT;
		}

		if (is_kfunc_release(meta) && reg->ref_obj_id)
			arg_type |= OBJ_RELEASE;
		ret = check_func_arg_reg_off(env, reg, regno, arg_type);
		if (ret < 0)
			return ret;

		/* Second switch: per-kind argument processing. */
		switch (kf_arg_type) {
		case KF_ARG_PTR_TO_CTX:
			if (reg->type != PTR_TO_CTX) {
				verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
					i, reg_type_str(env, reg->type));
				return -EINVAL;
			}

			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
				if (ret < 0)
					return -EINVAL;
				meta->ret_btf_id  = ret;
			}
			break;
		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
				if (!is_bpf_obj_drop_kfunc(meta->func_id)) {
					verbose(env, "arg#%d expected for bpf_obj_drop()\n", i);
					return -EINVAL;
				}
			} else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
				if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) {
					verbose(env, "arg#%d expected for bpf_percpu_obj_drop()\n", i);
					return -EINVAL;
				}
			} else {
				verbose(env, "arg#%d expected pointer to allocated object\n", i);
				return -EINVAL;
			}
			if (!reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			if (meta->btf == btf_vmlinux) {
				meta->arg_btf = reg->btf;
				meta->arg_btf_id = reg->btf_id;
			}
			break;
		case KF_ARG_PTR_TO_DYNPTR:
		{
			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
			int clone_ref_obj_id = 0;

			if (reg->type == CONST_PTR_TO_DYNPTR)
				dynptr_arg_type |= MEM_RDONLY;

			if (is_kfunc_arg_uninit(btf, &args[i]))
				dynptr_arg_type |= MEM_UNINIT;

			/* Tag the dynptr flavor based on which kfunc is called. */
			if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
				dynptr_arg_type |= DYNPTR_TYPE_SKB;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
				dynptr_arg_type |= DYNPTR_TYPE_XDP;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
				dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
				dynptr_arg_type |= DYNPTR_TYPE_FILE;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) {
				dynptr_arg_type |= DYNPTR_TYPE_FILE;
				meta->release_regno = regno;
			} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
				   (dynptr_arg_type & MEM_UNINIT)) {
				/* A clone inherits type and refcount from its parent. */
				enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;

				if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
					verifier_bug(env, "no dynptr type for parent of clone");
					return -EFAULT;
				}

				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
				clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
				if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
					verifier_bug(env, "missing ref obj id for parent of clone");
					return -EFAULT;
				}
			}

			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
			if (ret < 0)
				return ret;

			if (!(dynptr_arg_type & MEM_UNINIT)) {
				int id = dynptr_id(env, reg);

				if (id < 0) {
					verifier_bug(env, "failed to obtain dynptr id");
					return id;
				}
				meta->initialized_dynptr.id = id;
				meta->initialized_dynptr.type = dynptr_get_type(env, reg);
				meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
			}

			break;
		}
		case KF_ARG_PTR_TO_ITER:
			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
				if (!check_css_task_iter_allowlist(env)) {
					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
					return -EINVAL;
				}
			}
			ret = process_iter_arg(env, regno, insn_idx, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_LIST_HEAD:
			if (reg->type != PTR_TO_MAP_VALUE &&
			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
				return -EINVAL;
			}
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RB_ROOT:
			if (reg->type != PTR_TO_MAP_VALUE &&
			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
				return -EINVAL;
			}
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_LIST_NODE:
			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to allocated object\n", i);
				return -EINVAL;
			}
			if (!reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RB_NODE:
			/* rbtree_add takes an owning ref; other rbtree node
			 * kfuncs accept non-owning or refcounted nodes.
			 */
			if (is_bpf_rbtree_add_kfunc(meta->func_id)) {
				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
					verbose(env, "arg#%d expected pointer to allocated object\n", i);
					return -EINVAL;
				}
				if (!reg->ref_obj_id) {
					verbose(env, "allocated object must be referenced\n");
					return -EINVAL;
				}
			} else {
				if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
					verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
					return -EINVAL;
				}
				if (in_rbtree_lock_required_cb(env)) {
					verbose(env, "%s not allowed in rbtree cb\n", func_name);
					return -EINVAL;
				}
			}

			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MAP:
			/* If argument has '__map' suffix expect 'struct bpf_map *' */
			ref_id = *reg2btf_ids[CONST_PTR_TO_MAP];
			ref_t = btf_type_by_id(btf_vmlinux, ref_id);
			ref_tname = btf_name_by_offset(btf, ref_t->name_off);
			fallthrough;
		case KF_ARG_PTR_TO_BTF_ID:
			/* Only base_type is checked, further checks are done here */
			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
			    !reg2btf_ids[base_type(reg->type)]) {
				verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
				verbose(env, "expected %s or socket\n",
					reg_type_str(env, base_type(reg->type) |
							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MEM:
			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
			if (IS_ERR(resolve_ret)) {
				verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
					i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
				return -EINVAL;
			}
			ret = check_mem_reg(env, reg, regno, type_size);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MEM_SIZE:
		{
			/* A mem, size pair: validate the buffer against the
			 * size register, and consume both arguments.
			 */
			struct bpf_reg_state *buff_reg = &regs[regno];
			const struct btf_param *buff_arg = &args[i];
			struct bpf_reg_state *size_reg = &regs[regno + 1];
			const struct btf_param *size_arg = &args[i + 1];

			if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) {
				ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
				if (ret < 0) {
					verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
					return ret;
				}
			}

			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
				if (meta->arg_constant.found) {
					verifier_bug(env, "only one constant argument permitted");
					return -EFAULT;
				}
				if (!tnum_is_const(size_reg->var_off)) {
					verbose(env, "R%d must be a known constant\n", regno + 1);
					return -EINVAL;
				}
				meta->arg_constant.found = true;
				meta->arg_constant.value = size_reg->var_off.value;
			}

			/* Skip next '__sz' or '__szk' argument */
			i++;
			break;
		}
		case KF_ARG_PTR_TO_CALLBACK:
			if (reg->type != PTR_TO_FUNC) {
				verbose(env, "arg%d expected pointer to func\n", i);
				return -EINVAL;
			}
			meta->subprogno = reg->subprogno;
			break;
		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
			if (!type_is_ptr_alloc_obj(reg->type)) {
				verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
				return -EINVAL;
			}
			if (!type_is_non_owning_ref(reg->type))
				meta->arg_owning_ref = true;

			rec = reg_btf_record(reg);
			if (!rec) {
				verifier_bug(env, "Couldn't find btf_record");
				return -EFAULT;
			}

			if (rec->refcount_off < 0) {
				verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
				return -EINVAL;
			}

			meta->arg_btf = reg->btf;
			meta->arg_btf_id = reg->btf_id;
			break;
		case KF_ARG_PTR_TO_CONST_STR:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a const string\n", i);
				return -EINVAL;
			}
			ret = check_reg_const_str(env, reg, regno);
			if (ret)
				return ret;
			break;
		case KF_ARG_PTR_TO_WORKQUEUE:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a map value\n", i);
				return -EINVAL;
			}
			ret = check_map_field_pointer(env, regno, BPF_WORKQUEUE, &meta->map);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_TIMER:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a map value\n", i);
				return -EINVAL;
			}
			ret = process_timer_kfunc(env, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_TASK_WORK:
			if (reg->type != PTR_TO_MAP_VALUE) {
				verbose(env, "arg#%d doesn't point to a map value\n", i);
				return -EINVAL;
			}
			ret = check_map_field_pointer(env, regno, BPF_TASK_WORK, &meta->map);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_IRQ_FLAG:
			if (reg->type != PTR_TO_STACK) {
				verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
				return -EINVAL;
			}
			ret = process_irq_flag(env, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RES_SPIN_LOCK:
		{
			int flags = PROCESS_RES_LOCK;

			if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d doesn't point to map value or allocated object\n", i);
				return -EINVAL;
			}

			if (!is_bpf_res_spin_lock_kfunc(meta->func_id))
				return -EFAULT;
			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])
				flags |= PROCESS_SPIN_LOCK;
			if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
			    meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
				flags |= PROCESS_LOCK_IRQ;
			ret = process_spin_lock(env, regno, flags);
			if (ret < 0)
				return ret;
			break;
		}
		}
	}

	/* A release kfunc must have received exactly one refcounted arg. */
	if (is_kfunc_release(meta) && !meta->release_regno) {
		verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
			func_name);
		return -EINVAL;
	}

	return 0;
}
12568 
/*
 * Resolve kfunc metadata for (@func_id, @offset) and populate @meta with
 * the kfunc's BTF, id, prototype and name.  Fails with -EACCES when the
 * kfunc has no flags registered or is not allowed for this program; note
 * that the identity fields of @meta are already filled in on that path.
 */
int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env,
			     s32 func_id,
			     s16 offset,
			     struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_kfunc_meta km;
	int ret;

	ret = fetch_kfunc_meta(env, func_id, offset, &km);
	if (ret)
		return ret;

	/* Zero-initialize and fill the identity fields in one step. */
	*meta = (struct bpf_kfunc_call_arg_meta) {
		.btf = km.btf,
		.func_id = km.id,
		.func_proto = km.proto,
		.func_name = km.name,
	};

	if (!km.flags || !btf_kfunc_is_allowed(km.btf, km.id, env->prog))
		return -EACCES;

	meta->kfunc_flags = *km.flags;
	return 0;
}
12594 
12595 /*
12596  * Determine how many bytes a helper accesses through a stack pointer at
12597  * argument position @arg (0-based, corresponding to R1-R5).
12598  *
12599  * Returns:
12600  *   > 0   known read access size in bytes
12601  *     0   doesn't read anything directly
12602  * S64_MIN unknown
12603  *   < 0   known write access of (-return) bytes
12604  */
s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
				  int arg, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	const struct bpf_func_proto *fn;
	enum bpf_arg_type at;
	s64 size;

	/* Unknown helper proto: report "unknown" access. */
	if (bpf_get_helper_proto(env, insn->imm, &fn) < 0)
		return S64_MIN;

	at = fn->arg_type[arg];

	switch (base_type(at)) {
	case ARG_PTR_TO_MAP_KEY:
	case ARG_PTR_TO_MAP_VALUE: {
		bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY;
		u64 val;
		int i, map_reg;

		/* Find the preceding ARG_CONST_MAP_PTR argument; the key/value
		 * access size is determined by that map's key/value size.
		 */
		for (i = 0; i < arg; i++) {
			if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR)
				break;
		}
		if (i >= arg)
			goto scan_all_maps;

		map_reg = BPF_REG_1 + i;

		/* Map register must be a known-constant map index at this
		 * call site; otherwise fall back to the conservative scan.
		 */
		if (!(aux->const_reg_map_mask & BIT(map_reg)))
			goto scan_all_maps;

		i = aux->const_reg_vals[map_reg];
		if (i < env->used_map_cnt) {
			size = is_key ? env->used_maps[i]->key_size
				      : env->used_maps[i]->value_size;
			goto out;
		}
scan_all_maps:
		/*
		 * Map pointer is not known at this call site (e.g. different
		 * maps on merged paths).  Conservatively return the largest
		 * key_size or value_size across all maps used by the program.
		 */
		val = 0;
		for (i = 0; i < env->used_map_cnt; i++) {
			struct bpf_map *map = env->used_maps[i];
			u32 sz = is_key ? map->key_size : map->value_size;

			if (sz > val)
				val = sz;
			/* Map-in-map: inner map sizes count as well. */
			if (map->inner_map_meta) {
				sz = is_key ? map->inner_map_meta->key_size
					    : map->inner_map_meta->value_size;
				if (sz > val)
					val = sz;
			}
		}
		if (!val)
			return S64_MIN;
		size = val;
		goto out;
	}
	case ARG_PTR_TO_MEM:
		if (at & MEM_FIXED_SIZE) {
			size = fn->arg_size[arg];
			goto out;
		}
		/* Variable-size memory: size comes from the next argument. */
		if (arg + 1 < ARRAY_SIZE(fn->arg_type) &&
		    arg_type_is_mem_size(fn->arg_type[arg + 1])) {
			int size_reg = BPF_REG_1 + arg + 1;

			if (aux->const_reg_mask & BIT(size_reg)) {
				size = (s64)aux->const_reg_vals[size_reg];
				goto out;
			}
			/*
			 * Size arg is const on each path but differs across merged
			 * paths. MAX_BPF_STACK is a safe upper bound for reads.
			 */
			if (at & MEM_UNINIT)
				return 0;
			return MAX_BPF_STACK;
		}
		return S64_MIN;
	case ARG_PTR_TO_DYNPTR:
		size = BPF_DYNPTR_SIZE;
		break;
	case ARG_PTR_TO_STACK:
		/*
		 * Only used by bpf_calls_callback() helpers. The helper itself
		 * doesn't access stack. The callback subprog does and it's
		 * analyzed separately.
		 */
		return 0;
	default:
		return S64_MIN;
	}
out:
	/*
	 * MEM_UNINIT args are write-only: the helper initializes the
	 * buffer without reading it.
	 */
	if (at & MEM_UNINIT)
		return -size;
	return size;
}
12712 
/*
 * Determine how many bytes a kfunc accesses through a stack pointer at
 * argument position @arg (0-based, corresponding to R1-R5).
 *
 * Returns:
 *   > 0      known read access size in bytes
 *     0      doesn't access memory through that argument (ex: not a pointer)
 *   S64_MIN  unknown
 *   < 0      known write access of (-return) bytes
 */
s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn,
				 int arg, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	struct bpf_kfunc_call_arg_meta meta;
	const struct btf_param *args;
	const struct btf_type *t, *ref_t;
	const struct btf *btf;
	u32 nargs, type_size;
	s64 size;

	/* Unknown/unresolvable kfunc: report unknown access. */
	if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0)
		return S64_MIN;

	btf = meta.btf;
	args = btf_params(meta.func_proto);
	nargs = btf_type_vlen(meta.func_proto);
	/* Argument position past the prototype: no access through it. */
	if (arg >= nargs)
		return 0;

	t = btf_type_skip_modifiers(btf, args[arg].type, NULL);
	if (!btf_type_is_ptr(t))
		return 0;

	/* dynptr: fixed 16-byte on-stack representation */
	if (is_kfunc_arg_dynptr(btf, &args[arg])) {
		size = BPF_DYNPTR_SIZE;
		goto out;
	}

	/* ptr + __sz/__szk pair: size is in the next register */
	if (arg + 1 < nargs &&
	    (btf_param_match_suffix(btf, &args[arg + 1], "__sz") ||
	     btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) {
		int size_reg = BPF_REG_1 + arg + 1;

		if (aux->const_reg_mask & BIT(size_reg)) {
			size = (s64)aux->const_reg_vals[size_reg];
			goto out;
		}
		/* Size reg is not a single known constant here (e.g. differs
		 * across merged paths): conservatively assume a read of the
		 * whole stack. NOTE(review): unlike the helper-call variant
		 * earlier in this file, the uninit/write case is not
		 * special-cased on this path — confirm that is intentional.
		 */
		return MAX_BPF_STACK;
	}

	/* fixed-size pointed-to type: resolve via BTF */
	ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
	if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) {
		size = type_size;
		goto out;
	}

	return S64_MIN;
out:
	/* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */
	if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW)
		return -size;
	/* __uninit args are write-only from the kfunc's point of view. */
	if (is_kfunc_arg_uninit(btf, &args[arg]))
		return -size;
	return size;
}
12782 
/* check special kfuncs and return:
 *  1  - not fall-through to 'else' branch, continue verification
 *  0  - fall-through to 'else' branch
 * < 0 - not fall-through to 'else' branch, return error
 *
 * Handles vmlinux kfuncs whose return register R0 needs bespoke typing
 * (obj_new/percpu_obj_new, refcount_acquire, graph-node returns,
 * kern-ctx/rdonly casts, dynptr slices) instead of the generic
 * PTR_TO_BTF_ID treatment done by the caller.
 */
static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
			       struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
			       const struct btf_type *ptr_type, struct btf *desc_btf)
{
	const struct btf_type *ret_t;
	int err = 0;

	/* Only kfuncs described by vmlinux BTF are special-cased here. */
	if (meta->btf != btf_vmlinux)
		return 0;

	if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
		struct btf_struct_meta *struct_meta;
		struct btf *ret_btf;
		u32 ret_btf_id;

		if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set)
			return -ENOMEM;

		/* The constant type-ID argument must fit in u32. */
		if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
			verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
			return -EINVAL;
		}

		/* The type ID refers to the program's own BTF. */
		ret_btf = env->prog->aux->btf;
		ret_btf_id = meta->arg_constant.value;

		/* This may be NULL due to user not supplying a BTF */
		if (!ret_btf) {
			verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
			return -EINVAL;
		}

		ret_t = btf_type_by_id(ret_btf, ret_btf_id);
		if (!ret_t || !__btf_type_is_struct(ret_t)) {
			verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
			return -EINVAL;
		}

		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
			if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
				verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
					ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
				return -EINVAL;
			}

			/* Lazily initialize the global per-cpu allocator on
			 * first use, double-checked under bpf_percpu_ma_lock.
			 */
			if (!bpf_global_percpu_ma_set) {
				mutex_lock(&bpf_percpu_ma_lock);
				if (!bpf_global_percpu_ma_set) {
					/* Charge memory allocated with bpf_global_percpu_ma to
					 * root memcg. The obj_cgroup for root memcg is NULL.
					 */
					err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
					if (!err)
						bpf_global_percpu_ma_set = true;
				}
				mutex_unlock(&bpf_percpu_ma_lock);
				if (err)
					return err;
			}

			/* Ensure a unit cache exists for this object size. */
			mutex_lock(&bpf_percpu_ma_lock);
			err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
			mutex_unlock(&bpf_percpu_ma_lock);
			if (err)
				return err;
		}

		struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
		if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) {
			if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
				verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
				return -EINVAL;
			}

			if (struct_meta) {
				verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
				return -EINVAL;
			}
		}

		/* R0 becomes a non-NULL pointer to the allocated object. */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
		regs[BPF_REG_0].btf = ret_btf;
		regs[BPF_REG_0].btf_id = ret_btf_id;
		if (is_bpf_percpu_obj_new_kfunc(meta->func_id))
			regs[BPF_REG_0].type |= MEM_PERCPU;

		/* Stashed in insn aux for later fixup of this call site. */
		insn_aux->obj_new_size = ret_t->size;
		insn_aux->kptr_struct_meta = struct_meta;
	} else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) {
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
		regs[BPF_REG_0].btf = meta->arg_btf;
		regs[BPF_REG_0].btf_id = meta->arg_btf_id;

		insn_aux->kptr_struct_meta =
			btf_find_struct_meta(meta->arg_btf,
					     meta->arg_btf_id);
	} else if (is_list_node_type(ptr_type)) {
		/* Returned list node: mark R0 as a graph node of its root. */
		struct btf_field *field = meta->arg_list_head.field;

		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
	} else if (is_rbtree_node_type(ptr_type)) {
		struct btf_field *field = meta->arg_rbtree_root.field;

		mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
	} else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
		regs[BPF_REG_0].btf = desc_btf;
		regs[BPF_REG_0].btf_id = meta->ret_btf_id;
	} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
		ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
		if (!ret_t) {
			verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
				meta->arg_constant.value);
			return -EINVAL;
		} else if (btf_type_is_struct(ret_t)) {
			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
			regs[BPF_REG_0].btf = desc_btf;
			regs[BPF_REG_0].btf_id = meta->arg_constant.value;
		} else if (btf_type_is_void(ret_t)) {
			/* cast to 'void *': untrusted rdonly mem of size 0 */
			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
			regs[BPF_REG_0].mem_size = 0;
		} else {
			verbose(env,
				"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
			return -EINVAL;
		}
	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
		   meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
		enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);

		mark_reg_known_zero(env, regs, BPF_REG_0);

		/* Argument checking must have recorded the constant len. */
		if (!meta->arg_constant.found) {
			verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
			return -EFAULT;
		}

		regs[BPF_REG_0].mem_size = meta->arg_constant.value;

		/* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
		regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;

		if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
			regs[BPF_REG_0].type |= MEM_RDONLY;
		} else {
			/* this will set env->seen_direct_write to true */
			if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
				verbose(env, "the prog does not allow writes to packet data\n");
				return -EINVAL;
			}
		}

		if (!meta->initialized_dynptr.id) {
			verifier_bug(env, "no dynptr id");
			return -EFAULT;
		}
		/* Tie the slice to its dynptr so invalidation propagates. */
		regs[BPF_REG_0].dynptr_id = meta->initialized_dynptr.id;

		/* we don't need to set BPF_REG_0's ref obj id
		 * because packet slices are not refcounted (see
		 * dynptr_type_refcounted)
		 */
	} else {
		return 0;
	}

	return 1;
}
12961 
12962 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
12963 static int process_bpf_exit_full(struct bpf_verifier_env *env,
12964 				 bool *do_print_state, bool exception_exit);
12965 
/*
 * Verify a call into a kernel function (kfunc): fetch its BTF-described
 * prototype, validate the arguments and the calling context
 * (capabilities, sleepability, RCU/preemption balance, reference
 * releases), then type R0 from the kfunc's BTF return type.
 * Returns 0 on success or a negative error.
 */
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			    int *insn_idx_p)
{
	bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
	u32 i, nargs, ptr_type_id, release_ref_obj_id;
	struct bpf_reg_state *regs = cur_regs(env);
	const char *func_name, *ptr_type_name;
	const struct btf_type *t, *ptr_type;
	struct bpf_kfunc_call_arg_meta meta;
	struct bpf_insn_aux_data *insn_aux;
	int err, insn_idx = *insn_idx_p;
	const struct btf_param *args;
	struct btf *desc_btf;

	/* skip for now, but return error when we find this in fixup_kfunc_call */
	if (!insn->imm)
		return 0;

	err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta);
	if (err == -EACCES && meta.func_name)
		verbose(env, "calling kernel function %s is not allowed\n", meta.func_name);
	if (err)
		return err;
	desc_btf = meta.btf;
	func_name = meta.func_name;
	insn_aux = &env->insn_aux_data[insn_idx];

	insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta);

	if (!insn->off &&
	    (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] ||
	     insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) {
		struct bpf_verifier_state *branch;
		struct bpf_reg_state *regs;

		/* Fork a state modeling a failed lock acquisition, where R0
		 * is a negative errno; the fall-through models success.
		 */
		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
		if (IS_ERR(branch)) {
			verbose(env, "failed to push state for failed lock acquisition\n");
			return PTR_ERR(branch);
		}

		regs = branch->frame[branch->curframe]->regs;

		/* Clear r0-r5 registers in forked state */
		for (i = 0; i < CALLER_SAVED_REGS; i++)
			bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);

		mark_reg_unknown(env, regs, BPF_REG_0);
		err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1);
		if (err) {
			verbose(env, "failed to mark s32 range for retval in forked state for lock\n");
			return err;
		}
		__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
	} else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
		verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
		return -EFAULT;
	}

	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
		return -EACCES;
	}

	sleepable = bpf_is_kfunc_sleepable(&meta);
	if (sleepable && !in_sleepable(env)) {
		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
		return -EACCES;
	}

	/* Track non-sleepable context for kfuncs, same as for helpers. */
	if (!in_sleepable_context(env))
		insn_aux->non_sleepable = true;

	/* Check the arguments */
	err = check_kfunc_args(env, &meta, insn_idx);
	if (err < 0)
		return err;

	/* Kfuncs taking a callback: verify the callback subprog as if it
	 * were invoked from this call site.
	 */
	if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_rbtree_add_callback_state);
		if (err) {
			verbose(env, "kfunc %s#%d failed callback verification\n",
				func_name, meta.func_id);
			return err;
		}
	}

	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) {
		meta.r0_size = sizeof(u64);
		meta.r0_rdonly = false;
	}

	if (is_bpf_wq_set_callback_kfunc(meta.func_id)) {
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_timer_callback_state);
		if (err) {
			verbose(env, "kfunc %s#%d failed callback verification\n",
				func_name, meta.func_id);
			return err;
		}
	}

	if (is_task_work_add_kfunc(meta.func_id)) {
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_task_work_schedule_callback_state);
		if (err) {
			verbose(env, "kfunc %s#%d failed callback verification\n",
				func_name, meta.func_id);
			return err;
		}
	}

	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);

	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);

	/* Balance RCU read lock / preemption nesting counters. */
	if (rcu_lock) {
		env->cur_state->active_rcu_locks++;
	} else if (rcu_unlock) {
		struct bpf_func_state *state;
		struct bpf_reg_state *reg;
		u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);

		if (env->cur_state->active_rcu_locks == 0) {
			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
			return -EINVAL;
		}
		/* Leaving the outermost RCU section: MEM_RCU pointers become
		 * untrusted, since their pointees may now be freed.
		 */
		if (--env->cur_state->active_rcu_locks == 0) {
			bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
				if (reg->type & MEM_RCU) {
					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
					reg->type |= PTR_UNTRUSTED;
				}
			}));
		}
	} else if (preempt_disable) {
		env->cur_state->active_preempt_locks++;
	} else if (preempt_enable) {
		if (env->cur_state->active_preempt_locks == 0) {
			verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
			return -EINVAL;
		}
		env->cur_state->active_preempt_locks--;
	}

	/* Re-check sleepability after the counters above were updated. */
	if (sleepable && !in_sleepable_context(env)) {
		verbose(env, "kernel func %s is sleepable within %s\n",
			func_name, non_sleepable_context_description(env));
		return -EACCES;
	}

	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
		return -EACCES;
	}

	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
		return -EACCES;
	}

	/* In case of release function, we get register number of refcounted
	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
	 */
	if (meta.release_regno) {
		struct bpf_reg_state *reg = &regs[meta.release_regno];

		if (meta.initialized_dynptr.ref_obj_id) {
			err = unmark_stack_slots_dynptr(env, reg);
		} else {
			err = release_reference(env, reg->ref_obj_id);
			if (err)
				verbose(env, "kfunc %s#%d reference has not been acquired before\n",
					func_name, meta.func_id);
		}
		if (err)
			return err;
	}

	if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) {
		/* The inserted node (R2) loses ownership but remains usable
		 * as a non-owning reference.
		 */
		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
		insn_aux->insert_off = regs[BPF_REG_2].var_off.value;
		insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
		err = ref_convert_owning_non_owning(env, release_ref_obj_id);
		if (err) {
			verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
				func_name, meta.func_id);
			return err;
		}

		err = release_reference(env, release_ref_obj_id);
		if (err) {
			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
				func_name, meta.func_id);
			return err;
		}
	}

	if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
		if (!bpf_jit_supports_exceptions()) {
			verbose(env, "JIT does not support calling kfunc %s#%d\n",
				func_name, meta.func_id);
			return -ENOTSUPP;
		}
		env->seen_exception = true;

		/* In the case of the default callback, the cookie value passed
		 * to bpf_throw becomes the return value of the program.
		 */
		if (!env->exception_callback_subprog) {
			err = check_return_code(env, BPF_REG_1, "R1");
			if (err < 0)
				return err;
		}
	}

	/* Caller-saved registers are clobbered by the call. */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		u32 regno = caller_saved[i];

		bpf_mark_reg_not_init(env, &regs[regno]);
		regs[regno].subreg_def = DEF_NOT_SUBREG;
	}

	/* Check return type */
	t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);

	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
		/* Only obj_new/percpu_obj_new/refcount_acquire may acquire
		 * without returning a struct pointer per their prototype.
		 */
		if (meta.btf != btf_vmlinux ||
		    (!is_bpf_obj_new_kfunc(meta.func_id) &&
		     !is_bpf_percpu_obj_new_kfunc(meta.func_id) &&
		     !is_bpf_refcount_acquire_kfunc(meta.func_id))) {
			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
			return -EINVAL;
		}
	}

	if (btf_type_is_scalar(t)) {
		mark_reg_unknown(env, regs, BPF_REG_0);
		/* On this (success) path res_spin_lock returns exactly 0;
		 * the error path was forked above.
		 */
		if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
		    meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]))
			__mark_reg_const_zero(env, &regs[BPF_REG_0]);
		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
	} else if (btf_type_is_ptr(t)) {
		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
		err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
		if (err) {
			if (err < 0)
				return err;
		} else if (btf_type_is_void(ptr_type)) {
			/* kfunc returning 'void *' is equivalent to returning scalar */
			mark_reg_unknown(env, regs, BPF_REG_0);
		} else if (!__btf_type_is_struct(ptr_type)) {
			/* Non-struct pointer: allowed only if a fixed r0 size
			 * is known, typed as PTR_TO_MEM of that size.
			 */
			if (!meta.r0_size) {
				__u32 sz;

				if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
					meta.r0_size = sz;
					meta.r0_rdonly = true;
				}
			}
			if (!meta.r0_size) {
				ptr_type_name = btf_name_by_offset(desc_btf,
								   ptr_type->name_off);
				verbose(env,
					"kernel function %s returns pointer type %s %s is not supported\n",
					func_name,
					btf_type_str(ptr_type),
					ptr_type_name);
				return -EINVAL;
			}

			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_MEM;
			regs[BPF_REG_0].mem_size = meta.r0_size;

			if (meta.r0_rdonly)
				regs[BPF_REG_0].type |= MEM_RDONLY;

			/* Ensures we don't access the memory after a release_reference() */
			if (meta.ref_obj_id)
				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;

			if (is_kfunc_rcu_protected(&meta))
				regs[BPF_REG_0].type |= MEM_RCU;
		} else {
			enum bpf_reg_type type = PTR_TO_BTF_ID;

			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
				type |= PTR_UNTRUSTED;
			else if (is_kfunc_rcu_protected(&meta) ||
				 (bpf_is_iter_next_kfunc(&meta) &&
				  (get_iter_from_state(env->cur_state, &meta)
					   ->type & MEM_RCU))) {
				/*
				 * If the iterator's constructor (the _new
				 * function e.g., bpf_iter_task_new) has been
				 * annotated with BPF kfunc flag
				 * KF_RCU_PROTECTED and was called within a RCU
				 * read-side critical section, also propagate
				 * the MEM_RCU flag to the pointer returned from
				 * the iterator's next function (e.g.,
				 * bpf_iter_task_next).
				 */
				type |= MEM_RCU;
			} else {
				/*
				 * Any PTR_TO_BTF_ID that is returned from a BPF
				 * kfunc should by default be treated as
				 * implicitly trusted.
				 */
				type |= PTR_TRUSTED;
			}

			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].btf = desc_btf;
			regs[BPF_REG_0].type = type;
			regs[BPF_REG_0].btf_id = ptr_type_id;
		}

		if (is_kfunc_ret_null(&meta)) {
			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
			regs[BPF_REG_0].id = ++env->id_gen;
		}
		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
		if (is_kfunc_acquire(&meta)) {
			int id = acquire_reference(env, insn_idx);

			if (id < 0)
				return id;
			if (is_kfunc_ret_null(&meta))
				regs[BPF_REG_0].id = id;
			regs[BPF_REG_0].ref_obj_id = id;
		} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
			ref_set_non_owning(env, &regs[BPF_REG_0]);
		}

		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
			regs[BPF_REG_0].id = ++env->id_gen;
	} else if (btf_type_is_void(t)) {
		if (meta.btf == btf_vmlinux) {
			if (is_bpf_obj_drop_kfunc(meta.func_id) ||
			    is_bpf_percpu_obj_drop_kfunc(meta.func_id)) {
				insn_aux->kptr_struct_meta =
					btf_find_struct_meta(meta.arg_btf,
							     meta.arg_btf_id);
			}
		}
	}

	if (bpf_is_kfunc_pkt_changing(&meta))
		clear_all_pkt_pointers(env);

	/* Record the access size of each argument register. */
	nargs = btf_type_vlen(meta.func_proto);
	args = (const struct btf_param *)(meta.func_proto + 1);
	for (i = 0; i < nargs; i++) {
		u32 regno = i + 1;

		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
		if (btf_type_is_ptr(t))
			mark_btf_func_reg_size(env, regno, sizeof(void *));
		else
			/* scalar. ensured by check_kfunc_args() */
			mark_btf_func_reg_size(env, regno, t->size);
	}

	if (bpf_is_iter_next_kfunc(&meta)) {
		err = process_iter_next_call(env, insn_idx, &meta);
		if (err)
			return err;
	}

	if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie])
		env->prog->call_session_cookie = true;

	/* bpf_throw never returns: treat the call as program exit. */
	if (is_bpf_throw_kfunc(insn))
		return process_bpf_exit_full(env, NULL, true);

	return 0;
}
13350 
check_reg_sane_offset_scalar(struct bpf_verifier_env * env,const struct bpf_reg_state * reg,enum bpf_reg_type type)13351 static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
13352 					 const struct bpf_reg_state *reg,
13353 					 enum bpf_reg_type type)
13354 {
13355 	bool known = tnum_is_const(reg->var_off);
13356 	s64 val = reg->var_off.value;
13357 	s64 smin = reg->smin_value;
13358 
13359 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13360 		verbose(env, "math between %s pointer and %lld is not allowed\n",
13361 			reg_type_str(env, type), val);
13362 		return false;
13363 	}
13364 
13365 	if (smin == S64_MIN) {
13366 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
13367 			reg_type_str(env, type));
13368 		return false;
13369 	}
13370 
13371 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13372 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
13373 			smin, reg_type_str(env, type));
13374 		return false;
13375 	}
13376 
13377 	return true;
13378 }
13379 
check_reg_sane_offset_ptr(struct bpf_verifier_env * env,const struct bpf_reg_state * reg,enum bpf_reg_type type)13380 static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
13381 				      const struct bpf_reg_state *reg,
13382 				      enum bpf_reg_type type)
13383 {
13384 	bool known = tnum_is_const(reg->var_off);
13385 	s64 val = reg->var_off.value;
13386 	s64 smin = reg->smin_value;
13387 
13388 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
13389 		verbose(env, "%s pointer offset %lld is not allowed\n",
13390 			reg_type_str(env, type), val);
13391 		return false;
13392 	}
13393 
13394 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
13395 		verbose(env, "%s pointer offset %lld is not allowed\n",
13396 			reg_type_str(env, type), smin);
13397 		return false;
13398 	}
13399 
13400 	return true;
13401 }
13402 
/* Failure reasons for the Spectre v1 ALU sanitation helpers below. */
enum {
	REASON_BOUNDS	= -1,	/* offset register has mixed-sign bounds */
	REASON_TYPE	= -2,	/* pointer type not supported for masking */
	REASON_PATHS	= -3,	/* merged paths disagree on state/limit */
	REASON_LIMIT	= -4,	/* offset leaves no room to mask safely */
	REASON_STACK	= -5,	/* speculative-path exploration failed (used by callers below) */
};
13410 
/*
 * Compute the ALU masking limit for a pointer used in ADD/SUB with an
 * unknown scalar (Spectre v1 mitigation). On success returns 0 and
 * stores the limit in *alu_limit; returns REASON_TYPE for pointer types
 * that can't be masked, REASON_LIMIT when the current offset leaves no
 * room in the masking direction.
 */
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *alu_limit, bool mask_to_left)
{
	u32 max = 0, ptr_limit = 0;

	switch (ptr_reg->type) {
	case PTR_TO_STACK:
		/* Offset 0 is out-of-bounds, but acceptable start for the
		 * left direction, see BPF_REG_FP. Also, unknown scalar
		 * offset where we would need to deal with min/max bounds is
		 * currently prohibited for unprivileged.
		 */
		max = MAX_BPF_STACK + mask_to_left;
		/* stack grows down: a valid fp-relative offset is negative */
		ptr_limit = -ptr_reg->var_off.value;
		break;
	case PTR_TO_MAP_VALUE:
		max = ptr_reg->map_ptr->value_size;
		ptr_limit = mask_to_left ? ptr_reg->smin_value : ptr_reg->umax_value;
		break;
	default:
		return REASON_TYPE;
	}

	if (ptr_limit >= max)
		return REASON_LIMIT;
	*alu_limit = ptr_limit;
	return 0;
}
13439 
can_skip_alu_sanitation(const struct bpf_verifier_env * env,const struct bpf_insn * insn)13440 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
13441 				    const struct bpf_insn *insn)
13442 {
13443 	return env->bypass_spec_v1 ||
13444 		BPF_SRC(insn->code) == BPF_K ||
13445 		cur_aux(env)->nospec;
13446 }
13447 
/*
 * Record the masking state/limit for this insn. All program paths
 * reaching the insn must agree on both values, since only a single
 * rewrite of the insn is possible; otherwise fail with REASON_PATHS.
 */
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
				       u32 alu_state, u32 alu_limit)
{
	bool prev_differs = aux->alu_state &&
			    (aux->alu_state != alu_state ||
			     aux->alu_limit != alu_limit);

	if (prev_differs)
		return REASON_PATHS;

	/* Corresponding fixup done in do_misc_fixups(). */
	aux->alu_state = alu_state;
	aux->alu_limit = alu_limit;
	return 0;
}
13464 
sanitize_val_alu(struct bpf_verifier_env * env,struct bpf_insn * insn)13465 static int sanitize_val_alu(struct bpf_verifier_env *env,
13466 			    struct bpf_insn *insn)
13467 {
13468 	struct bpf_insn_aux_data *aux = cur_aux(env);
13469 
13470 	if (can_skip_alu_sanitation(env, insn))
13471 		return 0;
13472 
13473 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
13474 }
13475 
sanitize_needed(u8 opcode)13476 static bool sanitize_needed(u8 opcode)
13477 {
13478 	return opcode == BPF_ADD || opcode == BPF_SUB;
13479 }
13480 
/* Scratch state carried between the two phases of sanitize_ptr_alu(). */
struct bpf_sanitize_info {
	/* tentative alu_state/alu_limit computed before the simulated
	 * run; merged into the real insn aux in the commit window
	 */
	struct bpf_insn_aux_data aux;
	/* masking direction derived from opcode and offset sign */
	bool mask_to_left;
};
13485 
/*
 * Push a speculative verifier state starting at @next_idx — the path a
 * mispredicted branch would take — so that it gets verified as well.
 * When @insn is given, its dst (and, for BPF_X, src) registers are
 * marked unknown in the forked state, since speculative execution does
 * not honor the comparison outcome.
 * Returns 0 on success or the push_stack() error.
 */
static int sanitize_speculative_path(struct bpf_verifier_env *env,
				     const struct bpf_insn *insn,
				     u32 next_idx, u32 curr_idx)
{
	struct bpf_verifier_state *branch;
	struct bpf_reg_state *regs;

	/* last arg marks the forked state as speculative */
	branch = push_stack(env, next_idx, curr_idx, true);
	if (!IS_ERR(branch) && insn) {
		regs = branch->frame[branch->curframe]->regs;
		if (BPF_SRC(insn->code) == BPF_K) {
			mark_reg_unknown(env, regs, insn->dst_reg);
		} else if (BPF_SRC(insn->code) == BPF_X) {
			mark_reg_unknown(env, regs, insn->dst_reg);
			mark_reg_unknown(env, regs, insn->src_reg);
		}
	}
	return PTR_ERR_OR_ZERO(branch);
}
13505 
/* Prepare Spectre v1 masking for a pointer ALU operation (ADD/SUB with a
 * scalar offset).
 *
 * Called twice per instruction: first with !commit_window to derive the
 * masking direction and an initial ALU limit from @ptr_reg's bounds, and to
 * push a speculative path simulating the zero-truncated scalar; then with
 * commit_window after the operation was applied, to narrow the limit based
 * on the observed pointer move and record the final alu_state/alu_limit in
 * the insn aux data for the runtime rewrite.
 *
 * Returns 0 on success or a REASON_* code (mapped to errno/diagnostic by
 * sanitize_err()).
 */
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    const struct bpf_reg_state *ptr_reg,
			    const struct bpf_reg_state *off_reg,
			    struct bpf_reg_state *dst_reg,
			    struct bpf_sanitize_info *info,
			    const bool commit_window)
{
	/* aux state goes to the insn in commit phase, to scratch info before */
	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
	struct bpf_verifier_state *vstate = env->cur_state;
	bool off_is_imm = tnum_is_const(off_reg->var_off);
	bool off_is_neg = off_reg->smin_value < 0;
	bool ptr_is_dst_reg = ptr_reg == dst_reg;
	u8 opcode = BPF_OP(insn->code);
	u32 alu_state, alu_limit;
	struct bpf_reg_state tmp;
	int err;

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	/* We already marked aux for masking from non-speculative
	 * paths, thus we got here in the first place. We only care
	 * to explore bad access from here.
	 */
	if (vstate->speculative)
		goto do_sim;

	if (!commit_window) {
		/* Mixed-sign unknown scalars cannot be masked in one direction */
		if (!tnum_is_const(off_reg->var_off) &&
		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
			return REASON_BOUNDS;

		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
				     (opcode == BPF_SUB && !off_is_neg);
	}

	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
	if (err < 0)
		return err;

	if (commit_window) {
		/* In commit phase we narrow the masking window based on
		 * the observed pointer move after the simulated operation.
		 */
		alu_state = info->aux.alu_state;
		alu_limit = abs(info->aux.alu_limit - alu_limit);
	} else {
		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
		alu_state |= ptr_is_dst_reg ?
			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;

		/* Limit pruning on unknown scalars to enable deep search for
		 * potential masking differences from other program paths.
		 */
		if (!off_is_imm)
			env->explore_alu_limits = true;
	}

	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
	if (err < 0)
		return err;
do_sim:
	/* If we're in commit phase, we're done here given we already
	 * pushed the truncated dst_reg into the speculative verification
	 * stack.
	 *
	 * Also, when register is a known constant, we rewrite register-based
	 * operation to immediate-based, and thus do not need masking (and as
	 * a consequence, do not need to simulate the zero-truncation either).
	 */
	if (commit_window || off_is_imm)
		return 0;

	/* Simulate and find potential out-of-bounds access under
	 * speculative execution from truncation as a result of
	 * masking when off was not within expected range. If off
	 * sits in dst, then we temporarily need to move ptr there
	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
	 * for cases where we use K-based arithmetic in one direction
	 * and truncated reg-based in the other in order to explore
	 * bad access.
	 */
	if (!ptr_is_dst_reg) {
		tmp = *dst_reg;
		copy_register_state(dst_reg, ptr_reg);
	}
	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
	if (err < 0)
		return REASON_STACK;
	if (!ptr_is_dst_reg)
		*dst_reg = tmp;
	return 0;
}
13601 
sanitize_mark_insn_seen(struct bpf_verifier_env * env)13602 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
13603 {
13604 	struct bpf_verifier_state *vstate = env->cur_state;
13605 
13606 	/* If we simulate paths under speculation, we don't update the
13607 	 * insn as 'seen' such that when we verify unreachable paths in
13608 	 * the non-speculative domain, sanitize_dead_code() can still
13609 	 * rewrite/sanitize them.
13610 	 */
13611 	if (!vstate->speculative)
13612 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
13613 }
13614 
/* Translate a REASON_* code from the ALU sanitation helpers into a verbose
 * verifier diagnostic and an errno for the caller.
 *
 * Returns -ENOMEM for REASON_STACK (speculative state push failed),
 * -EACCES for all other reasons.
 */
static int sanitize_err(struct bpf_verifier_env *env,
			const struct bpf_insn *insn, int reason,
			const struct bpf_reg_state *off_reg,
			const struct bpf_reg_state *dst_reg)
{
	static const char *err = "pointer arithmetic with it prohibited for !root";
	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
	u32 dst = insn->dst_reg, src = insn->src_reg;

	switch (reason) {
	case REASON_BOUNDS:
		/* report the register holding the scalar offset */
		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
			off_reg == dst_reg ? dst : src, err);
		break;
	case REASON_TYPE:
		/* report the register holding the pointer */
		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
			off_reg == dst_reg ? src : dst, err);
		break;
	case REASON_PATHS:
		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
			dst, op, err);
		break;
	case REASON_LIMIT:
		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
			dst, op, err);
		break;
	case REASON_STACK:
		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
			dst, err);
		return -ENOMEM;
	default:
		verifier_bug(env, "unknown reason (%d)", reason);
		break;
	}

	return -EACCES;
}
13652 
/* check that stack access falls within stack limits and that 'reg' doesn't
 * have a variable offset.
 *
 * Variable offset is prohibited for unprivileged mode for simplicity since it
 * requires corresponding support in Spectre masking for stack ALU.  See also
 * retrieve_ptr_limit().
 *
 * Returns 0 when the access is acceptable, -EACCES otherwise (with a
 * verbose diagnostic emitted).
 */
static int check_stack_access_for_ptr_arithmetic(
				struct bpf_verifier_env *env,
				int regno,
				const struct bpf_reg_state *reg,
				int off)
{
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
			regno, tn_buf, off);
		return -EACCES;
	}

	/* stack grows down: valid fixed offsets are [-MAX_BPF_STACK, -1] */
	if (off >= 0 || off < -MAX_BPF_STACK) {
		verbose(env, "R%d stack pointer arithmetic goes out of range, "
			"prohibited for !root; off=%d\n", regno, off);
		return -EACCES;
	}

	return 0;
}
13683 
/* For unprivileged programs, check that a pointer resulting from ALU
 * arithmetic stays in bounds of its underlying object, so that a later
 * access on it can still be sanitized.
 *
 * Returns 0 when in bounds (or spec v1 checks are bypassed), -EACCES when
 * out of bounds, and -EOPNOTSUPP for pointer types this check does not
 * support (the caller decides how to treat that).
 */
static int sanitize_check_bounds(struct bpf_verifier_env *env,
				 const struct bpf_insn *insn,
				 const struct bpf_reg_state *dst_reg)
{
	u32 dst = insn->dst_reg;

	/* For unprivileged we require that resulting offset must be in bounds
	 * in order to be able to sanitize access later on.
	 */
	if (env->bypass_spec_v1)
		return 0;

	switch (dst_reg->type) {
	case PTR_TO_STACK:
		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
							  dst_reg->var_off.value))
			return -EACCES;
		break;
	case PTR_TO_MAP_VALUE:
		if (check_map_access(env, dst, 0, 1, false, ACCESS_HELPER)) {
			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
				"prohibited for !root\n", dst);
			return -EACCES;
		}
		break;
	default:
		return -EOPNOTSUPP;
	}

	return 0;
}
13715 
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
 * Caller should also handle BPF_MOV case separately.
 * If we return -EACCES, caller may want to try again treating pointer as a
 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
 *
 * dst_reg (insn->dst_reg) receives the resulting pointer; ptr_reg/off_reg
 * are the pointer and scalar operands regardless of which of the two is
 * dst (i.e. this also covers 'scalar += pointer').
 */
static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn,
				   const struct bpf_reg_state *ptr_reg,
				   const struct bpf_reg_state *off_reg)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg;
	bool known = tnum_is_const(off_reg->var_off);
	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
	struct bpf_sanitize_info info = {};
	u8 opcode = BPF_OP(insn->code);
	u32 dst = insn->dst_reg;
	int ret, bounds_ret;

	dst_reg = &regs[dst];

	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
	    smin_val > smax_val || umin_val > umax_val) {
		/* Taint dst register if offset had invalid bounds derived from
		 * e.g. dead branches.
		 */
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	if (BPF_CLASS(insn->code) != BPF_ALU64) {
		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
			__mark_reg_unknown(env, dst_reg);
			return 0;
		}

		verbose(env,
			"R%d 32-bit pointer arithmetic prohibited\n",
			dst);
		return -EACCES;
	}

	if (ptr_reg->type & PTR_MAYBE_NULL) {
		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
			dst, reg_type_str(env, ptr_reg->type));
		return -EACCES;
	}

	/*
	 * Accesses to untrusted PTR_TO_MEM are done through probe
	 * instructions, hence no need to track offsets.
	 */
	if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
		return 0;

	/* Whitelist of pointer types on which arithmetic is allowed */
	switch (base_type(ptr_reg->type)) {
	case PTR_TO_CTX:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_KEY:
	case PTR_TO_STACK:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET:
	case PTR_TO_TP_BUFFER:
	case PTR_TO_BTF_ID:
	case PTR_TO_MEM:
	case PTR_TO_BUF:
	case PTR_TO_FUNC:
	case CONST_PTR_TO_DYNPTR:
		break;
	case PTR_TO_FLOW_KEYS:
		if (known)
			break;
		fallthrough;
	case CONST_PTR_TO_MAP:
		/* smin_val represents the known value */
		if (known && smin_val == 0 && opcode == BPF_ADD)
			break;
		fallthrough;
	default:
		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
			dst, reg_type_str(env, ptr_reg->type));
		return -EACCES;
	}

	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
	 * The id may be overwritten later if we create a new variable offset.
	 */
	dst_reg->type = ptr_reg->type;
	dst_reg->id = ptr_reg->id;

	if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) ||
	    !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type))
		return -EINVAL;

	/* pointer types do not carry 32-bit bounds at the moment. */
	__mark_reg32_unbounded(dst_reg);

	/* Pre-phase of Spectre masking: derive limits, simulate truncation */
	if (sanitize_needed(opcode)) {
		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
				       &info, false);
		if (ret < 0)
			return sanitize_err(env, insn, ret, off_reg, dst_reg);
	}

	switch (opcode) {
	case BPF_ADD:
		/*
		 * dst_reg gets the pointer type and since some positive
		 * integer value was added to the pointer, give it a new 'id'
		 * if it's a PTR_TO_PACKET.
		 * this creates a new 'base' pointer, off_reg (variable) gets
		 * added into the variable offset, and we copy the fixed offset
		 * from ptr_reg.
		 */
		if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) ||
		    check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) {
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		}
		if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) ||
		    check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) {
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		}
		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			if (!known)
				dst_reg->id = ++env->id_gen;
			/*
			 * Clear range for unknown addends since we can't know
			 * where the pkt pointer ended up. Also clear AT_PKT_END /
			 * BEYOND_PKT_END from prior comparison as any pointer
			 * arithmetic invalidates them.
			 */
			if (!known || dst_reg->range < 0)
				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
		}
		break;
	case BPF_SUB:
		if (dst_reg == off_reg) {
			/* scalar -= pointer.  Creates an unknown scalar */
			verbose(env, "R%d tried to subtract pointer from scalar\n",
				dst);
			return -EACCES;
		}
		/* We don't allow subtraction from FP, because (according to
		 * test_verifier.c test "invalid fp arithmetic", JITs might not
		 * be able to deal with it.
		 */
		if (ptr_reg->type == PTR_TO_STACK) {
			verbose(env, "R%d subtraction from stack pointer prohibited\n",
				dst);
			return -EACCES;
		}
		/* A new variable offset is created.  If the subtrahend is known
		 * nonnegative, then any reg->range we had before is still good.
		 */
		if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) ||
		    check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) {
			/* Overflow possible, we know nothing */
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		}
		if (umin_ptr < umax_val) {
			/* Overflow possible, we know nothing */
			dst_reg->umin_value = 0;
			dst_reg->umax_value = U64_MAX;
		} else {
			/* Cannot overflow (as long as bounds are consistent) */
			dst_reg->umin_value = umin_ptr - umax_val;
			dst_reg->umax_value = umax_ptr - umin_val;
		}
		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
		dst_reg->raw = ptr_reg->raw;
		if (reg_is_pkt_pointer(ptr_reg)) {
			if (!known)
				dst_reg->id = ++env->id_gen;
			/*
			 * Clear range if the subtrahend may be negative since
			 * pkt pointer could move past its bounds. A positive
			 * subtrahend moves it backwards keeping positive range
			 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from
			 * prior comparison as arithmetic invalidates them.
			 */
			if ((!known && smin_val < 0) || dst_reg->range < 0)
				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
		}
		break;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* bitwise ops on pointers are troublesome, prohibit. */
		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	default:
		/* other operators (e.g. MUL,LSH) produce non-pointer results */
		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
			dst, bpf_alu_string[opcode >> 4]);
		return -EACCES;
	}

	if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type))
		return -EINVAL;
	reg_bounds_sync(dst_reg);
	/* -EACCES rejects immediately; -EOPNOTSUPP is cross-checked below
	 * against the sanitation commit phase.
	 */
	bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
	if (bounds_ret == -EACCES)
		return bounds_ret;
	/* Commit phase of Spectre masking: narrow limit by observed move */
	if (sanitize_needed(opcode)) {
		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
				       &info, true);
		if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
				    && !env->cur_state->speculative
				    && bounds_ret
				    && !ret,
				    env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
			return -EFAULT;
		}
		if (ret < 0)
			return sanitize_err(env, insn, ret, off_reg, dst_reg);
	}

	return 0;
}
13946 
/* Update dst_reg's 32-bit signed and unsigned bounds for dst += src.
 * Signed bounds collapse to unbounded on any possible overflow. For the
 * unsigned bounds, since dst_umin <= dst_umax and src_umin <= src_umax,
 * the min-sum can only overflow if the max-sum does too; hence only the
 * mixed case (max overflows, min does not) requires a reset.
 */
static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	u32 *dst_umin = &dst_reg->u32_min_value;
	u32 *dst_umax = &dst_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	u32 umax_val = src_reg->u32_max_value;
	bool min_overflow, max_overflow;

	if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
	    check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
		*dst_smin = S32_MIN;
		*dst_smax = S32_MAX;
	}

	/* If either all additions overflow or no additions overflow, then
	 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
	 * dst_umax + src_umax. Otherwise (some additions overflow), set
	 * the output bounds to unbounded.
	 */
	min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
	max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);

	if (!min_overflow && max_overflow) {
		*dst_umin = 0;
		*dst_umax = U32_MAX;
	}
}
13977 
/* Update dst_reg's 64-bit signed and unsigned bounds for dst += src.
 * Same reasoning as scalar32_min_max_add(): signed bounds go unbounded on
 * any possible overflow; unsigned bounds only need resetting in the mixed
 * case where the max-sum wraps but the min-sum does not.
 */
static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	u64 *dst_umin = &dst_reg->umin_value;
	u64 *dst_umax = &dst_reg->umax_value;
	u64 umin_val = src_reg->umin_value;
	u64 umax_val = src_reg->umax_value;
	bool min_overflow, max_overflow;

	if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
	    check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
		*dst_smin = S64_MIN;
		*dst_smax = S64_MAX;
	}

	/* If either all additions overflow or no additions overflow, then
	 * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
	 * dst_umax + src_umax. Otherwise (some additions overflow), set
	 * the output bounds to unbounded.
	 */
	min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
	max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);

	if (!min_overflow && max_overflow) {
		*dst_umin = 0;
		*dst_umax = U64_MAX;
	}
}
14008 
/* Update dst_reg's 32-bit signed and unsigned bounds for dst -= src.
 * Signed: smallest result is dst_smin - src_smax, largest is
 * dst_smax - src_smin; any possible overflow collapses to unbounded.
 * Unsigned: only the mixed case (min-difference underflows while the
 * max-difference does not) requires a reset.
 */
static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	u32 *dst_umin = &dst_reg->u32_min_value;
	u32 *dst_umax = &dst_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	u32 umax_val = src_reg->u32_max_value;
	bool min_underflow, max_underflow;

	if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
	    check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
		/* Overflow possible, we know nothing */
		*dst_smin = S32_MIN;
		*dst_smax = S32_MAX;
	}

	/* If either all subtractions underflow or no subtractions
	 * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
	 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
	 * underflow), set the output bounds to unbounded.
	 */
	min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
	max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);

	if (min_underflow && !max_underflow) {
		*dst_umin = 0;
		*dst_umax = U32_MAX;
	}
}
14040 
/* Update dst_reg's 64-bit signed and unsigned bounds for dst -= src.
 * Mirrors scalar32_min_max_sub(): signed bounds go unbounded on possible
 * overflow; unsigned bounds only need resetting in the mixed underflow
 * case (min-difference wraps, max-difference does not).
 */
static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	u64 *dst_umin = &dst_reg->umin_value;
	u64 *dst_umax = &dst_reg->umax_value;
	u64 umin_val = src_reg->umin_value;
	u64 umax_val = src_reg->umax_value;
	bool min_underflow, max_underflow;

	if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
	    check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
		/* Overflow possible, we know nothing */
		*dst_smin = S64_MIN;
		*dst_smax = S64_MAX;
	}

	/* If either all subtractions underflow or no subtractions
	 * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
	 * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
	 * underflow), set the output bounds to unbounded.
	 */
	min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
	max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);

	if (min_underflow && !max_underflow) {
		*dst_umin = 0;
		*dst_umax = U64_MAX;
	}
}
14072 
/* Update dst_reg's 32-bit bounds for dst *= src.
 * Unsigned: both endpoint products must fit, else unbounded. Signed: since
 * multiplication is not monotonic with mixed signs, compute all four
 * endpoint products; the result bounds are their min/max, unless any
 * product overflows, in which case the signed bounds become unbounded.
 */
static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	u32 *dst_umin = &dst_reg->u32_min_value;
	u32 *dst_umax = &dst_reg->u32_max_value;
	s32 tmp_prod[4];

	if (check_mul_overflow(*dst_umax, src_reg->u32_max_value, dst_umax) ||
	    check_mul_overflow(*dst_umin, src_reg->u32_min_value, dst_umin)) {
		/* Overflow possible, we know nothing */
		*dst_umin = 0;
		*dst_umax = U32_MAX;
	}
	if (check_mul_overflow(*dst_smin, src_reg->s32_min_value, &tmp_prod[0]) ||
	    check_mul_overflow(*dst_smin, src_reg->s32_max_value, &tmp_prod[1]) ||
	    check_mul_overflow(*dst_smax, src_reg->s32_min_value, &tmp_prod[2]) ||
	    check_mul_overflow(*dst_smax, src_reg->s32_max_value, &tmp_prod[3])) {
		/* Overflow possible, we know nothing */
		*dst_smin = S32_MIN;
		*dst_smax = S32_MAX;
	} else {
		*dst_smin = min_array(tmp_prod, 4);
		*dst_smax = max_array(tmp_prod, 4);
	}
}
14100 
/* Update dst_reg's 64-bit bounds for dst *= src.
 * Same scheme as scalar32_min_max_mul(): unsigned endpoint products or
 * unbounded; signed bounds are the min/max of the four endpoint products
 * unless any of them overflows.
 */
static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	u64 *dst_umin = &dst_reg->umin_value;
	u64 *dst_umax = &dst_reg->umax_value;
	s64 tmp_prod[4];

	if (check_mul_overflow(*dst_umax, src_reg->umax_value, dst_umax) ||
	    check_mul_overflow(*dst_umin, src_reg->umin_value, dst_umin)) {
		/* Overflow possible, we know nothing */
		*dst_umin = 0;
		*dst_umax = U64_MAX;
	}
	if (check_mul_overflow(*dst_smin, src_reg->smin_value, &tmp_prod[0]) ||
	    check_mul_overflow(*dst_smin, src_reg->smax_value, &tmp_prod[1]) ||
	    check_mul_overflow(*dst_smax, src_reg->smin_value, &tmp_prod[2]) ||
	    check_mul_overflow(*dst_smax, src_reg->smax_value, &tmp_prod[3])) {
		/* Overflow possible, we know nothing */
		*dst_smin = S64_MIN;
		*dst_smax = S64_MAX;
	} else {
		*dst_smin = min_array(tmp_prod, 4);
		*dst_smax = max_array(tmp_prod, 4);
	}
}
14128 
scalar32_min_max_udiv(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)14129 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg,
14130 				  struct bpf_reg_state *src_reg)
14131 {
14132 	u32 *dst_umin = &dst_reg->u32_min_value;
14133 	u32 *dst_umax = &dst_reg->u32_max_value;
14134 	u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
14135 
14136 	*dst_umin = *dst_umin / src_val;
14137 	*dst_umax = *dst_umax / src_val;
14138 
14139 	/* Reset other ranges/tnum to unbounded/unknown. */
14140 	dst_reg->s32_min_value = S32_MIN;
14141 	dst_reg->s32_max_value = S32_MAX;
14142 	reset_reg64_and_tnum(dst_reg);
14143 }
14144 
scalar_min_max_udiv(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)14145 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg,
14146 				struct bpf_reg_state *src_reg)
14147 {
14148 	u64 *dst_umin = &dst_reg->umin_value;
14149 	u64 *dst_umax = &dst_reg->umax_value;
14150 	u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
14151 
14152 	*dst_umin = div64_u64(*dst_umin, src_val);
14153 	*dst_umax = div64_u64(*dst_umax, src_val);
14154 
14155 	/* Reset other ranges/tnum to unbounded/unknown. */
14156 	dst_reg->smin_value = S64_MIN;
14157 	dst_reg->smax_value = S64_MAX;
14158 	reset_reg32_and_tnum(dst_reg);
14159 }
14160 
/* Update dst_reg's 32-bit signed bounds for signed dst /= src with a known
 * non-zero constant divisor. Handles the S32_MIN / -1 special case defined
 * by the BPF spec; otherwise the endpoint quotients (min/max of the two)
 * bound the result. All other ranges and the tnum are reset to unknown.
 */
static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg,
				  struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */
	s32 res1, res2;

	/* BPF div specification: S32_MIN / -1 = S32_MIN */
	if (*dst_smin == S32_MIN && src_val == -1) {
		/*
		 * If the dividend range contains more than just S32_MIN,
		 * we cannot precisely track the result, so it becomes unbounded.
		 * e.g., [S32_MIN, S32_MIN+10]/(-1),
		 *     = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)]
		 *     = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX]
		 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN.
		 */
		if (*dst_smax != S32_MIN) {
			*dst_smin = S32_MIN;
			*dst_smax = S32_MAX;
		}
		goto reset;
	}

	/* A negative divisor flips the order, hence min()/max() below */
	res1 = *dst_smin / src_val;
	res2 = *dst_smax / src_val;
	*dst_smin = min(res1, res2);
	*dst_smax = max(res1, res2);

reset:
	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;
	reset_reg64_and_tnum(dst_reg);
}
14197 
/* Update dst_reg's 64-bit signed bounds for signed dst /= src with a known
 * non-zero constant divisor. Handles the S64_MIN / -1 special case defined
 * by the BPF spec; otherwise the endpoint quotients (min/max of the two)
 * bound the result. All other ranges and the tnum are reset to unknown.
 */
static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	s64 src_val = src_reg->smin_value; /* non-zero, const divisor */
	s64 res1, res2;

	/* BPF div specification: S64_MIN / -1 = S64_MIN */
	if (*dst_smin == S64_MIN && src_val == -1) {
		/*
		 * If the dividend range contains more than just S64_MIN,
		 * we cannot precisely track the result, so it becomes unbounded.
		 * e.g., [S64_MIN, S64_MIN+10]/(-1),
		 *     = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)]
		 *     = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX]
		 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN.
		 */
		if (*dst_smax != S64_MIN) {
			*dst_smin = S64_MIN;
			*dst_smax = S64_MAX;
		}
		goto reset;
	}

	/* A negative divisor flips the order, hence min()/max() below */
	res1 = div64_s64(*dst_smin, src_val);
	res2 = div64_s64(*dst_smax, src_val);
	*dst_smin = min(res1, res2);
	*dst_smax = max(res1, res2);

reset:
	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;
	reset_reg32_and_tnum(dst_reg);
}
14234 
scalar32_min_max_umod(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)14235 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg,
14236 				  struct bpf_reg_state *src_reg)
14237 {
14238 	u32 *dst_umin = &dst_reg->u32_min_value;
14239 	u32 *dst_umax = &dst_reg->u32_max_value;
14240 	u32 src_val = src_reg->u32_min_value; /* non-zero, const divisor */
14241 	u32 res_max = src_val - 1;
14242 
14243 	/*
14244 	 * If dst_umax <= res_max, the result remains unchanged.
14245 	 * e.g., [2, 5] % 10 = [2, 5].
14246 	 */
14247 	if (*dst_umax <= res_max)
14248 		return;
14249 
14250 	*dst_umin = 0;
14251 	*dst_umax = min(*dst_umax, res_max);
14252 
14253 	/* Reset other ranges/tnum to unbounded/unknown. */
14254 	dst_reg->s32_min_value = S32_MIN;
14255 	dst_reg->s32_max_value = S32_MAX;
14256 	reset_reg64_and_tnum(dst_reg);
14257 }
14258 
scalar_min_max_umod(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)14259 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg,
14260 				struct bpf_reg_state *src_reg)
14261 {
14262 	u64 *dst_umin = &dst_reg->umin_value;
14263 	u64 *dst_umax = &dst_reg->umax_value;
14264 	u64 src_val = src_reg->umin_value; /* non-zero, const divisor */
14265 	u64 res_max = src_val - 1;
14266 
14267 	/*
14268 	 * If dst_umax <= res_max, the result remains unchanged.
14269 	 * e.g., [2, 5] % 10 = [2, 5].
14270 	 */
14271 	if (*dst_umax <= res_max)
14272 		return;
14273 
14274 	*dst_umin = 0;
14275 	*dst_umax = min(*dst_umax, res_max);
14276 
14277 	/* Reset other ranges/tnum to unbounded/unknown. */
14278 	dst_reg->smin_value = S64_MIN;
14279 	dst_reg->smax_value = S64_MAX;
14280 	reset_reg32_and_tnum(dst_reg);
14281 }
14282 
/* Update dst_reg's 32-bit signed bounds for signed dst %= src with a known
 * non-zero constant divisor. The remainder takes the sign of the dividend
 * and its magnitude is at most |divisor| - 1; bounds are derived per the
 * sign of the dividend range. All other ranges/tnum are reset to unknown.
 */
static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg,
				  struct bpf_reg_state *src_reg)
{
	s32 *dst_smin = &dst_reg->s32_min_value;
	s32 *dst_smax = &dst_reg->s32_max_value;
	s32 src_val = src_reg->s32_min_value; /* non-zero, const divisor */

	/*
	 * Safe absolute value calculation:
	 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648.
	 * Here use unsigned integer to avoid overflow.
	 */
	u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val;

	/*
	 * Calculate the maximum possible absolute value of the result.
	 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives
	 * 2147483647 (S32_MAX), which fits perfectly in s32.
	 */
	s32 res_max_abs = src_abs - 1;

	/*
	 * If the dividend is already within the result range,
	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
	 */
	if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
		return;

	/* General case: result has the same sign as the dividend. */
	if (*dst_smin >= 0) {
		/* non-negative dividend -> non-negative remainder */
		*dst_smin = 0;
		*dst_smax = min(*dst_smax, res_max_abs);
	} else if (*dst_smax <= 0) {
		/* non-positive dividend -> non-positive remainder */
		*dst_smax = 0;
		*dst_smin = max(*dst_smin, -res_max_abs);
	} else {
		/* dividend spans zero -> remainder may take either sign */
		*dst_smin = -res_max_abs;
		*dst_smax = res_max_abs;
	}

	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;
	reset_reg64_and_tnum(dst_reg);
}
14328 
/* Update dst_reg's 64-bit signed bounds for signed dst %= src with a known
 * non-zero constant divisor. Mirrors scalar32_min_max_smod(): remainder
 * takes the dividend's sign and its magnitude is at most |divisor| - 1.
 * All other ranges/tnum are reset to unknown.
 */
static void scalar_min_max_smod(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	s64 *dst_smin = &dst_reg->smin_value;
	s64 *dst_smax = &dst_reg->smax_value;
	s64 src_val = src_reg->smin_value; /* non-zero, const divisor */

	/*
	 * Safe absolute value calculation:
	 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63.
	 * Here use unsigned integer to avoid overflow.
	 */
	u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val;

	/*
	 * Calculate the maximum possible absolute value of the result.
	 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives
	 * 2^63 - 1 (S64_MAX), which fits perfectly in s64.
	 */
	s64 res_max_abs = src_abs - 1;

	/*
	 * If the dividend is already within the result range,
	 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5].
	 */
	if (*dst_smin >= -res_max_abs && *dst_smax <= res_max_abs)
		return;

	/* General case: result has the same sign as the dividend. */
	if (*dst_smin >= 0) {
		/* non-negative dividend -> non-negative remainder */
		*dst_smin = 0;
		*dst_smax = min(*dst_smax, res_max_abs);
	} else if (*dst_smax <= 0) {
		/* non-positive dividend -> non-positive remainder */
		*dst_smax = 0;
		*dst_smin = max(*dst_smin, -res_max_abs);
	} else {
		/* dividend spans zero -> remainder may take either sign */
		*dst_smin = -res_max_abs;
		*dst_smax = res_max_abs;
	}

	/* Reset other ranges/tnum to unbounded/unknown. */
	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;
	reset_reg32_and_tnum(dst_reg);
}
14374 
scalar32_min_max_and(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)14375 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
14376 				 struct bpf_reg_state *src_reg)
14377 {
14378 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14379 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14380 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14381 	u32 umax_val = src_reg->u32_max_value;
14382 
14383 	if (src_known && dst_known) {
14384 		__mark_reg32_known(dst_reg, var32_off.value);
14385 		return;
14386 	}
14387 
14388 	/* We get our minimum from the var_off, since that's inherently
14389 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
14390 	 */
14391 	dst_reg->u32_min_value = var32_off.value;
14392 	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
14393 
14394 	/* Safe to set s32 bounds by casting u32 result into s32 when u32
14395 	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14396 	 */
14397 	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14398 		dst_reg->s32_min_value = dst_reg->u32_min_value;
14399 		dst_reg->s32_max_value = dst_reg->u32_max_value;
14400 	} else {
14401 		dst_reg->s32_min_value = S32_MIN;
14402 		dst_reg->s32_max_value = S32_MAX;
14403 	}
14404 }
14405 
/* Refine 64-bit bounds for dst &= src.  The caller has already updated
 * dst_reg->var_off with tnum_and().
 */
static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);
	u64 umax_val = src_reg->umax_value;

	if (src_known && dst_known) {
		/* var_off already holds the exact AND result. */
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get our minimum from the var_off, since that's inherently
	 * bitwise.  Our maximum is the minimum of the operands' maxima.
	 */
	dst_reg->umin_value = dst_reg->var_off.value;
	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);

	/* Safe to set s64 bounds by casting u64 result into s64 when u64
	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
	 */
	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	} else {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	}
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14437 
/* Refine 32-bit bounds for dst |= src.  The caller has already updated
 * dst_reg->var_off with tnum_or().
 */
static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_subreg_is_const(src_reg->var_off);
	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
	u32 umin_val = src_reg->u32_min_value;

	if (src_known && dst_known) {
		/* var32_off already holds the exact OR result. */
		__mark_reg32_known(dst_reg, var32_off.value);
		return;
	}

	/* We get our maximum from the var_off, and our minimum is the
	 * maximum of the operands' minima
	 */
	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
	dst_reg->u32_max_value = var32_off.value | var32_off.mask;

	/* Safe to set s32 bounds by casting u32 result into s32 when u32
	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
	 */
	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
		dst_reg->s32_min_value = dst_reg->u32_min_value;
		dst_reg->s32_max_value = dst_reg->u32_max_value;
	} else {
		dst_reg->s32_min_value = S32_MIN;
		dst_reg->s32_max_value = S32_MAX;
	}
}
14468 
/* Refine 64-bit bounds for dst |= src.  The caller has already updated
 * dst_reg->var_off with tnum_or().
 */
static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
			      struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);
	u64 umin_val = src_reg->umin_value;

	if (src_known && dst_known) {
		/* var_off already holds the exact OR result. */
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get our maximum from the var_off, and our minimum is the
	 * maximum of the operands' minima
	 */
	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;

	/* Safe to set s64 bounds by casting u64 result into s64 when u64
	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
	 */
	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	} else {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	}
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14500 
scalar32_min_max_xor(struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg)14501 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
14502 				 struct bpf_reg_state *src_reg)
14503 {
14504 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
14505 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
14506 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
14507 
14508 	if (src_known && dst_known) {
14509 		__mark_reg32_known(dst_reg, var32_off.value);
14510 		return;
14511 	}
14512 
14513 	/* We get both minimum and maximum from the var32_off. */
14514 	dst_reg->u32_min_value = var32_off.value;
14515 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
14516 
14517 	/* Safe to set s32 bounds by casting u32 result into s32 when u32
14518 	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
14519 	 */
14520 	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
14521 		dst_reg->s32_min_value = dst_reg->u32_min_value;
14522 		dst_reg->s32_max_value = dst_reg->u32_max_value;
14523 	} else {
14524 		dst_reg->s32_min_value = S32_MIN;
14525 		dst_reg->s32_max_value = S32_MAX;
14526 	}
14527 }
14528 
/* Refine 64-bit bounds for dst ^= src.  The caller has already updated
 * dst_reg->var_off with tnum_xor().
 */
static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	bool src_known = tnum_is_const(src_reg->var_off);
	bool dst_known = tnum_is_const(dst_reg->var_off);

	if (src_known && dst_known) {
		/* dst_reg->var_off.value has been updated earlier */
		__mark_reg_known(dst_reg, dst_reg->var_off.value);
		return;
	}

	/* We get both minimum and maximum from the var_off. */
	dst_reg->umin_value = dst_reg->var_off.value;
	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;

	/* Safe to set s64 bounds by casting u64 result into s64 when u64
	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
	 */
	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
		dst_reg->smin_value = dst_reg->umin_value;
		dst_reg->smax_value = dst_reg->umax_value;
	} else {
		dst_reg->smin_value = S64_MIN;
		dst_reg->smax_value = S64_MAX;
	}

	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14558 
/* Update 32-bit bounds for dst <<= [umin_val, umax_val].  The shift range is
 * constant in practice (see is_safe_to_compute_dst_reg_range()), but the
 * bounds math below also tolerates umin_val < umax_val.
 */
static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	/* Left shift destroys sign information beyond what var_off keeps. */
	dst_reg->s32_min_value = S32_MIN;
	dst_reg->s32_max_value = S32_MAX;

	if (umax_val <= 31 && dst_reg->u32_max_value <= 1ULL << (31 - umax_val)) {
		/* Top bit provably stays in: bounds shift like the value. */
		dst_reg->u32_min_value <<= umin_val;
		dst_reg->u32_max_value <<= umax_val;
	} else {
		/* Top bit might be shifted out: unsigned range is unknown. */
		dst_reg->u32_min_value = 0;
		dst_reg->u32_max_value = U32_MAX;
	}
}
14576 
/* Handle BPF_LSH in the alu32 case: update subreg bounds and tnum, then
 * blow away the 64-bit bounds so they are re-derived via zext later.
 */
static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	u32 umax_val = src_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;
	/* u32 alu operation will zext upper bits */
	struct tnum subreg = tnum_subreg(dst_reg->var_off);

	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
	/* Not required but being careful mark reg64 bounds as unknown so
	 * that we are forced to pick them up from tnum and zext later and
	 * if some path skips this step we are still safe.
	 */
	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}
14594 
/* Update 64-bit bounds for dst <<= [umin_val, umax_val].  Must be called
 * before __scalar32_min_max_lsh() because it reads the unshifted s32 bounds
 * (see scalar_min_max_lsh()).
 */
static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
				   u64 umin_val, u64 umax_val)
{
	/* Special case <<32 because it is a common compiler pattern to sign
	 * extend subreg by doing <<32 s>>32. smin/smax assignments are correct
	 * because s32 bounds don't flip sign when shifting to the left by
	 * 32bits.
	 */
	if (umin_val == 32 && umax_val == 32) {
		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
	} else {
		dst_reg->smax_value = S64_MAX;
		dst_reg->smin_value = S64_MIN;
	}

	/* If we might shift our top bit out, then we know nothing */
	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
		dst_reg->umin_value = 0;
		dst_reg->umax_value = U64_MAX;
	} else {
		dst_reg->umin_value <<= umin_val;
		dst_reg->umax_value <<= umax_val;
	}
}
14620 
/* Handle BPF_LSH in the alu64 case: update 64-bit and 32-bit bounds and the
 * tnum for a constant shift amount.
 */
static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	u64 umax_val = src_reg->umax_value;
	u64 umin_val = src_reg->umin_value;

	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);

	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
	/* We may learn something more from the var_off */
	__update_reg_bounds(dst_reg);
}
14635 
/* Handle BPF_RSH (logical right shift) in the alu32 case. */
static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
				 struct bpf_reg_state *src_reg)
{
	struct tnum subreg = tnum_subreg(dst_reg->var_off);
	u32 umax_val = src_reg->u32_max_value;
	u32 umin_val = src_reg->u32_min_value;

	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
	 * be negative, then either:
	 * 1) src_reg might be zero, so the sign bit of the result is
	 *    unknown, so we lose our signed bounds
	 * 2) it's known negative, thus the unsigned bounds capture the
	 *    signed bounds
	 * 3) the signed bounds cross zero, so they tell us nothing
	 *    about the result
	 * If the value in dst_reg is known nonnegative, then again the
	 * unsigned bounds capture the signed bounds.
	 * Thus, in all cases it suffices to blow away our signed bounds
	 * and rely on inferring new ones from the unsigned bounds and
	 * var_off of the result.
	 */
	dst_reg->s32_min_value = S32_MIN;
	dst_reg->s32_max_value = S32_MAX;

	/* Shrink conservatively: largest shift gives the smallest min,
	 * smallest shift gives the largest max.
	 */
	dst_reg->var_off = tnum_rshift(subreg, umin_val);
	dst_reg->u32_min_value >>= umax_val;
	dst_reg->u32_max_value >>= umin_val;

	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}
14667 
/* Handle BPF_RSH (logical right shift) in the alu64 case. */
static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
			       struct bpf_reg_state *src_reg)
{
	u64 umax_val = src_reg->umax_value;
	u64 umin_val = src_reg->umin_value;

	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
	 * be negative, then either:
	 * 1) src_reg might be zero, so the sign bit of the result is
	 *    unknown, so we lose our signed bounds
	 * 2) it's known negative, thus the unsigned bounds capture the
	 *    signed bounds
	 * 3) the signed bounds cross zero, so they tell us nothing
	 *    about the result
	 * If the value in dst_reg is known nonnegative, then again the
	 * unsigned bounds capture the signed bounds.
	 * Thus, in all cases it suffices to blow away our signed bounds
	 * and rely on inferring new ones from the unsigned bounds and
	 * var_off of the result.
	 */
	dst_reg->smin_value = S64_MIN;
	dst_reg->smax_value = S64_MAX;
	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
	/* Largest shift gives the smallest min, smallest shift the largest max. */
	dst_reg->umin_value >>= umax_val;
	dst_reg->umax_value >>= umin_val;

	/* Its not easy to operate on alu32 bounds here because it depends
	 * on bits being shifted in. Take easy way out and mark unbounded
	 * so we can recalculate later from tnum.
	 */
	__mark_reg32_unbounded(dst_reg);
	__update_reg_bounds(dst_reg);
}
14701 
/* Handle BPF_ARSH (arithmetic right shift) in the alu32 case.  Only called
 * with a constant shift amount (see is_safe_to_compute_dst_reg_range()).
 */
static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
				  struct bpf_reg_state *src_reg)
{
	u64 umin_val = src_reg->u32_min_value;

	/* Upon reaching here, src_known is true and
	 * umax_val is equal to umin_val.
	 */
	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);

	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);

	/* blow away the dst_reg umin_value/umax_value and rely on
	 * dst_reg var_off to refine the result.
	 */
	dst_reg->u32_min_value = 0;
	dst_reg->u32_max_value = U32_MAX;

	__mark_reg64_unbounded(dst_reg);
	__update_reg32_bounds(dst_reg);
}
14724 
/* Handle BPF_ARSH (arithmetic right shift) in the alu64 case.  Only called
 * with a constant shift amount (see is_safe_to_compute_dst_reg_range()).
 */
static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
				struct bpf_reg_state *src_reg)
{
	u64 umin_val = src_reg->umin_value;

	/* Upon reaching here, src_known is true and umax_val is equal
	 * to umin_val.
	 */
	dst_reg->smin_value >>= umin_val;
	dst_reg->smax_value >>= umin_val;

	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);

	/* blow away the dst_reg umin_value/umax_value and rely on
	 * dst_reg var_off to refine the result.
	 */
	dst_reg->umin_value = 0;
	dst_reg->umax_value = U64_MAX;

	/* Its not easy to operate on alu32 bounds here because it depends
	 * on bits being shifted in from upper 32-bits. Take easy way out
	 * and mark unbounded so we can recalculate later from tnum.
	 */
	__mark_reg32_unbounded(dst_reg);
	__update_reg_bounds(dst_reg);
}
14751 
/* Track the effect of BPF_END (byte swap / endianness conversion) on a
 * scalar register's tnum and bounds.
 */
static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
{
	/*
	 * Byte swap operation - update var_off using tnum_bswap.
	 * Three cases:
	 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
	 *    unconditional swap
	 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
	 *    swap on big-endian, truncation or no-op on little-endian
	 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
	 *    swap on little-endian, truncation or no-op on big-endian
	 */

	bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
	bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
	bool is_big_endian;
#ifdef CONFIG_CPU_BIG_ENDIAN
	is_big_endian = true;
#else
	is_big_endian = false;
#endif
	/* Apply bswap if alu64 or switch between big-endian and little-endian machines */
	bool need_bswap = alu64 || (to_le == is_big_endian);

	/*
	 * If the register is mutated, manually reset its scalar ID to break
	 * any existing ties and avoid incorrect bounds propagation.
	 */
	if (need_bswap || insn->imm == 16 || insn->imm == 32)
		clear_scalar_id(dst_reg);

	if (need_bswap) {
		/* insn->imm encodes the swap width in bits: 16, 32 or 64. */
		if (insn->imm == 16)
			dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
		else if (insn->imm == 32)
			dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
		else if (insn->imm == 64)
			dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
		/*
		 * Byteswap scrambles the range, so we must reset bounds.
		 * Bounds will be re-derived from the new tnum later.
		 */
		__mark_reg_unbounded(dst_reg);
	}
	/* For bswap16/32, truncate dst register to match the swapped size */
	if (insn->imm == 16 || insn->imm == 32)
		coerce_reg_to_size(dst_reg, insn->imm / 8);
}
14800 
is_safe_to_compute_dst_reg_range(struct bpf_insn * insn,const struct bpf_reg_state * src_reg)14801 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
14802 					     const struct bpf_reg_state *src_reg)
14803 {
14804 	bool src_is_const = false;
14805 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
14806 
14807 	if (insn_bitness == 32) {
14808 		if (tnum_subreg_is_const(src_reg->var_off)
14809 		    && src_reg->s32_min_value == src_reg->s32_max_value
14810 		    && src_reg->u32_min_value == src_reg->u32_max_value)
14811 			src_is_const = true;
14812 	} else {
14813 		if (tnum_is_const(src_reg->var_off)
14814 		    && src_reg->smin_value == src_reg->smax_value
14815 		    && src_reg->umin_value == src_reg->umax_value)
14816 			src_is_const = true;
14817 	}
14818 
14819 	switch (BPF_OP(insn->code)) {
14820 	case BPF_ADD:
14821 	case BPF_SUB:
14822 	case BPF_NEG:
14823 	case BPF_AND:
14824 	case BPF_XOR:
14825 	case BPF_OR:
14826 	case BPF_MUL:
14827 	case BPF_END:
14828 		return true;
14829 
14830 	/*
14831 	 * Division and modulo operators range is only safe to compute when the
14832 	 * divisor is a constant.
14833 	 */
14834 	case BPF_DIV:
14835 	case BPF_MOD:
14836 		return src_is_const;
14837 
14838 	/* Shift operators range is only computable if shift dimension operand
14839 	 * is a constant. Shifts greater than 31 or 63 are undefined. This
14840 	 * includes shifts by a negative number.
14841 	 */
14842 	case BPF_LSH:
14843 	case BPF_RSH:
14844 	case BPF_ARSH:
14845 		return (src_is_const && src_reg->umax_value < insn_bitness);
14846 	default:
14847 		return false;
14848 	}
14849 }
14850 
/* Called for BPF_AND/BPF_OR with a constant src.  When dst_reg's range is
 * exactly [-1, 0] (64-bit) or [-1, 0] on the subreg (32-bit), the register
 * can hold only one of two values, so fork verification: push a branch state
 * where the register is known 0, and continue the current path with it known
 * -1.  Each path then gets exact bounds for the bitwise op.
 * Returns 0 on success or a negative error from push_stack().
 */
static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn,
			      struct bpf_reg_state *dst_reg)
{
	struct bpf_verifier_state *branch;
	struct bpf_reg_state *regs;
	bool alu32;

	if (dst_reg->smin_value == -1 && dst_reg->smax_value == 0)
		alu32 = false;
	else if (dst_reg->s32_min_value == -1 && dst_reg->s32_max_value == 0)
		alu32 = true;
	else
		return 0; /* range is not exactly {-1, 0}: nothing to fork */

	/* Re-verify the same insn in the pushed branch with the 0 value. */
	branch = push_stack(env, env->insn_idx, env->insn_idx, false);
	if (IS_ERR(branch))
		return PTR_ERR(branch);

	regs = branch->frame[branch->curframe]->regs;
	if (alu32) {
		__mark_reg32_known(&regs[insn->dst_reg], 0);
		__mark_reg32_known(dst_reg, -1ull);
	} else {
		__mark_reg_known(&regs[insn->dst_reg], 0);
		__mark_reg_known(dst_reg, -1ull);
	}
	return 0;
}
14879 
/* WARNING: This function does calculations on 64-bit values, but the actual
 * execution may occur on 32-bit values. Therefore, things like bitshifts
 * need extra checks in the 32-bit case.
 */
/* Compute new bounds and var_off for dst_reg after a scalar-scalar ALU op.
 * src_reg is passed by value: for BPF_K it is a synthetic constant register
 * built by the caller.  Returns 0 on success or a negative error.
 */
static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
				      struct bpf_insn *insn,
				      struct bpf_reg_state *dst_reg,
				      struct bpf_reg_state src_reg)
{
	u8 opcode = BPF_OP(insn->code);
	s16 off = insn->off;
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
	int ret;

	/* If the range can't be tracked (e.g. variable divisor or shift),
	 * conservatively mark the result completely unknown.
	 */
	if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) {
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	if (sanitize_needed(opcode)) {
		ret = sanitize_val_alu(env, insn);
		if (ret < 0)
			return sanitize_err(env, insn, ret, NULL, NULL);
	}

	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
	 * There are two classes of instructions: The first class we track both
	 * alu32 and alu64 sign/unsigned bounds independently this provides the
	 * greatest amount of precision when alu operations are mixed with jmp32
	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
	 * and BPF_OR. This is possible because these ops have fairly easy to
	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
	 * See alu32 verifier tests for examples. The second class of
	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
	 * with regards to tracking sign/unsigned bounds because the bits may
	 * cross subreg boundaries in the alu64 case. When this happens we mark
	 * the reg unbounded in the subreg bound space and use the resulting
	 * tnum to calculate an approximation of the sign/unsigned bounds.
	 */
	switch (opcode) {
	case BPF_ADD:
		scalar32_min_max_add(dst_reg, &src_reg);
		scalar_min_max_add(dst_reg, &src_reg);
		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_SUB:
		scalar32_min_max_sub(dst_reg, &src_reg);
		scalar_min_max_sub(dst_reg, &src_reg);
		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_NEG:
		/* Model -x as 0 - x: snapshot dst into fake_reg[0], set dst to
		 * a known 0, and reuse the SUB bounds logic.
		 */
		env->fake_reg[0] = *dst_reg;
		__mark_reg_known(dst_reg, 0);
		scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
		scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
		dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
		break;
	case BPF_MUL:
		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_mul(dst_reg, &src_reg);
		scalar_min_max_mul(dst_reg, &src_reg);
		break;
	case BPF_DIV:
		/* BPF div specification: x / 0 = 0 */
		if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0)) {
			___mark_reg_known(dst_reg, 0);
			break;
		}
		/* insn->off == 1 selects signed division (sdiv). */
		if (alu32)
			if (off == 1)
				scalar32_min_max_sdiv(dst_reg, &src_reg);
			else
				scalar32_min_max_udiv(dst_reg, &src_reg);
		else
			if (off == 1)
				scalar_min_max_sdiv(dst_reg, &src_reg);
			else
				scalar_min_max_udiv(dst_reg, &src_reg);
		break;
	case BPF_MOD:
		/* BPF mod specification: x % 0 = x */
		if ((alu32 && src_reg.u32_min_value == 0) || (!alu32 && src_reg.umin_value == 0))
			break;
		/* insn->off == 1 selects signed modulo (smod). */
		if (alu32)
			if (off == 1)
				scalar32_min_max_smod(dst_reg, &src_reg);
			else
				scalar32_min_max_umod(dst_reg, &src_reg);
		else
			if (off == 1)
				scalar_min_max_smod(dst_reg, &src_reg);
			else
				scalar_min_max_umod(dst_reg, &src_reg);
		break;
	case BPF_AND:
		/* For a [-1, 0] dst, fork into exact 0 / -1 branches first. */
		if (tnum_is_const(src_reg.var_off)) {
			ret = maybe_fork_scalars(env, insn, dst_reg);
			if (ret)
				return ret;
		}
		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_and(dst_reg, &src_reg);
		scalar_min_max_and(dst_reg, &src_reg);
		break;
	case BPF_OR:
		/* For a [-1, 0] dst, fork into exact 0 / -1 branches first. */
		if (tnum_is_const(src_reg.var_off)) {
			ret = maybe_fork_scalars(env, insn, dst_reg);
			if (ret)
				return ret;
		}
		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_or(dst_reg, &src_reg);
		scalar_min_max_or(dst_reg, &src_reg);
		break;
	case BPF_XOR:
		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_xor(dst_reg, &src_reg);
		scalar_min_max_xor(dst_reg, &src_reg);
		break;
	case BPF_LSH:
		if (alu32)
			scalar32_min_max_lsh(dst_reg, &src_reg);
		else
			scalar_min_max_lsh(dst_reg, &src_reg);
		break;
	case BPF_RSH:
		if (alu32)
			scalar32_min_max_rsh(dst_reg, &src_reg);
		else
			scalar_min_max_rsh(dst_reg, &src_reg);
		break;
	case BPF_ARSH:
		if (alu32)
			scalar32_min_max_arsh(dst_reg, &src_reg);
		else
			scalar_min_max_arsh(dst_reg, &src_reg);
		break;
	case BPF_END:
		scalar_byte_swap(dst_reg, insn);
		break;
	default:
		break;
	}

	/*
	 * ALU32 ops are zero extended into 64bit register.
	 *
	 * BPF_END is already handled inside the helper (truncation),
	 * so skip zext here to avoid unexpected zero extension.
	 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
	 * This is a 64bit byte swap operation with alu32==true,
	 * but we should not zero extend the result.
	 */
	if (alu32 && opcode != BPF_END)
		zext_32_to_64(dst_reg);
	reg_bounds_sync(dst_reg);
	return 0;
}
15038 
/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
 * and var_off.
 *
 * Dispatches on operand kinds: arena pointers pass through, pointer/scalar
 * mixes go to adjust_ptr_min_max_vals(), scalar/scalar pairs go to
 * adjust_scalar_min_max_vals().  Also maintains the constant-delta link
 * (BPF_ADD_CONST) between registers for later sync_linked_regs().
 */
static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
	u8 opcode = BPF_OP(insn->code);
	int err;

	dst_reg = &regs[insn->dst_reg];
	if (BPF_SRC(insn->code) == BPF_X)
		src_reg = &regs[insn->src_reg];
	else
		src_reg = NULL; /* BPF_K: immediate operand, set up below */

	/* Case where at least one operand is an arena. */
	if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) {
		struct bpf_insn_aux_data *aux = cur_aux(env);

		if (dst_reg->type != PTR_TO_ARENA)
			*dst_reg = *src_reg;

		dst_reg->subreg_def = env->insn_idx + 1;

		if (BPF_CLASS(insn->code) == BPF_ALU64)
			/*
			 * 32-bit operations zero upper bits automatically.
			 * 64-bit operations need to be converted to 32.
			 */
			aux->needs_zext = true;

		/* Any arithmetic operations are allowed on arena pointers */
		return 0;
	}

	if (dst_reg->type != SCALAR_VALUE)
		ptr_reg = dst_reg;

	if (BPF_SRC(insn->code) == BPF_X) {
		if (src_reg->type != SCALAR_VALUE) {
			if (dst_reg->type != SCALAR_VALUE) {
				/* Combining two pointers by any ALU op yields
				 * an arbitrary scalar. Disallow all math except
				 * pointer subtraction
				 */
				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
					mark_reg_unknown(env, regs, insn->dst_reg);
					return 0;
				}
				verbose(env, "R%d pointer %s pointer prohibited\n",
					insn->dst_reg,
					bpf_alu_string[opcode >> 4]);
				return -EACCES;
			} else {
				/* scalar += pointer
				 * This is legal, but we have to reverse our
				 * src/dest handling in computing the range
				 */
				err = mark_chain_precision(env, insn->dst_reg);
				if (err)
					return err;
				return adjust_ptr_min_max_vals(env, insn,
							       src_reg, dst_reg);
			}
		} else if (ptr_reg) {
			/* pointer += scalar */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
			return adjust_ptr_min_max_vals(env, insn,
						       dst_reg, src_reg);
		} else if (dst_reg->precise) {
			/* if dst_reg is precise, src_reg should be precise as well */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
		}
	} else {
		/* Pretend the src is a reg with a known value, since we only
		 * need to be able to read from this state.
		 */
		off_reg.type = SCALAR_VALUE;
		__mark_reg_known(&off_reg, insn->imm);
		src_reg = &off_reg;
		if (ptr_reg) /* pointer += K */
			return adjust_ptr_min_max_vals(env, insn,
						       ptr_reg, src_reg);
	}

	/* Got here implies adding two SCALAR_VALUEs */
	if (WARN_ON_ONCE(ptr_reg)) {
		print_verifier_state(env, vstate, vstate->curframe, true);
		verbose(env, "verifier internal error: unexpected ptr_reg\n");
		return -EFAULT;
	}
	if (WARN_ON(!src_reg)) {
		print_verifier_state(env, vstate, vstate->curframe, true);
		verbose(env, "verifier internal error: no src_reg\n");
		return -EFAULT;
	}
	/*
	 * For alu32 linked register tracking, we need to check dst_reg's
	 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(),
	 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX.
	 */
	u64 dst_umax = dst_reg->umax_value;

	err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
	if (err)
		return err;
	/*
	 * Compilers can generate the code
	 * r1 = r2
	 * r1 += 0x1
	 * if r2 < 1000 goto ...
	 * use r1 in memory access
	 * So remember constant delta between r2 and r1 and update r1 after
	 * 'if' condition.
	 */
	if (env->bpf_capable &&
	    (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) &&
	    dst_reg->id && is_reg_const(src_reg, alu32) &&
	    !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) {
		u64 val = reg_const_value(src_reg, alu32);
		s32 off;

		/* The delta is stored as s32; bail out when it can't fit. */
		if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX))
			goto clear_id;

		/* alu32 delta tracking requires a 32-bit-bounded dst before the op. */
		if (alu32 && (dst_umax > U32_MAX))
			goto clear_id;

		off = (s32)val;

		if (BPF_OP(insn->code) == BPF_SUB) {
			/* Negating S32_MIN would overflow */
			if (off == S32_MIN)
				goto clear_id;
			off = -off;
		}

		if (dst_reg->id & BPF_ADD_CONST) {
			/*
			 * If the register already went through rX += val
			 * we cannot accumulate another val into rx->off.
			 */
clear_id:
			clear_scalar_id(dst_reg);
		} else {
			if (alu32)
				dst_reg->id |= BPF_ADD_CONST32;
			else
				dst_reg->id |= BPF_ADD_CONST64;
			dst_reg->delta = off;
		}
	} else {
		/*
		 * Make sure ID is cleared otherwise dst_reg min/max could be
		 * incorrectly propagated into other registers by sync_linked_regs()
		 */
		clear_scalar_id(dst_reg);
	}
	return 0;
}
15208 
15209 /* check validity of 32-bit and 64-bit arithmetic operations */
static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 opcode = BPF_OP(insn->code);
	int err;

	if (opcode == BPF_END || opcode == BPF_NEG) {
		/* Unary ops: dst_reg is both source and destination. */
		/* check src operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		/* Byte-swap/negate of a pointer is never allowed. */
		if (is_pointer_value(env, insn->dst_reg)) {
			verbose(env, "R%d pointer arithmetic prohibited\n",
				insn->dst_reg);
			return -EACCES;
		}

		/* check dest operand */
		if (regs[insn->dst_reg].type == SCALAR_VALUE) {
			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
			err = err ?: adjust_scalar_min_max_vals(env, insn,
							 &regs[insn->dst_reg],
							 regs[insn->dst_reg]);
		} else {
			err = check_reg_arg(env, insn->dst_reg, DST_OP);
		}
		if (err)
			return err;

	} else if (opcode == BPF_MOV) {

		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->off == BPF_ADDR_SPACE_CAST) {
				/* addr_space_cast is only meaningful with an arena map */
				if (!env->prog->aux->arena) {
					verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
					return -EINVAL;
				}
			}

			/* check src operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		}

		/* check dest operand, mark as required later */
		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
		if (err)
			return err;

		if (BPF_SRC(insn->code) == BPF_X) {
			struct bpf_reg_state *src_reg = regs + insn->src_reg;
			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;

			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				if (insn->imm) {
					/* off == BPF_ADDR_SPACE_CAST */
					mark_reg_unknown(env, regs, insn->dst_reg);
					if (insn->imm == 1) { /* cast from as(1) to as(0) */
						dst_reg->type = PTR_TO_ARENA;
						/* PTR_TO_ARENA is 32-bit */
						dst_reg->subreg_def = env->insn_idx + 1;
					}
				} else if (insn->off == 0) {
					/* case: R1 = R2
					 * copy register state to dest reg
					 */
					assign_scalar_id_before_mov(env, src_reg);
					copy_register_state(dst_reg, src_reg);
					dst_reg->subreg_def = DEF_NOT_SUBREG;
				} else {
					/* case: R1 = (s8, s16 s32)R2 */
					if (is_pointer_value(env, insn->src_reg)) {
						verbose(env,
							"R%d sign-extension part of pointer\n",
							insn->src_reg);
						return -EACCES;
					} else if (src_reg->type == SCALAR_VALUE) {
						bool no_sext;

						/* If src fits in off-1 bits, sign extension is a no-op
						 * and the scalar id (linked-reg tracking) can be kept.
						 */
						no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
						if (no_sext)
							assign_scalar_id_before_mov(env, src_reg);
						copy_register_state(dst_reg, src_reg);
						if (!no_sext)
							clear_scalar_id(dst_reg);
						coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
						dst_reg->subreg_def = DEF_NOT_SUBREG;
					} else {
						mark_reg_unknown(env, regs, insn->dst_reg);
					}
				}
			} else {
				/* R1 = (u32) R2 */
				if (is_pointer_value(env, insn->src_reg)) {
					verbose(env,
						"R%d partial copy of pointer\n",
						insn->src_reg);
					return -EACCES;
				} else if (src_reg->type == SCALAR_VALUE) {
					if (insn->off == 0) {
						bool is_src_reg_u32 = get_reg_width(src_reg) <= 32;

						if (is_src_reg_u32)
							assign_scalar_id_before_mov(env, src_reg);
						copy_register_state(dst_reg, src_reg);
						/* Make sure ID is cleared if src_reg is not in u32
						 * range otherwise dst_reg min/max could be incorrectly
						 * propagated into src_reg by sync_linked_regs()
						 */
						if (!is_src_reg_u32)
							clear_scalar_id(dst_reg);
						dst_reg->subreg_def = env->insn_idx + 1;
					} else {
						/* case: W1 = (s8, s16)W2 */
						bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));

						if (no_sext)
							assign_scalar_id_before_mov(env, src_reg);
						copy_register_state(dst_reg, src_reg);
						if (!no_sext)
							clear_scalar_id(dst_reg);
						dst_reg->subreg_def = env->insn_idx + 1;
						coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
					}
				} else {
					mark_reg_unknown(env, regs,
							 insn->dst_reg);
				}
				/* 32-bit mov zero-extends into the upper half */
				zext_32_to_64(dst_reg);
				reg_bounds_sync(dst_reg);
			}
		} else {
			/* case: R = imm
			 * remember the value we stored into this reg
			 */
			/* clear any state __mark_reg_known doesn't set */
			mark_reg_unknown(env, regs, insn->dst_reg);
			regs[insn->dst_reg].type = SCALAR_VALUE;
			if (BPF_CLASS(insn->code) == BPF_ALU64) {
				__mark_reg_known(regs + insn->dst_reg,
						 insn->imm);
			} else {
				__mark_reg_known(regs + insn->dst_reg,
						 (u32)insn->imm);
			}
		}

	} else {	/* all other ALU ops: and, sub, xor, add, ... */

		if (BPF_SRC(insn->code) == BPF_X) {
			/* check src1 operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
		}

		/* check src2 operand */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		/* Reject constant division/modulo by zero up front */
		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
			verbose(env, "div by zero\n");
			return -EINVAL;
		}

		/* Constant shift amounts must fit the operand width */
		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;

			if (insn->imm < 0 || insn->imm >= size) {
				verbose(env, "invalid shift %d\n", insn->imm);
				return -EINVAL;
			}
		}

		/* check dest operand */
		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
		err = err ?: adjust_reg_min_max_vals(env, insn);
		if (err)
			return err;
	}

	return reg_bounds_sanity_check(env, &regs[insn->dst_reg], "alu");
}
15398 
/* After a packet-bounds check has proven dst_reg safe up to its umax_value,
 * propagate that proven range to every register in the verifier state that
 * shares dst_reg's packet id (e.g. copies made before the pointer was
 * advanced).
 */
static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
				   struct bpf_reg_state *dst_reg,
				   enum bpf_reg_type type,
				   bool range_right_open)
{
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int new_range;

	if (dst_reg->umax_value == 0 && range_right_open)
		/* This doesn't give us any range */
		return;

	if (dst_reg->umax_value > MAX_PACKET_OFF)
		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
		 * than pkt_end, but that's because it's also less than pkt.
		 */
		return;

	new_range = dst_reg->umax_value;
	if (range_right_open)
		new_range++;

	/* Examples for register markings:
	 *
	 * pkt_data in dst register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 > pkt_end) goto <handle exception>
	 *   <access okay>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (r2 < pkt_end) goto <access okay>
	 *   <handle exception>
	 *
	 *   Where:
	 *     r2 == dst_reg, pkt_end == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * pkt_data in src register:
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end >= r2) goto <access okay>
	 *   <handle exception>
	 *
	 *   r2 = r3;
	 *   r2 += 8;
	 *   if (pkt_end <= r2) goto <handle exception>
	 *   <access okay>
	 *
	 *   Where:
	 *     pkt_end == dst_reg, r2 == src_reg
	 *     r2=pkt(id=n,off=8,r=0)
	 *     r3=pkt(id=n,off=0,r=0)
	 *
	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
	 * and [r3, r3 + 8-1) respectively is safe to access depending on
	 * the check.
	 */

	/* If our ids match, then we must have the same max_value.  And we
	 * don't care about the other reg's fixed offset, since if it's too big
	 * the range won't allow anything.
	 * dst_reg->umax_value is known < MAX_PACKET_OFF, therefore it fits in a u16.
	 */
	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
		if (reg->type == type && reg->id == dst_reg->id)
			/* keep the maximum range already checked */
			reg->range = max(reg->range, new_range);
	}));
}
15475 
15476 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
15477 				u8 opcode, bool is_jmp32);
15478 static u8 rev_opcode(u8 opcode);
15479 
15480 /*
15481  * Learn more information about live branches by simulating refinement on both branches.
15482  * regs_refine_cond_op() is sound, so producing ill-formed register bounds for the branch means
15483  * that branch is dead.
15484  */
static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32)
{
	bool dead;

	/* Apply the reverse condition to the FALSE (fallthrough) branch
	 * register copies and re-derive their bounds.
	 */
	regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32);
	reg_bounds_sync(&env->false_reg1);
	reg_bounds_sync(&env->false_reg2);

	/* regs_refine_cond_op() is sound, so ill-formed bounds on either
	 * register copy mean the FALSE branch cannot execute: only the
	 * TRUE branch will be taken.
	 */
	dead = range_bounds_violation(&env->false_reg1) ||
	       range_bounds_violation(&env->false_reg2);
	if (dead)
		return 1;

	/* Same procedure for the jump (TRUE) branch register copies. */
	regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32);
	reg_bounds_sync(&env->true_reg1);
	reg_bounds_sync(&env->true_reg2);

	/* A violation here means the TRUE branch is dead and only the
	 * FALSE branch will be taken.
	 */
	dead = range_bounds_violation(&env->true_reg1) ||
	       range_bounds_violation(&env->true_reg2);
	if (dead)
		return 0;

	/* Neither branch could be ruled out. */
	return -1;
}
15514 
15515 /*
15516  * <reg1> <op> <reg2>, currently assuming reg2 is a constant
15517  */
static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
				  struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
{
	/* Select 32-bit subregister or full 64-bit views depending on the
	 * jump width being tested.
	 */
	struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off;
	struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off;
	u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value;
	u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value;
	s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value;
	s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value;
	u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value;
	u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value;
	s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value;
	s64 smax2 = is_jmp32 ? (s64)reg2->s32_max_value : reg2->smax_value;

	/* Same register on both sides: reflexive comparisons are decided
	 * statically for everything except JSET on a non-constant value.
	 */
	if (reg1 == reg2) {
		switch (opcode) {
		case BPF_JGE:
		case BPF_JLE:
		case BPF_JSGE:
		case BPF_JSLE:
		case BPF_JEQ:
			return 1;
		case BPF_JGT:
		case BPF_JLT:
		case BPF_JSGT:
		case BPF_JSLT:
		case BPF_JNE:
			return 0;
		case BPF_JSET:
			/* r & r is nonzero iff r itself can be nonzero */
			if (tnum_is_const(t1))
				return t1.value != 0;
			else
				return (smin1 <= 0 && smax1 >= 0) ? -1 : 1;
		default:
			return -1;
		}
	}

	switch (opcode) {
	case BPF_JEQ:
		/* constants, umin/umax and smin/smax checks would be
		 * redundant in this case because they all should match
		 */
		if (tnum_is_const(t1) && tnum_is_const(t2))
			return t1.value == t2.value;
		if (!tnum_overlap(t1, t2))
			return 0;
		/* non-overlapping ranges */
		if (umin1 > umax2 || umax1 < umin2)
			return 0;
		if (smin1 > smax2 || smax1 < smin2)
			return 0;
		if (!is_jmp32) {
			/* if 64-bit ranges are inconclusive, see if we can
			 * utilize 32-bit subrange knowledge to eliminate
			 * branches that can't be taken a priori
			 */
			if (reg1->u32_min_value > reg2->u32_max_value ||
			    reg1->u32_max_value < reg2->u32_min_value)
				return 0;
			if (reg1->s32_min_value > reg2->s32_max_value ||
			    reg1->s32_max_value < reg2->s32_min_value)
				return 0;
		}
		break;
	case BPF_JNE:
		/* constants, umin/umax and smin/smax checks would be
		 * redundant in this case because they all should match
		 */
		if (tnum_is_const(t1) && tnum_is_const(t2))
			return t1.value != t2.value;
		if (!tnum_overlap(t1, t2))
			return 1;
		/* non-overlapping ranges */
		if (umin1 > umax2 || umax1 < umin2)
			return 1;
		if (smin1 > smax2 || smax1 < smin2)
			return 1;
		if (!is_jmp32) {
			/* if 64-bit ranges are inconclusive, see if we can
			 * utilize 32-bit subrange knowledge to eliminate
			 * branches that can't be taken a priori
			 */
			if (reg1->u32_min_value > reg2->u32_max_value ||
			    reg1->u32_max_value < reg2->u32_min_value)
				return 1;
			if (reg1->s32_min_value > reg2->s32_max_value ||
			    reg1->s32_max_value < reg2->s32_min_value)
				return 1;
		}
		break;
	case BPF_JSET:
		/* normalize so the constant operand ends up in reg2/t2 */
		if (!is_reg_const(reg2, is_jmp32)) {
			swap(reg1, reg2);
			swap(t1, t2);
		}
		if (!is_reg_const(reg2, is_jmp32))
			return -1;
		/* a known-set bit overlaps the mask -> always taken */
		if ((~t1.mask & t1.value) & t2.value)
			return 1;
		/* no possibly-set bit overlaps the mask -> never taken */
		if (!((t1.mask | t1.value) & t2.value))
			return 0;
		break;
	case BPF_JGT:
		if (umin1 > umax2)
			return 1;
		else if (umax1 <= umin2)
			return 0;
		break;
	case BPF_JSGT:
		if (smin1 > smax2)
			return 1;
		else if (smax1 <= smin2)
			return 0;
		break;
	case BPF_JLT:
		if (umax1 < umin2)
			return 1;
		else if (umin1 >= umax2)
			return 0;
		break;
	case BPF_JSLT:
		if (smax1 < smin2)
			return 1;
		else if (smin1 >= smax2)
			return 0;
		break;
	case BPF_JGE:
		if (umin1 >= umax2)
			return 1;
		else if (umax1 < umin2)
			return 0;
		break;
	case BPF_JSGE:
		if (smin1 >= smax2)
			return 1;
		else if (smax1 < smin2)
			return 0;
		break;
	case BPF_JLE:
		if (umax1 <= umin2)
			return 1;
		else if (umin1 > umax2)
			return 0;
		break;
	case BPF_JSLE:
		if (smax1 <= smin2)
			return 1;
		else if (smin1 > smax2)
			return 0;
		break;
	}

	/* Ranges alone were inconclusive; simulate both branches to see if
	 * refinement proves one of them dead.
	 */
	return simulate_both_branches_taken(env, opcode, is_jmp32);
}
15673 
static int flip_opcode(u32 opcode)
{
	/* How can we transform "a <op> b" into "b <op> a"? */
	static const u8 opcode_flip[16] = {
		/* these stay the same */
		[BPF_JEQ  >> 4] = BPF_JEQ,
		[BPF_JNE  >> 4] = BPF_JNE,
		[BPF_JSET >> 4] = BPF_JSET,
		/* these swap "lesser" and "greater" (L and G in the opcodes) */
		[BPF_JGE  >> 4] = BPF_JLE,
		[BPF_JGT  >> 4] = BPF_JLT,
		[BPF_JLE  >> 4] = BPF_JGE,
		[BPF_JLT  >> 4] = BPF_JGT,
		[BPF_JSGE >> 4] = BPF_JSLE,
		[BPF_JSGT >> 4] = BPF_JSLT,
		[BPF_JSLE >> 4] = BPF_JSGE,
		[BPF_JSLT >> 4] = BPF_JSGT
	};
	/* Table is indexed by the opcode's high nibble; opcodes without an
	 * entry above map to 0.
	 */
	return opcode_flip[opcode >> 4];
}
15694 
static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
				   struct bpf_reg_state *src_reg,
				   u8 opcode)
{
	struct bpf_reg_state *pkt;

	/* Normalize so that 'pkt' is the data pointer and the comparison
	 * reads as "pkt <op> pkt_end".
	 */
	if (src_reg->type == PTR_TO_PACKET_END) {
		pkt = dst_reg;
	} else if (dst_reg->type == PTR_TO_PACKET_END) {
		pkt = src_reg;
		opcode = flip_opcode(opcode);
	} else {
		return -1;
	}

	/* Only the special negative range markers carry information here. */
	if (pkt->range >= 0)
		return -1;

	if (opcode == BPF_JGT || opcode == BPF_JLE) {
		/* "pkt > pkt_end" and its negation "pkt <= pkt_end" */
		if (pkt->range == BEYOND_PKT_END)
			/* pkt has at least one extra byte beyond pkt_end */
			return opcode == BPF_JGT;
	} else if (opcode == BPF_JGE || opcode == BPF_JLT) {
		/* "pkt >= pkt_end" and its negation "pkt < pkt_end" */
		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
			return opcode == BPF_JGE;
	}
	return -1;
}
15734 
15735 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;"
15736  * and return:
15737  *  1 - branch will be taken and "goto target" will be executed
15738  *  0 - branch will not be taken and fall-through to next insn
15739  * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value
15740  *      range [0,10]
15741  */
static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1,
			   struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32)
{
	/* Packet-pointer vs packet-pointer comparisons have dedicated logic
	 * (64-bit only; pointers are never compared as 32-bit subregs here).
	 */
	if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32)
		return is_pkt_ptr_branch_taken(reg1, reg2, opcode);

	if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) {
		u64 val;

		/* arrange that reg2 is a scalar, and reg1 is a pointer */
		if (!is_reg_const(reg2, is_jmp32)) {
			opcode = flip_opcode(opcode);
			swap(reg1, reg2);
		}
		/* and ensure that reg2 is a constant */
		if (!is_reg_const(reg2, is_jmp32))
			return -1;

		/* only known-non-NULL pointers allow a prediction */
		if (!reg_not_null(reg1))
			return -1;

		/* If pointer is valid tests against zero will fail so we can
		 * use this to direct branch taken.
		 */
		val = reg_const_value(reg2, is_jmp32);
		if (val != 0)
			return -1;

		switch (opcode) {
		case BPF_JEQ:
			/* non-NULL pointer == 0 is always false */
			return 0;
		case BPF_JNE:
			/* non-NULL pointer != 0 is always true */
			return 1;
		default:
			return -1;
		}
	}

	/* now deal with two scalars, but not necessarily constants */
	return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32);
}
15783 
15784 /* Opcode that corresponds to a *false* branch condition.
15785  * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2
15786  */
static u8 rev_opcode(u8 opcode)
{
	/* Map each conditional jump to the opcode of its logical negation,
	 * e.g. the false branch of JEQ behaves like JNE.
	 */
	if (opcode == BPF_JEQ)
		return BPF_JNE;
	if (opcode == BPF_JNE)
		return BPF_JEQ;
	/* BPF has no native inverse of JSET, so the reverse operation is
	 * denoted by tagging JSET with the BPF_X flag.
	 */
	if (opcode == BPF_JSET)
		return BPF_JSET | BPF_X;
	if (opcode == (BPF_JSET | BPF_X))
		return BPF_JSET;
	if (opcode == BPF_JGE)
		return BPF_JLT;
	if (opcode == BPF_JGT)
		return BPF_JLE;
	if (opcode == BPF_JLE)
		return BPF_JGT;
	if (opcode == BPF_JLT)
		return BPF_JGE;
	if (opcode == BPF_JSGE)
		return BPF_JSLT;
	if (opcode == BPF_JSGT)
		return BPF_JSLE;
	if (opcode == BPF_JSLE)
		return BPF_JSGT;
	if (opcode == BPF_JSLT)
		return BPF_JSGE;
	/* Anything else (e.g. unconditional jumps) has no reverse. */
	return 0;
}
15808 
/* Refine range knowledge for <reg1> <op> <reg2> conditional operation. */
static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2,
				u8 opcode, bool is_jmp32)
{
	struct tnum t;
	u64 val;

	/* In case of GE/GT/SGE/SGT, reuse LE/LT/SLE/SLT logic from below */
	switch (opcode) {
	case BPF_JGE:
	case BPF_JGT:
	case BPF_JSGE:
	case BPF_JSGT:
		opcode = flip_opcode(opcode);
		swap(reg1, reg2);
		break;
	default:
		break;
	}

	switch (opcode) {
	case BPF_JEQ:
		/* Equality: both registers must lie in the intersection of
		 * their ranges and tnums, so refine each from the other.
		 */
		if (is_jmp32) {
			reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
			reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
			reg2->u32_min_value = reg1->u32_min_value;
			reg2->u32_max_value = reg1->u32_max_value;
			reg2->s32_min_value = reg1->s32_min_value;
			reg2->s32_max_value = reg1->s32_max_value;

			t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
			reg2->var_off = tnum_with_subreg(reg2->var_off, t);
		} else {
			reg1->umin_value = max(reg1->umin_value, reg2->umin_value);
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
			reg1->smin_value = max(reg1->smin_value, reg2->smin_value);
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
			reg2->umin_value = reg1->umin_value;
			reg2->umax_value = reg1->umax_value;
			reg2->smin_value = reg1->smin_value;
			reg2->smax_value = reg1->smax_value;

			reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off);
			reg2->var_off = reg1->var_off;
		}
		break;
	case BPF_JNE:
		/* normalize so the constant operand (if any) ends up in reg2 */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;

		/* try to recompute the bound of reg1 if reg2 is a const and
		 * is exactly the edge of reg1.
		 */
		val = reg_const_value(reg2, is_jmp32);
		if (is_jmp32) {
			/* u32_min_value is not equal to 0xffffffff at this point,
			 * because otherwise u32_max_value is 0xffffffff as well,
			 * in such a case both reg1 and reg2 would be constants,
			 * jump would be predicted and regs_refine_cond_op()
			 * wouldn't be called.
			 *
			 * Same reasoning works for all {u,s}{min,max}{32,64} cases
			 * below.
			 */
			if (reg1->u32_min_value == (u32)val)
				reg1->u32_min_value++;
			if (reg1->u32_max_value == (u32)val)
				reg1->u32_max_value--;
			if (reg1->s32_min_value == (s32)val)
				reg1->s32_min_value++;
			if (reg1->s32_max_value == (s32)val)
				reg1->s32_max_value--;
		} else {
			if (reg1->umin_value == (u64)val)
				reg1->umin_value++;
			if (reg1->umax_value == (u64)val)
				reg1->umax_value--;
			if (reg1->smin_value == (s64)val)
				reg1->smin_value++;
			if (reg1->smax_value == (s64)val)
				reg1->smax_value--;
		}
		break;
	case BPF_JSET:
		/* normalize so the constant operand ends up in reg2 */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X)
		 * requires single bit to learn something useful. E.g., if we
		 * know that `r1 & 0x3` is true, then which bits (0, 1, or both)
		 * are actually set? We can learn something definite only if
		 * it's a single-bit value to begin with.
		 *
		 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have
		 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor
		 * bit 1 is set, which we can readily use in adjustments.
		 */
		if (!is_power_of_2(val))
			break;
		if (is_jmp32) {
			t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_or(reg1->var_off, tnum_const(val));
		}
		break;
	case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */
		if (!is_reg_const(reg2, is_jmp32))
			swap(reg1, reg2);
		if (!is_reg_const(reg2, is_jmp32))
			break;
		val = reg_const_value(reg2, is_jmp32);
		/* Forget the ranges before narrowing tnums, to avoid invariant
		 * violations if we're on a dead branch.
		 */
		__mark_reg_unbounded(reg1);
		if (is_jmp32) {
			t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
			reg1->var_off = tnum_with_subreg(reg1->var_off, t);
		} else {
			reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val));
		}
		break;
	case BPF_JLE:
		/* reg1 <= reg2: tighten reg1's max and reg2's min */
		if (is_jmp32) {
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value);
			reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value);
		} else {
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value);
			reg2->umin_value = max(reg1->umin_value, reg2->umin_value);
		}
		break;
	case BPF_JLT:
		/* reg1 < reg2: strict bound, hence the +/- 1 */
		if (is_jmp32) {
			reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1);
			reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value);
		} else {
			reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1);
			reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value);
		}
		break;
	case BPF_JSLE:
		if (is_jmp32) {
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value);
			reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value);
		} else {
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value);
			reg2->smin_value = max(reg1->smin_value, reg2->smin_value);
		}
		break;
	case BPF_JSLT:
		if (is_jmp32) {
			reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1);
			reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value);
		} else {
			reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1);
			reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value);
		}
		break;
	default:
		return;
	}
}
15979 
15980 /* Check for invariant violations on the registers for both branches of a condition */
regs_bounds_sanity_check_branches(struct bpf_verifier_env * env)15981 static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env)
15982 {
15983 	int err;
15984 
15985 	err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1");
15986 	err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2");
15987 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1");
15988 	err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2");
15989 	return err;
15990 }
15991 
/* Convert a single maybe-NULL pointer register after a NULL check: if reg
 * carries the matching id, mark it as known-zero scalar on the "== NULL"
 * branch, or strip the may-be-null flag on the "!= NULL" branch.
 */
static void mark_ptr_or_null_reg(struct bpf_func_state *state,
				 struct bpf_reg_state *reg, u32 id,
				 bool is_null)
{
	if (type_may_be_null(reg->type) && reg->id == id &&
	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
		/* Old offset should have been known-zero, because we don't
		 * allow pointer arithmetic on pointers that might be NULL.
		 * If we see this happening, don't convert the register.
		 *
		 * But in some cases, some helpers that return local kptrs
		 * advance offset for the returned pointer. In those cases,
		 * it is fine to expect to see reg->var_off.
		 */
		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
		    WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0)))
			return;
		if (is_null) {
			/* We don't need id and ref_obj_id from this point
			 * onwards anymore, thus we should better reset it,
			 * so that state pruning has chances to take effect.
			 */
			__mark_reg_known_zero(reg);
			reg->type = SCALAR_VALUE;

			return;
		}

		mark_ptr_not_null_reg(reg);

		if (!reg_may_point_to_spin_lock(reg)) {
			/* For not-NULL ptr, reg->ref_obj_id will be reset
			 * in release_reference().
			 *
			 * reg->id is still used by spin_lock ptr. Other
			 * than spin_lock ptr type, reg->id can be reset.
			 */
			reg->id = 0;
		}
	}
}
16033 
16034 /* The logic is similar to find_good_pkt_pointers(), both could eventually
16035  * be folded together at some point.
16036  */
/*
 * Propagate the result of a NULL check on regs[regno] to every register and
 * spilled register in @vstate that shares the same id.  In the "== NULL"
 * branch, the tracked reference state for the pointer is released first
 * (no one could have freed it before the NULL check, hence the WARN).
 */
mark_ptr_or_null_regs(struct bpf_verifier_state * vstate,u32 regno,bool is_null)16037 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
16038 				  bool is_null)
16039 {
16040 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
16041 	struct bpf_reg_state *regs = state->regs, *reg;
16042 	u32 ref_obj_id = regs[regno].ref_obj_id;
16043 	u32 id = regs[regno].id;
16044 
16045 	if (ref_obj_id && ref_obj_id == id && is_null)
16046 		/* regs[regno] is in the " == NULL" branch.
16047 		 * No one could have freed the reference state before
16048 		 * doing the NULL check.
16049 		 */
16050 		WARN_ON_ONCE(release_reference_nomark(vstate, id));
16051 
16052 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
16053 		mark_ptr_or_null_reg(state, reg, id, is_null);
16054 	}));
16055 }
16056 
/*
 * Recognize comparisons between packet pointers (pkt_data vs pkt_end, or
 * pkt_meta vs pkt_data) and use the outcome to extend the provably-safe
 * packet range via find_good_pkt_pointers() / mark_pkt_end() in the
 * appropriate branch.  Returns true if the insn was such a comparison and
 * was handled, false otherwise.  Only 64-bit BPF_X compares qualify,
 * since pointers are always 64-bit.  The four opcode cases are symmetric;
 * they differ only in which branch (this/other) gets the range extension.
 */
try_match_pkt_pointers(const struct bpf_insn * insn,struct bpf_reg_state * dst_reg,struct bpf_reg_state * src_reg,struct bpf_verifier_state * this_branch,struct bpf_verifier_state * other_branch)16057 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
16058 				   struct bpf_reg_state *dst_reg,
16059 				   struct bpf_reg_state *src_reg,
16060 				   struct bpf_verifier_state *this_branch,
16061 				   struct bpf_verifier_state *other_branch)
16062 {
16063 	if (BPF_SRC(insn->code) != BPF_X)
16064 		return false;
16065 
16066 	/* Pointers are always 64-bit. */
16067 	if (BPF_CLASS(insn->code) == BPF_JMP32)
16068 		return false;
16069 
16070 	switch (BPF_OP(insn->code)) {
16071 	case BPF_JGT:
16072 		if ((dst_reg->type == PTR_TO_PACKET &&
16073 		     src_reg->type == PTR_TO_PACKET_END) ||
16074 		    (dst_reg->type == PTR_TO_PACKET_META &&
16075 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16076 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
16077 			find_good_pkt_pointers(this_branch, dst_reg,
16078 					       dst_reg->type, false);
16079 			mark_pkt_end(other_branch, insn->dst_reg, true);
16080 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
16081 			    src_reg->type == PTR_TO_PACKET) ||
16082 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16083 			    src_reg->type == PTR_TO_PACKET_META)) {
16084 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
16085 			find_good_pkt_pointers(other_branch, src_reg,
16086 					       src_reg->type, true);
16087 			mark_pkt_end(this_branch, insn->src_reg, false);
16088 		} else {
16089 			return false;
16090 		}
16091 		break;
16092 	case BPF_JLT:
16093 		if ((dst_reg->type == PTR_TO_PACKET &&
16094 		     src_reg->type == PTR_TO_PACKET_END) ||
16095 		    (dst_reg->type == PTR_TO_PACKET_META &&
16096 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16097 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
16098 			find_good_pkt_pointers(other_branch, dst_reg,
16099 					       dst_reg->type, true);
16100 			mark_pkt_end(this_branch, insn->dst_reg, false);
16101 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
16102 			    src_reg->type == PTR_TO_PACKET) ||
16103 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16104 			    src_reg->type == PTR_TO_PACKET_META)) {
16105 			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
16106 			find_good_pkt_pointers(this_branch, src_reg,
16107 					       src_reg->type, false);
16108 			mark_pkt_end(other_branch, insn->src_reg, true);
16109 		} else {
16110 			return false;
16111 		}
16112 		break;
16113 	case BPF_JGE:
16114 		if ((dst_reg->type == PTR_TO_PACKET &&
16115 		     src_reg->type == PTR_TO_PACKET_END) ||
16116 		    (dst_reg->type == PTR_TO_PACKET_META &&
16117 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16118 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
16119 			find_good_pkt_pointers(this_branch, dst_reg,
16120 					       dst_reg->type, true);
16121 			mark_pkt_end(other_branch, insn->dst_reg, false);
16122 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
16123 			    src_reg->type == PTR_TO_PACKET) ||
16124 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16125 			    src_reg->type == PTR_TO_PACKET_META)) {
16126 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
16127 			find_good_pkt_pointers(other_branch, src_reg,
16128 					       src_reg->type, false);
16129 			mark_pkt_end(this_branch, insn->src_reg, true);
16130 		} else {
16131 			return false;
16132 		}
16133 		break;
16134 	case BPF_JLE:
16135 		if ((dst_reg->type == PTR_TO_PACKET &&
16136 		     src_reg->type == PTR_TO_PACKET_END) ||
16137 		    (dst_reg->type == PTR_TO_PACKET_META &&
16138 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
16139 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
16140 			find_good_pkt_pointers(other_branch, dst_reg,
16141 					       dst_reg->type, false);
16142 			mark_pkt_end(this_branch, insn->dst_reg, true);
16143 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
16144 			    src_reg->type == PTR_TO_PACKET) ||
16145 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
16146 			    src_reg->type == PTR_TO_PACKET_META)) {
16147 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
16148 			find_good_pkt_pointers(this_branch, src_reg,
16149 					       src_reg->type, true);
16150 			mark_pkt_end(other_branch, insn->src_reg, false);
16151 		} else {
16152 			return false;
16153 		}
16154 		break;
16155 	default:
16156 		return false;
16157 	}
16158 
16159 	return true;
16160 }
16161 
/*
 * Append @reg (register number @spi_or_reg in frame @frameno when @is_reg,
 * otherwise stack slot index) to @reg_set if it is a scalar whose id,
 * ignoring the BPF_ADD_CONST flag, matches @id.  If the set is already
 * full (linked_regs_push() returns NULL), clear the register's id instead
 * so no stale link is ever followed for it.
 */
__collect_linked_regs(struct linked_regs * reg_set,struct bpf_reg_state * reg,u32 id,u32 frameno,u32 spi_or_reg,bool is_reg)16162 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
16163 				  u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
16164 {
16165 	struct linked_reg *e;
16166 
16167 	if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
16168 		return;
16169 
16170 	e = linked_regs_push(reg_set);
16171 	if (e) {
16172 		e->frameno = frameno;
16173 		e->is_reg = is_reg;
16174 		e->regno = spi_or_reg;
16175 	} else {
16176 		clear_scalar_id(reg);
16177 	}
16178 }
16179 
16180 /* For all R being scalar registers or spilled scalar registers
16181  * in verifier state, save R in linked_regs if R->id == id.
16182  * If there are too many Rs sharing same id, reset id for leftover Rs.
16183  */
/*
 * Walks every frame from the innermost outwards.  Registers are only
 * considered if marked live before the frame's current insn
 * (live_regs_before bitmask); stack slots are only considered if they
 * hold a spilled register.
 */
collect_linked_regs(struct bpf_verifier_env * env,struct bpf_verifier_state * vstate,u32 id,struct linked_regs * linked_regs)16184 static void collect_linked_regs(struct bpf_verifier_env *env,
16185 				struct bpf_verifier_state *vstate,
16186 				u32 id,
16187 				struct linked_regs *linked_regs)
16188 {
16189 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
16190 	struct bpf_func_state *func;
16191 	struct bpf_reg_state *reg;
16192 	u16 live_regs;
16193 	int i, j;
16194 
16195 	id = id & ~BPF_ADD_CONST;
16196 	for (i = vstate->curframe; i >= 0; i--) {
16197 		live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before;
16198 		func = vstate->frame[i];
16199 		for (j = 0; j < BPF_REG_FP; j++) {
			/* skip registers that are not read before this insn */
16200 			if (!(live_regs & BIT(j)))
16201 				continue;
16202 			reg = &func->regs[j];
16203 			__collect_linked_regs(linked_regs, reg, id, i, j, true);
16204 		}
16205 		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
16206 			if (!bpf_is_spilled_reg(&func->stack[j]))
16207 				continue;
16208 			reg = &func->stack[j].spilled_ptr;
16209 			__collect_linked_regs(linked_regs, reg, id, i, j, false);
16210 		}
16211 	}
16212 }
16213 
16214 /* For all R in linked_regs, copy known_reg range into R
16215  * if R->id == known_reg->id.
16216  */
/*
 * Linked scalars may differ from known_reg by a constant offset, tracked
 * via the BPF_ADD_CONST flag in reg->id and the reg->delta field.  When
 * both sides carry the same delta (or neither carries one), known_reg's
 * state is copied verbatim; otherwise the copy is shifted by
 * (reg->delta - known_reg->delta) using the scalar add helpers.
 */
sync_linked_regs(struct bpf_verifier_env * env,struct bpf_verifier_state * vstate,struct bpf_reg_state * known_reg,struct linked_regs * linked_regs)16217 static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate,
16218 			     struct bpf_reg_state *known_reg, struct linked_regs *linked_regs)
16219 {
16220 	struct bpf_reg_state fake_reg;
16221 	struct bpf_reg_state *reg;
16222 	struct linked_reg *e;
16223 	int i;
16224 
16225 	for (i = 0; i < linked_regs->cnt; ++i) {
16226 		e = &linked_regs->entries[i];
16227 		reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
16228 				: &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
16229 		if (reg->type != SCALAR_VALUE || reg == known_reg)
16230 			continue;
16231 		if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
16232 			continue;
16233 		/*
16234 		 * Skip mixed 32/64-bit links: the delta relationship doesn't
16235 		 * hold across different ALU widths.
16236 		 */
16237 		if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST)
16238 			continue;
16239 		if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) ||
16240 		    reg->delta == known_reg->delta) {
16241 			s32 saved_subreg_def = reg->subreg_def;
16242 
16243 			copy_register_state(reg, known_reg);
16244 			reg->subreg_def = saved_subreg_def;
16245 		} else {
16246 			s32 saved_subreg_def = reg->subreg_def;
16247 			s32 saved_off = reg->delta;
16248 			u32 saved_id = reg->id;
16249 
16250 			fake_reg.type = SCALAR_VALUE;
16251 			__mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta);
16252 
16253 			/* reg = known_reg; reg += delta */
16254 			copy_register_state(reg, known_reg);
16255 			/*
16256 			 * Must preserve off, id and subreg_def flag,
16257 			 * otherwise another sync_linked_regs() will be incorrect.
16258 			 */
16259 			reg->delta = saved_off;
16260 			reg->id = saved_id;
16261 			reg->subreg_def = saved_subreg_def;
16262 
16263 			scalar32_min_max_add(reg, &fake_reg);
16264 			scalar_min_max_add(reg, &fake_reg);
16265 			reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
16266 			if ((reg->id | known_reg->id) & BPF_ADD_CONST32)
16267 				zext_32_to_64(reg);
16268 			reg_bounds_sync(reg);
16269 		}
16270 		if (e->is_reg)
16271 			mark_reg_scratched(env, e->regno);
16272 		else
16273 			mark_stack_slot_scratched(env, e->spi);
16274 	}
16275 }
16276 
/*
 * Verify a conditional jump (BPF_JMP/BPF_JMP32 with condition, or
 * BPF_JCOND / may_goto).  If the branch outcome is provable
 * (is_branch_taken() returns 0 or 1), only that path is followed, with the
 * other path pushed for speculative-execution simulation when spec-v1
 * mitigation applies.  Otherwise the other branch is pushed as a new state,
 * per-branch register bounds are applied to both paths, linked scalar
 * registers are synced, and pointer NULL-check / packet-pointer comparison
 * patterns are handled.  Returns 0 on success or a negative error.
 */
check_cond_jmp_op(struct bpf_verifier_env * env,struct bpf_insn * insn,int * insn_idx)16277 static int check_cond_jmp_op(struct bpf_verifier_env *env,
16278 			     struct bpf_insn *insn, int *insn_idx)
16279 {
16280 	struct bpf_verifier_state *this_branch = env->cur_state;
16281 	struct bpf_verifier_state *other_branch;
16282 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
16283 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
16284 	struct bpf_reg_state *eq_branch_regs;
16285 	struct linked_regs linked_regs = {};
16286 	u8 opcode = BPF_OP(insn->code);
16287 	int insn_flags = 0;
16288 	bool is_jmp32;
16289 	int pred = -1;
16290 	int err;
16291 
16292 	/* Only conditional jumps are expected to reach here. */
16293 	if (opcode == BPF_JA || opcode > BPF_JCOND) {
16294 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
16295 		return -EINVAL;
16296 	}
16297 
	/* may_goto: fall-through is queued as a new state, jump is taken now */
16298 	if (opcode == BPF_JCOND) {
16299 		struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
16300 		int idx = *insn_idx;
16301 
16302 		prev_st = find_prev_entry(env, cur_st->parent, idx);
16303 
16304 		/* branch out 'fallthrough' insn as a new state to explore */
16305 		queued_st = push_stack(env, idx + 1, idx, false);
16306 		if (IS_ERR(queued_st))
16307 			return PTR_ERR(queued_st);
16308 
16309 		queued_st->may_goto_depth++;
16310 		if (prev_st)
16311 			widen_imprecise_scalars(env, prev_st, queued_st);
16312 		*insn_idx += insn->off;
16313 		return 0;
16314 	}
16315 
16316 	/* check src2 operand */
16317 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
16318 	if (err)
16319 		return err;
16320 
16321 	dst_reg = &regs[insn->dst_reg];
16322 	if (BPF_SRC(insn->code) == BPF_X) {
16323 		/* check src1 operand */
16324 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16325 		if (err)
16326 			return err;
16327 
16328 		src_reg = &regs[insn->src_reg];
16329 		if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
16330 		    is_pointer_value(env, insn->src_reg)) {
16331 			verbose(env, "R%d pointer comparison prohibited\n",
16332 				insn->src_reg);
16333 			return -EACCES;
16334 		}
16335 
16336 		if (src_reg->type == PTR_TO_STACK)
16337 			insn_flags |= INSN_F_SRC_REG_STACK;
16338 		if (dst_reg->type == PTR_TO_STACK)
16339 			insn_flags |= INSN_F_DST_REG_STACK;
16340 	} else {
		/* BPF_K: model the immediate as a known-constant fake register */
16341 		src_reg = &env->fake_reg[0];
16342 		memset(src_reg, 0, sizeof(*src_reg));
16343 		src_reg->type = SCALAR_VALUE;
16344 		__mark_reg_known(src_reg, insn->imm);
16345 
16346 		if (dst_reg->type == PTR_TO_STACK)
16347 			insn_flags |= INSN_F_DST_REG_STACK;
16348 	}
16349 
16350 	if (insn_flags) {
16351 		err = bpf_push_jmp_history(env, this_branch, insn_flags, 0);
16352 		if (err)
16353 			return err;
16354 	}
16355 
16356 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
16357 	copy_register_state(&env->false_reg1, dst_reg);
16358 	copy_register_state(&env->false_reg2, src_reg);
16359 	copy_register_state(&env->true_reg1, dst_reg);
16360 	copy_register_state(&env->true_reg2, src_reg);
16361 	pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32);
16362 	if (pred >= 0) {
16363 		/* If we get here with a dst_reg pointer type it is because
16364 		 * above is_branch_taken() special cased the 0 comparison.
16365 		 */
16366 		if (!__is_pointer_value(false, dst_reg))
16367 			err = mark_chain_precision(env, insn->dst_reg);
16368 		if (BPF_SRC(insn->code) == BPF_X && !err &&
16369 		    !__is_pointer_value(false, src_reg))
16370 			err = mark_chain_precision(env, insn->src_reg);
16371 		if (err)
16372 			return err;
16373 	}
16374 
16375 	if (pred == 1) {
16376 		/* Only follow the goto, ignore fall-through. If needed, push
16377 		 * the fall-through branch for simulation under speculative
16378 		 * execution.
16379 		 */
16380 		if (!env->bypass_spec_v1) {
16381 			err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx);
16382 			if (err < 0)
16383 				return err;
16384 		}
16385 		if (env->log.level & BPF_LOG_LEVEL)
16386 			print_insn_state(env, this_branch, this_branch->curframe);
16387 		*insn_idx += insn->off;
16388 		return 0;
16389 	} else if (pred == 0) {
16390 		/* Only follow the fall-through branch, since that's where the
16391 		 * program will go. If needed, push the goto branch for
16392 		 * simulation under speculative execution.
16393 		 */
16394 		if (!env->bypass_spec_v1) {
16395 			err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1,
16396 							*insn_idx);
16397 			if (err < 0)
16398 				return err;
16399 		}
16400 		if (env->log.level & BPF_LOG_LEVEL)
16401 			print_insn_state(env, this_branch, this_branch->curframe);
16402 		return 0;
16403 	}
16404 
16405 	/* Push scalar registers sharing same ID to jump history,
16406 	 * do this before creating 'other_branch', so that both
16407 	 * 'this_branch' and 'other_branch' share this history
16408 	 * if parent state is created.
16409 	 */
16410 	if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
16411 		collect_linked_regs(env, this_branch, src_reg->id, &linked_regs);
16412 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
16413 		collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs);
16414 	if (linked_regs.cnt > 1) {
16415 		err = bpf_push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
16416 		if (err)
16417 			return err;
16418 	}
16419 
16420 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
16421 	if (IS_ERR(other_branch))
16422 		return PTR_ERR(other_branch);
16423 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
16424 
16425 	err = regs_bounds_sanity_check_branches(env);
16426 	if (err)
16427 		return err;
16428 
	/* apply the per-branch bounds computed by is_branch_taken() above */
16429 	copy_register_state(dst_reg, &env->false_reg1);
16430 	copy_register_state(src_reg, &env->false_reg2);
16431 	copy_register_state(&other_branch_regs[insn->dst_reg], &env->true_reg1);
16432 	if (BPF_SRC(insn->code) == BPF_X)
16433 		copy_register_state(&other_branch_regs[insn->src_reg], &env->true_reg2);
16434 
16435 	if (BPF_SRC(insn->code) == BPF_X &&
16436 	    src_reg->type == SCALAR_VALUE && src_reg->id &&
16437 	    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
16438 		sync_linked_regs(env, this_branch, src_reg, &linked_regs);
16439 		sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg],
16440 				 &linked_regs);
16441 	}
16442 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
16443 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
16444 		sync_linked_regs(env, this_branch, dst_reg, &linked_regs);
16445 		sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg],
16446 				 &linked_regs);
16447 	}
16448 
16449 	/* if one pointer register is compared to another pointer
16450 	 * register check if PTR_MAYBE_NULL could be lifted.
16451 	 * E.g. register A - maybe null
16452 	 *      register B - not null
16453 	 * for JNE A, B, ... - A is not null in the false branch;
16454 	 * for JEQ A, B, ... - A is not null in the true branch.
16455 	 *
16456 	 * Since PTR_TO_BTF_ID points to a kernel struct that does
16457 	 * not need to be null checked by the BPF program, i.e.,
16458 	 * could be null even without PTR_MAYBE_NULL marking, so
16459 	 * only propagate nullness when neither reg is that type.
16460 	 */
16461 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
16462 	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
16463 	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
16464 	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
16465 	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
16466 		eq_branch_regs = NULL;
16467 		switch (opcode) {
16468 		case BPF_JEQ:
16469 			eq_branch_regs = other_branch_regs;
16470 			break;
16471 		case BPF_JNE:
16472 			eq_branch_regs = regs;
16473 			break;
16474 		default:
16475 			/* do nothing */
16476 			break;
16477 		}
16478 		if (eq_branch_regs) {
16479 			if (type_may_be_null(src_reg->type))
16480 				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
16481 			else
16482 				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
16483 		}
16484 	}
16485 
16486 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
16487 	 * Also does the same detection for a register whose the value is
16488 	 * known to be 0.
16489 	 * NOTE: these optimizations below are related with pointer comparison
16490 	 *       which will never be JMP32.
16491 	 */
16492 	if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
16493 	    type_may_be_null(dst_reg->type) &&
16494 	    ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) ||
16495 	     (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) {
16496 		/* Mark all identical registers in each branch as either
16497 		 * safe or unknown depending R == 0 or R != 0 conditional.
16498 		 */
16499 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
16500 				      opcode == BPF_JNE);
16501 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
16502 				      opcode == BPF_JEQ);
16503 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
16504 					   this_branch, other_branch) &&
16505 		   is_pointer_value(env, insn->dst_reg)) {
16506 		verbose(env, "R%d pointer comparison prohibited\n",
16507 			insn->dst_reg);
16508 		return -EACCES;
16509 	}
16510 	if (env->log.level & BPF_LOG_LEVEL)
16511 		print_insn_state(env, this_branch, this_branch->curframe);
16512 	return 0;
16513 }
16514 
16515 /* verify BPF_LD_IMM64 instruction */
/*
 * Handles the plain 64-bit immediate as well as the pseudo src_reg forms:
 * BPF_PSEUDO_BTF_ID (kernel variable), BPF_PSEUDO_FUNC (static callback
 * subprog), and the map fd/idx/value variants.  Assigns dst_reg's
 * type/value accordingly; returns 0 on success or a negative error.
 */
check_ld_imm(struct bpf_verifier_env * env,struct bpf_insn * insn)16516 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
16517 {
16518 	struct bpf_insn_aux_data *aux = cur_aux(env);
16519 	struct bpf_reg_state *regs = cur_regs(env);
16520 	struct bpf_reg_state *dst_reg;
16521 	struct bpf_map *map;
16522 	int err;
16523 
16524 	if (BPF_SIZE(insn->code) != BPF_DW) {
16525 		verbose(env, "invalid BPF_LD_IMM insn\n");
16526 		return -EINVAL;
16527 	}
16528 
16529 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
16530 	if (err)
16531 		return err;
16532 
16533 	dst_reg = &regs[insn->dst_reg];
16534 	if (insn->src_reg == 0) {
		/* plain ld_imm64: immediate is split across two insns */
16535 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
16536 
16537 		dst_reg->type = SCALAR_VALUE;
16538 		__mark_reg_known(&regs[insn->dst_reg], imm);
16539 		return 0;
16540 	}
16541 
16542 	/* All special src_reg cases are listed below. From this point onwards
16543 	 * we either succeed and assign a corresponding dst_reg->type after
16544 	 * zeroing the offset, or fail and reject the program.
16545 	 */
16546 	mark_reg_known_zero(env, regs, insn->dst_reg);
16547 
16548 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
16549 		dst_reg->type = aux->btf_var.reg_type;
16550 		switch (base_type(dst_reg->type)) {
16551 		case PTR_TO_MEM:
16552 			dst_reg->mem_size = aux->btf_var.mem_size;
16553 			break;
16554 		case PTR_TO_BTF_ID:
16555 			dst_reg->btf = aux->btf_var.btf;
16556 			dst_reg->btf_id = aux->btf_var.btf_id;
16557 			break;
16558 		default:
16559 			verifier_bug(env, "pseudo btf id: unexpected dst reg type");
16560 			return -EFAULT;
16561 		}
16562 		return 0;
16563 	}
16564 
16565 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
16566 		struct bpf_prog_aux *aux = env->prog->aux;
16567 		u32 subprogno = bpf_find_subprog(env,
16568 						 env->insn_idx + insn->imm + 1);
16569 
16570 		if (!aux->func_info) {
16571 			verbose(env, "missing btf func_info\n");
16572 			return -EINVAL;
16573 		}
16574 		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
16575 			verbose(env, "callback function not static\n");
16576 			return -EINVAL;
16577 		}
16578 
16579 		dst_reg->type = PTR_TO_FUNC;
16580 		dst_reg->subprogno = subprogno;
16581 		return 0;
16582 	}
16583 
16584 	map = env->used_maps[aux->map_index];
16585 
16586 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
16587 	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
		/* arena map values are plain unknown scalars for tracking */
16588 		if (map->map_type == BPF_MAP_TYPE_ARENA) {
16589 			__mark_reg_unknown(env, dst_reg);
16590 			dst_reg->map_ptr = map;
16591 			return 0;
16592 		}
16593 		__mark_reg_known(dst_reg, aux->map_off);
16594 		dst_reg->type = PTR_TO_MAP_VALUE;
16595 		dst_reg->map_ptr = map;
16596 		WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY &&
16597 			     map->max_entries != 1);
16598 		/* We want reg->id to be same (0) as map_value is not distinct */
16599 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
16600 		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
16601 		dst_reg->type = CONST_PTR_TO_MAP;
16602 		dst_reg->map_ptr = map;
16603 	} else {
16604 		verifier_bug(env, "unexpected src reg value for ldimm64");
16605 		return -EFAULT;
16606 	}
16607 
16608 	return 0;
16609 }
16610 
may_access_skb(enum bpf_prog_type type)16611 static bool may_access_skb(enum bpf_prog_type type)
16612 {
16613 	switch (type) {
16614 	case BPF_PROG_TYPE_SOCKET_FILTER:
16615 	case BPF_PROG_TYPE_SCHED_CLS:
16616 	case BPF_PROG_TYPE_SCHED_ACT:
16617 		return true;
16618 	default:
16619 		return false;
16620 	}
16621 }
16622 
16623 /* verify safety of LD_ABS|LD_IND instructions:
16624  * - they can only appear in the programs where ctx == skb
16625  * - since they are wrappers of function calls, they scratch R1-R5 registers,
16626  *   preserve R6-R9, and store return value into R0
16627  *
16628  * Implicit input:
16629  *   ctx == skb == R6 == CTX
16630  *
16631  * Explicit input:
16632  *   SRC == any register
16633  *   IMM == 32-bit immediate
16634  *
16635  * Output:
16636  *   R0 - 8/16/32-bit skb data converted to cpu endianness
16637  */
/* Returns 0 on success or a negative error. */
check_ld_abs(struct bpf_verifier_env * env,struct bpf_insn * insn)16638 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
16639 {
16640 	struct bpf_reg_state *regs = cur_regs(env);
16641 	static const int ctx_reg = BPF_REG_6;
16642 	u8 mode = BPF_MODE(insn->code);
16643 	int i, err;
16644 
16645 	if (!may_access_skb(resolve_prog_type(env->prog))) {
16646 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
16647 		return -EINVAL;
16648 	}
16649 
16650 	if (!env->ops->gen_ld_abs) {
16651 		verifier_bug(env, "gen_ld_abs is null");
16652 		return -EFAULT;
16653 	}
16654 
16655 	/* check whether implicit source operand (register R6) is readable */
16656 	err = check_reg_arg(env, ctx_reg, SRC_OP);
16657 	if (err)
16658 		return err;
16659 
16660 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
16661 	 * gen_ld_abs() may terminate the program at runtime, leading to
16662 	 * reference leak.
16663 	 */
16664 	err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]");
16665 	if (err)
16666 		return err;
16667 
16668 	if (regs[ctx_reg].type != PTR_TO_CTX) {
16669 		verbose(env,
16670 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
16671 		return -EINVAL;
16672 	}
16673 
16674 	if (mode == BPF_IND) {
16675 		/* check explicit source operand */
16676 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
16677 		if (err)
16678 			return err;
16679 	}
16680 
16681 	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
16682 	if (err < 0)
16683 		return err;
16684 
16685 	/* reset caller saved regs to unreadable */
16686 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
16687 		bpf_mark_reg_not_init(env, &regs[caller_saved[i]]);
16688 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
16689 	}
16690 
16691 	/* mark destination R0 register as readable, since it contains
16692 	 * the value fetched from the packet.
16693 	 * Already marked as written above.
16694 	 */
16695 	mark_reg_unknown(env, regs, BPF_REG_0);
16696 	/* ld_abs load up to 32-bit skb data. */
16697 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
16698 	/*
16699 	 * See bpf_gen_ld_abs() which emits a hidden BPF_EXIT with r0=0
16700 	 * which must be explored by the verifier when in a subprog.
16701 	 */
16702 	if (env->cur_state->curframe) {
16703 		struct bpf_verifier_state *branch;
16704 
16705 		mark_reg_scratched(env, BPF_REG_0);
		/* queue the normal continuation, then simulate the r0=0 exit path */
16706 		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
16707 		if (IS_ERR(branch))
16708 			return PTR_ERR(branch);
16709 		mark_reg_known_zero(env, regs, BPF_REG_0);
16710 		err = prepare_func_exit(env, &env->insn_idx);
16711 		if (err)
16712 			return err;
16713 		env->insn_idx--;
16714 	}
16715 	return 0;
16716 }
16717 
16718 
/*
 * Compute the allowed return value range for the current program type /
 * expected attach type into *range.  Returns false when any return value
 * is permitted (no enforcement), true when *range must be enforced.
 * The default enforced range is [0, 1].
 */
return_retval_range(struct bpf_verifier_env * env,struct bpf_retval_range * range)16719 static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
16720 {
16721 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
16722 
16723 	/* Default return value range. */
16724 	*range = retval_range(0, 1);
16725 
16726 	switch (prog_type) {
16727 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
16728 		switch (env->prog->expected_attach_type) {
16729 		case BPF_CGROUP_UDP4_RECVMSG:
16730 		case BPF_CGROUP_UDP6_RECVMSG:
16731 		case BPF_CGROUP_UNIX_RECVMSG:
16732 		case BPF_CGROUP_INET4_GETPEERNAME:
16733 		case BPF_CGROUP_INET6_GETPEERNAME:
16734 		case BPF_CGROUP_UNIX_GETPEERNAME:
16735 		case BPF_CGROUP_INET4_GETSOCKNAME:
16736 		case BPF_CGROUP_INET6_GETSOCKNAME:
16737 		case BPF_CGROUP_UNIX_GETSOCKNAME:
16738 			*range = retval_range(1, 1);
16739 			break;
16740 		case BPF_CGROUP_INET4_BIND:
16741 		case BPF_CGROUP_INET6_BIND:
16742 			*range = retval_range(0, 3);
16743 			break;
16744 		default:
16745 			break;
16746 		}
16747 		break;
16748 	case BPF_PROG_TYPE_CGROUP_SKB:
16749 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)
16750 			*range = retval_range(0, 3);
16751 		break;
16752 	case BPF_PROG_TYPE_CGROUP_SOCK:
16753 	case BPF_PROG_TYPE_SOCK_OPS:
16754 	case BPF_PROG_TYPE_CGROUP_DEVICE:
16755 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
16756 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		/* default [0, 1] range applies */
16757 		break;
16758 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
16759 		if (!env->prog->aux->attach_btf_id)
16760 			return false;
16761 		*range = retval_range(0, 0);
16762 		break;
16763 	case BPF_PROG_TYPE_TRACING:
16764 		switch (env->prog->expected_attach_type) {
16765 		case BPF_TRACE_FENTRY:
16766 		case BPF_TRACE_FEXIT:
16767 		case BPF_TRACE_FSESSION:
16768 			*range = retval_range(0, 0);
16769 			break;
16770 		case BPF_TRACE_RAW_TP:
16771 		case BPF_MODIFY_RETURN:
16772 			return false;
16773 		case BPF_TRACE_ITER:
16774 		default:
16775 			break;
16776 		}
16777 		break;
16778 	case BPF_PROG_TYPE_KPROBE:
16779 		switch (env->prog->expected_attach_type) {
16780 		case BPF_TRACE_KPROBE_SESSION:
16781 		case BPF_TRACE_UPROBE_SESSION:
16782 			break;
16783 		default:
16784 			return false;
16785 		}
16786 		break;
16787 	case BPF_PROG_TYPE_SK_LOOKUP:
16788 		*range = retval_range(SK_DROP, SK_PASS);
16789 		break;
16790 
16791 	case BPF_PROG_TYPE_LSM:
16792 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
16793 			/* no range found, any return value is allowed */
16794 			if (!get_func_retval_range(env->prog, range))
16795 				return false;
16796 			/* no restricted range, any return value is allowed */
16797 			if (range->minval == S32_MIN && range->maxval == S32_MAX)
16798 				return false;
16799 			range->return_32bit = true;
16800 		} else if (!env->prog->aux->attach_func_proto->type) {
16801 			/* Make sure programs that attach to void
16802 			 * hooks don't try to modify return value.
16803 			 */
16804 			*range = retval_range(1, 1);
16805 		}
16806 		break;
16807 
16808 	case BPF_PROG_TYPE_NETFILTER:
16809 		*range = retval_range(NF_DROP, NF_ACCEPT);
16810 		break;
16811 	case BPF_PROG_TYPE_STRUCT_OPS:
16812 		*range = retval_range(0, 0);
16813 		break;
16814 	case BPF_PROG_TYPE_EXT:
16815 		/* freplace program can return anything as its return value
16816 		 * depends on the to-be-replaced kernel func or bpf program.
16817 		 */
16818 	default:
16819 		return false;
16820 	}
16821 
16822 	/* Continue calculating. */
16823 
16824 	return true;
16825 }
16826 
program_returns_void(struct bpf_verifier_env * env)16827 static bool program_returns_void(struct bpf_verifier_env *env)
16828 {
16829 	const struct bpf_prog *prog = env->prog;
16830 	enum bpf_prog_type prog_type = prog->type;
16831 
16832 	switch (prog_type) {
16833 	case BPF_PROG_TYPE_LSM:
16834 		/* See return_retval_range, for BPF_LSM_CGROUP can be 0 or 0-1 depending on hook. */
16835 		if (prog->expected_attach_type != BPF_LSM_CGROUP &&
16836 		    !prog->aux->attach_func_proto->type)
16837 			return true;
16838 		break;
16839 	case BPF_PROG_TYPE_STRUCT_OPS:
16840 		if (!prog->aux->attach_func_proto->type)
16841 			return true;
16842 		break;
16843 	case BPF_PROG_TYPE_EXT:
16844 		/*
16845 		 * If the actual program is an extension, let it
16846 		 * return void - attaching will succeed only if the
16847 		 * program being replaced also returns void, and since
16848 		 * it has passed verification its actual type doesn't matter.
16849 		 */
16850 		if (subprog_returns_void(env, 0))
16851 			return true;
16852 		break;
16853 	default:
16854 		break;
16855 	}
16856 	return false;
16857 }
16858 
/* Validate the return value register @regno at program (or async
 * callback) exit against the value range permitted for this program
 * type. @reg_name is only used for diagnostics. Returns 0 when the
 * value is acceptable, a negative error otherwise.
 */
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
{
	const char *exit_ctx = "At program exit";
	struct tnum enforce_attach_type_range = tnum_unknown;
	const struct bpf_prog *prog = env->prog;
	struct bpf_reg_state *reg = reg_state(env, regno);
	struct bpf_retval_range range = retval_range(0, 1);
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	struct bpf_func_state *frame = env->cur_state->frame[0];
	const struct btf_type *reg_type, *ret_type = NULL;
	int err;

	/* LSM and struct_ops func-ptr's return type could be "void" */
	if (!frame->in_async_callback_fn && program_returns_void(env))
		return 0;

	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
		/* Allow a struct_ops program to return a referenced kptr if it
		 * matches the operator's return type and is in its unmodified
		 * form. A scalar zero (i.e., a null pointer) is also allowed.
		 */
		reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
		ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
						prog->aux->attach_func_proto->type,
						NULL);
		if (ret_type && ret_type == reg_type && reg->ref_obj_id)
			return __check_ptr_off_reg(env, reg, regno, false);
	}

	/* eBPF calling convention is such that R0 is used
	 * to return the value from eBPF program.
	 * Make sure that it's readable at this time
	 * of bpf_exit, which means that program wrote
	 * something into it earlier
	 */
	err = check_reg_arg(env, regno, SRC_OP);
	if (err)
		return err;

	if (is_pointer_value(env, regno)) {
		verbose(env, "R%d leaks addr as return value\n", regno);
		return -EACCES;
	}

	if (frame->in_async_callback_fn) {
		/* async callbacks carry their own permitted range */
		exit_ctx = "At async callback return";
		range = frame->callback_ret_range;
		goto enforce_retval;
	}

	/* struct_ops with a void BTF return type: nothing to enforce */
	if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type)
		return 0;

	if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS))
		enforce_attach_type_range = tnum_range(2, 3);

	if (!return_retval_range(env, &range))
		return 0;

enforce_retval:
	if (reg->type != SCALAR_VALUE) {
		verbose(env, "%s the register R%d is not a known value (%s)\n",
			exit_ctx, regno, reg_type_str(env, reg->type));
		return -EINVAL;
	}

	err = mark_chain_precision(env, regno);
	if (err)
		return err;

	if (!retval_range_within(range, reg)) {
		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
		    prog_type == BPF_PROG_TYPE_LSM &&
		    !prog->aux->attach_func_proto->type)
			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
		return -EINVAL;
	}

	/* remember that the (2,3) range was actually used, see
	 * enforce_attach_type_range assignment above
	 */
	if (!tnum_is_unknown(enforce_attach_type_range) &&
	    tnum_in(enforce_attach_type_range, reg->var_off))
		env->prog->enforce_expected_attach_type = 1;
	return 0;
}
16943 
check_global_subprog_return_code(struct bpf_verifier_env * env)16944 static int check_global_subprog_return_code(struct bpf_verifier_env *env)
16945 {
16946 	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
16947 	struct bpf_func_state *cur_frame = cur_func(env);
16948 	int err;
16949 
16950 	if (subprog_returns_void(env, cur_frame->subprogno))
16951 		return 0;
16952 
16953 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
16954 	if (err)
16955 		return err;
16956 
16957 	if (is_pointer_value(env, BPF_REG_0)) {
16958 		verbose(env, "R%d leaks addr as return value\n", BPF_REG_0);
16959 		return -EACCES;
16960 	}
16961 
16962 	if (reg->type != SCALAR_VALUE) {
16963 		verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
16964 			reg_type_str(env, reg->type));
16965 		return -EINVAL;
16966 	}
16967 
16968 	return 0;
16969 }
16970 
16971 /* Bitmask with 1s for all caller saved registers */
16972 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
16973 
16974 /* True if do_misc_fixups() replaces calls to helper number 'imm',
16975  * replacement patch is presumed to follow bpf_fastcall contract
16976  * (see mark_fastcall_pattern_for_call() below).
16977  */
bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
{
	switch (imm) {
#ifdef CONFIG_X86_64
	case BPF_FUNC_get_smp_processor_id:
#ifdef CONFIG_SMP
	case BPF_FUNC_get_current_task_btf:
	case BPF_FUNC_get_current_task:
#endif
		/* these helpers are patched with per-CPU instructions, so the
		 * replacement is only emitted when the JIT supports them
		 */
		return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
#endif
	default:
		return false;
	}
}
16993 
16994 /* If @call is a kfunc or helper call, fills @cs and returns true,
16995  * otherwise returns false.
16996  */
bpf_get_call_summary(struct bpf_verifier_env * env,struct bpf_insn * call,struct bpf_call_summary * cs)16997 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
16998 			  struct bpf_call_summary *cs)
16999 {
17000 	struct bpf_kfunc_call_arg_meta meta;
17001 	const struct bpf_func_proto *fn;
17002 	int i;
17003 
17004 	if (bpf_helper_call(call)) {
17005 
17006 		if (bpf_get_helper_proto(env, call->imm, &fn) < 0)
17007 			/* error would be reported later */
17008 			return false;
17009 		cs->fastcall = fn->allow_fastcall &&
17010 			       (bpf_verifier_inlines_helper_call(env, call->imm) ||
17011 				bpf_jit_inlines_helper_call(call->imm));
17012 		cs->is_void = fn->ret_type == RET_VOID;
17013 		cs->num_params = 0;
17014 		for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
17015 			if (fn->arg_type[i] == ARG_DONTCARE)
17016 				break;
17017 			cs->num_params++;
17018 		}
17019 		return true;
17020 	}
17021 
17022 	if (bpf_pseudo_kfunc_call(call)) {
17023 		int err;
17024 
17025 		err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta);
17026 		if (err < 0)
17027 			/* error would be reported later */
17028 			return false;
17029 		cs->num_params = btf_type_vlen(meta.func_proto);
17030 		cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
17031 		cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
17032 		return true;
17033 	}
17034 
17035 	return false;
17036 }
17037 
/* LLVM defines a bpf_fastcall function attribute.
17039  * This attribute means that function scratches only some of
17040  * the caller saved registers defined by ABI.
17041  * For BPF the set of such registers could be defined as follows:
17042  * - R0 is scratched only if function is non-void;
17043  * - R1-R5 are scratched only if corresponding parameter type is defined
17044  *   in the function prototype.
17045  *
17046  * The contract between kernel and clang allows to simultaneously use
17047  * such functions and maintain backwards compatibility with old
17048  * kernels that don't understand bpf_fastcall calls:
17049  *
17050  * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5
17051  *   registers are not scratched by the call;
17052  *
17053  * - as a post-processing step, clang visits each bpf_fastcall call and adds
17054  *   spill/fill for every live r0-r5;
17055  *
17056  * - stack offsets used for the spill/fill are allocated as lowest
17057  *   stack offsets in whole function and are not used for any other
17058  *   purposes;
17059  *
17060  * - when kernel loads a program, it looks for such patterns
17061  *   (bpf_fastcall function surrounded by spills/fills) and checks if
17062  *   spill/fill stack offsets are used exclusively in fastcall patterns;
17063  *
17064  * - if so, and if verifier or current JIT inlines the call to the
17065  *   bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
17066  *   spill/fill pairs;
17067  *
17068  * - when old kernel loads a program, presence of spill/fill pairs
17069  *   keeps BPF program valid, albeit slightly less efficient.
17070  *
17071  * For example:
17072  *
17073  *   r1 = 1;
17074  *   r2 = 2;
17075  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
17076  *   *(u64 *)(r10 - 16) = r2;            r2 = 2;
17077  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
17078  *   r2 = *(u64 *)(r10 - 16);            r0 = r1;
17079  *   r1 = *(u64 *)(r10 - 8);             r0 += r2;
17080  *   r0 = r1;                            exit;
17081  *   r0 += r2;
17082  *   exit;
17083  *
17084  * The purpose of mark_fastcall_pattern_for_call is to:
17085  * - look for such patterns;
17086  * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
 * - set env->insn_aux_data[*].fastcall_spills_num for call instruction;
17088  * - update env->subprog_info[*]->fastcall_stack_off to find an offset
17089  *   at which bpf_fastcall spill/fill stack slots start;
17090  * - update env->subprog_info[*]->keep_fastcall_stack.
17091  *
17092  * The .fastcall_pattern and .fastcall_stack_off are used by
17093  * check_fastcall_stack_contract() to check if every stack access to
17094  * fastcall spill/fill stack slot originates from spill/fill
17095  * instructions, members of fastcall patterns.
17096  *
17097  * If such condition holds true for a subprogram, fastcall patterns could
17098  * be rewritten by remove_fastcall_spills_fills().
17099  * Otherwise bpf_fastcall patterns are not changed in the subprogram
17100  * (code, presumably, generated by an older clang version).
17101  *
17102  * For example, it is *not* safe to remove spill/fill below:
17103  *
17104  *   r1 = 1;
17105  *   *(u64 *)(r10 - 8)  = r1;            r1 = 1;
17106  *   call %[to_be_inlined]         -->   call %[to_be_inlined]
17107  *   r1 = *(u64 *)(r10 - 8);             r0 = *(u64 *)(r10 - 8);  <---- wrong !!!
17108  *   r0 = *(u64 *)(r10 - 8);             r0 += r1;
17109  *   r0 += r1;                           exit;
17110  *   exit;
17111  */
static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
					   struct bpf_subprog_info *subprog,
					   int insn_idx, s16 lowest_off)
{
	struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
	struct bpf_insn *call = &env->prog->insnsi[insn_idx];
	u32 clobbered_regs_mask;
	struct bpf_call_summary cs;
	u32 expected_regs_mask;
	s16 off;
	int i;

	/* not a helper/kfunc call -> no pattern to look for */
	if (!bpf_get_call_summary(env, call, &cs))
		return;

	/* A bitmask specifying which caller saved registers are clobbered
	 * by a call to a helper/kfunc *as if* this helper/kfunc follows
	 * bpf_fastcall contract:
	 * - includes R0 if function is non-void;
	 * - includes R1-R5 if corresponding parameter is described
	 *   in the function prototype.
	 */
	clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
	/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
	expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;

	/* match pairs of form:
	 *
	 * *(u64 *)(r10 - Y) = rX   (where Y % 8 == 0)
	 * ...
	 * call %[to_be_inlined]
	 * ...
	 * rX = *(u64 *)(r10 - Y)
	 */
	for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
		if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
			break;
		stx = &insns[insn_idx - i];
		ldx = &insns[insn_idx + i];
		/* must be a stack spill/fill pair */
		if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
		    ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
		    stx->dst_reg != BPF_REG_10 ||
		    ldx->src_reg != BPF_REG_10)
			break;
		/* must be a spill/fill for the same reg */
		if (stx->src_reg != ldx->dst_reg)
			break;
		/* must be one of the previously unseen registers */
		if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
			break;
		/* must be a spill/fill for the same expected offset,
		 * no need to check offset alignment, BPF_DW stack access
		 * is always 8-byte aligned.
		 */
		if (stx->off != off || ldx->off != off)
			break;
		expected_regs_mask &= ~BIT(stx->src_reg);
		env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
		env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
	}
	/* i == 1 means not a single spill/fill pair matched */
	if (i == 1)
		return;

	/* Conditionally set 'fastcall_spills_num' to allow forward
	 * compatibility when more helper functions are marked as
	 * bpf_fastcall at compile time than current kernel supports, e.g:
	 *
	 *   1: *(u64 *)(r10 - 8) = r1
	 *   2: call A                  ;; assume A is bpf_fastcall for current kernel
	 *   3: r1 = *(u64 *)(r10 - 8)
	 *   4: *(u64 *)(r10 - 8) = r1
	 *   5: call B                  ;; assume B is not bpf_fastcall for current kernel
	 *   6: r1 = *(u64 *)(r10 - 8)
	 *
	 * There is no need to block bpf_fastcall rewrite for such program.
	 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
	 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
	 * does not remove spill/fill pair {4,6}.
	 */
	if (cs.fastcall)
		env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
	else
		subprog->keep_fastcall_stack = 1;
	subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
}
17198 
mark_fastcall_patterns(struct bpf_verifier_env * env)17199 static int mark_fastcall_patterns(struct bpf_verifier_env *env)
17200 {
17201 	struct bpf_subprog_info *subprog = env->subprog_info;
17202 	struct bpf_insn *insn;
17203 	s16 lowest_off;
17204 	int s, i;
17205 
17206 	for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
17207 		/* find lowest stack spill offset used in this subprog */
17208 		lowest_off = 0;
17209 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17210 			insn = env->prog->insnsi + i;
17211 			if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
17212 			    insn->dst_reg != BPF_REG_10)
17213 				continue;
17214 			lowest_off = min(lowest_off, insn->off);
17215 		}
17216 		/* use this offset to find fastcall patterns */
17217 		for (i = subprog->start; i < (subprog + 1)->start; ++i) {
17218 			insn = env->prog->insnsi + i;
17219 			if (insn->code != (BPF_JMP | BPF_CALL))
17220 				continue;
17221 			mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
17222 		}
17223 	}
17224 	return 0;
17225 }
17226 
adjust_btf_func(struct bpf_verifier_env * env)17227 static void adjust_btf_func(struct bpf_verifier_env *env)
17228 {
17229 	struct bpf_prog_aux *aux = env->prog->aux;
17230 	int i;
17231 
17232 	if (!aux->func_info)
17233 		return;
17234 
17235 	/* func_info is not available for hidden subprogs */
17236 	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
17237 		aux->func_info[i].insn_off = env->subprog_info[i].start;
17238 }
17239 
17240 /* Find id in idset and increment its count, or add new entry */
static void idset_cnt_inc(struct bpf_idset *idset, u32 id)
{
	u32 n = idset->num_ids;
	u32 i;

	for (i = 0; i < n; i++) {
		if (idset->entries[i].id != id)
			continue;
		idset->entries[i].cnt++;
		return;
	}
	/* id not seen before, append if there is room */
	if (n < BPF_ID_MAP_SIZE) {
		idset->entries[n].id = id;
		idset->entries[n].cnt = 1;
		idset->num_ids = n + 1;
	}
}
17258 
17259 /* Find id in idset and return its count, or 0 if not found */
static u32 idset_cnt_get(struct bpf_idset *idset, u32 id)
{
	u32 i = idset->num_ids;

	/* scan order is irrelevant: ids are unique within the set */
	while (i--) {
		if (idset->entries[i].id == id)
			return idset->entries[i].cnt;
	}
	return 0;
}
17270 
17271 /*
17272  * Clear singular scalar ids in a state.
17273  * A register with a non-zero id is called singular if no other register shares
17274  * the same base id. Such registers can be treated as independent (id=0).
17275  */
void bpf_clear_singular_ids(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *st)
{
	struct bpf_idset *idset = &env->idset_scratch;
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;

	idset->num_ids = 0;

	/* First pass: count how many scalar registers share each base id.
	 * BPF_ADD_CONST is masked out so variants of the same id compare
	 * equal.
	 */
	bpf_for_each_reg_in_vstate(st, func, reg, ({
		if (reg->type != SCALAR_VALUE)
			continue;
		if (!reg->id)
			continue;
		idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST);
	}));

	/* Second pass: a base id seen exactly once is singular; the
	 * register does not alias anything and its id can be dropped.
	 */
	bpf_for_each_reg_in_vstate(st, func, reg, ({
		if (reg->type != SCALAR_VALUE)
			continue;
		if (!reg->id)
			continue;
		if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1)
			clear_scalar_id(reg);
	}));
}
17302 
17303 /* Return true if it's OK to have the same insn return a different type. */
reg_type_mismatch_ok(enum bpf_reg_type type)17304 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
17305 {
17306 	switch (base_type(type)) {
17307 	case PTR_TO_CTX:
17308 	case PTR_TO_SOCKET:
17309 	case PTR_TO_SOCK_COMMON:
17310 	case PTR_TO_TCP_SOCK:
17311 	case PTR_TO_XDP_SOCK:
17312 	case PTR_TO_BTF_ID:
17313 	case PTR_TO_ARENA:
17314 		return false;
17315 	default:
17316 		return true;
17317 	}
17318 }
17319 
17320 /* If an instruction was previously used with particular pointer types, then we
17321  * need to be careful to avoid cases such as the below, where it may be ok
17322  * for one branch accessing the pointer, but not ok for the other branch:
17323  *
17324  * R1 = sock_ptr
17325  * goto X;
17326  * ...
17327  * R1 = some_other_valid_ptr;
17328  * goto X;
17329  * ...
17330  * R2 = *(u32 *)(R1 + 0);
17331  */
reg_type_mismatch(enum bpf_reg_type src,enum bpf_reg_type prev)17332 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
17333 {
17334 	return src != prev && (!reg_type_mismatch_ok(src) ||
17335 			       !reg_type_mismatch_ok(prev));
17336 }
17337 
is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)17338 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
17339 {
17340 	switch (base_type(type)) {
17341 	case PTR_TO_MEM:
17342 	case PTR_TO_BTF_ID:
17343 		return true;
17344 	default:
17345 		return false;
17346 	}
17347 }
17348 
/* True if @type's base type is PTR_TO_MEM, ignoring flag modifiers */
static bool is_ptr_to_mem(enum bpf_reg_type type)
{
	return base_type(type) == PTR_TO_MEM;
}
17353 
/* Record the pointer type used by the memory access insn at
 * env->insn_idx, or check/merge it against the type recorded by a
 * previously verified path through the same insn. Returns 0 on
 * success, -EINVAL when the types are incompatible.
 */
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_mismatch)
{
	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
	enum bpf_reg_type merged_type;

	if (*prev_type == NOT_INIT) {
		/* Saw a valid insn
		 * dst_reg = *(u32 *)(src_reg + off)
		 * save type to validate intersecting paths
		 */
		*prev_type = type;
	} else if (reg_type_mismatch(type, *prev_type)) {
		/* Abuser program is trying to use the same insn
		 * dst_reg = *(u32*) (src_reg + off)
		 * with different pointer types:
		 * src_reg == ctx in one branch and
		 * src_reg == stack|map in some other branch.
		 * Reject it.
		 */
		if (allow_trust_mismatch &&
		    is_ptr_to_mem_or_btf_id(type) &&
		    is_ptr_to_mem_or_btf_id(*prev_type)) {
			/*
			 * Have to support a use case when one path through
			 * the program yields TRUSTED pointer while another
			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
			 * Same behavior of MEM_RDONLY flag.
			 */
			if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
				merged_type = PTR_TO_MEM;
			else
				merged_type = PTR_TO_BTF_ID;
			/* keep the weaker of the two trust/rdonly flags */
			if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
				merged_type |= PTR_UNTRUSTED;
			if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
				merged_type |= MEM_RDONLY;
			*prev_type = merged_type;
		} else {
			verbose(env, "same insn cannot be used with different pointers\n");
			return -EINVAL;
		}
	}

	return 0;
}
17401 
/* Positive (non-error) return codes of do_check_insn() and
 * process_bpf_exit_full().
 */
enum {
	/* verification of the current path is finished */
	PROCESS_BPF_EXIT = 1,
	/* env->insn_idx was already updated by the callee */
	INSN_IDX_UPDATED = 2,
};
17406 
/* Handle BPF_EXIT (or bpf_throw when @exception_exit is true): check
 * for resource leaks, unwind a nested function frame, or validate the
 * return code when leaving the outermost frame. Returns
 * PROCESS_BPF_EXIT or INSN_IDX_UPDATED on success, negative error
 * otherwise.
 */
static int process_bpf_exit_full(struct bpf_verifier_env *env,
				 bool *do_print_state,
				 bool exception_exit)
{
	struct bpf_func_state *cur_frame = cur_func(env);

	/* We must do check_reference_leak here before
	 * prepare_func_exit to handle the case when
	 * state->curframe > 0, it may be a callback function,
	 * for which reference_state must match caller reference
	 * state when it exits.
	 */
	int err = check_resource_leak(env, exception_exit,
				      exception_exit || !env->cur_state->curframe,
				      exception_exit ? "bpf_throw" :
				      "BPF_EXIT instruction in main prog");
	if (err)
		return err;

	/* The side effect of the prepare_func_exit which is
	 * being skipped is that it frees bpf_func_state.
	 * Typically, process_bpf_exit will only be hit with
	 * outermost exit. copy_verifier_state in pop_stack will
	 * handle freeing of any extra bpf_func_state left over
	 * from not processing all nested function exits. We
	 * also skip return code checks as they are not needed
	 * for exceptional exits.
	 */
	if (exception_exit)
		return PROCESS_BPF_EXIT;

	if (env->cur_state->curframe) {
		/* exit from nested function */
		err = prepare_func_exit(env, &env->insn_idx);
		if (err)
			return err;
		*do_print_state = true;
		return INSN_IDX_UPDATED;
	}

	/*
	 * Return from a regular global subprogram differs from return
	 * from the main program or async/exception callback.
	 * Main program exit implies return code restrictions
	 * that depend on program type.
	 * Exit from exception callback is equivalent to main program exit.
	 * Exit from async callback implies return code restrictions
	 * that depend on async scheduling mechanism.
	 */
	if (cur_frame->subprogno &&
	    !cur_frame->in_async_callback_fn &&
	    !cur_frame->in_exception_callback_fn)
		err = check_global_subprog_return_code(env);
	else
		err = check_return_code(env, BPF_REG_0, "R0");
	if (err)
		return err;
	return PROCESS_BPF_EXIT;
}
17466 
/* Convert the byte-offset bounds of register @regno into element index
 * bounds of the 8-byte-per-entry jump table @map. On success the
 * indices are stored in *pmin_index/*pmax_index; a negative error is
 * returned when the bounds overflow or fall outside the table.
 */
static int indirect_jump_min_max_index(struct bpf_verifier_env *env,
				       int regno,
				       struct bpf_map *map,
				       u32 *pmin_index, u32 *pmax_index)
{
	struct bpf_reg_state *reg = reg_state(env, regno);
	const u32 elem_size = 8;
	u64 lo = reg->umin_value;
	u64 hi = reg->umax_value;

	if (lo > (u64) U32_MAX * elem_size) {
		verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg->umin_value);
		return -ERANGE;
	}
	if (hi > (u64) U32_MAX * elem_size) {
		verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg->umax_value);
		return -ERANGE;
	}

	/* byte offsets -> element indices */
	lo /= elem_size;
	hi /= elem_size;

	if (hi >= map->max_entries) {
		verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n",
			     regno, lo, hi, map->max_entries);
		return -EINVAL;
	}

	*pmin_index = lo;
	*pmax_index = hi;
	return 0;
}
17499 
17500 /* gotox *dst_reg */
/* Verify an indirect jump insn. @insn's dst_reg must carry a
 * PTR_TO_INSN backed by an insn-array map; every distinct jump target
 * within the register's index bounds is queued for verification.
 */
static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	struct bpf_verifier_state *other_branch;
	struct bpf_reg_state *dst_reg;
	struct bpf_map *map;
	u32 min_index, max_index;
	int err = 0;
	int n;
	int i;

	dst_reg = reg_state(env, insn->dst_reg);
	if (dst_reg->type != PTR_TO_INSN) {
		verbose(env, "R%d has type %s, expected PTR_TO_INSN\n",
			     insn->dst_reg, reg_type_str(env, dst_reg->type));
		return -EINVAL;
	}

	map = dst_reg->map_ptr;
	if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg))
		return -EFAULT;

	if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env,
			    "R%d has incorrect map type %d", insn->dst_reg, map->map_type))
		return -EFAULT;

	/* bound the set of map elements the register may index */
	err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index);
	if (err)
		return err;

	/* Ensure that the buffer is large enough */
	if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) {
		env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf,
						        max_index - min_index + 1);
		if (!env->gotox_tmp_buf)
			return -ENOMEM;
	}

	/* collect the distinct jump targets from the map */
	n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items);
	if (n < 0)
		return n;
	if (n == 0) {
		verbose(env, "register R%d doesn't point to any offset in map id=%d\n",
			     insn->dst_reg, map->id);
		return -EINVAL;
	}

	/* push all targets but the last for later exploration; continue
	 * verification directly with the last one
	 */
	for (i = 0; i < n - 1; i++) {
		other_branch = push_stack(env, env->gotox_tmp_buf->items[i],
					  env->insn_idx, env->cur_state->speculative);
		if (IS_ERR(other_branch))
			return PTR_ERR(other_branch);
	}
	env->insn_idx = env->gotox_tmp_buf->items[n-1];
	return INSN_IDX_UPDATED;
}
17556 
/* Verify the single instruction at env->insn_idx. Returns 0 when the
 * caller should advance to the next insn, INSN_IDX_UPDATED when
 * env->insn_idx was already set here, PROCESS_BPF_EXIT when the path
 * is finished, or a negative error.
 */
static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
{
	int err;
	struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
	u8 class = BPF_CLASS(insn->code);

	switch (class) {
	case BPF_ALU:
	case BPF_ALU64:
		return check_alu_op(env, insn);

	case BPF_LDX:
		return check_load_mem(env, insn, false,
				      BPF_MODE(insn->code) == BPF_MEMSX,
				      true, "ldx");

	case BPF_STX:
		if (BPF_MODE(insn->code) == BPF_ATOMIC)
			return check_atomic(env, insn);
		return check_store_reg(env, insn, false);

	case BPF_ST: {
		enum bpf_reg_type dst_reg_type;

		/* destination register must already hold a readable value */
		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
		if (err)
			return err;

		dst_reg_type = cur_regs(env)[insn->dst_reg].type;

		/* check that (dst_reg + off) is writeable */
		err = check_mem_access(env, env->insn_idx, insn->dst_reg,
				       insn->off, BPF_SIZE(insn->code),
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;

		/* remember the pointer type so intersecting paths through
		 * this insn can be cross-checked, see save_aux_ptr_type()
		 */
		return save_aux_ptr_type(env, dst_reg_type, false);
	}
	case BPF_JMP:
	case BPF_JMP32: {
		u8 opcode = BPF_OP(insn->code);

		env->jmps_processed++;
		if (opcode == BPF_CALL) {
			/* under a spin lock only bpf_spin_unlock,
			 * bpf_kptr_xchg and lock-aware kfuncs may be called
			 */
			if (env->cur_state->active_locks) {
				if ((insn->src_reg == BPF_REG_0 &&
				     insn->imm != BPF_FUNC_spin_unlock &&
				     insn->imm != BPF_FUNC_kptr_xchg) ||
				    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
				     (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
					verbose(env,
						"function calls are not allowed while holding a lock\n");
					return -EINVAL;
				}
			}
			mark_reg_scratched(env, BPF_REG_0);
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return check_func_call(env, insn, &env->insn_idx);
			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
				return check_kfunc_call(env, insn, &env->insn_idx);
			return check_helper_call(env, insn, &env->insn_idx);
		} else if (opcode == BPF_JA) {
			/* BPF_JA | BPF_X is an indirect jump (gotox) */
			if (BPF_SRC(insn->code) == BPF_X)
				return check_indirect_jump(env, insn);

			/* unconditional jump: target offset lives in ->off
			 * for BPF_JMP and in ->imm for BPF_JMP32
			 */
			if (class == BPF_JMP)
				env->insn_idx += insn->off + 1;
			else
				env->insn_idx += insn->imm + 1;
			return INSN_IDX_UPDATED;
		} else if (opcode == BPF_EXIT) {
			return process_bpf_exit_full(env, do_print_state, false);
		}
		return check_cond_jmp_op(env, insn, &env->insn_idx);
	}
	case BPF_LD: {
		u8 mode = BPF_MODE(insn->code);

		if (mode == BPF_ABS || mode == BPF_IND)
			return check_ld_abs(env, insn);

		if (mode == BPF_IMM) {
			err = check_ld_imm(env, insn);
			if (err)
				return err;

			/* ld_imm occupies two insns, step over the second one */
			env->insn_idx++;
			sanitize_mark_insn_seen(env);
		}
		return 0;
	}
	}
	/* all class values are handled above. silence compiler warning */
	return -EFAULT;
}
17652 
do_check(struct bpf_verifier_env * env)17653 static int do_check(struct bpf_verifier_env *env)
17654 {
17655 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
17656 	struct bpf_verifier_state *state = env->cur_state;
17657 	struct bpf_insn *insns = env->prog->insnsi;
17658 	int insn_cnt = env->prog->len;
17659 	bool do_print_state = false;
17660 	int prev_insn_idx = -1;
17661 
17662 	for (;;) {
17663 		struct bpf_insn *insn;
17664 		struct bpf_insn_aux_data *insn_aux;
17665 		int err;
17666 
17667 		/* reset current history entry on each new instruction */
17668 		env->cur_hist_ent = NULL;
17669 
17670 		env->prev_insn_idx = prev_insn_idx;
17671 		if (env->insn_idx >= insn_cnt) {
17672 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
17673 				env->insn_idx, insn_cnt);
17674 			return -EFAULT;
17675 		}
17676 
17677 		insn = &insns[env->insn_idx];
17678 		insn_aux = &env->insn_aux_data[env->insn_idx];
17679 
17680 		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
17681 			verbose(env,
17682 				"BPF program is too large. Processed %d insn\n",
17683 				env->insn_processed);
17684 			return -E2BIG;
17685 		}
17686 
17687 		state->last_insn_idx = env->prev_insn_idx;
17688 		state->insn_idx = env->insn_idx;
17689 
17690 		if (bpf_is_prune_point(env, env->insn_idx)) {
17691 			err = bpf_is_state_visited(env, env->insn_idx);
17692 			if (err < 0)
17693 				return err;
17694 			if (err == 1) {
17695 				/* found equivalent state, can prune the search */
17696 				if (env->log.level & BPF_LOG_LEVEL) {
17697 					if (do_print_state)
17698 						verbose(env, "\nfrom %d to %d%s: safe\n",
17699 							env->prev_insn_idx, env->insn_idx,
17700 							env->cur_state->speculative ?
17701 							" (speculative execution)" : "");
17702 					else
17703 						verbose(env, "%d: safe\n", env->insn_idx);
17704 				}
17705 				goto process_bpf_exit;
17706 			}
17707 		}
17708 
17709 		if (bpf_is_jmp_point(env, env->insn_idx)) {
17710 			err = bpf_push_jmp_history(env, state, 0, 0);
17711 			if (err)
17712 				return err;
17713 		}
17714 
17715 		if (signal_pending(current))
17716 			return -EAGAIN;
17717 
17718 		if (need_resched())
17719 			cond_resched();
17720 
17721 		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
17722 			verbose(env, "\nfrom %d to %d%s:",
17723 				env->prev_insn_idx, env->insn_idx,
17724 				env->cur_state->speculative ?
17725 				" (speculative execution)" : "");
17726 			print_verifier_state(env, state, state->curframe, true);
17727 			do_print_state = false;
17728 		}
17729 
17730 		if (env->log.level & BPF_LOG_LEVEL) {
17731 			if (verifier_state_scratched(env))
17732 				print_insn_state(env, state, state->curframe);
17733 
17734 			verbose_linfo(env, env->insn_idx, "; ");
17735 			env->prev_log_pos = env->log.end_pos;
17736 			verbose(env, "%d: ", env->insn_idx);
17737 			bpf_verbose_insn(env, insn);
17738 			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
17739 			env->prev_log_pos = env->log.end_pos;
17740 		}
17741 
17742 		if (bpf_prog_is_offloaded(env->prog->aux)) {
17743 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
17744 							   env->prev_insn_idx);
17745 			if (err)
17746 				return err;
17747 		}
17748 
17749 		sanitize_mark_insn_seen(env);
17750 		prev_insn_idx = env->insn_idx;
17751 
17752 		/* Sanity check: precomputed constants must match verifier state */
17753 		if (!state->speculative && insn_aux->const_reg_mask) {
17754 			struct bpf_reg_state *regs = cur_regs(env);
17755 			u16 mask = insn_aux->const_reg_mask;
17756 
17757 			for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) {
17758 				u32 cval = insn_aux->const_reg_vals[r];
17759 
17760 				if (!(mask & BIT(r)))
17761 					continue;
17762 				if (regs[r].type != SCALAR_VALUE)
17763 					continue;
17764 				if (!tnum_is_const(regs[r].var_off))
17765 					continue;
17766 				if (verifier_bug_if((u32)regs[r].var_off.value != cval,
17767 						    env, "const R%d: %u != %llu",
17768 						    r, cval, regs[r].var_off.value))
17769 					return -EFAULT;
17770 			}
17771 		}
17772 
17773 		/* Reduce verification complexity by stopping speculative path
17774 		 * verification when a nospec is encountered.
17775 		 */
17776 		if (state->speculative && insn_aux->nospec)
17777 			goto process_bpf_exit;
17778 
17779 		err = do_check_insn(env, &do_print_state);
17780 		if (error_recoverable_with_nospec(err) && state->speculative) {
17781 			/* Prevent this speculative path from ever reaching the
17782 			 * insn that would have been unsafe to execute.
17783 			 */
17784 			insn_aux->nospec = true;
17785 			/* If it was an ADD/SUB insn, potentially remove any
17786 			 * markings for alu sanitization.
17787 			 */
17788 			insn_aux->alu_state = 0;
17789 			goto process_bpf_exit;
17790 		} else if (err < 0) {
17791 			return err;
17792 		} else if (err == PROCESS_BPF_EXIT) {
17793 			goto process_bpf_exit;
17794 		} else if (err == INSN_IDX_UPDATED) {
17795 		} else if (err == 0) {
17796 			env->insn_idx++;
17797 		}
17798 
17799 		if (state->speculative && insn_aux->nospec_result) {
17800 			/* If we are on a path that performed a jump-op, this
17801 			 * may skip a nospec patched-in after the jump. This can
17802 			 * currently never happen because nospec_result is only
17803 			 * used for the write-ops
17804 			 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper
17805 			 * calls. These must never skip the following insn
17806 			 * (i.e., bpf_insn_successors()'s opcode_info.can_jump
17807 			 * is false). Still, add a warning to document this in
17808 			 * case nospec_result is used elsewhere in the future.
17809 			 *
17810 			 * All non-branch instructions have a single
17811 			 * fall-through edge. For these, nospec_result should
17812 			 * already work.
17813 			 */
17814 			if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP ||
17815 					     BPF_CLASS(insn->code) == BPF_JMP32) &&
17816 					    BPF_OP(insn->code) != BPF_CALL, env,
17817 					    "speculation barrier after jump instruction may not have the desired effect"))
17818 				return -EFAULT;
17819 process_bpf_exit:
17820 			mark_verifier_state_scratched(env);
17821 			err = bpf_update_branch_counts(env, env->cur_state);
17822 			if (err)
17823 				return err;
17824 			err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
17825 					pop_log);
17826 			if (err < 0) {
17827 				if (err != -ENOENT)
17828 					return err;
17829 				break;
17830 			} else {
17831 				do_print_state = true;
17832 				continue;
17833 			}
17834 		}
17835 	}
17836 
17837 	return 0;
17838 }
17839 
find_btf_percpu_datasec(struct btf * btf)17840 static int find_btf_percpu_datasec(struct btf *btf)
17841 {
17842 	const struct btf_type *t;
17843 	const char *tname;
17844 	int i, n;
17845 
17846 	/*
17847 	 * Both vmlinux and module each have their own ".data..percpu"
17848 	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17849 	 * types to look at only module's own BTF types.
17850 	 */
17851 	n = btf_nr_types(btf);
17852 	for (i = btf_named_start_id(btf, true); i < n; i++) {
17853 		t = btf_type_by_id(btf, i);
17854 		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17855 			continue;
17856 
17857 		tname = btf_name_by_offset(btf, t->name_off);
17858 		if (!strcmp(tname, ".data..percpu"))
17859 			return i;
17860 	}
17861 
17862 	return -ENOENT;
17863 }
17864 
17865 /*
17866  * Add btf to the env->used_btfs array. If needed, refcount the
17867  * corresponding kernel module. To simplify caller's logic
17868  * in case of error or if btf was added before the function
17869  * decreases the btf refcount.
17870  */
__add_used_btf(struct bpf_verifier_env * env,struct btf * btf)17871 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
17872 {
17873 	struct btf_mod_pair *btf_mod;
17874 	int ret = 0;
17875 	int i;
17876 
17877 	/* check whether we recorded this BTF (and maybe module) already */
17878 	for (i = 0; i < env->used_btf_cnt; i++)
17879 		if (env->used_btfs[i].btf == btf)
17880 			goto ret_put;
17881 
17882 	if (env->used_btf_cnt >= MAX_USED_BTFS) {
17883 		verbose(env, "The total number of btfs per program has reached the limit of %u\n",
17884 			MAX_USED_BTFS);
17885 		ret = -E2BIG;
17886 		goto ret_put;
17887 	}
17888 
17889 	btf_mod = &env->used_btfs[env->used_btf_cnt];
17890 	btf_mod->btf = btf;
17891 	btf_mod->module = NULL;
17892 
17893 	/* if we reference variables from kernel module, bump its refcount */
17894 	if (btf_is_module(btf)) {
17895 		btf_mod->module = btf_try_get_module(btf);
17896 		if (!btf_mod->module) {
17897 			ret = -ENXIO;
17898 			goto ret_put;
17899 		}
17900 	}
17901 
17902 	env->used_btf_cnt++;
17903 	return 0;
17904 
17905 ret_put:
17906 	/* Either error or this BTF was already added */
17907 	btf_put(btf);
17908 	return ret;
17909 }
17910 
/* Replace a pseudo btf_id ld_imm64 with the kernel symbol's address.
 *
 * insn->imm holds a BTF type id (KIND_VAR or KIND_FUNC) within @btf.  The
 * symbol address is resolved via kallsyms and patched into the two-insn
 * 64-bit immediate (insn[0].imm = low 32 bits, insn[1].imm = high 32
 * bits).  @aux->btf_var is filled with the register type the loaded
 * pointer gets during verification.  Returns 0 on success, -ENOENT for an
 * unknown btf_id/symbol, -EINVAL for an unsupported kind or size.
 */
static int __check_pseudo_btf_id(struct bpf_verifier_env *env,
				 struct bpf_insn *insn,
				 struct bpf_insn_aux_data *aux,
				 struct btf *btf)
{
	const struct btf_var_secinfo *vsi;
	const struct btf_type *datasec;
	const struct btf_type *t;
	const char *sym_name;
	bool percpu = false;
	u32 type, id = insn->imm;
	s32 datasec_id;
	u64 addr;
	int i;

	t = btf_type_by_id(btf, id);
	if (!t) {
		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
		return -ENOENT;
	}

	/* Only variables and functions can be loaded as ksym addresses. */
	if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
		return -EINVAL;
	}

	sym_name = btf_name_by_offset(btf, t->name_off);
	addr = kallsyms_lookup_name(sym_name);
	if (!addr) {
		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
			sym_name);
		return -ENOENT;
	}
	/* Patch the resolved 64-bit address into the insn pair. */
	insn[0].imm = (u32)addr;
	insn[1].imm = addr >> 32;

	/* Functions are exposed as read-only memory of unknown size. */
	if (btf_type_is_func(t)) {
		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
		aux->btf_var.mem_size = 0;
		return 0;
	}

	/* A var listed in this BTF's ".data..percpu" DATASEC is percpu. */
	datasec_id = find_btf_percpu_datasec(btf);
	if (datasec_id > 0) {
		datasec = btf_type_by_id(btf, datasec_id);
		for_each_vsi(i, datasec, vsi) {
			if (vsi->type == id) {
				percpu = true;
				break;
			}
		}
	}

	type = t->type;
	t = btf_type_skip_modifiers(btf, type, NULL);
	if (percpu) {
		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
		aux->btf_var.btf = btf;
		aux->btf_var.btf_id = type;
	} else if (!btf_type_is_struct(t)) {
		const struct btf_type *ret;
		const char *tname;
		u32 tsize;

		/* resolve the type size of ksym. */
		ret = btf_resolve_size(btf, t, &tsize);
		if (IS_ERR(ret)) {
			tname = btf_name_by_offset(btf, t->name_off);
			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
				tname, PTR_ERR(ret));
			return -EINVAL;
		}
		/* Non-struct vars become plain read-only memory of known size. */
		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
		aux->btf_var.mem_size = tsize;
	} else {
		/* Struct vars keep full BTF type info for typed access. */
		aux->btf_var.reg_type = PTR_TO_BTF_ID;
		aux->btf_var.btf = btf;
		aux->btf_var.btf_id = type;
	}

	return 0;
}
17994 
check_pseudo_btf_id(struct bpf_verifier_env * env,struct bpf_insn * insn,struct bpf_insn_aux_data * aux)17995 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17996 			       struct bpf_insn *insn,
17997 			       struct bpf_insn_aux_data *aux)
17998 {
17999 	struct btf *btf;
18000 	int btf_fd;
18001 	int err;
18002 
18003 	btf_fd = insn[1].imm;
18004 	if (btf_fd) {
18005 		btf = btf_get_by_fd(btf_fd);
18006 		if (IS_ERR(btf)) {
18007 			verbose(env, "invalid module BTF object FD specified.\n");
18008 			return -EINVAL;
18009 		}
18010 	} else {
18011 		if (!btf_vmlinux) {
18012 			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
18013 			return -EINVAL;
18014 		}
18015 		btf_get(btf_vmlinux);
18016 		btf = btf_vmlinux;
18017 	}
18018 
18019 	err = __check_pseudo_btf_id(env, insn, aux, btf);
18020 	if (err) {
18021 		btf_put(btf);
18022 		return err;
18023 	}
18024 
18025 	return __add_used_btf(env, btf);
18026 }
18027 
is_tracing_prog_type(enum bpf_prog_type type)18028 static bool is_tracing_prog_type(enum bpf_prog_type type)
18029 {
18030 	switch (type) {
18031 	case BPF_PROG_TYPE_KPROBE:
18032 	case BPF_PROG_TYPE_TRACEPOINT:
18033 	case BPF_PROG_TYPE_PERF_EVENT:
18034 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
18035 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
18036 		return true;
18037 	default:
18038 		return false;
18039 	}
18040 }
18041 
bpf_map_is_cgroup_storage(struct bpf_map * map)18042 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
18043 {
18044 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
18045 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
18046 }
18047 
/* Validate that @map may be used by @prog and wire up per-program map
 * state (cgroup storage assignment, arena pointer).  Invoked once per
 * map when it is first added to the program's used_maps.
 * Returns 0 on success or a negative errno on policy violation.
 */
static int check_map_prog_compatibility(struct bpf_verifier_env *env,
					struct bpf_map *map,
					struct bpf_prog *prog)

{
	enum bpf_prog_type prog_type = resolve_prog_type(prog);

	/* Exclusive maps are usable only by the program with matching hash. */
	if (map->excl_prog_sha &&
	    memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
		verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
		return -EACCES;
	}

	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
		if (is_tracing_prog_type(prog_type)) {
			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
			return -EINVAL;
		}
	}

	if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) {
		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
			return -EINVAL;
		}

		if (is_tracing_prog_type(prog_type)) {
			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
			return -EINVAL;
		}
	}

	/* Offloaded prog and map must target the same device. */
	if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
	    !bpf_offload_prog_map_match(prog, map)) {
		verbose(env, "offload device mismatch between prog and map\n");
		return -EINVAL;
	}

	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
		return -EINVAL;
	}

	/* Sleepable programs are restricted to the explicit allowlist of
	 * map types below.
	 */
	if (prog->sleepable)
		switch (map->map_type) {
		case BPF_MAP_TYPE_HASH:
		case BPF_MAP_TYPE_LRU_HASH:
		case BPF_MAP_TYPE_ARRAY:
		case BPF_MAP_TYPE_PERCPU_HASH:
		case BPF_MAP_TYPE_PERCPU_ARRAY:
		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
		case BPF_MAP_TYPE_HASH_OF_MAPS:
		case BPF_MAP_TYPE_RINGBUF:
		case BPF_MAP_TYPE_USER_RINGBUF:
		case BPF_MAP_TYPE_INODE_STORAGE:
		case BPF_MAP_TYPE_SK_STORAGE:
		case BPF_MAP_TYPE_TASK_STORAGE:
		case BPF_MAP_TYPE_CGRP_STORAGE:
		case BPF_MAP_TYPE_QUEUE:
		case BPF_MAP_TYPE_STACK:
		case BPF_MAP_TYPE_ARENA:
		case BPF_MAP_TYPE_INSN_ARRAY:
		case BPF_MAP_TYPE_PROG_ARRAY:
			break;
		default:
			verbose(env,
				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
			return -EINVAL;
		}

	/* A program may own at most one cgroup storage map per type. */
	if (bpf_map_is_cgroup_storage(map) &&
	    bpf_cgroup_storage_assign(env->prog->aux, map)) {
		verbose(env, "only one cgroup storage of each type is allowed\n");
		return -EBUSY;
	}

	if (map->map_type == BPF_MAP_TYPE_ARENA) {
		if (env->prog->aux->arena) {
			verbose(env, "Only one arena per program\n");
			return -EBUSY;
		}
		if (!env->allow_ptr_leaks || !env->bpf_capable) {
			verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
			return -EPERM;
		}
		if (!env->prog->jit_requested) {
			verbose(env, "JIT is required to use arena\n");
			return -EOPNOTSUPP;
		}
		if (!bpf_jit_supports_arena()) {
			verbose(env, "JIT doesn't support arena\n");
			return -EOPNOTSUPP;
		}
		env->prog->aux->arena = (void *)map;
		if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
			verbose(env, "arena's user address must be set via map_extra or mmap()\n");
			return -EINVAL;
		}
	}

	return 0;
}
18152 
/* Record @map in env->used_maps (taking a reference) and return its index.
 * Idempotent: an already-recorded map returns its existing index without
 * taking another reference.  Returns <0 on error.
 */
static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
{
	int i, err;

	/* check whether we recorded this map already */
	for (i = 0; i < env->used_map_cnt; i++)
		if (env->used_maps[i] == map)
			return i;

	if (env->used_map_cnt >= MAX_USED_MAPS) {
		verbose(env, "The total number of maps per program has reached the limit of %u\n",
			MAX_USED_MAPS);
		return -E2BIG;
	}

	err = check_map_prog_compatibility(env, map, env->prog);
	if (err)
		return err;

	/* sleepable progs additionally pin the map via sleepable_refcnt */
	if (env->prog->sleepable)
		atomic64_inc(&map->sleepable_refcnt);

	/* hold the map. If the program is rejected by verifier,
	 * the map will be released by release_maps() or it
	 * will be used by the valid program until it's unloaded
	 * and all maps are released in bpf_free_used_maps()
	 */
	bpf_map_inc(map);

	env->used_maps[env->used_map_cnt++] = map;

	if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) {
		err = bpf_insn_array_init(map, env->prog);
		if (err) {
			/* map is already in used_maps, so on this error path
			 * its reference is dropped via release_maps()
			 */
			verbose(env, "Failed to properly initialize insn array\n");
			return err;
		}
		env->insn_array_maps[env->insn_array_map_cnt++] = map;
	}

	return env->used_map_cnt - 1;
}
18195 
18196 /* Add map behind fd to used maps list, if it's not already there, and return
18197  * its index.
18198  * Returns <0 on error, or >= 0 index, on success.
18199  */
add_used_map(struct bpf_verifier_env * env,int fd)18200 static int add_used_map(struct bpf_verifier_env *env, int fd)
18201 {
18202 	struct bpf_map *map;
18203 	CLASS(fd, f)(fd);
18204 
18205 	map = __bpf_map_get(f);
18206 	if (IS_ERR(map)) {
18207 		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
18208 		return PTR_ERR(map);
18209 	}
18210 
18211 	return __add_used_map(env, map);
18212 }
18213 
/* Validate reserved fields of a BPF_ALU/BPF_ALU64 instruction per the BPF
 * ISA.  Unused fields must be zero except where a non-zero value selects a
 * documented variant (movsx, addr_space_cast, signed div/mod, swap width).
 * Returns 0 if the encoding is valid, -EINVAL otherwise.
 */
static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);

	switch (opcode) {
	case BPF_NEG:
		/* negation takes no source operand and no modifiers */
		if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 ||
		    insn->off != 0 || insn->imm != 0) {
			verbose(env, "BPF_NEG uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_END:
		/* imm is the swap width; the ALU64 form (unconditional bswap)
		 * must use the BPF_TO_LE code point
		 */
		if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
		    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
		    (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) {
			verbose(env, "BPF_END uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_MOV:
		if (BPF_SRC(insn->code) == BPF_X) {
			if (class == BPF_ALU) {
				/* off 8/16 selects 32-bit movsx */
				if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
				    insn->imm) {
					verbose(env, "BPF_MOV uses reserved fields\n");
					return -EINVAL;
				}
			} else if (insn->off == BPF_ADDR_SPACE_CAST) {
				/* imm encodes (dst_as << 16) | src_as; only
				 * casts between address spaces 1 and 0 exist
				 */
				if (insn->imm != 1 && insn->imm != 1u << 16) {
					verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
					return -EINVAL;
				}
			} else if ((insn->off != 0 && insn->off != 8 &&
				    insn->off != 16 && insn->off != 32) || insn->imm) {
				/* off 8/16/32 selects 64-bit movsx */
				verbose(env, "BPF_MOV uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
			verbose(env, "BPF_MOV uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_ADD:
	case BPF_SUB:
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
	case BPF_MUL:
	case BPF_DIV:
	case BPF_MOD:
		/* off == 1 selects signed division/modulo (sdiv/smod) */
		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) ||
			    (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
				verbose(env, "BPF_ALU uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0 ||
			   (insn->off != 0 && insn->off != 1) ||
			   (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
			verbose(env, "BPF_ALU uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	default:
		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
		return -EINVAL;
	}
}
18287 
/* Validate reserved fields of a BPF_JMP/BPF_JMP32 instruction per the BPF
 * ISA.  Returns 0 if the encoding is valid, -EINVAL otherwise.
 */
static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);

	switch (opcode) {
	case BPF_CALL:
		/* src_reg selects the call kind (helper, bpf-to-bpf subprog,
		 * or kfunc); only kfunc calls may carry a non-zero off
		 */
		if (BPF_SRC(insn->code) != BPF_K ||
		    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) ||
		    (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL &&
		     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
		    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
			verbose(env, "BPF_CALL uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_JA:
		if (BPF_SRC(insn->code) == BPF_X) {
			/* indirect jump: target comes from dst_reg only */
			if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) {
				verbose(env, "BPF_JA|BPF_X uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
			   (class == BPF_JMP && insn->imm != 0) ||
			   (class == BPF_JMP32 && insn->off != 0)) {
			/* JMP uses off as target, JMP32 uses imm (gotol) */
			verbose(env, "BPF_JA uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_EXIT:
		if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 ||
		    insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 ||
		    class == BPF_JMP32) {
			verbose(env, "BPF_EXIT uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_JCOND:
		/* only may_goto is defined under BPF_JCOND */
		if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO ||
		    insn->dst_reg || insn->imm) {
			verbose(env, "invalid may_goto imm %d\n", insn->imm);
			return -EINVAL;
		}
		return 0;
	default:
		/* conditional jumps: exactly one of imm (BPF_K) or src_reg
		 * (BPF_X) is the second operand, the other must be zero
		 */
		if (BPF_SRC(insn->code) == BPF_X) {
			if (insn->imm != 0) {
				verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
				return -EINVAL;
			}
		} else if (insn->src_reg != BPF_REG_0) {
			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	}
}
18345 
/* Dispatch per-class validation of reserved instruction fields.
 * Returns 0 if the encoding is valid, -EINVAL otherwise.
 */
static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	switch (BPF_CLASS(insn->code)) {
	case BPF_ALU:
	case BPF_ALU64:
		return check_alu_fields(env, insn);
	case BPF_LDX:
		/* BPF_MEMSX is the sign-extending load variant */
		if ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
		    insn->imm != 0) {
			verbose(env, "BPF_LDX uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_STX:
		/* atomic ops encode their operation in imm; checked later */
		if (BPF_MODE(insn->code) == BPF_ATOMIC)
			return 0;
		if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
			verbose(env, "BPF_STX uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_ST:
		if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
			verbose(env, "BPF_ST uses reserved fields\n");
			return -EINVAL;
		}
		return 0;
	case BPF_JMP:
	case BPF_JMP32:
		return check_jmp_fields(env, insn);
	case BPF_LD: {
		u8 mode = BPF_MODE(insn->code);

		/* legacy packet loads (ABS/IND) or the two-insn imm64 load */
		if (mode == BPF_ABS || mode == BPF_IND) {
			if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
			    BPF_SIZE(insn->code) == BPF_DW ||
			    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
				verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
				return -EINVAL;
			}
		} else if (mode != BPF_IMM) {
			verbose(env, "invalid BPF_LD mode\n");
			return -EINVAL;
		}
		return 0;
	}
	default:
		verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code));
		return -EINVAL;
	}
}
18397 
18398 /*
18399  * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions:
18400  *
18401  * 1. if it accesses map FD, replace it with actual map pointer.
18402  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
18403  *
18404  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
18405  */
check_and_resolve_insns(struct bpf_verifier_env * env)18406 static int check_and_resolve_insns(struct bpf_verifier_env *env)
18407 {
18408 	struct bpf_insn *insn = env->prog->insnsi;
18409 	int insn_cnt = env->prog->len;
18410 	int i, err;
18411 
18412 	err = bpf_prog_calc_tag(env->prog);
18413 	if (err)
18414 		return err;
18415 
18416 	for (i = 0; i < insn_cnt; i++, insn++) {
18417 		if (insn->dst_reg >= MAX_BPF_REG) {
18418 			verbose(env, "R%d is invalid\n", insn->dst_reg);
18419 			return -EINVAL;
18420 		}
18421 		if (insn->src_reg >= MAX_BPF_REG) {
18422 			verbose(env, "R%d is invalid\n", insn->src_reg);
18423 			return -EINVAL;
18424 		}
18425 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
18426 			struct bpf_insn_aux_data *aux;
18427 			struct bpf_map *map;
18428 			int map_idx;
18429 			u64 addr;
18430 			u32 fd;
18431 
18432 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
18433 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
18434 			    insn[1].off != 0) {
18435 				verbose(env, "invalid bpf_ld_imm64 insn\n");
18436 				return -EINVAL;
18437 			}
18438 
18439 			if (insn[0].off != 0) {
18440 				verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
18441 				return -EINVAL;
18442 			}
18443 
18444 			if (insn[0].src_reg == 0)
18445 				/* valid generic load 64-bit imm */
18446 				goto next_insn;
18447 
18448 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
18449 				aux = &env->insn_aux_data[i];
18450 				err = check_pseudo_btf_id(env, insn, aux);
18451 				if (err)
18452 					return err;
18453 				goto next_insn;
18454 			}
18455 
18456 			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
18457 				aux = &env->insn_aux_data[i];
18458 				aux->ptr_type = PTR_TO_FUNC;
18459 				goto next_insn;
18460 			}
18461 
18462 			/* In final convert_pseudo_ld_imm64() step, this is
18463 			 * converted into regular 64-bit imm load insn.
18464 			 */
18465 			switch (insn[0].src_reg) {
18466 			case BPF_PSEUDO_MAP_VALUE:
18467 			case BPF_PSEUDO_MAP_IDX_VALUE:
18468 				break;
18469 			case BPF_PSEUDO_MAP_FD:
18470 			case BPF_PSEUDO_MAP_IDX:
18471 				if (insn[1].imm == 0)
18472 					break;
18473 				fallthrough;
18474 			default:
18475 				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
18476 				return -EINVAL;
18477 			}
18478 
18479 			switch (insn[0].src_reg) {
18480 			case BPF_PSEUDO_MAP_IDX_VALUE:
18481 			case BPF_PSEUDO_MAP_IDX:
18482 				if (bpfptr_is_null(env->fd_array)) {
18483 					verbose(env, "fd_idx without fd_array is invalid\n");
18484 					return -EPROTO;
18485 				}
18486 				if (copy_from_bpfptr_offset(&fd, env->fd_array,
18487 							    insn[0].imm * sizeof(fd),
18488 							    sizeof(fd)))
18489 					return -EFAULT;
18490 				break;
18491 			default:
18492 				fd = insn[0].imm;
18493 				break;
18494 			}
18495 
18496 			map_idx = add_used_map(env, fd);
18497 			if (map_idx < 0)
18498 				return map_idx;
18499 			map = env->used_maps[map_idx];
18500 
18501 			aux = &env->insn_aux_data[i];
18502 			aux->map_index = map_idx;
18503 
18504 			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
18505 			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
18506 				addr = (unsigned long)map;
18507 			} else {
18508 				u32 off = insn[1].imm;
18509 
18510 				if (!map->ops->map_direct_value_addr) {
18511 					verbose(env, "no direct value access support for this map type\n");
18512 					return -EINVAL;
18513 				}
18514 
18515 				err = map->ops->map_direct_value_addr(map, &addr, off);
18516 				if (err) {
18517 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
18518 						map->value_size, off);
18519 					return err;
18520 				}
18521 
18522 				aux->map_off = off;
18523 				addr += off;
18524 			}
18525 
18526 			insn[0].imm = (u32)addr;
18527 			insn[1].imm = addr >> 32;
18528 
18529 next_insn:
18530 			insn++;
18531 			i++;
18532 			continue;
18533 		}
18534 
18535 		/* Basic sanity check before we invest more work here. */
18536 		if (!bpf_opcode_in_insntable(insn->code)) {
18537 			verbose(env, "unknown opcode %02x\n", insn->code);
18538 			return -EINVAL;
18539 		}
18540 
18541 		err = check_insn_fields(env, insn);
18542 		if (err)
18543 			return err;
18544 	}
18545 
18546 	/* now all pseudo BPF_LD_IMM64 instructions load valid
18547 	 * 'struct bpf_map *' into a register instead of user map_fd.
18548 	 * These pointers will be used later by verifier to validate map access.
18549 	 */
18550 	return 0;
18551 }
18552 
18553 /* drop refcnt of maps used by the rejected program */
release_maps(struct bpf_verifier_env * env)18554 static void release_maps(struct bpf_verifier_env *env)
18555 {
18556 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
18557 			     env->used_map_cnt);
18558 }
18559 
18560 /* drop refcnt of maps used by the rejected program */
release_btfs(struct bpf_verifier_env * env)18561 static void release_btfs(struct bpf_verifier_env *env)
18562 {
18563 	__bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt);
18564 }
18565 
18566 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
convert_pseudo_ld_imm64(struct bpf_verifier_env * env)18567 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
18568 {
18569 	struct bpf_insn *insn = env->prog->insnsi;
18570 	int insn_cnt = env->prog->len;
18571 	int i;
18572 
18573 	for (i = 0; i < insn_cnt; i++, insn++) {
18574 		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
18575 			continue;
18576 		if (insn->src_reg == BPF_PSEUDO_FUNC)
18577 			continue;
18578 		insn->src_reg = 0;
18579 	}
18580 }
18581 
release_insn_arrays(struct bpf_verifier_env * env)18582 static void release_insn_arrays(struct bpf_verifier_env *env)
18583 {
18584 	int i;
18585 
18586 	for (i = 0; i < env->insn_array_map_cnt; i++)
18587 		bpf_insn_array_release(env->insn_array_maps[i]);
18588 }
18589 
18590 
18591 
18592 /* The verifier does more data flow analysis than llvm and will not
18593  * explore branches that are dead at run time. Malicious programs can
18594  * have dead code too. Therefore replace all dead at-run-time code
18595  * with 'ja -1'.
18596  *
18597  * Just nops are not optimal, e.g. if they would sit at the end of the
18598  * program and through another bug we would manage to jump there, then
18599  * we'd execute beyond program memory otherwise. Returning exception
18600  * code also wouldn't work since we can have subprogs where the dead
18601  * code could be located.
18602  */
sanitize_dead_code(struct bpf_verifier_env * env)18603 static void sanitize_dead_code(struct bpf_verifier_env *env)
18604 {
18605 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
18606 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
18607 	struct bpf_insn *insn = env->prog->insnsi;
18608 	const int insn_cnt = env->prog->len;
18609 	int i;
18610 
18611 	for (i = 0; i < insn_cnt; i++) {
18612 		if (aux_data[i].seen)
18613 			continue;
18614 		memcpy(insn + i, &trap, sizeof(trap));
18615 		aux_data[i].zext_dst = false;
18616 	}
18617 }
18618 
18619 
18620 
/* Free all verifier state held by @env: the in-flight current state and
 * branch stack, the free list, per-SCC backedge bookkeeping, and the
 * explored-states table used for pruning.
 */
static void free_states(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state_list *sl;
	struct list_head *head, *pos, *tmp;
	struct bpf_scc_info *info;
	int i, j;

	bpf_free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* drain the branch stack; pop_stack() frees each popped state */
	while (!pop_stack(env, NULL, NULL, false));

	list_for_each_safe(pos, tmp, &env->free_list) {
		sl = container_of(pos, struct bpf_verifier_state_list, node);
		bpf_free_verifier_state(&sl->state, false);
		kfree(sl);
	}
	INIT_LIST_HEAD(&env->free_list);

	for (i = 0; i < env->scc_cnt; ++i) {
		info = env->scc_info[i];
		if (!info)
			continue;
		for (j = 0; j < info->num_visits; j++)
			bpf_free_backedges(&info->visits[j]);
		kvfree(info);
		env->scc_info[i] = NULL;
	}

	/* explored_states may be NULL if verification failed very early */
	if (!env->explored_states)
		return;

	for (i = 0; i < state_htab_size(env); i++) {
		head = &env->explored_states[i];

		list_for_each_safe(pos, tmp, head) {
			sl = container_of(pos, struct bpf_verifier_state_list, node);
			bpf_free_verifier_state(&sl->state, false);
			kfree(sl);
		}
		INIT_LIST_HEAD(&env->explored_states[i]);
	}
}
18663 
/* Verify one entry point: the main program (subprog == 0) or a global
 * subprog. Builds the initial verifier state, seeds the argument
 * registers (from BTF-derived argument info for subprogs/EXT programs,
 * or PTR_TO_CTX in R1 for the main program), then runs do_check().
 * All verification state is freed before returning.
 */
static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_subprog_info *sub = subprog_info(env, subprog);
	struct bpf_prog_aux *aux = env->prog->aux;
	struct bpf_verifier_state *state;
	struct bpf_reg_state *regs;
	int ret, i;

	env->prev_linfo = NULL;
	env->pass_cnt++;

	state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT);
	if (!state)
		return -ENOMEM;
	state->curframe = 0;
	state->speculative = false;
	state->branches = 1;
	state->in_sleepable = env->prog->sleepable;
	state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT);
	if (!state->frame[0]) {
		kfree(state);
		return -ENOMEM;
	}
	env->cur_state = state;
	init_func_state(env, state->frame[0],
			BPF_MAIN_FUNC /* callsite */,
			0 /* frameno */,
			subprog);
	state->first_insn_idx = env->subprog_info[subprog].start;
	state->last_insn_idx = -1;

	regs = state->frame[state->curframe]->regs;
	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
		const char *sub_name = subprog_name(env, subprog);
		struct bpf_subprog_arg_info *arg;
		struct bpf_reg_state *reg;

		if (env->log.level & BPF_LOG_LEVEL)
			verbose(env, "Validating %s() func#%d...\n", sub_name, subprog);
		ret = btf_prepare_func_args(env, subprog);
		if (ret)
			goto out;

		if (subprog_is_exc_cb(env, subprog)) {
			state->frame[0]->in_exception_callback_fn = true;

			/*
			 * Global functions are scalar or void, make sure
			 * we return a scalar.
			 */
			if (subprog_returns_void(env, subprog)) {
				verbose(env, "exception cb cannot return void\n");
				ret = -EINVAL;
				goto out;
			}

			/* Also ensure the callback only has a single scalar argument. */
			if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) {
				verbose(env, "exception cb only supports single integer argument\n");
				ret = -EINVAL;
				goto out;
			}
		}
		/* Seed R1..R<arg_cnt> from the BTF-derived argument types. */
		for (i = BPF_REG_1; i <= sub->arg_cnt; i++) {
			arg = &sub->args[i - BPF_REG_1];
			reg = &regs[i];

			if (arg->arg_type == ARG_PTR_TO_CTX) {
				reg->type = PTR_TO_CTX;
				mark_reg_known_zero(env, regs, i);
			} else if (arg->arg_type == ARG_ANYTHING) {
				reg->type = SCALAR_VALUE;
				mark_reg_unknown(env, regs, i);
			} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
				/* assume unspecial LOCAL dynptr type */
				__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen);
			} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
				reg->type = PTR_TO_MEM;
				reg->type |= arg->arg_type &
					     (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
				mark_reg_known_zero(env, regs, i);
				reg->mem_size = arg->mem_size;
				if (arg->arg_type & PTR_MAYBE_NULL)
					reg->id = ++env->id_gen;
			} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
				reg->type = PTR_TO_BTF_ID;
				if (arg->arg_type & PTR_MAYBE_NULL)
					reg->type |= PTR_MAYBE_NULL;
				if (arg->arg_type & PTR_UNTRUSTED)
					reg->type |= PTR_UNTRUSTED;
				if (arg->arg_type & PTR_TRUSTED)
					reg->type |= PTR_TRUSTED;
				mark_reg_known_zero(env, regs, i);
				reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
				reg->btf_id = arg->btf_id;
				reg->id = ++env->id_gen;
			} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
				/* caller can pass either PTR_TO_ARENA or SCALAR */
				mark_reg_unknown(env, regs, i);
			} else {
				verifier_bug(env, "unhandled arg#%d type %d",
					     i - BPF_REG_1, arg->arg_type);
				ret = -EFAULT;
				goto out;
			}
		}
	} else {
		/* if main BPF program has associated BTF info, validate that
		 * it's matching expected signature, and otherwise mark BTF
		 * info for main program as unreliable
		 */
		if (env->prog->aux->func_info_aux) {
			ret = btf_prepare_func_args(env, 0);
			if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX)
				env->prog->aux->func_info_aux[0].unreliable = true;
		}

		/* 1st arg to a function */
		regs[BPF_REG_1].type = PTR_TO_CTX;
		mark_reg_known_zero(env, regs, BPF_REG_1);
	}

	/* Acquire references for struct_ops program arguments tagged with "__ref" */
	if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
		for (i = 0; i < aux->ctx_arg_info_size; i++)
			aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
							  acquire_reference(env, 0) : 0;
	}

	ret = do_check(env);
out:
	/* On success, drop the detailed log unless level 2 was requested. */
	if (!ret && pop_log)
		bpf_vlog_reset(&env->log, 0);
	free_states(env);
	return ret;
}
18801 
18802 /* Lazily verify all global functions based on their BTF, if they are called
18803  * from main BPF program or any of subprograms transitively.
18804  * BPF global subprogs called from dead code are not validated.
18805  * All callable global functions must pass verification.
18806  * Otherwise the whole program is rejected.
18807  * Consider:
18808  * int bar(int);
18809  * int foo(int f)
18810  * {
18811  *    return bar(f);
18812  * }
18813  * int bar(int b)
18814  * {
18815  *    ...
18816  * }
18817  * foo() will be verified first for R1=any_scalar_value. During verification it
18818  * will be assumed that bar() already verified successfully and call to bar()
18819  * from foo() will be checked for type match only. Later bar() will be verified
18820  * independently to check that it's safe for R1=any_scalar_value.
18821  */
static int do_check_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	struct bpf_func_info_aux *sub_aux;
	int i, ret, new_cnt;

	/* Without BTF func info there are no global subprogs to verify. */
	if (!aux->func_info)
		return 0;

	/* exception callback is presumed to be always called */
	if (env->exception_callback_subprog)
		subprog_aux(env, env->exception_callback_subprog)->called = true;

again:
	new_cnt = 0;
	/* Subprog 0 is the main program, verified separately; start at 1. */
	for (i = 1; i < env->subprog_cnt; i++) {
		if (!bpf_subprog_is_global(env, i))
			continue;

		sub_aux = subprog_aux(env, i);
		if (!sub_aux->called || sub_aux->verified)
			continue;

		env->insn_idx = env->subprog_info[i].start;
		WARN_ON_ONCE(env->insn_idx == 0);
		ret = do_check_common(env, i);
		if (ret) {
			return ret;
		} else if (env->log.level & BPF_LOG_LEVEL) {
			verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n",
				i, subprog_name(env, i));
		}

		/* We verified new global subprog, it might have called some
		 * more global subprogs that we haven't verified yet, so we
		 * need to do another pass over subprogs to verify those.
		 */
		sub_aux->verified = true;
		new_cnt++;
	}

	/* We can't loop forever as we verify at least one global subprog on
	 * each pass.
	 */
	if (new_cnt)
		goto again;

	return 0;
}
18871 
do_check_main(struct bpf_verifier_env * env)18872 static int do_check_main(struct bpf_verifier_env *env)
18873 {
18874 	int ret;
18875 
18876 	env->insn_idx = 0;
18877 	ret = do_check_common(env, 0);
18878 	if (!ret)
18879 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
18880 	return ret;
18881 }
18882 
18883 
print_verification_stats(struct bpf_verifier_env * env)18884 static void print_verification_stats(struct bpf_verifier_env *env)
18885 {
18886 	int i;
18887 
18888 	if (env->log.level & BPF_LOG_STATS) {
18889 		verbose(env, "verification time %lld usec\n",
18890 			div_u64(env->verification_time, 1000));
18891 		verbose(env, "stack depth ");
18892 		for (i = 0; i < env->subprog_cnt; i++) {
18893 			u32 depth = env->subprog_info[i].stack_depth;
18894 
18895 			verbose(env, "%d", depth);
18896 			if (i + 1 < env->subprog_cnt)
18897 				verbose(env, "+");
18898 		}
18899 		verbose(env, "\n");
18900 	}
18901 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
18902 		"total_states %d peak_states %d mark_read %d\n",
18903 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
18904 		env->max_states_per_insn, env->total_states,
18905 		env->peak_states, env->longest_mark_read_walk);
18906 }
18907 
/* Duplicate @info (@cnt entries) into prog->aux so the context-argument
 * descriptors outlive their source. Returns 0 on success, -ENOMEM if the
 * allocation failed (ctx_arg_info is then NULL with size still set).
 */
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
			       const struct bpf_ctx_arg_aux *info, u32 cnt)
{
	struct bpf_ctx_arg_aux *copy;

	copy = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
	prog->aux->ctx_arg_info = copy;
	prog->aux->ctx_arg_info_size = cnt;

	if (!copy)
		return -ENOMEM;
	return 0;
}
18916 
/* Validate that a struct_ops program attaches to a supported member of a
 * registered struct_ops type and record the member's func proto, offset
 * and verifier_ops in prog->aux / env for the rest of verification.
 */
static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
	const struct btf_type *t, *func_proto;
	const struct bpf_struct_ops_desc *st_ops_desc;
	const struct bpf_struct_ops *st_ops;
	const struct btf_member *member;
	struct bpf_prog *prog = env->prog;
	bool has_refcounted_arg = false;
	u32 btf_id, member_idx, member_off;
	struct btf *btf;
	const char *mname;
	int i, err;

	if (!prog->gpl_compatible) {
		verbose(env, "struct ops programs must have a GPL compatible license\n");
		return -EINVAL;
	}

	if (!prog->aux->attach_btf_id)
		return -ENOTSUPP;

	btf = prog->aux->attach_btf;
	if (btf_is_module(btf)) {
		/* Make sure st_ops is valid through the lifetime of env */
		env->attach_btf_mod = btf_try_get_module(btf);
		if (!env->attach_btf_mod) {
			verbose(env, "struct_ops module %s is not found\n",
				btf_get_name(btf));
			return -ENOTSUPP;
		}
	}

	btf_id = prog->aux->attach_btf_id;
	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
	if (!st_ops_desc) {
		verbose(env, "attach_btf_id %u is not a supported struct\n",
			btf_id);
		return -ENOTSUPP;
	}
	st_ops = st_ops_desc->st_ops;

	/* expected_attach_type encodes which member is being implemented. */
	t = st_ops_desc->type;
	member_idx = prog->expected_attach_type;
	if (member_idx >= btf_type_vlen(t)) {
		verbose(env, "attach to invalid member idx %u of struct %s\n",
			member_idx, st_ops->name);
		return -EINVAL;
	}

	member = &btf_type_member(t)[member_idx];
	mname = btf_name_by_offset(btf, member->name_off);
	func_proto = btf_type_resolve_func_ptr(btf, member->type,
					       NULL);
	if (!func_proto) {
		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
			mname, member_idx, st_ops->name);
		return -EINVAL;
	}

	member_off = __btf_member_bit_offset(t, member) / 8;
	err = bpf_struct_ops_supported(st_ops, member_off);
	if (err) {
		verbose(env, "attach to unsupported member %s of struct %s\n",
			mname, st_ops->name);
		return err;
	}

	/* Let the struct_ops implementation veto this member if it wants. */
	if (st_ops->check_member) {
		err = st_ops->check_member(t, member, prog);

		if (err) {
			verbose(env, "attach to unsupported member %s of struct %s\n",
				mname, st_ops->name);
			return err;
		}
	}

	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
		verbose(env, "Private stack not supported by jit\n");
		return -EACCES;
	}

	for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
		if (st_ops_desc->arg_info[member_idx].info[i].refcounted) {
			has_refcounted_arg = true;
			break;
		}
	}

	/* Tail call is not allowed for programs with refcounted arguments since we
	 * cannot guarantee that valid refcounted kptrs will be passed to the callee.
	 */
	for (i = 0; i < env->subprog_cnt; i++) {
		if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
			verbose(env, "program with __ref argument cannot tail call\n");
			return -EINVAL;
		}
	}

	prog->aux->st_ops = st_ops;
	prog->aux->attach_st_ops_member_off = member_off;

	prog->aux->attach_func_proto = func_proto;
	prog->aux->attach_func_name = mname;
	env->ops = st_ops->verifier_ops;

	return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
					  st_ops_desc->arg_info[member_idx].cnt);
}
19026 #define SECURITY_PREFIX "security_"
19027 
19028 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
19029 
19030 /* list of non-sleepable functions that are otherwise on
19031  * ALLOW_ERROR_INJECTION list
19032  */
BTF_SET_START(btf_non_sleepable_error_inject)
/* Three functions below can be called from sleepable and non-sleepable context.
 * Assume non-sleepable from bpf safety point of view.
 * (Which entries are present depends on the fault-injection config options.)
 */
BTF_ID(func, __filemap_add_folio)
#ifdef CONFIG_FAIL_PAGE_ALLOC
BTF_ID(func, should_fail_alloc_page)
#endif
#ifdef CONFIG_FAILSLAB
BTF_ID(func, should_failslab)
#endif
BTF_SET_END(btf_non_sleepable_error_inject)
19045 
19046 static int check_non_sleepable_error_inject(u32 btf_id)
19047 {
19048 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
19049 }
19050 
static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
{
	/* Sleepable fentry/fexit/fmod_ret attachment is only allowed to
	 * ALLOW_ERROR_INJECTION targets that are not on the denylist.
	 */
	if (check_non_sleepable_error_inject(btf_id))
		return -EINVAL;
	if (!within_error_injection_list(addr))
		return -EINVAL;

	return 0;
}
19062 
check_attach_modify_return(unsigned long addr,const char * func_name)19063 static int check_attach_modify_return(unsigned long addr, const char *func_name)
19064 {
19065 	if (within_error_injection_list(addr) ||
19066 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
19067 		return 0;
19068 
19069 	return -EINVAL;
19070 }
19071 
19072 #else
19073 
19074 /* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
19075  * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
19076  * but that just compares two concrete function names.
19077  */
static bool has_arch_syscall_prefix(const char *func_name)
{
	/* Compare against the current architecture's syscall wrapper prefix;
	 * architectures not listed here simply never match.
	 */
#if defined(__x86_64__)
	return !strncmp(func_name, "__x64_", 6);
#elif defined(__i386__)
	return !strncmp(func_name, "__ia32_", 7);
#elif defined(__s390x__)
	return !strncmp(func_name, "__s390x_", 8);
#elif defined(__aarch64__)
	return !strncmp(func_name, "__arm64_", 8);
#elif defined(__riscv)
	return !strncmp(func_name, "__riscv_", 8);
#elif defined(__powerpc__) || defined(__powerpc64__)
	return !strncmp(func_name, "sys_", 4);
#elif defined(__loongarch__)
	return !strncmp(func_name, "sys_", 4);
#else
	return false;
#endif
}
19098 
19099 /* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
19100 
/* Without error-injection support, sleepable progs may only attach to
 * syscall entry points (identified by the arch syscall prefix).
 */
static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
{
	return has_arch_syscall_prefix(func_name) ? 0 : -EINVAL;
}
19108 
check_attach_modify_return(unsigned long addr,const char * func_name)19109 static int check_attach_modify_return(unsigned long addr, const char *func_name)
19110 {
19111 	if (has_arch_syscall_prefix(func_name) ||
19112 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
19113 		return 0;
19114 
19115 	return -EINVAL;
19116 }
19117 
19118 #endif /* CONFIG_FUNCTION_ERROR_INJECTION */
19119 
/* Resolve and validate the attach target of a tracing/LSM/EXT program:
 * either a subprog of another BPF program (@tgt_prog) or a kernel/module
 * symbol identified by @btf_id. On success fills @tgt_info with the
 * target address, name, func proto and (for module targets) a held
 * module reference that the caller owns.
 */
int bpf_check_attach_target(struct bpf_verifier_log *log,
			    const struct bpf_prog *prog,
			    const struct bpf_prog *tgt_prog,
			    u32 btf_id,
			    struct bpf_attach_target_info *tgt_info)
{
	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
	bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
	char trace_symbol[KSYM_SYMBOL_LEN];
	const char prefix[] = "btf_trace_";
	struct bpf_raw_event_map *btp;
	int ret = 0, subprog = -1, i;
	const struct btf_type *t;
	bool conservative = true;
	const char *tname, *fname;
	struct btf *btf;
	long addr = 0;
	struct module *mod = NULL;

	if (!btf_id) {
		bpf_log(log, "Tracing programs must provide btf_id\n");
		return -EINVAL;
	}
	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
	if (!btf) {
		bpf_log(log,
			"Tracing program can only be attached to another program annotated with BTF\n");
		return -EINVAL;
	}
	t = btf_type_by_id(btf, btf_id);
	if (!t) {
		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
		return -EINVAL;
	}
	tname = btf_name_by_offset(btf, t->name_off);
	if (!tname) {
		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
		return -EINVAL;
	}
	if (tgt_prog) {
		struct bpf_prog_aux *aux = tgt_prog->aux;
		bool tgt_changes_pkt_data;
		bool tgt_might_sleep;

		if (bpf_prog_is_dev_bound(prog->aux) &&
		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
			bpf_log(log, "Target program bound device mismatch");
			return -EINVAL;
		}

		/* Map @btf_id to the target program's subprog index. */
		for (i = 0; i < aux->func_info_cnt; i++)
			if (aux->func_info[i].type_id == btf_id) {
				subprog = i;
				break;
			}
		if (subprog == -1) {
			bpf_log(log, "Subprog %s doesn't exist\n", tname);
			return -EINVAL;
		}
		if (aux->func && aux->func[subprog]->aux->exception_cb) {
			bpf_log(log,
				"%s programs cannot attach to exception callback\n",
				prog_extension ? "Extension" : "Tracing");
			return -EINVAL;
		}
		conservative = aux->func_info_aux[subprog].unreliable;
		if (prog_extension) {
			if (conservative) {
				bpf_log(log,
					"Cannot replace static functions\n");
				return -EINVAL;
			}
			if (!prog->jit_requested) {
				bpf_log(log,
					"Extension programs should be JITed\n");
				return -EINVAL;
			}
			/* An extension must not be "more powerful" than the
			 * subprog it replaces.
			 */
			tgt_changes_pkt_data = aux->func
					       ? aux->func[subprog]->aux->changes_pkt_data
					       : aux->changes_pkt_data;
			if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) {
				bpf_log(log,
					"Extension program changes packet data, while original does not\n");
				return -EINVAL;
			}

			tgt_might_sleep = aux->func
					  ? aux->func[subprog]->aux->might_sleep
					  : aux->might_sleep;
			if (prog->aux->might_sleep && !tgt_might_sleep) {
				bpf_log(log,
					"Extension program may sleep, while original does not\n");
				return -EINVAL;
			}
		}
		if (!tgt_prog->jited) {
			bpf_log(log, "Can attach to only JITed progs\n");
			return -EINVAL;
		}
		if (prog_tracing) {
			if (aux->attach_tracing_prog) {
				/*
				 * Target program is an fentry/fexit which is already attached
				 * to another tracing program. More levels of nesting
				 * attachment are not allowed.
				 */
				bpf_log(log, "Cannot nest tracing program attach more than once\n");
				return -EINVAL;
			}
		} else if (tgt_prog->type == prog->type) {
			/*
			 * To avoid potential call chain cycles, prevent attaching of a
			 * program extension to another extension. It's ok to attach
			 * fentry/fexit to extension program.
			 */
			bpf_log(log, "Cannot recursively attach\n");
			return -EINVAL;
		}
		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
		    prog_extension &&
		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
			/* Program extensions can extend all program types
			 * except fentry/fexit. The reason is the following.
			 * The fentry/fexit programs are used for performance
			 * analysis, stats and can be attached to any program
			 * type. When extension program is replacing XDP function
			 * it is necessary to allow performance analysis of all
			 * functions. Both original XDP program and its program
			 * extension. Hence attaching fentry/fexit to
			 * BPF_PROG_TYPE_EXT is allowed. If extending of
			 * fentry/fexit was allowed it would be possible to create
			 * long call chain fentry->extension->fentry->extension
			 * beyond reasonable stack size. Hence extending fentry
			 * is not allowed.
			 */
			bpf_log(log, "Cannot extend fentry/fexit/fsession\n");
			return -EINVAL;
		}
	} else {
		if (prog_extension) {
			bpf_log(log, "Cannot replace kernel functions\n");
			return -EINVAL;
		}
	}

	switch (prog->expected_attach_type) {
	case BPF_TRACE_RAW_TP:
		if (tgt_prog) {
			bpf_log(log,
				"Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n");
			return -EINVAL;
		}
		/* Raw tracepoint targets are "btf_trace_<name>" typedefs. */
		if (!btf_type_is_typedef(t)) {
			bpf_log(log, "attach_btf_id %u is not a typedef\n",
				btf_id);
			return -EINVAL;
		}
		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
				btf_id, tname);
			return -EINVAL;
		}
		tname += sizeof(prefix) - 1;

		/* The func_proto of "btf_trace_##tname" is generated from typedef without argument
		 * names. Thus using bpf_raw_event_map to get argument names.
		 */
		btp = bpf_get_raw_tracepoint(tname);
		if (!btp)
			return -EINVAL;
		fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
					trace_symbol);
		bpf_put_raw_tracepoint(btp);

		if (fname)
			ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);

		if (!fname || ret < 0) {
			bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
				prefix, tname);
			t = btf_type_by_id(btf, t->type);
			if (!btf_type_is_ptr(t))
				/* should never happen in valid vmlinux build */
				return -EINVAL;
		} else {
			t = btf_type_by_id(btf, ret);
			if (!btf_type_is_func(t))
				/* should never happen in valid vmlinux build */
				return -EINVAL;
		}

		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			/* should never happen in valid vmlinux build */
			return -EINVAL;

		break;
	case BPF_TRACE_ITER:
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;
		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret)
			return ret;
		break;
	default:
		if (!prog_extension)
			return -EINVAL;
		fallthrough;
	case BPF_MODIFY_RETURN:
	case BPF_LSM_MAC:
	case BPF_LSM_CGROUP:
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
	case BPF_TRACE_FSESSION:
		if (prog->expected_attach_type == BPF_TRACE_FSESSION &&
		    !bpf_jit_supports_fsession()) {
			bpf_log(log, "JIT does not support fsession\n");
			return -EOPNOTSUPP;
		}
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		if (prog_extension &&
		    btf_check_type_match(log, prog, btf, t))
			return -EINVAL;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;

		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
			return -EINVAL;

		if (tgt_prog && conservative)
			t = NULL;

		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret < 0)
			return ret;

		/* Resolve the target address: a (sub)prog of @tgt_prog, a
		 * module symbol, or a vmlinux symbol.
		 */
		if (tgt_prog) {
			if (subprog == 0)
				addr = (long) tgt_prog->bpf_func;
			else
				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
		} else {
			if (btf_is_module(btf)) {
				mod = btf_try_get_module(btf);
				if (mod)
					addr = find_kallsyms_symbol_value(mod, tname);
				else
					addr = 0;
			} else {
				addr = kallsyms_lookup_name(tname);
			}
			if (!addr) {
				module_put(mod);
				bpf_log(log,
					"The address of function %s cannot be found\n",
					tname);
				return -ENOENT;
			}
		}

		if (prog->sleepable) {
			ret = -EINVAL;
			switch (prog->type) {
			case BPF_PROG_TYPE_TRACING:
				if (!check_attach_sleepable(btf_id, addr, tname))
					ret = 0;
				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
				 * in the fmodret id set with the KF_SLEEPABLE flag.
				 */
				else {
					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
										prog);

					if (flags && (*flags & KF_SLEEPABLE))
						ret = 0;
				}
				break;
			case BPF_PROG_TYPE_LSM:
				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
				 * Only some of them are sleepable.
				 */
				if (bpf_lsm_is_sleepable_hook(btf_id))
					ret = 0;
				break;
			default:
				break;
			}
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s is not sleepable\n", tname);
				return ret;
			}
		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
			if (tgt_prog) {
				module_put(mod);
				bpf_log(log, "can't modify return codes of BPF programs\n");
				return -EINVAL;
			}
			ret = -EINVAL;
			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
			    !check_attach_modify_return(addr, tname))
				ret = 0;
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s() is not modifiable\n", tname);
				return ret;
			}
		}

		break;
	}
	tgt_info->tgt_addr = addr;
	tgt_info->tgt_name = tname;
	tgt_info->tgt_type = t;
	tgt_info->tgt_mod = mod;
	return 0;
}
19452 
/* Functions that tracing programs are never allowed to attach to; the
 * set is consulted via btf_id_set_contains() in check_attach_btf_id().
 */
BTF_SET_START(btf_id_deny)
BTF_ID_UNUSED
#ifdef CONFIG_SMP
BTF_ID(func, ___migrate_enable)
BTF_ID(func, migrate_disable)
BTF_ID(func, migrate_enable)
#endif
#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
BTF_ID(func, rcu_read_unlock_strict)
#endif
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
BTF_ID(func, preempt_count_add)
BTF_ID(func, preempt_count_sub)
#endif
#ifdef CONFIG_PREEMPT_RCU
BTF_ID(func, __rcu_read_lock)
BTF_ID(func, __rcu_read_unlock)
#endif
BTF_SET_END(btf_id_deny)
19472 
/* fexit and fmod_ret can't be used to attach to __noreturn functions.
 * Currently, we must manually list all __noreturn functions here. Once a more
 * robust solution is implemented, this workaround can be removed.
 * (Enforced in check_attach_btf_id() for FEXIT/FSESSION/MODIFY_RETURN.)
 */
BTF_SET_START(noreturn_deny)
#ifdef CONFIG_IA32_EMULATION
BTF_ID(func, __ia32_sys_exit)
BTF_ID(func, __ia32_sys_exit_group)
#endif
#ifdef CONFIG_KUNIT
BTF_ID(func, __kunit_abort)
BTF_ID(func, kunit_try_catch_throw)
#endif
#ifdef CONFIG_MODULES
BTF_ID(func, __module_put_and_kthread_exit)
#endif
#ifdef CONFIG_X86_64
BTF_ID(func, __x64_sys_exit)
BTF_ID(func, __x64_sys_exit_group)
#endif
BTF_ID(func, do_exit)
BTF_ID(func, do_group_exit)
BTF_ID(func, kthread_complete_and_exit)
BTF_ID(func, make_task_dead)
BTF_SET_END(noreturn_deny)
19498 
19499 static bool can_be_sleepable(struct bpf_prog *prog)
19500 {
19501 	if (prog->type == BPF_PROG_TYPE_TRACING) {
19502 		switch (prog->expected_attach_type) {
19503 		case BPF_TRACE_FENTRY:
19504 		case BPF_TRACE_FEXIT:
19505 		case BPF_MODIFY_RETURN:
19506 		case BPF_TRACE_ITER:
19507 		case BPF_TRACE_FSESSION:
19508 			return true;
19509 		default:
19510 			return false;
19511 		}
19512 	}
19513 	return prog->type == BPF_PROG_TYPE_LSM ||
19514 	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19515 	       prog->type == BPF_PROG_TYPE_STRUCT_OPS;
19516 }
19517 
/* Validate and resolve the BTF attachment target identified by
 * prog->aux->attach_btf_id (and optionally by dst_prog) for tracing, LSM,
 * ext and struct_ops programs.  On success the target info is cached in
 * prog->aux, and for trampoline-based attach types a trampoline reference
 * is stored in prog->aux->dst_trampoline.  Returns 0 or a negative error.
 */
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
	struct bpf_attach_target_info tgt_info = {};
	u32 btf_id = prog->aux->attach_btf_id;
	struct bpf_trampoline *tr;
	int ret;
	u64 key;

	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
		if (prog->sleepable)
			/* attach_btf_id checked to be zero already */
			return 0;
		verbose(env, "Syscall programs can only be sleepable\n");
		return -EINVAL;
	}

	if (prog->sleepable && !can_be_sleepable(prog)) {
		verbose(env, "Only fentry/fexit/fsession/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
		return -EINVAL;
	}

	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
		return check_struct_ops_btf_id(env);

	/* the checks below only apply to BTF-attached program types */
	if (prog->type != BPF_PROG_TYPE_TRACING &&
	    prog->type != BPF_PROG_TYPE_LSM &&
	    prog->type != BPF_PROG_TYPE_EXT)
		return 0;

	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
	if (ret)
		return ret;

	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
		/* to make freplace equivalent to their targets, they need to
		 * inherit env->ops and expected_attach_type for the rest of the
		 * verification
		 */
		env->ops = bpf_verifier_ops[tgt_prog->type];
		prog->expected_attach_type = tgt_prog->expected_attach_type;
	}

	/* store info about the attachment target that will be used later */
	prog->aux->attach_func_proto = tgt_info.tgt_type;
	prog->aux->attach_func_name = tgt_info.tgt_name;
	prog->aux->mod = tgt_info.tgt_mod;

	if (tgt_prog) {
		prog->aux->saved_dst_prog_type = tgt_prog->type;
		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
	}

	/* raw_tp and iter attachments don't go through a trampoline below */
	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
		prog->aux->attach_btf_trace = true;
		return 0;
	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
		return bpf_iter_prog_supported(prog);
	}

	if (prog->type == BPF_PROG_TYPE_LSM) {
		ret = bpf_lsm_verify_prog(&env->log, prog);
		if (ret < 0)
			return ret;
	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
		   btf_id_set_contains(&btf_id_deny, btf_id)) {
		verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
			tgt_info.tgt_name);
		return -EINVAL;
	} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
		   prog->expected_attach_type == BPF_TRACE_FSESSION ||
		   prog->expected_attach_type == BPF_MODIFY_RETURN) &&
		   btf_id_set_contains(&noreturn_deny, btf_id)) {
		/* return-side hooks can never fire on a __noreturn target */
		verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
			tgt_info.tgt_name);
		return -EINVAL;
	}

	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;

	prog->aux->dst_trampoline = tr;
	return 0;
}
19608 
bpf_get_btf_vmlinux(void)19609 struct btf *bpf_get_btf_vmlinux(void)
19610 {
19611 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
19612 		mutex_lock(&bpf_verifier_lock);
19613 		if (!btf_vmlinux)
19614 			btf_vmlinux = btf_parse_vmlinux();
19615 		mutex_unlock(&bpf_verifier_lock);
19616 	}
19617 	return btf_vmlinux;
19618 }
19619 
/*
 * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In
 * this case expect that every file descriptor in the array is either a map or
 * a BTF. Everything else is considered to be trash.
 */
static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
{
	struct bpf_map *map;
	struct btf *btf;
	CLASS(fd, f)(fd);	/* scoped fd reference, auto-released on return */
	int err;

	/* first try: the fd refers to a BPF map */
	map = __bpf_map_get(f);
	if (!IS_ERR(map)) {
		err = __add_used_map(env, map);
		if (err < 0)
			return err;
		return 0;
	}

	/* second try: the fd refers to a BTF object */
	btf = __btf_get_by_fd(f);
	if (!IS_ERR(btf)) {
		/* extra ref for env->used_btfs; NOTE(review): assumes
		 * __btf_get_by_fd does not already take one — confirm
		 */
		btf_get(btf);
		return __add_used_btf(env, btf);
	}

	/* neither a map nor a BTF: propagate the map-lookup error */
	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
	return PTR_ERR(map);
}
19649 
/* Record the user-supplied fd_array in @env and, when the newer
 * fd_array_cnt API is used, eagerly resolve every descriptor in the array
 * into a used map or used BTF.  Returns 0 on success, negative error
 * otherwise.
 */
static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
{
	size_t elem_sz = sizeof(int);
	int err, fd;
	u32 idx;

	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);

	/*
	 * The only difference between old (no fd_array_cnt is given) and new
	 * APIs is that in the latter case the fd_array is expected to be
	 * continuous and is scanned for map fds right away
	 */
	if (!attr->fd_array_cnt)
		return 0;

	/* Check for integer overflow */
	if (attr->fd_array_cnt >= (U32_MAX / elem_sz)) {
		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
		return -EINVAL;
	}

	for (idx = 0; idx < attr->fd_array_cnt; idx++) {
		if (copy_from_bpfptr_offset(&fd, env->fd_array, idx * elem_sz, elem_sz))
			return -EFAULT;

		err = add_fd_from_fd_array(env, fd);
		if (err)
			return err;
	}

	return 0;
}
19684 
19685 /* replace a generic kfunc with a specialized version if necessary */
specialize_kfunc(struct bpf_verifier_env * env,struct bpf_kfunc_desc * desc,int insn_idx)19686 static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
19687 {
19688 	struct bpf_prog *prog = env->prog;
19689 	bool seen_direct_write;
19690 	void *xdp_kfunc;
19691 	bool is_rdonly;
19692 	u32 func_id = desc->func_id;
19693 	u16 offset = desc->offset;
19694 	unsigned long addr = desc->addr;
19695 
19696 	if (offset) /* return if module BTF is used */
19697 		return 0;
19698 
19699 	if (bpf_dev_bound_kfunc_id(func_id)) {
19700 		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
19701 		if (xdp_kfunc)
19702 			addr = (unsigned long)xdp_kfunc;
19703 		/* fallback to default kfunc when not supported by netdev */
19704 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
19705 		seen_direct_write = env->seen_direct_write;
19706 		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
19707 
19708 		if (is_rdonly)
19709 			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
19710 
19711 		/* restore env->seen_direct_write to its original value, since
19712 		 * may_access_direct_pkt_data mutates it
19713 		 */
19714 		env->seen_direct_write = seen_direct_write;
19715 	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
19716 		if (bpf_lsm_has_d_inode_locked(prog))
19717 			addr = (unsigned long)bpf_set_dentry_xattr_locked;
19718 	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
19719 		if (bpf_lsm_has_d_inode_locked(prog))
19720 			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
19721 	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
19722 		if (!env->insn_aux_data[insn_idx].non_sleepable)
19723 			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
19724 	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
19725 		if (env->insn_aux_data[insn_idx].non_sleepable)
19726 			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
19727 	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
19728 		if (env->insn_aux_data[insn_idx].non_sleepable)
19729 			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
19730 	}
19731 	desc->addr = addr;
19732 	return 0;
19733 }
19734 
/* Emit the hidden-argument setup for list-push/rbtree-add kfuncs:
 * a two-insn ld_imm64 loading the btf_struct_meta pointer into
 * @struct_meta_reg, a mov of the node offset into @node_offset_reg,
 * then the original call instruction.  *cnt receives the sequence length.
 */
static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
					    u16 struct_meta_reg,
					    u16 node_offset_reg,
					    struct bpf_insn *insn,
					    struct bpf_insn *insn_buf,
					    int *cnt)
{
	struct bpf_insn meta_ld[2] = {
		BPF_LD_IMM64(struct_meta_reg, (long)insn_aux->kptr_struct_meta)
	};
	int n = 0;

	insn_buf[n++] = meta_ld[0];
	insn_buf[n++] = meta_ld[1];
	insn_buf[n++] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
	insn_buf[n++] = *insn;
	*cnt = n;
}
19751 
/* Rewrite a kfunc CALL at @insn_idx: look up its descriptor, apply kfunc
 * specialization, and for a handful of special kfuncs emit a replacement
 * instruction sequence into @insn_buf.  *cnt receives the sequence length
 * (0 means "keep the original instruction unchanged").
 */
int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
		     struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
	struct bpf_kfunc_desc *desc;
	int err;

	if (!insn->imm) {
		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
		return -EINVAL;
	}

	*cnt = 0;

	/* insn->imm has the btf func_id. Replace it with an offset relative to
	 * __bpf_call_base, unless the JIT needs to call functions that are
	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
	 */
	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
	if (!desc) {
		verifier_bug(env, "kernel function descriptor not found for func_id %u",
			     insn->imm);
		return -EFAULT;
	}

	err = specialize_kfunc(env, desc, insn_idx);
	if (err)
		return err;

	if (!bpf_jit_supports_far_kfunc_call())
		insn->imm = BPF_CALL_IMM(desc->addr);

	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
		/* obj_new: pass hidden args — object size in R1, struct_meta in R2 */
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;

		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
		insn_buf[1] = addr[0];
		insn_buf[2] = addr[1];
		insn_buf[3] = *insn;
		*cnt = 4;
	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
		/* these take a hidden struct_meta pointer in R2 */
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };

		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		insn_buf[0] = addr[0];
		insn_buf[1] = addr[1];
		insn_buf[2] = *insn;
		*cnt = 3;
	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		int struct_meta_reg = BPF_REG_3;
		int node_offset_reg = BPF_REG_4;

		/* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
		if (is_bpf_rbtree_add_kfunc(desc->func_id)) {
			struct_meta_reg = BPF_REG_4;
			node_offset_reg = BPF_REG_5;
		}

		if (!kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
						node_offset_reg, insn, insn_buf, cnt);
	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
		/* identity casts: replace the call with a plain R1 -> R0 move */
		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
		*cnt = 1;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
		/*
		 * inline the bpf_session_is_return() for fsession:
		 *   bool bpf_session_is_return(void *ctx)
		 *   {
		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
		*cnt = 3;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
		   env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
		/*
		 * inline bpf_session_cookie() for fsession:
		 *   __u64 *bpf_session_cookie(void *ctx)
		 *   {
		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
		 *       return &((u64 *)ctx)[-off];
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
		*cnt = 6;
	}

	/* kfunc takes a hidden prog argument: materialize env->prog->aux via
	 * ld_imm64 into the recorded register ahead of the call
	 */
	if (env->insn_aux_data[insn_idx].arg_prog) {
		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
		int idx = *cnt;

		insn_buf[idx++] = ld_addrs[0];
		insn_buf[idx++] = ld_addrs[1];
		insn_buf[idx++] = *insn;
		*cnt = idx;
	}
	return 0;
}
19889 
/* Main entry point of the verifier: set up a bpf_verifier_env for @*prog,
 * run the pre-verification analyses, the symbolic-execution pass and the
 * post-verification rewrite passes, then finalize the log and transfer
 * used maps/BTFs to the program.  The (possibly patched) program is
 * returned through @prog.
 */
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	int i, len, ret = -EINVAL, err;
	u32 log_true_size;
	bool is_priv;

	BTF_TYPE_EMIT(enum bpf_features);

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
	if (!env)
		return -ENOMEM;

	env->bt.env = env;

	len = (*prog)->len;
	env->insn_aux_data =
		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	for (i = 0; i < len; i++)
		env->insn_aux_data[i].orig_idx = i;
	env->succ = bpf_iarray_realloc(NULL, 2);
	if (!env->succ)
		goto err_free_env;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];

	/* capability-derived knobs, gated by the program's token */
	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);

	bpf_get_btf_vmlinux();

	/* grab the mutex to protect few globals used by verifier */
	if (!is_priv)
		mutex_lock(&bpf_verifier_lock);

	/* user could have requested verbose verifier output
	 * and supplied buffer to store the verification trace
	 */
	ret = bpf_vlog_init(&env->log, attr->log_level,
			    (char __user *) (unsigned long) attr->log_buf,
			    attr->log_size);
	if (ret)
		goto err_unlock;

	ret = process_fd_array(env, attr, uattr);
	if (ret)
		goto skip_full_check;

	mark_verifier_state_clean(env);

	if (IS_ERR(btf_vmlinux)) {
		/* Either gcc or pahole or kernel are broken. */
		verbose(env, "in-kernel BTF is malformed\n");
		ret = PTR_ERR(btf_vmlinux);
		goto skip_full_check;
	}

	/* BPF_F_ANY_ALIGNMENT overrides both the flag and the config default */
	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;
	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
		env->strict_alignment = false;

	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;

	env->explored_states = kvzalloc_objs(struct list_head,
					     state_htab_size(env),
					     GFP_KERNEL_ACCOUNT);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	for (i = 0; i < state_htab_size(env); i++)
		INIT_LIST_HEAD(&env->explored_states[i]);
	INIT_LIST_HEAD(&env->free_list);

	/* pre-verification passes: subprog/kfunc discovery, BTF info,
	 * CFG construction and auxiliary analyses
	 */
	ret = bpf_check_btf_info_early(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = add_subprog_and_kfunc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_subprogs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = check_and_resolve_insns(env);
	if (ret < 0)
		goto skip_full_check;

	if (bpf_prog_is_offloaded(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	ret = bpf_check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_postorder(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_stack_liveness_init(env);
	if (ret)
		goto skip_full_check;

	ret = check_attach_btf_id(env);
	if (ret)
		goto skip_full_check;

	ret = bpf_compute_const_regs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_prune_dead_branches(env);
	if (ret < 0)
		goto skip_full_check;

	ret = sort_subprogs_topo(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_scc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_live_registers(env);
	if (ret < 0)
		goto skip_full_check;

	ret = mark_fastcall_patterns(env);
	if (ret < 0)
		goto skip_full_check;

	/* the main symbolic-execution pass */
	ret = do_check_main(env);
	ret = ret ?: do_check_subprogs(env);

	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

skip_full_check:
	kvfree(env->explored_states);

	/* might decrease stack depth, keep it before passes that
	 * allocate additional slots.
	 */
	if (ret == 0)
		ret = bpf_remove_fastcall_spills_fills(env);

	if (ret == 0)
		ret = check_max_stack_depth(env);

	/* instruction rewrites happen after this point */
	if (ret == 0)
		ret = bpf_optimize_bpf_loop(env);

	if (is_priv) {
		if (ret == 0)
			bpf_opt_hard_wire_dead_code_branches(env);
		if (ret == 0)
			ret = bpf_opt_remove_dead_code(env);
		if (ret == 0)
			ret = bpf_opt_remove_nops(env);
	} else {
		if (ret == 0)
			sanitize_dead_code(env);
	}

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = bpf_convert_ctx_accesses(env);

	if (ret == 0)
		ret = bpf_do_misc_fixups(env);

	/* do 32-bit optimization after insn patching has done so those patched
	 * insns could be handled correctly.
	 */
	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
		ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr);
		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
								     : false;
	}

	if (ret == 0)
		ret = bpf_fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);
	env->prog->aux->verified_insns = env->insn_processed;

	/* preserve original error even if log finalization is successful */
	err = bpf_vlog_finalize(&env->log, &log_true_size);
	if (err)
		ret = err;

	/* report the actually-needed log size back to userspace if asked */
	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
				  &log_true_size, sizeof(log_true_size))) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret)
		goto err_release_maps;

	if (env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0],
							 env->used_map_cnt,
							 GFP_KERNEL_ACCOUNT);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;
	}
	if (env->used_btf_cnt) {
		/* if program passed verifier, update used_btfs in bpf_prog_aux */
		env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0],
							 env->used_btf_cnt,
							 GFP_KERNEL_ACCOUNT);
		if (!env->prog->aux->used_btfs) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_btfs, env->used_btfs,
		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
	}
	if (env->used_map_cnt || env->used_btf_cnt) {
		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

	adjust_btf_func(env);

err_release_maps:
	if (ret)
		release_insn_arrays(env);
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);
	if (!env->prog->aux->used_btfs)
		release_btfs(env);

	/* extension progs temporarily inherit the attach_type of their targets
	   for verification purposes, so set it back to zero before returning
	 */
	if (env->prog->type == BPF_PROG_TYPE_EXT)
		env->prog->expected_attach_type = 0;

	*prog = env->prog;

	module_put(env->attach_btf_mod);
err_unlock:
	if (!is_priv)
		mutex_unlock(&bpf_verifier_lock);
	bpf_clear_insn_aux_data(env, 0, env->prog->len);
	vfree(env->insn_aux_data);
err_free_env:
	bpf_stack_liveness_free(env);
	kvfree(env->cfg.insn_postorder);
	kvfree(env->scc_info);
	kvfree(env->succ);
	kvfree(env->gotox_tmp_buf);
	kvfree(env);
	return ret;
}
20192