// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for ARM64
 *
 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
 */

#define pr_fmt(fmt) "bpf_jit: " fmt

#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>

#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/text-patching.h>
#include <asm/set_memory.h>

#include "bpf_jit.h"

#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)

#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)

/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt_ptr */
	[TCCNT_PTR] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	/* callee saved register for kern_vm_start address */
	[ARENA_VM_START] = A64_R(28),
};

struct jit_ctx {
	const struct bpf_prog *prog;
	int idx;
	int epilogue_offset;
	int *offset;
	int exentry_idx;
	int nr_used_callee_reg;
	u8 used_callee_reg[8]; /* r6~r9, fp, arena_vm_start */
	__le32 *image;
	__le32 *ro_image;
	u32 stack_size;
	u64 user_vm_start;
	u64 arena_vm_start;
	bool fp_used;
	bool write;
};

struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br;  /* branch to target */
	u64 target;   /* target value */
};

#define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)

static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL && ctx->write)
		ctx->image[ctx->idx] = cpu_to_le32(insn);

	ctx->idx++;
}

static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 hi = val >> 16;
	u16 lo = val & 0xffff;

	if (hi & 0x8000) {
		if (hi == 0xffff) {
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			if (lo != 0xffff)
				emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
		if (hi)
			emit(A64_MOVK(is64, reg, hi, 16), ctx);
	}
}
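
/*
 * Illustrative walk-through of emit_a64_mov_i() (assumed example values,
 * not taken from the kernel sources): for val = 0xffff1234 the high half
 * is 0xffff, so a single MOVN reg, #0xedcb (i.e. ~0x1234) materializes
 * the value; for val = 0x00051234 the sequence is MOVZ reg, #0x1234
 * followed by MOVK reg, #0x5, lsl #16.
 */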

static int i64_i16_blocks(const u64 val, bool inverse)
{
	return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
}

static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 nrm_tmp = val, rev_tmp = ~val;
	bool inverse;
	int shift;

	if (!(nrm_tmp >> 32))
		return emit_a64_mov_i(0, reg, (u32)val, ctx);

	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
					  (fls64(nrm_tmp) - 1)), 16), 0);
	if (inverse)
		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
	else
		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
	shift -= 16;
	while (shift >= 0) {
		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
		shift -= 16;
	}
}

static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
{
	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
		emit(insn, ctx);
}

/*
 * Kernel addresses in the vmalloc space use at most 48 bits, and the
 * remaining bits are guaranteed to be 0x1. So we can compose the address
 * with a fixed length movn/movk/movk sequence.
 */
static inline void emit_addr_mov_i64(const int reg, const u64 val,
				     struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
	while (shift < 32) {
		tmp >>= 16;
		shift += 16;
		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
	}
}

static bool should_emit_indirect_call(long target, const struct jit_ctx *ctx)
{
	long offset;

	/* when ctx->ro_image is not allocated or the target is unknown,
	 * emit indirect call
	 */
	if (!ctx->ro_image || !target)
		return true;

	offset = target - (long)&ctx->ro_image[ctx->idx];
	return offset < -SZ_128M || offset >= SZ_128M;
}

static void emit_direct_call(u64 target, struct jit_ctx *ctx)
{
	u32 insn;
	unsigned long pc;

	pc = (unsigned long)&ctx->ro_image[ctx->idx];
	insn = aarch64_insn_gen_branch_imm(pc, target, AARCH64_INSN_BRANCH_LINK);
	emit(insn, ctx);
}

static void emit_indirect_call(u64 target, struct jit_ctx *ctx)
{
	u8 tmp;

	tmp = bpf2a64[TMP_REG_1];
	emit_addr_mov_i64(tmp, target, ctx);
	emit(A64_BLR(tmp), ctx);
}

static void emit_call(u64 target, struct jit_ctx *ctx)
{
	if (should_emit_indirect_call((long)target, ctx))
		emit_indirect_call(target, ctx);
	else
		emit_direct_call(target, ctx);
}

static inline int bpf2a64_offset(int bpf_insn, int off,
				 const struct jit_ctx *ctx)
{
	/* BPF JMP offset is relative to the next instruction */
	bpf_insn++;
	/*
	 * Whereas arm64 branch instructions encode the offset
	 * from the branch itself, so we must subtract 1 from the
	 * instruction offset.
	 */
	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
}
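
/*
 * Worked example for bpf2a64_offset() (hypothetical numbers, illustration
 * only): a BPF branch at instruction i with off = 2 targets BPF
 * instruction i + 3. If ctx->offset[i + 1] == 10 and
 * ctx->offset[i + 3] == 17, the arm64 branch itself sits at index
 * 10 - 1 = 9, so the emitted branch distance is 17 - 9 = 8 A64
 * instructions.
 */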

static void jit_fill_hole(void *area, unsigned int size)
{
	__le32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}

int bpf_arch_text_invalidate(void *dst, size_t len)
{
	if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len))
		return -EINVAL;

	return 0;
}

static inline int epilogue_offset(const struct jit_ctx *ctx)
{
	int to = ctx->epilogue_offset;
	int from = ctx->idx;

	return to - from;
}

static bool is_addsub_imm(u32 imm)
{
	/* Either imm12 or shifted imm12. */
	return !(imm & ~0xfff) || !(imm & ~0xfff000);
}

static inline void emit_a64_add_i(const bool is64, const int dst, const int src,
				  const int tmp, const s32 imm, struct jit_ctx *ctx)
{
	if (is_addsub_imm(imm)) {
		emit(A64_ADD_I(is64, dst, src, imm), ctx);
	} else if (is_addsub_imm(-(u32)imm)) {
		emit(A64_SUB_I(is64, dst, src, -imm), ctx);
	} else {
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_ADD(is64, dst, src, tmp), ctx);
	}
}

/*
 * There are 3 types of AArch64 LDR/STR (immediate) instruction:
 * Post-index, Pre-index, Unsigned offset.
 *
 * For BPF ldr/str, the "unsigned offset" type is sufficient.
 *
 * "Unsigned offset" type LDR(immediate) format:
 *
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * "Unsigned offset" type STR(immediate) format:
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * The offset is calculated from imm12 and scale in the following way:
 *
 * offset = (u64)imm12 << scale
 */
static bool is_lsi_offset(int offset, int scale)
{
	if (offset < 0)
		return false;

	if (offset > (0xFFF << scale))
		return false;

	if (offset & ((1 << scale) - 1))
		return false;

	return true;
}
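
/*
 * Worked example for is_lsi_offset() (assumed values, illustration only):
 * a BPF_DW access uses scale = 3, so offset 24 is encodable as imm12 = 3
 * (24 == 3 << 3), whereas offset 20 fails the alignment check and the JIT
 * falls back to materializing the offset in a temporary register.
 */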

/* generated main prog prologue:
 *      bti c // if CONFIG_ARM64_BTI_KERNEL
 *      mov x9, lr
 *      nop  // POKE_OFFSET
 *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
 *      stp x29, lr, [sp, #-16]!
 *      mov x29, sp
 *      stp xzr, x26, [sp, #-16]!
 *      mov x26, sp
 *      // PROLOGUE_OFFSET
 *      // save callee-saved registers
 */
static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
{
	const bool is_main_prog = !bpf_is_subprog(ctx->prog);
	const u8 ptr = bpf2a64[TCCNT_PTR];

	if (is_main_prog) {
		/* Initialize tail_call_cnt. */
		emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx);
		emit(A64_MOV(1, ptr, A64_SP), ctx);
	} else
		emit(A64_PUSH(ptr, ptr, A64_SP), ctx);
}

static void find_used_callee_regs(struct jit_ctx *ctx)
{
	int i;
	const struct bpf_prog *prog = ctx->prog;
	const struct bpf_insn *insn = &prog->insnsi[0];
	int reg_used = 0;

	for (i = 0; i < prog->len; i++, insn++) {
		if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
			reg_used |= 1;

		if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
			reg_used |= 2;

		if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
			reg_used |= 4;

		if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
			reg_used |= 8;

		if (insn->dst_reg == BPF_REG_FP || insn->src_reg == BPF_REG_FP) {
			ctx->fp_used = true;
			reg_used |= 16;
		}
	}

	i = 0;
	if (reg_used & 1)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_6];

	if (reg_used & 2)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_7];

	if (reg_used & 4)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_8];

	if (reg_used & 8)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];

	if (reg_used & 16)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];

	if (ctx->arena_vm_start)
		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];

	ctx->nr_used_callee_reg = i;
}

/* Save callee-saved registers */
static void push_callee_regs(struct jit_ctx *ctx)
{
	int reg1, reg2, i;

	/*
	 * Program acting as exception boundary should save all ARM64
	 * Callee-saved registers as the exception callback needs to recover
	 * all ARM64 Callee-saved registers in its epilogue.
	 */
	if (ctx->prog->aux->exception_boundary) {
		emit(A64_PUSH(A64_R(19), A64_R(20), A64_SP), ctx);
		emit(A64_PUSH(A64_R(21), A64_R(22), A64_SP), ctx);
		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
	} else {
		find_used_callee_regs(ctx);
		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
			reg1 = ctx->used_callee_reg[i];
			reg2 = ctx->used_callee_reg[i + 1];
			emit(A64_PUSH(reg1, reg2, A64_SP), ctx);
		}
		if (i < ctx->nr_used_callee_reg) {
			reg1 = ctx->used_callee_reg[i];
			/* keep SP 16-byte aligned */
			emit(A64_PUSH(reg1, A64_ZR, A64_SP), ctx);
		}
	}
}

/* Restore callee-saved registers */
static void pop_callee_regs(struct jit_ctx *ctx)
{
	struct bpf_prog_aux *aux = ctx->prog->aux;
	int reg1, reg2, i;

	/*
	 * Program acting as exception boundary pushes R23 and R24 in addition
	 * to BPF callee-saved registers. Exception callback uses the boundary
	 * program's stack frame, so recover these extra registers in the above
	 * two cases.
441 */ 442 if (aux->exception_boundary || aux->exception_cb) { 443 emit(A64_POP(A64_R(27), A64_R(28), A64_SP), ctx); 444 emit(A64_POP(A64_R(25), A64_R(26), A64_SP), ctx); 445 emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx); 446 emit(A64_POP(A64_R(21), A64_R(22), A64_SP), ctx); 447 emit(A64_POP(A64_R(19), A64_R(20), A64_SP), ctx); 448 } else { 449 i = ctx->nr_used_callee_reg - 1; 450 if (ctx->nr_used_callee_reg % 2 != 0) { 451 reg1 = ctx->used_callee_reg[i]; 452 emit(A64_POP(reg1, A64_ZR, A64_SP), ctx); 453 i--; 454 } 455 while (i > 0) { 456 reg1 = ctx->used_callee_reg[i - 1]; 457 reg2 = ctx->used_callee_reg[i]; 458 emit(A64_POP(reg1, reg2, A64_SP), ctx); 459 i -= 2; 460 } 461 } 462 } 463 464 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) 465 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0) 466 467 /* Offset of nop instruction in bpf prog entry to be poked */ 468 #define POKE_OFFSET (BTI_INSNS + 1) 469 470 /* Tail call offset to jump into */ 471 #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4) 472 473 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) 474 { 475 const struct bpf_prog *prog = ctx->prog; 476 const bool is_main_prog = !bpf_is_subprog(prog); 477 const u8 fp = bpf2a64[BPF_REG_FP]; 478 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 479 const int idx0 = ctx->idx; 480 int cur_offset; 481 482 /* 483 * BPF prog stack layout 484 * 485 * high 486 * original A64_SP => 0:+-----+ BPF prologue 487 * |FP/LR| 488 * current A64_FP => -16:+-----+ 489 * | ... | callee saved registers 490 * BPF fp register => -64:+-----+ <= (BPF_FP) 491 * | | 492 * | ... | BPF prog stack 493 * | | 494 * +-----+ <= (BPF_FP - prog->aux->stack_depth) 495 * |RSVD | padding 496 * current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size) 497 * | | 498 * | ... | Function call stack 499 * | | 500 * +-----+ 501 * low 502 * 503 */ 504 505 /* bpf function may be invoked by 3 instruction types: 506 * 1. bl, attached via freplace to bpf prog via short jump 507 * 2. br, attached via freplace to bpf prog via long jump 508 * 3. blr, working as a function pointer, used by emit_call. 509 * So BTI_JC should used here to support both br and blr. 510 */ 511 emit_bti(A64_BTI_JC, ctx); 512 513 emit(A64_MOV(1, A64_R(9), A64_LR), ctx); 514 emit(A64_NOP, ctx); 515 516 if (!prog->aux->exception_cb) { 517 /* Sign lr */ 518 if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) 519 emit(A64_PACIASP, ctx); 520 521 /* Save FP and LR registers to stay align with ARM64 AAPCS */ 522 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); 523 emit(A64_MOV(1, A64_FP, A64_SP), ctx); 524 525 prepare_bpf_tail_call_cnt(ctx); 526 527 if (!ebpf_from_cbpf && is_main_prog) { 528 cur_offset = ctx->idx - idx0; 529 if (cur_offset != PROLOGUE_OFFSET) { 530 pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", 531 cur_offset, PROLOGUE_OFFSET); 532 return -1; 533 } 534 /* BTI landing pad for the tail call, done with a BR */ 535 emit_bti(A64_BTI_J, ctx); 536 } 537 push_callee_regs(ctx); 538 } else { 539 /* 540 * Exception callback receives FP of Main Program as third 541 * parameter 542 */ 543 emit(A64_MOV(1, A64_FP, A64_R(2)), ctx); 544 /* 545 * Main Program already pushed the frame record and the 546 * callee-saved registers. The exception callback will not push 547 * anything and re-use the main program's stack. 
548 * 549 * 12 registers are on the stack 550 */ 551 emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx); 552 } 553 554 if (ctx->fp_used) 555 /* Set up BPF prog stack base register */ 556 emit(A64_MOV(1, fp, A64_SP), ctx); 557 558 /* Stack must be multiples of 16B */ 559 ctx->stack_size = round_up(prog->aux->stack_depth, 16); 560 561 /* Set up function call stack */ 562 if (ctx->stack_size) 563 emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); 564 565 if (ctx->arena_vm_start) 566 emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx); 567 568 return 0; 569 } 570 571 static int emit_bpf_tail_call(struct jit_ctx *ctx) 572 { 573 /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ 574 const u8 r2 = bpf2a64[BPF_REG_2]; 575 const u8 r3 = bpf2a64[BPF_REG_3]; 576 577 const u8 tmp = bpf2a64[TMP_REG_1]; 578 const u8 prg = bpf2a64[TMP_REG_2]; 579 const u8 tcc = bpf2a64[TMP_REG_3]; 580 const u8 ptr = bpf2a64[TCCNT_PTR]; 581 size_t off; 582 __le32 *branch1 = NULL; 583 __le32 *branch2 = NULL; 584 __le32 *branch3 = NULL; 585 586 /* if (index >= array->map.max_entries) 587 * goto out; 588 */ 589 off = offsetof(struct bpf_array, map.max_entries); 590 emit_a64_mov_i64(tmp, off, ctx); 591 emit(A64_LDR32(tmp, r2, tmp), ctx); 592 emit(A64_MOV(0, r3, r3), ctx); 593 emit(A64_CMP(0, r3, tmp), ctx); 594 branch1 = ctx->image + ctx->idx; 595 emit(A64_NOP, ctx); 596 597 /* 598 * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT) 599 * goto out; 600 */ 601 emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); 602 emit(A64_LDR64I(tcc, ptr, 0), ctx); 603 emit(A64_CMP(1, tcc, tmp), ctx); 604 branch2 = ctx->image + ctx->idx; 605 emit(A64_NOP, ctx); 606 607 /* (*tail_call_cnt_ptr)++; */ 608 emit(A64_ADD_I(1, tcc, tcc, 1), ctx); 609 610 /* prog = array->ptrs[index]; 611 * if (prog == NULL) 612 * goto out; 613 */ 614 off = offsetof(struct bpf_array, ptrs); 615 emit_a64_mov_i64(tmp, off, ctx); 616 emit(A64_ADD(1, tmp, r2, tmp), ctx); 617 emit(A64_LSL(1, prg, r3, 3), ctx); 618 emit(A64_LDR64(prg, tmp, prg), ctx); 619 branch3 = ctx->image + ctx->idx; 620 emit(A64_NOP, ctx); 621 622 /* Update tail_call_cnt if the slot is populated. 
	emit(A64_STR64I(tcc, ptr, 0), ctx);

	/* restore SP */
	if (ctx->stack_size)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);

	/* goto *(prog->bpf_func + prologue_offset); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR64(tmp, prg, tmp), ctx);
	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
	emit(A64_BR(tmp), ctx);

	if (ctx->image) {
		off = &ctx->image[ctx->idx] - branch1;
		*branch1 = cpu_to_le32(A64_B_(A64_COND_CS, off));

		off = &ctx->image[ctx->idx] - branch2;
		*branch2 = cpu_to_le32(A64_B_(A64_COND_CS, off));

		off = &ctx->image[ctx->idx] - branch3;
		*branch3 = cpu_to_le32(A64_CBZ(1, prg, off));
	}

	return 0;
}

static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const u8 code = insn->code;
	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	u8 reg;

	switch (imm) {
	case BPF_LOAD_ACQ:
		reg = src;
		break;
	case BPF_STORE_REL:
		reg = dst;
		break;
	default:
		pr_err_once("unknown atomic load/store op code %02x\n", imm);
		return -EINVAL;
	}

	if (off) {
		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
		reg = tmp;
	}
	if (arena) {
		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
		reg = tmp;
	}

	switch (imm) {
	case BPF_LOAD_ACQ:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			emit(A64_LDARB(dst, reg), ctx);
			break;
		case BPF_H:
			emit(A64_LDARH(dst, reg), ctx);
			break;
		case BPF_W:
			emit(A64_LDAR32(dst, reg), ctx);
			break;
		case BPF_DW:
			emit(A64_LDAR64(dst, reg), ctx);
			break;
		}
		break;
	case BPF_STORE_REL:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			emit(A64_STLRB(src, reg), ctx);
			break;
		case BPF_H:
			emit(A64_STLRH(src, reg), ctx);
			break;
		case BPF_W:
			emit(A64_STLR32(src, reg), ctx);
			break;
		case BPF_DW:
			emit(A64_STLR64(src, reg), ctx);
			break;
		}
		break;
	default:
		pr_err_once("unexpected atomic load/store op code %02x\n",
			    imm);
		return -EINVAL;
	}

	return 0;
}

#ifdef CONFIG_ARM64_LSE_ATOMICS
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
	const s16 off = insn->off;
	u8 reg = dst;

	if (off) {
		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
		reg = tmp;
	}
	if (arena) {
		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
		reg = tmp;
	}

	switch (insn->imm) {
	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
	case BPF_ADD:
		emit(A64_STADD(isdw, reg, src), ctx);
		break;
	case BPF_AND:
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_STCLR(isdw, reg, tmp2), ctx);
		break;
	case BPF_OR:
		emit(A64_STSET(isdw, reg, src), ctx);
		break;
	case BPF_XOR:
		emit(A64_STEOR(isdw, reg, src), ctx);
		break;
	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
	case BPF_ADD | BPF_FETCH:
		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
		break;
	case BPF_AND | BPF_FETCH:
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
		break;
	case BPF_OR | BPF_FETCH:
		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
		break;
	case BPF_XOR | BPF_FETCH:
		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
		break;
	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
	case BPF_XCHG:
		emit(A64_SWPAL(isdw, src, reg, src), ctx);
		break;
	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
	case BPF_CMPXCHG:
		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
		break;
	default:
		pr_err_once("unknown atomic op code %02x\n", insn->imm);
		return -EINVAL;
	}

	return 0;
}
#else
static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	return -EINVAL;
}
#endif

static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	const int i = insn - ctx->prog->insnsi;
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	u8 reg = dst;
	s32 jmp_offset;

	if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
		/* ll_sc based atomics don't support unsafe pointers yet. */
		pr_err_once("unknown atomic opcode %02x\n", code);
		return -EINVAL;
	}

	if (off) {
		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
		reg = tmp;
	}

	if (imm == BPF_ADD || imm == BPF_AND ||
	    imm == BPF_OR || imm == BPF_XOR) {
		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
		emit(A64_LDXR(isdw, tmp2, reg), ctx);
		if (imm == BPF_ADD)
			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_AND)
			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_OR)
			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
		else
			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
	} else if (imm == (BPF_ADD | BPF_FETCH) ||
		   imm == (BPF_AND | BPF_FETCH) ||
		   imm == (BPF_OR | BPF_FETCH) ||
		   imm == (BPF_XOR | BPF_FETCH)) {
		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
		const u8 ax = bpf2a64[BPF_REG_AX];

		emit(A64_MOV(isdw, ax, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		if (imm == (BPF_ADD | BPF_FETCH))
			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_AND | BPF_FETCH))
			emit(A64_AND(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_OR | BPF_FETCH))
			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
		else
			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_XCHG) {
		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
		emit(A64_MOV(isdw, tmp2, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -2;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_CMPXCHG) {
		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
		const u8 r0 = bpf2a64[BPF_REG_0];

		emit(A64_MOV(isdw, tmp2, r0), ctx);
		emit(A64_LDXR(isdw, r0, reg), ctx);
		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
		jmp_offset = 4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
		jmp_offset = -4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else {
		pr_err_once("unknown atomic op code %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}

void dummy_tramp(void);

asm (
"	.pushsection .text, \"ax\", @progbits\n"
"	.global dummy_tramp\n"
"	.type dummy_tramp, %function\n"
"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
"	bti j\n" /* dummy_tramp is called via "br x10" */
#endif
"	mov x10, x30\n"
"	mov x30, x9\n"
"	ret x10\n"
"	.size dummy_tramp, .-dummy_tramp\n"
"	.popsection\n"
);

/* build a plt initialized like this:
 *
 * plt:
 *      ldr tmp, target
 *      br tmp
 * target:
 *      .quad dummy_tramp
 *
 * when a long jump trampoline is attached, target is filled with the
 * trampoline address, and when the trampoline is removed, target is
 * restored to dummy_tramp address.
 */
static void build_plt(struct jit_ctx *ctx)
{
	const u8 tmp = bpf2a64[TMP_REG_1];
	struct bpf_plt *plt = NULL;

	/* make sure target is 64-bit aligned */
	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
		emit(A64_NOP, ctx);

	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
	/* plt is called via bl, no BTI needed here */
	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
	emit(A64_BR(tmp), ctx);

	if (ctx->image)
		plt->target = (u64)&dummy_tramp;
}

/* Clobbers BPF registers 1-4, aka x0-x3 */
static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
{
	const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
	u8 k = get_spectre_bhb_loop_value();

	if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
	    cpu_mitigations_off() || __nospectre_bhb ||
	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
		return;

	if (capable(CAP_SYS_ADMIN))
		return;

	if (supports_clearbhb(SCOPE_SYSTEM)) {
		emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
		return;
	}

	if (k) {
		emit_a64_mov_i64(r1, k, ctx);
		emit(A64_B(1), ctx);
		emit(A64_SUBS_I(true, r1, r1, 1), ctx);
		emit(A64_B_(A64_COND_NE, -2), ctx);
		emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
		emit(aarch64_insn_get_isb_value(), ctx);
	}

	if (is_spectre_bhb_fw_mitigated()) {
		emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
			       ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
		switch (arm_smccc_1_1_get_conduit()) {
		case SMCCC_CONDUIT_HVC:
			emit(aarch64_insn_get_hvc_value(), ctx);
			break;
		case SMCCC_CONDUIT_SMC:
			emit(aarch64_insn_get_smc_value(), ctx);
			break;
		default:
			pr_err_once("Firmware mitigation enabled with unknown conduit\n");
		}
	}
}

static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
{
	const u8 r0 = bpf2a64[BPF_REG_0];
	const u8 ptr = bpf2a64[TCCNT_PTR];

	/* We're done with BPF stack */
	if (ctx->stack_size)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);

	emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);

	if (was_classic)
		build_bhb_mitigation(ctx);

	/* Restore FP/LR registers */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);

	/* Move the return value from bpf:r0 (aka x7) to x0 */
	emit(A64_MOV(1, A64_R(0), r0), ctx);

	/* Authenticate lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_AUTIASP, ctx);

	emit(A64_RET(A64_LR), ctx);
}

#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */

bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);

	if (dst_reg != DONT_CLEAR)
		regs->regs[dst_reg] = 0;
	regs->pc = (unsigned long)&ex->fixup - offset;
	return true;
}

/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	off_t ins_offset;
	off_t fixup_offset;
	unsigned long pc;
	struct exception_table_entry *ex;

	if (!ctx->image)
		/* First pass */
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
	    BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
		return 0;

	if (!ctx->prog->aux->extable ||
	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];

	/*
	 * This is the relative offset of the instruction that may fault from
	 * the exception table itself. This will be written to the exception
	 * table and if this instruction faults, the destination register will
	 * be set to '0' and the execution will jump to the next instruction.
	 */
	ins_offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
		return -ERANGE;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 *
	 * The fixup_offset is set to the next instruction from the instruction
	 * that may fault. The execution will jump to this after handling the
	 * fault.
	 */
	fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
		return -ERANGE;

	/*
	 * The offsets above have been calculated using the RO buffer but we
	 * need to use the R/W buffer for writes.
	 * switch ex to rw buffer for writing.
1090 */ 1091 ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); 1092 1093 ex->insn = ins_offset; 1094 1095 if (BPF_CLASS(insn->code) != BPF_LDX) 1096 dst_reg = DONT_CLEAR; 1097 1098 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | 1099 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); 1100 1101 ex->type = EX_TYPE_BPF; 1102 1103 ctx->exentry_idx++; 1104 return 0; 1105 } 1106 1107 /* JITs an eBPF instruction. 1108 * Returns: 1109 * 0 - successfully JITed an 8-byte eBPF instruction. 1110 * >0 - successfully JITed a 16-byte eBPF instruction. 1111 * <0 - failed to JIT. 1112 */ 1113 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, 1114 bool extra_pass) 1115 { 1116 const u8 code = insn->code; 1117 u8 dst = bpf2a64[insn->dst_reg]; 1118 u8 src = bpf2a64[insn->src_reg]; 1119 const u8 tmp = bpf2a64[TMP_REG_1]; 1120 const u8 tmp2 = bpf2a64[TMP_REG_2]; 1121 const u8 fp = bpf2a64[BPF_REG_FP]; 1122 const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; 1123 const s16 off = insn->off; 1124 const s32 imm = insn->imm; 1125 const int i = insn - ctx->prog->insnsi; 1126 const bool is64 = BPF_CLASS(code) == BPF_ALU64 || 1127 BPF_CLASS(code) == BPF_JMP; 1128 u8 jmp_cond; 1129 s32 jmp_offset; 1130 u32 a64_insn; 1131 u8 src_adj; 1132 u8 dst_adj; 1133 int off_adj; 1134 int ret; 1135 bool sign_extend; 1136 1137 switch (code) { 1138 /* dst = src */ 1139 case BPF_ALU | BPF_MOV | BPF_X: 1140 case BPF_ALU64 | BPF_MOV | BPF_X: 1141 if (insn_is_cast_user(insn)) { 1142 emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits 1143 emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx); 1144 emit(A64_LSL(1, dst, dst, 32), ctx); 1145 emit(A64_CBZ(1, tmp, 2), ctx); 1146 emit(A64_ORR(1, tmp, dst, tmp), ctx); 1147 emit(A64_MOV(1, dst, tmp), ctx); 1148 break; 1149 } else if (insn_is_mov_percpu_addr(insn)) { 1150 if (dst != src) 1151 emit(A64_MOV(1, dst, src), ctx); 1152 if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) 1153 emit(A64_MRS_TPIDR_EL2(tmp), ctx); 1154 else 1155 emit(A64_MRS_TPIDR_EL1(tmp), ctx); 1156 emit(A64_ADD(1, dst, dst, tmp), ctx); 1157 break; 1158 } 1159 switch (insn->off) { 1160 case 0: 1161 emit(A64_MOV(is64, dst, src), ctx); 1162 break; 1163 case 8: 1164 emit(A64_SXTB(is64, dst, src), ctx); 1165 break; 1166 case 16: 1167 emit(A64_SXTH(is64, dst, src), ctx); 1168 break; 1169 case 32: 1170 emit(A64_SXTW(is64, dst, src), ctx); 1171 break; 1172 } 1173 break; 1174 /* dst = dst OP src */ 1175 case BPF_ALU | BPF_ADD | BPF_X: 1176 case BPF_ALU64 | BPF_ADD | BPF_X: 1177 emit(A64_ADD(is64, dst, dst, src), ctx); 1178 break; 1179 case BPF_ALU | BPF_SUB | BPF_X: 1180 case BPF_ALU64 | BPF_SUB | BPF_X: 1181 emit(A64_SUB(is64, dst, dst, src), ctx); 1182 break; 1183 case BPF_ALU | BPF_AND | BPF_X: 1184 case BPF_ALU64 | BPF_AND | BPF_X: 1185 emit(A64_AND(is64, dst, dst, src), ctx); 1186 break; 1187 case BPF_ALU | BPF_OR | BPF_X: 1188 case BPF_ALU64 | BPF_OR | BPF_X: 1189 emit(A64_ORR(is64, dst, dst, src), ctx); 1190 break; 1191 case BPF_ALU | BPF_XOR | BPF_X: 1192 case BPF_ALU64 | BPF_XOR | BPF_X: 1193 emit(A64_EOR(is64, dst, dst, src), ctx); 1194 break; 1195 case BPF_ALU | BPF_MUL | BPF_X: 1196 case BPF_ALU64 | BPF_MUL | BPF_X: 1197 emit(A64_MUL(is64, dst, dst, src), ctx); 1198 break; 1199 case BPF_ALU | BPF_DIV | BPF_X: 1200 case BPF_ALU64 | BPF_DIV | BPF_X: 1201 if (!off) 1202 emit(A64_UDIV(is64, dst, dst, src), ctx); 1203 else 1204 emit(A64_SDIV(is64, dst, dst, src), ctx); 1205 break; 1206 case BPF_ALU | BPF_MOD | BPF_X: 1207 case BPF_ALU64 | BPF_MOD | BPF_X: 1208 if (!off) 1209 
			emit(A64_UDIV(is64, tmp, dst, src), ctx);
		else
			emit(A64_SDIV(is64, tmp, dst, src), ctx);
		emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit(A64_LSLV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(A64_LSRV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit(A64_ASRV(is64, dst, dst, src), ctx);
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit(A64_NEG(is64, dst, dst), ctx);
		break;
	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
#ifdef CONFIG_CPU_BIG_ENDIAN
		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE)
			goto emit_bswap_uxt;
#else /* !CONFIG_CPU_BIG_ENDIAN */
		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE)
			goto emit_bswap_uxt;
#endif
		switch (imm) {
		case 16:
			emit(A64_REV16(is64, dst, dst), ctx);
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			emit(A64_REV32(0, dst, dst), ctx);
			/* upper 32 bits already cleared */
			break;
		case 64:
			emit(A64_REV64(dst, dst), ctx);
			break;
		}
		break;
emit_bswap_uxt:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit(A64_UXTW(is64, dst, dst), ctx);
			break;
		case 64:
			/* nop */
			break;
		}
		break;
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_a64_mov_i(is64, dst, imm, ctx);
		break;
	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		emit_a64_add_i(is64, dst, dst, tmp, imm, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_addsub_imm(imm)) {
			emit(A64_SUB_I(is64, dst, dst, imm), ctx);
		} else if (is_addsub_imm(-(u32)imm)) {
			emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_SUB(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		a64_insn = A64_AND_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_AND(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		a64_insn = A64_ORR_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_ORR(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		a64_insn = A64_EOR_I(is64, dst, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_EOR(is64, dst, dst, tmp), ctx);
		}
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_MUL(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		emit_a64_mov_i(is64, tmp, imm, ctx);
		if (!off)
			emit(A64_UDIV(is64, dst, dst, tmp), ctx);
		else
			emit(A64_SDIV(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		emit_a64_mov_i(is64, tmp2, imm, ctx);
		if (!off)
			emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
		else
			emit(A64_SDIV(is64, tmp, dst, tmp2), ctx);
		emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit(A64_LSL(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit(A64_LSR(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit(A64_ASR(is64, dst, dst, imm), ctx);
		break;

	/* JUMP off */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
		if (BPF_CLASS(code) == BPF_JMP)
			jmp_offset = bpf2a64_offset(i, off, ctx);
		else
			jmp_offset = bpf2a64_offset(i, imm, ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;
	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		emit(A64_CMP(is64, dst, src), ctx);
emit_cond_jmp:
		jmp_offset = bpf2a64_offset(i, off, ctx);
		check_imm19(jmp_offset);
		switch (BPF_OP(code)) {
		case BPF_JEQ:
			jmp_cond = A64_COND_EQ;
			break;
		case BPF_JGT:
			jmp_cond = A64_COND_HI;
			break;
		case BPF_JLT:
			jmp_cond = A64_COND_CC;
			break;
		case BPF_JGE:
			jmp_cond = A64_COND_CS;
			break;
		case BPF_JLE:
			jmp_cond = A64_COND_LS;
			break;
		case BPF_JSET:
		case BPF_JNE:
			jmp_cond = A64_COND_NE;
			break;
		case BPF_JSGT:
			jmp_cond = A64_COND_GT;
			break;
		case BPF_JSLT:
			jmp_cond = A64_COND_LT;
			break;
		case BPF_JSGE:
			jmp_cond = A64_COND_GE;
			break;
		case BPF_JSLE:
			jmp_cond = A64_COND_LE;
			break;
		default:
			return -EFAULT;
		}
		emit(A64_B_(jmp_cond, jmp_offset), ctx);
		break;
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		emit(A64_TST(is64, dst, src), ctx);
		goto emit_cond_jmp;
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		if (is_addsub_imm(imm)) {
			emit(A64_CMP_I(is64, dst, imm), ctx);
		} else if (is_addsub_imm(-(u32)imm)) {
			emit(A64_CMN_I(is64, dst, -imm), ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_CMP(is64, dst, tmp), ctx);
		}
		goto emit_cond_jmp;
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		a64_insn = A64_TST_I(is64, dst, imm);
		if (a64_insn != AARCH64_BREAK_FAULT) {
			emit(a64_insn, ctx);
		} else {
			emit_a64_mov_i(is64, tmp, imm, ctx);
			emit(A64_TST(is64, dst, tmp), ctx);
		}
		goto emit_cond_jmp;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		const u8 r0 = bpf2a64[BPF_REG_0];
		bool func_addr_fixed;
		u64 func_addr;
		u32 cpu_offset;

		/* Implement helper call to bpf_get_smp_processor_id() inline */
		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
			cpu_offset = offsetof(struct thread_info, cpu);

			emit(A64_MRS_SP_EL0(tmp), ctx);
			if (is_lsi_offset(cpu_offset, 2)) {
				emit(A64_LDR32I(r0, tmp, cpu_offset), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, cpu_offset, ctx);
				emit(A64_LDR32(r0, tmp, tmp2), ctx);
			}
			break;
		}

		/* Implement helper call to bpf_get_current_task/_btf() inline */
		if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task ||
					   insn->imm == BPF_FUNC_get_current_task_btf)) {
			emit(A64_MRS_SP_EL0(r0), ctx);
			break;
		}

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (ret < 0)
			return ret;
		emit_call(func_addr, ctx);
		emit(A64_MOV(1, r0, A64_R(0)), ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(ctx))
			return -EFAULT;
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Optimization: when the last instruction is EXIT,
		   simply fall through to the epilogue.
		 */
		if (i == ctx->prog->len - 1)
			break;
		jmp_offset = epilogue_offset(ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		const struct bpf_insn insn1 = insn[1];
		u64 imm64;

		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		if (bpf_pseudo_func(insn))
			emit_addr_mov_i64(dst, imm64, ctx);
		else
			emit_a64_mov_i64(dst, imm64, ctx);

		return 1;
	}

	/* LDX: dst = (u64)*(unsigned size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	/* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
			src = tmp2;
		}
		if (src == fp) {
			src_adj = A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			src_adj = src;
			off_adj = off;
		}
		sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
				BPF_MODE(insn->code) == BPF_PROBE_MEMSX);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				if (sign_extend)
					emit(A64_LDRSWI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSW(dst, src, tmp), ctx);
				else
					emit(A64_LDR32(dst, src, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				if (sign_extend)
					emit(A64_LDRSHI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSH(dst, src, tmp), ctx);
				else
					emit(A64_LDRH(dst, src, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				if (sign_extend)
					emit(A64_LDRSBI(dst, src_adj, off_adj), ctx);
				else
					emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				if (sign_extend)
					emit(A64_LDRSB(dst, src, tmp), ctx);
				else
					emit(A64_LDRB(dst, src, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_LDR64(dst, src, tmp), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		/*
		 * Nothing required here.
		 *
		 * In case of arm64, we rely on the firmware mitigation of
		 * Speculative Store Bypass as controlled via the ssbd kernel
		 * parameter. Whenever the mitigation is enabled, it works
		 * for all of the kernel code with no need to provide any
		 * additional instructions.
		 */
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
			off_adj = off;
		}
		/* Load imm to a register then store it */
		emit_a64_mov_i(1, tmp, imm, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR32(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRH(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRB(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR64(tmp, dst, tmp2), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
			off_adj = off;
		}
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off_adj, 2)) {
				emit(A64_STR32I(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR32(src, dst, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off_adj, 1)) {
				emit(A64_STRHI(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRH(src, dst, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off_adj, 0)) {
				emit(A64_STRBI(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRB(src, dst, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off_adj, 3)) {
				emit(A64_STR64I(src, dst_adj, off_adj), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR64(src, dst, tmp), ctx);
			}
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	case BPF_STX | BPF_ATOMIC | BPF_B:
	case BPF_STX | BPF_ATOMIC | BPF_H:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
		if (bpf_atomic_is_load_store(insn))
			ret = emit_atomic_ld_st(insn, ctx);
		else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
			ret = emit_lse_atomic(insn, ctx);
		else
			ret = emit_ll_sc_atomic(insn, ctx);
		if (ret)
			return ret;

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	default:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	const struct bpf_prog *prog = ctx->prog;
	int i;

	/*
	 * - offset[0] offset of the end of prologue,
	 *   start of the 1st instruction.
	 * - offset[1] - offset of the end of 1st instruction,
	 *   start of the 2nd instruction
	 * [....]
	 * - offset[3] - offset of the end of 3rd instruction,
	 *   start of 4th instruction
	 */
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		ctx->offset[i] = ctx->idx;
		ret = build_insn(insn, ctx, extra_pass);
		if (ret > 0) {
			i++;
			ctx->offset[i] = ctx->idx;
			continue;
		}
		if (ret)
			return ret;
	}
	/*
	 * offset is allocated with prog->len + 1 so fill in
	 * the last element with the offset after the last
	 * instruction (end of program)
	 */
	ctx->offset[i] = ctx->idx;

	return 0;
}

static int validate_code(struct jit_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->idx; i++) {
		u32 a64_insn = le32_to_cpu(ctx->image[i]);

		if (a64_insn == AARCH64_BREAK_FAULT)
			return -1;
	}
	return 0;
}

static int validate_ctx(struct jit_ctx *ctx)
{
	if (validate_code(ctx))
		return -1;

	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
		return -1;

	return 0;
}

static inline void bpf_flush_icache(void *start, void *end)
{
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

struct arm64_jit_data {
	struct bpf_binary_header *header;
	u8 *ro_image;
	struct bpf_binary_header *ro_header;
	struct jit_ctx ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct bpf_binary_header *ro_header;
	struct arm64_jit_data *jit_data;
	bool was_classic = bpf_prog_was_classic(prog);
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
	u8 *image_ptr;
	u8 *ro_image_ptr;
	int body_idx;
	int exentry_idx;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/* If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
1894 */ 1895 if (IS_ERR(tmp)) 1896 return orig_prog; 1897 if (tmp != prog) { 1898 tmp_blinded = true; 1899 prog = tmp; 1900 } 1901 1902 jit_data = prog->aux->jit_data; 1903 if (!jit_data) { 1904 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); 1905 if (!jit_data) { 1906 prog = orig_prog; 1907 goto out; 1908 } 1909 prog->aux->jit_data = jit_data; 1910 } 1911 if (jit_data->ctx.offset) { 1912 ctx = jit_data->ctx; 1913 ro_image_ptr = jit_data->ro_image; 1914 ro_header = jit_data->ro_header; 1915 header = jit_data->header; 1916 image_ptr = (void *)header + ((void *)ro_image_ptr 1917 - (void *)ro_header); 1918 extra_pass = true; 1919 prog_size = sizeof(u32) * ctx.idx; 1920 goto skip_init_ctx; 1921 } 1922 memset(&ctx, 0, sizeof(ctx)); 1923 ctx.prog = prog; 1924 1925 ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); 1926 if (ctx.offset == NULL) { 1927 prog = orig_prog; 1928 goto out_off; 1929 } 1930 1931 ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); 1932 ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); 1933 1934 /* Pass 1: Estimate the maximum image size. 1935 * 1936 * BPF line info needs ctx->offset[i] to be the offset of 1937 * instruction[i] in jited image, so build prologue first. 1938 */ 1939 if (build_prologue(&ctx, was_classic)) { 1940 prog = orig_prog; 1941 goto out_off; 1942 } 1943 1944 if (build_body(&ctx, extra_pass)) { 1945 prog = orig_prog; 1946 goto out_off; 1947 } 1948 1949 ctx.epilogue_offset = ctx.idx; 1950 build_epilogue(&ctx, was_classic); 1951 build_plt(&ctx); 1952 1953 extable_align = __alignof__(struct exception_table_entry); 1954 extable_size = prog->aux->num_exentries * 1955 sizeof(struct exception_table_entry); 1956 1957 /* Now we know the maximum image size. */ 1958 prog_size = sizeof(u32) * ctx.idx; 1959 /* also allocate space for plt target */ 1960 extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); 1961 image_size = extable_offset + extable_size; 1962 ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, 1963 sizeof(u32), &header, &image_ptr, 1964 jit_fill_hole); 1965 if (!ro_header) { 1966 prog = orig_prog; 1967 goto out_off; 1968 } 1969 1970 /* Pass 2: Determine jited position and result for each instruction */ 1971 1972 /* 1973 * Use the image(RW) for writing the JITed instructions. But also save 1974 * the ro_image(RX) for calculating the offsets in the image. The RW 1975 * image will be later copied to the RX image from where the program 1976 * will run. The bpf_jit_binary_pack_finalize() will do this copy in the 1977 * final step. 
1978 */ 1979 ctx.image = (__le32 *)image_ptr; 1980 ctx.ro_image = (__le32 *)ro_image_ptr; 1981 if (extable_size) 1982 prog->aux->extable = (void *)ro_image_ptr + extable_offset; 1983 skip_init_ctx: 1984 ctx.idx = 0; 1985 ctx.exentry_idx = 0; 1986 ctx.write = true; 1987 1988 build_prologue(&ctx, was_classic); 1989 1990 /* Record exentry_idx and body_idx before first build_body */ 1991 exentry_idx = ctx.exentry_idx; 1992 body_idx = ctx.idx; 1993 /* Dont write body instructions to memory for now */ 1994 ctx.write = false; 1995 1996 if (build_body(&ctx, extra_pass)) { 1997 prog = orig_prog; 1998 goto out_free_hdr; 1999 } 2000 2001 ctx.epilogue_offset = ctx.idx; 2002 ctx.exentry_idx = exentry_idx; 2003 ctx.idx = body_idx; 2004 ctx.write = true; 2005 2006 /* Pass 3: Adjust jump offset and write final image */ 2007 if (build_body(&ctx, extra_pass) || 2008 WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) { 2009 prog = orig_prog; 2010 goto out_free_hdr; 2011 } 2012 2013 build_epilogue(&ctx, was_classic); 2014 build_plt(&ctx); 2015 2016 /* Extra pass to validate JITed code. */ 2017 if (validate_ctx(&ctx)) { 2018 prog = orig_prog; 2019 goto out_free_hdr; 2020 } 2021 2022 /* update the real prog size */ 2023 prog_size = sizeof(u32) * ctx.idx; 2024 2025 /* And we're done. */ 2026 if (bpf_jit_enable > 1) 2027 bpf_jit_dump(prog->len, prog_size, 2, ctx.image); 2028 2029 if (!prog->is_func || extra_pass) { 2030 /* The jited image may shrink since the jited result for 2031 * BPF_CALL to subprog may be changed from indirect call 2032 * to direct call. 2033 */ 2034 if (extra_pass && ctx.idx > jit_data->ctx.idx) { 2035 pr_err_once("multi-func JIT bug %d > %d\n", 2036 ctx.idx, jit_data->ctx.idx); 2037 prog->bpf_func = NULL; 2038 prog->jited = 0; 2039 prog->jited_len = 0; 2040 goto out_free_hdr; 2041 } 2042 if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { 2043 /* ro_header has been freed */ 2044 ro_header = NULL; 2045 prog = orig_prog; 2046 goto out_off; 2047 } 2048 /* 2049 * The instructions have now been copied to the ROX region from 2050 * where they will execute. Now the data cache has to be cleaned to 2051 * the PoU and the I-cache has to be invalidated for the VAs. 2052 */ 2053 bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); 2054 } else { 2055 jit_data->ctx = ctx; 2056 jit_data->ro_image = ro_image_ptr; 2057 jit_data->header = header; 2058 jit_data->ro_header = ro_header; 2059 } 2060 2061 prog->bpf_func = (void *)ctx.ro_image; 2062 prog->jited = 1; 2063 prog->jited_len = prog_size; 2064 2065 if (!prog->is_func || extra_pass) { 2066 int i; 2067 2068 /* offset[prog->len] is the size of program */ 2069 for (i = 0; i <= prog->len; i++) 2070 ctx.offset[i] *= AARCH64_INSN_SIZE; 2071 bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); 2072 out_off: 2073 kvfree(ctx.offset); 2074 kfree(jit_data); 2075 prog->aux->jit_data = NULL; 2076 } 2077 out: 2078 if (tmp_blinded) 2079 bpf_jit_prog_release_other(prog, prog == orig_prog ? 
2080 tmp : orig_prog); 2081 return prog; 2082 2083 out_free_hdr: 2084 if (header) { 2085 bpf_arch_text_copy(&ro_header->size, &header->size, 2086 sizeof(header->size)); 2087 bpf_jit_binary_pack_free(ro_header, header); 2088 } 2089 goto out_off; 2090 } 2091 2092 bool bpf_jit_supports_kfunc_call(void) 2093 { 2094 return true; 2095 } 2096 2097 void *bpf_arch_text_copy(void *dst, void *src, size_t len) 2098 { 2099 if (!aarch64_insn_copy(dst, src, len)) 2100 return ERR_PTR(-EINVAL); 2101 return dst; 2102 } 2103 2104 u64 bpf_jit_alloc_exec_limit(void) 2105 { 2106 return VMALLOC_END - VMALLOC_START; 2107 } 2108 2109 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ 2110 bool bpf_jit_supports_subprog_tailcalls(void) 2111 { 2112 return true; 2113 } 2114 2115 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, 2116 int bargs_off, int retval_off, int run_ctx_off, 2117 bool save_ret) 2118 { 2119 __le32 *branch; 2120 u64 enter_prog; 2121 u64 exit_prog; 2122 struct bpf_prog *p = l->link.prog; 2123 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); 2124 2125 enter_prog = (u64)bpf_trampoline_enter(p); 2126 exit_prog = (u64)bpf_trampoline_exit(p); 2127 2128 if (l->cookie == 0) { 2129 /* if cookie is zero, one instruction is enough to store it */ 2130 emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); 2131 } else { 2132 emit_a64_mov_i64(A64_R(10), l->cookie, ctx); 2133 emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), 2134 ctx); 2135 } 2136 2137 /* save p to callee saved register x19 to avoid loading p with mov_i64 2138 * each time. 2139 */ 2140 emit_addr_mov_i64(A64_R(19), (const u64)p, ctx); 2141 2142 /* arg1: prog */ 2143 emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); 2144 /* arg2: &run_ctx */ 2145 emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx); 2146 2147 emit_call(enter_prog, ctx); 2148 2149 /* save return value to callee saved register x20 */ 2150 emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); 2151 2152 /* if (__bpf_prog_enter(prog) == 0) 2153 * goto skip_exec_of_prog; 2154 */ 2155 branch = ctx->image + ctx->idx; 2156 emit(A64_NOP, ctx); 2157 2158 emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx); 2159 if (!p->jited) 2160 emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); 2161 2162 emit_call((const u64)p->bpf_func, ctx); 2163 2164 if (save_ret) 2165 emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); 2166 2167 if (ctx->image) { 2168 int offset = &ctx->image[ctx->idx] - branch; 2169 *branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset)); 2170 } 2171 2172 /* arg1: prog */ 2173 emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); 2174 /* arg2: start time */ 2175 emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx); 2176 /* arg3: &run_ctx */ 2177 emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx); 2178 2179 emit_call(exit_prog, ctx); 2180 } 2181 2182 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, 2183 int bargs_off, int retval_off, int run_ctx_off, 2184 __le32 **branches) 2185 { 2186 int i; 2187 2188 /* The first fmod_ret program will receive a garbage return value. 2189 * Set this to 0 to avoid confusing the program. 2190 */ 2191 emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); 2192 for (i = 0; i < tl->nr_links; i++) { 2193 invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off, 2194 run_ctx_off, true); 2195 /* if (*(u64 *)(sp + retval_off) != 0) 2196 * goto do_fexit; 2197 */ 2198 emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); 2199 /* Save the location of branch, and generate a nop. 
2200 * This nop will be replaced with a cbnz later.
2201 */
2202 branches[i] = ctx->image + ctx->idx;
2203 emit(A64_NOP, ctx);
2204 }
2205 }
2206
2207 struct arg_aux {
2208 /* how many args are passed through registers; the rest of the args are
2209 * passed on the stack
2210 */
2211 int args_in_regs;
2212 /* how many registers are used to pass arguments */
2213 int regs_for_args;
2214 /* how much stack is used for additional args passed to bpf program
2215 * that did not fit in original function registers
2216 */
2217 int bstack_for_args;
2218 /* how much stack is used for additional args passed to the
2219 * original function when called from trampoline (this one needs
2220 * arguments to be properly aligned)
2221 */
2222 int ostack_for_args;
2223 };
2224
2225 static int calc_arg_aux(const struct btf_func_model *m,
2226 struct arg_aux *a)
2227 {
2228 int stack_slots, nregs, slots, i;
2229
2230 /* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
2231 for (i = 0, nregs = 0; i < m->nr_args; i++) {
2232 slots = (m->arg_size[i] + 7) / 8;
2233 if (nregs + slots <= 8) /* passed through registers? */
2234 nregs += slots;
2235 else
2236 break;
2237 }
2238
2239 a->args_in_regs = i;
2240 a->regs_for_args = nregs;
2241 a->ostack_for_args = 0;
2242 a->bstack_for_args = 0;
2243
2244 /* the remaining arguments are passed on the stack */
2245 for (; i < m->nr_args; i++) {
2246 /* We cannot know the exact alignment needs of a struct
2247 * passed on the stack, so deny those
2248 */
2249 if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
2250 return -ENOTSUPP;
2251 stack_slots = (m->arg_size[i] + 7) / 8;
2252 a->bstack_for_args += stack_slots * 8;
2253 a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
2254 }
2255
2256 return 0;
2257 }
2258
2259 static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
2260 {
2261 if (effective_bytes) {
2262 int garbage_bits = 64 - 8 * effective_bytes;
2263 #ifdef CONFIG_CPU_BIG_ENDIAN
2264 /* garbage bits are at the right end */
2265 emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
2266 emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
2267 #else
2268 /* garbage bits are at the left end */
2269 emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
2270 emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
2271 #endif
2272 }
2273 }
2274
2275 static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
2276 const struct btf_func_model *m,
2277 const struct arg_aux *a,
2278 bool for_call_origin)
2279 {
2280 int i;
2281 int reg;
2282 int doff;
2283 int soff;
2284 int slots;
2285 u8 tmp = bpf2a64[TMP_REG_1];
2286
2287 /* store arguments to the stack for the bpf program, or restore
2288 * arguments from stack for the original function
2289 */
2290 for (reg = 0; reg < a->regs_for_args; reg++) {
2291 emit(for_call_origin ?
2292 A64_LDR64I(reg, A64_SP, bargs_off) :
2293 A64_STR64I(reg, A64_SP, bargs_off),
2294 ctx);
2295 bargs_off += 8;
2296 }
2297
2298 soff = 32; /* on stack arguments start from FP + 32 */
2299 doff = (for_call_origin ? oargs_off : bargs_off);
2300
2301 /* save on stack arguments */
2302 for (i = a->args_in_regs; i < m->nr_args; i++) {
2303 slots = (m->arg_size[i] + 7) / 8;
2304 /* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
2305 while (slots-- > 0) {
2306 emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
2307 /* if there is unused space in the last slot, clear
2308 * the garbage contained in the space.
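* For example (illustrative): a 3-byte argument still occupies one
* 8-byte slot; on little endian its useful bytes are the low three, so
* clear_garbage() emits (tmp is x10 here)
*   lsl x10, x10, #40
*   lsr x10, x10, #40
* to zero the upper 40 garbage bits before the slot is stored.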
2309 */ 2310 if (slots == 0 && !for_call_origin) 2311 clear_garbage(ctx, tmp, m->arg_size[i] % 8); 2312 emit(A64_STR64I(tmp, A64_SP, doff), ctx); 2313 soff += 8; 2314 doff += 8; 2315 } 2316 } 2317 } 2318 2319 static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs) 2320 { 2321 int reg; 2322 2323 for (reg = 0; reg < nregs; reg++) { 2324 emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx); 2325 bargs_off += 8; 2326 } 2327 } 2328 2329 static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links) 2330 { 2331 return fentry_links->nr_links == 1 && 2332 fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS; 2333 } 2334 2335 /* Based on the x86's implementation of arch_prepare_bpf_trampoline(). 2336 * 2337 * bpf prog and function entry before bpf trampoline hooked: 2338 * mov x9, lr 2339 * nop 2340 * 2341 * bpf prog and function entry after bpf trampoline hooked: 2342 * mov x9, lr 2343 * bl <bpf_trampoline or plt> 2344 * 2345 */ 2346 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, 2347 struct bpf_tramp_links *tlinks, void *func_addr, 2348 const struct btf_func_model *m, 2349 const struct arg_aux *a, 2350 u32 flags) 2351 { 2352 int i; 2353 int stack_size; 2354 int retaddr_off; 2355 int regs_off; 2356 int retval_off; 2357 int bargs_off; 2358 int nfuncargs_off; 2359 int ip_off; 2360 int run_ctx_off; 2361 int oargs_off; 2362 int nfuncargs; 2363 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; 2364 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 2365 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; 2366 bool save_ret; 2367 __le32 **branches = NULL; 2368 bool is_struct_ops = is_struct_ops_tramp(fentry); 2369 2370 /* trampoline stack layout: 2371 * [ parent ip ] 2372 * [ FP ] 2373 * SP + retaddr_off [ self ip ] 2374 * [ FP ] 2375 * 2376 * [ padding ] align SP to multiples of 16 2377 * 2378 * [ x20 ] callee saved reg x20 2379 * SP + regs_off [ x19 ] callee saved reg x19 2380 * 2381 * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or 2382 * BPF_TRAMP_F_RET_FENTRY_RET 2383 * [ arg reg N ] 2384 * [ ... ] 2385 * SP + bargs_off [ arg reg 1 ] for bpf 2386 * 2387 * SP + nfuncargs_off [ arg regs count ] 2388 * 2389 * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag 2390 * 2391 * SP + run_ctx_off [ bpf_tramp_run_ctx ] 2392 * 2393 * [ stack arg N ] 2394 * [ ... 
]
2395 * SP + oargs_off [ stack arg 1 ] for original func
2396 */
2397
2398 stack_size = 0;
2399 oargs_off = stack_size;
2400 if (flags & BPF_TRAMP_F_CALL_ORIG)
2401 stack_size += a->ostack_for_args;
2402
2403 run_ctx_off = stack_size;
2404 /* room for bpf_tramp_run_ctx */
2405 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
2406
2407 ip_off = stack_size;
2408 /* room for IP address argument */
2409 if (flags & BPF_TRAMP_F_IP_ARG)
2410 stack_size += 8;
2411
2412 nfuncargs_off = stack_size;
2413 /* room for args count */
2414 stack_size += 8;
2415
2416 bargs_off = stack_size;
2417 /* room for args */
2418 nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
2419 stack_size += 8 * nfuncargs;
2420
2421 /* room for return value */
2422 retval_off = stack_size;
2423 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
2424 if (save_ret)
2425 stack_size += 8;
2426
2427 /* room for callee saved registers, currently x19 and x20 are used */
2428 regs_off = stack_size;
2429 stack_size += 16;
2430
2431 /* round up to multiples of 16 to avoid SPAlignmentFault */
2432 stack_size = round_up(stack_size, 16);
2433
2434 /* the return address is located above FP */
2435 retaddr_off = stack_size + 8;
2436
2437 /* bpf trampoline may be invoked by 3 instruction types:
2438 * 1. bl, attached to bpf prog or kernel function via short jump
2439 * 2. br, attached to bpf prog or kernel function via long jump
2440 * 3. blr, working as a function pointer, used by struct_ops.
2441 * So BTI_JC should be used here to support both br and blr.
2442 */
2443 emit_bti(A64_BTI_JC, ctx);
2444
2445 /* x9 is not set for struct_ops */
2446 if (!is_struct_ops) {
2447 /* frame for parent function */
2448 emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
2449 emit(A64_MOV(1, A64_FP, A64_SP), ctx);
2450 }
2451
2452 /* frame for patched function for tracing, or caller for struct_ops */
2453 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
2454 emit(A64_MOV(1, A64_FP, A64_SP), ctx);
2455
2456 /* allocate stack space */
2457 emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
2458
2459 if (flags & BPF_TRAMP_F_IP_ARG) {
2460 /* save ip address of the traced function */
2461 emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
2462 emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
2463 }
2464
2465 /* save arg regs count */
2466 emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
2467 emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
2468
2469 /* save args for bpf */
2470 save_args(ctx, bargs_off, oargs_off, m, a, false);
2471
2472 /* save callee saved registers */
2473 emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
2474 emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
2475
2476 if (flags & BPF_TRAMP_F_CALL_ORIG) {
2477 /* for the first pass, assume the worst case */
2478 if (!ctx->image)
2479 ctx->idx += 4;
2480 else
2481 emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
2482 emit_call((const u64)__bpf_tramp_enter, ctx);
2483 }
2484
2485 for (i = 0; i < fentry->nr_links; i++)
2486 invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
2487 retval_off, run_ctx_off,
2488 flags & BPF_TRAMP_F_RET_FENTRY_RET);
2489
2490 if (fmod_ret->nr_links) {
2491 branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
2492 GFP_KERNEL);
2493 if (!branches)
2494 return -ENOMEM;
2495
2496 invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
2497 run_ctx_off, branches);
2498 }
2499
2500 if (flags & BPF_TRAMP_F_CALL_ORIG) {
2501 /* save args for original func */
2502 save_args(ctx, bargs_off, oargs_off, m, a, true);
2503 /*
call original func */ 2504 emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); 2505 emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); 2506 emit(A64_RET(A64_R(10)), ctx); 2507 /* store return value */ 2508 emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); 2509 /* reserve a nop for bpf_tramp_image_put */ 2510 im->ip_after_call = ctx->ro_image + ctx->idx; 2511 emit(A64_NOP, ctx); 2512 } 2513 2514 /* update the branches saved in invoke_bpf_mod_ret with cbnz */ 2515 for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { 2516 int offset = &ctx->image[ctx->idx] - branches[i]; 2517 *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); 2518 } 2519 2520 for (i = 0; i < fexit->nr_links; i++) 2521 invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off, 2522 run_ctx_off, false); 2523 2524 if (flags & BPF_TRAMP_F_CALL_ORIG) { 2525 im->ip_epilogue = ctx->ro_image + ctx->idx; 2526 /* for the first pass, assume the worst case */ 2527 if (!ctx->image) 2528 ctx->idx += 4; 2529 else 2530 emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); 2531 emit_call((const u64)__bpf_tramp_exit, ctx); 2532 } 2533 2534 if (flags & BPF_TRAMP_F_RESTORE_REGS) 2535 restore_args(ctx, bargs_off, a->regs_for_args); 2536 2537 /* restore callee saved register x19 and x20 */ 2538 emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); 2539 emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx); 2540 2541 if (save_ret) 2542 emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); 2543 2544 /* reset SP */ 2545 emit(A64_MOV(1, A64_SP, A64_FP), ctx); 2546 2547 if (is_struct_ops) { 2548 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); 2549 emit(A64_RET(A64_LR), ctx); 2550 } else { 2551 /* pop frames */ 2552 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); 2553 emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); 2554 2555 if (flags & BPF_TRAMP_F_SKIP_FRAME) { 2556 /* skip patched function, return to parent */ 2557 emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); 2558 emit(A64_RET(A64_R(9)), ctx); 2559 } else { 2560 /* return to patched function */ 2561 emit(A64_MOV(1, A64_R(10), A64_LR), ctx); 2562 emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); 2563 emit(A64_RET(A64_R(10)), ctx); 2564 } 2565 } 2566 2567 kfree(branches); 2568 2569 return ctx->idx; 2570 } 2571 2572 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, 2573 struct bpf_tramp_links *tlinks, void *func_addr) 2574 { 2575 struct jit_ctx ctx = { 2576 .image = NULL, 2577 .idx = 0, 2578 }; 2579 struct bpf_tramp_image im; 2580 struct arg_aux aaux; 2581 int ret; 2582 2583 ret = calc_arg_aux(m, &aaux); 2584 if (ret < 0) 2585 return ret; 2586 2587 ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags); 2588 if (ret < 0) 2589 return ret; 2590 2591 return ret < 0 ? ret : ret * AARCH64_INSN_SIZE; 2592 } 2593 2594 void *arch_alloc_bpf_trampoline(unsigned int size) 2595 { 2596 return bpf_prog_pack_alloc(size, jit_fill_hole); 2597 } 2598 2599 void arch_free_bpf_trampoline(void *image, unsigned int size) 2600 { 2601 bpf_prog_pack_free(image, size); 2602 } 2603 2604 int arch_protect_bpf_trampoline(void *image, unsigned int size) 2605 { 2606 return 0; 2607 } 2608 2609 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, 2610 void *ro_image_end, const struct btf_func_model *m, 2611 u32 flags, struct bpf_tramp_links *tlinks, 2612 void *func_addr) 2613 { 2614 u32 size = ro_image_end - ro_image; 2615 struct arg_aux aaux; 2616 void *image, *tmp; 2617 int ret; 2618 2619 /* image doesn't need to be in module memory range, so we can 2620 * use kvmalloc. 
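* (The trampoline is first generated into this scratch buffer and checked
* with validate_code(); only then is it copied into the read-only ro_image
* via bpf_arch_text_copy() and the I-cache flushed, after which the
* scratch buffer is freed.)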
2621 */ 2622 image = kvmalloc(size, GFP_KERNEL); 2623 if (!image) 2624 return -ENOMEM; 2625 2626 struct jit_ctx ctx = { 2627 .image = image, 2628 .ro_image = ro_image, 2629 .idx = 0, 2630 .write = true, 2631 }; 2632 2633 2634 jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); 2635 ret = calc_arg_aux(m, &aaux); 2636 if (ret) 2637 goto out; 2638 ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags); 2639 2640 if (ret > 0 && validate_code(&ctx) < 0) { 2641 ret = -EINVAL; 2642 goto out; 2643 } 2644 2645 if (ret > 0) 2646 ret *= AARCH64_INSN_SIZE; 2647 2648 tmp = bpf_arch_text_copy(ro_image, image, size); 2649 if (IS_ERR(tmp)) { 2650 ret = PTR_ERR(tmp); 2651 goto out; 2652 } 2653 2654 bpf_flush_icache(ro_image, ro_image + size); 2655 out: 2656 kvfree(image); 2657 return ret; 2658 } 2659 2660 static bool is_long_jump(void *ip, void *target) 2661 { 2662 long offset; 2663 2664 /* NULL target means this is a NOP */ 2665 if (!target) 2666 return false; 2667 2668 offset = (long)target - (long)ip; 2669 return offset < -SZ_128M || offset >= SZ_128M; 2670 } 2671 2672 static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, 2673 void *addr, void *plt, u32 *insn) 2674 { 2675 void *target; 2676 2677 if (!addr) { 2678 *insn = aarch64_insn_gen_nop(); 2679 return 0; 2680 } 2681 2682 if (is_long_jump(ip, addr)) 2683 target = plt; 2684 else 2685 target = addr; 2686 2687 *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, 2688 (unsigned long)target, 2689 type); 2690 2691 return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; 2692 } 2693 2694 /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf 2695 * trampoline with the branch instruction from @ip to @new_addr. If @old_addr 2696 * or @new_addr is NULL, the old or new instruction is NOP. 2697 * 2698 * When @ip is the bpf prog entry, a bpf trampoline is being attached or 2699 * detached. Since bpf trampoline and bpf prog are allocated separately with 2700 * vmalloc, the address distance may exceed 128MB, the maximum branch range. 2701 * So long jump should be handled. 2702 * 2703 * When a bpf prog is constructed, a plt pointing to empty trampoline 2704 * dummy_tramp is placed at the end: 2705 * 2706 * bpf_prog: 2707 * mov x9, lr 2708 * nop // patchsite 2709 * ... 2710 * ret 2711 * 2712 * plt: 2713 * ldr x10, target 2714 * br x10 2715 * target: 2716 * .quad dummy_tramp // plt target 2717 * 2718 * This is also the state when no trampoline is attached. 2719 * 2720 * When a short-jump bpf trampoline is attached, the patchsite is patched 2721 * to a bl instruction to the trampoline directly: 2722 * 2723 * bpf_prog: 2724 * mov x9, lr 2725 * bl <short-jump bpf trampoline address> // patchsite 2726 * ... 2727 * ret 2728 * 2729 * plt: 2730 * ldr x10, target 2731 * br x10 2732 * target: 2733 * .quad dummy_tramp // plt target 2734 * 2735 * When a long-jump bpf trampoline is attached, the plt target is filled with 2736 * the trampoline address and the patchsite is patched to a bl instruction to 2737 * the plt: 2738 * 2739 * bpf_prog: 2740 * mov x9, lr 2741 * bl plt // patchsite 2742 * ... 2743 * ret 2744 * 2745 * plt: 2746 * ldr x10, target 2747 * br x10 2748 * target: 2749 * .quad <long-jump bpf trampoline address> // plt target 2750 * 2751 * The dummy_tramp is used to prevent another CPU from jumping to unknown 2752 * locations during the patching process, making the patching process easier. 
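*
* To summarize: the patchsite only ever holds a nop, a bl to a short-jump
* trampoline, or a bl to the plt, and the plt target always holds either
* dummy_tramp or a valid trampoline address, so a concurrent reader can
* never branch through the plt to an unknown location.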
2753 */
2754 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
2755 void *old_addr, void *new_addr)
2756 {
2757 int ret;
2758 u32 old_insn;
2759 u32 new_insn;
2760 u32 replaced;
2761 struct bpf_plt *plt = NULL;
2762 unsigned long size = 0UL;
2763 unsigned long offset = ~0UL;
2764 enum aarch64_insn_branch_type branch_type;
2765 char namebuf[KSYM_NAME_LEN];
2766 void *image = NULL;
2767 u64 plt_target = 0ULL;
2768 bool poking_bpf_entry;
2769
2770 if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
2771 /* Only poking bpf text is supported. Since kernel function
2772 * entry is set up by ftrace, we rely on ftrace to poke kernel
2773 * functions.
2774 */
2775 return -ENOTSUPP;
2776
2777 image = ip - offset;
2778 /* zero offset means we're poking bpf prog entry */
2779 poking_bpf_entry = (offset == 0UL);
2780
2781 /* bpf prog entry, find plt and the real patchsite */
2782 if (poking_bpf_entry) {
2783 /* the plt is located at the end of the bpf prog */
2784 plt = image + size - PLT_TARGET_OFFSET;
2785
2786 /* skip to the nop instruction in bpf prog entry:
2787 * bti c // if BTI enabled
2788 * mov x9, x30
2789 * nop
2790 */
2791 ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
2792 }
2793
2794 /* long jump is only possible at bpf prog entry */
2795 if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
2796 !poking_bpf_entry))
2797 return -EINVAL;
2798
2799 if (poke_type == BPF_MOD_CALL)
2800 branch_type = AARCH64_INSN_BRANCH_LINK;
2801 else
2802 branch_type = AARCH64_INSN_BRANCH_NOLINK;
2803
2804 if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
2805 return -EFAULT;
2806
2807 if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
2808 return -EFAULT;
2809
2810 if (is_long_jump(ip, new_addr))
2811 plt_target = (u64)new_addr;
2812 else if (is_long_jump(ip, old_addr))
2813 /* if the old target is a long jump and the new target is not,
2814 * restore the plt target to dummy_tramp, so there is always a
2815 * legal and harmless address stored in plt target, and we'll
2816 * never jump from plt to an unknown place.
2817 */
2818 plt_target = (u64)&dummy_tramp;
2819
2820 if (plt_target) {
2821 /* non-zero plt_target indicates we're patching a bpf prog,
2822 * which is read only.
2823 */
2824 if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
2825 return -EFAULT;
2826 WRITE_ONCE(plt->target, plt_target);
2827 set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
2828 /* since plt target points to either the new trampoline
2829 * or dummy_tramp, even if another CPU reads the old plt
2830 * target value before fetching the bl instruction to plt,
2831 * it will be brought back by dummy_tramp, so no barrier is
2832 * required here.
2833 */
2834 }
2835
2836 /* if the patchsite instruction does not change (e.g. both the old and
2837 * new targets are long jumps through the plt), no patching is required
2838 */
2839 if (old_insn == new_insn)
2840 return 0;
2841
2842 mutex_lock(&text_mutex);
2843 if (aarch64_insn_read(ip, &replaced)) {
2844 ret = -EFAULT;
2845 goto out;
2846 }
2847
2848 if (replaced != old_insn) {
2849 ret = -EFAULT;
2850 goto out;
2851 }
2852
2853 /* We call aarch64_insn_patch_text_nosync() to replace the instruction
2854 * atomically, so no other CPUs will fetch a half-new and half-old
2855 * instruction. But there is a chance that another CPU executes the
2856 * old instruction after the patching operation finishes (e.g.,
2857 * pipeline not flushed, or icache not synchronized yet).
2858 *
2859 * 1. when a new trampoline is attached, it is not a problem for
2860 * different CPUs to jump to different trampolines temporarily.
2861 *
2862 * 2. when an old trampoline is freed, we should wait for all other
2863 * CPUs to exit the trampoline and make sure the trampoline is no
2864 * longer reachable. Since bpf_tramp_image_put() already uses percpu_ref
2865 * and task-based RCU to do the synchronization, there is no need to
2866 * call the sync variant here; see bpf_tramp_image_put() for details.
2867 */
2868 ret = aarch64_insn_patch_text_nosync(ip, new_insn);
2869 out:
2870 mutex_unlock(&text_mutex);
2871
2872 return ret;
2873 }
2874
2875 bool bpf_jit_supports_ptr_xchg(void)
2876 {
2877 return true;
2878 }
2879
2880 bool bpf_jit_supports_exceptions(void)
2881 {
2882 /* We unwind through both kernel frames starting from within bpf_throw
2883 * call and BPF frames. Therefore we require the FP unwinder to be enabled
2884 * to walk kernel frames and reach BPF frames in the stack trace.
2885 * The ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y.
2886 */
2887 return true;
2888 }
2889
2890 bool bpf_jit_supports_arena(void)
2891 {
2892 return true;
2893 }
2894
2895 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
2896 {
2897 if (!in_arena)
2898 return true;
2899 switch (insn->code) {
2900 case BPF_STX | BPF_ATOMIC | BPF_W:
2901 case BPF_STX | BPF_ATOMIC | BPF_DW:
2902 if (!bpf_atomic_is_load_store(insn) &&
2903 !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
2904 return false;
2905 }
2906 return true;
2907 }
2908
2909 bool bpf_jit_supports_percpu_insn(void)
2910 {
2911 return true;
2912 }
2913
2914 bool bpf_jit_inlines_helper_call(s32 imm)
2915 {
2916 switch (imm) {
2917 case BPF_FUNC_get_smp_processor_id:
2918 case BPF_FUNC_get_current_task:
2919 case BPF_FUNC_get_current_task_btf:
2920 return true;
2921 default:
2922 return false;
2923 }
2924 }
2925
2926 void bpf_jit_free(struct bpf_prog *prog)
2927 {
2928 if (prog->jited) {
2929 struct arm64_jit_data *jit_data = prog->aux->jit_data;
2930 struct bpf_binary_header *hdr;
2931
2932 /*
2933 * If we fail the final pass of JIT (from jit_subprogs),
2934 * the program may not be finalized yet. Call finalize here
2935 * before freeing it.
2936 */
2937 if (jit_data) {
2938 bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
2939 sizeof(jit_data->header->size));
2940 kfree(jit_data);
2941 }
2942 hdr = bpf_jit_binary_pack_hdr(prog);
2943 bpf_jit_binary_pack_free(hdr, NULL);
2944 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
2945 }
2946
2947 bpf_prog_unlock_free(prog);
2948 }
2949
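/*
 * Note: per bpf_jit_supports_insn() above, BPF_W/BPF_DW atomic
 * read-modify-write instructions on arena memory are only supported when
 * LSE atomics are available; plain atomic loads and stores
 * (bpf_atomic_is_load_store()) are supported either way.
 */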