1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 21 #include "exec/exec-all.h" 22 #include "translate.h" 23 #include "translate-a64.h" 24 #include "qemu/log.h" 25 #include "arm_ldst.h" 26 #include "semihosting/semihost.h" 27 #include "cpregs.h" 28 29 static TCGv_i64 cpu_X[32]; 30 static TCGv_i64 cpu_pc; 31 32 /* Load/store exclusive handling */ 33 static TCGv_i64 cpu_exclusive_high; 34 35 static const char *regnames[] = { 36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 40 }; 41 42 enum a64_shift_type { 43 A64_SHIFT_TYPE_LSL = 0, 44 A64_SHIFT_TYPE_LSR = 1, 45 A64_SHIFT_TYPE_ASR = 2, 46 A64_SHIFT_TYPE_ROR = 3 47 }; 48 49 /* 50 * Helpers for extracting complex instruction fields 51 */ 52 53 /* 54 * For load/store with an unsigned 12 bit immediate scaled by the element 55 * size. The input has the immediate field in bits [14:3] and the element 56 * size in [2:0]. 57 */ 58 static int uimm_scaled(DisasContext *s, int x) 59 { 60 unsigned imm = x >> 3; 61 unsigned scale = extract32(x, 0, 3); 62 return imm << scale; 63 } 64 65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 66 static int scale_by_log2_tag_granule(DisasContext *s, int x) 67 { 68 return x << LOG2_TAG_GRANULE; 69 } 70 71 /* 72 * Include the generated decoders. 73 */ 74 75 #include "decode-sme-fa64.c.inc" 76 #include "decode-a64.c.inc" 77 78 /* Table based decoder typedefs - used when the relevant bits for decode 79 * are too awkwardly scattered across the instruction (eg SIMD). 80 */ 81 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); 82 83 typedef struct AArch64DecodeTable { 84 uint32_t pattern; 85 uint32_t mask; 86 AArch64DecodeFn *disas_fn; 87 } AArch64DecodeTable; 88 89 /* initialize TCG globals. */ 90 void a64_translate_init(void) 91 { 92 int i; 93 94 cpu_pc = tcg_global_mem_new_i64(tcg_env, 95 offsetof(CPUARMState, pc), 96 "pc"); 97 for (i = 0; i < 32; i++) { 98 cpu_X[i] = tcg_global_mem_new_i64(tcg_env, 99 offsetof(CPUARMState, xregs[i]), 100 regnames[i]); 101 } 102 103 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, 104 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 105 } 106 107 /* 108 * Return the core mmu_idx to use for A64 load/store insns which 109 * have a "unprivileged load/store" variant. Those insns access 110 * EL0 if executed from an EL which has control over EL0 (usually 111 * EL1) but behave like normal loads and stores if executed from 112 * elsewhere (eg EL3). 113 * 114 * @unpriv : true for the unprivileged encoding; false for the 115 * normal encoding (in which case we will return the same 116 * thing as get_mem_index(). 
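 *
 * Illustrative usage (local names are examples only): the unprivileged
 * forms are LDTR/STTR and friends, so a caller would typically do
 *     memidx = get_a64_user_mem_index(s, is_unpriv);
 * and pass that index on to do_gpr_ld_memidx()/do_gpr_st_memidx()
 * below, while an ordinary LDR/STR simply uses get_mem_index(s).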
117 */ 118 static int get_a64_user_mem_index(DisasContext *s, bool unpriv) 119 { 120 /* 121 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 122 * which is the usual mmu_idx for this cpu state. 123 */ 124 ARMMMUIdx useridx = s->mmu_idx; 125 126 if (unpriv && s->unpriv) { 127 /* 128 * We have pre-computed the condition for AccType_UNPRIV. 129 * Therefore we should never get here with a mmu_idx for 130 * which we do not know the corresponding user mmu_idx. 131 */ 132 switch (useridx) { 133 case ARMMMUIdx_E10_1: 134 case ARMMMUIdx_E10_1_PAN: 135 useridx = ARMMMUIdx_E10_0; 136 break; 137 case ARMMMUIdx_E20_2: 138 case ARMMMUIdx_E20_2_PAN: 139 useridx = ARMMMUIdx_E20_0; 140 break; 141 default: 142 g_assert_not_reached(); 143 } 144 } 145 return arm_to_core_mmu_idx(useridx); 146 } 147 148 static void set_btype_raw(int val) 149 { 150 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 151 offsetof(CPUARMState, btype)); 152 } 153 154 static void set_btype(DisasContext *s, int val) 155 { 156 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 157 tcg_debug_assert(val >= 1 && val <= 3); 158 set_btype_raw(val); 159 s->btype = -1; 160 } 161 162 static void reset_btype(DisasContext *s) 163 { 164 if (s->btype != 0) { 165 set_btype_raw(0); 166 s->btype = 0; 167 } 168 } 169 170 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 171 { 172 assert(s->pc_save != -1); 173 if (tb_cflags(s->base.tb) & CF_PCREL) { 174 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 175 } else { 176 tcg_gen_movi_i64(dest, s->pc_curr + diff); 177 } 178 } 179 180 void gen_a64_update_pc(DisasContext *s, target_long diff) 181 { 182 gen_pc_plus_diff(s, cpu_pc, diff); 183 s->pc_save = s->pc_curr + diff; 184 } 185 186 /* 187 * Handle Top Byte Ignore (TBI) bits. 188 * 189 * If address tagging is enabled via the TCR TBI bits: 190 * + for EL2 and EL3 there is only one TBI bit, and if it is set 191 * then the address is zero-extended, clearing bits [63:56] 192 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 193 * and TBI1 controls addresses with bit 55 == 1. 194 * If the appropriate TBI bit is set for the address then 195 * the address is sign-extended from bit 55 into bits [63:56] 196 * 197 * Here We have concatenated TBI{1,0} into tbi. 198 */ 199 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 200 TCGv_i64 src, int tbi) 201 { 202 if (tbi == 0) { 203 /* Load unmodified address */ 204 tcg_gen_mov_i64(dst, src); 205 } else if (!regime_has_2_ranges(s->mmu_idx)) { 206 /* Force tag byte to all zero */ 207 tcg_gen_extract_i64(dst, src, 0, 56); 208 } else { 209 /* Sign-extend from bit 55. */ 210 tcg_gen_sextract_i64(dst, src, 0, 56); 211 212 switch (tbi) { 213 case 1: 214 /* tbi0 but !tbi1: only use the extension if positive */ 215 tcg_gen_and_i64(dst, dst, src); 216 break; 217 case 2: 218 /* !tbi0 but tbi1: only use the extension if negative */ 219 tcg_gen_or_i64(dst, dst, src); 220 break; 221 case 3: 222 /* tbi0 and tbi1: always use the extension */ 223 break; 224 default: 225 g_assert_not_reached(); 226 } 227 } 228 } 229 230 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 231 { 232 /* 233 * If address tagging is enabled for instructions via the TCR TBI bits, 234 * then loading an address into the PC will clear out any tag. 235 */ 236 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 237 s->pc_save = -1; 238 } 239 240 /* 241 * Handle MTE and/or TBI. 242 * 243 * For TBI, ideally, we would do nothing. 
Proper behaviour on fault is 244 * for the tag to be present in the FAR_ELx register. But for user-only 245 * mode we do not have a TLB with which to implement this, so we must 246 * remove the top byte now. 247 * 248 * Always return a fresh temporary that we can increment independently 249 * of the write-back address. 250 */ 251 252 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 253 { 254 TCGv_i64 clean = tcg_temp_new_i64(); 255 #ifdef CONFIG_USER_ONLY 256 gen_top_byte_ignore(s, clean, addr, s->tbid); 257 #else 258 tcg_gen_mov_i64(clean, addr); 259 #endif 260 return clean; 261 } 262 263 /* Insert a zero tag into src, with the result at dst. */ 264 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 265 { 266 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 267 } 268 269 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 270 MMUAccessType acc, int log2_size) 271 { 272 gen_helper_probe_access(tcg_env, ptr, 273 tcg_constant_i32(acc), 274 tcg_constant_i32(get_mem_index(s)), 275 tcg_constant_i32(1 << log2_size)); 276 } 277 278 /* 279 * For MTE, check a single logical or atomic access. This probes a single 280 * address, the exact one specified. The size and alignment of the access 281 * is not relevant to MTE, per se, but watchpoints do require the size, 282 * and we want to recognize those before making any other changes to state. 283 */ 284 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 285 bool is_write, bool tag_checked, 286 MemOp memop, bool is_unpriv, 287 int core_idx) 288 { 289 if (tag_checked && s->mte_active[is_unpriv]) { 290 TCGv_i64 ret; 291 int desc = 0; 292 293 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 294 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 295 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 296 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 297 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop)); 298 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 299 300 ret = tcg_temp_new_i64(); 301 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 302 303 return ret; 304 } 305 return clean_data_tbi(s, addr); 306 } 307 308 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 309 bool tag_checked, MemOp memop) 310 { 311 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 312 false, get_mem_index(s)); 313 } 314 315 /* 316 * For MTE, check multiple logical sequential accesses. 317 */ 318 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 319 bool tag_checked, int total_size, MemOp single_mop) 320 { 321 if (tag_checked && s->mte_active[0]) { 322 TCGv_i64 ret; 323 int desc = 0; 324 325 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 326 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 327 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 328 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 329 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop)); 330 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 331 332 ret = tcg_temp_new_i64(); 333 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 334 335 return ret; 336 } 337 return clean_data_tbi(s, addr); 338 } 339 340 /* 341 * Generate the special alignment check that applies to AccType_ATOMIC 342 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 343 * naturally aligned, but it must not cross a 16-byte boundary. 344 * See AArch64.CheckAlignment(). 
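 *
 * Worked example (illustrative): an 8-byte LDAR whose effective address
 * ends in 0xc gives ((addr & 15) + 8) == 20, which is greater than 16,
 * so the brcond below falls through to gen_helper_unaligned_access();
 * at an address ending in 0x8 the sum is exactly 16 and the access
 * proceeds.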
345 */ 346 static void check_lse2_align(DisasContext *s, int rn, int imm, 347 bool is_write, MemOp mop) 348 { 349 TCGv_i32 tmp; 350 TCGv_i64 addr; 351 TCGLabel *over_label; 352 MMUAccessType type; 353 int mmu_idx; 354 355 tmp = tcg_temp_new_i32(); 356 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); 357 tcg_gen_addi_i32(tmp, tmp, imm & 15); 358 tcg_gen_andi_i32(tmp, tmp, 15); 359 tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); 360 361 over_label = gen_new_label(); 362 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); 363 364 addr = tcg_temp_new_i64(); 365 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); 366 367 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, 368 mmu_idx = get_mem_index(s); 369 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type), 370 tcg_constant_i32(mmu_idx)); 371 372 gen_set_label(over_label); 373 374 } 375 376 /* Handle the alignment check for AccType_ATOMIC instructions. */ 377 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) 378 { 379 MemOp size = mop & MO_SIZE; 380 381 if (size == MO_8) { 382 return mop; 383 } 384 385 /* 386 * If size == MO_128, this is a LDXP, and the operation is single-copy 387 * atomic for each doubleword, not the entire quadword; it still must 388 * be quadword aligned. 389 */ 390 if (size == MO_128) { 391 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 392 MO_ATOM_IFALIGN_PAIR); 393 } 394 if (dc_isar_feature(aa64_lse2, s)) { 395 check_lse2_align(s, rn, 0, true, mop); 396 } else { 397 mop |= MO_ALIGN; 398 } 399 return finalize_memop(s, mop); 400 } 401 402 /* Handle the alignment check for AccType_ORDERED instructions. */ 403 static MemOp check_ordered_align(DisasContext *s, int rn, int imm, 404 bool is_write, MemOp mop) 405 { 406 MemOp size = mop & MO_SIZE; 407 408 if (size == MO_8) { 409 return mop; 410 } 411 if (size == MO_128) { 412 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 413 MO_ATOM_IFALIGN_PAIR); 414 } 415 if (!dc_isar_feature(aa64_lse2, s)) { 416 mop |= MO_ALIGN; 417 } else if (!s->naa) { 418 check_lse2_align(s, rn, imm, is_write, mop); 419 } 420 return finalize_memop(s, mop); 421 } 422 423 typedef struct DisasCompare64 { 424 TCGCond cond; 425 TCGv_i64 value; 426 } DisasCompare64; 427 428 static void a64_test_cc(DisasCompare64 *c64, int cc) 429 { 430 DisasCompare c32; 431 432 arm_test_cc(&c32, cc); 433 434 /* 435 * Sign-extend the 32-bit value so that the GE/LT comparisons work 436 * properly. The NE/EQ comparisons are also fine with this choice. 437 */ 438 c64->cond = c32.cond; 439 c64->value = tcg_temp_new_i64(); 440 tcg_gen_ext_i32_i64(c64->value, c32.value); 441 } 442 443 static void gen_rebuild_hflags(DisasContext *s) 444 { 445 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); 446 } 447 448 static void gen_exception_internal(int excp) 449 { 450 assert(excp_is_internal(excp)); 451 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); 452 } 453 454 static void gen_exception_internal_insn(DisasContext *s, int excp) 455 { 456 gen_a64_update_pc(s, 0); 457 gen_exception_internal(excp); 458 s->base.is_jmp = DISAS_NORETURN; 459 } 460 461 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) 462 { 463 gen_a64_update_pc(s, 0); 464 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome)); 465 s->base.is_jmp = DISAS_NORETURN; 466 } 467 468 static void gen_step_complete_exception(DisasContext *s) 469 { 470 /* We just completed step of an insn. Move from Active-not-pending 471 * to Active-pending, and then also take the swstep exception. 
472 * This corresponds to making the (IMPDEF) choice to prioritize 473 * swstep exceptions over asynchronous exceptions taken to an exception 474 * level where debug is disabled. This choice has the advantage that 475 * we do not need to maintain internal state corresponding to the 476 * ISV/EX syndrome bits between completion of the step and generation 477 * of the exception, and our syndrome information is always correct. 478 */ 479 gen_ss_advance(s); 480 gen_swstep_exception(s, 1, s->is_ldex); 481 s->base.is_jmp = DISAS_NORETURN; 482 } 483 484 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 485 { 486 if (s->ss_active) { 487 return false; 488 } 489 return translator_use_goto_tb(&s->base, dest); 490 } 491 492 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 493 { 494 if (use_goto_tb(s, s->pc_curr + diff)) { 495 /* 496 * For pcrel, the pc must always be up-to-date on entry to 497 * the linked TB, so that it can use simple additions for all 498 * further adjustments. For !pcrel, the linked TB is compiled 499 * to know its full virtual address, so we can delay the 500 * update to pc to the unlinked path. A long chain of links 501 * can thus avoid many updates to the PC. 502 */ 503 if (tb_cflags(s->base.tb) & CF_PCREL) { 504 gen_a64_update_pc(s, diff); 505 tcg_gen_goto_tb(n); 506 } else { 507 tcg_gen_goto_tb(n); 508 gen_a64_update_pc(s, diff); 509 } 510 tcg_gen_exit_tb(s->base.tb, n); 511 s->base.is_jmp = DISAS_NORETURN; 512 } else { 513 gen_a64_update_pc(s, diff); 514 if (s->ss_active) { 515 gen_step_complete_exception(s); 516 } else { 517 tcg_gen_lookup_and_goto_ptr(); 518 s->base.is_jmp = DISAS_NORETURN; 519 } 520 } 521 } 522 523 /* 524 * Register access functions 525 * 526 * These functions are used for directly accessing a register in where 527 * changes to the final register value are likely to be made. If you 528 * need to use a register for temporary calculation (e.g. index type 529 * operations) use the read_* form. 530 * 531 * B1.2.1 Register mappings 532 * 533 * In instruction register encoding 31 can refer to ZR (zero register) or 534 * the SP (stack pointer) depending on context. In QEMU's case we map SP 535 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 536 * This is the point of the _sp forms. 537 */ 538 TCGv_i64 cpu_reg(DisasContext *s, int reg) 539 { 540 if (reg == 31) { 541 TCGv_i64 t = tcg_temp_new_i64(); 542 tcg_gen_movi_i64(t, 0); 543 return t; 544 } else { 545 return cpu_X[reg]; 546 } 547 } 548 549 /* register access for when 31 == SP */ 550 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 551 { 552 return cpu_X[reg]; 553 } 554 555 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 556 * representing the register contents. This TCGv is an auto-freed 557 * temporary so it need not be explicitly freed, and may be modified. 558 */ 559 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 560 { 561 TCGv_i64 v = tcg_temp_new_i64(); 562 if (reg != 31) { 563 if (sf) { 564 tcg_gen_mov_i64(v, cpu_X[reg]); 565 } else { 566 tcg_gen_ext32u_i64(v, cpu_X[reg]); 567 } 568 } else { 569 tcg_gen_movi_i64(v, 0); 570 } 571 return v; 572 } 573 574 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 575 { 576 TCGv_i64 v = tcg_temp_new_i64(); 577 if (sf) { 578 tcg_gen_mov_i64(v, cpu_X[reg]); 579 } else { 580 tcg_gen_ext32u_i64(v, cpu_X[reg]); 581 } 582 return v; 583 } 584 585 /* Return the offset into CPUARMState of a slice (from 586 * the least significant end) of FP register Qn (ie 587 * Dn, Sn, Hn or Bn). 
588 * (Note that this is not the same mapping as for A32; see cpu.h) 589 */ 590 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 591 { 592 return vec_reg_offset(s, regno, 0, size); 593 } 594 595 /* Offset of the high half of the 128 bit vector Qn */ 596 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 597 { 598 return vec_reg_offset(s, regno, 1, MO_64); 599 } 600 601 /* Convenience accessors for reading and writing single and double 602 * FP registers. Writing clears the upper parts of the associated 603 * 128 bit vector register, as required by the architecture. 604 * Note that unlike the GP register accessors, the values returned 605 * by the read functions must be manually freed. 606 */ 607 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 608 { 609 TCGv_i64 v = tcg_temp_new_i64(); 610 611 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 612 return v; 613 } 614 615 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 616 { 617 TCGv_i32 v = tcg_temp_new_i32(); 618 619 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 620 return v; 621 } 622 623 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 624 { 625 TCGv_i32 v = tcg_temp_new_i32(); 626 627 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 628 return v; 629 } 630 631 static void clear_vec(DisasContext *s, int rd) 632 { 633 unsigned ofs = fp_reg_offset(s, rd, MO_64); 634 unsigned vsz = vec_full_reg_size(s); 635 636 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0); 637 } 638 639 /* 640 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 641 * If SVE is not enabled, then there are only 128 bits in the vector. 642 */ 643 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 644 { 645 unsigned ofs = fp_reg_offset(s, rd, MO_64); 646 unsigned vsz = vec_full_reg_size(s); 647 648 /* Nop move, with side effect of clearing the tail. */ 649 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 650 } 651 652 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 653 { 654 unsigned ofs = fp_reg_offset(s, reg, MO_64); 655 656 tcg_gen_st_i64(v, tcg_env, ofs); 657 clear_vec_high(s, false, reg); 658 } 659 660 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 661 { 662 TCGv_i64 tmp = tcg_temp_new_i64(); 663 664 tcg_gen_extu_i32_i64(tmp, v); 665 write_fp_dreg(s, reg, tmp); 666 } 667 668 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 669 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 670 GVecGen2Fn *gvec_fn, int vece) 671 { 672 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 673 is_q ? 16 : 8, vec_full_reg_size(s)); 674 } 675 676 /* Expand a 2-operand + immediate AdvSIMD vector operation using 677 * an expander function. 678 */ 679 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 680 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 681 { 682 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 683 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 684 } 685 686 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 687 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 688 GVecGen3Fn *gvec_fn, int vece) 689 { 690 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 691 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 692 } 693 694 /* Expand a 4-operand AdvSIMD vector operation using an expander function. 
*/ 695 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 696 int rx, GVecGen4Fn *gvec_fn, int vece) 697 { 698 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 699 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 700 is_q ? 16 : 8, vec_full_reg_size(s)); 701 } 702 703 /* Expand a 2-operand operation using an out-of-line helper. */ 704 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 705 int rn, int data, gen_helper_gvec_2 *fn) 706 { 707 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 708 vec_full_reg_offset(s, rn), 709 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 710 } 711 712 /* Expand a 3-operand operation using an out-of-line helper. */ 713 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 714 int rn, int rm, int data, gen_helper_gvec_3 *fn) 715 { 716 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 717 vec_full_reg_offset(s, rn), 718 vec_full_reg_offset(s, rm), 719 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 720 } 721 722 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 723 * an out-of-line helper. 724 */ 725 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 726 int rm, bool is_fp16, int data, 727 gen_helper_gvec_3_ptr *fn) 728 { 729 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 730 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 731 vec_full_reg_offset(s, rn), 732 vec_full_reg_offset(s, rm), fpst, 733 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 734 } 735 736 /* Expand a 4-operand operation using an out-of-line helper. */ 737 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 738 int rm, int ra, int data, gen_helper_gvec_4 *fn) 739 { 740 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 741 vec_full_reg_offset(s, rn), 742 vec_full_reg_offset(s, rm), 743 vec_full_reg_offset(s, ra), 744 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 745 } 746 747 /* 748 * Expand a 4-operand operation using an out-of-line helper that takes 749 * a pointer to the CPU env. 750 */ 751 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, 752 int rm, int ra, int data, 753 gen_helper_gvec_4_ptr *fn) 754 { 755 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 756 vec_full_reg_offset(s, rn), 757 vec_full_reg_offset(s, rm), 758 vec_full_reg_offset(s, ra), 759 tcg_env, 760 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 761 } 762 763 /* 764 * Expand a 4-operand + fpstatus pointer + simd data value operation using 765 * an out-of-line helper. 766 */ 767 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 768 int rm, int ra, bool is_fp16, int data, 769 gen_helper_gvec_4_ptr *fn) 770 { 771 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 772 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 773 vec_full_reg_offset(s, rn), 774 vec_full_reg_offset(s, rm), 775 vec_full_reg_offset(s, ra), fpst, 776 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 777 } 778 779 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 780 * than the 32 bit equivalent. 781 */ 782 static inline void gen_set_NZ64(TCGv_i64 result) 783 { 784 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 785 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 786 } 787 788 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. 
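 * (Used by the flag-setting logical forms such as ANDS/BICS/TST; for
 * sf == 0 only the low 32 bits of the result matter, which the extrl
 * below takes care of.)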
*/ 789 static inline void gen_logic_CC(int sf, TCGv_i64 result) 790 { 791 if (sf) { 792 gen_set_NZ64(result); 793 } else { 794 tcg_gen_extrl_i64_i32(cpu_ZF, result); 795 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 796 } 797 tcg_gen_movi_i32(cpu_CF, 0); 798 tcg_gen_movi_i32(cpu_VF, 0); 799 } 800 801 /* dest = T0 + T1; compute C, N, V and Z flags */ 802 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 803 { 804 TCGv_i64 result, flag, tmp; 805 result = tcg_temp_new_i64(); 806 flag = tcg_temp_new_i64(); 807 tmp = tcg_temp_new_i64(); 808 809 tcg_gen_movi_i64(tmp, 0); 810 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 811 812 tcg_gen_extrl_i64_i32(cpu_CF, flag); 813 814 gen_set_NZ64(result); 815 816 tcg_gen_xor_i64(flag, result, t0); 817 tcg_gen_xor_i64(tmp, t0, t1); 818 tcg_gen_andc_i64(flag, flag, tmp); 819 tcg_gen_extrh_i64_i32(cpu_VF, flag); 820 821 tcg_gen_mov_i64(dest, result); 822 } 823 824 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 825 { 826 TCGv_i32 t0_32 = tcg_temp_new_i32(); 827 TCGv_i32 t1_32 = tcg_temp_new_i32(); 828 TCGv_i32 tmp = tcg_temp_new_i32(); 829 830 tcg_gen_movi_i32(tmp, 0); 831 tcg_gen_extrl_i64_i32(t0_32, t0); 832 tcg_gen_extrl_i64_i32(t1_32, t1); 833 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 834 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 835 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 836 tcg_gen_xor_i32(tmp, t0_32, t1_32); 837 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 838 tcg_gen_extu_i32_i64(dest, cpu_NF); 839 } 840 841 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 842 { 843 if (sf) { 844 gen_add64_CC(dest, t0, t1); 845 } else { 846 gen_add32_CC(dest, t0, t1); 847 } 848 } 849 850 /* dest = T0 - T1; compute C, N, V and Z flags */ 851 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 852 { 853 /* 64 bit arithmetic */ 854 TCGv_i64 result, flag, tmp; 855 856 result = tcg_temp_new_i64(); 857 flag = tcg_temp_new_i64(); 858 tcg_gen_sub_i64(result, t0, t1); 859 860 gen_set_NZ64(result); 861 862 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 863 tcg_gen_extrl_i64_i32(cpu_CF, flag); 864 865 tcg_gen_xor_i64(flag, result, t0); 866 tmp = tcg_temp_new_i64(); 867 tcg_gen_xor_i64(tmp, t0, t1); 868 tcg_gen_and_i64(flag, flag, tmp); 869 tcg_gen_extrh_i64_i32(cpu_VF, flag); 870 tcg_gen_mov_i64(dest, result); 871 } 872 873 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 874 { 875 /* 32 bit arithmetic */ 876 TCGv_i32 t0_32 = tcg_temp_new_i32(); 877 TCGv_i32 t1_32 = tcg_temp_new_i32(); 878 TCGv_i32 tmp; 879 880 tcg_gen_extrl_i64_i32(t0_32, t0); 881 tcg_gen_extrl_i64_i32(t1_32, t1); 882 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 883 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 884 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 885 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 886 tmp = tcg_temp_new_i32(); 887 tcg_gen_xor_i32(tmp, t0_32, t1_32); 888 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 889 tcg_gen_extu_i32_i64(dest, cpu_NF); 890 } 891 892 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 893 { 894 if (sf) { 895 gen_sub64_CC(dest, t0, t1); 896 } else { 897 gen_sub32_CC(dest, t0, t1); 898 } 899 } 900 901 /* dest = T0 + T1 + CF; do not compute flags. 
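 * (This is the non-flag-setting ADC-style path; the flag-setting
 * variants go through gen_adc_CC() below. For sf == 0 the result is
 * zero-extended so the 64-bit TCG value stays canonical.)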
*/ 902 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 903 { 904 TCGv_i64 flag = tcg_temp_new_i64(); 905 tcg_gen_extu_i32_i64(flag, cpu_CF); 906 tcg_gen_add_i64(dest, t0, t1); 907 tcg_gen_add_i64(dest, dest, flag); 908 909 if (!sf) { 910 tcg_gen_ext32u_i64(dest, dest); 911 } 912 } 913 914 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 915 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 916 { 917 if (sf) { 918 TCGv_i64 result = tcg_temp_new_i64(); 919 TCGv_i64 cf_64 = tcg_temp_new_i64(); 920 TCGv_i64 vf_64 = tcg_temp_new_i64(); 921 TCGv_i64 tmp = tcg_temp_new_i64(); 922 TCGv_i64 zero = tcg_constant_i64(0); 923 924 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 925 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 926 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 927 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 928 gen_set_NZ64(result); 929 930 tcg_gen_xor_i64(vf_64, result, t0); 931 tcg_gen_xor_i64(tmp, t0, t1); 932 tcg_gen_andc_i64(vf_64, vf_64, tmp); 933 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 934 935 tcg_gen_mov_i64(dest, result); 936 } else { 937 TCGv_i32 t0_32 = tcg_temp_new_i32(); 938 TCGv_i32 t1_32 = tcg_temp_new_i32(); 939 TCGv_i32 tmp = tcg_temp_new_i32(); 940 TCGv_i32 zero = tcg_constant_i32(0); 941 942 tcg_gen_extrl_i64_i32(t0_32, t0); 943 tcg_gen_extrl_i64_i32(t1_32, t1); 944 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 945 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 946 947 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 948 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 949 tcg_gen_xor_i32(tmp, t0_32, t1_32); 950 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 951 tcg_gen_extu_i32_i64(dest, cpu_NF); 952 } 953 } 954 955 /* 956 * Load/Store generators 957 */ 958 959 /* 960 * Store from GPR register to memory. 
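 *
 * The iss_* arguments describe the instruction-specific syndrome that
 * is reported if the access faults: iss_srt is the transfer register,
 * iss_sf whether it is a 64-bit (X) register, iss_ar whether this is
 * an acquire/release form. Illustrative call (local names made up):
 *     do_gpr_st(s, cpu_reg(s, rt), addr, finalize_memop(s, MO_32),
 *               true, rt, false, false);
 * would describe a 32-bit store of Wt for syndrome reporting.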
961 */ 962 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 963 TCGv_i64 tcg_addr, MemOp memop, int memidx, 964 bool iss_valid, 965 unsigned int iss_srt, 966 bool iss_sf, bool iss_ar) 967 { 968 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 969 970 if (iss_valid) { 971 uint32_t syn; 972 973 syn = syn_data_abort_with_iss(0, 974 (memop & MO_SIZE), 975 false, 976 iss_srt, 977 iss_sf, 978 iss_ar, 979 0, 0, 0, 0, 0, false); 980 disas_set_insn_syndrome(s, syn); 981 } 982 } 983 984 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 985 TCGv_i64 tcg_addr, MemOp memop, 986 bool iss_valid, 987 unsigned int iss_srt, 988 bool iss_sf, bool iss_ar) 989 { 990 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 991 iss_valid, iss_srt, iss_sf, iss_ar); 992 } 993 994 /* 995 * Load from memory to GPR register 996 */ 997 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 998 MemOp memop, bool extend, int memidx, 999 bool iss_valid, unsigned int iss_srt, 1000 bool iss_sf, bool iss_ar) 1001 { 1002 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 1003 1004 if (extend && (memop & MO_SIGN)) { 1005 g_assert((memop & MO_SIZE) <= MO_32); 1006 tcg_gen_ext32u_i64(dest, dest); 1007 } 1008 1009 if (iss_valid) { 1010 uint32_t syn; 1011 1012 syn = syn_data_abort_with_iss(0, 1013 (memop & MO_SIZE), 1014 (memop & MO_SIGN) != 0, 1015 iss_srt, 1016 iss_sf, 1017 iss_ar, 1018 0, 0, 0, 0, 0, false); 1019 disas_set_insn_syndrome(s, syn); 1020 } 1021 } 1022 1023 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1024 MemOp memop, bool extend, 1025 bool iss_valid, unsigned int iss_srt, 1026 bool iss_sf, bool iss_ar) 1027 { 1028 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1029 iss_valid, iss_srt, iss_sf, iss_ar); 1030 } 1031 1032 /* 1033 * Store from FP register to memory 1034 */ 1035 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1036 { 1037 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1038 TCGv_i64 tmplo = tcg_temp_new_i64(); 1039 1040 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1041 1042 if ((mop & MO_SIZE) < MO_128) { 1043 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1044 } else { 1045 TCGv_i64 tmphi = tcg_temp_new_i64(); 1046 TCGv_i128 t16 = tcg_temp_new_i128(); 1047 1048 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1049 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1050 1051 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1052 } 1053 } 1054 1055 /* 1056 * Load from memory to FP register 1057 */ 1058 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1059 { 1060 /* This always zero-extends and writes to a full 128 bit wide vector */ 1061 TCGv_i64 tmplo = tcg_temp_new_i64(); 1062 TCGv_i64 tmphi = NULL; 1063 1064 if ((mop & MO_SIZE) < MO_128) { 1065 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1066 } else { 1067 TCGv_i128 t16 = tcg_temp_new_i128(); 1068 1069 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1070 1071 tmphi = tcg_temp_new_i64(); 1072 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1073 } 1074 1075 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1076 1077 if (tmphi) { 1078 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1079 } 1080 clear_vec_high(s, tmphi != NULL, destidx); 1081 } 1082 1083 /* 1084 * Vector load/store helpers. 
1085 * 1086 * The principal difference between this and a FP load is that we don't 1087 * zero extend as we are filling a partial chunk of the vector register. 1088 * These functions don't support 128 bit loads/stores, which would be 1089 * normal load/store operations. 1090 * 1091 * The _i32 versions are useful when operating on 32 bit quantities 1092 * (eg for floating point single or using Neon helper functions). 1093 */ 1094 1095 /* Get value of an element within a vector register */ 1096 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1097 int element, MemOp memop) 1098 { 1099 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1100 switch ((unsigned)memop) { 1101 case MO_8: 1102 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1103 break; 1104 case MO_16: 1105 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1106 break; 1107 case MO_32: 1108 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1109 break; 1110 case MO_8|MO_SIGN: 1111 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1112 break; 1113 case MO_16|MO_SIGN: 1114 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1115 break; 1116 case MO_32|MO_SIGN: 1117 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1118 break; 1119 case MO_64: 1120 case MO_64|MO_SIGN: 1121 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1122 break; 1123 default: 1124 g_assert_not_reached(); 1125 } 1126 } 1127 1128 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1129 int element, MemOp memop) 1130 { 1131 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1132 switch (memop) { 1133 case MO_8: 1134 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1135 break; 1136 case MO_16: 1137 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1138 break; 1139 case MO_8|MO_SIGN: 1140 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1141 break; 1142 case MO_16|MO_SIGN: 1143 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1144 break; 1145 case MO_32: 1146 case MO_32|MO_SIGN: 1147 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1148 break; 1149 default: 1150 g_assert_not_reached(); 1151 } 1152 } 1153 1154 /* Set value of an element within a vector register */ 1155 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1156 int element, MemOp memop) 1157 { 1158 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1159 switch (memop) { 1160 case MO_8: 1161 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1162 break; 1163 case MO_16: 1164 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1165 break; 1166 case MO_32: 1167 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1168 break; 1169 case MO_64: 1170 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1171 break; 1172 default: 1173 g_assert_not_reached(); 1174 } 1175 } 1176 1177 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1178 int destidx, int element, MemOp memop) 1179 { 1180 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1181 switch (memop) { 1182 case MO_8: 1183 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1184 break; 1185 case MO_16: 1186 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1187 break; 1188 case MO_32: 1189 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1190 break; 1191 default: 1192 g_assert_not_reached(); 1193 } 1194 } 1195 1196 /* Store from vector register to memory */ 1197 static void do_vec_st(DisasContext *s, int srcidx, int element, 1198 TCGv_i64 tcg_addr, MemOp mop) 1199 { 1200 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1201 1202 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1203 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1204 } 1205 1206 /* Load from memory to vector register */ 1207 static void do_vec_ld(DisasContext *s, int destidx, int element, 1208 TCGv_i64 tcg_addr, MemOp mop) 1209 { 1210 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1211 1212 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1213 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1214 } 1215 1216 /* Check that FP/Neon access is enabled. If it is, return 1217 * true. If not, emit code to generate an appropriate exception, 1218 * and return false; the caller should not emit any code for 1219 * the instruction. Note that this check must happen after all 1220 * unallocated-encoding checks (otherwise the syndrome information 1221 * for the resulting exception will be incorrect). 1222 */ 1223 static bool fp_access_check_only(DisasContext *s) 1224 { 1225 if (s->fp_excp_el) { 1226 assert(!s->fp_access_checked); 1227 s->fp_access_checked = true; 1228 1229 gen_exception_insn_el(s, 0, EXCP_UDEF, 1230 syn_fp_access_trap(1, 0xe, false, 0), 1231 s->fp_excp_el); 1232 return false; 1233 } 1234 s->fp_access_checked = true; 1235 return true; 1236 } 1237 1238 static bool fp_access_check(DisasContext *s) 1239 { 1240 if (!fp_access_check_only(s)) { 1241 return false; 1242 } 1243 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1244 gen_exception_insn(s, 0, EXCP_UDEF, 1245 syn_smetrap(SME_ET_Streaming, false)); 1246 return false; 1247 } 1248 return true; 1249 } 1250 1251 /* 1252 * Return <0 for non-supported element sizes, with MO_16 controlled by 1253 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. 1254 */ 1255 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz) 1256 { 1257 switch (esz) { 1258 case MO_64: 1259 case MO_32: 1260 break; 1261 case MO_16: 1262 if (!dc_isar_feature(aa64_fp16, s)) { 1263 return -1; 1264 } 1265 break; 1266 default: 1267 return -1; 1268 } 1269 return fp_access_check(s); 1270 } 1271 1272 /* Likewise, but vector MO_64 must have two elements. */ 1273 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) 1274 { 1275 switch (esz) { 1276 case MO_64: 1277 if (!is_q) { 1278 return -1; 1279 } 1280 break; 1281 case MO_32: 1282 break; 1283 case MO_16: 1284 if (!dc_isar_feature(aa64_fp16, s)) { 1285 return -1; 1286 } 1287 break; 1288 default: 1289 return -1; 1290 } 1291 return fp_access_check(s); 1292 } 1293 1294 /* 1295 * Check that SVE access is enabled. If it is, return true. 1296 * If not, emit code to generate an appropriate exception and return false. 1297 * This function corresponds to CheckSVEEnabled(). 1298 */ 1299 bool sve_access_check(DisasContext *s) 1300 { 1301 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1302 assert(dc_isar_feature(aa64_sme, s)); 1303 if (!sme_sm_enabled_check(s)) { 1304 goto fail_exit; 1305 } 1306 } else if (s->sve_excp_el) { 1307 gen_exception_insn_el(s, 0, EXCP_UDEF, 1308 syn_sve_access_trap(), s->sve_excp_el); 1309 goto fail_exit; 1310 } 1311 s->sve_access_checked = true; 1312 return fp_access_check(s); 1313 1314 fail_exit: 1315 /* Assert that we only raise one exception per instruction. */ 1316 assert(!s->sve_access_checked); 1317 s->sve_access_checked = true; 1318 return false; 1319 } 1320 1321 /* 1322 * Check that SME access is enabled, raise an exception if not. 1323 * Note that this function corresponds to CheckSMEAccess and is 1324 * only used directly for cpregs. 
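 * (Within this file it is also reached via sme_enabled_check() below
 * and from the MSR SVCR immediate handler, trans_MSR_i_SVCR.)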
1325 */ 1326 static bool sme_access_check(DisasContext *s) 1327 { 1328 if (s->sme_excp_el) { 1329 gen_exception_insn_el(s, 0, EXCP_UDEF, 1330 syn_smetrap(SME_ET_AccessTrap, false), 1331 s->sme_excp_el); 1332 return false; 1333 } 1334 return true; 1335 } 1336 1337 /* This function corresponds to CheckSMEEnabled. */ 1338 bool sme_enabled_check(DisasContext *s) 1339 { 1340 /* 1341 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1342 * to be zero when fp_excp_el has priority. This is because we need 1343 * sme_excp_el by itself for cpregs access checks. 1344 */ 1345 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1346 s->fp_access_checked = true; 1347 return sme_access_check(s); 1348 } 1349 return fp_access_check_only(s); 1350 } 1351 1352 /* Common subroutine for CheckSMEAnd*Enabled. */ 1353 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1354 { 1355 if (!sme_enabled_check(s)) { 1356 return false; 1357 } 1358 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1359 gen_exception_insn(s, 0, EXCP_UDEF, 1360 syn_smetrap(SME_ET_NotStreaming, false)); 1361 return false; 1362 } 1363 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1364 gen_exception_insn(s, 0, EXCP_UDEF, 1365 syn_smetrap(SME_ET_InactiveZA, false)); 1366 return false; 1367 } 1368 return true; 1369 } 1370 1371 /* 1372 * Expanders for AdvSIMD translation functions. 1373 */ 1374 1375 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, 1376 gen_helper_gvec_2 *fn) 1377 { 1378 if (!a->q && a->esz == MO_64) { 1379 return false; 1380 } 1381 if (fp_access_check(s)) { 1382 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); 1383 } 1384 return true; 1385 } 1386 1387 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, 1388 gen_helper_gvec_3 *fn) 1389 { 1390 if (!a->q && a->esz == MO_64) { 1391 return false; 1392 } 1393 if (fp_access_check(s)) { 1394 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); 1395 } 1396 return true; 1397 } 1398 1399 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1400 { 1401 if (!a->q && a->esz == MO_64) { 1402 return false; 1403 } 1404 if (fp_access_check(s)) { 1405 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1406 } 1407 return true; 1408 } 1409 1410 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1411 { 1412 if (a->esz == MO_64) { 1413 return false; 1414 } 1415 if (fp_access_check(s)) { 1416 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1417 } 1418 return true; 1419 } 1420 1421 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1422 { 1423 if (a->esz == MO_8) { 1424 return false; 1425 } 1426 return do_gvec_fn3_no64(s, a, fn); 1427 } 1428 1429 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) 1430 { 1431 if (!a->q && a->esz == MO_64) { 1432 return false; 1433 } 1434 if (fp_access_check(s)) { 1435 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); 1436 } 1437 return true; 1438 } 1439 1440 /* 1441 * This utility function is for doing register extension with an 1442 * optional shift. You will likely want to pass a temporary for the 1443 * destination register. See DecodeRegExtend() in the ARM ARM. 1444 */ 1445 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1446 int option, unsigned int shift) 1447 { 1448 int extsize = extract32(option, 0, 2); 1449 bool is_signed = extract32(option, 2, 1); 1450 1451 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? 
MO_SIGN : 0)); 1452 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1453 } 1454 1455 static inline void gen_check_sp_alignment(DisasContext *s) 1456 { 1457 /* The AArch64 architecture mandates that (if enabled via PSTATE 1458 * or SCTLR bits) there is a check that SP is 16-aligned on every 1459 * SP-relative load or store (with an exception generated if it is not). 1460 * In line with general QEMU practice regarding misaligned accesses, 1461 * we omit these checks for the sake of guest program performance. 1462 * This function is provided as a hook so we can more easily add these 1463 * checks in future (possibly as a "favour catching guest program bugs 1464 * over speed" user selectable option). 1465 */ 1466 } 1467 1468 /* 1469 * The instruction disassembly implemented here matches 1470 * the instruction encoding classifications in chapter C4 1471 * of the ARM Architecture Reference Manual (DDI0487B_a); 1472 * classification names and decode diagrams here should generally 1473 * match up with those in the manual. 1474 */ 1475 1476 static bool trans_B(DisasContext *s, arg_i *a) 1477 { 1478 reset_btype(s); 1479 gen_goto_tb(s, 0, a->imm); 1480 return true; 1481 } 1482 1483 static bool trans_BL(DisasContext *s, arg_i *a) 1484 { 1485 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1486 reset_btype(s); 1487 gen_goto_tb(s, 0, a->imm); 1488 return true; 1489 } 1490 1491 1492 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1493 { 1494 DisasLabel match; 1495 TCGv_i64 tcg_cmp; 1496 1497 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1498 reset_btype(s); 1499 1500 match = gen_disas_label(s); 1501 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1502 tcg_cmp, 0, match.label); 1503 gen_goto_tb(s, 0, 4); 1504 set_disas_label(s, match); 1505 gen_goto_tb(s, 1, a->imm); 1506 return true; 1507 } 1508 1509 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1510 { 1511 DisasLabel match; 1512 TCGv_i64 tcg_cmp; 1513 1514 tcg_cmp = tcg_temp_new_i64(); 1515 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1516 1517 reset_btype(s); 1518 1519 match = gen_disas_label(s); 1520 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1521 tcg_cmp, 0, match.label); 1522 gen_goto_tb(s, 0, 4); 1523 set_disas_label(s, match); 1524 gen_goto_tb(s, 1, a->imm); 1525 return true; 1526 } 1527 1528 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1529 { 1530 /* BC.cond is only present with FEAT_HBC */ 1531 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1532 return false; 1533 } 1534 reset_btype(s); 1535 if (a->cond < 0x0e) { 1536 /* genuinely conditional branches */ 1537 DisasLabel match = gen_disas_label(s); 1538 arm_gen_test_cc(a->cond, match.label); 1539 gen_goto_tb(s, 0, 4); 1540 set_disas_label(s, match); 1541 gen_goto_tb(s, 1, a->imm); 1542 } else { 1543 /* 0xe and 0xf are both "always" conditions */ 1544 gen_goto_tb(s, 0, a->imm); 1545 } 1546 return true; 1547 } 1548 1549 static void set_btype_for_br(DisasContext *s, int rn) 1550 { 1551 if (dc_isar_feature(aa64_bti, s)) { 1552 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1553 if (rn == 16 || rn == 17) { 1554 set_btype(s, 1); 1555 } else { 1556 TCGv_i64 pc = tcg_temp_new_i64(); 1557 gen_pc_plus_diff(s, pc, 0); 1558 gen_helper_guarded_page_br(tcg_env, pc); 1559 s->btype = -1; 1560 } 1561 } 1562 } 1563 1564 static void set_btype_for_blr(DisasContext *s) 1565 { 1566 if (dc_isar_feature(aa64_bti, s)) { 1567 /* BLR sets BTYPE to 2, regardless of source guarded page. 
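 * (Contrast set_btype_for_br() above, where the resulting BTYPE
 * depends on the target register and, for registers other than
 * x16/x17, on whether the page containing the branch is guarded.)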
*/ 1568 set_btype(s, 2); 1569 } 1570 } 1571 1572 static bool trans_BR(DisasContext *s, arg_r *a) 1573 { 1574 set_btype_for_br(s, a->rn); 1575 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1576 s->base.is_jmp = DISAS_JUMP; 1577 return true; 1578 } 1579 1580 static bool trans_BLR(DisasContext *s, arg_r *a) 1581 { 1582 TCGv_i64 dst = cpu_reg(s, a->rn); 1583 TCGv_i64 lr = cpu_reg(s, 30); 1584 if (dst == lr) { 1585 TCGv_i64 tmp = tcg_temp_new_i64(); 1586 tcg_gen_mov_i64(tmp, dst); 1587 dst = tmp; 1588 } 1589 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1590 gen_a64_set_pc(s, dst); 1591 set_btype_for_blr(s); 1592 s->base.is_jmp = DISAS_JUMP; 1593 return true; 1594 } 1595 1596 static bool trans_RET(DisasContext *s, arg_r *a) 1597 { 1598 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1599 s->base.is_jmp = DISAS_JUMP; 1600 return true; 1601 } 1602 1603 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1604 TCGv_i64 modifier, bool use_key_a) 1605 { 1606 TCGv_i64 truedst; 1607 /* 1608 * Return the branch target for a BRAA/RETA/etc, which is either 1609 * just the destination dst, or that value with the pauth check 1610 * done and the code removed from the high bits. 1611 */ 1612 if (!s->pauth_active) { 1613 return dst; 1614 } 1615 1616 truedst = tcg_temp_new_i64(); 1617 if (use_key_a) { 1618 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1619 } else { 1620 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1621 } 1622 return truedst; 1623 } 1624 1625 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1626 { 1627 TCGv_i64 dst; 1628 1629 if (!dc_isar_feature(aa64_pauth, s)) { 1630 return false; 1631 } 1632 1633 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1634 set_btype_for_br(s, a->rn); 1635 gen_a64_set_pc(s, dst); 1636 s->base.is_jmp = DISAS_JUMP; 1637 return true; 1638 } 1639 1640 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1641 { 1642 TCGv_i64 dst, lr; 1643 1644 if (!dc_isar_feature(aa64_pauth, s)) { 1645 return false; 1646 } 1647 1648 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1649 lr = cpu_reg(s, 30); 1650 if (dst == lr) { 1651 TCGv_i64 tmp = tcg_temp_new_i64(); 1652 tcg_gen_mov_i64(tmp, dst); 1653 dst = tmp; 1654 } 1655 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1656 gen_a64_set_pc(s, dst); 1657 set_btype_for_blr(s); 1658 s->base.is_jmp = DISAS_JUMP; 1659 return true; 1660 } 1661 1662 static bool trans_RETA(DisasContext *s, arg_reta *a) 1663 { 1664 TCGv_i64 dst; 1665 1666 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1667 gen_a64_set_pc(s, dst); 1668 s->base.is_jmp = DISAS_JUMP; 1669 return true; 1670 } 1671 1672 static bool trans_BRA(DisasContext *s, arg_bra *a) 1673 { 1674 TCGv_i64 dst; 1675 1676 if (!dc_isar_feature(aa64_pauth, s)) { 1677 return false; 1678 } 1679 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1680 gen_a64_set_pc(s, dst); 1681 set_btype_for_br(s, a->rn); 1682 s->base.is_jmp = DISAS_JUMP; 1683 return true; 1684 } 1685 1686 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1687 { 1688 TCGv_i64 dst, lr; 1689 1690 if (!dc_isar_feature(aa64_pauth, s)) { 1691 return false; 1692 } 1693 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1694 lr = cpu_reg(s, 30); 1695 if (dst == lr) { 1696 TCGv_i64 tmp = tcg_temp_new_i64(); 1697 tcg_gen_mov_i64(tmp, dst); 1698 dst = tmp; 1699 } 1700 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1701 gen_a64_set_pc(s, dst); 1702 set_btype_for_blr(s); 1703 s->base.is_jmp = DISAS_JUMP; 1704 
return true; 1705 } 1706 1707 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1708 { 1709 TCGv_i64 dst; 1710 1711 if (s->current_el == 0) { 1712 return false; 1713 } 1714 if (s->trap_eret) { 1715 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1716 return true; 1717 } 1718 dst = tcg_temp_new_i64(); 1719 tcg_gen_ld_i64(dst, tcg_env, 1720 offsetof(CPUARMState, elr_el[s->current_el])); 1721 1722 translator_io_start(&s->base); 1723 1724 gen_helper_exception_return(tcg_env, dst); 1725 /* Must exit loop to check un-masked IRQs */ 1726 s->base.is_jmp = DISAS_EXIT; 1727 return true; 1728 } 1729 1730 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1731 { 1732 TCGv_i64 dst; 1733 1734 if (!dc_isar_feature(aa64_pauth, s)) { 1735 return false; 1736 } 1737 if (s->current_el == 0) { 1738 return false; 1739 } 1740 /* The FGT trap takes precedence over an auth trap. */ 1741 if (s->trap_eret) { 1742 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1743 return true; 1744 } 1745 dst = tcg_temp_new_i64(); 1746 tcg_gen_ld_i64(dst, tcg_env, 1747 offsetof(CPUARMState, elr_el[s->current_el])); 1748 1749 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1750 1751 translator_io_start(&s->base); 1752 1753 gen_helper_exception_return(tcg_env, dst); 1754 /* Must exit loop to check un-masked IRQs */ 1755 s->base.is_jmp = DISAS_EXIT; 1756 return true; 1757 } 1758 1759 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1760 { 1761 return true; 1762 } 1763 1764 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1765 { 1766 /* 1767 * When running in MTTCG we don't generate jumps to the yield and 1768 * WFE helpers as it won't affect the scheduling of other vCPUs. 1769 * If we wanted to more completely model WFE/SEV so we don't busy 1770 * spin unnecessarily we would need to do something more involved. 1771 */ 1772 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1773 s->base.is_jmp = DISAS_YIELD; 1774 } 1775 return true; 1776 } 1777 1778 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1779 { 1780 s->base.is_jmp = DISAS_WFI; 1781 return true; 1782 } 1783 1784 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1785 { 1786 /* 1787 * When running in MTTCG we don't generate jumps to the yield and 1788 * WFE helpers as it won't affect the scheduling of other vCPUs. 1789 * If we wanted to more completely model WFE/SEV so we don't busy 1790 * spin unnecessarily we would need to do something more involved. 1791 */ 1792 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1793 s->base.is_jmp = DISAS_WFE; 1794 } 1795 return true; 1796 } 1797 1798 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 1799 { 1800 if (!dc_isar_feature(aa64_wfxt, s)) { 1801 return false; 1802 } 1803 1804 /* 1805 * Because we need to pass the register value to the helper, 1806 * it's easier to emit the code now, unlike trans_WFI which 1807 * defers it to aarch64_tr_tb_stop(). That means we need to 1808 * check ss_active so that single-stepping a WFIT doesn't halt. 
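 * (We also advance the PC before calling the helper, so that if the
 * CPU goes to sleep it resumes at the insn after the WFIT.)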
1809 */ 1810 if (s->ss_active) { 1811 /* Act like a NOP under architectural singlestep */ 1812 return true; 1813 } 1814 1815 gen_a64_update_pc(s, 4); 1816 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1817 /* Go back to the main loop to check for interrupts */ 1818 s->base.is_jmp = DISAS_EXIT; 1819 return true; 1820 } 1821 1822 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1823 { 1824 if (!dc_isar_feature(aa64_wfxt, s)) { 1825 return false; 1826 } 1827 1828 /* 1829 * We rely here on our WFE implementation being a NOP, so we 1830 * don't need to do anything different to handle the WFET timeout 1831 * from what trans_WFE does. 1832 */ 1833 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1834 s->base.is_jmp = DISAS_WFE; 1835 } 1836 return true; 1837 } 1838 1839 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1840 { 1841 if (s->pauth_active) { 1842 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1843 } 1844 return true; 1845 } 1846 1847 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1848 { 1849 if (s->pauth_active) { 1850 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1851 } 1852 return true; 1853 } 1854 1855 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1856 { 1857 if (s->pauth_active) { 1858 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1859 } 1860 return true; 1861 } 1862 1863 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1864 { 1865 if (s->pauth_active) { 1866 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1867 } 1868 return true; 1869 } 1870 1871 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1872 { 1873 if (s->pauth_active) { 1874 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1875 } 1876 return true; 1877 } 1878 1879 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1880 { 1881 /* Without RAS, we must implement this as NOP. */ 1882 if (dc_isar_feature(aa64_ras, s)) { 1883 /* 1884 * QEMU does not have a source of physical SErrors, 1885 * so we are only concerned with virtual SErrors. 1886 * The pseudocode in the ARM for this case is 1887 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1888 * AArch64.vESBOperation(); 1889 * Most of the condition can be evaluated at translation time. 1890 * Test for EL2 present, and defer test for SEL2 to runtime. 
1891 */ 1892 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1893 gen_helper_vesb(tcg_env); 1894 } 1895 } 1896 return true; 1897 } 1898 1899 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1900 { 1901 if (s->pauth_active) { 1902 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1903 } 1904 return true; 1905 } 1906 1907 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1908 { 1909 if (s->pauth_active) { 1910 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1911 } 1912 return true; 1913 } 1914 1915 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1916 { 1917 if (s->pauth_active) { 1918 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1919 } 1920 return true; 1921 } 1922 1923 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1924 { 1925 if (s->pauth_active) { 1926 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1927 } 1928 return true; 1929 } 1930 1931 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1932 { 1933 if (s->pauth_active) { 1934 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1935 } 1936 return true; 1937 } 1938 1939 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1940 { 1941 if (s->pauth_active) { 1942 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1943 } 1944 return true; 1945 } 1946 1947 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1948 { 1949 if (s->pauth_active) { 1950 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1951 } 1952 return true; 1953 } 1954 1955 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1956 { 1957 if (s->pauth_active) { 1958 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1959 } 1960 return true; 1961 } 1962 1963 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1964 { 1965 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1966 return true; 1967 } 1968 1969 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1970 { 1971 /* We handle DSB and DMB the same way */ 1972 TCGBar bar; 1973 1974 switch (a->types) { 1975 case 1: /* MBReqTypes_Reads */ 1976 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1977 break; 1978 case 2: /* MBReqTypes_Writes */ 1979 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1980 break; 1981 default: /* MBReqTypes_All */ 1982 bar = TCG_BAR_SC | TCG_MO_ALL; 1983 break; 1984 } 1985 tcg_gen_mb(bar); 1986 return true; 1987 } 1988 1989 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1990 { 1991 /* 1992 * We need to break the TB after this insn to execute 1993 * self-modifying code correctly and also to take 1994 * any pending interrupts immediately. 1995 */ 1996 reset_btype(s); 1997 gen_goto_tb(s, 0, 4); 1998 return true; 1999 } 2000 2001 static bool trans_SB(DisasContext *s, arg_SB *a) 2002 { 2003 if (!dc_isar_feature(aa64_sb, s)) { 2004 return false; 2005 } 2006 /* 2007 * TODO: There is no speculation barrier opcode for TCG; 2008 * MB and end the TB instead. 
2009 */ 2010 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2011 gen_goto_tb(s, 0, 4); 2012 return true; 2013 } 2014 2015 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2016 { 2017 if (!dc_isar_feature(aa64_condm_4, s)) { 2018 return false; 2019 } 2020 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2021 return true; 2022 } 2023 2024 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2025 { 2026 TCGv_i32 z; 2027 2028 if (!dc_isar_feature(aa64_condm_5, s)) { 2029 return false; 2030 } 2031 2032 z = tcg_temp_new_i32(); 2033 2034 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2035 2036 /* 2037 * (!C & !Z) << 31 2038 * (!(C | Z)) << 31 2039 * ~((C | Z) << 31) 2040 * ~-(C | Z) 2041 * (C | Z) - 1 2042 */ 2043 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2044 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2045 2046 /* !(Z & C) */ 2047 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2048 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2049 2050 /* (!C & Z) << 31 -> -(Z & ~C) */ 2051 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2052 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2053 2054 /* C | Z */ 2055 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2056 2057 return true; 2058 } 2059 2060 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2061 { 2062 if (!dc_isar_feature(aa64_condm_5, s)) { 2063 return false; 2064 } 2065 2066 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2067 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2068 2069 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2070 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2071 2072 tcg_gen_movi_i32(cpu_NF, 0); 2073 tcg_gen_movi_i32(cpu_VF, 0); 2074 2075 return true; 2076 } 2077 2078 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2079 { 2080 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2081 return false; 2082 } 2083 if (a->imm & 1) { 2084 set_pstate_bits(PSTATE_UAO); 2085 } else { 2086 clear_pstate_bits(PSTATE_UAO); 2087 } 2088 gen_rebuild_hflags(s); 2089 s->base.is_jmp = DISAS_TOO_MANY; 2090 return true; 2091 } 2092 2093 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2094 { 2095 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2096 return false; 2097 } 2098 if (a->imm & 1) { 2099 set_pstate_bits(PSTATE_PAN); 2100 } else { 2101 clear_pstate_bits(PSTATE_PAN); 2102 } 2103 gen_rebuild_hflags(s); 2104 s->base.is_jmp = DISAS_TOO_MANY; 2105 return true; 2106 } 2107 2108 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2109 { 2110 if (s->current_el == 0) { 2111 return false; 2112 } 2113 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2114 s->base.is_jmp = DISAS_TOO_MANY; 2115 return true; 2116 } 2117 2118 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2119 { 2120 if (!dc_isar_feature(aa64_ssbs, s)) { 2121 return false; 2122 } 2123 if (a->imm & 1) { 2124 set_pstate_bits(PSTATE_SSBS); 2125 } else { 2126 clear_pstate_bits(PSTATE_SSBS); 2127 } 2128 /* Don't need to rebuild hflags since SSBS is a nop */ 2129 s->base.is_jmp = DISAS_TOO_MANY; 2130 return true; 2131 } 2132 2133 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2134 { 2135 if (!dc_isar_feature(aa64_dit, s)) { 2136 return false; 2137 } 2138 if (a->imm & 1) { 2139 set_pstate_bits(PSTATE_DIT); 2140 } else { 2141 clear_pstate_bits(PSTATE_DIT); 2142 } 2143 /* There's no need to rebuild hflags because DIT is a nop */ 2144 s->base.is_jmp = DISAS_TOO_MANY; 2145 return true; 2146 } 2147 2148 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2149 { 2150 if (dc_isar_feature(aa64_mte, s)) { 2151 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2152 if (a->imm & 1) { 2153 set_pstate_bits(PSTATE_TCO); 2154 } else { 2155 clear_pstate_bits(PSTATE_TCO); 2156 } 2157 gen_rebuild_hflags(s); 2158 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2159 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2160 return true; 2161 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2162 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2163 return true; 2164 } else { 2165 /* Insn not present */ 2166 return false; 2167 } 2168 } 2169 2170 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2171 { 2172 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2173 s->base.is_jmp = DISAS_TOO_MANY; 2174 return true; 2175 } 2176 2177 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2178 { 2179 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2180 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2181 s->base.is_jmp = DISAS_UPDATE_EXIT; 2182 return true; 2183 } 2184 2185 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2186 { 2187 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2188 return false; 2189 } 2190 2191 if (a->imm == 0) { 2192 clear_pstate_bits(PSTATE_ALLINT); 2193 } else if (s->current_el > 1) { 2194 set_pstate_bits(PSTATE_ALLINT); 2195 } else { 2196 gen_helper_msr_set_allint_el1(tcg_env); 2197 } 2198 2199 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2200 s->base.is_jmp = DISAS_UPDATE_EXIT; 2201 return true; 2202 } 2203 2204 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2205 { 2206 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2207 return false; 2208 } 2209 if (sme_access_check(s)) { 2210 int old = s->pstate_sm | (s->pstate_za << 1); 2211 int new = a->imm * 3; 2212 2213 if ((old ^ new) & a->mask) { 2214 /* At least one bit changes. */ 2215 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2216 tcg_constant_i32(a->mask)); 2217 s->base.is_jmp = DISAS_TOO_MANY; 2218 } 2219 } 2220 return true; 2221 } 2222 2223 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2224 { 2225 TCGv_i32 tmp = tcg_temp_new_i32(); 2226 TCGv_i32 nzcv = tcg_temp_new_i32(); 2227 2228 /* build bit 31, N */ 2229 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2230 /* build bit 30, Z */ 2231 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2232 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2233 /* build bit 29, C */ 2234 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2235 /* build bit 28, V */ 2236 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2237 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2238 /* generate result */ 2239 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2240 } 2241 2242 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2243 { 2244 TCGv_i32 nzcv = tcg_temp_new_i32(); 2245 2246 /* take NZCV from R[t] */ 2247 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2248 2249 /* bit 31, N */ 2250 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2251 /* bit 30, Z */ 2252 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2253 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2254 /* bit 29, C */ 2255 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2256 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2257 /* bit 28, V */ 2258 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2259 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2260 } 2261 2262 static void gen_sysreg_undef(DisasContext *s, bool isread, 2263 uint8_t op0, uint8_t op1, uint8_t op2, 2264 uint8_t crn, uint8_t crm, uint8_t rt) 2265 { 2266 /* 2267 * Generate code to emit an UNDEF with correct syndrome 2268 * information for a failed system register access. 
2269 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2270 * but if FEAT_IDST is implemented then read accesses to registers 2271 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2272 * syndrome. 2273 */ 2274 uint32_t syndrome; 2275 2276 if (isread && dc_isar_feature(aa64_ids, s) && 2277 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2278 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2279 } else { 2280 syndrome = syn_uncategorized(); 2281 } 2282 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2283 } 2284 2285 /* MRS - move from system register 2286 * MSR (register) - move to system register 2287 * SYS 2288 * SYSL 2289 * These are all essentially the same insn in 'read' and 'write' 2290 * versions, with varying op0 fields. 2291 */ 2292 static void handle_sys(DisasContext *s, bool isread, 2293 unsigned int op0, unsigned int op1, unsigned int op2, 2294 unsigned int crn, unsigned int crm, unsigned int rt) 2295 { 2296 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2297 crn, crm, op0, op1, op2); 2298 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2299 bool need_exit_tb = false; 2300 bool nv_trap_to_el2 = false; 2301 bool nv_redirect_reg = false; 2302 bool skip_fp_access_checks = false; 2303 bool nv2_mem_redirect = false; 2304 TCGv_ptr tcg_ri = NULL; 2305 TCGv_i64 tcg_rt; 2306 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2307 2308 if (crn == 11 || crn == 15) { 2309 /* 2310 * Check for TIDCP trap, which must take precedence over 2311 * the UNDEF for "no such register" etc. 2312 */ 2313 switch (s->current_el) { 2314 case 0: 2315 if (dc_isar_feature(aa64_tidcp1, s)) { 2316 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2317 } 2318 break; 2319 case 1: 2320 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2321 break; 2322 } 2323 } 2324 2325 if (!ri) { 2326 /* Unknown register; this might be a guest error or a QEMU 2327 * unimplemented feature. 2328 */ 2329 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2330 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2331 isread ? "read" : "write", op0, op1, crn, crm, op2); 2332 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2333 return; 2334 } 2335 2336 if (s->nv2 && ri->nv2_redirect_offset) { 2337 /* 2338 * Some registers always redirect to memory; some only do so if 2339 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2340 * pairs which share an offset; see the table in R_CSRPQ). 2341 */ 2342 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2343 nv2_mem_redirect = s->nv1; 2344 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2345 nv2_mem_redirect = !s->nv1; 2346 } else { 2347 nv2_mem_redirect = true; 2348 } 2349 } 2350 2351 /* Check access permissions */ 2352 if (!cp_access_ok(s->current_el, ri, isread)) { 2353 /* 2354 * FEAT_NV/NV2 handling does not do the usual FP access checks 2355 * for registers only accessible at EL2 (though it *does* do them 2356 * for registers accessible at EL1). 2357 */ 2358 skip_fp_access_checks = true; 2359 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2360 /* 2361 * This is one of the few EL2 registers which should redirect 2362 * to the equivalent EL1 register. We do that after running 2363 * the EL2 register's accessfn. 2364 */ 2365 nv_redirect_reg = true; 2366 assert(!nv2_mem_redirect); 2367 } else if (nv2_mem_redirect) { 2368 /* 2369 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2370 * UNDEF to EL1. 
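 * Nothing to do here: the redirected memory access itself is
 * emitted further down, after the remaining access checks.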
2371 */ 2372 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2373 /* 2374 * This register / instruction exists and is an EL2 register, so 2375 * we must trap to EL2 if accessed in nested virtualization EL1 2376 * instead of UNDEFing. We'll do that after the usual access checks. 2377 * (This makes a difference only for a couple of registers like 2378 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2379 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2380 * an accessfn which does nothing when called from EL1, because 2381 * the trap-to-EL3 controls which would apply to that register 2382 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2383 */ 2384 nv_trap_to_el2 = true; 2385 } else { 2386 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2387 return; 2388 } 2389 } 2390 2391 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2392 /* Emit code to perform further access permissions checks at 2393 * runtime; this may result in an exception. 2394 */ 2395 gen_a64_update_pc(s, 0); 2396 tcg_ri = tcg_temp_new_ptr(); 2397 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2398 tcg_constant_i32(key), 2399 tcg_constant_i32(syndrome), 2400 tcg_constant_i32(isread)); 2401 } else if (ri->type & ARM_CP_RAISES_EXC) { 2402 /* 2403 * The readfn or writefn might raise an exception; 2404 * synchronize the CPU state in case it does. 2405 */ 2406 gen_a64_update_pc(s, 0); 2407 } 2408 2409 if (!skip_fp_access_checks) { 2410 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2411 return; 2412 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2413 return; 2414 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2415 return; 2416 } 2417 } 2418 2419 if (nv_trap_to_el2) { 2420 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2421 return; 2422 } 2423 2424 if (nv_redirect_reg) { 2425 /* 2426 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2427 * Conveniently in all cases the encoding of the EL1 register is 2428 * identical to the EL2 register except that opc1 is 0. 2429 * Get the reginfo for the EL1 register to use for the actual access. 2430 * We don't use the EL1 register's access function, and 2431 * fine-grained-traps on EL1 also do not apply here. 2432 */ 2433 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2434 crn, crm, op0, 0, op2); 2435 ri = get_arm_cp_reginfo(s->cp_regs, key); 2436 assert(ri); 2437 assert(cp_access_ok(s->current_el, ri, isread)); 2438 /* 2439 * We might not have done an update_pc earlier, so check we don't 2440 * need it. We could support this in future if necessary. 2441 */ 2442 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2443 } 2444 2445 if (nv2_mem_redirect) { 2446 /* 2447 * This system register is being redirected into an EL2 memory access. 2448 * This means it is not an IO operation, doesn't change hflags, 2449 * and need not end the TB, because it has no side effects. 2450 * 2451 * The access is 64-bit single copy atomic, guaranteed aligned because 2452 * of the definition of VCNR_EL2. Its endianness depends on 2453 * SCTLR_EL2.EE, not on the data endianness of EL1. 2454 * It is done under either the EL2 translation regime or the EL2&0 2455 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2456 * PSTATE.PAN is 0. 2457 */ 2458 TCGv_i64 ptr = tcg_temp_new_i64(); 2459 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2460 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2461 int memidx = arm_to_core_mmu_idx(armmemidx); 2462 uint32_t syn; 2463 2464 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2465 2466 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2467 tcg_gen_addi_i64(ptr, ptr, 2468 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2469 tcg_rt = cpu_reg(s, rt); 2470 2471 syn = syn_data_abort_vncr(0, !isread, 0); 2472 disas_set_insn_syndrome(s, syn); 2473 if (isread) { 2474 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2475 } else { 2476 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2477 } 2478 return; 2479 } 2480 2481 /* Handle special cases first */ 2482 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2483 case 0: 2484 break; 2485 case ARM_CP_NOP: 2486 return; 2487 case ARM_CP_NZCV: 2488 tcg_rt = cpu_reg(s, rt); 2489 if (isread) { 2490 gen_get_nzcv(tcg_rt); 2491 } else { 2492 gen_set_nzcv(tcg_rt); 2493 } 2494 return; 2495 case ARM_CP_CURRENTEL: 2496 { 2497 /* 2498 * Reads as current EL value from pstate, which is 2499 * guaranteed to be constant by the tb flags. 2500 * For nested virt we should report EL2. 2501 */ 2502 int el = s->nv ? 2 : s->current_el; 2503 tcg_rt = cpu_reg(s, rt); 2504 tcg_gen_movi_i64(tcg_rt, el << 2); 2505 return; 2506 } 2507 case ARM_CP_DC_ZVA: 2508 /* Writes clear the aligned block of memory which rt points into. */ 2509 if (s->mte_active[0]) { 2510 int desc = 0; 2511 2512 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2513 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2514 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2515 2516 tcg_rt = tcg_temp_new_i64(); 2517 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2518 tcg_constant_i32(desc), cpu_reg(s, rt)); 2519 } else { 2520 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2521 } 2522 gen_helper_dc_zva(tcg_env, tcg_rt); 2523 return; 2524 case ARM_CP_DC_GVA: 2525 { 2526 TCGv_i64 clean_addr, tag; 2527 2528 /* 2529 * DC_GVA, like DC_ZVA, requires that we supply the original 2530 * pointer for an invalid page. Probe that address first. 2531 */ 2532 tcg_rt = cpu_reg(s, rt); 2533 clean_addr = clean_data_tbi(s, tcg_rt); 2534 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2535 2536 if (s->ata[0]) { 2537 /* Extract the tag from the register to match STZGM. */ 2538 tag = tcg_temp_new_i64(); 2539 tcg_gen_shri_i64(tag, tcg_rt, 56); 2540 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2541 } 2542 } 2543 return; 2544 case ARM_CP_DC_GZVA: 2545 { 2546 TCGv_i64 clean_addr, tag; 2547 2548 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2549 tcg_rt = cpu_reg(s, rt); 2550 clean_addr = clean_data_tbi(s, tcg_rt); 2551 gen_helper_dc_zva(tcg_env, clean_addr); 2552 2553 if (s->ata[0]) { 2554 /* Extract the tag from the register to match STZGM. 
*/ 2555 tag = tcg_temp_new_i64(); 2556 tcg_gen_shri_i64(tag, tcg_rt, 56); 2557 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2558 } 2559 } 2560 return; 2561 default: 2562 g_assert_not_reached(); 2563 } 2564 2565 if (ri->type & ARM_CP_IO) { 2566 /* I/O operations must end the TB here (whether read or write) */ 2567 need_exit_tb = translator_io_start(&s->base); 2568 } 2569 2570 tcg_rt = cpu_reg(s, rt); 2571 2572 if (isread) { 2573 if (ri->type & ARM_CP_CONST) { 2574 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2575 } else if (ri->readfn) { 2576 if (!tcg_ri) { 2577 tcg_ri = gen_lookup_cp_reg(key); 2578 } 2579 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2580 } else { 2581 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2582 } 2583 } else { 2584 if (ri->type & ARM_CP_CONST) { 2585 /* If not forbidden by access permissions, treat as WI */ 2586 return; 2587 } else if (ri->writefn) { 2588 if (!tcg_ri) { 2589 tcg_ri = gen_lookup_cp_reg(key); 2590 } 2591 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2592 } else { 2593 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2594 } 2595 } 2596 2597 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2598 /* 2599 * A write to any coprocessor register that ends a TB 2600 * must rebuild the hflags for the next TB. 2601 */ 2602 gen_rebuild_hflags(s); 2603 /* 2604 * We default to ending the TB on a coprocessor register write, 2605 * but allow this to be suppressed by the register definition 2606 * (usually only necessary to work around guest bugs). 2607 */ 2608 need_exit_tb = true; 2609 } 2610 if (need_exit_tb) { 2611 s->base.is_jmp = DISAS_UPDATE_EXIT; 2612 } 2613 } 2614 2615 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2616 { 2617 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2618 return true; 2619 } 2620 2621 static bool trans_SVC(DisasContext *s, arg_i *a) 2622 { 2623 /* 2624 * For SVC, HVC and SMC we advance the single-step state 2625 * machine before taking the exception. This is architecturally 2626 * mandated, to ensure that single-stepping a system call 2627 * instruction works properly. 2628 */ 2629 uint32_t syndrome = syn_aa64_svc(a->imm); 2630 if (s->fgt_svc) { 2631 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2632 return true; 2633 } 2634 gen_ss_advance(s); 2635 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2636 return true; 2637 } 2638 2639 static bool trans_HVC(DisasContext *s, arg_i *a) 2640 { 2641 int target_el = s->current_el == 3 ? 3 : 2; 2642 2643 if (s->current_el == 0) { 2644 unallocated_encoding(s); 2645 return true; 2646 } 2647 /* 2648 * The pre HVC helper handles cases when HVC gets trapped 2649 * as an undefined insn by runtime configuration. 
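 * (For example, HVC may be disabled via SCR_EL3.HCE or HCR_EL2.HCD;
 * in those cases the helper raises the UNDEF exception itself.)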
2650 */ 2651 gen_a64_update_pc(s, 0); 2652 gen_helper_pre_hvc(tcg_env); 2653 /* Architecture requires ss advance before we do the actual work */ 2654 gen_ss_advance(s); 2655 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2656 return true; 2657 } 2658 2659 static bool trans_SMC(DisasContext *s, arg_i *a) 2660 { 2661 if (s->current_el == 0) { 2662 unallocated_encoding(s); 2663 return true; 2664 } 2665 gen_a64_update_pc(s, 0); 2666 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2667 /* Architecture requires ss advance before we do the actual work */ 2668 gen_ss_advance(s); 2669 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2670 return true; 2671 } 2672 2673 static bool trans_BRK(DisasContext *s, arg_i *a) 2674 { 2675 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2676 return true; 2677 } 2678 2679 static bool trans_HLT(DisasContext *s, arg_i *a) 2680 { 2681 /* 2682 * HLT. This has two purposes. 2683 * Architecturally, it is an external halting debug instruction. 2684 * Since QEMU doesn't implement external debug, we treat this as 2685 * it is required for halting debug disabled: it will UNDEF. 2686 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2687 */ 2688 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2689 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2690 } else { 2691 unallocated_encoding(s); 2692 } 2693 return true; 2694 } 2695 2696 /* 2697 * Load/Store exclusive instructions are implemented by remembering 2698 * the value/address loaded, and seeing if these are the same 2699 * when the store is performed. This is not actually the architecturally 2700 * mandated semantics, but it works for typical guest code sequences 2701 * and avoids having to monitor regular stores. 2702 * 2703 * The store exclusive uses the atomic cmpxchg primitives to avoid 2704 * races in multi-threaded linux-user and when MTTCG softmmu is 2705 * enabled. 
2706 */ 2707 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2708 int size, bool is_pair) 2709 { 2710 int idx = get_mem_index(s); 2711 TCGv_i64 dirty_addr, clean_addr; 2712 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2713 2714 s->is_ldex = true; 2715 dirty_addr = cpu_reg_sp(s, rn); 2716 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2717 2718 g_assert(size <= 3); 2719 if (is_pair) { 2720 g_assert(size >= 2); 2721 if (size == 2) { 2722 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2723 if (s->be_data == MO_LE) { 2724 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2725 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2726 } else { 2727 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2728 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2729 } 2730 } else { 2731 TCGv_i128 t16 = tcg_temp_new_i128(); 2732 2733 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2734 2735 if (s->be_data == MO_LE) { 2736 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2737 cpu_exclusive_high, t16); 2738 } else { 2739 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2740 cpu_exclusive_val, t16); 2741 } 2742 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2743 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2744 } 2745 } else { 2746 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2747 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2748 } 2749 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2750 } 2751 2752 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2753 int rn, int size, int is_pair) 2754 { 2755 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2756 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2757 * [addr] = {Rt}; 2758 * if (is_pair) { 2759 * [addr + datasize] = {Rt2}; 2760 * } 2761 * {Rd} = 0; 2762 * } else { 2763 * {Rd} = 1; 2764 * } 2765 * env->exclusive_addr = -1; 2766 */ 2767 TCGLabel *fail_label = gen_new_label(); 2768 TCGLabel *done_label = gen_new_label(); 2769 TCGv_i64 tmp, clean_addr; 2770 MemOp memop; 2771 2772 /* 2773 * FIXME: We are out of spec here. We have recorded only the address 2774 * from load_exclusive, not the entire range, and we assume that the 2775 * size of the access on both sides match. The architecture allows the 2776 * store to be smaller than the load, so long as the stored bytes are 2777 * within the range recorded by the load. 2778 */ 2779 2780 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2781 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2782 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2783 2784 /* 2785 * The write, and any associated faults, only happen if the virtual 2786 * and physical addresses pass the exclusive monitor check. These 2787 * faults are exceedingly unlikely, because normally the guest uses 2788 * the exact same address register for the load_exclusive, and we 2789 * would have recognized these faults there. 2790 * 2791 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2792 * unaligned 4-byte write within the range of an aligned 8-byte load. 2793 * With LSE2, the store would need to cross a 16-byte boundary when the 2794 * load did not, which would mean the store is outside the range 2795 * recorded for the monitor, which would have failed a corrected monitor 2796 * check above. 
For now, we assume no size change and retain the 2797 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2798 * 2799 * It is possible to trigger an MTE fault, by performing the load with 2800 * a virtual address with a valid tag and performing the store with the 2801 * same virtual address and a different invalid tag. 2802 */ 2803 memop = size + is_pair; 2804 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2805 memop |= MO_ALIGN; 2806 } 2807 memop = finalize_memop(s, memop); 2808 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2809 2810 tmp = tcg_temp_new_i64(); 2811 if (is_pair) { 2812 if (size == 2) { 2813 if (s->be_data == MO_LE) { 2814 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2815 } else { 2816 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2817 } 2818 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2819 cpu_exclusive_val, tmp, 2820 get_mem_index(s), memop); 2821 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2822 } else { 2823 TCGv_i128 t16 = tcg_temp_new_i128(); 2824 TCGv_i128 c16 = tcg_temp_new_i128(); 2825 TCGv_i64 a, b; 2826 2827 if (s->be_data == MO_LE) { 2828 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2829 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2830 cpu_exclusive_high); 2831 } else { 2832 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2833 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2834 cpu_exclusive_val); 2835 } 2836 2837 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2838 get_mem_index(s), memop); 2839 2840 a = tcg_temp_new_i64(); 2841 b = tcg_temp_new_i64(); 2842 if (s->be_data == MO_LE) { 2843 tcg_gen_extr_i128_i64(a, b, t16); 2844 } else { 2845 tcg_gen_extr_i128_i64(b, a, t16); 2846 } 2847 2848 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2849 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2850 tcg_gen_or_i64(tmp, a, b); 2851 2852 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2853 } 2854 } else { 2855 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2856 cpu_reg(s, rt), get_mem_index(s), memop); 2857 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2858 } 2859 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2860 tcg_gen_br(done_label); 2861 2862 gen_set_label(fail_label); 2863 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2864 gen_set_label(done_label); 2865 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2866 } 2867 2868 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2869 int rn, int size) 2870 { 2871 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2872 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2873 int memidx = get_mem_index(s); 2874 TCGv_i64 clean_addr; 2875 MemOp memop; 2876 2877 if (rn == 31) { 2878 gen_check_sp_alignment(s); 2879 } 2880 memop = check_atomic_align(s, rn, size); 2881 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2882 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2883 memidx, memop); 2884 } 2885 2886 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2887 int rn, int size) 2888 { 2889 TCGv_i64 s1 = cpu_reg(s, rs); 2890 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2891 TCGv_i64 t1 = cpu_reg(s, rt); 2892 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2893 TCGv_i64 clean_addr; 2894 int memidx = get_mem_index(s); 2895 MemOp memop; 2896 2897 if (rn == 31) { 2898 gen_check_sp_alignment(s); 2899 } 2900 2901 /* This is a single atomic access, despite the "pair". 
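 * We therefore issue a single cmpxchg of double width (64 or 128
 * bits) rather than two separate memory operations.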
*/ 2902 memop = check_atomic_align(s, rn, size + 1); 2903 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2904 2905 if (size == 2) { 2906 TCGv_i64 cmp = tcg_temp_new_i64(); 2907 TCGv_i64 val = tcg_temp_new_i64(); 2908 2909 if (s->be_data == MO_LE) { 2910 tcg_gen_concat32_i64(val, t1, t2); 2911 tcg_gen_concat32_i64(cmp, s1, s2); 2912 } else { 2913 tcg_gen_concat32_i64(val, t2, t1); 2914 tcg_gen_concat32_i64(cmp, s2, s1); 2915 } 2916 2917 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2918 2919 if (s->be_data == MO_LE) { 2920 tcg_gen_extr32_i64(s1, s2, cmp); 2921 } else { 2922 tcg_gen_extr32_i64(s2, s1, cmp); 2923 } 2924 } else { 2925 TCGv_i128 cmp = tcg_temp_new_i128(); 2926 TCGv_i128 val = tcg_temp_new_i128(); 2927 2928 if (s->be_data == MO_LE) { 2929 tcg_gen_concat_i64_i128(val, t1, t2); 2930 tcg_gen_concat_i64_i128(cmp, s1, s2); 2931 } else { 2932 tcg_gen_concat_i64_i128(val, t2, t1); 2933 tcg_gen_concat_i64_i128(cmp, s2, s1); 2934 } 2935 2936 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2937 2938 if (s->be_data == MO_LE) { 2939 tcg_gen_extr_i128_i64(s1, s2, cmp); 2940 } else { 2941 tcg_gen_extr_i128_i64(s2, s1, cmp); 2942 } 2943 } 2944 } 2945 2946 /* 2947 * Compute the ISS.SF bit for syndrome information if an exception 2948 * is taken on a load or store. This indicates whether the instruction 2949 * is accessing a 32-bit or 64-bit register. This logic is derived 2950 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2951 */ 2952 static bool ldst_iss_sf(int size, bool sign, bool ext) 2953 { 2954 2955 if (sign) { 2956 /* 2957 * Signed loads are 64 bit results if we are not going to 2958 * do a zero-extend from 32 to 64 after the load. 2959 * (For a store, sign and ext are always false.) 2960 */ 2961 return !ext; 2962 } else { 2963 /* Unsigned loads/stores work at the specified size */ 2964 return size == MO_64; 2965 } 2966 } 2967 2968 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2969 { 2970 if (a->rn == 31) { 2971 gen_check_sp_alignment(s); 2972 } 2973 if (a->lasr) { 2974 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2975 } 2976 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2977 return true; 2978 } 2979 2980 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2981 { 2982 if (a->rn == 31) { 2983 gen_check_sp_alignment(s); 2984 } 2985 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2986 if (a->lasr) { 2987 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2988 } 2989 return true; 2990 } 2991 2992 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2993 { 2994 TCGv_i64 clean_addr; 2995 MemOp memop; 2996 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2997 2998 /* 2999 * StoreLORelease is the same as Store-Release for QEMU, but 3000 * needs the feature-test. 3001 */ 3002 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3003 return false; 3004 } 3005 /* Generate ISS for non-exclusive accesses including LASR. 
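 * The barrier emitted before the store below is what provides the
 * Store-Release ordering.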
*/ 3006 if (a->rn == 31) { 3007 gen_check_sp_alignment(s); 3008 } 3009 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3010 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3011 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3012 true, a->rn != 31, memop); 3013 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3014 iss_sf, a->lasr); 3015 return true; 3016 } 3017 3018 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3019 { 3020 TCGv_i64 clean_addr; 3021 MemOp memop; 3022 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3023 3024 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3025 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3026 return false; 3027 } 3028 /* Generate ISS for non-exclusive accesses including LASR. */ 3029 if (a->rn == 31) { 3030 gen_check_sp_alignment(s); 3031 } 3032 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3033 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3034 false, a->rn != 31, memop); 3035 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3036 a->rt, iss_sf, a->lasr); 3037 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3038 return true; 3039 } 3040 3041 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3042 { 3043 if (a->rn == 31) { 3044 gen_check_sp_alignment(s); 3045 } 3046 if (a->lasr) { 3047 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3048 } 3049 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3050 return true; 3051 } 3052 3053 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3054 { 3055 if (a->rn == 31) { 3056 gen_check_sp_alignment(s); 3057 } 3058 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3059 if (a->lasr) { 3060 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3061 } 3062 return true; 3063 } 3064 3065 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3066 { 3067 if (!dc_isar_feature(aa64_atomics, s)) { 3068 return false; 3069 } 3070 if (((a->rt | a->rs) & 1) != 0) { 3071 return false; 3072 } 3073 3074 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3075 return true; 3076 } 3077 3078 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3079 { 3080 if (!dc_isar_feature(aa64_atomics, s)) { 3081 return false; 3082 } 3083 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3084 return true; 3085 } 3086 3087 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3088 { 3089 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3090 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3091 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3092 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3093 3094 gen_pc_plus_diff(s, clean_addr, a->imm); 3095 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3096 false, true, a->rt, iss_sf, false); 3097 return true; 3098 } 3099 3100 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3101 { 3102 /* Load register (literal), vector version */ 3103 TCGv_i64 clean_addr; 3104 MemOp memop; 3105 3106 if (!fp_access_check(s)) { 3107 return true; 3108 } 3109 memop = finalize_memop_asimd(s, a->sz); 3110 clean_addr = tcg_temp_new_i64(); 3111 gen_pc_plus_diff(s, clean_addr, a->imm); 3112 do_fp_ld(s, a->rt, clean_addr, memop); 3113 return true; 3114 } 3115 3116 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3117 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3118 uint64_t offset, bool is_store, MemOp mop) 3119 { 3120 if (a->rn == 31) { 3121 gen_check_sp_alignment(s); 3122 } 3123 3124 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3125 if (!a->p) { 3126 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3127 } 3128 3129 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3130 (a->w || a->rn != 31), 2 << a->sz, mop); 3131 } 3132 3133 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3134 TCGv_i64 dirty_addr, uint64_t offset) 3135 { 3136 if (a->w) { 3137 if (a->p) { 3138 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3139 } 3140 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3141 } 3142 } 3143 3144 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3145 { 3146 uint64_t offset = a->imm << a->sz; 3147 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3148 MemOp mop = finalize_memop(s, a->sz); 3149 3150 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3151 tcg_rt = cpu_reg(s, a->rt); 3152 tcg_rt2 = cpu_reg(s, a->rt2); 3153 /* 3154 * We built mop above for the single logical access -- rebuild it 3155 * now for the paired operation. 3156 * 3157 * With LSE2, non-sign-extending pairs are treated atomically if 3158 * aligned, and if unaligned one of the pair will be completely 3159 * within a 16-byte block and that element will be atomic. 3160 * Otherwise each element is separately atomic. 3161 * In all cases, issue one operation with the correct atomicity. 3162 */ 3163 mop = a->sz + 1; 3164 if (s->align_mem) { 3165 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3166 } 3167 mop = finalize_memop_pair(s, mop); 3168 if (a->sz == 2) { 3169 TCGv_i64 tmp = tcg_temp_new_i64(); 3170 3171 if (s->be_data == MO_LE) { 3172 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3173 } else { 3174 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3175 } 3176 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3177 } else { 3178 TCGv_i128 tmp = tcg_temp_new_i128(); 3179 3180 if (s->be_data == MO_LE) { 3181 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3182 } else { 3183 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3184 } 3185 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3186 } 3187 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3188 return true; 3189 } 3190 3191 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3192 { 3193 uint64_t offset = a->imm << a->sz; 3194 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3195 MemOp mop = finalize_memop(s, a->sz); 3196 3197 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3198 tcg_rt = cpu_reg(s, a->rt); 3199 tcg_rt2 = cpu_reg(s, a->rt2); 3200 3201 /* 3202 * We built mop above for the single logical access -- rebuild it 3203 * now for the paired operation. 3204 * 3205 * With LSE2, non-sign-extending pairs are treated atomically if 3206 * aligned, and if unaligned one of the pair will be completely 3207 * within a 16-byte block and that element will be atomic. 3208 * Otherwise each element is separately atomic. 3209 * In all cases, issue one operation with the correct atomicity. 3210 * 3211 * This treats sign-extending loads like zero-extending loads, 3212 * since that reuses the most code below. 3213 */ 3214 mop = a->sz + 1; 3215 if (s->align_mem) { 3216 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3217 } 3218 mop = finalize_memop_pair(s, mop); 3219 if (a->sz == 2) { 3220 int o2 = s->be_data == MO_LE ? 
32 : 0; 3221 int o1 = o2 ^ 32; 3222 3223 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3224 if (a->sign) { 3225 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3226 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3227 } else { 3228 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3229 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3230 } 3231 } else { 3232 TCGv_i128 tmp = tcg_temp_new_i128(); 3233 3234 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3235 if (s->be_data == MO_LE) { 3236 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3237 } else { 3238 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3239 } 3240 } 3241 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3242 return true; 3243 } 3244 3245 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3246 { 3247 uint64_t offset = a->imm << a->sz; 3248 TCGv_i64 clean_addr, dirty_addr; 3249 MemOp mop; 3250 3251 if (!fp_access_check(s)) { 3252 return true; 3253 } 3254 3255 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3256 mop = finalize_memop_asimd(s, a->sz); 3257 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3258 do_fp_st(s, a->rt, clean_addr, mop); 3259 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3260 do_fp_st(s, a->rt2, clean_addr, mop); 3261 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3262 return true; 3263 } 3264 3265 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3266 { 3267 uint64_t offset = a->imm << a->sz; 3268 TCGv_i64 clean_addr, dirty_addr; 3269 MemOp mop; 3270 3271 if (!fp_access_check(s)) { 3272 return true; 3273 } 3274 3275 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3276 mop = finalize_memop_asimd(s, a->sz); 3277 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3278 do_fp_ld(s, a->rt, clean_addr, mop); 3279 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3280 do_fp_ld(s, a->rt2, clean_addr, mop); 3281 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3282 return true; 3283 } 3284 3285 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3286 { 3287 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3288 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3289 MemOp mop; 3290 TCGv_i128 tmp; 3291 3292 /* STGP only comes in one size. */ 3293 tcg_debug_assert(a->sz == MO_64); 3294 3295 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3296 return false; 3297 } 3298 3299 if (a->rn == 31) { 3300 gen_check_sp_alignment(s); 3301 } 3302 3303 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3304 if (!a->p) { 3305 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3306 } 3307 3308 clean_addr = clean_data_tbi(s, dirty_addr); 3309 tcg_rt = cpu_reg(s, a->rt); 3310 tcg_rt2 = cpu_reg(s, a->rt2); 3311 3312 /* 3313 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3314 * and one tag operation. We implement it as one single aligned 16-byte 3315 * memory operation for convenience. Note that the alignment ensures 3316 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3317 */ 3318 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3319 3320 tmp = tcg_temp_new_i128(); 3321 if (s->be_data == MO_LE) { 3322 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3323 } else { 3324 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3325 } 3326 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3327 3328 /* Perform the tag store, if tag access enabled. 
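 * Under MTTCG (CF_PARALLEL) use the parallel helper so that the tag
 * update is atomic with respect to other vCPUs.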
*/ 3329 if (s->ata[0]) { 3330 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3331 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3332 } else { 3333 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3334 } 3335 } 3336 3337 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3338 return true; 3339 } 3340 3341 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3342 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3343 uint64_t offset, bool is_store, MemOp mop) 3344 { 3345 int memidx; 3346 3347 if (a->rn == 31) { 3348 gen_check_sp_alignment(s); 3349 } 3350 3351 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3352 if (!a->p) { 3353 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3354 } 3355 memidx = get_a64_user_mem_index(s, a->unpriv); 3356 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3357 a->w || a->rn != 31, 3358 mop, a->unpriv, memidx); 3359 } 3360 3361 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3362 TCGv_i64 dirty_addr, uint64_t offset) 3363 { 3364 if (a->w) { 3365 if (a->p) { 3366 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3367 } 3368 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3369 } 3370 } 3371 3372 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3373 { 3374 bool iss_sf, iss_valid = !a->w; 3375 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3376 int memidx = get_a64_user_mem_index(s, a->unpriv); 3377 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3378 3379 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3380 3381 tcg_rt = cpu_reg(s, a->rt); 3382 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3383 3384 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3385 iss_valid, a->rt, iss_sf, false); 3386 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3387 return true; 3388 } 3389 3390 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3391 { 3392 bool iss_sf, iss_valid = !a->w; 3393 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3394 int memidx = get_a64_user_mem_index(s, a->unpriv); 3395 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3396 3397 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3398 3399 tcg_rt = cpu_reg(s, a->rt); 3400 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3401 3402 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3403 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3404 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3405 return true; 3406 } 3407 3408 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3409 { 3410 TCGv_i64 clean_addr, dirty_addr; 3411 MemOp mop; 3412 3413 if (!fp_access_check(s)) { 3414 return true; 3415 } 3416 mop = finalize_memop_asimd(s, a->sz); 3417 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3418 do_fp_st(s, a->rt, clean_addr, mop); 3419 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3420 return true; 3421 } 3422 3423 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3424 { 3425 TCGv_i64 clean_addr, dirty_addr; 3426 MemOp mop; 3427 3428 if (!fp_access_check(s)) { 3429 return true; 3430 } 3431 mop = finalize_memop_asimd(s, a->sz); 3432 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3433 do_fp_ld(s, a->rt, clean_addr, mop); 3434 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3435 return true; 3436 } 3437 3438 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3439 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3440 bool is_store, MemOp memop) 3441 { 3442 TCGv_i64 tcg_rm; 3443 3444 if (a->rn == 31) { 3445 
gen_check_sp_alignment(s); 3446 } 3447 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3448 3449 tcg_rm = read_cpu_reg(s, a->rm, 1); 3450 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3451 3452 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3453 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3454 } 3455 3456 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3457 { 3458 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3459 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3460 MemOp memop; 3461 3462 if (extract32(a->opt, 1, 1) == 0) { 3463 return false; 3464 } 3465 3466 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3467 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3468 tcg_rt = cpu_reg(s, a->rt); 3469 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3470 a->ext, true, a->rt, iss_sf, false); 3471 return true; 3472 } 3473 3474 static bool trans_STR(DisasContext *s, arg_ldst *a) 3475 { 3476 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3477 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3478 MemOp memop; 3479 3480 if (extract32(a->opt, 1, 1) == 0) { 3481 return false; 3482 } 3483 3484 memop = finalize_memop(s, a->sz); 3485 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3486 tcg_rt = cpu_reg(s, a->rt); 3487 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3488 return true; 3489 } 3490 3491 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3492 { 3493 TCGv_i64 clean_addr, dirty_addr; 3494 MemOp memop; 3495 3496 if (extract32(a->opt, 1, 1) == 0) { 3497 return false; 3498 } 3499 3500 if (!fp_access_check(s)) { 3501 return true; 3502 } 3503 3504 memop = finalize_memop_asimd(s, a->sz); 3505 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3506 do_fp_ld(s, a->rt, clean_addr, memop); 3507 return true; 3508 } 3509 3510 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3511 { 3512 TCGv_i64 clean_addr, dirty_addr; 3513 MemOp memop; 3514 3515 if (extract32(a->opt, 1, 1) == 0) { 3516 return false; 3517 } 3518 3519 if (!fp_access_check(s)) { 3520 return true; 3521 } 3522 3523 memop = finalize_memop_asimd(s, a->sz); 3524 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3525 do_fp_st(s, a->rt, clean_addr, memop); 3526 return true; 3527 } 3528 3529 3530 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3531 int sign, bool invert) 3532 { 3533 MemOp mop = a->sz | sign; 3534 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3535 3536 if (a->rn == 31) { 3537 gen_check_sp_alignment(s); 3538 } 3539 mop = check_atomic_align(s, a->rn, mop); 3540 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3541 a->rn != 31, mop); 3542 tcg_rs = read_cpu_reg(s, a->rs, true); 3543 tcg_rt = cpu_reg(s, a->rt); 3544 if (invert) { 3545 tcg_gen_not_i64(tcg_rs, tcg_rs); 3546 } 3547 /* 3548 * The tcg atomic primitives are all full barriers. Therefore we 3549 * can ignore the Acquire and Release bits of this instruction. 
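 *
 * For the signed min/max forms the value is loaded sign-extended so
 * the comparison is signed; the architectural result in Rt is the
 * zero-extended memory value, hence the re-extension below.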
3550 */ 3551 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3552 3553 if (mop & MO_SIGN) { 3554 switch (a->sz) { 3555 case MO_8: 3556 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3557 break; 3558 case MO_16: 3559 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3560 break; 3561 case MO_32: 3562 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3563 break; 3564 case MO_64: 3565 break; 3566 default: 3567 g_assert_not_reached(); 3568 } 3569 } 3570 return true; 3571 } 3572 3573 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3574 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3575 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3576 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3577 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3578 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3579 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3580 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3581 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3582 3583 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3584 { 3585 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3586 TCGv_i64 clean_addr; 3587 MemOp mop; 3588 3589 if (!dc_isar_feature(aa64_atomics, s) || 3590 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3591 return false; 3592 } 3593 if (a->rn == 31) { 3594 gen_check_sp_alignment(s); 3595 } 3596 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3597 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3598 a->rn != 31, mop); 3599 /* 3600 * LDAPR* are a special case because they are a simple load, not a 3601 * fetch-and-do-something op. 3602 * The architectural consistency requirements here are weaker than 3603 * full load-acquire (we only need "load-acquire processor consistent"), 3604 * but we choose to implement them as full LDAQ. 3605 */ 3606 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3607 true, a->rt, iss_sf, true); 3608 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3609 return true; 3610 } 3611 3612 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3613 { 3614 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3615 MemOp memop; 3616 3617 /* Load with pointer authentication */ 3618 if (!dc_isar_feature(aa64_pauth, s)) { 3619 return false; 3620 } 3621 3622 if (a->rn == 31) { 3623 gen_check_sp_alignment(s); 3624 } 3625 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3626 3627 if (s->pauth_active) { 3628 if (!a->m) { 3629 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3630 tcg_constant_i64(0)); 3631 } else { 3632 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3633 tcg_constant_i64(0)); 3634 } 3635 } 3636 3637 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3638 3639 memop = finalize_memop(s, MO_64); 3640 3641 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
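 * Any pointer authentication required has already been applied to
 * dirty_addr by the helper call above.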
*/ 3642 clean_addr = gen_mte_check1(s, dirty_addr, false, 3643 a->w || a->rn != 31, memop); 3644 3645 tcg_rt = cpu_reg(s, a->rt); 3646 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3647 /* extend */ false, /* iss_valid */ !a->w, 3648 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3649 3650 if (a->w) { 3651 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3652 } 3653 return true; 3654 } 3655 3656 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3657 { 3658 TCGv_i64 clean_addr, dirty_addr; 3659 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3660 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3661 3662 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3663 return false; 3664 } 3665 3666 if (a->rn == 31) { 3667 gen_check_sp_alignment(s); 3668 } 3669 3670 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3671 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3672 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3673 clean_addr = clean_data_tbi(s, dirty_addr); 3674 3675 /* 3676 * Load-AcquirePC semantics; we implement as the slightly more 3677 * restrictive Load-Acquire. 3678 */ 3679 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3680 a->rt, iss_sf, true); 3681 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3682 return true; 3683 } 3684 3685 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3686 { 3687 TCGv_i64 clean_addr, dirty_addr; 3688 MemOp mop = a->sz; 3689 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3690 3691 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3692 return false; 3693 } 3694 3695 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3696 3697 if (a->rn == 31) { 3698 gen_check_sp_alignment(s); 3699 } 3700 3701 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3702 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3703 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3704 clean_addr = clean_data_tbi(s, dirty_addr); 3705 3706 /* Store-Release semantics */ 3707 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3708 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3709 return true; 3710 } 3711 3712 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3713 { 3714 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3715 MemOp endian, align, mop; 3716 3717 int total; /* total bytes */ 3718 int elements; /* elements per vector */ 3719 int r; 3720 int size = a->sz; 3721 3722 if (!a->p && a->rm != 0) { 3723 /* For non-postindexed accesses the Rm field must be 0 */ 3724 return false; 3725 } 3726 if (size == 3 && !a->q && a->selem != 1) { 3727 return false; 3728 } 3729 if (!fp_access_check(s)) { 3730 return true; 3731 } 3732 3733 if (a->rn == 31) { 3734 gen_check_sp_alignment(s); 3735 } 3736 3737 /* For our purposes, bytes are always little-endian. */ 3738 endian = s->be_data; 3739 if (size == 0) { 3740 endian = MO_LE; 3741 } 3742 3743 total = a->rpt * a->selem * (a->q ? 16 : 8); 3744 tcg_rn = cpu_reg_sp(s, a->rn); 3745 3746 /* 3747 * Issue the MTE check vs the logical repeat count, before we 3748 * promote consecutive little-endian elements below. 3749 */ 3750 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3751 finalize_memop_asimd(s, size)); 3752 3753 /* 3754 * Consecutive little-endian elements from a single register 3755 * can be promoted to a larger little-endian operation. 3756 */ 3757 align = MO_ALIGN; 3758 if (a->selem == 1 && endian == MO_LE) { 3759 align = pow2_align(size); 3760 size = 3; 3761 } 3762 if (!s->align_mem) { 3763 align = 0; 3764 } 3765 mop = endian | size | align; 3766 3767 elements = (a->q ? 
16 : 8) >> size; 3768 tcg_ebytes = tcg_constant_i64(1 << size); 3769 for (r = 0; r < a->rpt; r++) { 3770 int e; 3771 for (e = 0; e < elements; e++) { 3772 int xs; 3773 for (xs = 0; xs < a->selem; xs++) { 3774 int tt = (a->rt + r + xs) % 32; 3775 do_vec_ld(s, tt, e, clean_addr, mop); 3776 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3777 } 3778 } 3779 } 3780 3781 /* 3782 * For non-quad operations, setting a slice of the low 64 bits of 3783 * the register clears the high 64 bits (in the ARM ARM pseudocode 3784 * this is implicit in the fact that 'rval' is a 64 bit wide 3785 * variable). For quad operations, we might still need to zero 3786 * the high bits of SVE. 3787 */ 3788 for (r = 0; r < a->rpt * a->selem; r++) { 3789 int tt = (a->rt + r) % 32; 3790 clear_vec_high(s, a->q, tt); 3791 } 3792 3793 if (a->p) { 3794 if (a->rm == 31) { 3795 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3796 } else { 3797 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3798 } 3799 } 3800 return true; 3801 } 3802 3803 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3804 { 3805 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3806 MemOp endian, align, mop; 3807 3808 int total; /* total bytes */ 3809 int elements; /* elements per vector */ 3810 int r; 3811 int size = a->sz; 3812 3813 if (!a->p && a->rm != 0) { 3814 /* For non-postindexed accesses the Rm field must be 0 */ 3815 return false; 3816 } 3817 if (size == 3 && !a->q && a->selem != 1) { 3818 return false; 3819 } 3820 if (!fp_access_check(s)) { 3821 return true; 3822 } 3823 3824 if (a->rn == 31) { 3825 gen_check_sp_alignment(s); 3826 } 3827 3828 /* For our purposes, bytes are always little-endian. */ 3829 endian = s->be_data; 3830 if (size == 0) { 3831 endian = MO_LE; 3832 } 3833 3834 total = a->rpt * a->selem * (a->q ? 16 : 8); 3835 tcg_rn = cpu_reg_sp(s, a->rn); 3836 3837 /* 3838 * Issue the MTE check vs the logical repeat count, before we 3839 * promote consecutive little-endian elements below. 3840 */ 3841 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3842 finalize_memop_asimd(s, size)); 3843 3844 /* 3845 * Consecutive little-endian elements from a single register 3846 * can be promoted to a larger little-endian operation. 3847 */ 3848 align = MO_ALIGN; 3849 if (a->selem == 1 && endian == MO_LE) { 3850 align = pow2_align(size); 3851 size = 3; 3852 } 3853 if (!s->align_mem) { 3854 align = 0; 3855 } 3856 mop = endian | size | align; 3857 3858 elements = (a->q ? 
16 : 8) >> size; 3859 tcg_ebytes = tcg_constant_i64(1 << size); 3860 for (r = 0; r < a->rpt; r++) { 3861 int e; 3862 for (e = 0; e < elements; e++) { 3863 int xs; 3864 for (xs = 0; xs < a->selem; xs++) { 3865 int tt = (a->rt + r + xs) % 32; 3866 do_vec_st(s, tt, e, clean_addr, mop); 3867 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3868 } 3869 } 3870 } 3871 3872 if (a->p) { 3873 if (a->rm == 31) { 3874 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3875 } else { 3876 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3877 } 3878 } 3879 return true; 3880 } 3881 3882 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3883 { 3884 int xs, total, rt; 3885 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3886 MemOp mop; 3887 3888 if (!a->p && a->rm != 0) { 3889 return false; 3890 } 3891 if (!fp_access_check(s)) { 3892 return true; 3893 } 3894 3895 if (a->rn == 31) { 3896 gen_check_sp_alignment(s); 3897 } 3898 3899 total = a->selem << a->scale; 3900 tcg_rn = cpu_reg_sp(s, a->rn); 3901 3902 mop = finalize_memop_asimd(s, a->scale); 3903 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3904 total, mop); 3905 3906 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3907 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3908 do_vec_st(s, rt, a->index, clean_addr, mop); 3909 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3910 } 3911 3912 if (a->p) { 3913 if (a->rm == 31) { 3914 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3915 } else { 3916 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3917 } 3918 } 3919 return true; 3920 } 3921 3922 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3923 { 3924 int xs, total, rt; 3925 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3926 MemOp mop; 3927 3928 if (!a->p && a->rm != 0) { 3929 return false; 3930 } 3931 if (!fp_access_check(s)) { 3932 return true; 3933 } 3934 3935 if (a->rn == 31) { 3936 gen_check_sp_alignment(s); 3937 } 3938 3939 total = a->selem << a->scale; 3940 tcg_rn = cpu_reg_sp(s, a->rn); 3941 3942 mop = finalize_memop_asimd(s, a->scale); 3943 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3944 total, mop); 3945 3946 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3947 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3948 do_vec_ld(s, rt, a->index, clean_addr, mop); 3949 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3950 } 3951 3952 if (a->p) { 3953 if (a->rm == 31) { 3954 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3955 } else { 3956 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3957 } 3958 } 3959 return true; 3960 } 3961 3962 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3963 { 3964 int xs, total, rt; 3965 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3966 MemOp mop; 3967 3968 if (!a->p && a->rm != 0) { 3969 return false; 3970 } 3971 if (!fp_access_check(s)) { 3972 return true; 3973 } 3974 3975 if (a->rn == 31) { 3976 gen_check_sp_alignment(s); 3977 } 3978 3979 total = a->selem << a->scale; 3980 tcg_rn = cpu_reg_sp(s, a->rn); 3981 3982 mop = finalize_memop_asimd(s, a->scale); 3983 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3984 total, mop); 3985 3986 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3987 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3988 /* Load and replicate to all elements */ 3989 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3990 3991 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3992 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3993 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 3994 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3995 } 3996 3997 if (a->p) { 3998 if (a->rm == 31) { 3999 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4000 } else { 4001 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4002 } 4003 } 4004 return true; 4005 } 4006 4007 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4008 { 4009 TCGv_i64 addr, clean_addr, tcg_rt; 4010 int size = 4 << s->dcz_blocksize; 4011 4012 if (!dc_isar_feature(aa64_mte, s)) { 4013 return false; 4014 } 4015 if (s->current_el == 0) { 4016 return false; 4017 } 4018 4019 if (a->rn == 31) { 4020 gen_check_sp_alignment(s); 4021 } 4022 4023 addr = read_cpu_reg_sp(s, a->rn, true); 4024 tcg_gen_addi_i64(addr, addr, a->imm); 4025 tcg_rt = cpu_reg(s, a->rt); 4026 4027 if (s->ata[0]) { 4028 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4029 } 4030 /* 4031 * The non-tags portion of STZGM is mostly like DC_ZVA, 4032 * except the alignment happens before the access. 4033 */ 4034 clean_addr = clean_data_tbi(s, addr); 4035 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4036 gen_helper_dc_zva(tcg_env, clean_addr); 4037 return true; 4038 } 4039 4040 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4041 { 4042 TCGv_i64 addr, clean_addr, tcg_rt; 4043 4044 if (!dc_isar_feature(aa64_mte, s)) { 4045 return false; 4046 } 4047 if (s->current_el == 0) { 4048 return false; 4049 } 4050 4051 if (a->rn == 31) { 4052 gen_check_sp_alignment(s); 4053 } 4054 4055 addr = read_cpu_reg_sp(s, a->rn, true); 4056 tcg_gen_addi_i64(addr, addr, a->imm); 4057 tcg_rt = cpu_reg(s, a->rt); 4058 4059 if (s->ata[0]) { 4060 gen_helper_stgm(tcg_env, addr, tcg_rt); 4061 } else { 4062 MMUAccessType acc = MMU_DATA_STORE; 4063 int size = 4 << s->gm_blocksize; 4064 4065 clean_addr = clean_data_tbi(s, addr); 4066 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4067 gen_probe_access(s, clean_addr, acc, size); 4068 } 4069 return true; 4070 } 4071 4072 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4073 { 4074 TCGv_i64 addr, clean_addr, tcg_rt; 4075 4076 if (!dc_isar_feature(aa64_mte, s)) { 4077 return false; 4078 } 4079 if (s->current_el == 0) { 4080 return false; 4081 } 4082 4083 if (a->rn == 31) { 4084 gen_check_sp_alignment(s); 4085 } 4086 4087 addr = read_cpu_reg_sp(s, a->rn, true); 4088 tcg_gen_addi_i64(addr, addr, a->imm); 4089 tcg_rt = cpu_reg(s, a->rt); 4090 4091 if (s->ata[0]) { 4092 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4093 } else { 4094 MMUAccessType acc = MMU_DATA_LOAD; 4095 int size = 4 << s->gm_blocksize; 4096 4097 clean_addr = clean_data_tbi(s, addr); 4098 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4099 gen_probe_access(s, clean_addr, acc, size); 4100 /* The result tags are zeros. */ 4101 tcg_gen_movi_i64(tcg_rt, 0); 4102 } 4103 return true; 4104 } 4105 4106 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4107 { 4108 TCGv_i64 addr, clean_addr, tcg_rt; 4109 4110 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4111 return false; 4112 } 4113 4114 if (a->rn == 31) { 4115 gen_check_sp_alignment(s); 4116 } 4117 4118 addr = read_cpu_reg_sp(s, a->rn, true); 4119 if (!a->p) { 4120 /* pre-index or signed offset */ 4121 tcg_gen_addi_i64(addr, addr, a->imm); 4122 } 4123 4124 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4125 tcg_rt = cpu_reg(s, a->rt); 4126 if (s->ata[0]) { 4127 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4128 } else { 4129 /* 4130 * Tag access disabled: we must check for aborts on the load 4131 * load from [rn+offset], and then insert a 0 tag into rt. 
4132 */ 4133 clean_addr = clean_data_tbi(s, addr); 4134 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4135 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4136 } 4137 4138 if (a->w) { 4139 /* pre-index or post-index */ 4140 if (a->p) { 4141 /* post-index */ 4142 tcg_gen_addi_i64(addr, addr, a->imm); 4143 } 4144 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4145 } 4146 return true; 4147 } 4148 4149 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4150 { 4151 TCGv_i64 addr, tcg_rt; 4152 4153 if (a->rn == 31) { 4154 gen_check_sp_alignment(s); 4155 } 4156 4157 addr = read_cpu_reg_sp(s, a->rn, true); 4158 if (!a->p) { 4159 /* pre-index or signed offset */ 4160 tcg_gen_addi_i64(addr, addr, a->imm); 4161 } 4162 tcg_rt = cpu_reg_sp(s, a->rt); 4163 if (!s->ata[0]) { 4164 /* 4165 * For STG and ST2G, we need to check alignment and probe memory. 4166 * TODO: For STZG and STZ2G, we could rely on the stores below, 4167 * at least for system mode; user-only won't enforce alignment. 4168 */ 4169 if (is_pair) { 4170 gen_helper_st2g_stub(tcg_env, addr); 4171 } else { 4172 gen_helper_stg_stub(tcg_env, addr); 4173 } 4174 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4175 if (is_pair) { 4176 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4177 } else { 4178 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4179 } 4180 } else { 4181 if (is_pair) { 4182 gen_helper_st2g(tcg_env, addr, tcg_rt); 4183 } else { 4184 gen_helper_stg(tcg_env, addr, tcg_rt); 4185 } 4186 } 4187 4188 if (is_zero) { 4189 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4190 TCGv_i64 zero64 = tcg_constant_i64(0); 4191 TCGv_i128 zero128 = tcg_temp_new_i128(); 4192 int mem_index = get_mem_index(s); 4193 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4194 4195 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4196 4197 /* This is 1 or 2 atomic 16-byte operations. */ 4198 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4199 if (is_pair) { 4200 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4201 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4202 } 4203 } 4204 4205 if (a->w) { 4206 /* pre-index or post-index */ 4207 if (a->p) { 4208 /* post-index */ 4209 tcg_gen_addi_i64(addr, addr, a->imm); 4210 } 4211 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4212 } 4213 return true; 4214 } 4215 4216 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4217 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4218 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4219 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4220 4221 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4222 4223 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4224 bool is_setg, SetFn fn) 4225 { 4226 int memidx; 4227 uint32_t syndrome, desc = 0; 4228 4229 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4230 return false; 4231 } 4232 4233 /* 4234 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4235 * us to pull this check before the CheckMOPSEnabled() test 4236 * (which we do in the helper function) 4237 */ 4238 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4239 a->rd == 31 || a->rn == 31) { 4240 return false; 4241 } 4242 4243 memidx = get_a64_user_mem_index(s, a->unpriv); 4244 4245 /* 4246 * We pass option_a == true, matching our implementation; 4247 * we pass wrong_option == false: helper function may set that bit. 
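     * (The helper sets wrong_option when it detects that the sequence
     * was started using the other algorithm option, which this
     * implementation does not use.)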
4248 */ 4249 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4250 is_epilogue, false, true, a->rd, a->rs, a->rn); 4251 4252 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4253 /* We may need to do MTE tag checking, so assemble the descriptor */ 4254 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4255 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4256 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4257 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4258 } 4259 /* The helper function always needs the memidx even with MTE disabled */ 4260 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4261 4262 /* 4263 * The helper needs the register numbers, but since they're in 4264 * the syndrome anyway, we let it extract them from there rather 4265 * than passing in an extra three integer arguments. 4266 */ 4267 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4268 return true; 4269 } 4270 4271 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4272 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4273 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4274 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4275 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4276 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4277 4278 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4279 4280 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4281 { 4282 int rmemidx, wmemidx; 4283 uint32_t syndrome, rdesc = 0, wdesc = 0; 4284 bool wunpriv = extract32(a->options, 0, 1); 4285 bool runpriv = extract32(a->options, 1, 1); 4286 4287 /* 4288 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4289 * us to pull this check before the CheckMOPSEnabled() test 4290 * (which we do in the helper function) 4291 */ 4292 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4293 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4294 return false; 4295 } 4296 4297 rmemidx = get_a64_user_mem_index(s, runpriv); 4298 wmemidx = get_a64_user_mem_index(s, wunpriv); 4299 4300 /* 4301 * We pass option_a == true, matching our implementation; 4302 * we pass wrong_option == false: helper function may set that bit. 4303 */ 4304 syndrome = syn_mop(false, false, a->options, is_epilogue, 4305 false, true, a->rd, a->rs, a->rn); 4306 4307 /* If we need to do MTE tag checking, assemble the descriptors */ 4308 if (s->mte_active[runpriv]) { 4309 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4310 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4311 } 4312 if (s->mte_active[wunpriv]) { 4313 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4314 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4315 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4316 } 4317 /* The helper function needs these parts of the descriptor regardless */ 4318 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4319 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4320 4321 /* 4322 * The helper needs the register numbers, but since they're in 4323 * the syndrome anyway, we let it extract them from there rather 4324 * than passing in an extra three integer arguments. 
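     * (The MOPS exception syndrome has to identify the destination,
     * source and size registers so that a handler can adjust or restart
     * the operation, which is why syn_mop() already carries them.)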
4325 */ 4326 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4327 tcg_constant_i32(rdesc)); 4328 return true; 4329 } 4330 4331 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4332 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4333 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4334 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4335 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4336 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4337 4338 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4339 4340 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4341 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4342 { 4343 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4344 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4345 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4346 4347 fn(tcg_rd, tcg_rn, tcg_imm); 4348 if (!a->sf) { 4349 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4350 } 4351 return true; 4352 } 4353 4354 /* 4355 * PC-rel. addressing 4356 */ 4357 4358 static bool trans_ADR(DisasContext *s, arg_ri *a) 4359 { 4360 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4361 return true; 4362 } 4363 4364 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4365 { 4366 int64_t offset = (int64_t)a->imm << 12; 4367 4368 /* The page offset is ok for CF_PCREL. */ 4369 offset -= s->pc_curr & 0xfff; 4370 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4371 return true; 4372 } 4373 4374 /* 4375 * Add/subtract (immediate) 4376 */ 4377 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4378 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4379 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4380 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4381 4382 /* 4383 * Add/subtract (immediate, with tags) 4384 */ 4385 4386 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4387 bool sub_op) 4388 { 4389 TCGv_i64 tcg_rn, tcg_rd; 4390 int imm; 4391 4392 imm = a->uimm6 << LOG2_TAG_GRANULE; 4393 if (sub_op) { 4394 imm = -imm; 4395 } 4396 4397 tcg_rn = cpu_reg_sp(s, a->rn); 4398 tcg_rd = cpu_reg_sp(s, a->rd); 4399 4400 if (s->ata[0]) { 4401 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4402 tcg_constant_i32(imm), 4403 tcg_constant_i32(a->uimm4)); 4404 } else { 4405 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4406 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4407 } 4408 return true; 4409 } 4410 4411 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4412 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4413 4414 /* The input should be a value in the bottom e bits (with higher 4415 * bits zero); returns that value replicated into every element 4416 * of size e in a 64 bit integer. 4417 */ 4418 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4419 { 4420 assert(e != 0); 4421 while (e < 64) { 4422 mask |= mask << e; 4423 e *= 2; 4424 } 4425 return mask; 4426 } 4427 4428 /* 4429 * Logical (immediate) 4430 */ 4431 4432 /* 4433 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4434 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4435 * value (ie should cause a guest UNDEF exception), and true if they are 4436 * valid, in which case the decoded bit pattern is written to result. 
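 *
 * For example, immn = 0, imms = 0b111100, immr = 0 selects 2-bit
 * elements each containing a single set bit with no rotation, so the
 * decoded wmask is 0x5555555555555555 (a 32-bit form then keeps only
 * the low 32 bits).  Likewise immn = 0, imms = 0b100101, immr = 0b000011
 * selects 16-bit elements holding a run of 6 ones rotated right by 3,
 * i.e. 0xe007 per element and 0xe007e007e007e007 overall.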
4437 */ 4438 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4439 unsigned int imms, unsigned int immr) 4440 { 4441 uint64_t mask; 4442 unsigned e, levels, s, r; 4443 int len; 4444 4445 assert(immn < 2 && imms < 64 && immr < 64); 4446 4447 /* The bit patterns we create here are 64 bit patterns which 4448 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4449 * 64 bits each. Each element contains the same value: a run 4450 * of between 1 and e-1 non-zero bits, rotated within the 4451 * element by between 0 and e-1 bits. 4452 * 4453 * The element size and run length are encoded into immn (1 bit) 4454 * and imms (6 bits) as follows: 4455 * 64 bit elements: immn = 1, imms = <length of run - 1> 4456 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4457 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4458 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4459 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4460 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4461 * Notice that immn = 0, imms = 11111x is the only combination 4462 * not covered by one of the above options; this is reserved. 4463 * Further, <length of run - 1> all-ones is a reserved pattern. 4464 * 4465 * In all cases the rotation is by immr % e (and immr is 6 bits). 4466 */ 4467 4468 /* First determine the element size */ 4469 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4470 if (len < 1) { 4471 /* This is the immn == 0, imms == 0x11111x case */ 4472 return false; 4473 } 4474 e = 1 << len; 4475 4476 levels = e - 1; 4477 s = imms & levels; 4478 r = immr & levels; 4479 4480 if (s == levels) { 4481 /* <length of run - 1> mustn't be all-ones. */ 4482 return false; 4483 } 4484 4485 /* Create the value of one element: s+1 set bits rotated 4486 * by r within the element (which is e bits wide)... 4487 */ 4488 mask = MAKE_64BIT_MASK(0, s + 1); 4489 if (r) { 4490 mask = (mask >> r) | (mask << (e - r)); 4491 mask &= MAKE_64BIT_MASK(0, e); 4492 } 4493 /* ...then replicate the element over the whole 64 bit value */ 4494 mask = bitfield_replicate(mask, e); 4495 *result = mask; 4496 return true; 4497 } 4498 4499 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4500 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4501 { 4502 TCGv_i64 tcg_rd, tcg_rn; 4503 uint64_t imm; 4504 4505 /* Some immediate field values are reserved. */ 4506 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4507 extract32(a->dbm, 0, 6), 4508 extract32(a->dbm, 6, 6))) { 4509 return false; 4510 } 4511 if (!a->sf) { 4512 imm &= 0xffffffffull; 4513 } 4514 4515 tcg_rd = set_cc ? 
        cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
    tcg_rn = cpu_reg(s, a->rn);

    fn(tcg_rd, tcg_rn, imm);
    if (set_cc) {
        gen_logic_CC(a->sf, tcg_rd);
    }
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    return true;
}

TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)

/*
 * Move wide (immediate)
 */

static bool trans_MOVZ(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
    return true;
}

static bool trans_MOVN(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    uint64_t imm = a->imm;

    imm = ~(imm << pos);
    if (!a->sf) {
        imm = (uint32_t)imm;
    }
    tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
    return true;
}

static bool trans_MOVK(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    TCGv_i64 tcg_rd, tcg_im;

    tcg_rd = cpu_reg(s, a->rd);
    tcg_im = tcg_constant_i64(a->imm);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    return true;
}

/*
 * Bitfield
 */

static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ? 64 : 32;
    unsigned int ri = a->immr;
    unsigned int si = a->imms;
    unsigned int pos, len;

    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
        if (!a->sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);

        if (len < ri) {
            /*
             * Sign extend the destination field from len to fill the
             * balance of the word. Let the deposit below insert all
             * of those sign bits.
             */
            tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
            len = ri;
        }

        /*
         * We start with zero, and we haven't modified any bits outside
         * bitsize, therefore no final zero-extension is needed for !sf.
         */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
    }
    return true;
}

static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ? 64 : 32;
    unsigned int ri = a->immr;
    unsigned int si = a->imms;
    unsigned int pos, len;

    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
    }
    return true;
}

static bool trans_BFM(DisasContext *s, arg_BFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ?
64 : 32; 4645 unsigned int ri = a->immr; 4646 unsigned int si = a->imms; 4647 unsigned int pos, len; 4648 4649 tcg_rd = cpu_reg(s, a->rd); 4650 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4651 4652 if (si >= ri) { 4653 /* Wd<s-r:0> = Wn<s:r> */ 4654 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4655 len = (si - ri) + 1; 4656 pos = 0; 4657 } else { 4658 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4659 len = si + 1; 4660 pos = (bitsize - ri) & (bitsize - 1); 4661 } 4662 4663 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4664 if (!a->sf) { 4665 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4666 } 4667 return true; 4668 } 4669 4670 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4671 { 4672 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4673 4674 tcg_rd = cpu_reg(s, a->rd); 4675 4676 if (unlikely(a->imm == 0)) { 4677 /* 4678 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4679 * so an extract from bit 0 is a special case. 4680 */ 4681 if (a->sf) { 4682 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4683 } else { 4684 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4685 } 4686 } else { 4687 tcg_rm = cpu_reg(s, a->rm); 4688 tcg_rn = cpu_reg(s, a->rn); 4689 4690 if (a->sf) { 4691 /* Specialization to ROR happens in EXTRACT2. */ 4692 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4693 } else { 4694 TCGv_i32 t0 = tcg_temp_new_i32(); 4695 4696 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4697 if (a->rm == a->rn) { 4698 tcg_gen_rotri_i32(t0, t0, a->imm); 4699 } else { 4700 TCGv_i32 t1 = tcg_temp_new_i32(); 4701 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4702 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4703 } 4704 tcg_gen_extu_i32_i64(tcg_rd, t0); 4705 } 4706 } 4707 return true; 4708 } 4709 4710 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4711 { 4712 if (fp_access_check(s)) { 4713 int len = (a->len + 1) * 16; 4714 4715 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4716 vec_full_reg_offset(s, a->rm), tcg_env, 4717 a->q ? 16 : 8, vec_full_reg_size(s), 4718 (len << 6) | (a->tbx << 5) | a->rn, 4719 gen_helper_simd_tblx); 4720 } 4721 return true; 4722 } 4723 4724 typedef int simd_permute_idx_fn(int i, int part, int elements); 4725 4726 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4727 simd_permute_idx_fn *fn, int part) 4728 { 4729 MemOp esz = a->esz; 4730 int datasize = a->q ? 16 : 8; 4731 int elements = datasize >> esz; 4732 TCGv_i64 tcg_res[2], tcg_ele; 4733 4734 if (esz == MO_64 && !a->q) { 4735 return false; 4736 } 4737 if (!fp_access_check(s)) { 4738 return true; 4739 } 4740 4741 tcg_res[0] = tcg_temp_new_i64(); 4742 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4743 tcg_ele = tcg_temp_new_i64(); 4744 4745 for (int i = 0; i < elements; i++) { 4746 int o, w, idx; 4747 4748 idx = fn(i, part, elements); 4749 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4750 idx & (elements - 1), esz); 4751 4752 w = (i << (esz + 3)) / 64; 4753 o = (i << (esz + 3)) % 64; 4754 if (o == 0) { 4755 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4756 } else { 4757 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4758 } 4759 } 4760 4761 for (int i = a->q; i >= 0; --i) { 4762 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4763 } 4764 clear_vec_high(s, a->q, a->rd); 4765 return true; 4766 } 4767 4768 static int permute_load_uzp(int i, int part, int elements) 4769 { 4770 return 2 * i + part; 4771 } 4772 4773 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4774 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4775 4776 static int permute_load_trn(int i, int part, int elements) 4777 { 4778 return (i & 1) * elements + (i & ~1) + part; 4779 } 4780 4781 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4782 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4783 4784 static int permute_load_zip(int i, int part, int elements) 4785 { 4786 return (i & 1) * elements + ((part * elements + i) >> 1); 4787 } 4788 4789 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4790 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4791 4792 /* 4793 * Cryptographic AES, SHA, SHA512 4794 */ 4795 4796 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4797 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4798 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4799 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4800 4801 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4802 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4803 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4804 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4805 4806 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4807 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4808 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4809 4810 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4811 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4812 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4813 4814 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4815 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4816 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4817 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4818 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4819 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4820 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4821 4822 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4823 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4824 4825 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4826 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4827 4828 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4829 { 4830 if (!dc_isar_feature(aa64_sm3, s)) { 4831 return false; 4832 } 4833 if (fp_access_check(s)) { 4834 
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4835 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4836 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 4837 TCGv_i32 tcg_res = tcg_temp_new_i32(); 4838 4839 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 4840 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 4841 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 4842 4843 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 4844 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 4845 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 4846 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 4847 4848 /* Clear the whole register first, then store bits [127:96]. */ 4849 clear_vec(s, a->rd); 4850 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 4851 } 4852 return true; 4853 } 4854 4855 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 4856 { 4857 if (fp_access_check(s)) { 4858 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 4859 } 4860 return true; 4861 } 4862 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 4863 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 4864 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 4865 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 4866 4867 static bool trans_XAR(DisasContext *s, arg_XAR *a) 4868 { 4869 if (!dc_isar_feature(aa64_sha3, s)) { 4870 return false; 4871 } 4872 if (fp_access_check(s)) { 4873 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 4874 vec_full_reg_offset(s, a->rn), 4875 vec_full_reg_offset(s, a->rm), a->imm, 16, 4876 vec_full_reg_size(s)); 4877 } 4878 return true; 4879 } 4880 4881 /* 4882 * Advanced SIMD copy 4883 */ 4884 4885 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 4886 { 4887 unsigned esz = ctz32(imm); 4888 if (esz <= MO_64) { 4889 *pesz = esz; 4890 *pidx = imm >> (esz + 1); 4891 return true; 4892 } 4893 return false; 4894 } 4895 4896 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 4897 { 4898 MemOp esz; 4899 unsigned idx; 4900 4901 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4902 return false; 4903 } 4904 if (fp_access_check(s)) { 4905 /* 4906 * This instruction just extracts the specified element and 4907 * zero-extends it into the bottom of the destination register. 4908 */ 4909 TCGv_i64 tmp = tcg_temp_new_i64(); 4910 read_vec_element(s, tmp, a->rn, idx, esz); 4911 write_fp_dreg(s, a->rd, tmp); 4912 } 4913 return true; 4914 } 4915 4916 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 4917 { 4918 MemOp esz; 4919 unsigned idx; 4920 4921 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4922 return false; 4923 } 4924 if (esz == MO_64 && !a->q) { 4925 return false; 4926 } 4927 if (fp_access_check(s)) { 4928 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 4929 vec_reg_offset(s, a->rn, idx, esz), 4930 a->q ? 16 : 8, vec_full_reg_size(s)); 4931 } 4932 return true; 4933 } 4934 4935 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 4936 { 4937 MemOp esz; 4938 unsigned idx; 4939 4940 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4941 return false; 4942 } 4943 if (esz == MO_64 && !a->q) { 4944 return false; 4945 } 4946 if (fp_access_check(s)) { 4947 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4948 a->q ? 
16 : 8, vec_full_reg_size(s), 4949 cpu_reg(s, a->rn)); 4950 } 4951 return true; 4952 } 4953 4954 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 4955 { 4956 MemOp esz; 4957 unsigned idx; 4958 4959 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4960 return false; 4961 } 4962 if (is_signed) { 4963 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 4964 return false; 4965 } 4966 } else { 4967 if (esz == MO_64 ? !a->q : a->q) { 4968 return false; 4969 } 4970 } 4971 if (fp_access_check(s)) { 4972 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4973 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 4974 if (is_signed && !a->q) { 4975 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4976 } 4977 } 4978 return true; 4979 } 4980 4981 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 4982 TRANS(UMOV, do_smov_umov, a, 0) 4983 4984 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 4985 { 4986 MemOp esz; 4987 unsigned idx; 4988 4989 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4990 return false; 4991 } 4992 if (fp_access_check(s)) { 4993 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 4994 clear_vec_high(s, true, a->rd); 4995 } 4996 return true; 4997 } 4998 4999 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5000 { 5001 MemOp esz; 5002 unsigned didx, sidx; 5003 5004 if (!decode_esz_idx(a->di, &esz, &didx)) { 5005 return false; 5006 } 5007 sidx = a->si >> esz; 5008 if (fp_access_check(s)) { 5009 TCGv_i64 tmp = tcg_temp_new_i64(); 5010 5011 read_vec_element(s, tmp, a->rn, sidx, esz); 5012 write_vec_element(s, tmp, a->rd, didx, esz); 5013 5014 /* INS is considered a 128-bit write for SVE. */ 5015 clear_vec_high(s, true, a->rd); 5016 } 5017 return true; 5018 } 5019 5020 /* 5021 * Advanced SIMD three same 5022 */ 5023 5024 typedef struct FPScalar { 5025 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5026 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5027 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5028 } FPScalar; 5029 5030 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) 5031 { 5032 switch (a->esz) { 5033 case MO_64: 5034 if (fp_access_check(s)) { 5035 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5036 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5037 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5038 write_fp_dreg(s, a->rd, t0); 5039 } 5040 break; 5041 case MO_32: 5042 if (fp_access_check(s)) { 5043 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5044 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5045 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5046 write_fp_sreg(s, a->rd, t0); 5047 } 5048 break; 5049 case MO_16: 5050 if (!dc_isar_feature(aa64_fp16, s)) { 5051 return false; 5052 } 5053 if (fp_access_check(s)) { 5054 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5055 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5056 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 5057 write_fp_sreg(s, a->rd, t0); 5058 } 5059 break; 5060 default: 5061 return false; 5062 } 5063 return true; 5064 } 5065 5066 static const FPScalar f_scalar_fadd = { 5067 gen_helper_vfp_addh, 5068 gen_helper_vfp_adds, 5069 gen_helper_vfp_addd, 5070 }; 5071 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) 5072 5073 static const FPScalar f_scalar_fsub = { 5074 gen_helper_vfp_subh, 5075 gen_helper_vfp_subs, 5076 gen_helper_vfp_subd, 5077 }; 5078 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) 5079 5080 static const FPScalar f_scalar_fdiv = { 5081 gen_helper_vfp_divh, 5082 gen_helper_vfp_divs, 5083 gen_helper_vfp_divd, 5084 }; 5085 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) 5086 5087 static const 
FPScalar f_scalar_fmul = { 5088 gen_helper_vfp_mulh, 5089 gen_helper_vfp_muls, 5090 gen_helper_vfp_muld, 5091 }; 5092 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) 5093 5094 static const FPScalar f_scalar_fmax = { 5095 gen_helper_advsimd_maxh, 5096 gen_helper_vfp_maxs, 5097 gen_helper_vfp_maxd, 5098 }; 5099 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) 5100 5101 static const FPScalar f_scalar_fmin = { 5102 gen_helper_advsimd_minh, 5103 gen_helper_vfp_mins, 5104 gen_helper_vfp_mind, 5105 }; 5106 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) 5107 5108 static const FPScalar f_scalar_fmaxnm = { 5109 gen_helper_advsimd_maxnumh, 5110 gen_helper_vfp_maxnums, 5111 gen_helper_vfp_maxnumd, 5112 }; 5113 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) 5114 5115 static const FPScalar f_scalar_fminnm = { 5116 gen_helper_advsimd_minnumh, 5117 gen_helper_vfp_minnums, 5118 gen_helper_vfp_minnumd, 5119 }; 5120 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) 5121 5122 static const FPScalar f_scalar_fmulx = { 5123 gen_helper_advsimd_mulxh, 5124 gen_helper_vfp_mulxs, 5125 gen_helper_vfp_mulxd, 5126 }; 5127 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) 5128 5129 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5130 { 5131 gen_helper_vfp_mulh(d, n, m, s); 5132 gen_vfp_negh(d, d); 5133 } 5134 5135 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5136 { 5137 gen_helper_vfp_muls(d, n, m, s); 5138 gen_vfp_negs(d, d); 5139 } 5140 5141 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5142 { 5143 gen_helper_vfp_muld(d, n, m, s); 5144 gen_vfp_negd(d, d); 5145 } 5146 5147 static const FPScalar f_scalar_fnmul = { 5148 gen_fnmul_h, 5149 gen_fnmul_s, 5150 gen_fnmul_d, 5151 }; 5152 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) 5153 5154 static const FPScalar f_scalar_fcmeq = { 5155 gen_helper_advsimd_ceq_f16, 5156 gen_helper_neon_ceq_f32, 5157 gen_helper_neon_ceq_f64, 5158 }; 5159 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) 5160 5161 static const FPScalar f_scalar_fcmge = { 5162 gen_helper_advsimd_cge_f16, 5163 gen_helper_neon_cge_f32, 5164 gen_helper_neon_cge_f64, 5165 }; 5166 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) 5167 5168 static const FPScalar f_scalar_fcmgt = { 5169 gen_helper_advsimd_cgt_f16, 5170 gen_helper_neon_cgt_f32, 5171 gen_helper_neon_cgt_f64, 5172 }; 5173 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) 5174 5175 static const FPScalar f_scalar_facge = { 5176 gen_helper_advsimd_acge_f16, 5177 gen_helper_neon_acge_f32, 5178 gen_helper_neon_acge_f64, 5179 }; 5180 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) 5181 5182 static const FPScalar f_scalar_facgt = { 5183 gen_helper_advsimd_acgt_f16, 5184 gen_helper_neon_acgt_f32, 5185 gen_helper_neon_acgt_f64, 5186 }; 5187 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) 5188 5189 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5190 { 5191 gen_helper_vfp_subh(d, n, m, s); 5192 gen_vfp_absh(d, d); 5193 } 5194 5195 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5196 { 5197 gen_helper_vfp_subs(d, n, m, s); 5198 gen_vfp_abss(d, d); 5199 } 5200 5201 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5202 { 5203 gen_helper_vfp_subd(d, n, m, s); 5204 gen_vfp_absd(d, d); 5205 } 5206 5207 static const FPScalar f_scalar_fabd = { 5208 gen_fabd_h, 5209 gen_fabd_s, 5210 gen_fabd_d, 5211 }; 5212 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) 5213 5214 static const FPScalar f_scalar_frecps = { 5215 
gen_helper_recpsf_f16, 5216 gen_helper_recpsf_f32, 5217 gen_helper_recpsf_f64, 5218 }; 5219 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) 5220 5221 static const FPScalar f_scalar_frsqrts = { 5222 gen_helper_rsqrtsf_f16, 5223 gen_helper_rsqrtsf_f32, 5224 gen_helper_rsqrtsf_f64, 5225 }; 5226 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) 5227 5228 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5229 const FPScalar *f, bool swap) 5230 { 5231 switch (a->esz) { 5232 case MO_64: 5233 if (fp_access_check(s)) { 5234 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5235 TCGv_i64 t1 = tcg_constant_i64(0); 5236 if (swap) { 5237 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_FPCR)); 5238 } else { 5239 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5240 } 5241 write_fp_dreg(s, a->rd, t0); 5242 } 5243 break; 5244 case MO_32: 5245 if (fp_access_check(s)) { 5246 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5247 TCGv_i32 t1 = tcg_constant_i32(0); 5248 if (swap) { 5249 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_FPCR)); 5250 } else { 5251 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5252 } 5253 write_fp_sreg(s, a->rd, t0); 5254 } 5255 break; 5256 case MO_16: 5257 if (!dc_isar_feature(aa64_fp16, s)) { 5258 return false; 5259 } 5260 if (fp_access_check(s)) { 5261 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5262 TCGv_i32 t1 = tcg_constant_i32(0); 5263 if (swap) { 5264 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_FPCR_F16)); 5265 } else { 5266 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 5267 } 5268 write_fp_sreg(s, a->rd, t0); 5269 } 5270 break; 5271 default: 5272 return false; 5273 } 5274 return true; 5275 } 5276 5277 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5278 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5279 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5280 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5281 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5282 5283 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5284 MemOp sgn_n, MemOp sgn_m, 5285 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5286 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5287 { 5288 TCGv_i64 t0, t1, t2, qc; 5289 MemOp esz = a->esz; 5290 5291 if (!fp_access_check(s)) { 5292 return true; 5293 } 5294 5295 t0 = tcg_temp_new_i64(); 5296 t1 = tcg_temp_new_i64(); 5297 t2 = tcg_temp_new_i64(); 5298 qc = tcg_temp_new_i64(); 5299 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5300 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5301 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5302 5303 if (esz == MO_64) { 5304 gen_d(t0, qc, t1, t2); 5305 } else { 5306 gen_bhs(t0, qc, t1, t2, esz); 5307 tcg_gen_ext_i64(t0, t0, esz); 5308 } 5309 5310 write_fp_dreg(s, a->rd, t0); 5311 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5312 return true; 5313 } 5314 5315 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5316 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5317 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5318 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5319 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5320 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5321 5322 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5323 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5324 { 5325 if (fp_access_check(s)) { 5326 TCGv_i64 t0 = tcg_temp_new_i64(); 5327 TCGv_i64 t1 = tcg_temp_new_i64(); 5328 5329 read_vec_element(s, 
t0, a->rn, 0, MO_64); 5330 read_vec_element(s, t1, a->rm, 0, MO_64); 5331 fn(t0, t0, t1); 5332 write_fp_dreg(s, a->rd, t0); 5333 } 5334 return true; 5335 } 5336 5337 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5338 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5339 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5340 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5341 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5342 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) 5343 5344 typedef struct ENVScalar2 { 5345 NeonGenTwoOpEnvFn *gen_bhs[3]; 5346 NeonGenTwo64OpEnvFn *gen_d; 5347 } ENVScalar2; 5348 5349 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5350 { 5351 if (!fp_access_check(s)) { 5352 return true; 5353 } 5354 if (a->esz == MO_64) { 5355 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5356 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5357 f->gen_d(t0, tcg_env, t0, t1); 5358 write_fp_dreg(s, a->rd, t0); 5359 } else { 5360 TCGv_i32 t0 = tcg_temp_new_i32(); 5361 TCGv_i32 t1 = tcg_temp_new_i32(); 5362 5363 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5364 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5365 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5366 write_fp_sreg(s, a->rd, t0); 5367 } 5368 return true; 5369 } 5370 5371 static const ENVScalar2 f_scalar_sqshl = { 5372 { gen_helper_neon_qshl_s8, 5373 gen_helper_neon_qshl_s16, 5374 gen_helper_neon_qshl_s32 }, 5375 gen_helper_neon_qshl_s64, 5376 }; 5377 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5378 5379 static const ENVScalar2 f_scalar_uqshl = { 5380 { gen_helper_neon_qshl_u8, 5381 gen_helper_neon_qshl_u16, 5382 gen_helper_neon_qshl_u32 }, 5383 gen_helper_neon_qshl_u64, 5384 }; 5385 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5386 5387 static const ENVScalar2 f_scalar_sqrshl = { 5388 { gen_helper_neon_qrshl_s8, 5389 gen_helper_neon_qrshl_s16, 5390 gen_helper_neon_qrshl_s32 }, 5391 gen_helper_neon_qrshl_s64, 5392 }; 5393 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5394 5395 static const ENVScalar2 f_scalar_uqrshl = { 5396 { gen_helper_neon_qrshl_u8, 5397 gen_helper_neon_qrshl_u16, 5398 gen_helper_neon_qrshl_u32 }, 5399 gen_helper_neon_qrshl_u64, 5400 }; 5401 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5402 5403 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5404 const ENVScalar2 *f) 5405 { 5406 if (a->esz == MO_16 || a->esz == MO_32) { 5407 return do_env_scalar2(s, a, f); 5408 } 5409 return false; 5410 } 5411 5412 static const ENVScalar2 f_scalar_sqdmulh = { 5413 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5414 }; 5415 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5416 5417 static const ENVScalar2 f_scalar_sqrdmulh = { 5418 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5419 }; 5420 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5421 5422 typedef struct ENVScalar3 { 5423 NeonGenThreeOpEnvFn *gen_hs[2]; 5424 } ENVScalar3; 5425 5426 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5427 const ENVScalar3 *f) 5428 { 5429 TCGv_i32 t0, t1, t2; 5430 5431 if (a->esz != MO_16 && a->esz != MO_32) { 5432 return false; 5433 } 5434 if (!fp_access_check(s)) { 5435 return true; 5436 } 5437 5438 t0 = tcg_temp_new_i32(); 5439 t1 = tcg_temp_new_i32(); 5440 t2 = tcg_temp_new_i32(); 5441 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5442 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5443 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5444 f->gen_hs[a->esz - 1](t0, tcg_env, t0, 
t1, t2); 5445 write_fp_sreg(s, a->rd, t0); 5446 return true; 5447 } 5448 5449 static const ENVScalar3 f_scalar_sqrdmlah = { 5450 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5451 }; 5452 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5453 5454 static const ENVScalar3 f_scalar_sqrdmlsh = { 5455 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5456 }; 5457 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5458 5459 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5460 { 5461 if (fp_access_check(s)) { 5462 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5463 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5464 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5465 write_fp_dreg(s, a->rd, t0); 5466 } 5467 return true; 5468 } 5469 5470 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5471 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5472 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5473 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5474 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5475 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5476 5477 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5478 gen_helper_gvec_3_ptr * const fns[3]) 5479 { 5480 MemOp esz = a->esz; 5481 int check = fp_access_check_vector_hsd(s, a->q, esz); 5482 5483 if (check <= 0) { 5484 return check == 0; 5485 } 5486 5487 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 5488 esz == MO_16, data, fns[esz - 1]); 5489 return true; 5490 } 5491 5492 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5493 gen_helper_gvec_fadd_h, 5494 gen_helper_gvec_fadd_s, 5495 gen_helper_gvec_fadd_d, 5496 }; 5497 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5498 5499 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5500 gen_helper_gvec_fsub_h, 5501 gen_helper_gvec_fsub_s, 5502 gen_helper_gvec_fsub_d, 5503 }; 5504 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5505 5506 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5507 gen_helper_gvec_fdiv_h, 5508 gen_helper_gvec_fdiv_s, 5509 gen_helper_gvec_fdiv_d, 5510 }; 5511 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5512 5513 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5514 gen_helper_gvec_fmul_h, 5515 gen_helper_gvec_fmul_s, 5516 gen_helper_gvec_fmul_d, 5517 }; 5518 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5519 5520 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5521 gen_helper_gvec_fmax_h, 5522 gen_helper_gvec_fmax_s, 5523 gen_helper_gvec_fmax_d, 5524 }; 5525 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) 5526 5527 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5528 gen_helper_gvec_fmin_h, 5529 gen_helper_gvec_fmin_s, 5530 gen_helper_gvec_fmin_d, 5531 }; 5532 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) 5533 5534 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5535 gen_helper_gvec_fmaxnum_h, 5536 gen_helper_gvec_fmaxnum_s, 5537 gen_helper_gvec_fmaxnum_d, 5538 }; 5539 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5540 5541 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5542 gen_helper_gvec_fminnum_h, 5543 gen_helper_gvec_fminnum_s, 5544 gen_helper_gvec_fminnum_d, 5545 }; 5546 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5547 5548 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5549 gen_helper_gvec_fmulx_h, 5550 gen_helper_gvec_fmulx_s, 5551 gen_helper_gvec_fmulx_d, 5552 }; 5553 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5554 5555 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5556 
gen_helper_gvec_vfma_h, 5557 gen_helper_gvec_vfma_s, 5558 gen_helper_gvec_vfma_d, 5559 }; 5560 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5561 5562 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5563 gen_helper_gvec_vfms_h, 5564 gen_helper_gvec_vfms_s, 5565 gen_helper_gvec_vfms_d, 5566 }; 5567 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) 5568 5569 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5570 gen_helper_gvec_fceq_h, 5571 gen_helper_gvec_fceq_s, 5572 gen_helper_gvec_fceq_d, 5573 }; 5574 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5575 5576 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5577 gen_helper_gvec_fcge_h, 5578 gen_helper_gvec_fcge_s, 5579 gen_helper_gvec_fcge_d, 5580 }; 5581 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5582 5583 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { 5584 gen_helper_gvec_fcgt_h, 5585 gen_helper_gvec_fcgt_s, 5586 gen_helper_gvec_fcgt_d, 5587 }; 5588 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5589 5590 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5591 gen_helper_gvec_facge_h, 5592 gen_helper_gvec_facge_s, 5593 gen_helper_gvec_facge_d, 5594 }; 5595 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5596 5597 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5598 gen_helper_gvec_facgt_h, 5599 gen_helper_gvec_facgt_s, 5600 gen_helper_gvec_facgt_d, 5601 }; 5602 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5603 5604 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5605 gen_helper_gvec_fabd_h, 5606 gen_helper_gvec_fabd_s, 5607 gen_helper_gvec_fabd_d, 5608 }; 5609 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) 5610 5611 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5612 gen_helper_gvec_recps_h, 5613 gen_helper_gvec_recps_s, 5614 gen_helper_gvec_recps_d, 5615 }; 5616 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) 5617 5618 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5619 gen_helper_gvec_rsqrts_h, 5620 gen_helper_gvec_rsqrts_s, 5621 gen_helper_gvec_rsqrts_d, 5622 }; 5623 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) 5624 5625 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5626 gen_helper_gvec_faddp_h, 5627 gen_helper_gvec_faddp_s, 5628 gen_helper_gvec_faddp_d, 5629 }; 5630 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5631 5632 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5633 gen_helper_gvec_fmaxp_h, 5634 gen_helper_gvec_fmaxp_s, 5635 gen_helper_gvec_fmaxp_d, 5636 }; 5637 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) 5638 5639 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5640 gen_helper_gvec_fminp_h, 5641 gen_helper_gvec_fminp_s, 5642 gen_helper_gvec_fminp_d, 5643 }; 5644 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) 5645 5646 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5647 gen_helper_gvec_fmaxnump_h, 5648 gen_helper_gvec_fmaxnump_s, 5649 gen_helper_gvec_fmaxnump_d, 5650 }; 5651 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5652 5653 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5654 gen_helper_gvec_fminnump_h, 5655 gen_helper_gvec_fminnump_s, 5656 gen_helper_gvec_fminnump_d, 5657 }; 5658 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5659 5660 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5661 { 5662 if (fp_access_check(s)) { 5663 int data = (is_2 << 1) | is_s; 5664 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5665 vec_full_reg_offset(s, a->rn), 
5666 vec_full_reg_offset(s, a->rm), tcg_env, 5667 a->q ? 16 : 8, vec_full_reg_size(s), 5668 data, gen_helper_gvec_fmlal_a64); 5669 } 5670 return true; 5671 } 5672 5673 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5674 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5675 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5676 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 5677 5678 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 5679 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 5680 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 5681 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 5682 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 5683 5684 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 5685 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 5686 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 5687 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 5688 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 5689 5690 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 5691 { 5692 if (fp_access_check(s)) { 5693 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 5694 } 5695 return true; 5696 } 5697 5698 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 5699 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 5700 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 5701 5702 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 5703 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 5704 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 5705 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 5706 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 5707 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 5708 5709 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 5710 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 5711 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 5712 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 5713 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 5714 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 5715 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 5716 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 5717 5718 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 5719 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 5720 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 5721 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 5722 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 5723 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 5724 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 5725 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 5726 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 5727 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 5728 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 5729 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 5730 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 5731 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 5732 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 5733 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 5734 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 5735 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 5736 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 5737 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 5738 5739 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 5740 { 5741 if (a->esz == MO_64 && !a->q) { 5742 return false; 5743 } 5744 if (fp_access_check(s)) { 5745 tcg_gen_gvec_cmp(cond, a->esz, 5746 vec_full_reg_offset(s, a->rd), 5747 vec_full_reg_offset(s, 
a->rn), 5748 vec_full_reg_offset(s, a->rm), 5749 a->q ? 16 : 8, vec_full_reg_size(s)); 5750 } 5751 return true; 5752 } 5753 5754 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 5755 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 5756 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 5757 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 5758 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 5759 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 5760 5761 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 5762 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 5763 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 5764 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 5765 5766 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 5767 gen_helper_gvec_4 *fn) 5768 { 5769 if (fp_access_check(s)) { 5770 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5771 } 5772 return true; 5773 } 5774 5775 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 5776 gen_helper_gvec_4_ptr *fn) 5777 { 5778 if (fp_access_check(s)) { 5779 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5780 } 5781 return true; 5782 } 5783 5784 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 5785 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 5786 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 5787 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 5788 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 5789 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 5790 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 5791 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 5792 5793 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 5794 { 5795 if (!dc_isar_feature(aa64_bf16, s)) { 5796 return false; 5797 } 5798 if (fp_access_check(s)) { 5799 /* Q bit selects BFMLALB vs BFMLALT. */ 5800 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, 5801 gen_helper_gvec_bfmlal); 5802 } 5803 return true; 5804 } 5805 5806 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 5807 gen_helper_gvec_fcaddh, 5808 gen_helper_gvec_fcadds, 5809 gen_helper_gvec_fcaddd, 5810 }; 5811 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) 5812 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) 5813 5814 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 5815 { 5816 static gen_helper_gvec_4_ptr * const fn[] = { 5817 [MO_16] = gen_helper_gvec_fcmlah, 5818 [MO_32] = gen_helper_gvec_fcmlas, 5819 [MO_64] = gen_helper_gvec_fcmlad, 5820 }; 5821 int check; 5822 5823 if (!dc_isar_feature(aa64_fcma, s)) { 5824 return false; 5825 } 5826 5827 check = fp_access_check_vector_hsd(s, a->q, a->esz); 5828 if (check <= 0) { 5829 return check == 0; 5830 } 5831 5832 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 5833 a->esz == MO_16, a->rot, fn[a->esz]); 5834 return true; 5835 } 5836 5837 /* 5838 * Widening vector x vector/indexed. 5839 * 5840 * These read from the top or bottom half of a 128-bit vector. 5841 * After widening, optionally accumulate with a 128-bit vector. 5842 * Implement these inline, as the number of elements are limited 5843 * and the related SVE and SME operations on larger vectors use 5844 * even/odd elements instead of top/bottom half. 5845 * 5846 * If idx >= 0, operand 2 is indexed, otherwise vector. 
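 *   (In the TRANS expansions below, the plain _v forms pass idx == -1
 *   and the _vi forms pass a->idx, so one expander covers both cases.)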
5847 * If acc, operand 0 is loaded with rd. 5848 */ 5849 5850 /* For low half, iterating up. */ 5851 static bool do_3op_widening(DisasContext *s, MemOp memop, int top, 5852 int rd, int rn, int rm, int idx, 5853 NeonGenTwo64OpFn *fn, bool acc) 5854 { 5855 TCGv_i64 tcg_op0 = tcg_temp_new_i64(); 5856 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 5857 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 5858 MemOp esz = memop & MO_SIZE; 5859 int half = 8 >> esz; 5860 int top_swap, top_half; 5861 5862 /* There are no 64x64->128 bit operations. */ 5863 if (esz >= MO_64) { 5864 return false; 5865 } 5866 if (!fp_access_check(s)) { 5867 return true; 5868 } 5869 5870 if (idx >= 0) { 5871 read_vec_element(s, tcg_op2, rm, idx, memop); 5872 } 5873 5874 /* 5875 * For top half inputs, iterate forward; backward for bottom half. 5876 * This means the store to the destination will not occur until 5877 * overlapping input inputs are consumed. 5878 * Use top_swap to conditionally invert the forward iteration index. 5879 */ 5880 top_swap = top ? 0 : half - 1; 5881 top_half = top ? half : 0; 5882 5883 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 5884 int elt = elt_fwd ^ top_swap; 5885 5886 read_vec_element(s, tcg_op1, rn, elt + top_half, memop); 5887 if (idx < 0) { 5888 read_vec_element(s, tcg_op2, rm, elt + top_half, memop); 5889 } 5890 if (acc) { 5891 read_vec_element(s, tcg_op0, rd, elt, memop + 1); 5892 } 5893 fn(tcg_op0, tcg_op1, tcg_op2); 5894 write_vec_element(s, tcg_op0, rd, elt, esz + 1); 5895 } 5896 clear_vec_high(s, 1, rd); 5897 return true; 5898 } 5899 5900 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5901 { 5902 TCGv_i64 t = tcg_temp_new_i64(); 5903 tcg_gen_mul_i64(t, n, m); 5904 tcg_gen_add_i64(d, d, t); 5905 } 5906 5907 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5908 { 5909 TCGv_i64 t = tcg_temp_new_i64(); 5910 tcg_gen_mul_i64(t, n, m); 5911 tcg_gen_sub_i64(d, d, t); 5912 } 5913 5914 TRANS(SMULL_v, do_3op_widening, 5915 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5916 tcg_gen_mul_i64, false) 5917 TRANS(UMULL_v, do_3op_widening, 5918 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5919 tcg_gen_mul_i64, false) 5920 TRANS(SMLAL_v, do_3op_widening, 5921 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5922 gen_muladd_i64, true) 5923 TRANS(UMLAL_v, do_3op_widening, 5924 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5925 gen_muladd_i64, true) 5926 TRANS(SMLSL_v, do_3op_widening, 5927 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5928 gen_mulsub_i64, true) 5929 TRANS(UMLSL_v, do_3op_widening, 5930 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5931 gen_mulsub_i64, true) 5932 5933 TRANS(SMULL_vi, do_3op_widening, 5934 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5935 tcg_gen_mul_i64, false) 5936 TRANS(UMULL_vi, do_3op_widening, 5937 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5938 tcg_gen_mul_i64, false) 5939 TRANS(SMLAL_vi, do_3op_widening, 5940 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5941 gen_muladd_i64, true) 5942 TRANS(UMLAL_vi, do_3op_widening, 5943 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5944 gen_muladd_i64, true) 5945 TRANS(SMLSL_vi, do_3op_widening, 5946 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5947 gen_mulsub_i64, true) 5948 TRANS(UMLSL_vi, do_3op_widening, 5949 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5950 gen_mulsub_i64, true) 5951 5952 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5953 { 5954 TCGv_i64 t1 = tcg_temp_new_i64(); 5955 TCGv_i64 t2 = tcg_temp_new_i64(); 5956 5957 tcg_gen_sub_i64(t1, n, m); 5958 tcg_gen_sub_i64(t2, m, n); 
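    /*
     * Both differences are computed above; the movcond below keeps
     * n - m when n >= m (signed compare) and m - n otherwise, giving
     * d = |n - m| without a branch.
     */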
5959 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 5960 } 5961 5962 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5963 { 5964 TCGv_i64 t1 = tcg_temp_new_i64(); 5965 TCGv_i64 t2 = tcg_temp_new_i64(); 5966 5967 tcg_gen_sub_i64(t1, n, m); 5968 tcg_gen_sub_i64(t2, m, n); 5969 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 5970 } 5971 5972 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5973 { 5974 TCGv_i64 t = tcg_temp_new_i64(); 5975 gen_sabd_i64(t, n, m); 5976 tcg_gen_add_i64(d, d, t); 5977 } 5978 5979 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5980 { 5981 TCGv_i64 t = tcg_temp_new_i64(); 5982 gen_uabd_i64(t, n, m); 5983 tcg_gen_add_i64(d, d, t); 5984 } 5985 5986 TRANS(SADDL_v, do_3op_widening, 5987 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5988 tcg_gen_add_i64, false) 5989 TRANS(UADDL_v, do_3op_widening, 5990 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5991 tcg_gen_add_i64, false) 5992 TRANS(SSUBL_v, do_3op_widening, 5993 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5994 tcg_gen_sub_i64, false) 5995 TRANS(USUBL_v, do_3op_widening, 5996 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5997 tcg_gen_sub_i64, false) 5998 TRANS(SABDL_v, do_3op_widening, 5999 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6000 gen_sabd_i64, false) 6001 TRANS(UABDL_v, do_3op_widening, 6002 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6003 gen_uabd_i64, false) 6004 TRANS(SABAL_v, do_3op_widening, 6005 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6006 gen_saba_i64, true) 6007 TRANS(UABAL_v, do_3op_widening, 6008 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6009 gen_uaba_i64, true) 6010 6011 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6012 { 6013 tcg_gen_mul_i64(d, n, m); 6014 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 6015 } 6016 6017 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6018 { 6019 tcg_gen_mul_i64(d, n, m); 6020 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 6021 } 6022 6023 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6024 { 6025 TCGv_i64 t = tcg_temp_new_i64(); 6026 6027 tcg_gen_mul_i64(t, n, m); 6028 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6029 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6030 } 6031 6032 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6033 { 6034 TCGv_i64 t = tcg_temp_new_i64(); 6035 6036 tcg_gen_mul_i64(t, n, m); 6037 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6038 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6039 } 6040 6041 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6042 { 6043 TCGv_i64 t = tcg_temp_new_i64(); 6044 6045 tcg_gen_mul_i64(t, n, m); 6046 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6047 tcg_gen_neg_i64(t, t); 6048 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6049 } 6050 6051 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6052 { 6053 TCGv_i64 t = tcg_temp_new_i64(); 6054 6055 tcg_gen_mul_i64(t, n, m); 6056 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6057 tcg_gen_neg_i64(t, t); 6058 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6059 } 6060 6061 TRANS(SQDMULL_v, do_3op_widening, 6062 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6063 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6064 TRANS(SQDMLAL_v, do_3op_widening, 6065 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6066 a->esz == MO_16 ? 
gen_sqdmlal_h : gen_sqdmlal_s, true) 6067 TRANS(SQDMLSL_v, do_3op_widening, 6068 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6069 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6070 6071 TRANS(SQDMULL_vi, do_3op_widening, 6072 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6073 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6074 TRANS(SQDMLAL_vi, do_3op_widening, 6075 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6076 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6077 TRANS(SQDMLSL_vi, do_3op_widening, 6078 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6079 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6080 6081 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, 6082 MemOp sign, bool sub) 6083 { 6084 TCGv_i64 tcg_op0, tcg_op1; 6085 MemOp esz = a->esz; 6086 int half = 8 >> esz; 6087 bool top = a->q; 6088 int top_swap = top ? 0 : half - 1; 6089 int top_half = top ? half : 0; 6090 6091 /* There are no 64x64->128 bit operations. */ 6092 if (esz >= MO_64) { 6093 return false; 6094 } 6095 if (!fp_access_check(s)) { 6096 return true; 6097 } 6098 tcg_op0 = tcg_temp_new_i64(); 6099 tcg_op1 = tcg_temp_new_i64(); 6100 6101 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6102 int elt = elt_fwd ^ top_swap; 6103 6104 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign); 6105 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6106 if (sub) { 6107 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6108 } else { 6109 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6110 } 6111 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1); 6112 } 6113 clear_vec_high(s, 1, a->rd); 6114 return true; 6115 } 6116 6117 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false) 6118 TRANS(UADDW, do_addsub_wide, a, 0, false) 6119 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true) 6120 TRANS(USUBW, do_addsub_wide, a, 0, true) 6121 6122 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a, 6123 bool sub, bool round) 6124 { 6125 TCGv_i64 tcg_op0, tcg_op1; 6126 MemOp esz = a->esz; 6127 int half = 8 >> esz; 6128 bool top = a->q; 6129 int ebits = 8 << esz; 6130 uint64_t rbit = 1ull << (ebits - 1); 6131 int top_swap, top_half; 6132 6133 /* There are no 128x128->64 bit operations. */ 6134 if (esz >= MO_64) { 6135 return false; 6136 } 6137 if (!fp_access_check(s)) { 6138 return true; 6139 } 6140 tcg_op0 = tcg_temp_new_i64(); 6141 tcg_op1 = tcg_temp_new_i64(); 6142 6143 /* 6144 * For top half inputs, iterate backward; forward for bottom half. 6145 * This means the store to the destination will not occur until 6146 * overlapping input inputs are consumed. 6147 */ 6148 top_swap = top ? half - 1 : 0; 6149 top_half = top ? 
half : 0; 6150 6151 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6152 int elt = elt_fwd ^ top_swap; 6153 6154 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6155 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6156 if (sub) { 6157 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6158 } else { 6159 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6160 } 6161 if (round) { 6162 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6163 } 6164 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6165 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6166 } 6167 clear_vec_high(s, top, a->rd); 6168 return true; 6169 } 6170 6171 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6172 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6173 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6174 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6175 6176 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6177 { 6178 if (fp_access_check(s)) { 6179 /* The Q field specifies lo/hi half input for these insns. */ 6180 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6181 } 6182 return true; 6183 } 6184 6185 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6186 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6187 6188 /* 6189 * Advanced SIMD scalar/vector x indexed element 6190 */ 6191 6192 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6193 { 6194 switch (a->esz) { 6195 case MO_64: 6196 if (fp_access_check(s)) { 6197 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6198 TCGv_i64 t1 = tcg_temp_new_i64(); 6199 6200 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6201 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6202 write_fp_dreg(s, a->rd, t0); 6203 } 6204 break; 6205 case MO_32: 6206 if (fp_access_check(s)) { 6207 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6208 TCGv_i32 t1 = tcg_temp_new_i32(); 6209 6210 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6211 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6212 write_fp_sreg(s, a->rd, t0); 6213 } 6214 break; 6215 case MO_16: 6216 if (!dc_isar_feature(aa64_fp16, s)) { 6217 return false; 6218 } 6219 if (fp_access_check(s)) { 6220 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6221 TCGv_i32 t1 = tcg_temp_new_i32(); 6222 6223 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6224 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6225 write_fp_sreg(s, a->rd, t0); 6226 } 6227 break; 6228 default: 6229 g_assert_not_reached(); 6230 } 6231 return true; 6232 } 6233 6234 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6235 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6236 6237 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6238 { 6239 switch (a->esz) { 6240 case MO_64: 6241 if (fp_access_check(s)) { 6242 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6243 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6244 TCGv_i64 t2 = tcg_temp_new_i64(); 6245 6246 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6247 if (neg) { 6248 gen_vfp_negd(t1, t1); 6249 } 6250 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6251 write_fp_dreg(s, a->rd, t0); 6252 } 6253 break; 6254 case MO_32: 6255 if (fp_access_check(s)) { 6256 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6257 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6258 TCGv_i32 t2 = tcg_temp_new_i32(); 6259 6260 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6261 if (neg) { 6262 gen_vfp_negs(t1, t1); 6263 } 6264 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6265 write_fp_sreg(s, a->rd, t0); 6266 } 6267 break; 6268 case 
MO_16: 6269 if (!dc_isar_feature(aa64_fp16, s)) { 6270 return false; 6271 } 6272 if (fp_access_check(s)) { 6273 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6274 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6275 TCGv_i32 t2 = tcg_temp_new_i32(); 6276 6277 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6278 if (neg) { 6279 gen_vfp_negh(t1, t1); 6280 } 6281 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6282 fpstatus_ptr(FPST_FPCR_F16)); 6283 write_fp_sreg(s, a->rd, t0); 6284 } 6285 break; 6286 default: 6287 g_assert_not_reached(); 6288 } 6289 return true; 6290 } 6291 6292 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6293 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6294 6295 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6296 const ENVScalar2 *f) 6297 { 6298 if (a->esz < MO_16 || a->esz > MO_32) { 6299 return false; 6300 } 6301 if (fp_access_check(s)) { 6302 TCGv_i32 t0 = tcg_temp_new_i32(); 6303 TCGv_i32 t1 = tcg_temp_new_i32(); 6304 6305 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6306 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6307 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6308 write_fp_sreg(s, a->rd, t0); 6309 } 6310 return true; 6311 } 6312 6313 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6314 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6315 6316 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6317 const ENVScalar3 *f) 6318 { 6319 if (a->esz < MO_16 || a->esz > MO_32) { 6320 return false; 6321 } 6322 if (fp_access_check(s)) { 6323 TCGv_i32 t0 = tcg_temp_new_i32(); 6324 TCGv_i32 t1 = tcg_temp_new_i32(); 6325 TCGv_i32 t2 = tcg_temp_new_i32(); 6326 6327 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6328 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6329 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6330 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6331 write_fp_sreg(s, a->rd, t0); 6332 } 6333 return true; 6334 } 6335 6336 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6337 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6338 6339 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6340 NeonGenTwo64OpFn *fn, bool acc) 6341 { 6342 if (fp_access_check(s)) { 6343 TCGv_i64 t0 = tcg_temp_new_i64(); 6344 TCGv_i64 t1 = tcg_temp_new_i64(); 6345 TCGv_i64 t2 = tcg_temp_new_i64(); 6346 6347 if (acc) { 6348 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6349 } 6350 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6351 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6352 fn(t0, t1, t2); 6353 6354 /* Clear the whole register first, then store scalar. */ 6355 clear_vec(s, a->rd); 6356 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6357 } 6358 return true; 6359 } 6360 6361 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6362 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6363 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6364 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6365 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6366 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6367 6368 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6369 gen_helper_gvec_3_ptr * const fns[3]) 6370 { 6371 MemOp esz = a->esz; 6372 int check = fp_access_check_vector_hsd(s, a->q, esz); 6373 6374 if (check <= 0) { 6375 return check == 0; 6376 } 6377 6378 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6379 esz == MO_16, a->idx, fns[esz - 1]); 6380 return true; 6381 } 6382 6383 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6384 gen_helper_gvec_fmul_idx_h, 6385 gen_helper_gvec_fmul_idx_s, 6386 gen_helper_gvec_fmul_idx_d, 6387 }; 6388 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6389 6390 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6391 gen_helper_gvec_fmulx_idx_h, 6392 gen_helper_gvec_fmulx_idx_s, 6393 gen_helper_gvec_fmulx_idx_d, 6394 }; 6395 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6396 6397 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6398 { 6399 static gen_helper_gvec_4_ptr * const fns[3] = { 6400 gen_helper_gvec_fmla_idx_h, 6401 gen_helper_gvec_fmla_idx_s, 6402 gen_helper_gvec_fmla_idx_d, 6403 }; 6404 MemOp esz = a->esz; 6405 int check = fp_access_check_vector_hsd(s, a->q, esz); 6406 6407 if (check <= 0) { 6408 return check == 0; 6409 } 6410 6411 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6412 esz == MO_16, (a->idx << 1) | neg, 6413 fns[esz - 1]); 6414 return true; 6415 } 6416 6417 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6418 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6419 6420 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6421 { 6422 if (fp_access_check(s)) { 6423 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6424 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6425 vec_full_reg_offset(s, a->rn), 6426 vec_full_reg_offset(s, a->rm), tcg_env, 6427 a->q ? 
16 : 8, vec_full_reg_size(s), 6428 data, gen_helper_gvec_fmlal_idx_a64); 6429 } 6430 return true; 6431 } 6432 6433 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6434 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6435 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6436 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6437 6438 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6439 gen_helper_gvec_3 * const fns[2]) 6440 { 6441 assert(a->esz == MO_16 || a->esz == MO_32); 6442 if (fp_access_check(s)) { 6443 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6444 } 6445 return true; 6446 } 6447 6448 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6449 gen_helper_gvec_mul_idx_h, 6450 gen_helper_gvec_mul_idx_s, 6451 }; 6452 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6453 6454 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6455 { 6456 static gen_helper_gvec_4 * const fns[2][2] = { 6457 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6458 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6459 }; 6460 6461 assert(a->esz == MO_16 || a->esz == MO_32); 6462 if (fp_access_check(s)) { 6463 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6464 a->idx, fns[a->esz - 1][sub]); 6465 } 6466 return true; 6467 } 6468 6469 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6470 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6471 6472 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6473 gen_helper_gvec_4 * const fns[2]) 6474 { 6475 assert(a->esz == MO_16 || a->esz == MO_32); 6476 if (fp_access_check(s)) { 6477 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6478 vec_full_reg_offset(s, a->rn), 6479 vec_full_reg_offset(s, a->rm), 6480 offsetof(CPUARMState, vfp.qc), 6481 a->q ? 
16 : 8, vec_full_reg_size(s), 6482 a->idx, fns[a->esz - 1]); 6483 } 6484 return true; 6485 } 6486 6487 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6488 gen_helper_neon_sqdmulh_idx_h, 6489 gen_helper_neon_sqdmulh_idx_s, 6490 }; 6491 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6492 6493 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6494 gen_helper_neon_sqrdmulh_idx_h, 6495 gen_helper_neon_sqrdmulh_idx_s, 6496 }; 6497 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6498 6499 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6500 gen_helper_neon_sqrdmlah_idx_h, 6501 gen_helper_neon_sqrdmlah_idx_s, 6502 }; 6503 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6504 f_vector_idx_sqrdmlah) 6505 6506 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6507 gen_helper_neon_sqrdmlsh_idx_h, 6508 gen_helper_neon_sqrdmlsh_idx_s, 6509 }; 6510 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6511 f_vector_idx_sqrdmlsh) 6512 6513 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6514 gen_helper_gvec_4 *fn) 6515 { 6516 if (fp_access_check(s)) { 6517 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6518 } 6519 return true; 6520 } 6521 6522 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6523 gen_helper_gvec_4_ptr *fn) 6524 { 6525 if (fp_access_check(s)) { 6526 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6527 } 6528 return true; 6529 } 6530 6531 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6532 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6533 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6534 gen_helper_gvec_sudot_idx_b) 6535 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6536 gen_helper_gvec_usdot_idx_b) 6537 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6538 gen_helper_gvec_bfdot_idx) 6539 6540 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6541 { 6542 if (!dc_isar_feature(aa64_bf16, s)) { 6543 return false; 6544 } 6545 if (fp_access_check(s)) { 6546 /* Q bit selects BFMLALB vs BFMLALT. 
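         * a->q == 0 selects the even-numbered (bottom) BF16 source
         * elements and a->q == 1 the odd-numbered (top) ones.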
*/ 6547 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, 6548 (a->idx << 1) | a->q, 6549 gen_helper_gvec_bfmlal_idx); 6550 } 6551 return true; 6552 } 6553 6554 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6555 { 6556 gen_helper_gvec_4_ptr *fn; 6557 6558 if (!dc_isar_feature(aa64_fcma, s)) { 6559 return false; 6560 } 6561 switch (a->esz) { 6562 case MO_16: 6563 if (!dc_isar_feature(aa64_fp16, s)) { 6564 return false; 6565 } 6566 fn = gen_helper_gvec_fcmlah_idx; 6567 break; 6568 case MO_32: 6569 fn = gen_helper_gvec_fcmlas_idx; 6570 break; 6571 default: 6572 g_assert_not_reached(); 6573 } 6574 if (fp_access_check(s)) { 6575 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6576 a->esz == MO_16, (a->idx << 2) | a->rot, fn); 6577 } 6578 return true; 6579 } 6580 6581 /* 6582 * Advanced SIMD scalar pairwise 6583 */ 6584 6585 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6586 { 6587 switch (a->esz) { 6588 case MO_64: 6589 if (fp_access_check(s)) { 6590 TCGv_i64 t0 = tcg_temp_new_i64(); 6591 TCGv_i64 t1 = tcg_temp_new_i64(); 6592 6593 read_vec_element(s, t0, a->rn, 0, MO_64); 6594 read_vec_element(s, t1, a->rn, 1, MO_64); 6595 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6596 write_fp_dreg(s, a->rd, t0); 6597 } 6598 break; 6599 case MO_32: 6600 if (fp_access_check(s)) { 6601 TCGv_i32 t0 = tcg_temp_new_i32(); 6602 TCGv_i32 t1 = tcg_temp_new_i32(); 6603 6604 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6605 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6606 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6607 write_fp_sreg(s, a->rd, t0); 6608 } 6609 break; 6610 case MO_16: 6611 if (!dc_isar_feature(aa64_fp16, s)) { 6612 return false; 6613 } 6614 if (fp_access_check(s)) { 6615 TCGv_i32 t0 = tcg_temp_new_i32(); 6616 TCGv_i32 t1 = tcg_temp_new_i32(); 6617 6618 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6619 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6620 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6621 write_fp_sreg(s, a->rd, t0); 6622 } 6623 break; 6624 default: 6625 g_assert_not_reached(); 6626 } 6627 return true; 6628 } 6629 6630 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6631 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) 6632 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) 6633 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6634 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6635 6636 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6637 { 6638 if (fp_access_check(s)) { 6639 TCGv_i64 t0 = tcg_temp_new_i64(); 6640 TCGv_i64 t1 = tcg_temp_new_i64(); 6641 6642 read_vec_element(s, t0, a->rn, 0, MO_64); 6643 read_vec_element(s, t1, a->rn, 1, MO_64); 6644 tcg_gen_add_i64(t0, t0, t1); 6645 write_fp_dreg(s, a->rd, t0); 6646 } 6647 return true; 6648 } 6649 6650 /* 6651 * Floating-point conditional select 6652 */ 6653 6654 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 6655 { 6656 TCGv_i64 t_true, t_false; 6657 DisasCompare64 c; 6658 int check = fp_access_check_scalar_hsd(s, a->esz); 6659 6660 if (check <= 0) { 6661 return check == 0; 6662 } 6663 6664 /* Zero extend sreg & hreg inputs to 64 bits now. 
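     * (Reading the element with an unsigned memop already zero-extends
     * it into the 64-bit temp, so no explicit extension is needed.)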
*/ 6665 t_true = tcg_temp_new_i64(); 6666 t_false = tcg_temp_new_i64(); 6667 read_vec_element(s, t_true, a->rn, 0, a->esz); 6668 read_vec_element(s, t_false, a->rm, 0, a->esz); 6669 6670 a64_test_cc(&c, a->cond); 6671 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6672 t_true, t_false); 6673 6674 /* 6675 * Note that sregs & hregs write back zeros to the high bits, 6676 * and we've already done the zero-extension. 6677 */ 6678 write_fp_dreg(s, a->rd, t_true); 6679 return true; 6680 } 6681 6682 /* 6683 * Advanced SIMD Extract 6684 */ 6685 6686 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 6687 { 6688 if (fp_access_check(s)) { 6689 TCGv_i64 lo = read_fp_dreg(s, a->rn); 6690 if (a->imm != 0) { 6691 TCGv_i64 hi = read_fp_dreg(s, a->rm); 6692 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 6693 } 6694 write_fp_dreg(s, a->rd, lo); 6695 } 6696 return true; 6697 } 6698 6699 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 6700 { 6701 TCGv_i64 lo, hi; 6702 int pos = (a->imm & 7) * 8; 6703 int elt = a->imm >> 3; 6704 6705 if (!fp_access_check(s)) { 6706 return true; 6707 } 6708 6709 lo = tcg_temp_new_i64(); 6710 hi = tcg_temp_new_i64(); 6711 6712 read_vec_element(s, lo, a->rn, elt, MO_64); 6713 elt++; 6714 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 6715 elt++; 6716 6717 if (pos != 0) { 6718 TCGv_i64 hh = tcg_temp_new_i64(); 6719 tcg_gen_extract2_i64(lo, lo, hi, pos); 6720 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 6721 tcg_gen_extract2_i64(hi, hi, hh, pos); 6722 } 6723 6724 write_vec_element(s, lo, a->rd, 0, MO_64); 6725 write_vec_element(s, hi, a->rd, 1, MO_64); 6726 clear_vec_high(s, true, a->rd); 6727 return true; 6728 } 6729 6730 /* 6731 * Floating-point data-processing (3 source) 6732 */ 6733 6734 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 6735 { 6736 TCGv_ptr fpst; 6737 6738 /* 6739 * These are fused multiply-add. Note that doing the negations here 6740 * as separate steps is correct: an input NaN should come out with 6741 * its sign bit flipped if it is a negated-input. 
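     *
     * For reference, assuming muladd(n, m, a) == n * m + a, the four
     * insns below work out as:
     *   FMADD:  rn * rm + ra
     *   FMSUB:  ra - rn * rm        (neg_n)
     *   FNMADD: -(rn * rm + ra)     (neg_a, neg_n)
     *   FNMSUB: rn * rm - ra        (neg_a)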
6742 */ 6743 switch (a->esz) { 6744 case MO_64: 6745 if (fp_access_check(s)) { 6746 TCGv_i64 tn = read_fp_dreg(s, a->rn); 6747 TCGv_i64 tm = read_fp_dreg(s, a->rm); 6748 TCGv_i64 ta = read_fp_dreg(s, a->ra); 6749 6750 if (neg_a) { 6751 gen_vfp_negd(ta, ta); 6752 } 6753 if (neg_n) { 6754 gen_vfp_negd(tn, tn); 6755 } 6756 fpst = fpstatus_ptr(FPST_FPCR); 6757 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 6758 write_fp_dreg(s, a->rd, ta); 6759 } 6760 break; 6761 6762 case MO_32: 6763 if (fp_access_check(s)) { 6764 TCGv_i32 tn = read_fp_sreg(s, a->rn); 6765 TCGv_i32 tm = read_fp_sreg(s, a->rm); 6766 TCGv_i32 ta = read_fp_sreg(s, a->ra); 6767 6768 if (neg_a) { 6769 gen_vfp_negs(ta, ta); 6770 } 6771 if (neg_n) { 6772 gen_vfp_negs(tn, tn); 6773 } 6774 fpst = fpstatus_ptr(FPST_FPCR); 6775 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 6776 write_fp_sreg(s, a->rd, ta); 6777 } 6778 break; 6779 6780 case MO_16: 6781 if (!dc_isar_feature(aa64_fp16, s)) { 6782 return false; 6783 } 6784 if (fp_access_check(s)) { 6785 TCGv_i32 tn = read_fp_hreg(s, a->rn); 6786 TCGv_i32 tm = read_fp_hreg(s, a->rm); 6787 TCGv_i32 ta = read_fp_hreg(s, a->ra); 6788 6789 if (neg_a) { 6790 gen_vfp_negh(ta, ta); 6791 } 6792 if (neg_n) { 6793 gen_vfp_negh(tn, tn); 6794 } 6795 fpst = fpstatus_ptr(FPST_FPCR_F16); 6796 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 6797 write_fp_sreg(s, a->rd, ta); 6798 } 6799 break; 6800 6801 default: 6802 return false; 6803 } 6804 return true; 6805 } 6806 6807 TRANS(FMADD, do_fmadd, a, false, false) 6808 TRANS(FNMADD, do_fmadd, a, true, true) 6809 TRANS(FMSUB, do_fmadd, a, false, true) 6810 TRANS(FNMSUB, do_fmadd, a, true, false) 6811 6812 /* 6813 * Advanced SIMD Across Lanes 6814 */ 6815 6816 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen, 6817 MemOp src_sign, NeonGenTwo64OpFn *fn) 6818 { 6819 TCGv_i64 tcg_res, tcg_elt; 6820 MemOp src_mop = a->esz | src_sign; 6821 int elements = (a->q ? 16 : 8) >> a->esz; 6822 6823 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 6824 if (elements < 4) { 6825 return false; 6826 } 6827 if (!fp_access_check(s)) { 6828 return true; 6829 } 6830 6831 tcg_res = tcg_temp_new_i64(); 6832 tcg_elt = tcg_temp_new_i64(); 6833 6834 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 6835 for (int i = 1; i < elements; i++) { 6836 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 6837 fn(tcg_res, tcg_res, tcg_elt); 6838 } 6839 6840 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 6841 write_fp_dreg(s, a->rd, tcg_res); 6842 return true; 6843 } 6844 6845 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 6846 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 6847 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 6848 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 6849 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 6850 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 6851 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 6852 6853 /* 6854 * do_fp_reduction helper 6855 * 6856 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 6857 * important for correct NaN propagation that we do these 6858 * operations in exactly the order specified by the pseudocode. 6859 * 6860 * This is a recursive function. 
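 * For example, reducing four elements computes
 *   fn(fn(elt[0], elt[1]), fn(elt[2], elt[3]))
 * rather than a linear left-to-right fold.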
6861 */ 6862 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 6863 int ebase, int ecount, TCGv_ptr fpst, 6864 NeonGenTwoSingleOpFn *fn) 6865 { 6866 if (ecount == 1) { 6867 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 6868 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 6869 return tcg_elem; 6870 } else { 6871 int half = ecount >> 1; 6872 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 6873 6874 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 6875 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 6876 tcg_res = tcg_temp_new_i32(); 6877 6878 fn(tcg_res, tcg_lo, tcg_hi, fpst); 6879 return tcg_res; 6880 } 6881 } 6882 6883 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 6884 NeonGenTwoSingleOpFn *fn) 6885 { 6886 if (fp_access_check(s)) { 6887 MemOp esz = a->esz; 6888 int elts = (a->q ? 16 : 8) >> esz; 6889 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 6890 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); 6891 write_fp_sreg(s, a->rd, res); 6892 } 6893 return true; 6894 } 6895 6896 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh) 6897 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh) 6898 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh) 6899 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh) 6900 6901 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) 6902 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) 6903 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) 6904 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) 6905 6906 /* 6907 * Floating-point Immediate 6908 */ 6909 6910 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 6911 { 6912 int check = fp_access_check_scalar_hsd(s, a->esz); 6913 uint64_t imm; 6914 6915 if (check <= 0) { 6916 return check == 0; 6917 } 6918 6919 imm = vfp_expand_imm(a->esz, a->imm); 6920 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 6921 return true; 6922 } 6923 6924 /* 6925 * Floating point compare, conditional compare 6926 */ 6927 6928 static void handle_fp_compare(DisasContext *s, int size, 6929 unsigned int rn, unsigned int rm, 6930 bool cmp_with_zero, bool signal_all_nans) 6931 { 6932 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 6933 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 6934 6935 if (size == MO_64) { 6936 TCGv_i64 tcg_vn, tcg_vm; 6937 6938 tcg_vn = read_fp_dreg(s, rn); 6939 if (cmp_with_zero) { 6940 tcg_vm = tcg_constant_i64(0); 6941 } else { 6942 tcg_vm = read_fp_dreg(s, rm); 6943 } 6944 if (signal_all_nans) { 6945 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6946 } else { 6947 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6948 } 6949 } else { 6950 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 6951 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 6952 6953 read_vec_element_i32(s, tcg_vn, rn, 0, size); 6954 if (cmp_with_zero) { 6955 tcg_gen_movi_i32(tcg_vm, 0); 6956 } else { 6957 read_vec_element_i32(s, tcg_vm, rm, 0, size); 6958 } 6959 6960 switch (size) { 6961 case MO_32: 6962 if (signal_all_nans) { 6963 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6964 } else { 6965 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6966 } 6967 break; 6968 case MO_16: 6969 if (signal_all_nans) { 6970 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6971 } else { 6972 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6973 } 6974 break; 6975 default: 6976 g_assert_not_reached(); 6977 } 6978 } 6979 6980 gen_set_nzcv(tcg_flags); 6981 } 6982 6983 /* FCMP, FCMPE */ 6984 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 6985 { 6986 int check = fp_access_check_scalar_hsd(s, a->esz); 6987 6988 if (check <= 0) { 6989 return check == 0; 6990 } 6991 6992 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 6993 return true; 6994 } 6995 6996 /* FCCMP, FCCMPE */ 6997 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 6998 { 6999 TCGLabel *label_continue = NULL; 7000 int check = fp_access_check_scalar_hsd(s, a->esz); 7001 7002 if (check <= 0) { 7003 return check == 0; 7004 } 7005 7006 if (a->cond < 0x0e) { /* not always */ 7007 TCGLabel *label_match = gen_new_label(); 7008 label_continue = gen_new_label(); 7009 arm_gen_test_cc(a->cond, label_match); 7010 /* nomatch: */ 7011 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7012 tcg_gen_br(label_continue); 7013 gen_set_label(label_match); 7014 } 7015 7016 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7017 7018 if (label_continue) { 7019 gen_set_label(label_continue); 7020 } 7021 return true; 7022 } 7023 7024 /* 7025 * Advanced SIMD Modified Immediate 7026 */ 7027 7028 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7029 { 7030 if (!dc_isar_feature(aa64_fp16, s)) { 7031 return false; 7032 } 7033 if (fp_access_check(s)) { 7034 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7035 a->q ? 16 : 8, vec_full_reg_size(s), 7036 vfp_expand_imm(MO_16, a->abcdefgh)); 7037 } 7038 return true; 7039 } 7040 7041 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7042 int64_t c, uint32_t oprsz, uint32_t maxsz) 7043 { 7044 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7045 } 7046 7047 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7048 { 7049 GVecGen2iFn *fn; 7050 7051 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7052 if ((a->cmode & 1) && a->cmode < 12) { 7053 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7054 fn = a->op ? 
tcg_gen_gvec_andi : tcg_gen_gvec_ori; 7055 } else { 7056 /* There is one unallocated cmode/op combination in this space */ 7057 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 7058 return false; 7059 } 7060 fn = gen_movi; 7061 } 7062 7063 if (fp_access_check(s)) { 7064 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 7065 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 7066 } 7067 return true; 7068 } 7069 7070 /* 7071 * Advanced SIMD Shift by Immediate 7072 */ 7073 7074 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 7075 { 7076 if (fp_access_check(s)) { 7077 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 7078 } 7079 return true; 7080 } 7081 7082 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 7083 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 7084 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 7085 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 7086 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 7087 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 7088 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 7089 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 7090 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 7091 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 7092 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 7093 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 7094 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 7095 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 7096 7097 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 7098 { 7099 TCGv_i64 tcg_rn, tcg_rd; 7100 int esz = a->esz; 7101 int esize; 7102 7103 if (!fp_access_check(s)) { 7104 return true; 7105 } 7106 7107 /* 7108 * For the LL variants the store is larger than the load, 7109 * so if rd == rn we would overwrite parts of our input. 7110 * So load everything right now and use shifts in the main loop. 
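     * (e.g. with esz == MO_8 the single 64-bit load below covers all
     * eight source bytes; each iteration then extracts one byte,
     * shifts it left by a->imm and stores the widened 16-bit result.)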
7111 */ 7112 tcg_rd = tcg_temp_new_i64(); 7113 tcg_rn = tcg_temp_new_i64(); 7114 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7115 7116 esize = 8 << esz; 7117 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7118 if (is_u) { 7119 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7120 } else { 7121 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7122 } 7123 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7124 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7125 } 7126 clear_vec_high(s, true, a->rd); 7127 return true; 7128 } 7129 7130 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7131 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7132 7133 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7134 { 7135 assert(shift >= 0 && shift <= 64); 7136 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7137 } 7138 7139 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7140 { 7141 assert(shift >= 0 && shift <= 64); 7142 if (shift == 64) { 7143 tcg_gen_movi_i64(dst, 0); 7144 } else { 7145 tcg_gen_shri_i64(dst, src, shift); 7146 } 7147 } 7148 7149 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7150 { 7151 gen_sshr_d(src, src, shift); 7152 tcg_gen_add_i64(dst, dst, src); 7153 } 7154 7155 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7156 { 7157 gen_ushr_d(src, src, shift); 7158 tcg_gen_add_i64(dst, dst, src); 7159 } 7160 7161 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7162 { 7163 assert(shift >= 0 && shift <= 32); 7164 if (shift) { 7165 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7166 tcg_gen_add_i64(dst, src, rnd); 7167 tcg_gen_sari_i64(dst, dst, shift); 7168 } else { 7169 tcg_gen_mov_i64(dst, src); 7170 } 7171 } 7172 7173 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7174 { 7175 assert(shift >= 0 && shift <= 32); 7176 if (shift) { 7177 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7178 tcg_gen_add_i64(dst, src, rnd); 7179 tcg_gen_shri_i64(dst, dst, shift); 7180 } else { 7181 tcg_gen_mov_i64(dst, src); 7182 } 7183 } 7184 7185 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7186 { 7187 assert(shift >= 0 && shift <= 64); 7188 if (shift == 0) { 7189 tcg_gen_mov_i64(dst, src); 7190 } else if (shift == 64) { 7191 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7192 tcg_gen_movi_i64(dst, 0); 7193 } else { 7194 TCGv_i64 rnd = tcg_temp_new_i64(); 7195 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7196 tcg_gen_sari_i64(dst, src, shift); 7197 tcg_gen_add_i64(dst, dst, rnd); 7198 } 7199 } 7200 7201 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7202 { 7203 assert(shift >= 0 && shift <= 64); 7204 if (shift == 0) { 7205 tcg_gen_mov_i64(dst, src); 7206 } else if (shift == 64) { 7207 /* Rounding will propagate bit 63 into bit 64. */ 7208 tcg_gen_shri_i64(dst, src, 63); 7209 } else { 7210 TCGv_i64 rnd = tcg_temp_new_i64(); 7211 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7212 tcg_gen_shri_i64(dst, src, shift); 7213 tcg_gen_add_i64(dst, dst, rnd); 7214 } 7215 } 7216 7217 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7218 { 7219 gen_srshr_d(src, src, shift); 7220 tcg_gen_add_i64(dst, dst, src); 7221 } 7222 7223 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7224 { 7225 gen_urshr_d(src, src, shift); 7226 tcg_gen_add_i64(dst, dst, src); 7227 } 7228 7229 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7230 { 7231 /* If shift is 64, dst is unchanged. 
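     * (SRI only replaces the low 64 - shift bits of dst with
     * src >> shift, so a shift of 64 inserts nothing at all.)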
*/ 7232 if (shift != 64) { 7233 tcg_gen_shri_i64(src, src, shift); 7234 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7235 } 7236 } 7237 7238 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7239 { 7240 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7241 } 7242 7243 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7244 WideShiftImmFn * const fns[3], MemOp sign) 7245 { 7246 TCGv_i64 tcg_rn, tcg_rd; 7247 int esz = a->esz; 7248 int esize; 7249 WideShiftImmFn *fn; 7250 7251 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7252 7253 if (!fp_access_check(s)) { 7254 return true; 7255 } 7256 7257 tcg_rn = tcg_temp_new_i64(); 7258 tcg_rd = tcg_temp_new_i64(); 7259 tcg_gen_movi_i64(tcg_rd, 0); 7260 7261 fn = fns[esz]; 7262 esize = 8 << esz; 7263 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7264 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7265 fn(tcg_rn, tcg_rn, a->imm); 7266 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7267 } 7268 7269 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7270 clear_vec_high(s, a->q, a->rd); 7271 return true; 7272 } 7273 7274 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7275 { 7276 tcg_gen_sari_i64(d, s, i); 7277 tcg_gen_ext16u_i64(d, d); 7278 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7279 } 7280 7281 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7282 { 7283 tcg_gen_sari_i64(d, s, i); 7284 tcg_gen_ext32u_i64(d, d); 7285 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7286 } 7287 7288 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7289 { 7290 gen_sshr_d(d, s, i); 7291 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7292 } 7293 7294 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7295 { 7296 tcg_gen_shri_i64(d, s, i); 7297 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7298 } 7299 7300 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7301 { 7302 tcg_gen_shri_i64(d, s, i); 7303 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7304 } 7305 7306 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7307 { 7308 gen_ushr_d(d, s, i); 7309 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7310 } 7311 7312 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7313 { 7314 tcg_gen_sari_i64(d, s, i); 7315 tcg_gen_ext16u_i64(d, d); 7316 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7317 } 7318 7319 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7320 { 7321 tcg_gen_sari_i64(d, s, i); 7322 tcg_gen_ext32u_i64(d, d); 7323 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7324 } 7325 7326 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7327 { 7328 gen_sshr_d(d, s, i); 7329 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7330 } 7331 7332 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7333 { 7334 gen_srshr_bhs(d, s, i); 7335 tcg_gen_ext16u_i64(d, d); 7336 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7337 } 7338 7339 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7340 { 7341 gen_srshr_bhs(d, s, i); 7342 tcg_gen_ext32u_i64(d, d); 7343 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7344 } 7345 7346 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7347 { 7348 gen_srshr_d(d, s, i); 7349 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7350 } 7351 7352 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7353 { 7354 gen_urshr_bhs(d, s, i); 7355 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7356 } 7357 7358 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7359 
{ 7360 gen_urshr_bhs(d, s, i); 7361 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7362 } 7363 7364 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7365 { 7366 gen_urshr_d(d, s, i); 7367 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7368 } 7369 7370 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7371 { 7372 gen_srshr_bhs(d, s, i); 7373 tcg_gen_ext16u_i64(d, d); 7374 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7375 } 7376 7377 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7378 { 7379 gen_srshr_bhs(d, s, i); 7380 tcg_gen_ext32u_i64(d, d); 7381 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7382 } 7383 7384 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7385 { 7386 gen_srshr_d(d, s, i); 7387 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7388 } 7389 7390 static WideShiftImmFn * const shrn_fns[] = { 7391 tcg_gen_shri_i64, 7392 tcg_gen_shri_i64, 7393 gen_ushr_d, 7394 }; 7395 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7396 7397 static WideShiftImmFn * const rshrn_fns[] = { 7398 gen_urshr_bhs, 7399 gen_urshr_bhs, 7400 gen_urshr_d, 7401 }; 7402 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7403 7404 static WideShiftImmFn * const sqshrn_fns[] = { 7405 gen_sqshrn_b, 7406 gen_sqshrn_h, 7407 gen_sqshrn_s, 7408 }; 7409 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7410 7411 static WideShiftImmFn * const uqshrn_fns[] = { 7412 gen_uqshrn_b, 7413 gen_uqshrn_h, 7414 gen_uqshrn_s, 7415 }; 7416 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7417 7418 static WideShiftImmFn * const sqshrun_fns[] = { 7419 gen_sqshrun_b, 7420 gen_sqshrun_h, 7421 gen_sqshrun_s, 7422 }; 7423 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7424 7425 static WideShiftImmFn * const sqrshrn_fns[] = { 7426 gen_sqrshrn_b, 7427 gen_sqrshrn_h, 7428 gen_sqrshrn_s, 7429 }; 7430 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7431 7432 static WideShiftImmFn * const uqrshrn_fns[] = { 7433 gen_uqrshrn_b, 7434 gen_uqrshrn_h, 7435 gen_uqrshrn_s, 7436 }; 7437 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7438 7439 static WideShiftImmFn * const sqrshrun_fns[] = { 7440 gen_sqrshrun_b, 7441 gen_sqrshrun_h, 7442 gen_sqrshrun_s, 7443 }; 7444 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7445 7446 /* 7447 * Advanced SIMD Scalar Shift by Immediate 7448 */ 7449 7450 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7451 WideShiftImmFn *fn, bool accumulate, 7452 MemOp sign) 7453 { 7454 if (fp_access_check(s)) { 7455 TCGv_i64 rd = tcg_temp_new_i64(); 7456 TCGv_i64 rn = tcg_temp_new_i64(); 7457 7458 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7459 if (accumulate) { 7460 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7461 } 7462 fn(rd, rn, a->imm); 7463 write_fp_dreg(s, a->rd, rd); 7464 } 7465 return true; 7466 } 7467 7468 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7469 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7470 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7471 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7472 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7473 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7474 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7475 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7476 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7477 7478 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7479 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7480 7481 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7482 NeonGenTwoOpEnvFn *fn) 7483 { 7484 TCGv_i32 t = tcg_temp_new_i32(); 7485 tcg_gen_extrl_i64_i32(t, s); 7486 fn(t, tcg_env, t, tcg_constant_i32(i)); 7487 tcg_gen_extu_i32_i64(d, t); 7488 } 7489 7490 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7491 { 7492 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7493 } 7494 7495 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7496 { 7497 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7498 } 7499 7500 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7501 { 7502 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7503 } 7504 7505 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7506 { 7507 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7508 } 7509 7510 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7511 { 7512 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7513 } 7514 7515 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7516 { 7517 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7518 } 7519 7520 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7521 { 7522 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7523 } 7524 7525 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7526 { 7527 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7528 } 7529 7530 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7531 { 7532 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7533 } 7534 7535 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7536 { 7537 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7538 } 7539 7540 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7541 { 7542 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7543 } 7544 7545 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7546 { 7547 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7548 } 7549 7550 static WideShiftImmFn * const f_scalar_sqshli[] = { 7551 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7552 }; 7553 7554 static WideShiftImmFn * const f_scalar_uqshli[] = { 7555 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7556 }; 7557 7558 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7559 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7560 }; 7561 7562 /* Note that the helpers sign-extend their inputs, so don't do it here. 
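 * (Hence the 0 rather than MO_SIGN passed as the final 'sign'
 * argument in the TRANS expansions below.)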
*/ 7563 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7564 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7565 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7566 7567 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7568 WideShiftImmFn * const fns[3], 7569 MemOp sign, bool zext) 7570 { 7571 MemOp esz = a->esz; 7572 7573 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7574 7575 if (fp_access_check(s)) { 7576 TCGv_i64 rd = tcg_temp_new_i64(); 7577 TCGv_i64 rn = tcg_temp_new_i64(); 7578 7579 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7580 fns[esz](rd, rn, a->imm); 7581 if (zext) { 7582 tcg_gen_ext_i64(rd, rd, esz); 7583 } 7584 write_fp_dreg(s, a->rd, rd); 7585 } 7586 return true; 7587 } 7588 7589 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7590 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7591 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7592 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7593 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7594 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7595 7596 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7597 { 7598 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7599 tcg_rd = cpu_reg(s, a->rd); 7600 7601 if (!a->sf && is_signed) { 7602 tcg_n = tcg_temp_new_i64(); 7603 tcg_m = tcg_temp_new_i64(); 7604 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7605 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7606 } else { 7607 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7608 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7609 } 7610 7611 if (is_signed) { 7612 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7613 } else { 7614 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7615 } 7616 7617 if (!a->sf) { /* zero extend final result */ 7618 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7619 } 7620 return true; 7621 } 7622 7623 TRANS(SDIV, do_div, a, true) 7624 TRANS(UDIV, do_div, a, false) 7625 7626 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7627 * Note that it is the caller's responsibility to ensure that the 7628 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7629 * mandated semantics for out of range shifts. 7630 */ 7631 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7632 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7633 { 7634 switch (shift_type) { 7635 case A64_SHIFT_TYPE_LSL: 7636 tcg_gen_shl_i64(dst, src, shift_amount); 7637 break; 7638 case A64_SHIFT_TYPE_LSR: 7639 tcg_gen_shr_i64(dst, src, shift_amount); 7640 break; 7641 case A64_SHIFT_TYPE_ASR: 7642 if (!sf) { 7643 tcg_gen_ext32s_i64(dst, src); 7644 } 7645 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 7646 break; 7647 case A64_SHIFT_TYPE_ROR: 7648 if (sf) { 7649 tcg_gen_rotr_i64(dst, src, shift_amount); 7650 } else { 7651 TCGv_i32 t0, t1; 7652 t0 = tcg_temp_new_i32(); 7653 t1 = tcg_temp_new_i32(); 7654 tcg_gen_extrl_i64_i32(t0, src); 7655 tcg_gen_extrl_i64_i32(t1, shift_amount); 7656 tcg_gen_rotr_i32(t0, t0, t1); 7657 tcg_gen_extu_i32_i64(dst, t0); 7658 } 7659 break; 7660 default: 7661 assert(FALSE); /* all shift types should be handled */ 7662 break; 7663 } 7664 7665 if (!sf) { /* zero extend final result */ 7666 tcg_gen_ext32u_i64(dst, dst); 7667 } 7668 } 7669 7670 /* Shift a TCGv src by immediate, put result in dst. 
7671 * The shift amount must be in range (this should always be true as the 7672 * relevant instructions will UNDEF on bad shift immediates). 7673 */ 7674 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 7675 enum a64_shift_type shift_type, unsigned int shift_i) 7676 { 7677 assert(shift_i < (sf ? 64 : 32)); 7678 7679 if (shift_i == 0) { 7680 tcg_gen_mov_i64(dst, src); 7681 } else { 7682 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 7683 } 7684 } 7685 7686 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 7687 enum a64_shift_type shift_type) 7688 { 7689 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 7690 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 7691 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 7692 7693 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 7694 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 7695 return true; 7696 } 7697 7698 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 7699 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 7700 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 7701 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 7702 7703 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 7704 { 7705 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 7706 TCGv_i32 tcg_bytes; 7707 7708 switch (a->esz) { 7709 case MO_8: 7710 case MO_16: 7711 case MO_32: 7712 tcg_val = tcg_temp_new_i64(); 7713 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 7714 break; 7715 case MO_64: 7716 tcg_val = cpu_reg(s, a->rm); 7717 break; 7718 default: 7719 g_assert_not_reached(); 7720 } 7721 tcg_acc = cpu_reg(s, a->rn); 7722 tcg_bytes = tcg_constant_i32(1 << a->esz); 7723 tcg_rd = cpu_reg(s, a->rd); 7724 7725 if (crc32c) { 7726 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 7727 } else { 7728 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 7729 } 7730 return true; 7731 } 7732 7733 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 7734 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 7735 7736 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 7737 { 7738 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 7739 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 7740 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 7741 7742 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 7743 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 7744 7745 if (setflag) { 7746 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 7747 } else { 7748 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 7749 } 7750 return true; 7751 } 7752 7753 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 7754 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 7755 7756 static bool trans_IRG(DisasContext *s, arg_rrr *a) 7757 { 7758 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 7759 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 7760 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 7761 7762 if (s->ata[0]) { 7763 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 7764 } else { 7765 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 7766 } 7767 return true; 7768 } 7769 return false; 7770 } 7771 7772 static bool trans_GMI(DisasContext *s, arg_rrr *a) 7773 { 7774 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 7775 TCGv_i64 t = tcg_temp_new_i64(); 7776 7777 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 7778 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 7779 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 7780 return true; 7781 } 7782 return false; 7783 } 7784 7785 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 7786 { 7787 if (dc_isar_feature(aa64_pauth, s)) { 7788 
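/*
 * PACGA computes a pointer authentication code for Xn, using Xm|SP as
 * the modifier and the APGA key; architecturally the result is placed
 * in the top 32 bits of Xd with the bottom 32 bits set to zero.
 */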
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 7789 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 7790 return true; 7791 } 7792 return false; 7793 } 7794 7795 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 7796 7797 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 7798 { 7799 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 7800 return true; 7801 } 7802 7803 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7804 { 7805 TCGv_i32 t32 = tcg_temp_new_i32(); 7806 7807 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7808 gen_helper_rbit(t32, t32); 7809 tcg_gen_extu_i32_i64(tcg_rd, t32); 7810 } 7811 7812 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 7813 { 7814 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7815 7816 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 7817 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 7818 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 7819 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 7820 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 7821 } 7822 7823 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7824 { 7825 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 7826 } 7827 7828 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7829 { 7830 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 7831 } 7832 7833 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7834 { 7835 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 7836 } 7837 7838 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7839 { 7840 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 7841 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 7842 } 7843 7844 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 7845 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 7846 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 7847 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 7848 7849 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7850 { 7851 TCGv_i32 t32 = tcg_temp_new_i32(); 7852 7853 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7854 tcg_gen_clzi_i32(t32, t32, 32); 7855 tcg_gen_extu_i32_i64(tcg_rd, t32); 7856 } 7857 7858 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7859 { 7860 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 7861 } 7862 7863 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7864 { 7865 TCGv_i32 t32 = tcg_temp_new_i32(); 7866 7867 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7868 tcg_gen_clrsb_i32(t32, t32); 7869 tcg_gen_extu_i32_i64(tcg_rd, t32); 7870 } 7871 7872 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 7873 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) 7874 7875 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) 7876 { 7877 TCGv_i64 tcg_rd, tcg_rn; 7878 7879 if (a->z) { 7880 if (a->rn != 31) { 7881 return false; 7882 } 7883 tcg_rn = tcg_constant_i64(0); 7884 } else { 7885 tcg_rn = cpu_reg_sp(s, a->rn); 7886 } 7887 if (s->pauth_active) { 7888 tcg_rd = cpu_reg(s, a->rd); 7889 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); 7890 } 7891 return true; 7892 } 7893 7894 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) 7895 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) 7896 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) 7897 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb) 7898 7899 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) 7900 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) 7901 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) 7902 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) 7903 7904 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) 7905 { 7906 if (s->pauth_active) { 7907 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7908 fn(tcg_rd, tcg_env, tcg_rd); 7909 } 7910 return true; 7911 } 7912 7913 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) 7914 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) 7915 7916 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, 7917 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) 7918 { 7919 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 7920 7921 if (!a->sf && (a->sa & (1 << 5))) { 7922 return false; 7923 } 7924 7925 tcg_rd = cpu_reg(s, a->rd); 7926 tcg_rn = cpu_reg(s, a->rn); 7927 7928 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 7929 if (a->sa) { 7930 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 7931 } 7932 7933 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); 7934 if (!a->sf) { 7935 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7936 } 7937 if (setflags) { 7938 gen_logic_CC(a->sf, tcg_rd); 7939 } 7940 return true; 7941 } 7942 7943 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) 7944 { 7945 /* 7946 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for 7947 * register-register MOV and MVN, so it is worth special casing. 
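 * (MOV Xd, Xm assembles as ORR Xd, XZR, Xm and MVN Xd, Xm as
 * ORN Xd, XZR, Xm.)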
7948 */ 7949 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 7950 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 7951 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 7952 7953 if (a->n) { 7954 tcg_gen_not_i64(tcg_rd, tcg_rm); 7955 if (!a->sf) { 7956 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7957 } 7958 } else { 7959 if (a->sf) { 7960 tcg_gen_mov_i64(tcg_rd, tcg_rm); 7961 } else { 7962 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 7963 } 7964 } 7965 return true; 7966 } 7967 7968 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 7969 } 7970 7971 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 7972 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 7973 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 7974 7975 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 7976 bool sub_op, bool setflags) 7977 { 7978 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 7979 7980 if (a->sa > 4) { 7981 return false; 7982 } 7983 7984 /* non-flag setting ops may use SP */ 7985 if (!setflags) { 7986 tcg_rd = cpu_reg_sp(s, a->rd); 7987 } else { 7988 tcg_rd = cpu_reg(s, a->rd); 7989 } 7990 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 7991 7992 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 7993 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 7994 7995 tcg_result = tcg_temp_new_i64(); 7996 if (!setflags) { 7997 if (sub_op) { 7998 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 7999 } else { 8000 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8001 } 8002 } else { 8003 if (sub_op) { 8004 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8005 } else { 8006 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8007 } 8008 } 8009 8010 if (a->sf) { 8011 tcg_gen_mov_i64(tcg_rd, tcg_result); 8012 } else { 8013 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8014 } 8015 return true; 8016 } 8017 8018 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8019 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8020 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8021 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8022 8023 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8024 bool sub_op, bool setflags) 8025 { 8026 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8027 8028 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8029 return false; 8030 } 8031 8032 tcg_rd = cpu_reg(s, a->rd); 8033 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8034 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8035 8036 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8037 8038 tcg_result = tcg_temp_new_i64(); 8039 if (!setflags) { 8040 if (sub_op) { 8041 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8042 } else { 8043 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8044 } 8045 } else { 8046 if (sub_op) { 8047 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8048 } else { 8049 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8050 } 8051 } 8052 8053 if (a->sf) { 8054 tcg_gen_mov_i64(tcg_rd, tcg_result); 8055 } else { 8056 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8057 } 8058 return true; 8059 } 8060 8061 TRANS(ADD_r, do_addsub_reg, a, false, false) 8062 TRANS(SUB_r, do_addsub_reg, a, true, false) 8063 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8064 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8065 8066 static bool do_mulh(DisasContext *s, arg_rrr *a, 8067 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8068 { 8069 TCGv_i64 discard = tcg_temp_new_i64(); 8070 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8071 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8072 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8073 8074 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8075 return true; 8076 } 8077 
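/*
 * tcg_gen_muls2_i64/tcg_gen_mulu2_i64 produce the full 128-bit product
 * as a (low, high) pair; do_mulh discards the low half and writes the
 * high half straight to Rd, so e.g. SMULH X0, X1, X2 leaves the signed
 * 128-bit product of X1 and X2 shifted right by 64 in X0.
 */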
8078 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8079 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8080 8081 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8082 bool sf, bool is_sub, MemOp mop) 8083 { 8084 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8085 TCGv_i64 tcg_op1, tcg_op2; 8086 8087 if (mop == MO_64) { 8088 tcg_op1 = cpu_reg(s, a->rn); 8089 tcg_op2 = cpu_reg(s, a->rm); 8090 } else { 8091 tcg_op1 = tcg_temp_new_i64(); 8092 tcg_op2 = tcg_temp_new_i64(); 8093 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8094 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8095 } 8096 8097 if (a->ra == 31 && !is_sub) { 8098 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8099 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8100 } else { 8101 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8102 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8103 8104 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8105 if (is_sub) { 8106 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8107 } else { 8108 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8109 } 8110 } 8111 8112 if (!sf) { 8113 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8114 } 8115 return true; 8116 } 8117 8118 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8119 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8120 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8121 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8122 8123 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8124 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8125 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8126 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8127 8128 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8129 bool is_sub, bool setflags) 8130 { 8131 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8132 8133 tcg_rd = cpu_reg(s, a->rd); 8134 tcg_rn = cpu_reg(s, a->rn); 8135 8136 if (is_sub) { 8137 tcg_y = tcg_temp_new_i64(); 8138 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8139 } else { 8140 tcg_y = cpu_reg(s, a->rm); 8141 } 8142 8143 if (setflags) { 8144 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8145 } else { 8146 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8147 } 8148 return true; 8149 } 8150 8151 TRANS(ADC, do_adc_sbc, a, false, false) 8152 TRANS(SBC, do_adc_sbc, a, true, false) 8153 TRANS(ADCS, do_adc_sbc, a, false, true) 8154 TRANS(SBCS, do_adc_sbc, a, true, true) 8155 8156 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8157 { 8158 int mask = a->mask; 8159 TCGv_i64 tcg_rn; 8160 TCGv_i32 nzcv; 8161 8162 if (!dc_isar_feature(aa64_condm_4, s)) { 8163 return false; 8164 } 8165 8166 tcg_rn = read_cpu_reg(s, a->rn, 1); 8167 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8168 8169 nzcv = tcg_temp_new_i32(); 8170 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8171 8172 if (mask & 8) { /* N */ 8173 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8174 } 8175 if (mask & 4) { /* Z */ 8176 tcg_gen_not_i32(cpu_ZF, nzcv); 8177 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8178 } 8179 if (mask & 2) { /* C */ 8180 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8181 } 8182 if (mask & 1) { /* V */ 8183 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8184 } 8185 return true; 8186 } 8187 8188 static bool do_setf(DisasContext *s, int rn, int shift) 8189 { 8190 TCGv_i32 tmp = tcg_temp_new_i32(); 8191 8192 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8193 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8194 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8195 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8196 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8197 return true; 8198 } 8199 8200 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8201 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8202 8203 /* 
CCMP, CCMN */ 8204 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8205 { 8206 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8207 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8208 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8209 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8210 TCGv_i64 tcg_rn, tcg_y; 8211 DisasCompare c; 8212 unsigned nzcv; 8213 8214 /* Set T0 = !COND. */ 8215 arm_test_cc(&c, a->cond); 8216 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8217 8218 /* Load the arguments for the new comparison. */ 8219 if (a->imm) { 8220 tcg_y = tcg_constant_i64(a->y); 8221 } else { 8222 tcg_y = cpu_reg(s, a->y); 8223 } 8224 tcg_rn = cpu_reg(s, a->rn); 8225 8226 /* Set the flags for the new comparison. */ 8227 if (a->op) { 8228 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8229 } else { 8230 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8231 } 8232 8233 /* 8234 * If COND was false, force the flags to #nzcv. Compute two masks 8235 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8236 * For tcg hosts that support ANDC, we can make do with just T1. 8237 * In either case, allow the tcg optimizer to delete any unused mask. 8238 */ 8239 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8240 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8241 8242 nzcv = a->nzcv; 8243 if (nzcv & 8) { /* N */ 8244 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8245 } else { 8246 if (TCG_TARGET_HAS_andc_i32) { 8247 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8248 } else { 8249 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8250 } 8251 } 8252 if (nzcv & 4) { /* Z */ 8253 if (TCG_TARGET_HAS_andc_i32) { 8254 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8255 } else { 8256 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8257 } 8258 } else { 8259 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8260 } 8261 if (nzcv & 2) { /* C */ 8262 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8263 } else { 8264 if (TCG_TARGET_HAS_andc_i32) { 8265 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8266 } else { 8267 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8268 } 8269 } 8270 if (nzcv & 1) { /* V */ 8271 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8272 } else { 8273 if (TCG_TARGET_HAS_andc_i32) { 8274 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8275 } else { 8276 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8277 } 8278 } 8279 return true; 8280 } 8281 8282 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8283 { 8284 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8285 TCGv_i64 zero = tcg_constant_i64(0); 8286 DisasCompare64 c; 8287 8288 a64_test_cc(&c, a->cond); 8289 8290 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8291 /* CSET & CSETM. 
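 * With Rn == Rm == XZR, CSINC collapses to Rd = (cond ? 0 : 1) and
 * CSINV to Rd = (cond ? 0 : -1), so a setcond/negsetcond on the
 * inverted condition is all that is needed.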
*/ 8292 if (a->else_inv) { 8293 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8294 tcg_rd, c.value, zero); 8295 } else { 8296 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8297 tcg_rd, c.value, zero); 8298 } 8299 } else { 8300 TCGv_i64 t_true = cpu_reg(s, a->rn); 8301 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8302 8303 if (a->else_inv && a->else_inc) { 8304 tcg_gen_neg_i64(t_false, t_false); 8305 } else if (a->else_inv) { 8306 tcg_gen_not_i64(t_false, t_false); 8307 } else if (a->else_inc) { 8308 tcg_gen_addi_i64(t_false, t_false, 1); 8309 } 8310 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8311 } 8312 8313 if (!a->sf) { 8314 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8315 } 8316 return true; 8317 } 8318 8319 typedef struct FPScalar1Int { 8320 void (*gen_h)(TCGv_i32, TCGv_i32); 8321 void (*gen_s)(TCGv_i32, TCGv_i32); 8322 void (*gen_d)(TCGv_i64, TCGv_i64); 8323 } FPScalar1Int; 8324 8325 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8326 const FPScalar1Int *f) 8327 { 8328 switch (a->esz) { 8329 case MO_64: 8330 if (fp_access_check(s)) { 8331 TCGv_i64 t = read_fp_dreg(s, a->rn); 8332 f->gen_d(t, t); 8333 write_fp_dreg(s, a->rd, t); 8334 } 8335 break; 8336 case MO_32: 8337 if (fp_access_check(s)) { 8338 TCGv_i32 t = read_fp_sreg(s, a->rn); 8339 f->gen_s(t, t); 8340 write_fp_sreg(s, a->rd, t); 8341 } 8342 break; 8343 case MO_16: 8344 if (!dc_isar_feature(aa64_fp16, s)) { 8345 return false; 8346 } 8347 if (fp_access_check(s)) { 8348 TCGv_i32 t = read_fp_hreg(s, a->rn); 8349 f->gen_h(t, t); 8350 write_fp_sreg(s, a->rd, t); 8351 } 8352 break; 8353 default: 8354 return false; 8355 } 8356 return true; 8357 } 8358 8359 static const FPScalar1Int f_scalar_fmov = { 8360 tcg_gen_mov_i32, 8361 tcg_gen_mov_i32, 8362 tcg_gen_mov_i64, 8363 }; 8364 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) 8365 8366 static const FPScalar1Int f_scalar_fabs = { 8367 gen_vfp_absh, 8368 gen_vfp_abss, 8369 gen_vfp_absd, 8370 }; 8371 TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) 8372 8373 static const FPScalar1Int f_scalar_fneg = { 8374 gen_vfp_negh, 8375 gen_vfp_negs, 8376 gen_vfp_negd, 8377 }; 8378 TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) 8379 8380 typedef struct FPScalar1 { 8381 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8382 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8383 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8384 } FPScalar1; 8385 8386 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8387 const FPScalar1 *f, int rmode) 8388 { 8389 TCGv_i32 tcg_rmode = NULL; 8390 TCGv_ptr fpst; 8391 TCGv_i64 t64; 8392 TCGv_i32 t32; 8393 int check = fp_access_check_scalar_hsd(s, a->esz); 8394 8395 if (check <= 0) { 8396 return check == 0; 8397 } 8398 8399 fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 8400 if (rmode >= 0) { 8401 tcg_rmode = gen_set_rmode(rmode, fpst); 8402 } 8403 8404 switch (a->esz) { 8405 case MO_64: 8406 t64 = read_fp_dreg(s, a->rn); 8407 f->gen_d(t64, t64, fpst); 8408 write_fp_dreg(s, a->rd, t64); 8409 break; 8410 case MO_32: 8411 t32 = read_fp_sreg(s, a->rn); 8412 f->gen_s(t32, t32, fpst); 8413 write_fp_sreg(s, a->rd, t32); 8414 break; 8415 case MO_16: 8416 t32 = read_fp_hreg(s, a->rn); 8417 f->gen_h(t32, t32, fpst); 8418 write_fp_sreg(s, a->rd, t32); 8419 break; 8420 default: 8421 g_assert_not_reached(); 8422 } 8423 8424 if (rmode >= 0) { 8425 gen_restore_rmode(tcg_rmode, fpst); 8426 } 8427 return true; 8428 } 8429 8430 static const FPScalar1 f_scalar_fsqrt = { 8431 gen_helper_vfp_sqrth, 8432 gen_helper_vfp_sqrts, 8433 gen_helper_vfp_sqrtd, 8434 }; 8435 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8436 8437 static const FPScalar1 f_scalar_frint = { 8438 gen_helper_advsimd_rinth, 8439 gen_helper_rints, 8440 gen_helper_rintd, 8441 }; 8442 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8443 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8444 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8445 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8446 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8447 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8448 8449 static const FPScalar1 f_scalar_frintx = { 8450 gen_helper_advsimd_rinth_exact, 8451 gen_helper_rints_exact, 8452 gen_helper_rintd_exact, 8453 }; 8454 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8455 8456 static const FPScalar1 f_scalar_bfcvt = { 8457 .gen_s = gen_helper_bfcvt, 8458 }; 8459 TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) 8460 8461 static const FPScalar1 f_scalar_frint32 = { 8462 NULL, 8463 gen_helper_frint32_s, 8464 gen_helper_frint32_d, 8465 }; 8466 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8467 &f_scalar_frint32, FPROUNDING_ZERO) 8468 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8469 8470 static const FPScalar1 f_scalar_frint64 = { 8471 NULL, 8472 gen_helper_frint64_s, 8473 gen_helper_frint64_d, 8474 }; 8475 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8476 &f_scalar_frint64, FPROUNDING_ZERO) 8477 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8478 8479 static const FPScalar1 f_scalar_frecpe = { 8480 gen_helper_recpe_f16, 8481 gen_helper_recpe_f32, 8482 gen_helper_recpe_f64, 8483 }; 8484 TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) 8485 8486 static const FPScalar1 f_scalar_frecpx = { 8487 gen_helper_frecpx_f16, 8488 gen_helper_frecpx_f32, 8489 gen_helper_frecpx_f64, 8490 }; 8491 TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) 8492 8493 static const FPScalar1 f_scalar_frsqrte = { 8494 gen_helper_rsqrte_f16, 8495 gen_helper_rsqrte_f32, 8496 gen_helper_rsqrte_f64, 8497 }; 8498 TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) 8499 8500 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8501 { 8502 if (fp_access_check(s)) { 8503 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8504 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8505 8506 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); 8507 write_fp_dreg(s, a->rd, tcg_rd); 8508 } 8509 return true; 8510 } 8511 8512 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8513 { 8514 if (fp_access_check(s)) { 8515 TCGv_i32 tmp = read_fp_sreg(s, a->rn); 8516 TCGv_i32 ahp = 
get_ahp_flag(); 8517 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 8518 8519 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 8520 /* write_fp_sreg is OK here because top half of result is zero */ 8521 write_fp_sreg(s, a->rd, tmp); 8522 } 8523 return true; 8524 } 8525 8526 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 8527 { 8528 if (fp_access_check(s)) { 8529 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8530 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8531 8532 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); 8533 write_fp_sreg(s, a->rd, tcg_rd); 8534 } 8535 return true; 8536 } 8537 8538 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 8539 { 8540 if (fp_access_check(s)) { 8541 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8542 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8543 TCGv_i32 ahp = get_ahp_flag(); 8544 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 8545 8546 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8547 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 8548 write_fp_sreg(s, a->rd, tcg_rd); 8549 } 8550 return true; 8551 } 8552 8553 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 8554 { 8555 if (fp_access_check(s)) { 8556 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8557 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8558 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 8559 TCGv_i32 tcg_ahp = get_ahp_flag(); 8560 8561 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8562 write_fp_sreg(s, a->rd, tcg_rd); 8563 } 8564 return true; 8565 } 8566 8567 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 8568 { 8569 if (fp_access_check(s)) { 8570 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8571 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8572 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 8573 TCGv_i32 tcg_ahp = get_ahp_flag(); 8574 8575 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8576 write_fp_dreg(s, a->rd, tcg_rd); 8577 } 8578 return true; 8579 } 8580 8581 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 8582 TCGv_i64 tcg_int, bool is_signed) 8583 { 8584 TCGv_ptr tcg_fpstatus; 8585 TCGv_i32 tcg_shift, tcg_single; 8586 TCGv_i64 tcg_double; 8587 8588 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 8589 tcg_shift = tcg_constant_i32(shift); 8590 8591 switch (esz) { 8592 case MO_64: 8593 tcg_double = tcg_temp_new_i64(); 8594 if (is_signed) { 8595 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 8596 } else { 8597 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 8598 } 8599 write_fp_dreg(s, rd, tcg_double); 8600 break; 8601 8602 case MO_32: 8603 tcg_single = tcg_temp_new_i32(); 8604 if (is_signed) { 8605 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8606 } else { 8607 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8608 } 8609 write_fp_sreg(s, rd, tcg_single); 8610 break; 8611 8612 case MO_16: 8613 tcg_single = tcg_temp_new_i32(); 8614 if (is_signed) { 8615 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8616 } else { 8617 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8618 } 8619 write_fp_sreg(s, rd, tcg_single); 8620 break; 8621 8622 default: 8623 g_assert_not_reached(); 8624 } 8625 return true; 8626 } 8627 8628 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 8629 { 8630 TCGv_i64 tcg_int; 8631 int check = fp_access_check_scalar_hsd(s, a->esz); 8632 8633 if (check <= 0) { 8634 return check == 0; 8635 } 8636 8637 if (a->sf) { 8638 tcg_int = cpu_reg(s, a->rn); 8639 } else { 8640 tcg_int = read_cpu_reg(s, a->rn, true); 8641 if (is_signed) { 8642 tcg_gen_ext32s_i64(tcg_int, tcg_int); 8643 } else { 8644 tcg_gen_ext32u_i64(tcg_int, tcg_int); 8645 } 8646 } 8647 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 8648 } 8649 8650 TRANS(SCVTF_g, do_cvtf_g, a, true) 8651 TRANS(UCVTF_g, do_cvtf_g, a, false) 8652 8653 /* 8654 * [US]CVTF (vector), scalar version. 8655 * Which sounds weird, but really just means input from fp register 8656 * instead of input from general register. Input and output element 8657 * size are always equal. 8658 */ 8659 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 8660 { 8661 TCGv_i64 tcg_int; 8662 int check = fp_access_check_scalar_hsd(s, a->esz); 8663 8664 if (check <= 0) { 8665 return check == 0; 8666 } 8667 8668 tcg_int = tcg_temp_new_i64(); 8669 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 8670 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 8671 } 8672 8673 TRANS(SCVTF_f, do_cvtf_f, a, true) 8674 TRANS(UCVTF_f, do_cvtf_f, a, false) 8675 8676 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 8677 TCGv_i64 tcg_out, int shift, int rn, 8678 ARMFPRounding rmode) 8679 { 8680 TCGv_ptr tcg_fpstatus; 8681 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 8682 8683 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 8684 tcg_shift = tcg_constant_i32(shift); 8685 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 8686 8687 switch (esz) { 8688 case MO_64: 8689 read_vec_element(s, tcg_out, rn, 0, MO_64); 8690 switch (out) { 8691 case MO_64 | MO_SIGN: 8692 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8693 break; 8694 case MO_64: 8695 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8696 break; 8697 case MO_32 | MO_SIGN: 8698 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8699 break; 8700 case MO_32: 8701 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8702 break; 8703 default: 8704 g_assert_not_reached(); 8705 } 8706 break; 8707 8708 case MO_32: 8709 tcg_single = read_fp_sreg(s, rn); 8710 switch (out) { 8711 case MO_64 | MO_SIGN: 8712 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8713 break; 8714 case MO_64: 8715 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8716 break; 8717 case MO_32 | MO_SIGN: 8718 gen_helper_vfp_tosls(tcg_single, tcg_single, 8719 tcg_shift, tcg_fpstatus); 8720 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8721 break; 8722 case MO_32: 8723 gen_helper_vfp_touls(tcg_single, tcg_single, 8724 tcg_shift, tcg_fpstatus); 8725 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8726 break; 8727 default: 8728 g_assert_not_reached(); 8729 } 8730 break; 8731 8732 case MO_16: 8733 tcg_single = read_fp_hreg(s, rn); 8734 switch (out) { 8735 case MO_64 | MO_SIGN: 8736 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8737 break; 8738 case MO_64: 8739 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8740 break; 8741 case MO_32 | MO_SIGN: 8742 gen_helper_vfp_toslh(tcg_single, tcg_single, 8743 tcg_shift, tcg_fpstatus); 8744 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8745 break; 8746 case MO_32: 8747 gen_helper_vfp_toulh(tcg_single, tcg_single, 8748 tcg_shift, tcg_fpstatus); 8749 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8750 break; 8751 case MO_16 | MO_SIGN: 8752 gen_helper_vfp_toshh(tcg_single, tcg_single, 8753 tcg_shift, tcg_fpstatus); 8754 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8755 break; 8756 case MO_16: 8757 gen_helper_vfp_touhh(tcg_single, tcg_single, 8758 tcg_shift, tcg_fpstatus); 8759 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8760 break; 8761 default: 8762 g_assert_not_reached(); 8763 } 8764 break; 8765 8766 default: 8767 g_assert_not_reached(); 8768 } 8769 8770 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 8771 } 8772 8773 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 8774 ARMFPRounding rmode, bool is_signed) 8775 { 8776 TCGv_i64 tcg_int; 8777 int check = fp_access_check_scalar_hsd(s, a->esz); 8778 8779 if (check <= 0) { 8780 return check == 0; 8781 } 8782 8783 tcg_int = cpu_reg(s, a->rd); 8784 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 8785 a->esz, tcg_int, a->shift, a->rn, rmode); 8786 8787 if (!a->sf) { 8788 tcg_gen_ext32u_i64(tcg_int, tcg_int); 8789 } 8790 return true; 8791 } 8792 8793 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 8794 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 8795 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 8796 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 8797 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 8798 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 8799 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 8800 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 8801 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 8802 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 8803 8804 /* 8805 * FCVT* (vector), scalar version. 8806 * Which sounds weird, but really just means output to fp register 8807 * instead of output to general register. Input and output element 8808 * size are always equal. 8809 */ 8810 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 8811 ARMFPRounding rmode, bool is_signed) 8812 { 8813 TCGv_i64 tcg_int; 8814 int check = fp_access_check_scalar_hsd(s, a->esz); 8815 8816 if (check <= 0) { 8817 return check == 0; 8818 } 8819 8820 tcg_int = tcg_temp_new_i64(); 8821 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 8822 a->esz, tcg_int, a->shift, a->rn, rmode); 8823 8824 clear_vec(s, a->rd); 8825 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 8826 return true; 8827 } 8828 8829 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 8830 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 8831 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 8832 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 8833 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 8834 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 8835 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 8836 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 8837 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 8838 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 8839 8840 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 8841 { 8842 if (!dc_isar_feature(aa64_jscvt, s)) { 8843 return false; 8844 } 8845 if (fp_access_check(s)) { 8846 TCGv_i64 t = read_fp_dreg(s, a->rn); 8847 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 8848 8849 gen_helper_fjcvtzs(t, t, fpstatus); 8850 8851 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 8852 tcg_gen_extrh_i64_i32(cpu_ZF, t); 8853 tcg_gen_movi_i32(cpu_CF, 0); 8854 tcg_gen_movi_i32(cpu_NF, 0); 8855 tcg_gen_movi_i32(cpu_VF, 0); 8856 } 8857 return true; 8858 } 8859 8860 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 8861 { 8862 if (!dc_isar_feature(aa64_fp16, s)) { 8863 return false; 8864 } 8865 if (fp_access_check(s)) { 8866 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8867 TCGv_i64 tmp = tcg_temp_new_i64(); 8868 tcg_gen_ext16u_i64(tmp, tcg_rn); 8869 write_fp_dreg(s, a->rd, tmp); 8870 } 8871 return true; 8872 } 8873 8874 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 8875 { 8876 if (fp_access_check(s)) { 8877 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8878 TCGv_i64 tmp = tcg_temp_new_i64(); 8879 tcg_gen_ext32u_i64(tmp, tcg_rn); 8880 write_fp_dreg(s, a->rd, tmp); 8881 } 8882 return true; 8883 } 8884 8885 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 8886 { 8887 if (fp_access_check(s)) { 8888 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8889 write_fp_dreg(s, a->rd, tcg_rn); 8890 } 8891 return true; 8892 } 8893 8894 static bool 
trans_FMOV_ux(DisasContext *s, arg_rr *a) 8895 { 8896 if (fp_access_check(s)) { 8897 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8898 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 8899 clear_vec_high(s, true, a->rd); 8900 } 8901 return true; 8902 } 8903 8904 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 8905 { 8906 if (!dc_isar_feature(aa64_fp16, s)) { 8907 return false; 8908 } 8909 if (fp_access_check(s)) { 8910 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8911 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 8912 } 8913 return true; 8914 } 8915 8916 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 8917 { 8918 if (fp_access_check(s)) { 8919 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8920 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 8921 } 8922 return true; 8923 } 8924 8925 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 8926 { 8927 if (fp_access_check(s)) { 8928 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8929 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 8930 } 8931 return true; 8932 } 8933 8934 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 8935 { 8936 if (fp_access_check(s)) { 8937 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8938 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 8939 } 8940 return true; 8941 } 8942 8943 typedef struct ENVScalar1 { 8944 NeonGenOneOpEnvFn *gen_bhs[3]; 8945 NeonGenOne64OpEnvFn *gen_d; 8946 } ENVScalar1; 8947 8948 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 8949 { 8950 if (!fp_access_check(s)) { 8951 return true; 8952 } 8953 if (a->esz == MO_64) { 8954 TCGv_i64 t = read_fp_dreg(s, a->rn); 8955 f->gen_d(t, tcg_env, t); 8956 write_fp_dreg(s, a->rd, t); 8957 } else { 8958 TCGv_i32 t = tcg_temp_new_i32(); 8959 8960 read_vec_element_i32(s, t, a->rn, 0, a->esz); 8961 f->gen_bhs[a->esz](t, tcg_env, t); 8962 write_fp_sreg(s, a->rd, t); 8963 } 8964 return true; 8965 } 8966 8967 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 8968 { 8969 if (a->esz == MO_64 && !a->q) { 8970 return false; 8971 } 8972 if (!fp_access_check(s)) { 8973 return true; 8974 } 8975 if (a->esz == MO_64) { 8976 TCGv_i64 t = tcg_temp_new_i64(); 8977 8978 for (int i = 0; i < 2; ++i) { 8979 read_vec_element(s, t, a->rn, i, MO_64); 8980 f->gen_d(t, tcg_env, t); 8981 write_vec_element(s, t, a->rd, i, MO_64); 8982 } 8983 } else { 8984 TCGv_i32 t = tcg_temp_new_i32(); 8985 int n = (a->q ? 
16 : 8) >> a->esz; 8986 8987 for (int i = 0; i < n; ++i) { 8988 read_vec_element_i32(s, t, a->rn, i, a->esz); 8989 f->gen_bhs[a->esz](t, tcg_env, t); 8990 write_vec_element_i32(s, t, a->rd, i, a->esz); 8991 } 8992 } 8993 clear_vec_high(s, a->q, a->rd); 8994 return true; 8995 } 8996 8997 static const ENVScalar1 f_scalar_sqabs = { 8998 { gen_helper_neon_qabs_s8, 8999 gen_helper_neon_qabs_s16, 9000 gen_helper_neon_qabs_s32 }, 9001 gen_helper_neon_qabs_s64, 9002 }; 9003 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9004 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9005 9006 static const ENVScalar1 f_scalar_sqneg = { 9007 { gen_helper_neon_qneg_s8, 9008 gen_helper_neon_qneg_s16, 9009 gen_helper_neon_qneg_s32 }, 9010 gen_helper_neon_qneg_s64, 9011 }; 9012 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9013 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9014 9015 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9016 { 9017 if (fp_access_check(s)) { 9018 TCGv_i64 t = read_fp_dreg(s, a->rn); 9019 f(t, t); 9020 write_fp_dreg(s, a->rd, t); 9021 } 9022 return true; 9023 } 9024 9025 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9026 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9027 9028 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9029 { 9030 if (fp_access_check(s)) { 9031 TCGv_i64 t = read_fp_dreg(s, a->rn); 9032 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9033 write_fp_dreg(s, a->rd, t); 9034 } 9035 return true; 9036 } 9037 9038 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9039 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9040 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9041 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9042 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9043 9044 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9045 ArithOneOp * const fn[3]) 9046 { 9047 if (a->esz == MO_64) { 9048 return false; 9049 } 9050 if (fp_access_check(s)) { 9051 TCGv_i64 t = tcg_temp_new_i64(); 9052 9053 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9054 fn[a->esz](t, t); 9055 clear_vec(s, a->rd); 9056 write_vec_element(s, t, a->rd, 0, a->esz); 9057 } 9058 return true; 9059 } 9060 9061 #define WRAP_ENV(NAME) \ 9062 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9063 { gen_helper_##NAME(d, tcg_env, n); } 9064 9065 WRAP_ENV(neon_unarrow_sat8) 9066 WRAP_ENV(neon_unarrow_sat16) 9067 WRAP_ENV(neon_unarrow_sat32) 9068 9069 static ArithOneOp * const f_scalar_sqxtun[] = { 9070 gen_neon_unarrow_sat8, 9071 gen_neon_unarrow_sat16, 9072 gen_neon_unarrow_sat32, 9073 }; 9074 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9075 9076 WRAP_ENV(neon_narrow_sat_s8) 9077 WRAP_ENV(neon_narrow_sat_s16) 9078 WRAP_ENV(neon_narrow_sat_s32) 9079 9080 static ArithOneOp * const f_scalar_sqxtn[] = { 9081 gen_neon_narrow_sat_s8, 9082 gen_neon_narrow_sat_s16, 9083 gen_neon_narrow_sat_s32, 9084 }; 9085 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9086 9087 WRAP_ENV(neon_narrow_sat_u8) 9088 WRAP_ENV(neon_narrow_sat_u16) 9089 WRAP_ENV(neon_narrow_sat_u32) 9090 9091 static ArithOneOp * const f_scalar_uqxtn[] = { 9092 gen_neon_narrow_sat_u8, 9093 gen_neon_narrow_sat_u16, 9094 gen_neon_narrow_sat_u32, 9095 }; 9096 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9097 9098 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9099 { 9100 /* 9101 * 64 bit to 32 bit float conversion 9102 * with von Neumann rounding (round to odd) 9103 */ 9104 TCGv_i32 tmp = tcg_temp_new_i32(); 9105 gen_helper_fcvtx_f64_to_f32(tmp, n, tcg_env); 9106 
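/*
 * Round-to-odd is used so that a later rounding of the result to an
 * even narrower format cannot introduce a double-rounding error.  The
 * zero-extension below is harmless: callers store only a 32-bit
 * element from the low half.
 */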
tcg_gen_extu_i32_i64(d, tmp); 9107 } 9108 9109 static ArithOneOp * const f_scalar_fcvtxn[] = { 9110 NULL, 9111 NULL, 9112 gen_fcvtxn_sd, 9113 }; 9114 TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) 9115 9116 #undef WRAP_ENV 9117 9118 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9119 { 9120 if (!a->q && a->esz == MO_64) { 9121 return false; 9122 } 9123 if (fp_access_check(s)) { 9124 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9125 } 9126 return true; 9127 } 9128 9129 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) 9130 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) 9131 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) 9132 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) 9133 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) 9134 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) 9135 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) 9136 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) 9137 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) 9138 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) 9139 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) 9140 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) 9141 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) 9142 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) 9143 9144 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9145 { 9146 if (a->esz == MO_64) { 9147 return false; 9148 } 9149 if (fp_access_check(s)) { 9150 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9151 } 9152 return true; 9153 } 9154 9155 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) 9156 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) 9157 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) 9158 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) 9159 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) 9160 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) 9161 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) 9162 9163 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, 9164 ArithOneOp * const fn[3]) 9165 { 9166 if (a->esz == MO_64) { 9167 return false; 9168 } 9169 if (fp_access_check(s)) { 9170 TCGv_i64 t0 = tcg_temp_new_i64(); 9171 TCGv_i64 t1 = tcg_temp_new_i64(); 9172 9173 read_vec_element(s, t0, a->rn, 0, MO_64); 9174 read_vec_element(s, t1, a->rn, 1, MO_64); 9175 fn[a->esz](t0, t0); 9176 fn[a->esz](t1, t1); 9177 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32); 9178 write_vec_element(s, t1, a->rd, a->q ? 
3 : 1, MO_32); 9179 clear_vec_high(s, a->q, a->rd); 9180 } 9181 return true; 9182 } 9183 9184 static ArithOneOp * const f_scalar_xtn[] = { 9185 gen_helper_neon_narrow_u8, 9186 gen_helper_neon_narrow_u16, 9187 tcg_gen_ext32u_i64, 9188 }; 9189 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9190 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9191 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9192 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9193 9194 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9195 { 9196 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9197 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9198 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9199 TCGv_i32 ahp = get_ahp_flag(); 9200 9201 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9202 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9203 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9204 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9205 tcg_gen_extu_i32_i64(d, tcg_lo); 9206 } 9207 9208 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9209 { 9210 TCGv_i32 tmp = tcg_temp_new_i32(); 9211 gen_helper_vfp_fcvtsd(tmp, n, tcg_env); 9212 tcg_gen_extu_i32_i64(d, tmp); 9213 } 9214 9215 static ArithOneOp * const f_vector_fcvtn[] = { 9216 NULL, 9217 gen_fcvtn_hs, 9218 gen_fcvtn_sd, 9219 }; 9220 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9221 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9222 9223 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9224 { 9225 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9226 TCGv_i32 tmp = tcg_temp_new_i32(); 9227 gen_helper_bfcvt_pair(tmp, n, fpst); 9228 tcg_gen_extu_i32_i64(d, tmp); 9229 } 9230 9231 static ArithOneOp * const f_vector_bfcvtn[] = { 9232 NULL, 9233 gen_bfcvtn_hs, 9234 NULL, 9235 }; 9236 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) 9237 9238 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9239 { 9240 static NeonGenWidenFn * const widenfns[3] = { 9241 gen_helper_neon_widen_u8, 9242 gen_helper_neon_widen_u16, 9243 tcg_gen_extu_i32_i64, 9244 }; 9245 NeonGenWidenFn *widenfn; 9246 TCGv_i64 tcg_res[2]; 9247 TCGv_i32 tcg_op; 9248 int part, pass; 9249 9250 if (a->esz == MO_64) { 9251 return false; 9252 } 9253 if (!fp_access_check(s)) { 9254 return true; 9255 } 9256 9257 tcg_op = tcg_temp_new_i32(); 9258 widenfn = widenfns[a->esz]; 9259 part = a->q ? 2 : 0; 9260 9261 for (pass = 0; pass < 2; pass++) { 9262 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32); 9263 tcg_res[pass] = tcg_temp_new_i64(); 9264 widenfn(tcg_res[pass], tcg_op); 9265 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz); 9266 } 9267 9268 for (pass = 0; pass < 2; pass++) { 9269 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 9270 } 9271 return true; 9272 } 9273 9274 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9275 { 9276 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9277 9278 if (check <= 0) { 9279 return check == 0; 9280 } 9281 9282 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9283 return true; 9284 } 9285 9286 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs) 9287 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg) 9288 9289 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, 9290 const FPScalar1 *f, int rmode) 9291 { 9292 TCGv_i32 tcg_rmode = NULL; 9293 TCGv_ptr fpst; 9294 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9295 9296 if (check <= 0) { 9297 return check == 0; 9298 } 9299 9300 fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 9301 if (rmode >= 0) { 9302 tcg_rmode = gen_set_rmode(rmode, fpst); 9303 } 9304 9305 if (a->esz == MO_64) { 9306 TCGv_i64 t64 = tcg_temp_new_i64(); 9307 9308 for (int pass = 0; pass < 2; ++pass) { 9309 read_vec_element(s, t64, a->rn, pass, MO_64); 9310 f->gen_d(t64, t64, fpst); 9311 write_vec_element(s, t64, a->rd, pass, MO_64); 9312 } 9313 } else { 9314 TCGv_i32 t32 = tcg_temp_new_i32(); 9315 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr) 9316 = (a->esz == MO_16 ? f->gen_h : f->gen_s); 9317 9318 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) { 9319 read_vec_element_i32(s, t32, a->rn, pass, a->esz); 9320 gen(t32, t32, fpst); 9321 write_vec_element_i32(s, t32, a->rd, pass, a->esz); 9322 } 9323 } 9324 clear_vec_high(s, a->q, a->rd); 9325 9326 if (rmode >= 0) { 9327 gen_restore_rmode(tcg_rmode, fpst); 9328 } 9329 return true; 9330 } 9331 9332 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1) 9333 9334 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 9335 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF) 9336 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF) 9337 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO) 9338 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 9339 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1) 9340 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1) 9341 9342 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a, 9343 &f_scalar_frint32, FPROUNDING_ZERO) 9344 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1) 9345 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, 9346 &f_scalar_frint64, FPROUNDING_ZERO) 9347 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) 9348 9349 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, 9350 int rd, int rn, int data, 9351 gen_helper_gvec_2_ptr * const fns[3]) 9352 { 9353 int check = fp_access_check_vector_hsd(s, is_q, esz); 9354 TCGv_ptr fpst; 9355 9356 if (check <= 0) { 9357 return check == 0; 9358 } 9359 9360 fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9361 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 9362 vec_full_reg_offset(s, rn), fpst, 9363 is_q ? 
16 : 8, vec_full_reg_size(s), 9364 data, fns[esz - 1]); 9365 return true; 9366 } 9367 9368 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { 9369 gen_helper_gvec_vcvt_sh, 9370 gen_helper_gvec_vcvt_sf, 9371 gen_helper_gvec_vcvt_sd, 9372 }; 9373 TRANS(SCVTF_vi, do_gvec_op2_fpst, 9374 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v) 9375 TRANS(SCVTF_vf, do_gvec_op2_fpst, 9376 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v) 9377 9378 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = { 9379 gen_helper_gvec_vcvt_uh, 9380 gen_helper_gvec_vcvt_uf, 9381 gen_helper_gvec_vcvt_ud, 9382 }; 9383 TRANS(UCVTF_vi, do_gvec_op2_fpst, 9384 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v) 9385 TRANS(UCVTF_vf, do_gvec_op2_fpst, 9386 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v) 9387 9388 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = { 9389 gen_helper_gvec_vcvt_rz_hs, 9390 gen_helper_gvec_vcvt_rz_fs, 9391 gen_helper_gvec_vcvt_rz_ds, 9392 }; 9393 TRANS(FCVTZS_vf, do_gvec_op2_fpst, 9394 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf) 9395 9396 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = { 9397 gen_helper_gvec_vcvt_rz_hu, 9398 gen_helper_gvec_vcvt_rz_fu, 9399 gen_helper_gvec_vcvt_rz_du, 9400 }; 9401 TRANS(FCVTZU_vf, do_gvec_op2_fpst, 9402 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf) 9403 9404 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = { 9405 gen_helper_gvec_vcvt_rm_sh, 9406 gen_helper_gvec_vcvt_rm_ss, 9407 gen_helper_gvec_vcvt_rm_sd, 9408 }; 9409 9410 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = { 9411 gen_helper_gvec_vcvt_rm_uh, 9412 gen_helper_gvec_vcvt_rm_us, 9413 gen_helper_gvec_vcvt_rm_ud, 9414 }; 9415 9416 TRANS(FCVTNS_vi, do_gvec_op2_fpst, 9417 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi) 9418 TRANS(FCVTNU_vi, do_gvec_op2_fpst, 9419 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi) 9420 TRANS(FCVTPS_vi, do_gvec_op2_fpst, 9421 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi) 9422 TRANS(FCVTPU_vi, do_gvec_op2_fpst, 9423 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi) 9424 TRANS(FCVTMS_vi, do_gvec_op2_fpst, 9425 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi) 9426 TRANS(FCVTMU_vi, do_gvec_op2_fpst, 9427 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi) 9428 TRANS(FCVTZS_vi, do_gvec_op2_fpst, 9429 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi) 9430 TRANS(FCVTZU_vi, do_gvec_op2_fpst, 9431 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi) 9432 TRANS(FCVTAS_vi, do_gvec_op2_fpst, 9433 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi) 9434 TRANS(FCVTAU_vi, do_gvec_op2_fpst, 9435 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi) 9436 9437 static gen_helper_gvec_2_ptr * const f_fceq0[] = { 9438 gen_helper_gvec_fceq0_h, 9439 gen_helper_gvec_fceq0_s, 9440 gen_helper_gvec_fceq0_d, 9441 }; 9442 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0) 9443 9444 static gen_helper_gvec_2_ptr * const f_fcgt0[] = { 9445 gen_helper_gvec_fcgt0_h, 9446 gen_helper_gvec_fcgt0_s, 9447 gen_helper_gvec_fcgt0_d, 9448 }; 9449 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0) 9450 9451 static gen_helper_gvec_2_ptr * const f_fcge0[] = { 9452 gen_helper_gvec_fcge0_h, 9453 gen_helper_gvec_fcge0_s, 9454 gen_helper_gvec_fcge0_d, 9455 }; 9456 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0) 9457 9458 static gen_helper_gvec_2_ptr * const f_fclt0[] = { 9459 gen_helper_gvec_fclt0_h, 9460 gen_helper_gvec_fclt0_s, 9461 
gen_helper_gvec_fclt0_d, 9462 }; 9463 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0) 9464 9465 static gen_helper_gvec_2_ptr * const f_fcle0[] = { 9466 gen_helper_gvec_fcle0_h, 9467 gen_helper_gvec_fcle0_s, 9468 gen_helper_gvec_fcle0_d, 9469 }; 9470 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0) 9471 9472 static gen_helper_gvec_2_ptr * const f_frecpe[] = { 9473 gen_helper_gvec_frecpe_h, 9474 gen_helper_gvec_frecpe_s, 9475 gen_helper_gvec_frecpe_d, 9476 }; 9477 TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) 9478 9479 static gen_helper_gvec_2_ptr * const f_frsqrte[] = { 9480 gen_helper_gvec_frsqrte_h, 9481 gen_helper_gvec_frsqrte_s, 9482 gen_helper_gvec_frsqrte_d, 9483 }; 9484 TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) 9485 9486 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) 9487 { 9488 /* Handle 2-reg-misc ops which are widening (so each size element 9489 * in the source becomes a 2*size element in the destination. 9490 * The only instruction like this is FCVTL. 9491 */ 9492 int pass; 9493 9494 if (!fp_access_check(s)) { 9495 return true; 9496 } 9497 9498 if (a->esz == MO_64) { 9499 /* 32 -> 64 bit fp conversion */ 9500 TCGv_i64 tcg_res[2]; 9501 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9502 int srcelt = a->q ? 2 : 0; 9503 9504 for (pass = 0; pass < 2; pass++) { 9505 tcg_res[pass] = tcg_temp_new_i64(); 9506 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32); 9507 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); 9508 } 9509 for (pass = 0; pass < 2; pass++) { 9510 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 9511 } 9512 } else { 9513 /* 16 -> 32 bit fp conversion */ 9514 int srcelt = a->q ? 4 : 0; 9515 TCGv_i32 tcg_res[4]; 9516 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9517 TCGv_i32 ahp = get_ahp_flag(); 9518 9519 for (pass = 0; pass < 4; pass++) { 9520 tcg_res[pass] = tcg_temp_new_i32(); 9521 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16); 9522 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 9523 fpst, ahp); 9524 } 9525 for (pass = 0; pass < 4; pass++) { 9526 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32); 9527 } 9528 } 9529 clear_vec_high(s, true, a->rd); 9530 return true; 9531 } 9532 9533 static bool trans_OK(DisasContext *s, arg_OK *a) 9534 { 9535 return true; 9536 } 9537 9538 static bool trans_FAIL(DisasContext *s, arg_OK *a) 9539 { 9540 s->is_nonstreaming = true; 9541 return true; 9542 } 9543 9544 /** 9545 * btype_destination_ok: 9546 * @insn: The instruction at the branch destination 9547 * @bt: SCTLR_ELx.BT 9548 * @btype: PSTATE.BTYPE, and is non-zero 9549 * 9550 * On a guarded page, there are a limited number of insns 9551 * that may be present at the branch target: 9552 * - branch target identifiers, 9553 * - paciasp, pacibsp, 9554 * - BRK insn 9555 * - HLT insn 9556 * Anything else causes a Branch Target Exception. 9557 * 9558 * Return true if the branch is compatible, false to raise BTITRAP. 9559 */ 9560 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 9561 { 9562 if ((insn & 0xfffff01fu) == 0xd503201fu) { 9563 /* HINT space */ 9564 switch (extract32(insn, 5, 7)) { 9565 case 0b011001: /* PACIASP */ 9566 case 0b011011: /* PACIBSP */ 9567 /* 9568 * If SCTLR_ELx.BT, then PACI*SP are not compatible 9569 * with btype == 3. Otherwise all btype are ok. 9570 */ 9571 return !bt || btype != 3; 9572 case 0b100000: /* BTI */ 9573 /* Not compatible with any btype. 
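 * A BTI with no targets accepts no indirect branch type, and this
 * function is only called with PSTATE.BTYPE != 0, so landing here
 * always raises the Branch Target exception.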
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *   - branch target identifiers,
 *   - paciasp, pacibsp,
 *   - BRK insn,
 *   - HLT insn.
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /*
     * Single step state.  The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;
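    /*
     * Note on the page bound computed below: TARGET_PAGE_MASK has all bits
     * above the page offset set, so (pc_first | TARGET_PAGE_MASK) is the
     * page offset of pc_first minus the page size.  Negating it gives the
     * bytes left on the page, and dividing by 4 the number of whole insns.
     * For example, with 4KiB pages and pc_first ending in 0xf40, bound is
     * (0x1000 - 0xf40) / 4 = 48.
     */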
    /* Bound the number of insns to execute to those left on the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
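/*
 * Note for the CF_PCREL case in aarch64_tr_insn_start below: a PC-relative
 * TB may execute at more than one virtual address, so only the in-page
 * offset of the insn is recorded here; the page bits are recovered from
 * the CPU's current PC when the guest state is restored.
 */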
static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /*
         * Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above almost
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /*
         * Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};
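/*
 * These hooks are driven by the generic translator loop: init_disas_context
 * and tb_start run once per translation block, insn_start and translate_insn
 * once per guest insn, and tb_stop emits the block epilogue.
 */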