1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 21 #include "exec/exec-all.h" 22 #include "translate.h" 23 #include "translate-a64.h" 24 #include "qemu/log.h" 25 #include "arm_ldst.h" 26 #include "semihosting/semihost.h" 27 #include "cpregs.h" 28 29 static TCGv_i64 cpu_X[32]; 30 static TCGv_i64 cpu_pc; 31 32 /* Load/store exclusive handling */ 33 static TCGv_i64 cpu_exclusive_high; 34 35 static const char *regnames[] = { 36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 40 }; 41 42 enum a64_shift_type { 43 A64_SHIFT_TYPE_LSL = 0, 44 A64_SHIFT_TYPE_LSR = 1, 45 A64_SHIFT_TYPE_ASR = 2, 46 A64_SHIFT_TYPE_ROR = 3 47 }; 48 49 /* 50 * Helpers for extracting complex instruction fields 51 */ 52 53 /* 54 * For load/store with an unsigned 12 bit immediate scaled by the element 55 * size. The input has the immediate field in bits [14:3] and the element 56 * size in [2:0]. 57 */ 58 static int uimm_scaled(DisasContext *s, int x) 59 { 60 unsigned imm = x >> 3; 61 unsigned scale = extract32(x, 0, 3); 62 return imm << scale; 63 } 64 65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 66 static int scale_by_log2_tag_granule(DisasContext *s, int x) 67 { 68 return x << LOG2_TAG_GRANULE; 69 } 70 71 /* 72 * Include the generated decoders. 73 */ 74 75 #include "decode-sme-fa64.c.inc" 76 #include "decode-a64.c.inc" 77 78 /* Table based decoder typedefs - used when the relevant bits for decode 79 * are too awkwardly scattered across the instruction (eg SIMD). 80 */ 81 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); 82 83 typedef struct AArch64DecodeTable { 84 uint32_t pattern; 85 uint32_t mask; 86 AArch64DecodeFn *disas_fn; 87 } AArch64DecodeTable; 88 89 /* initialize TCG globals. */ 90 void a64_translate_init(void) 91 { 92 int i; 93 94 cpu_pc = tcg_global_mem_new_i64(tcg_env, 95 offsetof(CPUARMState, pc), 96 "pc"); 97 for (i = 0; i < 32; i++) { 98 cpu_X[i] = tcg_global_mem_new_i64(tcg_env, 99 offsetof(CPUARMState, xregs[i]), 100 regnames[i]); 101 } 102 103 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, 104 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 105 } 106 107 /* 108 * Return the core mmu_idx to use for A64 load/store insns which 109 * have a "unprivileged load/store" variant. Those insns access 110 * EL0 if executed from an EL which has control over EL0 (usually 111 * EL1) but behave like normal loads and stores if executed from 112 * elsewhere (eg EL3). 113 * 114 * @unpriv : true for the unprivileged encoding; false for the 115 * normal encoding (in which case we will return the same 116 * thing as get_mem_index(). 
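 *
 * For example, an LDTR executed at EL1 in the EL1&0 translation regime
 * is an unprivileged access, so the E10_1 (or E10_1_PAN) mmu_idx is
 * mapped down to E10_0 below, while the same insn executed at EL3
 * behaves like a plain LDR and keeps the index from get_mem_index().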
117 */ 118 static int get_a64_user_mem_index(DisasContext *s, bool unpriv) 119 { 120 /* 121 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 122 * which is the usual mmu_idx for this cpu state. 123 */ 124 ARMMMUIdx useridx = s->mmu_idx; 125 126 if (unpriv && s->unpriv) { 127 /* 128 * We have pre-computed the condition for AccType_UNPRIV. 129 * Therefore we should never get here with a mmu_idx for 130 * which we do not know the corresponding user mmu_idx. 131 */ 132 switch (useridx) { 133 case ARMMMUIdx_E10_1: 134 case ARMMMUIdx_E10_1_PAN: 135 useridx = ARMMMUIdx_E10_0; 136 break; 137 case ARMMMUIdx_E20_2: 138 case ARMMMUIdx_E20_2_PAN: 139 useridx = ARMMMUIdx_E20_0; 140 break; 141 default: 142 g_assert_not_reached(); 143 } 144 } 145 return arm_to_core_mmu_idx(useridx); 146 } 147 148 static void set_btype_raw(int val) 149 { 150 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 151 offsetof(CPUARMState, btype)); 152 } 153 154 static void set_btype(DisasContext *s, int val) 155 { 156 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 157 tcg_debug_assert(val >= 1 && val <= 3); 158 set_btype_raw(val); 159 s->btype = -1; 160 } 161 162 static void reset_btype(DisasContext *s) 163 { 164 if (s->btype != 0) { 165 set_btype_raw(0); 166 s->btype = 0; 167 } 168 } 169 170 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 171 { 172 assert(s->pc_save != -1); 173 if (tb_cflags(s->base.tb) & CF_PCREL) { 174 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 175 } else { 176 tcg_gen_movi_i64(dest, s->pc_curr + diff); 177 } 178 } 179 180 void gen_a64_update_pc(DisasContext *s, target_long diff) 181 { 182 gen_pc_plus_diff(s, cpu_pc, diff); 183 s->pc_save = s->pc_curr + diff; 184 } 185 186 /* 187 * Handle Top Byte Ignore (TBI) bits. 188 * 189 * If address tagging is enabled via the TCR TBI bits: 190 * + for EL2 and EL3 there is only one TBI bit, and if it is set 191 * then the address is zero-extended, clearing bits [63:56] 192 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 193 * and TBI1 controls addresses with bit 55 == 1. 194 * If the appropriate TBI bit is set for the address then 195 * the address is sign-extended from bit 55 into bits [63:56] 196 * 197 * Here We have concatenated TBI{1,0} into tbi. 198 */ 199 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 200 TCGv_i64 src, int tbi) 201 { 202 if (tbi == 0) { 203 /* Load unmodified address */ 204 tcg_gen_mov_i64(dst, src); 205 } else if (!regime_has_2_ranges(s->mmu_idx)) { 206 /* Force tag byte to all zero */ 207 tcg_gen_extract_i64(dst, src, 0, 56); 208 } else { 209 /* Sign-extend from bit 55. */ 210 tcg_gen_sextract_i64(dst, src, 0, 56); 211 212 switch (tbi) { 213 case 1: 214 /* tbi0 but !tbi1: only use the extension if positive */ 215 tcg_gen_and_i64(dst, dst, src); 216 break; 217 case 2: 218 /* !tbi0 but tbi1: only use the extension if negative */ 219 tcg_gen_or_i64(dst, dst, src); 220 break; 221 case 3: 222 /* tbi0 and tbi1: always use the extension */ 223 break; 224 default: 225 g_assert_not_reached(); 226 } 227 } 228 } 229 230 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 231 { 232 /* 233 * If address tagging is enabled for instructions via the TCR TBI bits, 234 * then loading an address into the PC will clear out any tag. 235 */ 236 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 237 s->pc_save = -1; 238 } 239 240 /* 241 * Handle MTE and/or TBI. 242 * 243 * For TBI, ideally, we would do nothing. 
Proper behaviour on fault is 244 * for the tag to be present in the FAR_ELx register. But for user-only 245 * mode we do not have a TLB with which to implement this, so we must 246 * remove the top byte now. 247 * 248 * Always return a fresh temporary that we can increment independently 249 * of the write-back address. 250 */ 251 252 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 253 { 254 TCGv_i64 clean = tcg_temp_new_i64(); 255 #ifdef CONFIG_USER_ONLY 256 gen_top_byte_ignore(s, clean, addr, s->tbid); 257 #else 258 tcg_gen_mov_i64(clean, addr); 259 #endif 260 return clean; 261 } 262 263 /* Insert a zero tag into src, with the result at dst. */ 264 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 265 { 266 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 267 } 268 269 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 270 MMUAccessType acc, int log2_size) 271 { 272 gen_helper_probe_access(tcg_env, ptr, 273 tcg_constant_i32(acc), 274 tcg_constant_i32(get_mem_index(s)), 275 tcg_constant_i32(1 << log2_size)); 276 } 277 278 /* 279 * For MTE, check a single logical or atomic access. This probes a single 280 * address, the exact one specified. The size and alignment of the access 281 * is not relevant to MTE, per se, but watchpoints do require the size, 282 * and we want to recognize those before making any other changes to state. 283 */ 284 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 285 bool is_write, bool tag_checked, 286 MemOp memop, bool is_unpriv, 287 int core_idx) 288 { 289 if (tag_checked && s->mte_active[is_unpriv]) { 290 TCGv_i64 ret; 291 int desc = 0; 292 293 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 294 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 295 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 296 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 297 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop)); 298 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 299 300 ret = tcg_temp_new_i64(); 301 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 302 303 return ret; 304 } 305 return clean_data_tbi(s, addr); 306 } 307 308 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 309 bool tag_checked, MemOp memop) 310 { 311 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 312 false, get_mem_index(s)); 313 } 314 315 /* 316 * For MTE, check multiple logical sequential accesses. 317 */ 318 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 319 bool tag_checked, int total_size, MemOp single_mop) 320 { 321 if (tag_checked && s->mte_active[0]) { 322 TCGv_i64 ret; 323 int desc = 0; 324 325 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 326 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 327 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 328 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 329 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop)); 330 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 331 332 ret = tcg_temp_new_i64(); 333 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 334 335 return ret; 336 } 337 return clean_data_tbi(s, addr); 338 } 339 340 /* 341 * Generate the special alignment check that applies to AccType_ATOMIC 342 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 343 * naturally aligned, but it must not cross a 16-byte boundary. 344 * See AArch64.CheckAlignment(). 
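 *
 * Worked example: an 8-byte access whose (address + offset) has 0x9 in
 * the low four bits computes 9 + 8 == 17 below, which is > 16, so it
 * crosses a 16-byte boundary and we call the unaligned-access helper;
 * the same access at low bits 0x8 computes exactly 16 and is allowed
 * even though it is not naturally aligned.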
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is an LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
472 * This corresponds to making the (IMPDEF) choice to prioritize 473 * swstep exceptions over asynchronous exceptions taken to an exception 474 * level where debug is disabled. This choice has the advantage that 475 * we do not need to maintain internal state corresponding to the 476 * ISV/EX syndrome bits between completion of the step and generation 477 * of the exception, and our syndrome information is always correct. 478 */ 479 gen_ss_advance(s); 480 gen_swstep_exception(s, 1, s->is_ldex); 481 s->base.is_jmp = DISAS_NORETURN; 482 } 483 484 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 485 { 486 if (s->ss_active) { 487 return false; 488 } 489 return translator_use_goto_tb(&s->base, dest); 490 } 491 492 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 493 { 494 if (use_goto_tb(s, s->pc_curr + diff)) { 495 /* 496 * For pcrel, the pc must always be up-to-date on entry to 497 * the linked TB, so that it can use simple additions for all 498 * further adjustments. For !pcrel, the linked TB is compiled 499 * to know its full virtual address, so we can delay the 500 * update to pc to the unlinked path. A long chain of links 501 * can thus avoid many updates to the PC. 502 */ 503 if (tb_cflags(s->base.tb) & CF_PCREL) { 504 gen_a64_update_pc(s, diff); 505 tcg_gen_goto_tb(n); 506 } else { 507 tcg_gen_goto_tb(n); 508 gen_a64_update_pc(s, diff); 509 } 510 tcg_gen_exit_tb(s->base.tb, n); 511 s->base.is_jmp = DISAS_NORETURN; 512 } else { 513 gen_a64_update_pc(s, diff); 514 if (s->ss_active) { 515 gen_step_complete_exception(s); 516 } else { 517 tcg_gen_lookup_and_goto_ptr(); 518 s->base.is_jmp = DISAS_NORETURN; 519 } 520 } 521 } 522 523 /* 524 * Register access functions 525 * 526 * These functions are used for directly accessing a register in where 527 * changes to the final register value are likely to be made. If you 528 * need to use a register for temporary calculation (e.g. index type 529 * operations) use the read_* form. 530 * 531 * B1.2.1 Register mappings 532 * 533 * In instruction register encoding 31 can refer to ZR (zero register) or 534 * the SP (stack pointer) depending on context. In QEMU's case we map SP 535 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 536 * This is the point of the _sp forms. 537 */ 538 TCGv_i64 cpu_reg(DisasContext *s, int reg) 539 { 540 if (reg == 31) { 541 TCGv_i64 t = tcg_temp_new_i64(); 542 tcg_gen_movi_i64(t, 0); 543 return t; 544 } else { 545 return cpu_X[reg]; 546 } 547 } 548 549 /* register access for when 31 == SP */ 550 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 551 { 552 return cpu_X[reg]; 553 } 554 555 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 556 * representing the register contents. This TCGv is an auto-freed 557 * temporary so it need not be explicitly freed, and may be modified. 558 */ 559 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 560 { 561 TCGv_i64 v = tcg_temp_new_i64(); 562 if (reg != 31) { 563 if (sf) { 564 tcg_gen_mov_i64(v, cpu_X[reg]); 565 } else { 566 tcg_gen_ext32u_i64(v, cpu_X[reg]); 567 } 568 } else { 569 tcg_gen_movi_i64(v, 0); 570 } 571 return v; 572 } 573 574 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 575 { 576 TCGv_i64 v = tcg_temp_new_i64(); 577 if (sf) { 578 tcg_gen_mov_i64(v, cpu_X[reg]); 579 } else { 580 tcg_gen_ext32u_i64(v, cpu_X[reg]); 581 } 582 return v; 583 } 584 585 /* Return the offset into CPUARMState of a slice (from 586 * the least significant end) of FP register Qn (ie 587 * Dn, Sn, Hn or Bn). 
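 * For example, fp_reg_offset(s, n, MO_32) gives the offset of Sn, the
 * least significant 32 bits of Qn.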
588 * (Note that this is not the same mapping as for A32; see cpu.h) 589 */ 590 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 591 { 592 return vec_reg_offset(s, regno, 0, size); 593 } 594 595 /* Offset of the high half of the 128 bit vector Qn */ 596 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 597 { 598 return vec_reg_offset(s, regno, 1, MO_64); 599 } 600 601 /* Convenience accessors for reading and writing single and double 602 * FP registers. Writing clears the upper parts of the associated 603 * 128 bit vector register, as required by the architecture. 604 * Note that unlike the GP register accessors, the values returned 605 * by the read functions must be manually freed. 606 */ 607 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 608 { 609 TCGv_i64 v = tcg_temp_new_i64(); 610 611 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 612 return v; 613 } 614 615 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 616 { 617 TCGv_i32 v = tcg_temp_new_i32(); 618 619 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 620 return v; 621 } 622 623 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 624 { 625 TCGv_i32 v = tcg_temp_new_i32(); 626 627 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 628 return v; 629 } 630 631 static void clear_vec(DisasContext *s, int rd) 632 { 633 unsigned ofs = fp_reg_offset(s, rd, MO_64); 634 unsigned vsz = vec_full_reg_size(s); 635 636 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0); 637 } 638 639 /* 640 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 641 * If SVE is not enabled, then there are only 128 bits in the vector. 642 */ 643 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 644 { 645 unsigned ofs = fp_reg_offset(s, rd, MO_64); 646 unsigned vsz = vec_full_reg_size(s); 647 648 /* Nop move, with side effect of clearing the tail. */ 649 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 650 } 651 652 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 653 { 654 unsigned ofs = fp_reg_offset(s, reg, MO_64); 655 656 tcg_gen_st_i64(v, tcg_env, ofs); 657 clear_vec_high(s, false, reg); 658 } 659 660 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 661 { 662 TCGv_i64 tmp = tcg_temp_new_i64(); 663 664 tcg_gen_extu_i32_i64(tmp, v); 665 write_fp_dreg(s, reg, tmp); 666 } 667 668 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 669 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 670 GVecGen2Fn *gvec_fn, int vece) 671 { 672 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 673 is_q ? 16 : 8, vec_full_reg_size(s)); 674 } 675 676 /* Expand a 2-operand + immediate AdvSIMD vector operation using 677 * an expander function. 678 */ 679 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 680 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 681 { 682 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 683 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 684 } 685 686 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 687 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 688 GVecGen3Fn *gvec_fn, int vece) 689 { 690 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 691 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 692 } 693 694 /* Expand a 4-operand AdvSIMD vector operation using an expander function. 
*/ 695 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 696 int rx, GVecGen4Fn *gvec_fn, int vece) 697 { 698 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 699 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 700 is_q ? 16 : 8, vec_full_reg_size(s)); 701 } 702 703 /* Expand a 2-operand operation using an out-of-line helper. */ 704 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 705 int rn, int data, gen_helper_gvec_2 *fn) 706 { 707 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 708 vec_full_reg_offset(s, rn), 709 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 710 } 711 712 /* Expand a 3-operand operation using an out-of-line helper. */ 713 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 714 int rn, int rm, int data, gen_helper_gvec_3 *fn) 715 { 716 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 717 vec_full_reg_offset(s, rn), 718 vec_full_reg_offset(s, rm), 719 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 720 } 721 722 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 723 * an out-of-line helper. 724 */ 725 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 726 int rm, bool is_fp16, int data, 727 gen_helper_gvec_3_ptr *fn) 728 { 729 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); 730 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 731 vec_full_reg_offset(s, rn), 732 vec_full_reg_offset(s, rm), fpst, 733 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 734 } 735 736 /* Expand a 4-operand operation using an out-of-line helper. */ 737 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 738 int rm, int ra, int data, gen_helper_gvec_4 *fn) 739 { 740 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 741 vec_full_reg_offset(s, rn), 742 vec_full_reg_offset(s, rm), 743 vec_full_reg_offset(s, ra), 744 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 745 } 746 747 /* 748 * Expand a 4-operand operation using an out-of-line helper that takes 749 * a pointer to the CPU env. 750 */ 751 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, 752 int rm, int ra, int data, 753 gen_helper_gvec_4_ptr *fn) 754 { 755 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 756 vec_full_reg_offset(s, rn), 757 vec_full_reg_offset(s, rm), 758 vec_full_reg_offset(s, ra), 759 tcg_env, 760 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 761 } 762 763 /* 764 * Expand a 4-operand + fpstatus pointer + simd data value operation using 765 * an out-of-line helper. 766 */ 767 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 768 int rm, int ra, bool is_fp16, int data, 769 gen_helper_gvec_4_ptr *fn) 770 { 771 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); 772 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 773 vec_full_reg_offset(s, rn), 774 vec_full_reg_offset(s, rm), 775 vec_full_reg_offset(s, ra), fpst, 776 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 777 } 778 779 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 780 * than the 32 bit equivalent. 781 */ 782 static inline void gen_set_NZ64(TCGv_i64 result) 783 { 784 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 785 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 786 } 787 788 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. 
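 * For a 32-bit (sf == 0) operation only the low 32 bits of the result
 * contribute to N and Z, which is why the value is narrowed with
 * tcg_gen_extrl_i64_i32() below.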
*/ 789 static inline void gen_logic_CC(int sf, TCGv_i64 result) 790 { 791 if (sf) { 792 gen_set_NZ64(result); 793 } else { 794 tcg_gen_extrl_i64_i32(cpu_ZF, result); 795 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 796 } 797 tcg_gen_movi_i32(cpu_CF, 0); 798 tcg_gen_movi_i32(cpu_VF, 0); 799 } 800 801 /* dest = T0 + T1; compute C, N, V and Z flags */ 802 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 803 { 804 TCGv_i64 result, flag, tmp; 805 result = tcg_temp_new_i64(); 806 flag = tcg_temp_new_i64(); 807 tmp = tcg_temp_new_i64(); 808 809 tcg_gen_movi_i64(tmp, 0); 810 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 811 812 tcg_gen_extrl_i64_i32(cpu_CF, flag); 813 814 gen_set_NZ64(result); 815 816 tcg_gen_xor_i64(flag, result, t0); 817 tcg_gen_xor_i64(tmp, t0, t1); 818 tcg_gen_andc_i64(flag, flag, tmp); 819 tcg_gen_extrh_i64_i32(cpu_VF, flag); 820 821 tcg_gen_mov_i64(dest, result); 822 } 823 824 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 825 { 826 TCGv_i32 t0_32 = tcg_temp_new_i32(); 827 TCGv_i32 t1_32 = tcg_temp_new_i32(); 828 TCGv_i32 tmp = tcg_temp_new_i32(); 829 830 tcg_gen_movi_i32(tmp, 0); 831 tcg_gen_extrl_i64_i32(t0_32, t0); 832 tcg_gen_extrl_i64_i32(t1_32, t1); 833 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 834 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 835 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 836 tcg_gen_xor_i32(tmp, t0_32, t1_32); 837 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 838 tcg_gen_extu_i32_i64(dest, cpu_NF); 839 } 840 841 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 842 { 843 if (sf) { 844 gen_add64_CC(dest, t0, t1); 845 } else { 846 gen_add32_CC(dest, t0, t1); 847 } 848 } 849 850 /* dest = T0 - T1; compute C, N, V and Z flags */ 851 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 852 { 853 /* 64 bit arithmetic */ 854 TCGv_i64 result, flag, tmp; 855 856 result = tcg_temp_new_i64(); 857 flag = tcg_temp_new_i64(); 858 tcg_gen_sub_i64(result, t0, t1); 859 860 gen_set_NZ64(result); 861 862 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 863 tcg_gen_extrl_i64_i32(cpu_CF, flag); 864 865 tcg_gen_xor_i64(flag, result, t0); 866 tmp = tcg_temp_new_i64(); 867 tcg_gen_xor_i64(tmp, t0, t1); 868 tcg_gen_and_i64(flag, flag, tmp); 869 tcg_gen_extrh_i64_i32(cpu_VF, flag); 870 tcg_gen_mov_i64(dest, result); 871 } 872 873 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 874 { 875 /* 32 bit arithmetic */ 876 TCGv_i32 t0_32 = tcg_temp_new_i32(); 877 TCGv_i32 t1_32 = tcg_temp_new_i32(); 878 TCGv_i32 tmp; 879 880 tcg_gen_extrl_i64_i32(t0_32, t0); 881 tcg_gen_extrl_i64_i32(t1_32, t1); 882 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 883 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 884 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 885 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 886 tmp = tcg_temp_new_i32(); 887 tcg_gen_xor_i32(tmp, t0_32, t1_32); 888 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 889 tcg_gen_extu_i32_i64(dest, cpu_NF); 890 } 891 892 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 893 { 894 if (sf) { 895 gen_sub64_CC(dest, t0, t1); 896 } else { 897 gen_sub32_CC(dest, t0, t1); 898 } 899 } 900 901 /* dest = T0 + T1 + CF; do not compute flags. 
*/ 902 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 903 { 904 TCGv_i64 flag = tcg_temp_new_i64(); 905 tcg_gen_extu_i32_i64(flag, cpu_CF); 906 tcg_gen_add_i64(dest, t0, t1); 907 tcg_gen_add_i64(dest, dest, flag); 908 909 if (!sf) { 910 tcg_gen_ext32u_i64(dest, dest); 911 } 912 } 913 914 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 915 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 916 { 917 if (sf) { 918 TCGv_i64 result = tcg_temp_new_i64(); 919 TCGv_i64 cf_64 = tcg_temp_new_i64(); 920 TCGv_i64 vf_64 = tcg_temp_new_i64(); 921 TCGv_i64 tmp = tcg_temp_new_i64(); 922 TCGv_i64 zero = tcg_constant_i64(0); 923 924 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 925 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 926 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 927 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 928 gen_set_NZ64(result); 929 930 tcg_gen_xor_i64(vf_64, result, t0); 931 tcg_gen_xor_i64(tmp, t0, t1); 932 tcg_gen_andc_i64(vf_64, vf_64, tmp); 933 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 934 935 tcg_gen_mov_i64(dest, result); 936 } else { 937 TCGv_i32 t0_32 = tcg_temp_new_i32(); 938 TCGv_i32 t1_32 = tcg_temp_new_i32(); 939 TCGv_i32 tmp = tcg_temp_new_i32(); 940 TCGv_i32 zero = tcg_constant_i32(0); 941 942 tcg_gen_extrl_i64_i32(t0_32, t0); 943 tcg_gen_extrl_i64_i32(t1_32, t1); 944 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 945 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 946 947 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 948 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 949 tcg_gen_xor_i32(tmp, t0_32, t1_32); 950 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 951 tcg_gen_extu_i32_i64(dest, cpu_NF); 952 } 953 } 954 955 /* 956 * Load/Store generators 957 */ 958 959 /* 960 * Store from GPR register to memory. 
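 *
 * If iss_valid is true, an instruction-specific syndrome describing the
 * access (register number iss_srt, 64-bit flag iss_sf, acquire/release
 * flag iss_ar) is recorded so that a data abort taken on this access
 * can report a full ISS.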
961 */ 962 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 963 TCGv_i64 tcg_addr, MemOp memop, int memidx, 964 bool iss_valid, 965 unsigned int iss_srt, 966 bool iss_sf, bool iss_ar) 967 { 968 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 969 970 if (iss_valid) { 971 uint32_t syn; 972 973 syn = syn_data_abort_with_iss(0, 974 (memop & MO_SIZE), 975 false, 976 iss_srt, 977 iss_sf, 978 iss_ar, 979 0, 0, 0, 0, 0, false); 980 disas_set_insn_syndrome(s, syn); 981 } 982 } 983 984 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 985 TCGv_i64 tcg_addr, MemOp memop, 986 bool iss_valid, 987 unsigned int iss_srt, 988 bool iss_sf, bool iss_ar) 989 { 990 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 991 iss_valid, iss_srt, iss_sf, iss_ar); 992 } 993 994 /* 995 * Load from memory to GPR register 996 */ 997 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 998 MemOp memop, bool extend, int memidx, 999 bool iss_valid, unsigned int iss_srt, 1000 bool iss_sf, bool iss_ar) 1001 { 1002 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 1003 1004 if (extend && (memop & MO_SIGN)) { 1005 g_assert((memop & MO_SIZE) <= MO_32); 1006 tcg_gen_ext32u_i64(dest, dest); 1007 } 1008 1009 if (iss_valid) { 1010 uint32_t syn; 1011 1012 syn = syn_data_abort_with_iss(0, 1013 (memop & MO_SIZE), 1014 (memop & MO_SIGN) != 0, 1015 iss_srt, 1016 iss_sf, 1017 iss_ar, 1018 0, 0, 0, 0, 0, false); 1019 disas_set_insn_syndrome(s, syn); 1020 } 1021 } 1022 1023 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1024 MemOp memop, bool extend, 1025 bool iss_valid, unsigned int iss_srt, 1026 bool iss_sf, bool iss_ar) 1027 { 1028 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1029 iss_valid, iss_srt, iss_sf, iss_ar); 1030 } 1031 1032 /* 1033 * Store from FP register to memory 1034 */ 1035 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1036 { 1037 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1038 TCGv_i64 tmplo = tcg_temp_new_i64(); 1039 1040 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1041 1042 if ((mop & MO_SIZE) < MO_128) { 1043 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1044 } else { 1045 TCGv_i64 tmphi = tcg_temp_new_i64(); 1046 TCGv_i128 t16 = tcg_temp_new_i128(); 1047 1048 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1049 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1050 1051 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1052 } 1053 } 1054 1055 /* 1056 * Load from memory to FP register 1057 */ 1058 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1059 { 1060 /* This always zero-extends and writes to a full 128 bit wide vector */ 1061 TCGv_i64 tmplo = tcg_temp_new_i64(); 1062 TCGv_i64 tmphi = NULL; 1063 1064 if ((mop & MO_SIZE) < MO_128) { 1065 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1066 } else { 1067 TCGv_i128 t16 = tcg_temp_new_i128(); 1068 1069 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1070 1071 tmphi = tcg_temp_new_i64(); 1072 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1073 } 1074 1075 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1076 1077 if (tmphi) { 1078 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1079 } 1080 clear_vec_high(s, tmphi != NULL, destidx); 1081 } 1082 1083 /* 1084 * Vector load/store helpers. 
1085 * 1086 * The principal difference between this and a FP load is that we don't 1087 * zero extend as we are filling a partial chunk of the vector register. 1088 * These functions don't support 128 bit loads/stores, which would be 1089 * normal load/store operations. 1090 * 1091 * The _i32 versions are useful when operating on 32 bit quantities 1092 * (eg for floating point single or using Neon helper functions). 1093 */ 1094 1095 /* Get value of an element within a vector register */ 1096 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1097 int element, MemOp memop) 1098 { 1099 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1100 switch ((unsigned)memop) { 1101 case MO_8: 1102 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1103 break; 1104 case MO_16: 1105 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1106 break; 1107 case MO_32: 1108 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1109 break; 1110 case MO_8|MO_SIGN: 1111 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1112 break; 1113 case MO_16|MO_SIGN: 1114 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1115 break; 1116 case MO_32|MO_SIGN: 1117 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1118 break; 1119 case MO_64: 1120 case MO_64|MO_SIGN: 1121 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1122 break; 1123 default: 1124 g_assert_not_reached(); 1125 } 1126 } 1127 1128 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1129 int element, MemOp memop) 1130 { 1131 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1132 switch (memop) { 1133 case MO_8: 1134 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1135 break; 1136 case MO_16: 1137 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1138 break; 1139 case MO_8|MO_SIGN: 1140 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1141 break; 1142 case MO_16|MO_SIGN: 1143 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1144 break; 1145 case MO_32: 1146 case MO_32|MO_SIGN: 1147 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1148 break; 1149 default: 1150 g_assert_not_reached(); 1151 } 1152 } 1153 1154 /* Set value of an element within a vector register */ 1155 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1156 int element, MemOp memop) 1157 { 1158 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1159 switch (memop) { 1160 case MO_8: 1161 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1162 break; 1163 case MO_16: 1164 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1165 break; 1166 case MO_32: 1167 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1168 break; 1169 case MO_64: 1170 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1171 break; 1172 default: 1173 g_assert_not_reached(); 1174 } 1175 } 1176 1177 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1178 int destidx, int element, MemOp memop) 1179 { 1180 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1181 switch (memop) { 1182 case MO_8: 1183 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1184 break; 1185 case MO_16: 1186 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1187 break; 1188 case MO_32: 1189 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1190 break; 1191 default: 1192 g_assert_not_reached(); 1193 } 1194 } 1195 1196 /* Store from vector register to memory */ 1197 static void do_vec_st(DisasContext *s, int srcidx, int element, 1198 TCGv_i64 tcg_addr, MemOp mop) 1199 { 1200 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1201 1202 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1203 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1204 } 1205 1206 /* Load from memory to vector register */ 1207 static void do_vec_ld(DisasContext *s, int destidx, int element, 1208 TCGv_i64 tcg_addr, MemOp mop) 1209 { 1210 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1211 1212 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1213 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1214 } 1215 1216 /* Check that FP/Neon access is enabled. If it is, return 1217 * true. If not, emit code to generate an appropriate exception, 1218 * and return false; the caller should not emit any code for 1219 * the instruction. Note that this check must happen after all 1220 * unallocated-encoding checks (otherwise the syndrome information 1221 * for the resulting exception will be incorrect). 1222 */ 1223 static bool fp_access_check_only(DisasContext *s) 1224 { 1225 if (s->fp_excp_el) { 1226 assert(!s->fp_access_checked); 1227 s->fp_access_checked = true; 1228 1229 gen_exception_insn_el(s, 0, EXCP_UDEF, 1230 syn_fp_access_trap(1, 0xe, false, 0), 1231 s->fp_excp_el); 1232 return false; 1233 } 1234 s->fp_access_checked = true; 1235 return true; 1236 } 1237 1238 static bool fp_access_check(DisasContext *s) 1239 { 1240 if (!fp_access_check_only(s)) { 1241 return false; 1242 } 1243 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1244 gen_exception_insn(s, 0, EXCP_UDEF, 1245 syn_smetrap(SME_ET_Streaming, false)); 1246 return false; 1247 } 1248 return true; 1249 } 1250 1251 /* 1252 * Return <0 for non-supported element sizes, with MO_16 controlled by 1253 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. 1254 */ 1255 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz) 1256 { 1257 switch (esz) { 1258 case MO_64: 1259 case MO_32: 1260 break; 1261 case MO_16: 1262 if (!dc_isar_feature(aa64_fp16, s)) { 1263 return -1; 1264 } 1265 break; 1266 default: 1267 return -1; 1268 } 1269 return fp_access_check(s); 1270 } 1271 1272 /* Likewise, but vector MO_64 must have two elements. */ 1273 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) 1274 { 1275 switch (esz) { 1276 case MO_64: 1277 if (!is_q) { 1278 return -1; 1279 } 1280 break; 1281 case MO_32: 1282 break; 1283 case MO_16: 1284 if (!dc_isar_feature(aa64_fp16, s)) { 1285 return -1; 1286 } 1287 break; 1288 default: 1289 return -1; 1290 } 1291 return fp_access_check(s); 1292 } 1293 1294 /* 1295 * Check that SVE access is enabled. If it is, return true. 1296 * If not, emit code to generate an appropriate exception and return false. 1297 * This function corresponds to CheckSVEEnabled(). 1298 */ 1299 bool sve_access_check(DisasContext *s) 1300 { 1301 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1302 assert(dc_isar_feature(aa64_sme, s)); 1303 if (!sme_sm_enabled_check(s)) { 1304 goto fail_exit; 1305 } 1306 } else if (s->sve_excp_el) { 1307 gen_exception_insn_el(s, 0, EXCP_UDEF, 1308 syn_sve_access_trap(), s->sve_excp_el); 1309 goto fail_exit; 1310 } 1311 s->sve_access_checked = true; 1312 return fp_access_check(s); 1313 1314 fail_exit: 1315 /* Assert that we only raise one exception per instruction. */ 1316 assert(!s->sve_access_checked); 1317 s->sve_access_checked = true; 1318 return false; 1319 } 1320 1321 /* 1322 * Check that SME access is enabled, raise an exception if not. 1323 * Note that this function corresponds to CheckSMEAccess and is 1324 * only used directly for cpregs. 
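 * Everything else goes through sme_enabled_check() below, which also
 * resolves the priority between the SME trap and the FP/SIMD trap.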
1325 */ 1326 static bool sme_access_check(DisasContext *s) 1327 { 1328 if (s->sme_excp_el) { 1329 gen_exception_insn_el(s, 0, EXCP_UDEF, 1330 syn_smetrap(SME_ET_AccessTrap, false), 1331 s->sme_excp_el); 1332 return false; 1333 } 1334 return true; 1335 } 1336 1337 /* This function corresponds to CheckSMEEnabled. */ 1338 bool sme_enabled_check(DisasContext *s) 1339 { 1340 /* 1341 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1342 * to be zero when fp_excp_el has priority. This is because we need 1343 * sme_excp_el by itself for cpregs access checks. 1344 */ 1345 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1346 s->fp_access_checked = true; 1347 return sme_access_check(s); 1348 } 1349 return fp_access_check_only(s); 1350 } 1351 1352 /* Common subroutine for CheckSMEAnd*Enabled. */ 1353 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1354 { 1355 if (!sme_enabled_check(s)) { 1356 return false; 1357 } 1358 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1359 gen_exception_insn(s, 0, EXCP_UDEF, 1360 syn_smetrap(SME_ET_NotStreaming, false)); 1361 return false; 1362 } 1363 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1364 gen_exception_insn(s, 0, EXCP_UDEF, 1365 syn_smetrap(SME_ET_InactiveZA, false)); 1366 return false; 1367 } 1368 return true; 1369 } 1370 1371 /* 1372 * Expanders for AdvSIMD translation functions. 1373 */ 1374 1375 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, 1376 gen_helper_gvec_2 *fn) 1377 { 1378 if (!a->q && a->esz == MO_64) { 1379 return false; 1380 } 1381 if (fp_access_check(s)) { 1382 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); 1383 } 1384 return true; 1385 } 1386 1387 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, 1388 gen_helper_gvec_3 *fn) 1389 { 1390 if (!a->q && a->esz == MO_64) { 1391 return false; 1392 } 1393 if (fp_access_check(s)) { 1394 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); 1395 } 1396 return true; 1397 } 1398 1399 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1400 { 1401 if (!a->q && a->esz == MO_64) { 1402 return false; 1403 } 1404 if (fp_access_check(s)) { 1405 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1406 } 1407 return true; 1408 } 1409 1410 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1411 { 1412 if (a->esz == MO_64) { 1413 return false; 1414 } 1415 if (fp_access_check(s)) { 1416 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1417 } 1418 return true; 1419 } 1420 1421 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1422 { 1423 if (a->esz == MO_8) { 1424 return false; 1425 } 1426 return do_gvec_fn3_no64(s, a, fn); 1427 } 1428 1429 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) 1430 { 1431 if (!a->q && a->esz == MO_64) { 1432 return false; 1433 } 1434 if (fp_access_check(s)) { 1435 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); 1436 } 1437 return true; 1438 } 1439 1440 /* 1441 * This utility function is for doing register extension with an 1442 * optional shift. You will likely want to pass a temporary for the 1443 * destination register. See DecodeRegExtend() in the ARM ARM. 1444 */ 1445 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1446 int option, unsigned int shift) 1447 { 1448 int extsize = extract32(option, 0, 2); 1449 bool is_signed = extract32(option, 2, 1); 1450 1451 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? 
MO_SIGN : 0)); 1452 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1453 } 1454 1455 static inline void gen_check_sp_alignment(DisasContext *s) 1456 { 1457 /* The AArch64 architecture mandates that (if enabled via PSTATE 1458 * or SCTLR bits) there is a check that SP is 16-aligned on every 1459 * SP-relative load or store (with an exception generated if it is not). 1460 * In line with general QEMU practice regarding misaligned accesses, 1461 * we omit these checks for the sake of guest program performance. 1462 * This function is provided as a hook so we can more easily add these 1463 * checks in future (possibly as a "favour catching guest program bugs 1464 * over speed" user selectable option). 1465 */ 1466 } 1467 1468 /* 1469 * The instruction disassembly implemented here matches 1470 * the instruction encoding classifications in chapter C4 1471 * of the ARM Architecture Reference Manual (DDI0487B_a); 1472 * classification names and decode diagrams here should generally 1473 * match up with those in the manual. 1474 */ 1475 1476 static bool trans_B(DisasContext *s, arg_i *a) 1477 { 1478 reset_btype(s); 1479 gen_goto_tb(s, 0, a->imm); 1480 return true; 1481 } 1482 1483 static bool trans_BL(DisasContext *s, arg_i *a) 1484 { 1485 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1486 reset_btype(s); 1487 gen_goto_tb(s, 0, a->imm); 1488 return true; 1489 } 1490 1491 1492 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1493 { 1494 DisasLabel match; 1495 TCGv_i64 tcg_cmp; 1496 1497 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1498 reset_btype(s); 1499 1500 match = gen_disas_label(s); 1501 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1502 tcg_cmp, 0, match.label); 1503 gen_goto_tb(s, 0, 4); 1504 set_disas_label(s, match); 1505 gen_goto_tb(s, 1, a->imm); 1506 return true; 1507 } 1508 1509 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1510 { 1511 DisasLabel match; 1512 TCGv_i64 tcg_cmp; 1513 1514 tcg_cmp = tcg_temp_new_i64(); 1515 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1516 1517 reset_btype(s); 1518 1519 match = gen_disas_label(s); 1520 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1521 tcg_cmp, 0, match.label); 1522 gen_goto_tb(s, 0, 4); 1523 set_disas_label(s, match); 1524 gen_goto_tb(s, 1, a->imm); 1525 return true; 1526 } 1527 1528 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1529 { 1530 /* BC.cond is only present with FEAT_HBC */ 1531 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1532 return false; 1533 } 1534 reset_btype(s); 1535 if (a->cond < 0x0e) { 1536 /* genuinely conditional branches */ 1537 DisasLabel match = gen_disas_label(s); 1538 arm_gen_test_cc(a->cond, match.label); 1539 gen_goto_tb(s, 0, 4); 1540 set_disas_label(s, match); 1541 gen_goto_tb(s, 1, a->imm); 1542 } else { 1543 /* 0xe and 0xf are both "always" conditions */ 1544 gen_goto_tb(s, 0, a->imm); 1545 } 1546 return true; 1547 } 1548 1549 static void set_btype_for_br(DisasContext *s, int rn) 1550 { 1551 if (dc_isar_feature(aa64_bti, s)) { 1552 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1553 if (rn == 16 || rn == 17) { 1554 set_btype(s, 1); 1555 } else { 1556 TCGv_i64 pc = tcg_temp_new_i64(); 1557 gen_pc_plus_diff(s, pc, 0); 1558 gen_helper_guarded_page_br(tcg_env, pc); 1559 s->btype = -1; 1560 } 1561 } 1562 } 1563 1564 static void set_btype_for_blr(DisasContext *s) 1565 { 1566 if (dc_isar_feature(aa64_bti, s)) { 1567 /* BLR sets BTYPE to 2, regardless of source guarded page. 
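 * (Contrast set_btype_for_br above, where the value depends on the
 * target register and on whether the page containing the branch is
 * a guarded page.)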
*/ 1568 set_btype(s, 2); 1569 } 1570 } 1571 1572 static bool trans_BR(DisasContext *s, arg_r *a) 1573 { 1574 set_btype_for_br(s, a->rn); 1575 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1576 s->base.is_jmp = DISAS_JUMP; 1577 return true; 1578 } 1579 1580 static bool trans_BLR(DisasContext *s, arg_r *a) 1581 { 1582 TCGv_i64 dst = cpu_reg(s, a->rn); 1583 TCGv_i64 lr = cpu_reg(s, 30); 1584 if (dst == lr) { 1585 TCGv_i64 tmp = tcg_temp_new_i64(); 1586 tcg_gen_mov_i64(tmp, dst); 1587 dst = tmp; 1588 } 1589 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1590 gen_a64_set_pc(s, dst); 1591 set_btype_for_blr(s); 1592 s->base.is_jmp = DISAS_JUMP; 1593 return true; 1594 } 1595 1596 static bool trans_RET(DisasContext *s, arg_r *a) 1597 { 1598 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1599 s->base.is_jmp = DISAS_JUMP; 1600 return true; 1601 } 1602 1603 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1604 TCGv_i64 modifier, bool use_key_a) 1605 { 1606 TCGv_i64 truedst; 1607 /* 1608 * Return the branch target for a BRAA/RETA/etc, which is either 1609 * just the destination dst, or that value with the pauth check 1610 * done and the code removed from the high bits. 1611 */ 1612 if (!s->pauth_active) { 1613 return dst; 1614 } 1615 1616 truedst = tcg_temp_new_i64(); 1617 if (use_key_a) { 1618 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1619 } else { 1620 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1621 } 1622 return truedst; 1623 } 1624 1625 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1626 { 1627 TCGv_i64 dst; 1628 1629 if (!dc_isar_feature(aa64_pauth, s)) { 1630 return false; 1631 } 1632 1633 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1634 set_btype_for_br(s, a->rn); 1635 gen_a64_set_pc(s, dst); 1636 s->base.is_jmp = DISAS_JUMP; 1637 return true; 1638 } 1639 1640 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1641 { 1642 TCGv_i64 dst, lr; 1643 1644 if (!dc_isar_feature(aa64_pauth, s)) { 1645 return false; 1646 } 1647 1648 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1649 lr = cpu_reg(s, 30); 1650 if (dst == lr) { 1651 TCGv_i64 tmp = tcg_temp_new_i64(); 1652 tcg_gen_mov_i64(tmp, dst); 1653 dst = tmp; 1654 } 1655 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1656 gen_a64_set_pc(s, dst); 1657 set_btype_for_blr(s); 1658 s->base.is_jmp = DISAS_JUMP; 1659 return true; 1660 } 1661 1662 static bool trans_RETA(DisasContext *s, arg_reta *a) 1663 { 1664 TCGv_i64 dst; 1665 1666 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1667 gen_a64_set_pc(s, dst); 1668 s->base.is_jmp = DISAS_JUMP; 1669 return true; 1670 } 1671 1672 static bool trans_BRA(DisasContext *s, arg_bra *a) 1673 { 1674 TCGv_i64 dst; 1675 1676 if (!dc_isar_feature(aa64_pauth, s)) { 1677 return false; 1678 } 1679 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1680 gen_a64_set_pc(s, dst); 1681 set_btype_for_br(s, a->rn); 1682 s->base.is_jmp = DISAS_JUMP; 1683 return true; 1684 } 1685 1686 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1687 { 1688 TCGv_i64 dst, lr; 1689 1690 if (!dc_isar_feature(aa64_pauth, s)) { 1691 return false; 1692 } 1693 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1694 lr = cpu_reg(s, 30); 1695 if (dst == lr) { 1696 TCGv_i64 tmp = tcg_temp_new_i64(); 1697 tcg_gen_mov_i64(tmp, dst); 1698 dst = tmp; 1699 } 1700 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1701 gen_a64_set_pc(s, dst); 1702 set_btype_for_blr(s); 1703 s->base.is_jmp = DISAS_JUMP; 1704 
return true; 1705 } 1706 1707 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1708 { 1709 TCGv_i64 dst; 1710 1711 if (s->current_el == 0) { 1712 return false; 1713 } 1714 if (s->trap_eret) { 1715 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1716 return true; 1717 } 1718 dst = tcg_temp_new_i64(); 1719 tcg_gen_ld_i64(dst, tcg_env, 1720 offsetof(CPUARMState, elr_el[s->current_el])); 1721 1722 translator_io_start(&s->base); 1723 1724 gen_helper_exception_return(tcg_env, dst); 1725 /* Must exit loop to check un-masked IRQs */ 1726 s->base.is_jmp = DISAS_EXIT; 1727 return true; 1728 } 1729 1730 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1731 { 1732 TCGv_i64 dst; 1733 1734 if (!dc_isar_feature(aa64_pauth, s)) { 1735 return false; 1736 } 1737 if (s->current_el == 0) { 1738 return false; 1739 } 1740 /* The FGT trap takes precedence over an auth trap. */ 1741 if (s->trap_eret) { 1742 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1743 return true; 1744 } 1745 dst = tcg_temp_new_i64(); 1746 tcg_gen_ld_i64(dst, tcg_env, 1747 offsetof(CPUARMState, elr_el[s->current_el])); 1748 1749 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1750 1751 translator_io_start(&s->base); 1752 1753 gen_helper_exception_return(tcg_env, dst); 1754 /* Must exit loop to check un-masked IRQs */ 1755 s->base.is_jmp = DISAS_EXIT; 1756 return true; 1757 } 1758 1759 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1760 { 1761 return true; 1762 } 1763 1764 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1765 { 1766 /* 1767 * When running in MTTCG we don't generate jumps to the yield and 1768 * WFE helpers as it won't affect the scheduling of other vCPUs. 1769 * If we wanted to more completely model WFE/SEV so we don't busy 1770 * spin unnecessarily we would need to do something more involved. 1771 */ 1772 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1773 s->base.is_jmp = DISAS_YIELD; 1774 } 1775 return true; 1776 } 1777 1778 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1779 { 1780 s->base.is_jmp = DISAS_WFI; 1781 return true; 1782 } 1783 1784 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1785 { 1786 /* 1787 * When running in MTTCG we don't generate jumps to the yield and 1788 * WFE helpers as it won't affect the scheduling of other vCPUs. 1789 * If we wanted to more completely model WFE/SEV so we don't busy 1790 * spin unnecessarily we would need to do something more involved. 1791 */ 1792 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1793 s->base.is_jmp = DISAS_WFE; 1794 } 1795 return true; 1796 } 1797 1798 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 1799 { 1800 if (!dc_isar_feature(aa64_wfxt, s)) { 1801 return false; 1802 } 1803 1804 /* 1805 * Because we need to pass the register value to the helper, 1806 * it's easier to emit the code now, unlike trans_WFI which 1807 * defers it to aarch64_tr_tb_stop(). That means we need to 1808 * check ss_active so that single-stepping a WFIT doesn't halt. 
1809 */ 1810 if (s->ss_active) { 1811 /* Act like a NOP under architectural singlestep */ 1812 return true; 1813 } 1814 1815 gen_a64_update_pc(s, 4); 1816 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1817 /* Go back to the main loop to check for interrupts */ 1818 s->base.is_jmp = DISAS_EXIT; 1819 return true; 1820 } 1821 1822 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1823 { 1824 if (!dc_isar_feature(aa64_wfxt, s)) { 1825 return false; 1826 } 1827 1828 /* 1829 * We rely here on our WFE implementation being a NOP, so we 1830 * don't need to do anything different to handle the WFET timeout 1831 * from what trans_WFE does. 1832 */ 1833 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1834 s->base.is_jmp = DISAS_WFE; 1835 } 1836 return true; 1837 } 1838 1839 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1840 { 1841 if (s->pauth_active) { 1842 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1843 } 1844 return true; 1845 } 1846 1847 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1848 { 1849 if (s->pauth_active) { 1850 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1851 } 1852 return true; 1853 } 1854 1855 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1856 { 1857 if (s->pauth_active) { 1858 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1859 } 1860 return true; 1861 } 1862 1863 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1864 { 1865 if (s->pauth_active) { 1866 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1867 } 1868 return true; 1869 } 1870 1871 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1872 { 1873 if (s->pauth_active) { 1874 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1875 } 1876 return true; 1877 } 1878 1879 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1880 { 1881 /* Without RAS, we must implement this as NOP. */ 1882 if (dc_isar_feature(aa64_ras, s)) { 1883 /* 1884 * QEMU does not have a source of physical SErrors, 1885 * so we are only concerned with virtual SErrors. 1886 * The pseudocode in the ARM for this case is 1887 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1888 * AArch64.vESBOperation(); 1889 * Most of the condition can be evaluated at translation time. 1890 * Test for EL2 present, and defer test for SEL2 to runtime. 
1891 */ 1892 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1893 gen_helper_vesb(tcg_env); 1894 } 1895 } 1896 return true; 1897 } 1898 1899 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1900 { 1901 if (s->pauth_active) { 1902 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1903 } 1904 return true; 1905 } 1906 1907 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1908 { 1909 if (s->pauth_active) { 1910 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1911 } 1912 return true; 1913 } 1914 1915 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1916 { 1917 if (s->pauth_active) { 1918 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1919 } 1920 return true; 1921 } 1922 1923 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1924 { 1925 if (s->pauth_active) { 1926 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1927 } 1928 return true; 1929 } 1930 1931 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1932 { 1933 if (s->pauth_active) { 1934 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1935 } 1936 return true; 1937 } 1938 1939 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1940 { 1941 if (s->pauth_active) { 1942 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1943 } 1944 return true; 1945 } 1946 1947 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1948 { 1949 if (s->pauth_active) { 1950 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1951 } 1952 return true; 1953 } 1954 1955 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1956 { 1957 if (s->pauth_active) { 1958 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1959 } 1960 return true; 1961 } 1962 1963 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1964 { 1965 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1966 return true; 1967 } 1968 1969 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1970 { 1971 /* We handle DSB and DMB the same way */ 1972 TCGBar bar; 1973 1974 switch (a->types) { 1975 case 1: /* MBReqTypes_Reads */ 1976 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1977 break; 1978 case 2: /* MBReqTypes_Writes */ 1979 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1980 break; 1981 default: /* MBReqTypes_All */ 1982 bar = TCG_BAR_SC | TCG_MO_ALL; 1983 break; 1984 } 1985 tcg_gen_mb(bar); 1986 return true; 1987 } 1988 1989 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 1990 { 1991 if (!dc_isar_feature(aa64_xs, s)) { 1992 return false; 1993 } 1994 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 1995 return true; 1996 } 1997 1998 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1999 { 2000 /* 2001 * We need to break the TB after this insn to execute 2002 * self-modifying code correctly and also to take 2003 * any pending interrupts immediately. 2004 */ 2005 reset_btype(s); 2006 gen_goto_tb(s, 0, 4); 2007 return true; 2008 } 2009 2010 static bool trans_SB(DisasContext *s, arg_SB *a) 2011 { 2012 if (!dc_isar_feature(aa64_sb, s)) { 2013 return false; 2014 } 2015 /* 2016 * TODO: There is no speculation barrier opcode for TCG; 2017 * MB and end the TB instead. 
2018 */ 2019 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2020 gen_goto_tb(s, 0, 4); 2021 return true; 2022 } 2023 2024 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2025 { 2026 if (!dc_isar_feature(aa64_condm_4, s)) { 2027 return false; 2028 } 2029 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2030 return true; 2031 } 2032 2033 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2034 { 2035 TCGv_i32 z; 2036 2037 if (!dc_isar_feature(aa64_condm_5, s)) { 2038 return false; 2039 } 2040 2041 z = tcg_temp_new_i32(); 2042 2043 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2044 2045 /* 2046 * (!C & !Z) << 31 2047 * (!(C | Z)) << 31 2048 * ~((C | Z) << 31) 2049 * ~-(C | Z) 2050 * (C | Z) - 1 2051 */ 2052 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2053 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2054 2055 /* !(Z & C) */ 2056 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2057 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2058 2059 /* (!C & Z) << 31 -> -(Z & ~C) */ 2060 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2061 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2062 2063 /* C | Z */ 2064 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2065 2066 return true; 2067 } 2068 2069 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2070 { 2071 if (!dc_isar_feature(aa64_condm_5, s)) { 2072 return false; 2073 } 2074 2075 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2076 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2077 2078 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2079 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2080 2081 tcg_gen_movi_i32(cpu_NF, 0); 2082 tcg_gen_movi_i32(cpu_VF, 0); 2083 2084 return true; 2085 } 2086 2087 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2088 { 2089 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2090 return false; 2091 } 2092 if (a->imm & 1) { 2093 set_pstate_bits(PSTATE_UAO); 2094 } else { 2095 clear_pstate_bits(PSTATE_UAO); 2096 } 2097 gen_rebuild_hflags(s); 2098 s->base.is_jmp = DISAS_TOO_MANY; 2099 return true; 2100 } 2101 2102 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2103 { 2104 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2105 return false; 2106 } 2107 if (a->imm & 1) { 2108 set_pstate_bits(PSTATE_PAN); 2109 } else { 2110 clear_pstate_bits(PSTATE_PAN); 2111 } 2112 gen_rebuild_hflags(s); 2113 s->base.is_jmp = DISAS_TOO_MANY; 2114 return true; 2115 } 2116 2117 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2118 { 2119 if (s->current_el == 0) { 2120 return false; 2121 } 2122 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2123 s->base.is_jmp = DISAS_TOO_MANY; 2124 return true; 2125 } 2126 2127 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2128 { 2129 if (!dc_isar_feature(aa64_ssbs, s)) { 2130 return false; 2131 } 2132 if (a->imm & 1) { 2133 set_pstate_bits(PSTATE_SSBS); 2134 } else { 2135 clear_pstate_bits(PSTATE_SSBS); 2136 } 2137 /* Don't need to rebuild hflags since SSBS is a nop */ 2138 s->base.is_jmp = DISAS_TOO_MANY; 2139 return true; 2140 } 2141 2142 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2143 { 2144 if (!dc_isar_feature(aa64_dit, s)) { 2145 return false; 2146 } 2147 if (a->imm & 1) { 2148 set_pstate_bits(PSTATE_DIT); 2149 } else { 2150 clear_pstate_bits(PSTATE_DIT); 2151 } 2152 /* There's no need to rebuild hflags because DIT is a nop */ 2153 s->base.is_jmp = DISAS_TOO_MANY; 2154 return true; 2155 } 2156 2157 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2158 { 2159 if (dc_isar_feature(aa64_mte, s)) { 2160 /* Full MTE is enabled -- set the TCO bit as directed. 
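     * PSTATE.TCO (Tag Check Override) suppresses MTE tag checking while
     * it is set; because it feeds the MTE_ACTIVE hflag we rebuild
     * hflags and stop chaining below.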
*/ 2161 if (a->imm & 1) { 2162 set_pstate_bits(PSTATE_TCO); 2163 } else { 2164 clear_pstate_bits(PSTATE_TCO); 2165 } 2166 gen_rebuild_hflags(s); 2167 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2168 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2169 return true; 2170 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2171 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2172 return true; 2173 } else { 2174 /* Insn not present */ 2175 return false; 2176 } 2177 } 2178 2179 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2180 { 2181 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2182 s->base.is_jmp = DISAS_TOO_MANY; 2183 return true; 2184 } 2185 2186 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2187 { 2188 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2189 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2190 s->base.is_jmp = DISAS_UPDATE_EXIT; 2191 return true; 2192 } 2193 2194 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2195 { 2196 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2197 return false; 2198 } 2199 2200 if (a->imm == 0) { 2201 clear_pstate_bits(PSTATE_ALLINT); 2202 } else if (s->current_el > 1) { 2203 set_pstate_bits(PSTATE_ALLINT); 2204 } else { 2205 gen_helper_msr_set_allint_el1(tcg_env); 2206 } 2207 2208 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2209 s->base.is_jmp = DISAS_UPDATE_EXIT; 2210 return true; 2211 } 2212 2213 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2214 { 2215 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2216 return false; 2217 } 2218 if (sme_access_check(s)) { 2219 int old = s->pstate_sm | (s->pstate_za << 1); 2220 int new = a->imm * 3; 2221 2222 if ((old ^ new) & a->mask) { 2223 /* At least one bit changes. */ 2224 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2225 tcg_constant_i32(a->mask)); 2226 s->base.is_jmp = DISAS_TOO_MANY; 2227 } 2228 } 2229 return true; 2230 } 2231 2232 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2233 { 2234 TCGv_i32 tmp = tcg_temp_new_i32(); 2235 TCGv_i32 nzcv = tcg_temp_new_i32(); 2236 2237 /* build bit 31, N */ 2238 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2239 /* build bit 30, Z */ 2240 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2241 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2242 /* build bit 29, C */ 2243 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2244 /* build bit 28, V */ 2245 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2246 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2247 /* generate result */ 2248 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2249 } 2250 2251 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2252 { 2253 TCGv_i32 nzcv = tcg_temp_new_i32(); 2254 2255 /* take NZCV from R[t] */ 2256 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2257 2258 /* bit 31, N */ 2259 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2260 /* bit 30, Z */ 2261 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2262 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2263 /* bit 29, C */ 2264 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2265 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2266 /* bit 28, V */ 2267 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2268 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2269 } 2270 2271 static void gen_sysreg_undef(DisasContext *s, bool isread, 2272 uint8_t op0, uint8_t op1, uint8_t op2, 2273 uint8_t crn, uint8_t crm, uint8_t rt) 2274 { 2275 /* 2276 * Generate code to emit an UNDEF with correct syndrome 2277 * information for a failed system register access. 
2278 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2279 * but if FEAT_IDST is implemented then read accesses to registers 2280 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2281 * syndrome. 2282 */ 2283 uint32_t syndrome; 2284 2285 if (isread && dc_isar_feature(aa64_ids, s) && 2286 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2287 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2288 } else { 2289 syndrome = syn_uncategorized(); 2290 } 2291 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2292 } 2293 2294 /* MRS - move from system register 2295 * MSR (register) - move to system register 2296 * SYS 2297 * SYSL 2298 * These are all essentially the same insn in 'read' and 'write' 2299 * versions, with varying op0 fields. 2300 */ 2301 static void handle_sys(DisasContext *s, bool isread, 2302 unsigned int op0, unsigned int op1, unsigned int op2, 2303 unsigned int crn, unsigned int crm, unsigned int rt) 2304 { 2305 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2306 crn, crm, op0, op1, op2); 2307 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2308 bool need_exit_tb = false; 2309 bool nv_trap_to_el2 = false; 2310 bool nv_redirect_reg = false; 2311 bool skip_fp_access_checks = false; 2312 bool nv2_mem_redirect = false; 2313 TCGv_ptr tcg_ri = NULL; 2314 TCGv_i64 tcg_rt; 2315 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2316 2317 if (crn == 11 || crn == 15) { 2318 /* 2319 * Check for TIDCP trap, which must take precedence over 2320 * the UNDEF for "no such register" etc. 2321 */ 2322 switch (s->current_el) { 2323 case 0: 2324 if (dc_isar_feature(aa64_tidcp1, s)) { 2325 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2326 } 2327 break; 2328 case 1: 2329 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2330 break; 2331 } 2332 } 2333 2334 if (!ri) { 2335 /* Unknown register; this might be a guest error or a QEMU 2336 * unimplemented feature. 2337 */ 2338 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2339 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2340 isread ? "read" : "write", op0, op1, crn, crm, op2); 2341 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2342 return; 2343 } 2344 2345 if (s->nv2 && ri->nv2_redirect_offset) { 2346 /* 2347 * Some registers always redirect to memory; some only do so if 2348 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2349 * pairs which share an offset; see the table in R_CSRPQ). 2350 */ 2351 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2352 nv2_mem_redirect = s->nv1; 2353 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2354 nv2_mem_redirect = !s->nv1; 2355 } else { 2356 nv2_mem_redirect = true; 2357 } 2358 } 2359 2360 /* Check access permissions */ 2361 if (!cp_access_ok(s->current_el, ri, isread)) { 2362 /* 2363 * FEAT_NV/NV2 handling does not do the usual FP access checks 2364 * for registers only accessible at EL2 (though it *does* do them 2365 * for registers accessible at EL1). 2366 */ 2367 skip_fp_access_checks = true; 2368 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2369 /* 2370 * This is one of the few EL2 registers which should redirect 2371 * to the equivalent EL1 register. We do that after running 2372 * the EL2 register's accessfn. 2373 */ 2374 nv_redirect_reg = true; 2375 assert(!nv2_mem_redirect); 2376 } else if (nv2_mem_redirect) { 2377 /* 2378 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2379 * UNDEF to EL1. 
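         * Nothing to emit in this arm: it exists only so that we do not
         * fall through to the trap/UNDEF cases below. The actual
         * redirection is generated later, after the access checks.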
2380 */ 2381 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2382 /* 2383 * This register / instruction exists and is an EL2 register, so 2384 * we must trap to EL2 if accessed in nested virtualization EL1 2385 * instead of UNDEFing. We'll do that after the usual access checks. 2386 * (This makes a difference only for a couple of registers like 2387 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2388 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2389 * an accessfn which does nothing when called from EL1, because 2390 * the trap-to-EL3 controls which would apply to that register 2391 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2392 */ 2393 nv_trap_to_el2 = true; 2394 } else { 2395 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2396 return; 2397 } 2398 } 2399 2400 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2401 /* Emit code to perform further access permissions checks at 2402 * runtime; this may result in an exception. 2403 */ 2404 gen_a64_update_pc(s, 0); 2405 tcg_ri = tcg_temp_new_ptr(); 2406 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2407 tcg_constant_i32(key), 2408 tcg_constant_i32(syndrome), 2409 tcg_constant_i32(isread)); 2410 } else if (ri->type & ARM_CP_RAISES_EXC) { 2411 /* 2412 * The readfn or writefn might raise an exception; 2413 * synchronize the CPU state in case it does. 2414 */ 2415 gen_a64_update_pc(s, 0); 2416 } 2417 2418 if (!skip_fp_access_checks) { 2419 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2420 return; 2421 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2422 return; 2423 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2424 return; 2425 } 2426 } 2427 2428 if (nv_trap_to_el2) { 2429 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2430 return; 2431 } 2432 2433 if (nv_redirect_reg) { 2434 /* 2435 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2436 * Conveniently in all cases the encoding of the EL1 register is 2437 * identical to the EL2 register except that opc1 is 0. 2438 * Get the reginfo for the EL1 register to use for the actual access. 2439 * We don't use the EL1 register's access function, and 2440 * fine-grained-traps on EL1 also do not apply here. 2441 */ 2442 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2443 crn, crm, op0, 0, op2); 2444 ri = get_arm_cp_reginfo(s->cp_regs, key); 2445 assert(ri); 2446 assert(cp_access_ok(s->current_el, ri, isread)); 2447 /* 2448 * We might not have done an update_pc earlier, so check we don't 2449 * need it. We could support this in future if necessary. 2450 */ 2451 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2452 } 2453 2454 if (nv2_mem_redirect) { 2455 /* 2456 * This system register is being redirected into an EL2 memory access. 2457 * This means it is not an IO operation, doesn't change hflags, 2458 * and need not end the TB, because it has no side effects. 2459 * 2460 * The access is 64-bit single copy atomic, guaranteed aligned because 2461 * of the definition of VCNR_EL2. Its endianness depends on 2462 * SCTLR_EL2.EE, not on the data endianness of EL1. 2463 * It is done under either the EL2 translation regime or the EL2&0 2464 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2465 * PSTATE.PAN is 0. 2466 */ 2467 TCGv_i64 ptr = tcg_temp_new_i64(); 2468 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2469 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2470 int memidx = arm_to_core_mmu_idx(armmemidx); 2471 uint32_t syn; 2472 2473 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2474 2475 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2476 tcg_gen_addi_i64(ptr, ptr, 2477 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2478 tcg_rt = cpu_reg(s, rt); 2479 2480 syn = syn_data_abort_vncr(0, !isread, 0); 2481 disas_set_insn_syndrome(s, syn); 2482 if (isread) { 2483 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2484 } else { 2485 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2486 } 2487 return; 2488 } 2489 2490 /* Handle special cases first */ 2491 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2492 case 0: 2493 break; 2494 case ARM_CP_NOP: 2495 return; 2496 case ARM_CP_NZCV: 2497 tcg_rt = cpu_reg(s, rt); 2498 if (isread) { 2499 gen_get_nzcv(tcg_rt); 2500 } else { 2501 gen_set_nzcv(tcg_rt); 2502 } 2503 return; 2504 case ARM_CP_CURRENTEL: 2505 { 2506 /* 2507 * Reads as current EL value from pstate, which is 2508 * guaranteed to be constant by the tb flags. 2509 * For nested virt we should report EL2. 2510 */ 2511 int el = s->nv ? 2 : s->current_el; 2512 tcg_rt = cpu_reg(s, rt); 2513 tcg_gen_movi_i64(tcg_rt, el << 2); 2514 return; 2515 } 2516 case ARM_CP_DC_ZVA: 2517 /* Writes clear the aligned block of memory which rt points into. */ 2518 if (s->mte_active[0]) { 2519 int desc = 0; 2520 2521 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2522 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2523 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2524 2525 tcg_rt = tcg_temp_new_i64(); 2526 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2527 tcg_constant_i32(desc), cpu_reg(s, rt)); 2528 } else { 2529 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2530 } 2531 gen_helper_dc_zva(tcg_env, tcg_rt); 2532 return; 2533 case ARM_CP_DC_GVA: 2534 { 2535 TCGv_i64 clean_addr, tag; 2536 2537 /* 2538 * DC_GVA, like DC_ZVA, requires that we supply the original 2539 * pointer for an invalid page. Probe that address first. 2540 */ 2541 tcg_rt = cpu_reg(s, rt); 2542 clean_addr = clean_data_tbi(s, tcg_rt); 2543 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2544 2545 if (s->ata[0]) { 2546 /* Extract the tag from the register to match STZGM. */ 2547 tag = tcg_temp_new_i64(); 2548 tcg_gen_shri_i64(tag, tcg_rt, 56); 2549 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2550 } 2551 } 2552 return; 2553 case ARM_CP_DC_GZVA: 2554 { 2555 TCGv_i64 clean_addr, tag; 2556 2557 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2558 tcg_rt = cpu_reg(s, rt); 2559 clean_addr = clean_data_tbi(s, tcg_rt); 2560 gen_helper_dc_zva(tcg_env, clean_addr); 2561 2562 if (s->ata[0]) { 2563 /* Extract the tag from the register to match STZGM. 
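         * The allocation tag lives in bits [59:56] of the pointer, so
         * after the 56-bit shift it ends up in the low bits of 'tag',
         * which is presumably the form the stzgm helper expects.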
*/ 2564 tag = tcg_temp_new_i64(); 2565 tcg_gen_shri_i64(tag, tcg_rt, 56); 2566 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2567 } 2568 } 2569 return; 2570 default: 2571 g_assert_not_reached(); 2572 } 2573 2574 if (ri->type & ARM_CP_IO) { 2575 /* I/O operations must end the TB here (whether read or write) */ 2576 need_exit_tb = translator_io_start(&s->base); 2577 } 2578 2579 tcg_rt = cpu_reg(s, rt); 2580 2581 if (isread) { 2582 if (ri->type & ARM_CP_CONST) { 2583 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2584 } else if (ri->readfn) { 2585 if (!tcg_ri) { 2586 tcg_ri = gen_lookup_cp_reg(key); 2587 } 2588 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2589 } else { 2590 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2591 } 2592 } else { 2593 if (ri->type & ARM_CP_CONST) { 2594 /* If not forbidden by access permissions, treat as WI */ 2595 return; 2596 } else if (ri->writefn) { 2597 if (!tcg_ri) { 2598 tcg_ri = gen_lookup_cp_reg(key); 2599 } 2600 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2601 } else { 2602 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2603 } 2604 } 2605 2606 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2607 /* 2608 * A write to any coprocessor register that ends a TB 2609 * must rebuild the hflags for the next TB. 2610 */ 2611 gen_rebuild_hflags(s); 2612 /* 2613 * We default to ending the TB on a coprocessor register write, 2614 * but allow this to be suppressed by the register definition 2615 * (usually only necessary to work around guest bugs). 2616 */ 2617 need_exit_tb = true; 2618 } 2619 if (need_exit_tb) { 2620 s->base.is_jmp = DISAS_UPDATE_EXIT; 2621 } 2622 } 2623 2624 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2625 { 2626 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2627 return true; 2628 } 2629 2630 static bool trans_SVC(DisasContext *s, arg_i *a) 2631 { 2632 /* 2633 * For SVC, HVC and SMC we advance the single-step state 2634 * machine before taking the exception. This is architecturally 2635 * mandated, to ensure that single-stepping a system call 2636 * instruction works properly. 2637 */ 2638 uint32_t syndrome = syn_aa64_svc(a->imm); 2639 if (s->fgt_svc) { 2640 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2641 return true; 2642 } 2643 gen_ss_advance(s); 2644 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2645 return true; 2646 } 2647 2648 static bool trans_HVC(DisasContext *s, arg_i *a) 2649 { 2650 int target_el = s->current_el == 3 ? 3 : 2; 2651 2652 if (s->current_el == 0) { 2653 unallocated_encoding(s); 2654 return true; 2655 } 2656 /* 2657 * The pre HVC helper handles cases when HVC gets trapped 2658 * as an undefined insn by runtime configuration. 
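     * (For example, HCR_EL2.HCD set, or SCR_EL3.HCE clear when EL3 is
     * implemented, turns HVC into an UNDEF; checking this at runtime
     * keeps the translated code simple.)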
2659 */ 2660 gen_a64_update_pc(s, 0); 2661 gen_helper_pre_hvc(tcg_env); 2662 /* Architecture requires ss advance before we do the actual work */ 2663 gen_ss_advance(s); 2664 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2665 return true; 2666 } 2667 2668 static bool trans_SMC(DisasContext *s, arg_i *a) 2669 { 2670 if (s->current_el == 0) { 2671 unallocated_encoding(s); 2672 return true; 2673 } 2674 gen_a64_update_pc(s, 0); 2675 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2676 /* Architecture requires ss advance before we do the actual work */ 2677 gen_ss_advance(s); 2678 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2679 return true; 2680 } 2681 2682 static bool trans_BRK(DisasContext *s, arg_i *a) 2683 { 2684 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2685 return true; 2686 } 2687 2688 static bool trans_HLT(DisasContext *s, arg_i *a) 2689 { 2690 /* 2691 * HLT. This has two purposes. 2692 * Architecturally, it is an external halting debug instruction. 2693 * Since QEMU doesn't implement external debug, we treat this as 2694 * it is required for halting debug disabled: it will UNDEF. 2695 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2696 */ 2697 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2698 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2699 } else { 2700 unallocated_encoding(s); 2701 } 2702 return true; 2703 } 2704 2705 /* 2706 * Load/Store exclusive instructions are implemented by remembering 2707 * the value/address loaded, and seeing if these are the same 2708 * when the store is performed. This is not actually the architecturally 2709 * mandated semantics, but it works for typical guest code sequences 2710 * and avoids having to monitor regular stores. 2711 * 2712 * The store exclusive uses the atomic cmpxchg primitives to avoid 2713 * races in multi-threaded linux-user and when MTTCG softmmu is 2714 * enabled. 
2715 */ 2716 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2717 int size, bool is_pair) 2718 { 2719 int idx = get_mem_index(s); 2720 TCGv_i64 dirty_addr, clean_addr; 2721 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2722 2723 s->is_ldex = true; 2724 dirty_addr = cpu_reg_sp(s, rn); 2725 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2726 2727 g_assert(size <= 3); 2728 if (is_pair) { 2729 g_assert(size >= 2); 2730 if (size == 2) { 2731 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2732 if (s->be_data == MO_LE) { 2733 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2734 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2735 } else { 2736 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2737 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2738 } 2739 } else { 2740 TCGv_i128 t16 = tcg_temp_new_i128(); 2741 2742 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2743 2744 if (s->be_data == MO_LE) { 2745 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2746 cpu_exclusive_high, t16); 2747 } else { 2748 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2749 cpu_exclusive_val, t16); 2750 } 2751 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2752 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2753 } 2754 } else { 2755 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2756 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2757 } 2758 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2759 } 2760 2761 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2762 int rn, int size, int is_pair) 2763 { 2764 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2765 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2766 * [addr] = {Rt}; 2767 * if (is_pair) { 2768 * [addr + datasize] = {Rt2}; 2769 * } 2770 * {Rd} = 0; 2771 * } else { 2772 * {Rd} = 1; 2773 * } 2774 * env->exclusive_addr = -1; 2775 */ 2776 TCGLabel *fail_label = gen_new_label(); 2777 TCGLabel *done_label = gen_new_label(); 2778 TCGv_i64 tmp, clean_addr; 2779 MemOp memop; 2780 2781 /* 2782 * FIXME: We are out of spec here. We have recorded only the address 2783 * from load_exclusive, not the entire range, and we assume that the 2784 * size of the access on both sides match. The architecture allows the 2785 * store to be smaller than the load, so long as the stored bytes are 2786 * within the range recorded by the load. 2787 */ 2788 2789 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2790 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2791 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2792 2793 /* 2794 * The write, and any associated faults, only happen if the virtual 2795 * and physical addresses pass the exclusive monitor check. These 2796 * faults are exceedingly unlikely, because normally the guest uses 2797 * the exact same address register for the load_exclusive, and we 2798 * would have recognized these faults there. 2799 * 2800 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2801 * unaligned 4-byte write within the range of an aligned 8-byte load. 2802 * With LSE2, the store would need to cross a 16-byte boundary when the 2803 * load did not, which would mean the store is outside the range 2804 * recorded for the monitor, which would have failed a corrected monitor 2805 * check above. 
For now, we assume no size change and retain the 2806 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2807 * 2808 * It is possible to trigger an MTE fault, by performing the load with 2809 * a virtual address with a valid tag and performing the store with the 2810 * same virtual address and a different invalid tag. 2811 */ 2812 memop = size + is_pair; 2813 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2814 memop |= MO_ALIGN; 2815 } 2816 memop = finalize_memop(s, memop); 2817 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2818 2819 tmp = tcg_temp_new_i64(); 2820 if (is_pair) { 2821 if (size == 2) { 2822 if (s->be_data == MO_LE) { 2823 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2824 } else { 2825 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2826 } 2827 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2828 cpu_exclusive_val, tmp, 2829 get_mem_index(s), memop); 2830 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2831 } else { 2832 TCGv_i128 t16 = tcg_temp_new_i128(); 2833 TCGv_i128 c16 = tcg_temp_new_i128(); 2834 TCGv_i64 a, b; 2835 2836 if (s->be_data == MO_LE) { 2837 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2838 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2839 cpu_exclusive_high); 2840 } else { 2841 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2842 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2843 cpu_exclusive_val); 2844 } 2845 2846 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2847 get_mem_index(s), memop); 2848 2849 a = tcg_temp_new_i64(); 2850 b = tcg_temp_new_i64(); 2851 if (s->be_data == MO_LE) { 2852 tcg_gen_extr_i128_i64(a, b, t16); 2853 } else { 2854 tcg_gen_extr_i128_i64(b, a, t16); 2855 } 2856 2857 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2858 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2859 tcg_gen_or_i64(tmp, a, b); 2860 2861 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2862 } 2863 } else { 2864 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2865 cpu_reg(s, rt), get_mem_index(s), memop); 2866 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2867 } 2868 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2869 tcg_gen_br(done_label); 2870 2871 gen_set_label(fail_label); 2872 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2873 gen_set_label(done_label); 2874 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2875 } 2876 2877 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2878 int rn, int size) 2879 { 2880 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2881 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2882 int memidx = get_mem_index(s); 2883 TCGv_i64 clean_addr; 2884 MemOp memop; 2885 2886 if (rn == 31) { 2887 gen_check_sp_alignment(s); 2888 } 2889 memop = check_atomic_align(s, rn, size); 2890 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2891 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2892 memidx, memop); 2893 } 2894 2895 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2896 int rn, int size) 2897 { 2898 TCGv_i64 s1 = cpu_reg(s, rs); 2899 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2900 TCGv_i64 t1 = cpu_reg(s, rt); 2901 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2902 TCGv_i64 clean_addr; 2903 int memidx = get_mem_index(s); 2904 MemOp memop; 2905 2906 if (rn == 31) { 2907 gen_check_sp_alignment(s); 2908 } 2909 2910 /* This is a single atomic access, despite the "pair". 
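     * check_atomic_align below is given size + 1, so e.g. a 64-bit
     * CASP is implemented as one aligned 128-bit cmpxchg of the
     * concatenated register pair against memory.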
*/ 2911 memop = check_atomic_align(s, rn, size + 1); 2912 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2913 2914 if (size == 2) { 2915 TCGv_i64 cmp = tcg_temp_new_i64(); 2916 TCGv_i64 val = tcg_temp_new_i64(); 2917 2918 if (s->be_data == MO_LE) { 2919 tcg_gen_concat32_i64(val, t1, t2); 2920 tcg_gen_concat32_i64(cmp, s1, s2); 2921 } else { 2922 tcg_gen_concat32_i64(val, t2, t1); 2923 tcg_gen_concat32_i64(cmp, s2, s1); 2924 } 2925 2926 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2927 2928 if (s->be_data == MO_LE) { 2929 tcg_gen_extr32_i64(s1, s2, cmp); 2930 } else { 2931 tcg_gen_extr32_i64(s2, s1, cmp); 2932 } 2933 } else { 2934 TCGv_i128 cmp = tcg_temp_new_i128(); 2935 TCGv_i128 val = tcg_temp_new_i128(); 2936 2937 if (s->be_data == MO_LE) { 2938 tcg_gen_concat_i64_i128(val, t1, t2); 2939 tcg_gen_concat_i64_i128(cmp, s1, s2); 2940 } else { 2941 tcg_gen_concat_i64_i128(val, t2, t1); 2942 tcg_gen_concat_i64_i128(cmp, s2, s1); 2943 } 2944 2945 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2946 2947 if (s->be_data == MO_LE) { 2948 tcg_gen_extr_i128_i64(s1, s2, cmp); 2949 } else { 2950 tcg_gen_extr_i128_i64(s2, s1, cmp); 2951 } 2952 } 2953 } 2954 2955 /* 2956 * Compute the ISS.SF bit for syndrome information if an exception 2957 * is taken on a load or store. This indicates whether the instruction 2958 * is accessing a 32-bit or 64-bit register. This logic is derived 2959 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2960 */ 2961 static bool ldst_iss_sf(int size, bool sign, bool ext) 2962 { 2963 2964 if (sign) { 2965 /* 2966 * Signed loads are 64 bit results if we are not going to 2967 * do a zero-extend from 32 to 64 after the load. 2968 * (For a store, sign and ext are always false.) 2969 */ 2970 return !ext; 2971 } else { 2972 /* Unsigned loads/stores work at the specified size */ 2973 return size == MO_64; 2974 } 2975 } 2976 2977 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2978 { 2979 if (a->rn == 31) { 2980 gen_check_sp_alignment(s); 2981 } 2982 if (a->lasr) { 2983 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2984 } 2985 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2986 return true; 2987 } 2988 2989 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2990 { 2991 if (a->rn == 31) { 2992 gen_check_sp_alignment(s); 2993 } 2994 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2995 if (a->lasr) { 2996 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2997 } 2998 return true; 2999 } 3000 3001 static bool trans_STLR(DisasContext *s, arg_stlr *a) 3002 { 3003 TCGv_i64 clean_addr; 3004 MemOp memop; 3005 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3006 3007 /* 3008 * StoreLORelease is the same as Store-Release for QEMU, but 3009 * needs the feature-test. 3010 */ 3011 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3012 return false; 3013 } 3014 /* Generate ISS for non-exclusive accesses including LASR. 
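     * The release barrier below is emitted before the store itself so
     * that all earlier loads and stores are ordered before it, giving
     * the Store-Release semantics.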
*/ 3015 if (a->rn == 31) { 3016 gen_check_sp_alignment(s); 3017 } 3018 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3019 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3020 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3021 true, a->rn != 31, memop); 3022 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3023 iss_sf, a->lasr); 3024 return true; 3025 } 3026 3027 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3028 { 3029 TCGv_i64 clean_addr; 3030 MemOp memop; 3031 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3032 3033 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3034 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3035 return false; 3036 } 3037 /* Generate ISS for non-exclusive accesses including LASR. */ 3038 if (a->rn == 31) { 3039 gen_check_sp_alignment(s); 3040 } 3041 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3042 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3043 false, a->rn != 31, memop); 3044 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3045 a->rt, iss_sf, a->lasr); 3046 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3047 return true; 3048 } 3049 3050 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3051 { 3052 if (a->rn == 31) { 3053 gen_check_sp_alignment(s); 3054 } 3055 if (a->lasr) { 3056 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3057 } 3058 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3059 return true; 3060 } 3061 3062 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3063 { 3064 if (a->rn == 31) { 3065 gen_check_sp_alignment(s); 3066 } 3067 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3068 if (a->lasr) { 3069 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3070 } 3071 return true; 3072 } 3073 3074 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3075 { 3076 if (!dc_isar_feature(aa64_atomics, s)) { 3077 return false; 3078 } 3079 if (((a->rt | a->rs) & 1) != 0) { 3080 return false; 3081 } 3082 3083 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3084 return true; 3085 } 3086 3087 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3088 { 3089 if (!dc_isar_feature(aa64_atomics, s)) { 3090 return false; 3091 } 3092 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3093 return true; 3094 } 3095 3096 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3097 { 3098 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3099 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3100 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3101 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3102 3103 gen_pc_plus_diff(s, clean_addr, a->imm); 3104 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3105 false, true, a->rt, iss_sf, false); 3106 return true; 3107 } 3108 3109 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3110 { 3111 /* Load register (literal), vector version */ 3112 TCGv_i64 clean_addr; 3113 MemOp memop; 3114 3115 if (!fp_access_check(s)) { 3116 return true; 3117 } 3118 memop = finalize_memop_asimd(s, a->sz); 3119 clean_addr = tcg_temp_new_i64(); 3120 gen_pc_plus_diff(s, clean_addr, a->imm); 3121 do_fp_ld(s, a->rt, clean_addr, memop); 3122 return true; 3123 } 3124 3125 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3126 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3127 uint64_t offset, bool is_store, MemOp mop) 3128 { 3129 if (a->rn == 31) { 3130 gen_check_sp_alignment(s); 3131 } 3132 3133 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3134 if (!a->p) { 3135 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3136 } 3137 3138 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3139 (a->w || a->rn != 31), 2 << a->sz, mop); 3140 } 3141 3142 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3143 TCGv_i64 dirty_addr, uint64_t offset) 3144 { 3145 if (a->w) { 3146 if (a->p) { 3147 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3148 } 3149 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3150 } 3151 } 3152 3153 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3154 { 3155 uint64_t offset = a->imm << a->sz; 3156 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3157 MemOp mop = finalize_memop(s, a->sz); 3158 3159 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3160 tcg_rt = cpu_reg(s, a->rt); 3161 tcg_rt2 = cpu_reg(s, a->rt2); 3162 /* 3163 * We built mop above for the single logical access -- rebuild it 3164 * now for the paired operation. 3165 * 3166 * With LSE2, non-sign-extending pairs are treated atomically if 3167 * aligned, and if unaligned one of the pair will be completely 3168 * within a 16-byte block and that element will be atomic. 3169 * Otherwise each element is separately atomic. 3170 * In all cases, issue one operation with the correct atomicity. 3171 */ 3172 mop = a->sz + 1; 3173 if (s->align_mem) { 3174 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3175 } 3176 mop = finalize_memop_pair(s, mop); 3177 if (a->sz == 2) { 3178 TCGv_i64 tmp = tcg_temp_new_i64(); 3179 3180 if (s->be_data == MO_LE) { 3181 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3182 } else { 3183 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3184 } 3185 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3186 } else { 3187 TCGv_i128 tmp = tcg_temp_new_i128(); 3188 3189 if (s->be_data == MO_LE) { 3190 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3191 } else { 3192 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3193 } 3194 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3195 } 3196 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3197 return true; 3198 } 3199 3200 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3201 { 3202 uint64_t offset = a->imm << a->sz; 3203 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3204 MemOp mop = finalize_memop(s, a->sz); 3205 3206 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3207 tcg_rt = cpu_reg(s, a->rt); 3208 tcg_rt2 = cpu_reg(s, a->rt2); 3209 3210 /* 3211 * We built mop above for the single logical access -- rebuild it 3212 * now for the paired operation. 3213 * 3214 * With LSE2, non-sign-extending pairs are treated atomically if 3215 * aligned, and if unaligned one of the pair will be completely 3216 * within a 16-byte block and that element will be atomic. 3217 * Otherwise each element is separately atomic. 3218 * In all cases, issue one operation with the correct atomicity. 3219 * 3220 * This treats sign-extending loads like zero-extending loads, 3221 * since that reuses the most code below. 3222 */ 3223 mop = a->sz + 1; 3224 if (s->align_mem) { 3225 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3226 } 3227 mop = finalize_memop_pair(s, mop); 3228 if (a->sz == 2) { 3229 int o2 = s->be_data == MO_LE ? 
32 : 0; 3230 int o1 = o2 ^ 32; 3231 3232 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3233 if (a->sign) { 3234 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3235 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3236 } else { 3237 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3238 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3239 } 3240 } else { 3241 TCGv_i128 tmp = tcg_temp_new_i128(); 3242 3243 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3244 if (s->be_data == MO_LE) { 3245 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3246 } else { 3247 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3248 } 3249 } 3250 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3251 return true; 3252 } 3253 3254 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3255 { 3256 uint64_t offset = a->imm << a->sz; 3257 TCGv_i64 clean_addr, dirty_addr; 3258 MemOp mop; 3259 3260 if (!fp_access_check(s)) { 3261 return true; 3262 } 3263 3264 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3265 mop = finalize_memop_asimd(s, a->sz); 3266 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3267 do_fp_st(s, a->rt, clean_addr, mop); 3268 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3269 do_fp_st(s, a->rt2, clean_addr, mop); 3270 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3271 return true; 3272 } 3273 3274 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3275 { 3276 uint64_t offset = a->imm << a->sz; 3277 TCGv_i64 clean_addr, dirty_addr; 3278 MemOp mop; 3279 3280 if (!fp_access_check(s)) { 3281 return true; 3282 } 3283 3284 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3285 mop = finalize_memop_asimd(s, a->sz); 3286 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3287 do_fp_ld(s, a->rt, clean_addr, mop); 3288 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3289 do_fp_ld(s, a->rt2, clean_addr, mop); 3290 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3291 return true; 3292 } 3293 3294 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3295 { 3296 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3297 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3298 MemOp mop; 3299 TCGv_i128 tmp; 3300 3301 /* STGP only comes in one size. */ 3302 tcg_debug_assert(a->sz == MO_64); 3303 3304 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3305 return false; 3306 } 3307 3308 if (a->rn == 31) { 3309 gen_check_sp_alignment(s); 3310 } 3311 3312 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3313 if (!a->p) { 3314 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3315 } 3316 3317 clean_addr = clean_data_tbi(s, dirty_addr); 3318 tcg_rt = cpu_reg(s, a->rt); 3319 tcg_rt2 = cpu_reg(s, a->rt2); 3320 3321 /* 3322 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3323 * and one tag operation. We implement it as one single aligned 16-byte 3324 * memory operation for convenience. Note that the alignment ensures 3325 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3326 */ 3327 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3328 3329 tmp = tcg_temp_new_i128(); 3330 if (s->be_data == MO_LE) { 3331 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3332 } else { 3333 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3334 } 3335 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3336 3337 /* Perform the tag store, if tag access enabled. 
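     * Under CF_PARALLEL the _parallel helper is used, presumably so
     * that the tag update is atomic with respect to other vCPUs
     * sharing the tag memory.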
*/ 3338 if (s->ata[0]) { 3339 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3340 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3341 } else { 3342 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3343 } 3344 } 3345 3346 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3347 return true; 3348 } 3349 3350 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3351 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3352 uint64_t offset, bool is_store, MemOp mop) 3353 { 3354 int memidx; 3355 3356 if (a->rn == 31) { 3357 gen_check_sp_alignment(s); 3358 } 3359 3360 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3361 if (!a->p) { 3362 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3363 } 3364 memidx = get_a64_user_mem_index(s, a->unpriv); 3365 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3366 a->w || a->rn != 31, 3367 mop, a->unpriv, memidx); 3368 } 3369 3370 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3371 TCGv_i64 dirty_addr, uint64_t offset) 3372 { 3373 if (a->w) { 3374 if (a->p) { 3375 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3376 } 3377 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3378 } 3379 } 3380 3381 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3382 { 3383 bool iss_sf, iss_valid = !a->w; 3384 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3385 int memidx = get_a64_user_mem_index(s, a->unpriv); 3386 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3387 3388 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3389 3390 tcg_rt = cpu_reg(s, a->rt); 3391 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3392 3393 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3394 iss_valid, a->rt, iss_sf, false); 3395 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3396 return true; 3397 } 3398 3399 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3400 { 3401 bool iss_sf, iss_valid = !a->w; 3402 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3403 int memidx = get_a64_user_mem_index(s, a->unpriv); 3404 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3405 3406 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3407 3408 tcg_rt = cpu_reg(s, a->rt); 3409 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3410 3411 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3412 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3413 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3414 return true; 3415 } 3416 3417 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3418 { 3419 TCGv_i64 clean_addr, dirty_addr; 3420 MemOp mop; 3421 3422 if (!fp_access_check(s)) { 3423 return true; 3424 } 3425 mop = finalize_memop_asimd(s, a->sz); 3426 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3427 do_fp_st(s, a->rt, clean_addr, mop); 3428 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3429 return true; 3430 } 3431 3432 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3433 { 3434 TCGv_i64 clean_addr, dirty_addr; 3435 MemOp mop; 3436 3437 if (!fp_access_check(s)) { 3438 return true; 3439 } 3440 mop = finalize_memop_asimd(s, a->sz); 3441 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3442 do_fp_ld(s, a->rt, clean_addr, mop); 3443 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3444 return true; 3445 } 3446 3447 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3448 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3449 bool is_store, MemOp memop) 3450 { 3451 TCGv_i64 tcg_rm; 3452 3453 if (a->rn == 31) { 3454 
gen_check_sp_alignment(s); 3455 } 3456 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3457 3458 tcg_rm = read_cpu_reg(s, a->rm, 1); 3459 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3460 3461 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3462 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3463 } 3464 3465 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3466 { 3467 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3468 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3469 MemOp memop; 3470 3471 if (extract32(a->opt, 1, 1) == 0) { 3472 return false; 3473 } 3474 3475 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3476 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3477 tcg_rt = cpu_reg(s, a->rt); 3478 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3479 a->ext, true, a->rt, iss_sf, false); 3480 return true; 3481 } 3482 3483 static bool trans_STR(DisasContext *s, arg_ldst *a) 3484 { 3485 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3486 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3487 MemOp memop; 3488 3489 if (extract32(a->opt, 1, 1) == 0) { 3490 return false; 3491 } 3492 3493 memop = finalize_memop(s, a->sz); 3494 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3495 tcg_rt = cpu_reg(s, a->rt); 3496 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3497 return true; 3498 } 3499 3500 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3501 { 3502 TCGv_i64 clean_addr, dirty_addr; 3503 MemOp memop; 3504 3505 if (extract32(a->opt, 1, 1) == 0) { 3506 return false; 3507 } 3508 3509 if (!fp_access_check(s)) { 3510 return true; 3511 } 3512 3513 memop = finalize_memop_asimd(s, a->sz); 3514 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3515 do_fp_ld(s, a->rt, clean_addr, memop); 3516 return true; 3517 } 3518 3519 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3520 { 3521 TCGv_i64 clean_addr, dirty_addr; 3522 MemOp memop; 3523 3524 if (extract32(a->opt, 1, 1) == 0) { 3525 return false; 3526 } 3527 3528 if (!fp_access_check(s)) { 3529 return true; 3530 } 3531 3532 memop = finalize_memop_asimd(s, a->sz); 3533 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3534 do_fp_st(s, a->rt, clean_addr, memop); 3535 return true; 3536 } 3537 3538 3539 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3540 int sign, bool invert) 3541 { 3542 MemOp mop = a->sz | sign; 3543 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3544 3545 if (a->rn == 31) { 3546 gen_check_sp_alignment(s); 3547 } 3548 mop = check_atomic_align(s, a->rn, mop); 3549 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3550 a->rn != 31, mop); 3551 tcg_rs = read_cpu_reg(s, a->rs, true); 3552 tcg_rt = cpu_reg(s, a->rt); 3553 if (invert) { 3554 tcg_gen_not_i64(tcg_rs, tcg_rs); 3555 } 3556 /* 3557 * The tcg atomic primitives are all full barriers. Therefore we 3558 * can ignore the Acquire and Release bits of this instruction. 
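     * The zero-extension below is needed because the signed min/max
     * ops load with MO_SIGN (the comparison wants sign-extended
     * values), while Rt architecturally receives the zero-extended
     * memory value for sizes below 64 bits.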
3559 */ 3560 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3561 3562 if (mop & MO_SIGN) { 3563 switch (a->sz) { 3564 case MO_8: 3565 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3566 break; 3567 case MO_16: 3568 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3569 break; 3570 case MO_32: 3571 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3572 break; 3573 case MO_64: 3574 break; 3575 default: 3576 g_assert_not_reached(); 3577 } 3578 } 3579 return true; 3580 } 3581 3582 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3583 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3584 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3585 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3586 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3587 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3588 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3589 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3590 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3591 3592 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3593 { 3594 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3595 TCGv_i64 clean_addr; 3596 MemOp mop; 3597 3598 if (!dc_isar_feature(aa64_atomics, s) || 3599 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3600 return false; 3601 } 3602 if (a->rn == 31) { 3603 gen_check_sp_alignment(s); 3604 } 3605 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3606 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3607 a->rn != 31, mop); 3608 /* 3609 * LDAPR* are a special case because they are a simple load, not a 3610 * fetch-and-do-something op. 3611 * The architectural consistency requirements here are weaker than 3612 * full load-acquire (we only need "load-acquire processor consistent"), 3613 * but we choose to implement them as full LDAQ. 3614 */ 3615 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3616 true, a->rt, iss_sf, true); 3617 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3618 return true; 3619 } 3620 3621 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3622 { 3623 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3624 MemOp memop; 3625 3626 /* Load with pointer authentication */ 3627 if (!dc_isar_feature(aa64_pauth, s)) { 3628 return false; 3629 } 3630 3631 if (a->rn == 31) { 3632 gen_check_sp_alignment(s); 3633 } 3634 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3635 3636 if (s->pauth_active) { 3637 if (!a->m) { 3638 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3639 tcg_constant_i64(0)); 3640 } else { 3641 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3642 tcg_constant_i64(0)); 3643 } 3644 } 3645 3646 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3647 3648 memop = finalize_memop(s, MO_64); 3649 3650 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
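     * The PAC itself has already been dealt with by the autda/autdb
     * helpers above; all that remains for gen_mte_check1 is the usual
     * TBI/MTE handling of the computed address.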
*/ 3651 clean_addr = gen_mte_check1(s, dirty_addr, false, 3652 a->w || a->rn != 31, memop); 3653 3654 tcg_rt = cpu_reg(s, a->rt); 3655 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3656 /* extend */ false, /* iss_valid */ !a->w, 3657 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3658 3659 if (a->w) { 3660 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3661 } 3662 return true; 3663 } 3664 3665 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3666 { 3667 TCGv_i64 clean_addr, dirty_addr; 3668 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3669 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3670 3671 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3672 return false; 3673 } 3674 3675 if (a->rn == 31) { 3676 gen_check_sp_alignment(s); 3677 } 3678 3679 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3680 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3681 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3682 clean_addr = clean_data_tbi(s, dirty_addr); 3683 3684 /* 3685 * Load-AcquirePC semantics; we implement as the slightly more 3686 * restrictive Load-Acquire. 3687 */ 3688 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3689 a->rt, iss_sf, true); 3690 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3691 return true; 3692 } 3693 3694 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3695 { 3696 TCGv_i64 clean_addr, dirty_addr; 3697 MemOp mop = a->sz; 3698 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3699 3700 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3701 return false; 3702 } 3703 3704 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3705 3706 if (a->rn == 31) { 3707 gen_check_sp_alignment(s); 3708 } 3709 3710 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3711 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3712 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3713 clean_addr = clean_data_tbi(s, dirty_addr); 3714 3715 /* Store-Release semantics */ 3716 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3717 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3718 return true; 3719 } 3720 3721 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3722 { 3723 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3724 MemOp endian, align, mop; 3725 3726 int total; /* total bytes */ 3727 int elements; /* elements per vector */ 3728 int r; 3729 int size = a->sz; 3730 3731 if (!a->p && a->rm != 0) { 3732 /* For non-postindexed accesses the Rm field must be 0 */ 3733 return false; 3734 } 3735 if (size == 3 && !a->q && a->selem != 1) { 3736 return false; 3737 } 3738 if (!fp_access_check(s)) { 3739 return true; 3740 } 3741 3742 if (a->rn == 31) { 3743 gen_check_sp_alignment(s); 3744 } 3745 3746 /* For our purposes, bytes are always little-endian. */ 3747 endian = s->be_data; 3748 if (size == 0) { 3749 endian = MO_LE; 3750 } 3751 3752 total = a->rpt * a->selem * (a->q ? 16 : 8); 3753 tcg_rn = cpu_reg_sp(s, a->rn); 3754 3755 /* 3756 * Issue the MTE check vs the logical repeat count, before we 3757 * promote consecutive little-endian elements below. 3758 */ 3759 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3760 finalize_memop_asimd(s, size)); 3761 3762 /* 3763 * Consecutive little-endian elements from a single register 3764 * can be promoted to a larger little-endian operation. 3765 */ 3766 align = MO_ALIGN; 3767 if (a->selem == 1 && endian == MO_LE) { 3768 align = pow2_align(size); 3769 size = 3; 3770 } 3771 if (!s->align_mem) { 3772 align = 0; 3773 } 3774 mop = endian | size | align; 3775 3776 elements = (a->q ? 
16 : 8) >> size; 3777 tcg_ebytes = tcg_constant_i64(1 << size); 3778 for (r = 0; r < a->rpt; r++) { 3779 int e; 3780 for (e = 0; e < elements; e++) { 3781 int xs; 3782 for (xs = 0; xs < a->selem; xs++) { 3783 int tt = (a->rt + r + xs) % 32; 3784 do_vec_ld(s, tt, e, clean_addr, mop); 3785 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3786 } 3787 } 3788 } 3789 3790 /* 3791 * For non-quad operations, setting a slice of the low 64 bits of 3792 * the register clears the high 64 bits (in the ARM ARM pseudocode 3793 * this is implicit in the fact that 'rval' is a 64 bit wide 3794 * variable). For quad operations, we might still need to zero 3795 * the high bits of SVE. 3796 */ 3797 for (r = 0; r < a->rpt * a->selem; r++) { 3798 int tt = (a->rt + r) % 32; 3799 clear_vec_high(s, a->q, tt); 3800 } 3801 3802 if (a->p) { 3803 if (a->rm == 31) { 3804 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3805 } else { 3806 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3807 } 3808 } 3809 return true; 3810 } 3811 3812 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3813 { 3814 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3815 MemOp endian, align, mop; 3816 3817 int total; /* total bytes */ 3818 int elements; /* elements per vector */ 3819 int r; 3820 int size = a->sz; 3821 3822 if (!a->p && a->rm != 0) { 3823 /* For non-postindexed accesses the Rm field must be 0 */ 3824 return false; 3825 } 3826 if (size == 3 && !a->q && a->selem != 1) { 3827 return false; 3828 } 3829 if (!fp_access_check(s)) { 3830 return true; 3831 } 3832 3833 if (a->rn == 31) { 3834 gen_check_sp_alignment(s); 3835 } 3836 3837 /* For our purposes, bytes are always little-endian. */ 3838 endian = s->be_data; 3839 if (size == 0) { 3840 endian = MO_LE; 3841 } 3842 3843 total = a->rpt * a->selem * (a->q ? 16 : 8); 3844 tcg_rn = cpu_reg_sp(s, a->rn); 3845 3846 /* 3847 * Issue the MTE check vs the logical repeat count, before we 3848 * promote consecutive little-endian elements below. 3849 */ 3850 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3851 finalize_memop_asimd(s, size)); 3852 3853 /* 3854 * Consecutive little-endian elements from a single register 3855 * can be promoted to a larger little-endian operation. 3856 */ 3857 align = MO_ALIGN; 3858 if (a->selem == 1 && endian == MO_LE) { 3859 align = pow2_align(size); 3860 size = 3; 3861 } 3862 if (!s->align_mem) { 3863 align = 0; 3864 } 3865 mop = endian | size | align; 3866 3867 elements = (a->q ? 
16 : 8) >> size; 3868 tcg_ebytes = tcg_constant_i64(1 << size); 3869 for (r = 0; r < a->rpt; r++) { 3870 int e; 3871 for (e = 0; e < elements; e++) { 3872 int xs; 3873 for (xs = 0; xs < a->selem; xs++) { 3874 int tt = (a->rt + r + xs) % 32; 3875 do_vec_st(s, tt, e, clean_addr, mop); 3876 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3877 } 3878 } 3879 } 3880 3881 if (a->p) { 3882 if (a->rm == 31) { 3883 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3884 } else { 3885 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3886 } 3887 } 3888 return true; 3889 } 3890 3891 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3892 { 3893 int xs, total, rt; 3894 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3895 MemOp mop; 3896 3897 if (!a->p && a->rm != 0) { 3898 return false; 3899 } 3900 if (!fp_access_check(s)) { 3901 return true; 3902 } 3903 3904 if (a->rn == 31) { 3905 gen_check_sp_alignment(s); 3906 } 3907 3908 total = a->selem << a->scale; 3909 tcg_rn = cpu_reg_sp(s, a->rn); 3910 3911 mop = finalize_memop_asimd(s, a->scale); 3912 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3913 total, mop); 3914 3915 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3916 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3917 do_vec_st(s, rt, a->index, clean_addr, mop); 3918 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3919 } 3920 3921 if (a->p) { 3922 if (a->rm == 31) { 3923 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3924 } else { 3925 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3926 } 3927 } 3928 return true; 3929 } 3930 3931 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3932 { 3933 int xs, total, rt; 3934 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3935 MemOp mop; 3936 3937 if (!a->p && a->rm != 0) { 3938 return false; 3939 } 3940 if (!fp_access_check(s)) { 3941 return true; 3942 } 3943 3944 if (a->rn == 31) { 3945 gen_check_sp_alignment(s); 3946 } 3947 3948 total = a->selem << a->scale; 3949 tcg_rn = cpu_reg_sp(s, a->rn); 3950 3951 mop = finalize_memop_asimd(s, a->scale); 3952 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3953 total, mop); 3954 3955 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3956 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3957 do_vec_ld(s, rt, a->index, clean_addr, mop); 3958 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3959 } 3960 3961 if (a->p) { 3962 if (a->rm == 31) { 3963 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3964 } else { 3965 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3966 } 3967 } 3968 return true; 3969 } 3970 3971 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3972 { 3973 int xs, total, rt; 3974 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3975 MemOp mop; 3976 3977 if (!a->p && a->rm != 0) { 3978 return false; 3979 } 3980 if (!fp_access_check(s)) { 3981 return true; 3982 } 3983 3984 if (a->rn == 31) { 3985 gen_check_sp_alignment(s); 3986 } 3987 3988 total = a->selem << a->scale; 3989 tcg_rn = cpu_reg_sp(s, a->rn); 3990 3991 mop = finalize_memop_asimd(s, a->scale); 3992 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3993 total, mop); 3994 3995 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3996 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3997 /* Load and replicate to all elements */ 3998 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3999 4000 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4001 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 4002 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 4003 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4004 } 4005 4006 if (a->p) { 4007 if (a->rm == 31) { 4008 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4009 } else { 4010 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4011 } 4012 } 4013 return true; 4014 } 4015 4016 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4017 { 4018 TCGv_i64 addr, clean_addr, tcg_rt; 4019 int size = 4 << s->dcz_blocksize; 4020 4021 if (!dc_isar_feature(aa64_mte, s)) { 4022 return false; 4023 } 4024 if (s->current_el == 0) { 4025 return false; 4026 } 4027 4028 if (a->rn == 31) { 4029 gen_check_sp_alignment(s); 4030 } 4031 4032 addr = read_cpu_reg_sp(s, a->rn, true); 4033 tcg_gen_addi_i64(addr, addr, a->imm); 4034 tcg_rt = cpu_reg(s, a->rt); 4035 4036 if (s->ata[0]) { 4037 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4038 } 4039 /* 4040 * The non-tags portion of STZGM is mostly like DC_ZVA, 4041 * except the alignment happens before the access. 4042 */ 4043 clean_addr = clean_data_tbi(s, addr); 4044 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4045 gen_helper_dc_zva(tcg_env, clean_addr); 4046 return true; 4047 } 4048 4049 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4050 { 4051 TCGv_i64 addr, clean_addr, tcg_rt; 4052 4053 if (!dc_isar_feature(aa64_mte, s)) { 4054 return false; 4055 } 4056 if (s->current_el == 0) { 4057 return false; 4058 } 4059 4060 if (a->rn == 31) { 4061 gen_check_sp_alignment(s); 4062 } 4063 4064 addr = read_cpu_reg_sp(s, a->rn, true); 4065 tcg_gen_addi_i64(addr, addr, a->imm); 4066 tcg_rt = cpu_reg(s, a->rt); 4067 4068 if (s->ata[0]) { 4069 gen_helper_stgm(tcg_env, addr, tcg_rt); 4070 } else { 4071 MMUAccessType acc = MMU_DATA_STORE; 4072 int size = 4 << s->gm_blocksize; 4073 4074 clean_addr = clean_data_tbi(s, addr); 4075 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4076 gen_probe_access(s, clean_addr, acc, size); 4077 } 4078 return true; 4079 } 4080 4081 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4082 { 4083 TCGv_i64 addr, clean_addr, tcg_rt; 4084 4085 if (!dc_isar_feature(aa64_mte, s)) { 4086 return false; 4087 } 4088 if (s->current_el == 0) { 4089 return false; 4090 } 4091 4092 if (a->rn == 31) { 4093 gen_check_sp_alignment(s); 4094 } 4095 4096 addr = read_cpu_reg_sp(s, a->rn, true); 4097 tcg_gen_addi_i64(addr, addr, a->imm); 4098 tcg_rt = cpu_reg(s, a->rt); 4099 4100 if (s->ata[0]) { 4101 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4102 } else { 4103 MMUAccessType acc = MMU_DATA_LOAD; 4104 int size = 4 << s->gm_blocksize; 4105 4106 clean_addr = clean_data_tbi(s, addr); 4107 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4108 gen_probe_access(s, clean_addr, acc, size); 4109 /* The result tags are zeros. */ 4110 tcg_gen_movi_i64(tcg_rt, 0); 4111 } 4112 return true; 4113 } 4114 4115 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4116 { 4117 TCGv_i64 addr, clean_addr, tcg_rt; 4118 4119 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4120 return false; 4121 } 4122 4123 if (a->rn == 31) { 4124 gen_check_sp_alignment(s); 4125 } 4126 4127 addr = read_cpu_reg_sp(s, a->rn, true); 4128 if (!a->p) { 4129 /* pre-index or signed offset */ 4130 tcg_gen_addi_i64(addr, addr, a->imm); 4131 } 4132 4133 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4134 tcg_rt = cpu_reg(s, a->rt); 4135 if (s->ata[0]) { 4136 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4137 } else { 4138 /* 4139 * Tag access disabled: we must check for aborts on the load 4140 * from [rn+offset], and then insert a 0 tag into rt.
4141 */ 4142 clean_addr = clean_data_tbi(s, addr); 4143 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4144 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4145 } 4146 4147 if (a->w) { 4148 /* pre-index or post-index */ 4149 if (a->p) { 4150 /* post-index */ 4151 tcg_gen_addi_i64(addr, addr, a->imm); 4152 } 4153 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4154 } 4155 return true; 4156 } 4157 4158 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4159 { 4160 TCGv_i64 addr, tcg_rt; 4161 4162 if (a->rn == 31) { 4163 gen_check_sp_alignment(s); 4164 } 4165 4166 addr = read_cpu_reg_sp(s, a->rn, true); 4167 if (!a->p) { 4168 /* pre-index or signed offset */ 4169 tcg_gen_addi_i64(addr, addr, a->imm); 4170 } 4171 tcg_rt = cpu_reg_sp(s, a->rt); 4172 if (!s->ata[0]) { 4173 /* 4174 * For STG and ST2G, we need to check alignment and probe memory. 4175 * TODO: For STZG and STZ2G, we could rely on the stores below, 4176 * at least for system mode; user-only won't enforce alignment. 4177 */ 4178 if (is_pair) { 4179 gen_helper_st2g_stub(tcg_env, addr); 4180 } else { 4181 gen_helper_stg_stub(tcg_env, addr); 4182 } 4183 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4184 if (is_pair) { 4185 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4186 } else { 4187 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4188 } 4189 } else { 4190 if (is_pair) { 4191 gen_helper_st2g(tcg_env, addr, tcg_rt); 4192 } else { 4193 gen_helper_stg(tcg_env, addr, tcg_rt); 4194 } 4195 } 4196 4197 if (is_zero) { 4198 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4199 TCGv_i64 zero64 = tcg_constant_i64(0); 4200 TCGv_i128 zero128 = tcg_temp_new_i128(); 4201 int mem_index = get_mem_index(s); 4202 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4203 4204 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4205 4206 /* This is 1 or 2 atomic 16-byte operations. */ 4207 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4208 if (is_pair) { 4209 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4210 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4211 } 4212 } 4213 4214 if (a->w) { 4215 /* pre-index or post-index */ 4216 if (a->p) { 4217 /* post-index */ 4218 tcg_gen_addi_i64(addr, addr, a->imm); 4219 } 4220 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4221 } 4222 return true; 4223 } 4224 4225 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4226 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4227 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4228 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4229 4230 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4231 4232 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4233 bool is_setg, SetFn fn) 4234 { 4235 int memidx; 4236 uint32_t syndrome, desc = 0; 4237 4238 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4239 return false; 4240 } 4241 4242 /* 4243 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4244 * us to pull this check before the CheckMOPSEnabled() test 4245 * (which we do in the helper function) 4246 */ 4247 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4248 a->rd == 31 || a->rn == 31) { 4249 return false; 4250 } 4251 4252 memidx = get_a64_user_mem_index(s, a->unpriv); 4253 4254 /* 4255 * We pass option_a == true, matching our implementation; 4256 * we pass wrong_option == false: helper function may set that bit. 
4257 */ 4258 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4259 is_epilogue, false, true, a->rd, a->rs, a->rn); 4260 4261 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4262 /* We may need to do MTE tag checking, so assemble the descriptor */ 4263 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4264 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4265 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4266 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4267 } 4268 /* The helper function always needs the memidx even with MTE disabled */ 4269 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4270 4271 /* 4272 * The helper needs the register numbers, but since they're in 4273 * the syndrome anyway, we let it extract them from there rather 4274 * than passing in an extra three integer arguments. 4275 */ 4276 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4277 return true; 4278 } 4279 4280 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4281 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4282 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4283 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4284 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4285 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4286 4287 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4288 4289 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4290 { 4291 int rmemidx, wmemidx; 4292 uint32_t syndrome, rdesc = 0, wdesc = 0; 4293 bool wunpriv = extract32(a->options, 0, 1); 4294 bool runpriv = extract32(a->options, 1, 1); 4295 4296 /* 4297 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4298 * us to pull this check before the CheckMOPSEnabled() test 4299 * (which we do in the helper function) 4300 */ 4301 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4302 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4303 return false; 4304 } 4305 4306 rmemidx = get_a64_user_mem_index(s, runpriv); 4307 wmemidx = get_a64_user_mem_index(s, wunpriv); 4308 4309 /* 4310 * We pass option_a == true, matching our implementation; 4311 * we pass wrong_option == false: helper function may set that bit. 4312 */ 4313 syndrome = syn_mop(false, false, a->options, is_epilogue, 4314 false, true, a->rd, a->rs, a->rn); 4315 4316 /* If we need to do MTE tag checking, assemble the descriptors */ 4317 if (s->mte_active[runpriv]) { 4318 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4319 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4320 } 4321 if (s->mte_active[wunpriv]) { 4322 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4323 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4324 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4325 } 4326 /* The helper function needs these parts of the descriptor regardless */ 4327 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4328 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4329 4330 /* 4331 * The helper needs the register numbers, but since they're in 4332 * the syndrome anyway, we let it extract them from there rather 4333 * than passing in an extra three integer arguments. 
4334 */ 4335 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4336 tcg_constant_i32(rdesc)); 4337 return true; 4338 } 4339 4340 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4341 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4342 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4343 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4344 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4345 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4346 4347 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4348 4349 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4350 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4351 { 4352 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4353 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4354 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4355 4356 fn(tcg_rd, tcg_rn, tcg_imm); 4357 if (!a->sf) { 4358 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4359 } 4360 return true; 4361 } 4362 4363 /* 4364 * PC-rel. addressing 4365 */ 4366 4367 static bool trans_ADR(DisasContext *s, arg_ri *a) 4368 { 4369 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4370 return true; 4371 } 4372 4373 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4374 { 4375 int64_t offset = (int64_t)a->imm << 12; 4376 4377 /* The page offset is ok for CF_PCREL. */ 4378 offset -= s->pc_curr & 0xfff; 4379 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4380 return true; 4381 } 4382 4383 /* 4384 * Add/subtract (immediate) 4385 */ 4386 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4387 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4388 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4389 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4390 4391 /* 4392 * Add/subtract (immediate, with tags) 4393 */ 4394 4395 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4396 bool sub_op) 4397 { 4398 TCGv_i64 tcg_rn, tcg_rd; 4399 int imm; 4400 4401 imm = a->uimm6 << LOG2_TAG_GRANULE; 4402 if (sub_op) { 4403 imm = -imm; 4404 } 4405 4406 tcg_rn = cpu_reg_sp(s, a->rn); 4407 tcg_rd = cpu_reg_sp(s, a->rd); 4408 4409 if (s->ata[0]) { 4410 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4411 tcg_constant_i32(imm), 4412 tcg_constant_i32(a->uimm4)); 4413 } else { 4414 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4415 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4416 } 4417 return true; 4418 } 4419 4420 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4421 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4422 4423 /* The input should be a value in the bottom e bits (with higher 4424 * bits zero); returns that value replicated into every element 4425 * of size e in a 64 bit integer. 4426 */ 4427 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4428 { 4429 assert(e != 0); 4430 while (e < 64) { 4431 mask |= mask << e; 4432 e *= 2; 4433 } 4434 return mask; 4435 } 4436 4437 /* 4438 * Logical (immediate) 4439 */ 4440 4441 /* 4442 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4443 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4444 * value (ie should cause a guest UNDEF exception), and true if they are 4445 * valid, in which case the decoded bit pattern is written to result. 
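 * For example (a worked case): immn = 0, immr = 0, imms = 0b111100 selects
 * 2-bit elements each containing a run of one set bit with no rotation,
 * so the decoded pattern is 0x5555555555555555.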
4446 */ 4447 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4448 unsigned int imms, unsigned int immr) 4449 { 4450 uint64_t mask; 4451 unsigned e, levels, s, r; 4452 int len; 4453 4454 assert(immn < 2 && imms < 64 && immr < 64); 4455 4456 /* The bit patterns we create here are 64 bit patterns which 4457 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4458 * 64 bits each. Each element contains the same value: a run 4459 * of between 1 and e-1 non-zero bits, rotated within the 4460 * element by between 0 and e-1 bits. 4461 * 4462 * The element size and run length are encoded into immn (1 bit) 4463 * and imms (6 bits) as follows: 4464 * 64 bit elements: immn = 1, imms = <length of run - 1> 4465 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4466 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4467 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4468 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4469 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4470 * Notice that immn = 0, imms = 11111x is the only combination 4471 * not covered by one of the above options; this is reserved. 4472 * Further, <length of run - 1> all-ones is a reserved pattern. 4473 * 4474 * In all cases the rotation is by immr % e (and immr is 6 bits). 4475 */ 4476 4477 /* First determine the element size */ 4478 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4479 if (len < 1) { 4480 /* This is the immn == 0, imms == 0x11111x case */ 4481 return false; 4482 } 4483 e = 1 << len; 4484 4485 levels = e - 1; 4486 s = imms & levels; 4487 r = immr & levels; 4488 4489 if (s == levels) { 4490 /* <length of run - 1> mustn't be all-ones. */ 4491 return false; 4492 } 4493 4494 /* Create the value of one element: s+1 set bits rotated 4495 * by r within the element (which is e bits wide)... 4496 */ 4497 mask = MAKE_64BIT_MASK(0, s + 1); 4498 if (r) { 4499 mask = (mask >> r) | (mask << (e - r)); 4500 mask &= MAKE_64BIT_MASK(0, e); 4501 } 4502 /* ...then replicate the element over the whole 64 bit value */ 4503 mask = bitfield_replicate(mask, e); 4504 *result = mask; 4505 return true; 4506 } 4507 4508 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4509 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4510 { 4511 TCGv_i64 tcg_rd, tcg_rn; 4512 uint64_t imm; 4513 4514 /* Some immediate field values are reserved. */ 4515 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4516 extract32(a->dbm, 0, 6), 4517 extract32(a->dbm, 6, 6))) { 4518 return false; 4519 } 4520 if (!a->sf) { 4521 imm &= 0xffffffffull; 4522 } 4523 4524 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4525 tcg_rn = cpu_reg(s, a->rn); 4526 4527 fn(tcg_rd, tcg_rn, imm); 4528 if (set_cc) { 4529 gen_logic_CC(a->sf, tcg_rd); 4530 } 4531 if (!a->sf) { 4532 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4533 } 4534 return true; 4535 } 4536 4537 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4538 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4539 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4540 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4541 4542 /* 4543 * Move wide (immediate) 4544 */ 4545 4546 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4547 { 4548 int pos = a->hw << 4; 4549 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4550 return true; 4551 } 4552 4553 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4554 { 4555 int pos = a->hw << 4; 4556 uint64_t imm = a->imm; 4557 4558 imm = ~(imm << pos); 4559 if (!a->sf) { 4560 imm = (uint32_t)imm; 4561 } 4562 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4563 return true; 4564 } 4565 4566 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4567 { 4568 int pos = a->hw << 4; 4569 TCGv_i64 tcg_rd, tcg_im; 4570 4571 tcg_rd = cpu_reg(s, a->rd); 4572 tcg_im = tcg_constant_i64(a->imm); 4573 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4574 if (!a->sf) { 4575 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4576 } 4577 return true; 4578 } 4579 4580 /* 4581 * Bitfield 4582 */ 4583 4584 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4585 { 4586 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4587 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4588 unsigned int bitsize = a->sf ? 64 : 32; 4589 unsigned int ri = a->immr; 4590 unsigned int si = a->imms; 4591 unsigned int pos, len; 4592 4593 if (si >= ri) { 4594 /* Wd<s-r:0> = Wn<s:r> */ 4595 len = (si - ri) + 1; 4596 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4597 if (!a->sf) { 4598 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4599 } 4600 } else { 4601 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4602 len = si + 1; 4603 pos = (bitsize - ri) & (bitsize - 1); 4604 4605 if (len < ri) { 4606 /* 4607 * Sign extend the destination field from len to fill the 4608 * balance of the word. Let the deposit below insert all 4609 * of those sign bits. 4610 */ 4611 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4612 len = ri; 4613 } 4614 4615 /* 4616 * We start with zero, and we haven't modified any bits outside 4617 * bitsize, therefore no final zero-extension is needed for !sf. 4618 */ 4619 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4620 } 4621 return true; 4622 } 4623 4624 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4625 { 4626 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4627 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4628 unsigned int bitsize = a->sf ? 64 : 32; 4629 unsigned int ri = a->immr; 4630 unsigned int si = a->imms; 4631 unsigned int pos, len; 4632 4633 tcg_rd = cpu_reg(s, a->rd); 4634 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4635 4636 if (si >= ri) { 4637 /* Wd<s-r:0> = Wn<s:r> */ 4638 len = (si - ri) + 1; 4639 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4640 } else { 4641 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4642 len = si + 1; 4643 pos = (bitsize - ri) & (bitsize - 1); 4644 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4645 } 4646 return true; 4647 } 4648 4649 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4650 { 4651 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4652 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4653 unsigned int bitsize = a->sf ?
64 : 32; 4654 unsigned int ri = a->immr; 4655 unsigned int si = a->imms; 4656 unsigned int pos, len; 4657 4658 tcg_rd = cpu_reg(s, a->rd); 4659 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4660 4661 if (si >= ri) { 4662 /* Wd<s-r:0> = Wn<s:r> */ 4663 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4664 len = (si - ri) + 1; 4665 pos = 0; 4666 } else { 4667 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4668 len = si + 1; 4669 pos = (bitsize - ri) & (bitsize - 1); 4670 } 4671 4672 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4673 if (!a->sf) { 4674 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4675 } 4676 return true; 4677 } 4678 4679 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4680 { 4681 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4682 4683 tcg_rd = cpu_reg(s, a->rd); 4684 4685 if (unlikely(a->imm == 0)) { 4686 /* 4687 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4688 * so an extract from bit 0 is a special case. 4689 */ 4690 if (a->sf) { 4691 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4692 } else { 4693 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4694 } 4695 } else { 4696 tcg_rm = cpu_reg(s, a->rm); 4697 tcg_rn = cpu_reg(s, a->rn); 4698 4699 if (a->sf) { 4700 /* Specialization to ROR happens in EXTRACT2. */ 4701 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4702 } else { 4703 TCGv_i32 t0 = tcg_temp_new_i32(); 4704 4705 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4706 if (a->rm == a->rn) { 4707 tcg_gen_rotri_i32(t0, t0, a->imm); 4708 } else { 4709 TCGv_i32 t1 = tcg_temp_new_i32(); 4710 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4711 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4712 } 4713 tcg_gen_extu_i32_i64(tcg_rd, t0); 4714 } 4715 } 4716 return true; 4717 } 4718 4719 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4720 { 4721 if (fp_access_check(s)) { 4722 int len = (a->len + 1) * 16; 4723 4724 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4725 vec_full_reg_offset(s, a->rm), tcg_env, 4726 a->q ? 16 : 8, vec_full_reg_size(s), 4727 (len << 6) | (a->tbx << 5) | a->rn, 4728 gen_helper_simd_tblx); 4729 } 4730 return true; 4731 } 4732 4733 typedef int simd_permute_idx_fn(int i, int part, int elements); 4734 4735 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4736 simd_permute_idx_fn *fn, int part) 4737 { 4738 MemOp esz = a->esz; 4739 int datasize = a->q ? 16 : 8; 4740 int elements = datasize >> esz; 4741 TCGv_i64 tcg_res[2], tcg_ele; 4742 4743 if (esz == MO_64 && !a->q) { 4744 return false; 4745 } 4746 if (!fp_access_check(s)) { 4747 return true; 4748 } 4749 4750 tcg_res[0] = tcg_temp_new_i64(); 4751 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4752 tcg_ele = tcg_temp_new_i64(); 4753 4754 for (int i = 0; i < elements; i++) { 4755 int o, w, idx; 4756 4757 idx = fn(i, part, elements); 4758 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4759 idx & (elements - 1), esz); 4760 4761 w = (i << (esz + 3)) / 64; 4762 o = (i << (esz + 3)) % 64; 4763 if (o == 0) { 4764 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4765 } else { 4766 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4767 } 4768 } 4769 4770 for (int i = a->q; i >= 0; --i) { 4771 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4772 } 4773 clear_vec_high(s, a->q, a->rd); 4774 return true; 4775 } 4776 4777 static int permute_load_uzp(int i, int part, int elements) 4778 { 4779 return 2 * i + part; 4780 } 4781 4782 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4783 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4784 4785 static int permute_load_trn(int i, int part, int elements) 4786 { 4787 return (i & 1) * elements + (i & ~1) + part; 4788 } 4789 4790 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4791 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4792 4793 static int permute_load_zip(int i, int part, int elements) 4794 { 4795 return (i & 1) * elements + ((part * elements + i) >> 1); 4796 } 4797 4798 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4799 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4800 4801 /* 4802 * Cryptographic AES, SHA, SHA512 4803 */ 4804 4805 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4806 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4807 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4808 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4809 4810 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4811 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4812 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4813 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4814 4815 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4816 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4817 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4818 4819 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4820 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4821 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4822 4823 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4824 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4825 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4826 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4827 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4828 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4829 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4830 4831 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4832 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4833 4834 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4835 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4836 4837 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4838 { 4839 if (!dc_isar_feature(aa64_sm3, s)) { 4840 return false; 4841 } 4842 if (fp_access_check(s)) { 4843 
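        /*
         * SM3SS1 combines the top words of the three sources:
         * ROL32(ROL32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7), implemented
         * below with the equivalent rotate-rights by 20 and 25.
         */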
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4844 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4845 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 4846 TCGv_i32 tcg_res = tcg_temp_new_i32(); 4847 4848 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 4849 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 4850 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 4851 4852 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 4853 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 4854 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 4855 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 4856 4857 /* Clear the whole register first, then store bits [127:96]. */ 4858 clear_vec(s, a->rd); 4859 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 4860 } 4861 return true; 4862 } 4863 4864 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 4865 { 4866 if (fp_access_check(s)) { 4867 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 4868 } 4869 return true; 4870 } 4871 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 4872 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 4873 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 4874 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 4875 4876 static bool trans_XAR(DisasContext *s, arg_XAR *a) 4877 { 4878 if (!dc_isar_feature(aa64_sha3, s)) { 4879 return false; 4880 } 4881 if (fp_access_check(s)) { 4882 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 4883 vec_full_reg_offset(s, a->rn), 4884 vec_full_reg_offset(s, a->rm), a->imm, 16, 4885 vec_full_reg_size(s)); 4886 } 4887 return true; 4888 } 4889 4890 /* 4891 * Advanced SIMD copy 4892 */ 4893 4894 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 4895 { 4896 unsigned esz = ctz32(imm); 4897 if (esz <= MO_64) { 4898 *pesz = esz; 4899 *pidx = imm >> (esz + 1); 4900 return true; 4901 } 4902 return false; 4903 } 4904 4905 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 4906 { 4907 MemOp esz; 4908 unsigned idx; 4909 4910 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4911 return false; 4912 } 4913 if (fp_access_check(s)) { 4914 /* 4915 * This instruction just extracts the specified element and 4916 * zero-extends it into the bottom of the destination register. 4917 */ 4918 TCGv_i64 tmp = tcg_temp_new_i64(); 4919 read_vec_element(s, tmp, a->rn, idx, esz); 4920 write_fp_dreg(s, a->rd, tmp); 4921 } 4922 return true; 4923 } 4924 4925 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 4926 { 4927 MemOp esz; 4928 unsigned idx; 4929 4930 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4931 return false; 4932 } 4933 if (esz == MO_64 && !a->q) { 4934 return false; 4935 } 4936 if (fp_access_check(s)) { 4937 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 4938 vec_reg_offset(s, a->rn, idx, esz), 4939 a->q ? 16 : 8, vec_full_reg_size(s)); 4940 } 4941 return true; 4942 } 4943 4944 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 4945 { 4946 MemOp esz; 4947 unsigned idx; 4948 4949 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4950 return false; 4951 } 4952 if (esz == MO_64 && !a->q) { 4953 return false; 4954 } 4955 if (fp_access_check(s)) { 4956 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4957 a->q ? 
16 : 8, vec_full_reg_size(s), 4958 cpu_reg(s, a->rn)); 4959 } 4960 return true; 4961 } 4962 4963 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 4964 { 4965 MemOp esz; 4966 unsigned idx; 4967 4968 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4969 return false; 4970 } 4971 if (is_signed) { 4972 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 4973 return false; 4974 } 4975 } else { 4976 if (esz == MO_64 ? !a->q : a->q) { 4977 return false; 4978 } 4979 } 4980 if (fp_access_check(s)) { 4981 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4982 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 4983 if (is_signed && !a->q) { 4984 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4985 } 4986 } 4987 return true; 4988 } 4989 4990 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 4991 TRANS(UMOV, do_smov_umov, a, 0) 4992 4993 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 4994 { 4995 MemOp esz; 4996 unsigned idx; 4997 4998 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4999 return false; 5000 } 5001 if (fp_access_check(s)) { 5002 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 5003 clear_vec_high(s, true, a->rd); 5004 } 5005 return true; 5006 } 5007 5008 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5009 { 5010 MemOp esz; 5011 unsigned didx, sidx; 5012 5013 if (!decode_esz_idx(a->di, &esz, &didx)) { 5014 return false; 5015 } 5016 sidx = a->si >> esz; 5017 if (fp_access_check(s)) { 5018 TCGv_i64 tmp = tcg_temp_new_i64(); 5019 5020 read_vec_element(s, tmp, a->rn, sidx, esz); 5021 write_vec_element(s, tmp, a->rd, didx, esz); 5022 5023 /* INS is considered a 128-bit write for SVE. */ 5024 clear_vec_high(s, true, a->rd); 5025 } 5026 return true; 5027 } 5028 5029 /* 5030 * Advanced SIMD three same 5031 */ 5032 5033 typedef struct FPScalar { 5034 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5035 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5036 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5037 } FPScalar; 5038 5039 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) 5040 { 5041 switch (a->esz) { 5042 case MO_64: 5043 if (fp_access_check(s)) { 5044 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5045 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5046 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5047 write_fp_dreg(s, a->rd, t0); 5048 } 5049 break; 5050 case MO_32: 5051 if (fp_access_check(s)) { 5052 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5053 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5054 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5055 write_fp_sreg(s, a->rd, t0); 5056 } 5057 break; 5058 case MO_16: 5059 if (!dc_isar_feature(aa64_fp16, s)) { 5060 return false; 5061 } 5062 if (fp_access_check(s)) { 5063 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5064 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5065 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5066 write_fp_sreg(s, a->rd, t0); 5067 } 5068 break; 5069 default: 5070 return false; 5071 } 5072 return true; 5073 } 5074 5075 static const FPScalar f_scalar_fadd = { 5076 gen_helper_vfp_addh, 5077 gen_helper_vfp_adds, 5078 gen_helper_vfp_addd, 5079 }; 5080 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) 5081 5082 static const FPScalar f_scalar_fsub = { 5083 gen_helper_vfp_subh, 5084 gen_helper_vfp_subs, 5085 gen_helper_vfp_subd, 5086 }; 5087 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) 5088 5089 static const FPScalar f_scalar_fdiv = { 5090 gen_helper_vfp_divh, 5091 gen_helper_vfp_divs, 5092 gen_helper_vfp_divd, 5093 }; 5094 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) 5095 5096 static const 
FPScalar f_scalar_fmul = { 5097 gen_helper_vfp_mulh, 5098 gen_helper_vfp_muls, 5099 gen_helper_vfp_muld, 5100 }; 5101 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) 5102 5103 static const FPScalar f_scalar_fmax = { 5104 gen_helper_vfp_maxh, 5105 gen_helper_vfp_maxs, 5106 gen_helper_vfp_maxd, 5107 }; 5108 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) 5109 5110 static const FPScalar f_scalar_fmin = { 5111 gen_helper_vfp_minh, 5112 gen_helper_vfp_mins, 5113 gen_helper_vfp_mind, 5114 }; 5115 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) 5116 5117 static const FPScalar f_scalar_fmaxnm = { 5118 gen_helper_vfp_maxnumh, 5119 gen_helper_vfp_maxnums, 5120 gen_helper_vfp_maxnumd, 5121 }; 5122 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) 5123 5124 static const FPScalar f_scalar_fminnm = { 5125 gen_helper_vfp_minnumh, 5126 gen_helper_vfp_minnums, 5127 gen_helper_vfp_minnumd, 5128 }; 5129 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) 5130 5131 static const FPScalar f_scalar_fmulx = { 5132 gen_helper_advsimd_mulxh, 5133 gen_helper_vfp_mulxs, 5134 gen_helper_vfp_mulxd, 5135 }; 5136 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) 5137 5138 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5139 { 5140 gen_helper_vfp_mulh(d, n, m, s); 5141 gen_vfp_negh(d, d); 5142 } 5143 5144 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5145 { 5146 gen_helper_vfp_muls(d, n, m, s); 5147 gen_vfp_negs(d, d); 5148 } 5149 5150 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5151 { 5152 gen_helper_vfp_muld(d, n, m, s); 5153 gen_vfp_negd(d, d); 5154 } 5155 5156 static const FPScalar f_scalar_fnmul = { 5157 gen_fnmul_h, 5158 gen_fnmul_s, 5159 gen_fnmul_d, 5160 }; 5161 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) 5162 5163 static const FPScalar f_scalar_fcmeq = { 5164 gen_helper_advsimd_ceq_f16, 5165 gen_helper_neon_ceq_f32, 5166 gen_helper_neon_ceq_f64, 5167 }; 5168 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) 5169 5170 static const FPScalar f_scalar_fcmge = { 5171 gen_helper_advsimd_cge_f16, 5172 gen_helper_neon_cge_f32, 5173 gen_helper_neon_cge_f64, 5174 }; 5175 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) 5176 5177 static const FPScalar f_scalar_fcmgt = { 5178 gen_helper_advsimd_cgt_f16, 5179 gen_helper_neon_cgt_f32, 5180 gen_helper_neon_cgt_f64, 5181 }; 5182 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) 5183 5184 static const FPScalar f_scalar_facge = { 5185 gen_helper_advsimd_acge_f16, 5186 gen_helper_neon_acge_f32, 5187 gen_helper_neon_acge_f64, 5188 }; 5189 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) 5190 5191 static const FPScalar f_scalar_facgt = { 5192 gen_helper_advsimd_acgt_f16, 5193 gen_helper_neon_acgt_f32, 5194 gen_helper_neon_acgt_f64, 5195 }; 5196 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) 5197 5198 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5199 { 5200 gen_helper_vfp_subh(d, n, m, s); 5201 gen_vfp_absh(d, d); 5202 } 5203 5204 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5205 { 5206 gen_helper_vfp_subs(d, n, m, s); 5207 gen_vfp_abss(d, d); 5208 } 5209 5210 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5211 { 5212 gen_helper_vfp_subd(d, n, m, s); 5213 gen_vfp_absd(d, d); 5214 } 5215 5216 static const FPScalar f_scalar_fabd = { 5217 gen_fabd_h, 5218 gen_fabd_s, 5219 gen_fabd_d, 5220 }; 5221 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) 5222 5223 static const FPScalar f_scalar_frecps = { 5224 gen_helper_recpsf_f16, 
5225 gen_helper_recpsf_f32, 5226 gen_helper_recpsf_f64, 5227 }; 5228 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) 5229 5230 static const FPScalar f_scalar_frsqrts = { 5231 gen_helper_rsqrtsf_f16, 5232 gen_helper_rsqrtsf_f32, 5233 gen_helper_rsqrtsf_f64, 5234 }; 5235 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) 5236 5237 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5238 const FPScalar *f, bool swap) 5239 { 5240 switch (a->esz) { 5241 case MO_64: 5242 if (fp_access_check(s)) { 5243 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5244 TCGv_i64 t1 = tcg_constant_i64(0); 5245 if (swap) { 5246 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5247 } else { 5248 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5249 } 5250 write_fp_dreg(s, a->rd, t0); 5251 } 5252 break; 5253 case MO_32: 5254 if (fp_access_check(s)) { 5255 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5256 TCGv_i32 t1 = tcg_constant_i32(0); 5257 if (swap) { 5258 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5259 } else { 5260 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5261 } 5262 write_fp_sreg(s, a->rd, t0); 5263 } 5264 break; 5265 case MO_16: 5266 if (!dc_isar_feature(aa64_fp16, s)) { 5267 return false; 5268 } 5269 if (fp_access_check(s)) { 5270 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5271 TCGv_i32 t1 = tcg_constant_i32(0); 5272 if (swap) { 5273 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 5274 } else { 5275 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5276 } 5277 write_fp_sreg(s, a->rd, t0); 5278 } 5279 break; 5280 default: 5281 return false; 5282 } 5283 return true; 5284 } 5285 5286 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5287 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5288 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5289 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5290 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5291 5292 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5293 MemOp sgn_n, MemOp sgn_m, 5294 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5295 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5296 { 5297 TCGv_i64 t0, t1, t2, qc; 5298 MemOp esz = a->esz; 5299 5300 if (!fp_access_check(s)) { 5301 return true; 5302 } 5303 5304 t0 = tcg_temp_new_i64(); 5305 t1 = tcg_temp_new_i64(); 5306 t2 = tcg_temp_new_i64(); 5307 qc = tcg_temp_new_i64(); 5308 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5309 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5310 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5311 5312 if (esz == MO_64) { 5313 gen_d(t0, qc, t1, t2); 5314 } else { 5315 gen_bhs(t0, qc, t1, t2, esz); 5316 tcg_gen_ext_i64(t0, t0, esz); 5317 } 5318 5319 write_fp_dreg(s, a->rd, t0); 5320 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5321 return true; 5322 } 5323 5324 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5325 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5326 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5327 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5328 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5329 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5330 5331 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5332 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5333 { 5334 if (fp_access_check(s)) { 5335 TCGv_i64 t0 = tcg_temp_new_i64(); 5336 TCGv_i64 t1 = tcg_temp_new_i64(); 5337 5338 read_vec_element(s, t0, a->rn, 0, MO_64); 5339 
read_vec_element(s, t1, a->rm, 0, MO_64); 5340 fn(t0, t0, t1); 5341 write_fp_dreg(s, a->rd, t0); 5342 } 5343 return true; 5344 } 5345 5346 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5347 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5348 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5349 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5350 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5351 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) 5352 5353 typedef struct ENVScalar2 { 5354 NeonGenTwoOpEnvFn *gen_bhs[3]; 5355 NeonGenTwo64OpEnvFn *gen_d; 5356 } ENVScalar2; 5357 5358 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5359 { 5360 if (!fp_access_check(s)) { 5361 return true; 5362 } 5363 if (a->esz == MO_64) { 5364 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5365 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5366 f->gen_d(t0, tcg_env, t0, t1); 5367 write_fp_dreg(s, a->rd, t0); 5368 } else { 5369 TCGv_i32 t0 = tcg_temp_new_i32(); 5370 TCGv_i32 t1 = tcg_temp_new_i32(); 5371 5372 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5373 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5374 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5375 write_fp_sreg(s, a->rd, t0); 5376 } 5377 return true; 5378 } 5379 5380 static const ENVScalar2 f_scalar_sqshl = { 5381 { gen_helper_neon_qshl_s8, 5382 gen_helper_neon_qshl_s16, 5383 gen_helper_neon_qshl_s32 }, 5384 gen_helper_neon_qshl_s64, 5385 }; 5386 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5387 5388 static const ENVScalar2 f_scalar_uqshl = { 5389 { gen_helper_neon_qshl_u8, 5390 gen_helper_neon_qshl_u16, 5391 gen_helper_neon_qshl_u32 }, 5392 gen_helper_neon_qshl_u64, 5393 }; 5394 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5395 5396 static const ENVScalar2 f_scalar_sqrshl = { 5397 { gen_helper_neon_qrshl_s8, 5398 gen_helper_neon_qrshl_s16, 5399 gen_helper_neon_qrshl_s32 }, 5400 gen_helper_neon_qrshl_s64, 5401 }; 5402 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5403 5404 static const ENVScalar2 f_scalar_uqrshl = { 5405 { gen_helper_neon_qrshl_u8, 5406 gen_helper_neon_qrshl_u16, 5407 gen_helper_neon_qrshl_u32 }, 5408 gen_helper_neon_qrshl_u64, 5409 }; 5410 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5411 5412 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5413 const ENVScalar2 *f) 5414 { 5415 if (a->esz == MO_16 || a->esz == MO_32) { 5416 return do_env_scalar2(s, a, f); 5417 } 5418 return false; 5419 } 5420 5421 static const ENVScalar2 f_scalar_sqdmulh = { 5422 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5423 }; 5424 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5425 5426 static const ENVScalar2 f_scalar_sqrdmulh = { 5427 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5428 }; 5429 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5430 5431 typedef struct ENVScalar3 { 5432 NeonGenThreeOpEnvFn *gen_hs[2]; 5433 } ENVScalar3; 5434 5435 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5436 const ENVScalar3 *f) 5437 { 5438 TCGv_i32 t0, t1, t2; 5439 5440 if (a->esz != MO_16 && a->esz != MO_32) { 5441 return false; 5442 } 5443 if (!fp_access_check(s)) { 5444 return true; 5445 } 5446 5447 t0 = tcg_temp_new_i32(); 5448 t1 = tcg_temp_new_i32(); 5449 t2 = tcg_temp_new_i32(); 5450 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5451 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5452 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5453 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5454 
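    /*
     * The helper has combined t0, t1 and the old Vd element (t2) into t0;
     * write_fp_sreg stores it to the low bits of Vd and clears the rest
     * of the vector register.
     */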
write_fp_sreg(s, a->rd, t0); 5455 return true; 5456 } 5457 5458 static const ENVScalar3 f_scalar_sqrdmlah = { 5459 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5460 }; 5461 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5462 5463 static const ENVScalar3 f_scalar_sqrdmlsh = { 5464 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5465 }; 5466 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5467 5468 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5469 { 5470 if (fp_access_check(s)) { 5471 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5472 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5473 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5474 write_fp_dreg(s, a->rd, t0); 5475 } 5476 return true; 5477 } 5478 5479 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5480 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5481 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5482 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5483 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5484 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5485 5486 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5487 gen_helper_gvec_3_ptr * const fns[3]) 5488 { 5489 MemOp esz = a->esz; 5490 int check = fp_access_check_vector_hsd(s, a->q, esz); 5491 5492 if (check <= 0) { 5493 return check == 0; 5494 } 5495 5496 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 5497 esz == MO_16, data, fns[esz - 1]); 5498 return true; 5499 } 5500 5501 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5502 gen_helper_gvec_fadd_h, 5503 gen_helper_gvec_fadd_s, 5504 gen_helper_gvec_fadd_d, 5505 }; 5506 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5507 5508 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5509 gen_helper_gvec_fsub_h, 5510 gen_helper_gvec_fsub_s, 5511 gen_helper_gvec_fsub_d, 5512 }; 5513 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5514 5515 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5516 gen_helper_gvec_fdiv_h, 5517 gen_helper_gvec_fdiv_s, 5518 gen_helper_gvec_fdiv_d, 5519 }; 5520 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5521 5522 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5523 gen_helper_gvec_fmul_h, 5524 gen_helper_gvec_fmul_s, 5525 gen_helper_gvec_fmul_d, 5526 }; 5527 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5528 5529 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5530 gen_helper_gvec_fmax_h, 5531 gen_helper_gvec_fmax_s, 5532 gen_helper_gvec_fmax_d, 5533 }; 5534 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) 5535 5536 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5537 gen_helper_gvec_fmin_h, 5538 gen_helper_gvec_fmin_s, 5539 gen_helper_gvec_fmin_d, 5540 }; 5541 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) 5542 5543 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5544 gen_helper_gvec_fmaxnum_h, 5545 gen_helper_gvec_fmaxnum_s, 5546 gen_helper_gvec_fmaxnum_d, 5547 }; 5548 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5549 5550 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5551 gen_helper_gvec_fminnum_h, 5552 gen_helper_gvec_fminnum_s, 5553 gen_helper_gvec_fminnum_d, 5554 }; 5555 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5556 5557 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5558 gen_helper_gvec_fmulx_h, 5559 gen_helper_gvec_fmulx_s, 5560 gen_helper_gvec_fmulx_d, 5561 }; 5562 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5563 5564 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5565 
gen_helper_gvec_vfma_h, 5566 gen_helper_gvec_vfma_s, 5567 gen_helper_gvec_vfma_d, 5568 }; 5569 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5570 5571 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5572 gen_helper_gvec_vfms_h, 5573 gen_helper_gvec_vfms_s, 5574 gen_helper_gvec_vfms_d, 5575 }; 5576 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) 5577 5578 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5579 gen_helper_gvec_fceq_h, 5580 gen_helper_gvec_fceq_s, 5581 gen_helper_gvec_fceq_d, 5582 }; 5583 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5584 5585 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5586 gen_helper_gvec_fcge_h, 5587 gen_helper_gvec_fcge_s, 5588 gen_helper_gvec_fcge_d, 5589 }; 5590 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5591 5592 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { 5593 gen_helper_gvec_fcgt_h, 5594 gen_helper_gvec_fcgt_s, 5595 gen_helper_gvec_fcgt_d, 5596 }; 5597 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5598 5599 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5600 gen_helper_gvec_facge_h, 5601 gen_helper_gvec_facge_s, 5602 gen_helper_gvec_facge_d, 5603 }; 5604 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5605 5606 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5607 gen_helper_gvec_facgt_h, 5608 gen_helper_gvec_facgt_s, 5609 gen_helper_gvec_facgt_d, 5610 }; 5611 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5612 5613 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5614 gen_helper_gvec_fabd_h, 5615 gen_helper_gvec_fabd_s, 5616 gen_helper_gvec_fabd_d, 5617 }; 5618 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) 5619 5620 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5621 gen_helper_gvec_recps_h, 5622 gen_helper_gvec_recps_s, 5623 gen_helper_gvec_recps_d, 5624 }; 5625 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) 5626 5627 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5628 gen_helper_gvec_rsqrts_h, 5629 gen_helper_gvec_rsqrts_s, 5630 gen_helper_gvec_rsqrts_d, 5631 }; 5632 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) 5633 5634 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5635 gen_helper_gvec_faddp_h, 5636 gen_helper_gvec_faddp_s, 5637 gen_helper_gvec_faddp_d, 5638 }; 5639 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5640 5641 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5642 gen_helper_gvec_fmaxp_h, 5643 gen_helper_gvec_fmaxp_s, 5644 gen_helper_gvec_fmaxp_d, 5645 }; 5646 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) 5647 5648 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5649 gen_helper_gvec_fminp_h, 5650 gen_helper_gvec_fminp_s, 5651 gen_helper_gvec_fminp_d, 5652 }; 5653 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) 5654 5655 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5656 gen_helper_gvec_fmaxnump_h, 5657 gen_helper_gvec_fmaxnump_s, 5658 gen_helper_gvec_fmaxnump_d, 5659 }; 5660 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5661 5662 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5663 gen_helper_gvec_fminnump_h, 5664 gen_helper_gvec_fminnump_s, 5665 gen_helper_gvec_fminnump_d, 5666 }; 5667 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5668 5669 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5670 { 5671 if (fp_access_check(s)) { 5672 int data = (is_2 << 1) | is_s; 5673 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5674 vec_full_reg_offset(s, a->rn), 
5675 vec_full_reg_offset(s, a->rm), tcg_env, 5676 a->q ? 16 : 8, vec_full_reg_size(s), 5677 data, gen_helper_gvec_fmlal_a64); 5678 } 5679 return true; 5680 } 5681 5682 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5683 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5684 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5685 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 5686 5687 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 5688 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 5689 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 5690 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 5691 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 5692 5693 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 5694 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 5695 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 5696 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 5697 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 5698 5699 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 5700 { 5701 if (fp_access_check(s)) { 5702 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 5703 } 5704 return true; 5705 } 5706 5707 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 5708 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 5709 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 5710 5711 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 5712 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 5713 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 5714 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 5715 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 5716 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 5717 5718 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 5719 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 5720 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 5721 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 5722 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 5723 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 5724 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 5725 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 5726 5727 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 5728 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 5729 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 5730 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 5731 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 5732 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 5733 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 5734 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 5735 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 5736 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 5737 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 5738 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 5739 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 5740 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 5741 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 5742 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 5743 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 5744 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 5745 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 5746 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 5747 5748 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 5749 { 5750 if (a->esz == MO_64 && !a->q) { 5751 return false; 5752 } 5753 if (fp_access_check(s)) { 5754 tcg_gen_gvec_cmp(cond, a->esz, 5755 vec_full_reg_offset(s, a->rd), 5756 vec_full_reg_offset(s, 
                         a->rn),
                         vec_full_reg_offset(s, a->rm),
                         a->q ? 16 : 8, vec_full_reg_size(s));
    }
    return true;
}

TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)

TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)

static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
                          gen_helper_gvec_4 *fn)
{
    if (fp_access_check(s)) {
        gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
    }
    return true;
}

static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
                              gen_helper_gvec_4_ptr *fn)
{
    if (fp_access_check(s)) {
        gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
    }
    return true;
}

TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)

static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
{
    if (!dc_isar_feature(aa64_bf16, s)) {
        return false;
    }
    if (fp_access_check(s)) {
        /* Q bit selects BFMLALB vs BFMLALT. */
        gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
                          gen_helper_gvec_bfmlal);
    }
    return true;
}

static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
    gen_helper_gvec_fcaddh,
    gen_helper_gvec_fcadds,
    gen_helper_gvec_fcaddd,
};
TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)

static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
{
    static gen_helper_gvec_4_ptr * const fn[] = {
        [MO_16] = gen_helper_gvec_fcmlah,
        [MO_32] = gen_helper_gvec_fcmlas,
        [MO_64] = gen_helper_gvec_fcmlad,
    };
    int check;

    if (!dc_isar_feature(aa64_fcma, s)) {
        return false;
    }

    check = fp_access_check_vector_hsd(s, a->q, a->esz);
    if (check <= 0) {
        return check == 0;
    }

    gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
                      a->esz == MO_16, a->rot, fn[a->esz]);
    return true;
}

/*
 * Widening vector x vector/indexed.
 *
 * These read from the top or bottom half of a 128-bit vector.
 * After widening, optionally accumulate with a 128-bit vector.
 * Implement these inline, as the number of elements is limited
 * and the related SVE and SME operations on larger vectors use
 * even/odd elements instead of top/bottom half.
 *
 * If idx >= 0, operand 2 is indexed, otherwise vector.
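 * (For the indexed forms the same element of operand 2 is used for
 * every product, read once outside the loop; for the vector forms
 * each lane of the selected half of operand 2 is used.)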
 * If acc, operand 0 is loaded with rd.
 */

/* For low half, iterating up. */
static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
                            int rd, int rn, int rm, int idx,
                            NeonGenTwo64OpFn *fn, bool acc)
{
    TCGv_i64 tcg_op0 = tcg_temp_new_i64();
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
    MemOp esz = memop & MO_SIZE;
    int half = 8 >> esz;
    int top_swap, top_half;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    if (idx >= 0) {
        read_vec_element(s, tcg_op2, rm, idx, memop);
    }

    /*
     * For top half inputs, iterate forward; backward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping input elements are consumed.
     * Use top_swap to conditionally invert the forward iteration index.
     */
    top_swap = top ? 0 : half - 1;
    top_half = top ? half : 0;

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
        if (idx < 0) {
            read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
        }
        if (acc) {
            read_vec_element(s, tcg_op0, rd, elt, memop + 1);
        }
        fn(tcg_op0, tcg_op1, tcg_op2);
        write_vec_element(s, tcg_op0, rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, rd);
    return true;
}

static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_sub_i64(d, d, t);
}

TRANS(SMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(UMULL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(UMLAL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(SMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)
TRANS(UMLSL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)

TRANS(SMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(UMULL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(UMLAL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(SMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_mulsub_i64, true)
TRANS(UMLSL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_mulsub_i64, true)

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t1, n, m);
    tcg_gen_sub_i64(t2, m, n);
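    /*
     * Both n - m and m - n have been computed above; the movcond below
     * keeps whichever is non-negative, i.e. |n - m| under a signed
     * compare (gen_uabd_i64 is the same but with an unsigned compare).
     */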
5968 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 5969 } 5970 5971 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5972 { 5973 TCGv_i64 t1 = tcg_temp_new_i64(); 5974 TCGv_i64 t2 = tcg_temp_new_i64(); 5975 5976 tcg_gen_sub_i64(t1, n, m); 5977 tcg_gen_sub_i64(t2, m, n); 5978 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 5979 } 5980 5981 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5982 { 5983 TCGv_i64 t = tcg_temp_new_i64(); 5984 gen_sabd_i64(t, n, m); 5985 tcg_gen_add_i64(d, d, t); 5986 } 5987 5988 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5989 { 5990 TCGv_i64 t = tcg_temp_new_i64(); 5991 gen_uabd_i64(t, n, m); 5992 tcg_gen_add_i64(d, d, t); 5993 } 5994 5995 TRANS(SADDL_v, do_3op_widening, 5996 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5997 tcg_gen_add_i64, false) 5998 TRANS(UADDL_v, do_3op_widening, 5999 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6000 tcg_gen_add_i64, false) 6001 TRANS(SSUBL_v, do_3op_widening, 6002 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6003 tcg_gen_sub_i64, false) 6004 TRANS(USUBL_v, do_3op_widening, 6005 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6006 tcg_gen_sub_i64, false) 6007 TRANS(SABDL_v, do_3op_widening, 6008 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6009 gen_sabd_i64, false) 6010 TRANS(UABDL_v, do_3op_widening, 6011 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6012 gen_uabd_i64, false) 6013 TRANS(SABAL_v, do_3op_widening, 6014 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6015 gen_saba_i64, true) 6016 TRANS(UABAL_v, do_3op_widening, 6017 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6018 gen_uaba_i64, true) 6019 6020 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6021 { 6022 tcg_gen_mul_i64(d, n, m); 6023 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 6024 } 6025 6026 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6027 { 6028 tcg_gen_mul_i64(d, n, m); 6029 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 6030 } 6031 6032 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6033 { 6034 TCGv_i64 t = tcg_temp_new_i64(); 6035 6036 tcg_gen_mul_i64(t, n, m); 6037 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6038 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6039 } 6040 6041 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6042 { 6043 TCGv_i64 t = tcg_temp_new_i64(); 6044 6045 tcg_gen_mul_i64(t, n, m); 6046 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6047 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6048 } 6049 6050 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6051 { 6052 TCGv_i64 t = tcg_temp_new_i64(); 6053 6054 tcg_gen_mul_i64(t, n, m); 6055 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6056 tcg_gen_neg_i64(t, t); 6057 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6058 } 6059 6060 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6061 { 6062 TCGv_i64 t = tcg_temp_new_i64(); 6063 6064 tcg_gen_mul_i64(t, n, m); 6065 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6066 tcg_gen_neg_i64(t, t); 6067 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6068 } 6069 6070 TRANS(SQDMULL_v, do_3op_widening, 6071 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6072 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6073 TRANS(SQDMLAL_v, do_3op_widening, 6074 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6075 a->esz == MO_16 ? 
      gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

TRANS(SQDMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
                           MemOp sign, bool sub)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int top_swap = top ? 0 : half - 1;
    int top_half = top ? half : 0;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, a->rd);
    return true;
}

TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
TRANS(UADDW, do_addsub_wide, a, 0, false)
TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
TRANS(USUBW, do_addsub_wide, a, 0, true)

static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
                                 bool sub, bool round)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int ebits = 8 << esz;
    uint64_t rbit = 1ull << (ebits - 1);
    int top_swap, top_half;

    /* There are no 128x128->64 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    /*
     * When the destination is the top half, iterate backward; forward
     * for the bottom half.  This means the store to the destination
     * will not occur until overlapping input elements are consumed.
     */
    top_swap = top ? half - 1 : 0;
    top_half = top ?
half : 0; 6159 6160 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6161 int elt = elt_fwd ^ top_swap; 6162 6163 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6164 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6165 if (sub) { 6166 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6167 } else { 6168 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6169 } 6170 if (round) { 6171 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6172 } 6173 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6174 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6175 } 6176 clear_vec_high(s, top, a->rd); 6177 return true; 6178 } 6179 6180 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6181 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6182 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6183 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6184 6185 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6186 { 6187 if (fp_access_check(s)) { 6188 /* The Q field specifies lo/hi half input for these insns. */ 6189 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6190 } 6191 return true; 6192 } 6193 6194 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6195 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6196 6197 /* 6198 * Advanced SIMD scalar/vector x indexed element 6199 */ 6200 6201 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6202 { 6203 switch (a->esz) { 6204 case MO_64: 6205 if (fp_access_check(s)) { 6206 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6207 TCGv_i64 t1 = tcg_temp_new_i64(); 6208 6209 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6210 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6211 write_fp_dreg(s, a->rd, t0); 6212 } 6213 break; 6214 case MO_32: 6215 if (fp_access_check(s)) { 6216 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6217 TCGv_i32 t1 = tcg_temp_new_i32(); 6218 6219 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6220 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6221 write_fp_sreg(s, a->rd, t0); 6222 } 6223 break; 6224 case MO_16: 6225 if (!dc_isar_feature(aa64_fp16, s)) { 6226 return false; 6227 } 6228 if (fp_access_check(s)) { 6229 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6230 TCGv_i32 t1 = tcg_temp_new_i32(); 6231 6232 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6233 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6234 write_fp_sreg(s, a->rd, t0); 6235 } 6236 break; 6237 default: 6238 g_assert_not_reached(); 6239 } 6240 return true; 6241 } 6242 6243 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6244 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6245 6246 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6247 { 6248 switch (a->esz) { 6249 case MO_64: 6250 if (fp_access_check(s)) { 6251 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6252 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6253 TCGv_i64 t2 = tcg_temp_new_i64(); 6254 6255 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6256 if (neg) { 6257 gen_vfp_negd(t1, t1); 6258 } 6259 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6260 write_fp_dreg(s, a->rd, t0); 6261 } 6262 break; 6263 case MO_32: 6264 if (fp_access_check(s)) { 6265 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6266 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6267 TCGv_i32 t2 = tcg_temp_new_i32(); 6268 6269 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6270 if (neg) { 6271 gen_vfp_negs(t1, t1); 6272 } 6273 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6274 write_fp_sreg(s, a->rd, t0); 6275 } 6276 break; 6277 case MO_16: 
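        /*
         * The half-precision form is gated on FEAT_FP16 below and uses
         * the FPST_A64_F16 float_status rather than FPST_A64.
         */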
6278 if (!dc_isar_feature(aa64_fp16, s)) { 6279 return false; 6280 } 6281 if (fp_access_check(s)) { 6282 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6283 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6284 TCGv_i32 t2 = tcg_temp_new_i32(); 6285 6286 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6287 if (neg) { 6288 gen_vfp_negh(t1, t1); 6289 } 6290 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6291 fpstatus_ptr(FPST_A64_F16)); 6292 write_fp_sreg(s, a->rd, t0); 6293 } 6294 break; 6295 default: 6296 g_assert_not_reached(); 6297 } 6298 return true; 6299 } 6300 6301 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6302 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6303 6304 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6305 const ENVScalar2 *f) 6306 { 6307 if (a->esz < MO_16 || a->esz > MO_32) { 6308 return false; 6309 } 6310 if (fp_access_check(s)) { 6311 TCGv_i32 t0 = tcg_temp_new_i32(); 6312 TCGv_i32 t1 = tcg_temp_new_i32(); 6313 6314 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6315 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6316 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6317 write_fp_sreg(s, a->rd, t0); 6318 } 6319 return true; 6320 } 6321 6322 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6323 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6324 6325 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6326 const ENVScalar3 *f) 6327 { 6328 if (a->esz < MO_16 || a->esz > MO_32) { 6329 return false; 6330 } 6331 if (fp_access_check(s)) { 6332 TCGv_i32 t0 = tcg_temp_new_i32(); 6333 TCGv_i32 t1 = tcg_temp_new_i32(); 6334 TCGv_i32 t2 = tcg_temp_new_i32(); 6335 6336 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6337 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6338 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6339 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6340 write_fp_sreg(s, a->rd, t0); 6341 } 6342 return true; 6343 } 6344 6345 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6346 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6347 6348 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6349 NeonGenTwo64OpFn *fn, bool acc) 6350 { 6351 if (fp_access_check(s)) { 6352 TCGv_i64 t0 = tcg_temp_new_i64(); 6353 TCGv_i64 t1 = tcg_temp_new_i64(); 6354 TCGv_i64 t2 = tcg_temp_new_i64(); 6355 6356 if (acc) { 6357 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6358 } 6359 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6360 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6361 fn(t0, t1, t2); 6362 6363 /* Clear the whole register first, then store scalar. */ 6364 clear_vec(s, a->rd); 6365 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6366 } 6367 return true; 6368 } 6369 6370 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6371 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6372 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6373 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6374 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6375 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6376 6377 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6378 gen_helper_gvec_3_ptr * const fns[3]) 6379 { 6380 MemOp esz = a->esz; 6381 int check = fp_access_check_vector_hsd(s, a->q, esz); 6382 6383 if (check <= 0) { 6384 return check == 0; 6385 } 6386 6387 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6388 esz == MO_16, a->idx, fns[esz - 1]); 6389 return true; 6390 } 6391 6392 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6393 gen_helper_gvec_fmul_idx_h, 6394 gen_helper_gvec_fmul_idx_s, 6395 gen_helper_gvec_fmul_idx_d, 6396 }; 6397 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6398 6399 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6400 gen_helper_gvec_fmulx_idx_h, 6401 gen_helper_gvec_fmulx_idx_s, 6402 gen_helper_gvec_fmulx_idx_d, 6403 }; 6404 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6405 6406 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6407 { 6408 static gen_helper_gvec_4_ptr * const fns[3] = { 6409 gen_helper_gvec_fmla_idx_h, 6410 gen_helper_gvec_fmla_idx_s, 6411 gen_helper_gvec_fmla_idx_d, 6412 }; 6413 MemOp esz = a->esz; 6414 int check = fp_access_check_vector_hsd(s, a->q, esz); 6415 6416 if (check <= 0) { 6417 return check == 0; 6418 } 6419 6420 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6421 esz == MO_16, (a->idx << 1) | neg, 6422 fns[esz - 1]); 6423 return true; 6424 } 6425 6426 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6427 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6428 6429 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6430 { 6431 if (fp_access_check(s)) { 6432 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6433 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6434 vec_full_reg_offset(s, a->rn), 6435 vec_full_reg_offset(s, a->rm), tcg_env, 6436 a->q ? 
16 : 8, vec_full_reg_size(s), 6437 data, gen_helper_gvec_fmlal_idx_a64); 6438 } 6439 return true; 6440 } 6441 6442 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6443 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6444 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6445 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6446 6447 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6448 gen_helper_gvec_3 * const fns[2]) 6449 { 6450 assert(a->esz == MO_16 || a->esz == MO_32); 6451 if (fp_access_check(s)) { 6452 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6453 } 6454 return true; 6455 } 6456 6457 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6458 gen_helper_gvec_mul_idx_h, 6459 gen_helper_gvec_mul_idx_s, 6460 }; 6461 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6462 6463 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6464 { 6465 static gen_helper_gvec_4 * const fns[2][2] = { 6466 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6467 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6468 }; 6469 6470 assert(a->esz == MO_16 || a->esz == MO_32); 6471 if (fp_access_check(s)) { 6472 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6473 a->idx, fns[a->esz - 1][sub]); 6474 } 6475 return true; 6476 } 6477 6478 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6479 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6480 6481 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6482 gen_helper_gvec_4 * const fns[2]) 6483 { 6484 assert(a->esz == MO_16 || a->esz == MO_32); 6485 if (fp_access_check(s)) { 6486 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6487 vec_full_reg_offset(s, a->rn), 6488 vec_full_reg_offset(s, a->rm), 6489 offsetof(CPUARMState, vfp.qc), 6490 a->q ? 
16 : 8, vec_full_reg_size(s), 6491 a->idx, fns[a->esz - 1]); 6492 } 6493 return true; 6494 } 6495 6496 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6497 gen_helper_neon_sqdmulh_idx_h, 6498 gen_helper_neon_sqdmulh_idx_s, 6499 }; 6500 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6501 6502 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6503 gen_helper_neon_sqrdmulh_idx_h, 6504 gen_helper_neon_sqrdmulh_idx_s, 6505 }; 6506 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6507 6508 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6509 gen_helper_neon_sqrdmlah_idx_h, 6510 gen_helper_neon_sqrdmlah_idx_s, 6511 }; 6512 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6513 f_vector_idx_sqrdmlah) 6514 6515 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6516 gen_helper_neon_sqrdmlsh_idx_h, 6517 gen_helper_neon_sqrdmlsh_idx_s, 6518 }; 6519 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6520 f_vector_idx_sqrdmlsh) 6521 6522 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6523 gen_helper_gvec_4 *fn) 6524 { 6525 if (fp_access_check(s)) { 6526 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6527 } 6528 return true; 6529 } 6530 6531 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6532 gen_helper_gvec_4_ptr *fn) 6533 { 6534 if (fp_access_check(s)) { 6535 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6536 } 6537 return true; 6538 } 6539 6540 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6541 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6542 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6543 gen_helper_gvec_sudot_idx_b) 6544 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6545 gen_helper_gvec_usdot_idx_b) 6546 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6547 gen_helper_gvec_bfdot_idx) 6548 6549 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6550 { 6551 if (!dc_isar_feature(aa64_bf16, s)) { 6552 return false; 6553 } 6554 if (fp_access_check(s)) { 6555 /* Q bit selects BFMLALB vs BFMLALT. 
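         * The element index and the Q bit are packed into the helper's
         * data argument as (idx << 1) | q.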
*/ 6556 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, 6557 (a->idx << 1) | a->q, 6558 gen_helper_gvec_bfmlal_idx); 6559 } 6560 return true; 6561 } 6562 6563 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6564 { 6565 gen_helper_gvec_4_ptr *fn; 6566 6567 if (!dc_isar_feature(aa64_fcma, s)) { 6568 return false; 6569 } 6570 switch (a->esz) { 6571 case MO_16: 6572 if (!dc_isar_feature(aa64_fp16, s)) { 6573 return false; 6574 } 6575 fn = gen_helper_gvec_fcmlah_idx; 6576 break; 6577 case MO_32: 6578 fn = gen_helper_gvec_fcmlas_idx; 6579 break; 6580 default: 6581 g_assert_not_reached(); 6582 } 6583 if (fp_access_check(s)) { 6584 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6585 a->esz == MO_16, (a->idx << 2) | a->rot, fn); 6586 } 6587 return true; 6588 } 6589 6590 /* 6591 * Advanced SIMD scalar pairwise 6592 */ 6593 6594 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6595 { 6596 switch (a->esz) { 6597 case MO_64: 6598 if (fp_access_check(s)) { 6599 TCGv_i64 t0 = tcg_temp_new_i64(); 6600 TCGv_i64 t1 = tcg_temp_new_i64(); 6601 6602 read_vec_element(s, t0, a->rn, 0, MO_64); 6603 read_vec_element(s, t1, a->rn, 1, MO_64); 6604 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6605 write_fp_dreg(s, a->rd, t0); 6606 } 6607 break; 6608 case MO_32: 6609 if (fp_access_check(s)) { 6610 TCGv_i32 t0 = tcg_temp_new_i32(); 6611 TCGv_i32 t1 = tcg_temp_new_i32(); 6612 6613 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6614 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6615 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6616 write_fp_sreg(s, a->rd, t0); 6617 } 6618 break; 6619 case MO_16: 6620 if (!dc_isar_feature(aa64_fp16, s)) { 6621 return false; 6622 } 6623 if (fp_access_check(s)) { 6624 TCGv_i32 t0 = tcg_temp_new_i32(); 6625 TCGv_i32 t1 = tcg_temp_new_i32(); 6626 6627 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6628 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6629 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6630 write_fp_sreg(s, a->rd, t0); 6631 } 6632 break; 6633 default: 6634 g_assert_not_reached(); 6635 } 6636 return true; 6637 } 6638 6639 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6640 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) 6641 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) 6642 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6643 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6644 6645 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6646 { 6647 if (fp_access_check(s)) { 6648 TCGv_i64 t0 = tcg_temp_new_i64(); 6649 TCGv_i64 t1 = tcg_temp_new_i64(); 6650 6651 read_vec_element(s, t0, a->rn, 0, MO_64); 6652 read_vec_element(s, t1, a->rn, 1, MO_64); 6653 tcg_gen_add_i64(t0, t0, t1); 6654 write_fp_dreg(s, a->rd, t0); 6655 } 6656 return true; 6657 } 6658 6659 /* 6660 * Floating-point conditional select 6661 */ 6662 6663 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 6664 { 6665 TCGv_i64 t_true, t_false; 6666 DisasCompare64 c; 6667 int check = fp_access_check_scalar_hsd(s, a->esz); 6668 6669 if (check <= 0) { 6670 return check == 0; 6671 } 6672 6673 /* Zero extend sreg & hreg inputs to 64 bits now. 
*/ 6674 t_true = tcg_temp_new_i64(); 6675 t_false = tcg_temp_new_i64(); 6676 read_vec_element(s, t_true, a->rn, 0, a->esz); 6677 read_vec_element(s, t_false, a->rm, 0, a->esz); 6678 6679 a64_test_cc(&c, a->cond); 6680 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6681 t_true, t_false); 6682 6683 /* 6684 * Note that sregs & hregs write back zeros to the high bits, 6685 * and we've already done the zero-extension. 6686 */ 6687 write_fp_dreg(s, a->rd, t_true); 6688 return true; 6689 } 6690 6691 /* 6692 * Advanced SIMD Extract 6693 */ 6694 6695 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 6696 { 6697 if (fp_access_check(s)) { 6698 TCGv_i64 lo = read_fp_dreg(s, a->rn); 6699 if (a->imm != 0) { 6700 TCGv_i64 hi = read_fp_dreg(s, a->rm); 6701 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 6702 } 6703 write_fp_dreg(s, a->rd, lo); 6704 } 6705 return true; 6706 } 6707 6708 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 6709 { 6710 TCGv_i64 lo, hi; 6711 int pos = (a->imm & 7) * 8; 6712 int elt = a->imm >> 3; 6713 6714 if (!fp_access_check(s)) { 6715 return true; 6716 } 6717 6718 lo = tcg_temp_new_i64(); 6719 hi = tcg_temp_new_i64(); 6720 6721 read_vec_element(s, lo, a->rn, elt, MO_64); 6722 elt++; 6723 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 6724 elt++; 6725 6726 if (pos != 0) { 6727 TCGv_i64 hh = tcg_temp_new_i64(); 6728 tcg_gen_extract2_i64(lo, lo, hi, pos); 6729 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 6730 tcg_gen_extract2_i64(hi, hi, hh, pos); 6731 } 6732 6733 write_vec_element(s, lo, a->rd, 0, MO_64); 6734 write_vec_element(s, hi, a->rd, 1, MO_64); 6735 clear_vec_high(s, true, a->rd); 6736 return true; 6737 } 6738 6739 /* 6740 * Floating-point data-processing (3 source) 6741 */ 6742 6743 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 6744 { 6745 TCGv_ptr fpst; 6746 6747 /* 6748 * These are fused multiply-add. Note that doing the negations here 6749 * as separate steps is correct: an input NaN should come out with 6750 * its sign bit flipped if it is a negated-input. 
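     * With the (neg_a, neg_n) combinations used by the TRANS lines
     * below, this computes:
     *   FMADD:   a + n * m
     *   FMSUB:   a - n * m
     *   FNMADD: -a - n * m
     *   FNMSUB: -a + n * m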
6751 */ 6752 switch (a->esz) { 6753 case MO_64: 6754 if (fp_access_check(s)) { 6755 TCGv_i64 tn = read_fp_dreg(s, a->rn); 6756 TCGv_i64 tm = read_fp_dreg(s, a->rm); 6757 TCGv_i64 ta = read_fp_dreg(s, a->ra); 6758 6759 if (neg_a) { 6760 gen_vfp_negd(ta, ta); 6761 } 6762 if (neg_n) { 6763 gen_vfp_negd(tn, tn); 6764 } 6765 fpst = fpstatus_ptr(FPST_A64); 6766 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 6767 write_fp_dreg(s, a->rd, ta); 6768 } 6769 break; 6770 6771 case MO_32: 6772 if (fp_access_check(s)) { 6773 TCGv_i32 tn = read_fp_sreg(s, a->rn); 6774 TCGv_i32 tm = read_fp_sreg(s, a->rm); 6775 TCGv_i32 ta = read_fp_sreg(s, a->ra); 6776 6777 if (neg_a) { 6778 gen_vfp_negs(ta, ta); 6779 } 6780 if (neg_n) { 6781 gen_vfp_negs(tn, tn); 6782 } 6783 fpst = fpstatus_ptr(FPST_A64); 6784 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 6785 write_fp_sreg(s, a->rd, ta); 6786 } 6787 break; 6788 6789 case MO_16: 6790 if (!dc_isar_feature(aa64_fp16, s)) { 6791 return false; 6792 } 6793 if (fp_access_check(s)) { 6794 TCGv_i32 tn = read_fp_hreg(s, a->rn); 6795 TCGv_i32 tm = read_fp_hreg(s, a->rm); 6796 TCGv_i32 ta = read_fp_hreg(s, a->ra); 6797 6798 if (neg_a) { 6799 gen_vfp_negh(ta, ta); 6800 } 6801 if (neg_n) { 6802 gen_vfp_negh(tn, tn); 6803 } 6804 fpst = fpstatus_ptr(FPST_A64_F16); 6805 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 6806 write_fp_sreg(s, a->rd, ta); 6807 } 6808 break; 6809 6810 default: 6811 return false; 6812 } 6813 return true; 6814 } 6815 6816 TRANS(FMADD, do_fmadd, a, false, false) 6817 TRANS(FNMADD, do_fmadd, a, true, true) 6818 TRANS(FMSUB, do_fmadd, a, false, true) 6819 TRANS(FNMSUB, do_fmadd, a, true, false) 6820 6821 /* 6822 * Advanced SIMD Across Lanes 6823 */ 6824 6825 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen, 6826 MemOp src_sign, NeonGenTwo64OpFn *fn) 6827 { 6828 TCGv_i64 tcg_res, tcg_elt; 6829 MemOp src_mop = a->esz | src_sign; 6830 int elements = (a->q ? 16 : 8) >> a->esz; 6831 6832 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 6833 if (elements < 4) { 6834 return false; 6835 } 6836 if (!fp_access_check(s)) { 6837 return true; 6838 } 6839 6840 tcg_res = tcg_temp_new_i64(); 6841 tcg_elt = tcg_temp_new_i64(); 6842 6843 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 6844 for (int i = 1; i < elements; i++) { 6845 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 6846 fn(tcg_res, tcg_res, tcg_elt); 6847 } 6848 6849 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 6850 write_fp_dreg(s, a->rd, tcg_res); 6851 return true; 6852 } 6853 6854 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 6855 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 6856 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 6857 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 6858 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 6859 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 6860 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 6861 6862 /* 6863 * do_fp_reduction helper 6864 * 6865 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 6866 * important for correct NaN propagation that we do these 6867 * operations in exactly the order specified by the pseudocode. 6868 * 6869 * This is a recursive function. 
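 * For example, with four elements the result is
 * fn(fn(e[0], e[1]), fn(e[2], e[3])), not a linear left-to-right fold.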
6870 */ 6871 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 6872 int ebase, int ecount, TCGv_ptr fpst, 6873 NeonGenTwoSingleOpFn *fn) 6874 { 6875 if (ecount == 1) { 6876 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 6877 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 6878 return tcg_elem; 6879 } else { 6880 int half = ecount >> 1; 6881 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 6882 6883 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 6884 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 6885 tcg_res = tcg_temp_new_i32(); 6886 6887 fn(tcg_res, tcg_lo, tcg_hi, fpst); 6888 return tcg_res; 6889 } 6890 } 6891 6892 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 6893 NeonGenTwoSingleOpFn *fn) 6894 { 6895 if (fp_access_check(s)) { 6896 MemOp esz = a->esz; 6897 int elts = (a->q ? 16 : 8) >> esz; 6898 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 6899 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); 6900 write_fp_sreg(s, a->rd, res); 6901 } 6902 return true; 6903 } 6904 6905 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) 6906 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) 6907 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) 6908 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) 6909 6910 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) 6911 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) 6912 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) 6913 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) 6914 6915 /* 6916 * Floating-point Immediate 6917 */ 6918 6919 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 6920 { 6921 int check = fp_access_check_scalar_hsd(s, a->esz); 6922 uint64_t imm; 6923 6924 if (check <= 0) { 6925 return check == 0; 6926 } 6927 6928 imm = vfp_expand_imm(a->esz, a->imm); 6929 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 6930 return true; 6931 } 6932 6933 /* 6934 * Floating point compare, conditional compare 6935 */ 6936 6937 static void handle_fp_compare(DisasContext *s, int size, 6938 unsigned int rn, unsigned int rm, 6939 bool cmp_with_zero, bool signal_all_nans) 6940 { 6941 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 6942 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? 
FPST_A64_F16 : FPST_A64); 6943 6944 if (size == MO_64) { 6945 TCGv_i64 tcg_vn, tcg_vm; 6946 6947 tcg_vn = read_fp_dreg(s, rn); 6948 if (cmp_with_zero) { 6949 tcg_vm = tcg_constant_i64(0); 6950 } else { 6951 tcg_vm = read_fp_dreg(s, rm); 6952 } 6953 if (signal_all_nans) { 6954 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6955 } else { 6956 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6957 } 6958 } else { 6959 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 6960 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 6961 6962 read_vec_element_i32(s, tcg_vn, rn, 0, size); 6963 if (cmp_with_zero) { 6964 tcg_gen_movi_i32(tcg_vm, 0); 6965 } else { 6966 read_vec_element_i32(s, tcg_vm, rm, 0, size); 6967 } 6968 6969 switch (size) { 6970 case MO_32: 6971 if (signal_all_nans) { 6972 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6973 } else { 6974 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6975 } 6976 break; 6977 case MO_16: 6978 if (signal_all_nans) { 6979 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6980 } else { 6981 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6982 } 6983 break; 6984 default: 6985 g_assert_not_reached(); 6986 } 6987 } 6988 6989 gen_set_nzcv(tcg_flags); 6990 } 6991 6992 /* FCMP, FCMPE */ 6993 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 6994 { 6995 int check = fp_access_check_scalar_hsd(s, a->esz); 6996 6997 if (check <= 0) { 6998 return check == 0; 6999 } 7000 7001 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 7002 return true; 7003 } 7004 7005 /* FCCMP, FCCMPE */ 7006 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 7007 { 7008 TCGLabel *label_continue = NULL; 7009 int check = fp_access_check_scalar_hsd(s, a->esz); 7010 7011 if (check <= 0) { 7012 return check == 0; 7013 } 7014 7015 if (a->cond < 0x0e) { /* not always */ 7016 TCGLabel *label_match = gen_new_label(); 7017 label_continue = gen_new_label(); 7018 arm_gen_test_cc(a->cond, label_match); 7019 /* nomatch: */ 7020 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7021 tcg_gen_br(label_continue); 7022 gen_set_label(label_match); 7023 } 7024 7025 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7026 7027 if (label_continue) { 7028 gen_set_label(label_continue); 7029 } 7030 return true; 7031 } 7032 7033 /* 7034 * Advanced SIMD Modified Immediate 7035 */ 7036 7037 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7038 { 7039 if (!dc_isar_feature(aa64_fp16, s)) { 7040 return false; 7041 } 7042 if (fp_access_check(s)) { 7043 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7044 a->q ? 16 : 8, vec_full_reg_size(s), 7045 vfp_expand_imm(MO_16, a->abcdefgh)); 7046 } 7047 return true; 7048 } 7049 7050 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7051 int64_t c, uint32_t oprsz, uint32_t maxsz) 7052 { 7053 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7054 } 7055 7056 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7057 { 7058 GVecGen2iFn *fn; 7059 7060 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7061 if ((a->cmode & 1) && a->cmode < 12) { 7062 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7063 fn = a->op ? 
                     tcg_gen_gvec_andi : tcg_gen_gvec_ori;
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1 && a->q == 0) {
            return false;
        }
        fn = gen_movi;
    }

    if (fp_access_check(s)) {
        uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
        gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
    }
    return true;
}

/*
 * Advanced SIMD Shift by Immediate
 */

static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
{
    if (fp_access_check(s)) {
        gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
    }
    return true;
}

TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)

static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
{
    TCGv_i64 tcg_rn, tcg_rd;
    int esz = a->esz;
    int esize;

    if (!fp_access_check(s)) {
        return true;
    }

    /*
     * For the LL variants the store is larger than the load,
     * so if rd == rn we would overwrite parts of our input.
     * So load everything right now and use shifts in the main loop.
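     * e.g. SSHLL2 Vd.8H, Vn.16B, #shift reads the high 64 bits of Vn
     * (the half selected by a->q below) and writes eight 16-bit results.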
7120 */ 7121 tcg_rd = tcg_temp_new_i64(); 7122 tcg_rn = tcg_temp_new_i64(); 7123 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7124 7125 esize = 8 << esz; 7126 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7127 if (is_u) { 7128 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7129 } else { 7130 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7131 } 7132 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7133 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7134 } 7135 clear_vec_high(s, true, a->rd); 7136 return true; 7137 } 7138 7139 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7140 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7141 7142 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7143 { 7144 assert(shift >= 0 && shift <= 64); 7145 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7146 } 7147 7148 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7149 { 7150 assert(shift >= 0 && shift <= 64); 7151 if (shift == 64) { 7152 tcg_gen_movi_i64(dst, 0); 7153 } else { 7154 tcg_gen_shri_i64(dst, src, shift); 7155 } 7156 } 7157 7158 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7159 { 7160 gen_sshr_d(src, src, shift); 7161 tcg_gen_add_i64(dst, dst, src); 7162 } 7163 7164 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7165 { 7166 gen_ushr_d(src, src, shift); 7167 tcg_gen_add_i64(dst, dst, src); 7168 } 7169 7170 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7171 { 7172 assert(shift >= 0 && shift <= 32); 7173 if (shift) { 7174 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7175 tcg_gen_add_i64(dst, src, rnd); 7176 tcg_gen_sari_i64(dst, dst, shift); 7177 } else { 7178 tcg_gen_mov_i64(dst, src); 7179 } 7180 } 7181 7182 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7183 { 7184 assert(shift >= 0 && shift <= 32); 7185 if (shift) { 7186 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7187 tcg_gen_add_i64(dst, src, rnd); 7188 tcg_gen_shri_i64(dst, dst, shift); 7189 } else { 7190 tcg_gen_mov_i64(dst, src); 7191 } 7192 } 7193 7194 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7195 { 7196 assert(shift >= 0 && shift <= 64); 7197 if (shift == 0) { 7198 tcg_gen_mov_i64(dst, src); 7199 } else if (shift == 64) { 7200 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7201 tcg_gen_movi_i64(dst, 0); 7202 } else { 7203 TCGv_i64 rnd = tcg_temp_new_i64(); 7204 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7205 tcg_gen_sari_i64(dst, src, shift); 7206 tcg_gen_add_i64(dst, dst, rnd); 7207 } 7208 } 7209 7210 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7211 { 7212 assert(shift >= 0 && shift <= 64); 7213 if (shift == 0) { 7214 tcg_gen_mov_i64(dst, src); 7215 } else if (shift == 64) { 7216 /* Rounding will propagate bit 63 into bit 64. */ 7217 tcg_gen_shri_i64(dst, src, 63); 7218 } else { 7219 TCGv_i64 rnd = tcg_temp_new_i64(); 7220 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7221 tcg_gen_shri_i64(dst, src, shift); 7222 tcg_gen_add_i64(dst, dst, rnd); 7223 } 7224 } 7225 7226 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7227 { 7228 gen_srshr_d(src, src, shift); 7229 tcg_gen_add_i64(dst, dst, src); 7230 } 7231 7232 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7233 { 7234 gen_urshr_d(src, src, shift); 7235 tcg_gen_add_i64(dst, dst, src); 7236 } 7237 7238 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7239 { 7240 /* If shift is 64, dst is unchanged. 
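     * Otherwise SRI keeps the top 'shift' bits of dst and deposits
     * src >> shift into the remaining low bits.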
*/ 7241 if (shift != 64) { 7242 tcg_gen_shri_i64(src, src, shift); 7243 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7244 } 7245 } 7246 7247 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7248 { 7249 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7250 } 7251 7252 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7253 WideShiftImmFn * const fns[3], MemOp sign) 7254 { 7255 TCGv_i64 tcg_rn, tcg_rd; 7256 int esz = a->esz; 7257 int esize; 7258 WideShiftImmFn *fn; 7259 7260 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7261 7262 if (!fp_access_check(s)) { 7263 return true; 7264 } 7265 7266 tcg_rn = tcg_temp_new_i64(); 7267 tcg_rd = tcg_temp_new_i64(); 7268 tcg_gen_movi_i64(tcg_rd, 0); 7269 7270 fn = fns[esz]; 7271 esize = 8 << esz; 7272 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7273 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7274 fn(tcg_rn, tcg_rn, a->imm); 7275 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7276 } 7277 7278 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7279 clear_vec_high(s, a->q, a->rd); 7280 return true; 7281 } 7282 7283 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7284 { 7285 tcg_gen_sari_i64(d, s, i); 7286 tcg_gen_ext16u_i64(d, d); 7287 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7288 } 7289 7290 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7291 { 7292 tcg_gen_sari_i64(d, s, i); 7293 tcg_gen_ext32u_i64(d, d); 7294 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7295 } 7296 7297 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7298 { 7299 gen_sshr_d(d, s, i); 7300 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7301 } 7302 7303 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7304 { 7305 tcg_gen_shri_i64(d, s, i); 7306 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7307 } 7308 7309 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7310 { 7311 tcg_gen_shri_i64(d, s, i); 7312 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7313 } 7314 7315 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7316 { 7317 gen_ushr_d(d, s, i); 7318 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7319 } 7320 7321 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7322 { 7323 tcg_gen_sari_i64(d, s, i); 7324 tcg_gen_ext16u_i64(d, d); 7325 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7326 } 7327 7328 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7329 { 7330 tcg_gen_sari_i64(d, s, i); 7331 tcg_gen_ext32u_i64(d, d); 7332 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7333 } 7334 7335 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7336 { 7337 gen_sshr_d(d, s, i); 7338 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7339 } 7340 7341 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7342 { 7343 gen_srshr_bhs(d, s, i); 7344 tcg_gen_ext16u_i64(d, d); 7345 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7346 } 7347 7348 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7349 { 7350 gen_srshr_bhs(d, s, i); 7351 tcg_gen_ext32u_i64(d, d); 7352 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7353 } 7354 7355 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7356 { 7357 gen_srshr_d(d, s, i); 7358 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7359 } 7360 7361 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7362 { 7363 gen_urshr_bhs(d, s, i); 7364 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7365 } 7366 7367 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7368 
{ 7369 gen_urshr_bhs(d, s, i); 7370 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7371 } 7372 7373 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7374 { 7375 gen_urshr_d(d, s, i); 7376 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7377 } 7378 7379 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7380 { 7381 gen_srshr_bhs(d, s, i); 7382 tcg_gen_ext16u_i64(d, d); 7383 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7384 } 7385 7386 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7387 { 7388 gen_srshr_bhs(d, s, i); 7389 tcg_gen_ext32u_i64(d, d); 7390 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7391 } 7392 7393 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7394 { 7395 gen_srshr_d(d, s, i); 7396 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7397 } 7398 7399 static WideShiftImmFn * const shrn_fns[] = { 7400 tcg_gen_shri_i64, 7401 tcg_gen_shri_i64, 7402 gen_ushr_d, 7403 }; 7404 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7405 7406 static WideShiftImmFn * const rshrn_fns[] = { 7407 gen_urshr_bhs, 7408 gen_urshr_bhs, 7409 gen_urshr_d, 7410 }; 7411 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7412 7413 static WideShiftImmFn * const sqshrn_fns[] = { 7414 gen_sqshrn_b, 7415 gen_sqshrn_h, 7416 gen_sqshrn_s, 7417 }; 7418 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7419 7420 static WideShiftImmFn * const uqshrn_fns[] = { 7421 gen_uqshrn_b, 7422 gen_uqshrn_h, 7423 gen_uqshrn_s, 7424 }; 7425 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7426 7427 static WideShiftImmFn * const sqshrun_fns[] = { 7428 gen_sqshrun_b, 7429 gen_sqshrun_h, 7430 gen_sqshrun_s, 7431 }; 7432 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7433 7434 static WideShiftImmFn * const sqrshrn_fns[] = { 7435 gen_sqrshrn_b, 7436 gen_sqrshrn_h, 7437 gen_sqrshrn_s, 7438 }; 7439 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7440 7441 static WideShiftImmFn * const uqrshrn_fns[] = { 7442 gen_uqrshrn_b, 7443 gen_uqrshrn_h, 7444 gen_uqrshrn_s, 7445 }; 7446 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7447 7448 static WideShiftImmFn * const sqrshrun_fns[] = { 7449 gen_sqrshrun_b, 7450 gen_sqrshrun_h, 7451 gen_sqrshrun_s, 7452 }; 7453 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7454 7455 /* 7456 * Advanced SIMD Scalar Shift by Immediate 7457 */ 7458 7459 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7460 WideShiftImmFn *fn, bool accumulate, 7461 MemOp sign) 7462 { 7463 if (fp_access_check(s)) { 7464 TCGv_i64 rd = tcg_temp_new_i64(); 7465 TCGv_i64 rn = tcg_temp_new_i64(); 7466 7467 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7468 if (accumulate) { 7469 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7470 } 7471 fn(rd, rn, a->imm); 7472 write_fp_dreg(s, a->rd, rd); 7473 } 7474 return true; 7475 } 7476 7477 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7478 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7479 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7480 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7481 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7482 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7483 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7484 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7485 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7486 7487 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7488 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7489 7490 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7491 NeonGenTwoOpEnvFn *fn) 7492 { 7493 TCGv_i32 t = tcg_temp_new_i32(); 7494 tcg_gen_extrl_i64_i32(t, s); 7495 fn(t, tcg_env, t, tcg_constant_i32(i)); 7496 tcg_gen_extu_i32_i64(d, t); 7497 } 7498 7499 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7500 { 7501 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7502 } 7503 7504 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7505 { 7506 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7507 } 7508 7509 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7510 { 7511 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7512 } 7513 7514 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7515 { 7516 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7517 } 7518 7519 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7520 { 7521 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7522 } 7523 7524 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7525 { 7526 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7527 } 7528 7529 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7530 { 7531 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7532 } 7533 7534 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7535 { 7536 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7537 } 7538 7539 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7540 { 7541 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7542 } 7543 7544 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7545 { 7546 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7547 } 7548 7549 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7550 { 7551 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7552 } 7553 7554 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7555 { 7556 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7557 } 7558 7559 static WideShiftImmFn * const f_scalar_sqshli[] = { 7560 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7561 }; 7562 7563 static WideShiftImmFn * const f_scalar_uqshli[] = { 7564 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7565 }; 7566 7567 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7568 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7569 }; 7570 7571 /* Note that the helpers sign-extend their inputs, so don't do it here. 
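 * (The TRANS lines below pass sign == 0, so do_scalar_shift_imm reads
 * the operand zero-extended and the helpers see only the low bits.)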
*/ 7572 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7573 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7574 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7575 7576 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7577 WideShiftImmFn * const fns[3], 7578 MemOp sign, bool zext) 7579 { 7580 MemOp esz = a->esz; 7581 7582 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7583 7584 if (fp_access_check(s)) { 7585 TCGv_i64 rd = tcg_temp_new_i64(); 7586 TCGv_i64 rn = tcg_temp_new_i64(); 7587 7588 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7589 fns[esz](rd, rn, a->imm); 7590 if (zext) { 7591 tcg_gen_ext_i64(rd, rd, esz); 7592 } 7593 write_fp_dreg(s, a->rd, rd); 7594 } 7595 return true; 7596 } 7597 7598 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7599 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7600 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7601 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7602 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7603 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7604 7605 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7606 { 7607 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7608 tcg_rd = cpu_reg(s, a->rd); 7609 7610 if (!a->sf && is_signed) { 7611 tcg_n = tcg_temp_new_i64(); 7612 tcg_m = tcg_temp_new_i64(); 7613 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7614 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7615 } else { 7616 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7617 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7618 } 7619 7620 if (is_signed) { 7621 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7622 } else { 7623 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7624 } 7625 7626 if (!a->sf) { /* zero extend final result */ 7627 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7628 } 7629 return true; 7630 } 7631 7632 TRANS(SDIV, do_div, a, true) 7633 TRANS(UDIV, do_div, a, false) 7634 7635 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7636 * Note that it is the caller's responsibility to ensure that the 7637 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7638 * mandated semantics for out of range shifts. 7639 */ 7640 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7641 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7642 { 7643 switch (shift_type) { 7644 case A64_SHIFT_TYPE_LSL: 7645 tcg_gen_shl_i64(dst, src, shift_amount); 7646 break; 7647 case A64_SHIFT_TYPE_LSR: 7648 tcg_gen_shr_i64(dst, src, shift_amount); 7649 break; 7650 case A64_SHIFT_TYPE_ASR: 7651 if (!sf) { 7652 tcg_gen_ext32s_i64(dst, src); 7653 } 7654 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 7655 break; 7656 case A64_SHIFT_TYPE_ROR: 7657 if (sf) { 7658 tcg_gen_rotr_i64(dst, src, shift_amount); 7659 } else { 7660 TCGv_i32 t0, t1; 7661 t0 = tcg_temp_new_i32(); 7662 t1 = tcg_temp_new_i32(); 7663 tcg_gen_extrl_i64_i32(t0, src); 7664 tcg_gen_extrl_i64_i32(t1, shift_amount); 7665 tcg_gen_rotr_i32(t0, t0, t1); 7666 tcg_gen_extu_i32_i64(dst, t0); 7667 } 7668 break; 7669 default: 7670 assert(FALSE); /* all shift types should be handled */ 7671 break; 7672 } 7673 7674 if (!sf) { /* zero extend final result */ 7675 tcg_gen_ext32u_i64(dst, dst); 7676 } 7677 } 7678 7679 /* Shift a TCGv src by immediate, put result in dst. 
 * The shift amount must be in range (this should always be true as the
 * relevant instructions will UNDEF on bad shift immediates).
 */
static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
                          enum a64_shift_type shift_type, unsigned int shift_i)
{
    assert(shift_i < (sf ? 64 : 32));

    if (shift_i == 0) {
        tcg_gen_mov_i64(dst, src);
    } else {
        shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
    }
}

static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
                         enum a64_shift_type shift_type)
{
    TCGv_i64 tcg_shift = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);

    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
    shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
    return true;
}

TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)

static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
{
    TCGv_i64 tcg_acc, tcg_val, tcg_rd;
    TCGv_i32 tcg_bytes;

    switch (a->esz) {
    case MO_8:
    case MO_16:
    case MO_32:
        tcg_val = tcg_temp_new_i64();
        tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
        break;
    case MO_64:
        tcg_val = cpu_reg(s, a->rm);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_acc = cpu_reg(s, a->rn);
    tcg_bytes = tcg_constant_i32(1 << a->esz);
    tcg_rd = cpu_reg(s, a->rd);

    if (crc32c) {
        gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
    } else {
        gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
    }
    return true;
}

TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)

static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
{
    TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
    TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
    TCGv_i64 tcg_d = cpu_reg(s, a->rd);

    tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
    tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);

    if (setflag) {
        gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
    } else {
        tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
    }
    return true;
}

TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)

static bool trans_IRG(DisasContext *s, arg_rrr *a)
{
    if (dc_isar_feature(aa64_mte_insn_reg, s)) {
        TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);

        if (s->ata[0]) {
            gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
        } else {
            gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
        }
        return true;
    }
    return false;
}

static bool trans_GMI(DisasContext *s, arg_rrr *a)
{
    if (dc_isar_feature(aa64_mte_insn_reg, s)) {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
        tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
        tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
        return true;
    }
    return false;
}

static bool trans_PACGA(DisasContext *s, arg_rrr *a)
{
    if (dc_isar_feature(aa64_pauth, s)) {
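        /*
         * PACGA computes a pointer authentication code for Xn, using the
         * Generic key and the modifier in Xm (SP is allowed for Xm, hence
         * cpu_reg_sp() for a->rm below), and writes it to the top 32 bits
         * of Xd with the bottom 32 bits zeroed; the pacga helper does all
         * of that work.
         */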
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 7798 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 7799 return true; 7800 } 7801 return false; 7802 } 7803 7804 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 7805 7806 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 7807 { 7808 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 7809 return true; 7810 } 7811 7812 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7813 { 7814 TCGv_i32 t32 = tcg_temp_new_i32(); 7815 7816 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7817 gen_helper_rbit(t32, t32); 7818 tcg_gen_extu_i32_i64(tcg_rd, t32); 7819 } 7820 7821 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 7822 { 7823 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7824 7825 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 7826 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 7827 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 7828 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 7829 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 7830 } 7831 7832 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7833 { 7834 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 7835 } 7836 7837 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7838 { 7839 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 7840 } 7841 7842 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7843 { 7844 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 7845 } 7846 7847 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7848 { 7849 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 7850 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 7851 } 7852 7853 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 7854 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 7855 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 7856 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 7857 7858 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7859 { 7860 TCGv_i32 t32 = tcg_temp_new_i32(); 7861 7862 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7863 tcg_gen_clzi_i32(t32, t32, 32); 7864 tcg_gen_extu_i32_i64(tcg_rd, t32); 7865 } 7866 7867 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7868 { 7869 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 7870 } 7871 7872 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7873 { 7874 TCGv_i32 t32 = tcg_temp_new_i32(); 7875 7876 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7877 tcg_gen_clrsb_i32(t32, t32); 7878 tcg_gen_extu_i32_i64(tcg_rd, t32); 7879 } 7880 7881 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 7882 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32)

static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
{
    TCGv_i64 tcg_rd, tcg_rn;

    if (a->z) {
        if (a->rn != 31) {
            return false;
        }
        tcg_rn = tcg_constant_i64(0);
    } else {
        tcg_rn = cpu_reg_sp(s, a->rn);
    }
    if (s->pauth_active) {
        tcg_rd = cpu_reg(s, a->rd);
        fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
    }
    return true;
}

TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)

TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)

static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
{
    if (s->pauth_active) {
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        fn(tcg_rd, tcg_env, tcg_rd);
    }
    return true;
}

TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)

static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
                         ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
{
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;

    if (!a->sf && (a->sa & (1 << 5))) {
        return false;
    }

    tcg_rd = cpu_reg(s, a->rd);
    tcg_rn = cpu_reg(s, a->rn);

    tcg_rm = read_cpu_reg(s, a->rm, a->sf);
    if (a->sa) {
        shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
    }

    (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    if (setflags) {
        gen_logic_CC(a->sf, tcg_rd);
    }
    return true;
}

static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
{
    /*
     * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
     * register-register MOV and MVN, so it is worth special casing.
     */
    if (a->sa == 0 && a->st == 0 && a->rn == 31) {
        TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
        TCGv_i64 tcg_rm = cpu_reg(s, a->rm);

        if (a->n) {
            tcg_gen_not_i64(tcg_rd, tcg_rm);
            if (!a->sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            if (a->sf) {
                tcg_gen_mov_i64(tcg_rd, tcg_rm);
            } else {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
            }
        }
        return true;
    }

    return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
}

TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)

static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
                          bool sub_op, bool setflags)
{
    TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;

    if (a->sa > 4) {
        return false;
    }

    /* non-flag setting ops may use SP */
    if (!setflags) {
        tcg_rd = cpu_reg_sp(s, a->rd);
    } else {
        tcg_rd = cpu_reg(s, a->rd);
    }
    tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);

    tcg_rm = read_cpu_reg(s, a->rm, a->sf);
    ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);

    tcg_result = tcg_temp_new_i64();
    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (a->sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }
    return true;
}

TRANS(ADD_ext, do_addsub_ext, a, false, false)
TRANS(SUB_ext, do_addsub_ext, a, true, false)
TRANS(ADDS_ext, do_addsub_ext, a, false, true)
TRANS(SUBS_ext, do_addsub_ext, a, true, true)

static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
                          bool sub_op, bool setflags)
{
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;

    if (a->st == 3 || (!a->sf && (a->sa & 32))) {
        return false;
    }

    tcg_rd = cpu_reg(s, a->rd);
    tcg_rn = read_cpu_reg(s, a->rn, a->sf);
    tcg_rm = read_cpu_reg(s, a->rm, a->sf);

    shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);

    tcg_result = tcg_temp_new_i64();
    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (a->sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }
    return true;
}

TRANS(ADD_r, do_addsub_reg, a, false, false)
TRANS(SUB_r, do_addsub_reg, a, true, false)
TRANS(ADDS_r, do_addsub_reg, a, false, true)
TRANS(SUBS_r, do_addsub_reg, a, true, true)

static bool do_mulh(DisasContext *s, arg_rrr *a,
                    void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 discard = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
    TCGv_i64 tcg_rm = cpu_reg(s, a->rm);

    fn(discard, tcg_rd, tcg_rn, tcg_rm);
    return true;
}

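/*
 * SMULH and UMULH return only the high 64 bits of the 128-bit product.
 * tcg_gen_muls2_i64/tcg_gen_mulu2_i64 produce the full product as a
 * (low, high) pair of i64 results, so do_mulh above hands the low half
 * to a "discard" temporary and writes only the high half to Xd.
 */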
8087 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8088 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8089 8090 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8091 bool sf, bool is_sub, MemOp mop) 8092 { 8093 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8094 TCGv_i64 tcg_op1, tcg_op2; 8095 8096 if (mop == MO_64) { 8097 tcg_op1 = cpu_reg(s, a->rn); 8098 tcg_op2 = cpu_reg(s, a->rm); 8099 } else { 8100 tcg_op1 = tcg_temp_new_i64(); 8101 tcg_op2 = tcg_temp_new_i64(); 8102 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8103 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8104 } 8105 8106 if (a->ra == 31 && !is_sub) { 8107 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8108 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8109 } else { 8110 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8111 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8112 8113 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8114 if (is_sub) { 8115 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8116 } else { 8117 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8118 } 8119 } 8120 8121 if (!sf) { 8122 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8123 } 8124 return true; 8125 } 8126 8127 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8128 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8129 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8130 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8131 8132 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8133 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8134 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8135 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8136 8137 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8138 bool is_sub, bool setflags) 8139 { 8140 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8141 8142 tcg_rd = cpu_reg(s, a->rd); 8143 tcg_rn = cpu_reg(s, a->rn); 8144 8145 if (is_sub) { 8146 tcg_y = tcg_temp_new_i64(); 8147 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8148 } else { 8149 tcg_y = cpu_reg(s, a->rm); 8150 } 8151 8152 if (setflags) { 8153 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8154 } else { 8155 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8156 } 8157 return true; 8158 } 8159 8160 TRANS(ADC, do_adc_sbc, a, false, false) 8161 TRANS(SBC, do_adc_sbc, a, true, false) 8162 TRANS(ADCS, do_adc_sbc, a, false, true) 8163 TRANS(SBCS, do_adc_sbc, a, true, true) 8164 8165 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8166 { 8167 int mask = a->mask; 8168 TCGv_i64 tcg_rn; 8169 TCGv_i32 nzcv; 8170 8171 if (!dc_isar_feature(aa64_condm_4, s)) { 8172 return false; 8173 } 8174 8175 tcg_rn = read_cpu_reg(s, a->rn, 1); 8176 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8177 8178 nzcv = tcg_temp_new_i32(); 8179 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8180 8181 if (mask & 8) { /* N */ 8182 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8183 } 8184 if (mask & 4) { /* Z */ 8185 tcg_gen_not_i32(cpu_ZF, nzcv); 8186 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8187 } 8188 if (mask & 2) { /* C */ 8189 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8190 } 8191 if (mask & 1) { /* V */ 8192 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8193 } 8194 return true; 8195 } 8196 8197 static bool do_setf(DisasContext *s, int rn, int shift) 8198 { 8199 TCGv_i32 tmp = tcg_temp_new_i32(); 8200 8201 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8202 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8203 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8204 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8205 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8206 return true; 8207 } 8208 8209 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8210 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8211 8212 /* 
CCMP, CCMN */ 8213 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8214 { 8215 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8216 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8217 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8218 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8219 TCGv_i64 tcg_rn, tcg_y; 8220 DisasCompare c; 8221 unsigned nzcv; 8222 bool has_andc; 8223 8224 /* Set T0 = !COND. */ 8225 arm_test_cc(&c, a->cond); 8226 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8227 8228 /* Load the arguments for the new comparison. */ 8229 if (a->imm) { 8230 tcg_y = tcg_constant_i64(a->y); 8231 } else { 8232 tcg_y = cpu_reg(s, a->y); 8233 } 8234 tcg_rn = cpu_reg(s, a->rn); 8235 8236 /* Set the flags for the new comparison. */ 8237 if (a->op) { 8238 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8239 } else { 8240 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8241 } 8242 8243 /* 8244 * If COND was false, force the flags to #nzcv. Compute two masks 8245 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8246 * For tcg hosts that support ANDC, we can make do with just T1. 8247 * In either case, allow the tcg optimizer to delete any unused mask. 8248 */ 8249 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8250 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8251 8252 nzcv = a->nzcv; 8253 has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0); 8254 if (nzcv & 8) { /* N */ 8255 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8256 } else { 8257 if (has_andc) { 8258 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8259 } else { 8260 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8261 } 8262 } 8263 if (nzcv & 4) { /* Z */ 8264 if (has_andc) { 8265 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8266 } else { 8267 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8268 } 8269 } else { 8270 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8271 } 8272 if (nzcv & 2) { /* C */ 8273 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8274 } else { 8275 if (has_andc) { 8276 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8277 } else { 8278 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8279 } 8280 } 8281 if (nzcv & 1) { /* V */ 8282 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8283 } else { 8284 if (has_andc) { 8285 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8286 } else { 8287 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8288 } 8289 } 8290 return true; 8291 } 8292 8293 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8294 { 8295 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8296 TCGv_i64 zero = tcg_constant_i64(0); 8297 DisasCompare64 c; 8298 8299 a64_test_cc(&c, a->cond); 8300 8301 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8302 /* CSET & CSETM. 
*/ 8303 if (a->else_inv) { 8304 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8305 tcg_rd, c.value, zero); 8306 } else { 8307 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8308 tcg_rd, c.value, zero); 8309 } 8310 } else { 8311 TCGv_i64 t_true = cpu_reg(s, a->rn); 8312 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8313 8314 if (a->else_inv && a->else_inc) { 8315 tcg_gen_neg_i64(t_false, t_false); 8316 } else if (a->else_inv) { 8317 tcg_gen_not_i64(t_false, t_false); 8318 } else if (a->else_inc) { 8319 tcg_gen_addi_i64(t_false, t_false, 1); 8320 } 8321 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8322 } 8323 8324 if (!a->sf) { 8325 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8326 } 8327 return true; 8328 } 8329 8330 typedef struct FPScalar1Int { 8331 void (*gen_h)(TCGv_i32, TCGv_i32); 8332 void (*gen_s)(TCGv_i32, TCGv_i32); 8333 void (*gen_d)(TCGv_i64, TCGv_i64); 8334 } FPScalar1Int; 8335 8336 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8337 const FPScalar1Int *f) 8338 { 8339 switch (a->esz) { 8340 case MO_64: 8341 if (fp_access_check(s)) { 8342 TCGv_i64 t = read_fp_dreg(s, a->rn); 8343 f->gen_d(t, t); 8344 write_fp_dreg(s, a->rd, t); 8345 } 8346 break; 8347 case MO_32: 8348 if (fp_access_check(s)) { 8349 TCGv_i32 t = read_fp_sreg(s, a->rn); 8350 f->gen_s(t, t); 8351 write_fp_sreg(s, a->rd, t); 8352 } 8353 break; 8354 case MO_16: 8355 if (!dc_isar_feature(aa64_fp16, s)) { 8356 return false; 8357 } 8358 if (fp_access_check(s)) { 8359 TCGv_i32 t = read_fp_hreg(s, a->rn); 8360 f->gen_h(t, t); 8361 write_fp_sreg(s, a->rd, t); 8362 } 8363 break; 8364 default: 8365 return false; 8366 } 8367 return true; 8368 } 8369 8370 static const FPScalar1Int f_scalar_fmov = { 8371 tcg_gen_mov_i32, 8372 tcg_gen_mov_i32, 8373 tcg_gen_mov_i64, 8374 }; 8375 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) 8376 8377 static const FPScalar1Int f_scalar_fabs = { 8378 gen_vfp_absh, 8379 gen_vfp_abss, 8380 gen_vfp_absd, 8381 }; 8382 TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) 8383 8384 static const FPScalar1Int f_scalar_fneg = { 8385 gen_vfp_negh, 8386 gen_vfp_negs, 8387 gen_vfp_negd, 8388 }; 8389 TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) 8390 8391 typedef struct FPScalar1 { 8392 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8393 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8394 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8395 } FPScalar1; 8396 8397 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8398 const FPScalar1 *f, int rmode) 8399 { 8400 TCGv_i32 tcg_rmode = NULL; 8401 TCGv_ptr fpst; 8402 TCGv_i64 t64; 8403 TCGv_i32 t32; 8404 int check = fp_access_check_scalar_hsd(s, a->esz); 8405 8406 if (check <= 0) { 8407 return check == 0; 8408 } 8409 8410 fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 8411 if (rmode >= 0) { 8412 tcg_rmode = gen_set_rmode(rmode, fpst); 8413 } 8414 8415 switch (a->esz) { 8416 case MO_64: 8417 t64 = read_fp_dreg(s, a->rn); 8418 f->gen_d(t64, t64, fpst); 8419 write_fp_dreg(s, a->rd, t64); 8420 break; 8421 case MO_32: 8422 t32 = read_fp_sreg(s, a->rn); 8423 f->gen_s(t32, t32, fpst); 8424 write_fp_sreg(s, a->rd, t32); 8425 break; 8426 case MO_16: 8427 t32 = read_fp_hreg(s, a->rn); 8428 f->gen_h(t32, t32, fpst); 8429 write_fp_sreg(s, a->rd, t32); 8430 break; 8431 default: 8432 g_assert_not_reached(); 8433 } 8434 8435 if (rmode >= 0) { 8436 gen_restore_rmode(tcg_rmode, fpst); 8437 } 8438 return true; 8439 } 8440 8441 static const FPScalar1 f_scalar_fsqrt = { 8442 gen_helper_vfp_sqrth, 8443 gen_helper_vfp_sqrts, 8444 gen_helper_vfp_sqrtd, 8445 }; 8446 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8447 8448 static const FPScalar1 f_scalar_frint = { 8449 gen_helper_advsimd_rinth, 8450 gen_helper_rints, 8451 gen_helper_rintd, 8452 }; 8453 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8454 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8455 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8456 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8457 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8458 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8459 8460 static const FPScalar1 f_scalar_frintx = { 8461 gen_helper_advsimd_rinth_exact, 8462 gen_helper_rints_exact, 8463 gen_helper_rintd_exact, 8464 }; 8465 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8466 8467 static const FPScalar1 f_scalar_bfcvt = { 8468 .gen_s = gen_helper_bfcvt, 8469 }; 8470 TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) 8471 8472 static const FPScalar1 f_scalar_frint32 = { 8473 NULL, 8474 gen_helper_frint32_s, 8475 gen_helper_frint32_d, 8476 }; 8477 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8478 &f_scalar_frint32, FPROUNDING_ZERO) 8479 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8480 8481 static const FPScalar1 f_scalar_frint64 = { 8482 NULL, 8483 gen_helper_frint64_s, 8484 gen_helper_frint64_d, 8485 }; 8486 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8487 &f_scalar_frint64, FPROUNDING_ZERO) 8488 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8489 8490 static const FPScalar1 f_scalar_frecpe = { 8491 gen_helper_recpe_f16, 8492 gen_helper_recpe_f32, 8493 gen_helper_recpe_f64, 8494 }; 8495 TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) 8496 8497 static const FPScalar1 f_scalar_frecpx = { 8498 gen_helper_frecpx_f16, 8499 gen_helper_frecpx_f32, 8500 gen_helper_frecpx_f64, 8501 }; 8502 TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) 8503 8504 static const FPScalar1 f_scalar_frsqrte = { 8505 gen_helper_rsqrte_f16, 8506 gen_helper_rsqrte_f32, 8507 gen_helper_rsqrte_f64, 8508 }; 8509 TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) 8510 8511 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8512 { 8513 if (fp_access_check(s)) { 8514 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8515 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8516 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8517 8518 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 8519 write_fp_dreg(s, a->rd, tcg_rd); 8520 } 8521 return true; 8522 } 8523 8524 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8525 { 8526 if (fp_access_check(s)) { 8527 TCGv_i32 tmp = 
read_fp_sreg(s, a->rn);
        TCGv_i32 ahp = get_ahp_flag();
        TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

        gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
        /* write_fp_sreg is OK here because top half of result is zero */
        write_fp_sreg(s, a->rd, tmp);
    }
    return true;
}

static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

        gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
        write_fp_sreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        TCGv_i32 ahp = get_ahp_flag();
        TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

        gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
        /* write_fp_sreg is OK here because top half of tcg_rd is zero */
        write_fp_sreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
        TCGv_i32 tcg_ahp = get_ahp_flag();

        gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
        write_fp_sreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();
        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
        TCGv_i32 tcg_ahp = get_ahp_flag();

        gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
        write_fp_dreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
                           TCGv_i64 tcg_int, bool is_signed)
{
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_single;
    TCGv_i64 tcg_double;

    tcg_fpstatus = fpstatus_ptr(esz == MO_16 ?
FPST_A64_F16 : FPST_A64); 8602 tcg_shift = tcg_constant_i32(shift); 8603 8604 switch (esz) { 8605 case MO_64: 8606 tcg_double = tcg_temp_new_i64(); 8607 if (is_signed) { 8608 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 8609 } else { 8610 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 8611 } 8612 write_fp_dreg(s, rd, tcg_double); 8613 break; 8614 8615 case MO_32: 8616 tcg_single = tcg_temp_new_i32(); 8617 if (is_signed) { 8618 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8619 } else { 8620 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8621 } 8622 write_fp_sreg(s, rd, tcg_single); 8623 break; 8624 8625 case MO_16: 8626 tcg_single = tcg_temp_new_i32(); 8627 if (is_signed) { 8628 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8629 } else { 8630 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8631 } 8632 write_fp_sreg(s, rd, tcg_single); 8633 break; 8634 8635 default: 8636 g_assert_not_reached(); 8637 } 8638 return true; 8639 } 8640 8641 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 8642 { 8643 TCGv_i64 tcg_int; 8644 int check = fp_access_check_scalar_hsd(s, a->esz); 8645 8646 if (check <= 0) { 8647 return check == 0; 8648 } 8649 8650 if (a->sf) { 8651 tcg_int = cpu_reg(s, a->rn); 8652 } else { 8653 tcg_int = read_cpu_reg(s, a->rn, true); 8654 if (is_signed) { 8655 tcg_gen_ext32s_i64(tcg_int, tcg_int); 8656 } else { 8657 tcg_gen_ext32u_i64(tcg_int, tcg_int); 8658 } 8659 } 8660 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 8661 } 8662 8663 TRANS(SCVTF_g, do_cvtf_g, a, true) 8664 TRANS(UCVTF_g, do_cvtf_g, a, false) 8665 8666 /* 8667 * [US]CVTF (vector), scalar version. 8668 * Which sounds weird, but really just means input from fp register 8669 * instead of input from general register. Input and output element 8670 * size are always equal. 8671 */ 8672 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 8673 { 8674 TCGv_i64 tcg_int; 8675 int check = fp_access_check_scalar_hsd(s, a->esz); 8676 8677 if (check <= 0) { 8678 return check == 0; 8679 } 8680 8681 tcg_int = tcg_temp_new_i64(); 8682 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 8683 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 8684 } 8685 8686 TRANS(SCVTF_f, do_cvtf_f, a, true) 8687 TRANS(UCVTF_f, do_cvtf_f, a, false) 8688 8689 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 8690 TCGv_i64 tcg_out, int shift, int rn, 8691 ARMFPRounding rmode) 8692 { 8693 TCGv_ptr tcg_fpstatus; 8694 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 8695 8696 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 8697 tcg_shift = tcg_constant_i32(shift); 8698 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 8699 8700 switch (esz) { 8701 case MO_64: 8702 read_vec_element(s, tcg_out, rn, 0, MO_64); 8703 switch (out) { 8704 case MO_64 | MO_SIGN: 8705 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8706 break; 8707 case MO_64: 8708 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8709 break; 8710 case MO_32 | MO_SIGN: 8711 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8712 break; 8713 case MO_32: 8714 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8715 break; 8716 default: 8717 g_assert_not_reached(); 8718 } 8719 break; 8720 8721 case MO_32: 8722 tcg_single = read_fp_sreg(s, rn); 8723 switch (out) { 8724 case MO_64 | MO_SIGN: 8725 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8726 break; 8727 case MO_64: 8728 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8729 break; 8730 case MO_32 | MO_SIGN: 8731 gen_helper_vfp_tosls(tcg_single, tcg_single, 8732 tcg_shift, tcg_fpstatus); 8733 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8734 break; 8735 case MO_32: 8736 gen_helper_vfp_touls(tcg_single, tcg_single, 8737 tcg_shift, tcg_fpstatus); 8738 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8739 break; 8740 default: 8741 g_assert_not_reached(); 8742 } 8743 break; 8744 8745 case MO_16: 8746 tcg_single = read_fp_hreg(s, rn); 8747 switch (out) { 8748 case MO_64 | MO_SIGN: 8749 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8750 break; 8751 case MO_64: 8752 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8753 break; 8754 case MO_32 | MO_SIGN: 8755 gen_helper_vfp_toslh(tcg_single, tcg_single, 8756 tcg_shift, tcg_fpstatus); 8757 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8758 break; 8759 case MO_32: 8760 gen_helper_vfp_toulh(tcg_single, tcg_single, 8761 tcg_shift, tcg_fpstatus); 8762 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8763 break; 8764 case MO_16 | MO_SIGN: 8765 gen_helper_vfp_toshh(tcg_single, tcg_single, 8766 tcg_shift, tcg_fpstatus); 8767 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8768 break; 8769 case MO_16: 8770 gen_helper_vfp_touhh(tcg_single, tcg_single, 8771 tcg_shift, tcg_fpstatus); 8772 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8773 break; 8774 default: 8775 g_assert_not_reached(); 8776 } 8777 break; 8778 8779 default: 8780 g_assert_not_reached(); 8781 } 8782 8783 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 8784 } 8785 8786 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 8787 ARMFPRounding rmode, bool is_signed) 8788 { 8789 TCGv_i64 tcg_int; 8790 int check = fp_access_check_scalar_hsd(s, a->esz); 8791 8792 if (check <= 0) { 8793 return check == 0; 8794 } 8795 8796 tcg_int = cpu_reg(s, a->rd); 8797 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 8798 a->esz, tcg_int, a->shift, a->rn, rmode); 8799 8800 if (!a->sf) { 8801 tcg_gen_ext32u_i64(tcg_int, tcg_int); 8802 } 8803 return true; 8804 } 8805 8806 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 8807 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 8808 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 8809 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 8810 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 8811 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 8812 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 8813 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 8814 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 8815 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 8816 8817 /* 8818 * FCVT* (vector), scalar version. 8819 * Which sounds weird, but really just means output to fp register 8820 * instead of output to general register. Input and output element 8821 * size are always equal. 8822 */ 8823 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 8824 ARMFPRounding rmode, bool is_signed) 8825 { 8826 TCGv_i64 tcg_int; 8827 int check = fp_access_check_scalar_hsd(s, a->esz); 8828 8829 if (check <= 0) { 8830 return check == 0; 8831 } 8832 8833 tcg_int = tcg_temp_new_i64(); 8834 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 8835 a->esz, tcg_int, a->shift, a->rn, rmode); 8836 8837 clear_vec(s, a->rd); 8838 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 8839 return true; 8840 } 8841 8842 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 8843 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 8844 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 8845 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 8846 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 8847 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 8848 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 8849 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 8850 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 8851 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 8852 8853 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 8854 { 8855 if (!dc_isar_feature(aa64_jscvt, s)) { 8856 return false; 8857 } 8858 if (fp_access_check(s)) { 8859 TCGv_i64 t = read_fp_dreg(s, a->rn); 8860 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 8861 8862 gen_helper_fjcvtzs(t, t, fpstatus); 8863 8864 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 8865 tcg_gen_extrh_i64_i32(cpu_ZF, t); 8866 tcg_gen_movi_i32(cpu_CF, 0); 8867 tcg_gen_movi_i32(cpu_NF, 0); 8868 tcg_gen_movi_i32(cpu_VF, 0); 8869 } 8870 return true; 8871 } 8872 8873 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 8874 { 8875 if (!dc_isar_feature(aa64_fp16, s)) { 8876 return false; 8877 } 8878 if (fp_access_check(s)) { 8879 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8880 TCGv_i64 tmp = tcg_temp_new_i64(); 8881 tcg_gen_ext16u_i64(tmp, tcg_rn); 8882 write_fp_dreg(s, a->rd, tmp); 8883 } 8884 return true; 8885 } 8886 8887 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 8888 { 8889 if (fp_access_check(s)) { 8890 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8891 TCGv_i64 tmp = tcg_temp_new_i64(); 8892 tcg_gen_ext32u_i64(tmp, tcg_rn); 8893 write_fp_dreg(s, a->rd, tmp); 8894 } 8895 return true; 8896 } 8897 8898 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 8899 { 8900 if (fp_access_check(s)) { 8901 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8902 write_fp_dreg(s, a->rd, tcg_rn); 8903 } 8904 return true; 8905 } 8906 8907 static bool 
trans_FMOV_ux(DisasContext *s, arg_rr *a) 8908 { 8909 if (fp_access_check(s)) { 8910 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8911 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 8912 clear_vec_high(s, true, a->rd); 8913 } 8914 return true; 8915 } 8916 8917 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 8918 { 8919 if (!dc_isar_feature(aa64_fp16, s)) { 8920 return false; 8921 } 8922 if (fp_access_check(s)) { 8923 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8924 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 8925 } 8926 return true; 8927 } 8928 8929 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 8930 { 8931 if (fp_access_check(s)) { 8932 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8933 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 8934 } 8935 return true; 8936 } 8937 8938 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 8939 { 8940 if (fp_access_check(s)) { 8941 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8942 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 8943 } 8944 return true; 8945 } 8946 8947 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 8948 { 8949 if (fp_access_check(s)) { 8950 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8951 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 8952 } 8953 return true; 8954 } 8955 8956 typedef struct ENVScalar1 { 8957 NeonGenOneOpEnvFn *gen_bhs[3]; 8958 NeonGenOne64OpEnvFn *gen_d; 8959 } ENVScalar1; 8960 8961 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 8962 { 8963 if (!fp_access_check(s)) { 8964 return true; 8965 } 8966 if (a->esz == MO_64) { 8967 TCGv_i64 t = read_fp_dreg(s, a->rn); 8968 f->gen_d(t, tcg_env, t); 8969 write_fp_dreg(s, a->rd, t); 8970 } else { 8971 TCGv_i32 t = tcg_temp_new_i32(); 8972 8973 read_vec_element_i32(s, t, a->rn, 0, a->esz); 8974 f->gen_bhs[a->esz](t, tcg_env, t); 8975 write_fp_sreg(s, a->rd, t); 8976 } 8977 return true; 8978 } 8979 8980 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 8981 { 8982 if (a->esz == MO_64 && !a->q) { 8983 return false; 8984 } 8985 if (!fp_access_check(s)) { 8986 return true; 8987 } 8988 if (a->esz == MO_64) { 8989 TCGv_i64 t = tcg_temp_new_i64(); 8990 8991 for (int i = 0; i < 2; ++i) { 8992 read_vec_element(s, t, a->rn, i, MO_64); 8993 f->gen_d(t, tcg_env, t); 8994 write_vec_element(s, t, a->rd, i, MO_64); 8995 } 8996 } else { 8997 TCGv_i32 t = tcg_temp_new_i32(); 8998 int n = (a->q ? 
16 : 8) >> a->esz; 8999 9000 for (int i = 0; i < n; ++i) { 9001 read_vec_element_i32(s, t, a->rn, i, a->esz); 9002 f->gen_bhs[a->esz](t, tcg_env, t); 9003 write_vec_element_i32(s, t, a->rd, i, a->esz); 9004 } 9005 } 9006 clear_vec_high(s, a->q, a->rd); 9007 return true; 9008 } 9009 9010 static const ENVScalar1 f_scalar_sqabs = { 9011 { gen_helper_neon_qabs_s8, 9012 gen_helper_neon_qabs_s16, 9013 gen_helper_neon_qabs_s32 }, 9014 gen_helper_neon_qabs_s64, 9015 }; 9016 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9017 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9018 9019 static const ENVScalar1 f_scalar_sqneg = { 9020 { gen_helper_neon_qneg_s8, 9021 gen_helper_neon_qneg_s16, 9022 gen_helper_neon_qneg_s32 }, 9023 gen_helper_neon_qneg_s64, 9024 }; 9025 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9026 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9027 9028 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9029 { 9030 if (fp_access_check(s)) { 9031 TCGv_i64 t = read_fp_dreg(s, a->rn); 9032 f(t, t); 9033 write_fp_dreg(s, a->rd, t); 9034 } 9035 return true; 9036 } 9037 9038 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9039 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9040 9041 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9042 { 9043 if (fp_access_check(s)) { 9044 TCGv_i64 t = read_fp_dreg(s, a->rn); 9045 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9046 write_fp_dreg(s, a->rd, t); 9047 } 9048 return true; 9049 } 9050 9051 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9052 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9053 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9054 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9055 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9056 9057 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9058 ArithOneOp * const fn[3]) 9059 { 9060 if (a->esz == MO_64) { 9061 return false; 9062 } 9063 if (fp_access_check(s)) { 9064 TCGv_i64 t = tcg_temp_new_i64(); 9065 9066 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9067 fn[a->esz](t, t); 9068 clear_vec(s, a->rd); 9069 write_vec_element(s, t, a->rd, 0, a->esz); 9070 } 9071 return true; 9072 } 9073 9074 #define WRAP_ENV(NAME) \ 9075 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9076 { gen_helper_##NAME(d, tcg_env, n); } 9077 9078 WRAP_ENV(neon_unarrow_sat8) 9079 WRAP_ENV(neon_unarrow_sat16) 9080 WRAP_ENV(neon_unarrow_sat32) 9081 9082 static ArithOneOp * const f_scalar_sqxtun[] = { 9083 gen_neon_unarrow_sat8, 9084 gen_neon_unarrow_sat16, 9085 gen_neon_unarrow_sat32, 9086 }; 9087 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9088 9089 WRAP_ENV(neon_narrow_sat_s8) 9090 WRAP_ENV(neon_narrow_sat_s16) 9091 WRAP_ENV(neon_narrow_sat_s32) 9092 9093 static ArithOneOp * const f_scalar_sqxtn[] = { 9094 gen_neon_narrow_sat_s8, 9095 gen_neon_narrow_sat_s16, 9096 gen_neon_narrow_sat_s32, 9097 }; 9098 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9099 9100 WRAP_ENV(neon_narrow_sat_u8) 9101 WRAP_ENV(neon_narrow_sat_u16) 9102 WRAP_ENV(neon_narrow_sat_u32) 9103 9104 static ArithOneOp * const f_scalar_uqxtn[] = { 9105 gen_neon_narrow_sat_u8, 9106 gen_neon_narrow_sat_u16, 9107 gen_neon_narrow_sat_u32, 9108 }; 9109 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9110 9111 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9112 { 9113 /* 9114 * 64 bit to 32 bit float conversion 9115 * with von Neumann rounding (round to odd) 9116 */ 9117 TCGv_i32 tmp = tcg_temp_new_i32(); 9118 gen_helper_fcvtx_f64_to_f32(tmp, n, 
fpstatus_ptr(FPST_A64));
    tcg_gen_extu_i32_i64(d, tmp);
}

static ArithOneOp * const f_scalar_fcvtxn[] = {
    NULL,
    NULL,
    gen_fcvtxn_sd,
};
TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn)

#undef WRAP_ENV

static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    }
    return true;
}

TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)

static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    }
    return true;
}

TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)

static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
                                   ArithOneOp * const fn[3])
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();

        read_vec_element(s, t0, a->rn, 0, MO_64);
        read_vec_element(s, t1, a->rn, 1, MO_64);
        fn[a->esz](t0, t0);
        fn[a->esz](t1, t1);
        write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
        write_vec_element(s, t1, a->rd, a->q ?
3 : 1, MO_32); 9192 clear_vec_high(s, a->q, a->rd); 9193 } 9194 return true; 9195 } 9196 9197 static ArithOneOp * const f_scalar_xtn[] = { 9198 gen_helper_neon_narrow_u8, 9199 gen_helper_neon_narrow_u16, 9200 tcg_gen_ext32u_i64, 9201 }; 9202 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9203 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9204 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9205 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9206 9207 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9208 { 9209 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9210 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9211 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9212 TCGv_i32 ahp = get_ahp_flag(); 9213 9214 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9215 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9216 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9217 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9218 tcg_gen_extu_i32_i64(d, tcg_lo); 9219 } 9220 9221 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9222 { 9223 TCGv_i32 tmp = tcg_temp_new_i32(); 9224 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9225 9226 gen_helper_vfp_fcvtsd(tmp, n, fpst); 9227 tcg_gen_extu_i32_i64(d, tmp); 9228 } 9229 9230 static ArithOneOp * const f_vector_fcvtn[] = { 9231 NULL, 9232 gen_fcvtn_hs, 9233 gen_fcvtn_sd, 9234 }; 9235 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9236 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9237 9238 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9239 { 9240 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9241 TCGv_i32 tmp = tcg_temp_new_i32(); 9242 gen_helper_bfcvt_pair(tmp, n, fpst); 9243 tcg_gen_extu_i32_i64(d, tmp); 9244 } 9245 9246 static ArithOneOp * const f_vector_bfcvtn[] = { 9247 NULL, 9248 gen_bfcvtn_hs, 9249 NULL, 9250 }; 9251 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) 9252 9253 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9254 { 9255 static NeonGenWidenFn * const widenfns[3] = { 9256 gen_helper_neon_widen_u8, 9257 gen_helper_neon_widen_u16, 9258 tcg_gen_extu_i32_i64, 9259 }; 9260 NeonGenWidenFn *widenfn; 9261 TCGv_i64 tcg_res[2]; 9262 TCGv_i32 tcg_op; 9263 int part, pass; 9264 9265 if (a->esz == MO_64) { 9266 return false; 9267 } 9268 if (!fp_access_check(s)) { 9269 return true; 9270 } 9271 9272 tcg_op = tcg_temp_new_i32(); 9273 widenfn = widenfns[a->esz]; 9274 part = a->q ? 
2 : 0; 9275 9276 for (pass = 0; pass < 2; pass++) { 9277 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32); 9278 tcg_res[pass] = tcg_temp_new_i64(); 9279 widenfn(tcg_res[pass], tcg_op); 9280 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz); 9281 } 9282 9283 for (pass = 0; pass < 2; pass++) { 9284 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 9285 } 9286 return true; 9287 } 9288 9289 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9290 { 9291 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9292 9293 if (check <= 0) { 9294 return check == 0; 9295 } 9296 9297 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9298 return true; 9299 } 9300 9301 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs) 9302 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg) 9303 9304 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, 9305 const FPScalar1 *f, int rmode) 9306 { 9307 TCGv_i32 tcg_rmode = NULL; 9308 TCGv_ptr fpst; 9309 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9310 9311 if (check <= 0) { 9312 return check == 0; 9313 } 9314 9315 fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 9316 if (rmode >= 0) { 9317 tcg_rmode = gen_set_rmode(rmode, fpst); 9318 } 9319 9320 if (a->esz == MO_64) { 9321 TCGv_i64 t64 = tcg_temp_new_i64(); 9322 9323 for (int pass = 0; pass < 2; ++pass) { 9324 read_vec_element(s, t64, a->rn, pass, MO_64); 9325 f->gen_d(t64, t64, fpst); 9326 write_vec_element(s, t64, a->rd, pass, MO_64); 9327 } 9328 } else { 9329 TCGv_i32 t32 = tcg_temp_new_i32(); 9330 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr) 9331 = (a->esz == MO_16 ? f->gen_h : f->gen_s); 9332 9333 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) { 9334 read_vec_element_i32(s, t32, a->rn, pass, a->esz); 9335 gen(t32, t32, fpst); 9336 write_vec_element_i32(s, t32, a->rd, pass, a->esz); 9337 } 9338 } 9339 clear_vec_high(s, a->q, a->rd); 9340 9341 if (rmode >= 0) { 9342 gen_restore_rmode(tcg_rmode, fpst); 9343 } 9344 return true; 9345 } 9346 9347 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1) 9348 9349 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 9350 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF) 9351 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF) 9352 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO) 9353 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 9354 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1) 9355 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1) 9356 9357 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a, 9358 &f_scalar_frint32, FPROUNDING_ZERO) 9359 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1) 9360 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, 9361 &f_scalar_frint64, FPROUNDING_ZERO) 9362 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) 9363 9364 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, 9365 int rd, int rn, int data, 9366 gen_helper_gvec_2_ptr * const fns[3]) 9367 { 9368 int check = fp_access_check_vector_hsd(s, is_q, esz); 9369 TCGv_ptr fpst; 9370 9371 if (check <= 0) { 9372 return check == 0; 9373 } 9374 9375 fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 9376 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 9377 vec_full_reg_offset(s, rn), fpst, 9378 is_q ? 
16 : 8, vec_full_reg_size(s), 9379 data, fns[esz - 1]); 9380 return true; 9381 } 9382 9383 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { 9384 gen_helper_gvec_vcvt_sh, 9385 gen_helper_gvec_vcvt_sf, 9386 gen_helper_gvec_vcvt_sd, 9387 }; 9388 TRANS(SCVTF_vi, do_gvec_op2_fpst, 9389 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v) 9390 TRANS(SCVTF_vf, do_gvec_op2_fpst, 9391 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v) 9392 9393 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = { 9394 gen_helper_gvec_vcvt_uh, 9395 gen_helper_gvec_vcvt_uf, 9396 gen_helper_gvec_vcvt_ud, 9397 }; 9398 TRANS(UCVTF_vi, do_gvec_op2_fpst, 9399 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v) 9400 TRANS(UCVTF_vf, do_gvec_op2_fpst, 9401 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v) 9402 9403 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = { 9404 gen_helper_gvec_vcvt_rz_hs, 9405 gen_helper_gvec_vcvt_rz_fs, 9406 gen_helper_gvec_vcvt_rz_ds, 9407 }; 9408 TRANS(FCVTZS_vf, do_gvec_op2_fpst, 9409 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf) 9410 9411 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = { 9412 gen_helper_gvec_vcvt_rz_hu, 9413 gen_helper_gvec_vcvt_rz_fu, 9414 gen_helper_gvec_vcvt_rz_du, 9415 }; 9416 TRANS(FCVTZU_vf, do_gvec_op2_fpst, 9417 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf) 9418 9419 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = { 9420 gen_helper_gvec_vcvt_rm_sh, 9421 gen_helper_gvec_vcvt_rm_ss, 9422 gen_helper_gvec_vcvt_rm_sd, 9423 }; 9424 9425 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = { 9426 gen_helper_gvec_vcvt_rm_uh, 9427 gen_helper_gvec_vcvt_rm_us, 9428 gen_helper_gvec_vcvt_rm_ud, 9429 }; 9430 9431 TRANS(FCVTNS_vi, do_gvec_op2_fpst, 9432 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi) 9433 TRANS(FCVTNU_vi, do_gvec_op2_fpst, 9434 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi) 9435 TRANS(FCVTPS_vi, do_gvec_op2_fpst, 9436 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi) 9437 TRANS(FCVTPU_vi, do_gvec_op2_fpst, 9438 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi) 9439 TRANS(FCVTMS_vi, do_gvec_op2_fpst, 9440 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi) 9441 TRANS(FCVTMU_vi, do_gvec_op2_fpst, 9442 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi) 9443 TRANS(FCVTZS_vi, do_gvec_op2_fpst, 9444 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi) 9445 TRANS(FCVTZU_vi, do_gvec_op2_fpst, 9446 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi) 9447 TRANS(FCVTAS_vi, do_gvec_op2_fpst, 9448 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi) 9449 TRANS(FCVTAU_vi, do_gvec_op2_fpst, 9450 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi) 9451 9452 static gen_helper_gvec_2_ptr * const f_fceq0[] = { 9453 gen_helper_gvec_fceq0_h, 9454 gen_helper_gvec_fceq0_s, 9455 gen_helper_gvec_fceq0_d, 9456 }; 9457 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0) 9458 9459 static gen_helper_gvec_2_ptr * const f_fcgt0[] = { 9460 gen_helper_gvec_fcgt0_h, 9461 gen_helper_gvec_fcgt0_s, 9462 gen_helper_gvec_fcgt0_d, 9463 }; 9464 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0) 9465 9466 static gen_helper_gvec_2_ptr * const f_fcge0[] = { 9467 gen_helper_gvec_fcge0_h, 9468 gen_helper_gvec_fcge0_s, 9469 gen_helper_gvec_fcge0_d, 9470 }; 9471 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0) 9472 9473 static gen_helper_gvec_2_ptr * const f_fclt0[] = { 9474 gen_helper_gvec_fclt0_h, 9475 gen_helper_gvec_fclt0_s, 9476 
gen_helper_gvec_fclt0_d, 9477 }; 9478 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0) 9479 9480 static gen_helper_gvec_2_ptr * const f_fcle0[] = { 9481 gen_helper_gvec_fcle0_h, 9482 gen_helper_gvec_fcle0_s, 9483 gen_helper_gvec_fcle0_d, 9484 }; 9485 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0) 9486 9487 static gen_helper_gvec_2_ptr * const f_frecpe[] = { 9488 gen_helper_gvec_frecpe_h, 9489 gen_helper_gvec_frecpe_s, 9490 gen_helper_gvec_frecpe_d, 9491 }; 9492 TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) 9493 9494 static gen_helper_gvec_2_ptr * const f_frsqrte[] = { 9495 gen_helper_gvec_frsqrte_h, 9496 gen_helper_gvec_frsqrte_s, 9497 gen_helper_gvec_frsqrte_d, 9498 }; 9499 TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) 9500 9501 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) 9502 { 9503 /* Handle 2-reg-misc ops which are widening (so each size element 9504 * in the source becomes a 2*size element in the destination. 9505 * The only instruction like this is FCVTL. 9506 */ 9507 int pass; 9508 TCGv_ptr fpst; 9509 9510 if (!fp_access_check(s)) { 9511 return true; 9512 } 9513 9514 if (a->esz == MO_64) { 9515 /* 32 -> 64 bit fp conversion */ 9516 TCGv_i64 tcg_res[2]; 9517 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9518 int srcelt = a->q ? 2 : 0; 9519 9520 fpst = fpstatus_ptr(FPST_A64); 9521 9522 for (pass = 0; pass < 2; pass++) { 9523 tcg_res[pass] = tcg_temp_new_i64(); 9524 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32); 9525 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst); 9526 } 9527 for (pass = 0; pass < 2; pass++) { 9528 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 9529 } 9530 } else { 9531 /* 16 -> 32 bit fp conversion */ 9532 int srcelt = a->q ? 4 : 0; 9533 TCGv_i32 tcg_res[4]; 9534 TCGv_i32 ahp = get_ahp_flag(); 9535 9536 fpst = fpstatus_ptr(FPST_A64_F16); 9537 9538 for (pass = 0; pass < 4; pass++) { 9539 tcg_res[pass] = tcg_temp_new_i32(); 9540 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16); 9541 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 9542 fpst, ahp); 9543 } 9544 for (pass = 0; pass < 4; pass++) { 9545 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32); 9546 } 9547 } 9548 clear_vec_high(s, true, a->rd); 9549 return true; 9550 } 9551 9552 static bool trans_OK(DisasContext *s, arg_OK *a) 9553 { 9554 return true; 9555 } 9556 9557 static bool trans_FAIL(DisasContext *s, arg_OK *a) 9558 { 9559 s->is_nonstreaming = true; 9560 return true; 9561 } 9562 9563 /** 9564 * btype_destination_ok: 9565 * @insn: The instruction at the branch destination 9566 * @bt: SCTLR_ELx.BT 9567 * @btype: PSTATE.BTYPE, and is non-zero 9568 * 9569 * On a guarded page, there are a limited number of insns 9570 * that may be present at the branch target: 9571 * - branch target identifiers, 9572 * - paciasp, pacibsp, 9573 * - BRK insn 9574 * - HLT insn 9575 * Anything else causes a Branch Target Exception. 9576 * 9577 * Return true if the branch is compatible, false to raise BTITRAP. 9578 */ 9579 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 9580 { 9581 if ((insn & 0xfffff01fu) == 0xd503201fu) { 9582 /* HINT space */ 9583 switch (extract32(insn, 5, 7)) { 9584 case 0b011001: /* PACIASP */ 9585 case 0b011011: /* PACIBSP */ 9586 /* 9587 * If SCTLR_ELx.BT, then PACI*SP are not compatible 9588 * with btype == 3. Otherwise all btype are ok. 

static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn
 *  - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}
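
/*
 * The init_disas_context hook below runs once per translation block: it
 * copies everything that code generation depends on out of the cached TB
 * flags into DisasContext, so the code emitted for the TB is a function
 * only of the state captured when the TB was created.
 */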

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /*
     * Bound the number of insns to execute to those left on the page:
     * -(pc_first | TARGET_PAGE_MASK) is the number of bytes remaining
     * on the page, and each insn is 4 bytes.
     */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}
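
/*
 * Per-insn translation.  The checks below are ordered by architectural
 * exception priority: pending software-step first, then PC alignment,
 * then the instruction fetch itself (which may raise an instruction
 * abort inside arm_ldl_code), then illegal execution state, then the
 * Branch Target check, and only then the actual decode.
 */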

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}
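
/*
 * tb_stop emits the end-of-TB code appropriate to the disposition left in
 * dc->base.is_jmp: chain to the next TB for a straight-line end, update
 * the PC and exit to the main loop for insns such as WFI/WFE/YIELD, or
 * emit nothing for DISAS_NORETURN.  When architectural single-step is
 * active it instead raises the software step exception for the completed
 * step.
 */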

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start = aarch64_tr_tb_start,
    .insn_start = aarch64_tr_insn_start,
    .translate_insn = aarch64_tr_translate_insn,
    .tb_stop = aarch64_tr_tb_stop,
};
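
/*
 * These hooks are driven by the generic translator_loop(): the common Arm
 * translate code selects aarch64_translator_ops when the CPU is in AArch64
 * state, and translator_loop() then calls init_disas_context and tb_start
 * once per TB, insn_start and translate_insn for each instruction, and
 * finally tb_stop.
 */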