/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "exec/target_page.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register. But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access. This probes a single
 * address, the exact one specified. The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments. For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path. A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions are fresh auto-freed temporaries and need
 * not be explicitly freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/*
 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
 * - if FPCR.NEP == 0, clear the high elements of reg
 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
 *   (i.e. merge the result with those high elements)
 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
 */
static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i64 v)
{
    if (!s->fpcr_nep) {
        write_fp_dreg(s, reg, v);
        return;
    }

    /*
     * Move from mergereg to reg; this sets the high elements and
     * clears the bits above 128 as a side effect.
     */
    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
}

/*
 * Write a single-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 */
static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
}

/*
 * Write a half-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 * The caller must ensure that the top 16 bits of v are zero.
 */
static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, ARMFPStatusFlavour fpsttype, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, ARMFPStatusFlavour fpsttype,
                              int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
 * which for float32 is
 *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negh(chs_s, s);
    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, chs_s);
}

static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negs(chs_s, s);
    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, chs_s);
}

static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();

    gen_vfp_negd(chs_s, s);
    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, chs_s);
}

/*
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_abs(s)
 * which for float32 is
 *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, abs_s);
}

static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, abs_s);
}

static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64();

    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, abs_s);
}

static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negh(d, s);
    } else {
        gen_vfp_negh(d, s);
    }
}

static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negs(d, s);
    } else {
        gen_vfp_negs(d, s);
    }
}

static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negd(d, s);
    } else {
        gen_vfp_negd(d, s);
    }
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element,
                     mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = -1;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = 1;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/*
 * Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        bool ret;

        assert(dc_isar_feature(aa64_sme, s));
        ret = sme_sm_enabled_check(s);
        s->sve_access_checked = (ret ? 1 : -1);
        return ret;
    }
    if (s->sve_excp_el) {
        /* Assert that we only raise one exception per instruction. */
        assert(!s->sve_access_checked);
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        s->sve_access_checked = -1;
        return false;
    }
    s->sve_access_checked = 1;
    return fp_access_check(s);
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority. This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        bool ret = sme_access_check(s);
        s->fp_access_checked = (ret ? 1 : -1);
        return ret;
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ?
                                                MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}


static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3. */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.
         */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_ERET(DisasContext *s, arg_ERET *a)
{
    TCGv_i64 dst;

    if (s->current_el == 0) {
        return false;
    }
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_ERETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }
    /* The FGT trap takes precedence over an auth trap. */
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    dst = auth_branch_target(s, dst, cpu_X[31], !a->m);

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}

static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_YIELD;
    }
    return true;
}

static bool trans_WFI(DisasContext *s, arg_WFI *a)
{
    s->base.is_jmp = DISAS_WFI;
    return true;
}

static bool trans_WFE(DisasContext *s, arg_WFI *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_WFE;
    }
    return true;
}

static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
{
    if (!dc_isar_feature(aa64_wfxt, s)) {
        return false;
    }

    /*
     * Because we need to pass the register value to the helper,
     * it's easier to emit the code now, unlike trans_WFI which
     * defers it to aarch64_tr_tb_stop(). That means we need to
     * check ss_active so that single-stepping a WFIT doesn't halt.
1962 */ 1963 if (s->ss_active) { 1964 /* Act like a NOP under architectural singlestep */ 1965 return true; 1966 } 1967 1968 gen_a64_update_pc(s, 4); 1969 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1970 /* Go back to the main loop to check for interrupts */ 1971 s->base.is_jmp = DISAS_EXIT; 1972 return true; 1973 } 1974 1975 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1976 { 1977 if (!dc_isar_feature(aa64_wfxt, s)) { 1978 return false; 1979 } 1980 1981 /* 1982 * We rely here on our WFE implementation being a NOP, so we 1983 * don't need to do anything different to handle the WFET timeout 1984 * from what trans_WFE does. 1985 */ 1986 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1987 s->base.is_jmp = DISAS_WFE; 1988 } 1989 return true; 1990 } 1991 1992 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1993 { 1994 if (s->pauth_active) { 1995 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1996 } 1997 return true; 1998 } 1999 2000 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 2001 { 2002 if (s->pauth_active) { 2003 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2004 } 2005 return true; 2006 } 2007 2008 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 2009 { 2010 if (s->pauth_active) { 2011 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2012 } 2013 return true; 2014 } 2015 2016 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 2017 { 2018 if (s->pauth_active) { 2019 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2020 } 2021 return true; 2022 } 2023 2024 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 2025 { 2026 if (s->pauth_active) { 2027 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2028 } 2029 return true; 2030 } 2031 2032 static bool trans_ESB(DisasContext *s, arg_ESB *a) 2033 { 2034 /* Without RAS, we must implement this as NOP. */ 2035 if (dc_isar_feature(aa64_ras, s)) { 2036 /* 2037 * QEMU does not have a source of physical SErrors, 2038 * so we are only concerned with virtual SErrors. 2039 * The pseudocode in the ARM for this case is 2040 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 2041 * AArch64.vESBOperation(); 2042 * Most of the condition can be evaluated at translation time. 2043 * Test for EL2 present, and defer test for SEL2 to runtime. 
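 * In the code below, "PSTATE.EL IN {EL0, EL1}" becomes the translation
 * time check s->current_el <= 1 and "EL2 present" becomes
 * arm_dc_feature(s, ARM_FEATURE_EL2); the remaining EL2Enabled()
 * conditions (including Secure EL2) are left to the helper.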
2044 */ 2045 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 2046 gen_helper_vesb(tcg_env); 2047 } 2048 } 2049 return true; 2050 } 2051 2052 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 2053 { 2054 if (s->pauth_active) { 2055 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2056 } 2057 return true; 2058 } 2059 2060 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 2061 { 2062 if (s->pauth_active) { 2063 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2064 } 2065 return true; 2066 } 2067 2068 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 2069 { 2070 if (s->pauth_active) { 2071 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2072 } 2073 return true; 2074 } 2075 2076 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 2077 { 2078 if (s->pauth_active) { 2079 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2080 } 2081 return true; 2082 } 2083 2084 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 2085 { 2086 if (s->pauth_active) { 2087 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2088 } 2089 return true; 2090 } 2091 2092 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 2093 { 2094 if (s->pauth_active) { 2095 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2096 } 2097 return true; 2098 } 2099 2100 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 2101 { 2102 if (s->pauth_active) { 2103 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2104 } 2105 return true; 2106 } 2107 2108 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 2109 { 2110 if (s->pauth_active) { 2111 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2112 } 2113 return true; 2114 } 2115 2116 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 2117 { 2118 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2119 return true; 2120 } 2121 2122 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 2123 { 2124 /* We handle DSB and DMB the same way */ 2125 TCGBar bar; 2126 2127 switch (a->types) { 2128 case 1: /* MBReqTypes_Reads */ 2129 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 2130 break; 2131 case 2: /* MBReqTypes_Writes */ 2132 bar = TCG_BAR_SC | TCG_MO_ST_ST; 2133 break; 2134 default: /* MBReqTypes_All */ 2135 bar = TCG_BAR_SC | TCG_MO_ALL; 2136 break; 2137 } 2138 tcg_gen_mb(bar); 2139 return true; 2140 } 2141 2142 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 2143 { 2144 if (!dc_isar_feature(aa64_xs, s)) { 2145 return false; 2146 } 2147 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 2148 return true; 2149 } 2150 2151 static bool trans_ISB(DisasContext *s, arg_ISB *a) 2152 { 2153 /* 2154 * We need to break the TB after this insn to execute 2155 * self-modifying code correctly and also to take 2156 * any pending interrupts immediately. 2157 */ 2158 reset_btype(s); 2159 gen_goto_tb(s, 0, 4); 2160 return true; 2161 } 2162 2163 static bool trans_SB(DisasContext *s, arg_SB *a) 2164 { 2165 if (!dc_isar_feature(aa64_sb, s)) { 2166 return false; 2167 } 2168 /* 2169 * TODO: There is no speculation barrier opcode for TCG; 2170 * MB and end the TB instead. 
2171 */ 2172 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2173 gen_goto_tb(s, 0, 4); 2174 return true; 2175 } 2176 2177 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2178 { 2179 if (!dc_isar_feature(aa64_condm_4, s)) { 2180 return false; 2181 } 2182 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2183 return true; 2184 } 2185 2186 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2187 { 2188 TCGv_i32 z; 2189 2190 if (!dc_isar_feature(aa64_condm_5, s)) { 2191 return false; 2192 } 2193 2194 z = tcg_temp_new_i32(); 2195 2196 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2197 2198 /* 2199 * (!C & !Z) << 31 2200 * (!(C | Z)) << 31 2201 * ~((C | Z) << 31) 2202 * ~-(C | Z) 2203 * (C | Z) - 1 2204 */ 2205 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2206 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2207 2208 /* !(Z & C) */ 2209 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2210 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2211 2212 /* (!C & Z) << 31 -> -(Z & ~C) */ 2213 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2214 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2215 2216 /* C | Z */ 2217 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2218 2219 return true; 2220 } 2221 2222 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2223 { 2224 if (!dc_isar_feature(aa64_condm_5, s)) { 2225 return false; 2226 } 2227 2228 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2229 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2230 2231 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2232 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2233 2234 tcg_gen_movi_i32(cpu_NF, 0); 2235 tcg_gen_movi_i32(cpu_VF, 0); 2236 2237 return true; 2238 } 2239 2240 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2241 { 2242 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2243 return false; 2244 } 2245 if (a->imm & 1) { 2246 set_pstate_bits(PSTATE_UAO); 2247 } else { 2248 clear_pstate_bits(PSTATE_UAO); 2249 } 2250 gen_rebuild_hflags(s); 2251 s->base.is_jmp = DISAS_TOO_MANY; 2252 return true; 2253 } 2254 2255 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2256 { 2257 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2258 return false; 2259 } 2260 if (a->imm & 1) { 2261 set_pstate_bits(PSTATE_PAN); 2262 } else { 2263 clear_pstate_bits(PSTATE_PAN); 2264 } 2265 gen_rebuild_hflags(s); 2266 s->base.is_jmp = DISAS_TOO_MANY; 2267 return true; 2268 } 2269 2270 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2271 { 2272 if (s->current_el == 0) { 2273 return false; 2274 } 2275 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2276 s->base.is_jmp = DISAS_TOO_MANY; 2277 return true; 2278 } 2279 2280 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2281 { 2282 if (!dc_isar_feature(aa64_ssbs, s)) { 2283 return false; 2284 } 2285 if (a->imm & 1) { 2286 set_pstate_bits(PSTATE_SSBS); 2287 } else { 2288 clear_pstate_bits(PSTATE_SSBS); 2289 } 2290 /* Don't need to rebuild hflags since SSBS is a nop */ 2291 s->base.is_jmp = DISAS_TOO_MANY; 2292 return true; 2293 } 2294 2295 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2296 { 2297 if (!dc_isar_feature(aa64_dit, s)) { 2298 return false; 2299 } 2300 if (a->imm & 1) { 2301 set_pstate_bits(PSTATE_DIT); 2302 } else { 2303 clear_pstate_bits(PSTATE_DIT); 2304 } 2305 /* There's no need to rebuild hflags because DIT is a nop */ 2306 s->base.is_jmp = DISAS_TOO_MANY; 2307 return true; 2308 } 2309 2310 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2311 { 2312 if (dc_isar_feature(aa64_mte, s)) { 2313 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2314 if (a->imm & 1) { 2315 set_pstate_bits(PSTATE_TCO); 2316 } else { 2317 clear_pstate_bits(PSTATE_TCO); 2318 } 2319 gen_rebuild_hflags(s); 2320 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2321 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2322 return true; 2323 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2324 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2325 return true; 2326 } else { 2327 /* Insn not present */ 2328 return false; 2329 } 2330 } 2331 2332 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2333 { 2334 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2335 s->base.is_jmp = DISAS_TOO_MANY; 2336 return true; 2337 } 2338 2339 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2340 { 2341 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2342 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2343 s->base.is_jmp = DISAS_UPDATE_EXIT; 2344 return true; 2345 } 2346 2347 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2348 { 2349 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2350 return false; 2351 } 2352 2353 if (a->imm == 0) { 2354 clear_pstate_bits(PSTATE_ALLINT); 2355 } else if (s->current_el > 1) { 2356 set_pstate_bits(PSTATE_ALLINT); 2357 } else { 2358 gen_helper_msr_set_allint_el1(tcg_env); 2359 } 2360 2361 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2362 s->base.is_jmp = DISAS_UPDATE_EXIT; 2363 return true; 2364 } 2365 2366 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2367 { 2368 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2369 return false; 2370 } 2371 if (sme_access_check(s)) { 2372 int old = s->pstate_sm | (s->pstate_za << 1); 2373 int new = a->imm * 3; 2374 2375 if ((old ^ new) & a->mask) { 2376 /* At least one bit changes. */ 2377 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2378 tcg_constant_i32(a->mask)); 2379 s->base.is_jmp = DISAS_TOO_MANY; 2380 } 2381 } 2382 return true; 2383 } 2384 2385 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2386 { 2387 TCGv_i32 tmp = tcg_temp_new_i32(); 2388 TCGv_i32 nzcv = tcg_temp_new_i32(); 2389 2390 /* build bit 31, N */ 2391 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2392 /* build bit 30, Z */ 2393 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2394 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2395 /* build bit 29, C */ 2396 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2397 /* build bit 28, V */ 2398 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2399 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2400 /* generate result */ 2401 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2402 } 2403 2404 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2405 { 2406 TCGv_i32 nzcv = tcg_temp_new_i32(); 2407 2408 /* take NZCV from R[t] */ 2409 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2410 2411 /* bit 31, N */ 2412 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2413 /* bit 30, Z */ 2414 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2415 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2416 /* bit 29, C */ 2417 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2418 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2419 /* bit 28, V */ 2420 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2421 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2422 } 2423 2424 static void gen_sysreg_undef(DisasContext *s, bool isread, 2425 uint8_t op0, uint8_t op1, uint8_t op2, 2426 uint8_t crn, uint8_t crm, uint8_t rt) 2427 { 2428 /* 2429 * Generate code to emit an UNDEF with correct syndrome 2430 * information for a failed system register access. 
2431 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2432 * but if FEAT_IDST is implemented then read accesses to registers 2433 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2434 * syndrome. 2435 */ 2436 uint32_t syndrome; 2437 2438 if (isread && dc_isar_feature(aa64_ids, s) && 2439 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2440 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2441 } else { 2442 syndrome = syn_uncategorized(); 2443 } 2444 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2445 } 2446 2447 /* MRS - move from system register 2448 * MSR (register) - move to system register 2449 * SYS 2450 * SYSL 2451 * These are all essentially the same insn in 'read' and 'write' 2452 * versions, with varying op0 fields. 2453 */ 2454 static void handle_sys(DisasContext *s, bool isread, 2455 unsigned int op0, unsigned int op1, unsigned int op2, 2456 unsigned int crn, unsigned int crm, unsigned int rt) 2457 { 2458 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2459 crn, crm, op0, op1, op2); 2460 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2461 bool need_exit_tb = false; 2462 bool nv_trap_to_el2 = false; 2463 bool nv_redirect_reg = false; 2464 bool skip_fp_access_checks = false; 2465 bool nv2_mem_redirect = false; 2466 TCGv_ptr tcg_ri = NULL; 2467 TCGv_i64 tcg_rt; 2468 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2469 2470 if (crn == 11 || crn == 15) { 2471 /* 2472 * Check for TIDCP trap, which must take precedence over 2473 * the UNDEF for "no such register" etc. 2474 */ 2475 switch (s->current_el) { 2476 case 0: 2477 if (dc_isar_feature(aa64_tidcp1, s)) { 2478 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2479 } 2480 break; 2481 case 1: 2482 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2483 break; 2484 } 2485 } 2486 2487 if (!ri) { 2488 /* Unknown register; this might be a guest error or a QEMU 2489 * unimplemented feature. 2490 */ 2491 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2492 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2493 isread ? "read" : "write", op0, op1, crn, crm, op2); 2494 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2495 return; 2496 } 2497 2498 if (s->nv2 && ri->nv2_redirect_offset) { 2499 /* 2500 * Some registers always redirect to memory; some only do so if 2501 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2502 * pairs which share an offset; see the table in R_CSRPQ). 2503 */ 2504 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2505 nv2_mem_redirect = s->nv1; 2506 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2507 nv2_mem_redirect = !s->nv1; 2508 } else { 2509 nv2_mem_redirect = true; 2510 } 2511 } 2512 2513 /* Check access permissions */ 2514 if (!cp_access_ok(s->current_el, ri, isread)) { 2515 /* 2516 * FEAT_NV/NV2 handling does not do the usual FP access checks 2517 * for registers only accessible at EL2 (though it *does* do them 2518 * for registers accessible at EL1). 2519 */ 2520 skip_fp_access_checks = true; 2521 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2522 /* 2523 * This is one of the few EL2 registers which should redirect 2524 * to the equivalent EL1 register. We do that after running 2525 * the EL2 register's accessfn. 2526 */ 2527 nv_redirect_reg = true; 2528 assert(!nv2_mem_redirect); 2529 } else if (nv2_mem_redirect) { 2530 /* 2531 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2532 * UNDEF to EL1. 
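 * We deliberately do nothing here; nv2_mem_redirect is already set
 * and the actual memory access is emitted further down, after the
 * remaining access checks.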
2533 */ 2534 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2535 /* 2536 * This register / instruction exists and is an EL2 register, so 2537 * we must trap to EL2 if accessed in nested virtualization EL1 2538 * instead of UNDEFing. We'll do that after the usual access checks. 2539 * (This makes a difference only for a couple of registers like 2540 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2541 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2542 * an accessfn which does nothing when called from EL1, because 2543 * the trap-to-EL3 controls which would apply to that register 2544 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2545 */ 2546 nv_trap_to_el2 = true; 2547 } else { 2548 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2549 return; 2550 } 2551 } 2552 2553 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2554 /* Emit code to perform further access permissions checks at 2555 * runtime; this may result in an exception. 2556 */ 2557 gen_a64_update_pc(s, 0); 2558 tcg_ri = tcg_temp_new_ptr(); 2559 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2560 tcg_constant_i32(key), 2561 tcg_constant_i32(syndrome), 2562 tcg_constant_i32(isread)); 2563 } else if (ri->type & ARM_CP_RAISES_EXC) { 2564 /* 2565 * The readfn or writefn might raise an exception; 2566 * synchronize the CPU state in case it does. 2567 */ 2568 gen_a64_update_pc(s, 0); 2569 } 2570 2571 if (!skip_fp_access_checks) { 2572 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2573 return; 2574 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2575 return; 2576 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2577 return; 2578 } 2579 } 2580 2581 if (nv_trap_to_el2) { 2582 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2583 return; 2584 } 2585 2586 if (nv_redirect_reg) { 2587 /* 2588 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2589 * Conveniently in all cases the encoding of the EL1 register is 2590 * identical to the EL2 register except that opc1 is 0. 2591 * Get the reginfo for the EL1 register to use for the actual access. 2592 * We don't use the EL1 register's access function, and 2593 * fine-grained-traps on EL1 also do not apply here. 2594 */ 2595 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2596 crn, crm, op0, 0, op2); 2597 ri = get_arm_cp_reginfo(s->cp_regs, key); 2598 assert(ri); 2599 assert(cp_access_ok(s->current_el, ri, isread)); 2600 /* 2601 * We might not have done an update_pc earlier, so check we don't 2602 * need it. We could support this in future if necessary. 2603 */ 2604 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2605 } 2606 2607 if (nv2_mem_redirect) { 2608 /* 2609 * This system register is being redirected into an EL2 memory access. 2610 * This means it is not an IO operation, doesn't change hflags, 2611 * and need not end the TB, because it has no side effects. 2612 * 2613 * The access is 64-bit single copy atomic, guaranteed aligned because 2614 * of the definition of VCNR_EL2. Its endianness depends on 2615 * SCTLR_EL2.EE, not on the data endianness of EL1. 2616 * It is done under either the EL2 translation regime or the EL2&0 2617 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2618 * PSTATE.PAN is 0. 2619 */ 2620 TCGv_i64 ptr = tcg_temp_new_i64(); 2621 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2622 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2623 int memidx = arm_to_core_mmu_idx(armmemidx); 2624 uint32_t syn; 2625 2626 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2627 2628 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2629 tcg_gen_addi_i64(ptr, ptr, 2630 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2631 tcg_rt = cpu_reg(s, rt); 2632 2633 syn = syn_data_abort_vncr(0, !isread, 0); 2634 disas_set_insn_syndrome(s, syn); 2635 if (isread) { 2636 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2637 } else { 2638 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2639 } 2640 return; 2641 } 2642 2643 /* Handle special cases first */ 2644 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2645 case 0: 2646 break; 2647 case ARM_CP_NOP: 2648 return; 2649 case ARM_CP_NZCV: 2650 tcg_rt = cpu_reg(s, rt); 2651 if (isread) { 2652 gen_get_nzcv(tcg_rt); 2653 } else { 2654 gen_set_nzcv(tcg_rt); 2655 } 2656 return; 2657 case ARM_CP_CURRENTEL: 2658 { 2659 /* 2660 * Reads as current EL value from pstate, which is 2661 * guaranteed to be constant by the tb flags. 2662 * For nested virt we should report EL2. 2663 */ 2664 int el = s->nv ? 2 : s->current_el; 2665 tcg_rt = cpu_reg(s, rt); 2666 tcg_gen_movi_i64(tcg_rt, el << 2); 2667 return; 2668 } 2669 case ARM_CP_DC_ZVA: 2670 /* Writes clear the aligned block of memory which rt points into. */ 2671 if (s->mte_active[0]) { 2672 int desc = 0; 2673 2674 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2675 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2676 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2677 2678 tcg_rt = tcg_temp_new_i64(); 2679 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2680 tcg_constant_i32(desc), cpu_reg(s, rt)); 2681 } else { 2682 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2683 } 2684 gen_helper_dc_zva(tcg_env, tcg_rt); 2685 return; 2686 case ARM_CP_DC_GVA: 2687 { 2688 TCGv_i64 clean_addr, tag; 2689 2690 /* 2691 * DC_GVA, like DC_ZVA, requires that we supply the original 2692 * pointer for an invalid page. Probe that address first. 2693 */ 2694 tcg_rt = cpu_reg(s, rt); 2695 clean_addr = clean_data_tbi(s, tcg_rt); 2696 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2697 2698 if (s->ata[0]) { 2699 /* Extract the tag from the register to match STZGM. */ 2700 tag = tcg_temp_new_i64(); 2701 tcg_gen_shri_i64(tag, tcg_rt, 56); 2702 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2703 } 2704 } 2705 return; 2706 case ARM_CP_DC_GZVA: 2707 { 2708 TCGv_i64 clean_addr, tag; 2709 2710 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2711 tcg_rt = cpu_reg(s, rt); 2712 clean_addr = clean_data_tbi(s, tcg_rt); 2713 gen_helper_dc_zva(tcg_env, clean_addr); 2714 2715 if (s->ata[0]) { 2716 /* Extract the tag from the register to match STZGM. 
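 * (As in the DC_GVA case above: the allocation tag sits in the top
 * byte of the pointer, so shift it down for the helper.)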
*/ 2717 tag = tcg_temp_new_i64(); 2718 tcg_gen_shri_i64(tag, tcg_rt, 56); 2719 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2720 } 2721 } 2722 return; 2723 default: 2724 g_assert_not_reached(); 2725 } 2726 2727 if (ri->type & ARM_CP_IO) { 2728 /* I/O operations must end the TB here (whether read or write) */ 2729 need_exit_tb = translator_io_start(&s->base); 2730 } 2731 2732 tcg_rt = cpu_reg(s, rt); 2733 2734 if (isread) { 2735 if (ri->type & ARM_CP_CONST) { 2736 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2737 } else if (ri->readfn) { 2738 if (!tcg_ri) { 2739 tcg_ri = gen_lookup_cp_reg(key); 2740 } 2741 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2742 } else { 2743 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2744 } 2745 } else { 2746 if (ri->type & ARM_CP_CONST) { 2747 /* If not forbidden by access permissions, treat as WI */ 2748 return; 2749 } else if (ri->writefn) { 2750 if (!tcg_ri) { 2751 tcg_ri = gen_lookup_cp_reg(key); 2752 } 2753 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2754 } else { 2755 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2756 } 2757 } 2758 2759 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2760 /* 2761 * A write to any coprocessor register that ends a TB 2762 * must rebuild the hflags for the next TB. 2763 */ 2764 gen_rebuild_hflags(s); 2765 /* 2766 * We default to ending the TB on a coprocessor register write, 2767 * but allow this to be suppressed by the register definition 2768 * (usually only necessary to work around guest bugs). 2769 */ 2770 need_exit_tb = true; 2771 } 2772 if (need_exit_tb) { 2773 s->base.is_jmp = DISAS_UPDATE_EXIT; 2774 } 2775 } 2776 2777 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2778 { 2779 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2780 return true; 2781 } 2782 2783 static bool trans_SVC(DisasContext *s, arg_i *a) 2784 { 2785 /* 2786 * For SVC, HVC and SMC we advance the single-step state 2787 * machine before taking the exception. This is architecturally 2788 * mandated, to ensure that single-stepping a system call 2789 * instruction works properly. 2790 */ 2791 uint32_t syndrome = syn_aa64_svc(a->imm); 2792 if (s->fgt_svc) { 2793 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2794 return true; 2795 } 2796 gen_ss_advance(s); 2797 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2798 return true; 2799 } 2800 2801 static bool trans_HVC(DisasContext *s, arg_i *a) 2802 { 2803 int target_el = s->current_el == 3 ? 3 : 2; 2804 2805 if (s->current_el == 0) { 2806 unallocated_encoding(s); 2807 return true; 2808 } 2809 /* 2810 * The pre HVC helper handles cases when HVC gets trapped 2811 * as an undefined insn by runtime configuration. 
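 * (e.g. via SCR_EL3.HCE or HCR_EL2.HCD.)  Because the helper may raise
 * that exception itself, the PC is synchronized first.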
2812 */ 2813 gen_a64_update_pc(s, 0); 2814 gen_helper_pre_hvc(tcg_env); 2815 /* Architecture requires ss advance before we do the actual work */ 2816 gen_ss_advance(s); 2817 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2818 return true; 2819 } 2820 2821 static bool trans_SMC(DisasContext *s, arg_i *a) 2822 { 2823 if (s->current_el == 0) { 2824 unallocated_encoding(s); 2825 return true; 2826 } 2827 gen_a64_update_pc(s, 0); 2828 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2829 /* Architecture requires ss advance before we do the actual work */ 2830 gen_ss_advance(s); 2831 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2832 return true; 2833 } 2834 2835 static bool trans_BRK(DisasContext *s, arg_i *a) 2836 { 2837 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2838 return true; 2839 } 2840 2841 static bool trans_HLT(DisasContext *s, arg_i *a) 2842 { 2843 /* 2844 * HLT. This has two purposes. 2845 * Architecturally, it is an external halting debug instruction. 2846 * Since QEMU doesn't implement external debug, we treat this as 2847 * it is required for halting debug disabled: it will UNDEF. 2848 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2849 */ 2850 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2851 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2852 } else { 2853 unallocated_encoding(s); 2854 } 2855 return true; 2856 } 2857 2858 /* 2859 * Load/Store exclusive instructions are implemented by remembering 2860 * the value/address loaded, and seeing if these are the same 2861 * when the store is performed. This is not actually the architecturally 2862 * mandated semantics, but it works for typical guest code sequences 2863 * and avoids having to monitor regular stores. 2864 * 2865 * The store exclusive uses the atomic cmpxchg primitives to avoid 2866 * races in multi-threaded linux-user and when MTTCG softmmu is 2867 * enabled. 
2868 */ 2869 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2870 int size, bool is_pair) 2871 { 2872 int idx = get_mem_index(s); 2873 TCGv_i64 dirty_addr, clean_addr; 2874 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2875 2876 s->is_ldex = true; 2877 dirty_addr = cpu_reg_sp(s, rn); 2878 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2879 2880 g_assert(size <= 3); 2881 if (is_pair) { 2882 g_assert(size >= 2); 2883 if (size == 2) { 2884 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2885 if (s->be_data == MO_LE) { 2886 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2887 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2888 } else { 2889 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2890 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2891 } 2892 } else { 2893 TCGv_i128 t16 = tcg_temp_new_i128(); 2894 2895 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2896 2897 if (s->be_data == MO_LE) { 2898 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2899 cpu_exclusive_high, t16); 2900 } else { 2901 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2902 cpu_exclusive_val, t16); 2903 } 2904 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2905 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2906 } 2907 } else { 2908 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2909 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2910 } 2911 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2912 } 2913 2914 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2915 int rn, int size, int is_pair) 2916 { 2917 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2918 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2919 * [addr] = {Rt}; 2920 * if (is_pair) { 2921 * [addr + datasize] = {Rt2}; 2922 * } 2923 * {Rd} = 0; 2924 * } else { 2925 * {Rd} = 1; 2926 * } 2927 * env->exclusive_addr = -1; 2928 */ 2929 TCGLabel *fail_label = gen_new_label(); 2930 TCGLabel *done_label = gen_new_label(); 2931 TCGv_i64 tmp, clean_addr; 2932 MemOp memop; 2933 2934 /* 2935 * FIXME: We are out of spec here. We have recorded only the address 2936 * from load_exclusive, not the entire range, and we assume that the 2937 * size of the access on both sides match. The architecture allows the 2938 * store to be smaller than the load, so long as the stored bytes are 2939 * within the range recorded by the load. 2940 */ 2941 2942 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2943 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2944 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2945 2946 /* 2947 * The write, and any associated faults, only happen if the virtual 2948 * and physical addresses pass the exclusive monitor check. These 2949 * faults are exceedingly unlikely, because normally the guest uses 2950 * the exact same address register for the load_exclusive, and we 2951 * would have recognized these faults there. 2952 * 2953 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2954 * unaligned 4-byte write within the range of an aligned 8-byte load. 2955 * With LSE2, the store would need to cross a 16-byte boundary when the 2956 * load did not, which would mean the store is outside the range 2957 * recorded for the monitor, which would have failed a corrected monitor 2958 * check above. 
For now, we assume no size change and retain the 2959 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2960 * 2961 * It is possible to trigger an MTE fault, by performing the load with 2962 * a virtual address with a valid tag and performing the store with the 2963 * same virtual address and a different invalid tag. 2964 */ 2965 memop = size + is_pair; 2966 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2967 memop |= MO_ALIGN; 2968 } 2969 memop = finalize_memop(s, memop); 2970 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2971 2972 tmp = tcg_temp_new_i64(); 2973 if (is_pair) { 2974 if (size == 2) { 2975 if (s->be_data == MO_LE) { 2976 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2977 } else { 2978 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2979 } 2980 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2981 cpu_exclusive_val, tmp, 2982 get_mem_index(s), memop); 2983 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2984 } else { 2985 TCGv_i128 t16 = tcg_temp_new_i128(); 2986 TCGv_i128 c16 = tcg_temp_new_i128(); 2987 TCGv_i64 a, b; 2988 2989 if (s->be_data == MO_LE) { 2990 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2991 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2992 cpu_exclusive_high); 2993 } else { 2994 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2995 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2996 cpu_exclusive_val); 2997 } 2998 2999 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 3000 get_mem_index(s), memop); 3001 3002 a = tcg_temp_new_i64(); 3003 b = tcg_temp_new_i64(); 3004 if (s->be_data == MO_LE) { 3005 tcg_gen_extr_i128_i64(a, b, t16); 3006 } else { 3007 tcg_gen_extr_i128_i64(b, a, t16); 3008 } 3009 3010 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 3011 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 3012 tcg_gen_or_i64(tmp, a, b); 3013 3014 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 3015 } 3016 } else { 3017 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 3018 cpu_reg(s, rt), get_mem_index(s), memop); 3019 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 3020 } 3021 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 3022 tcg_gen_br(done_label); 3023 3024 gen_set_label(fail_label); 3025 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 3026 gen_set_label(done_label); 3027 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 3028 } 3029 3030 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 3031 int rn, int size) 3032 { 3033 TCGv_i64 tcg_rs = cpu_reg(s, rs); 3034 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3035 int memidx = get_mem_index(s); 3036 TCGv_i64 clean_addr; 3037 MemOp memop; 3038 3039 if (rn == 31) { 3040 gen_check_sp_alignment(s); 3041 } 3042 memop = check_atomic_align(s, rn, size); 3043 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3044 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 3045 memidx, memop); 3046 } 3047 3048 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 3049 int rn, int size) 3050 { 3051 TCGv_i64 s1 = cpu_reg(s, rs); 3052 TCGv_i64 s2 = cpu_reg(s, rs + 1); 3053 TCGv_i64 t1 = cpu_reg(s, rt); 3054 TCGv_i64 t2 = cpu_reg(s, rt + 1); 3055 TCGv_i64 clean_addr; 3056 int memidx = get_mem_index(s); 3057 MemOp memop; 3058 3059 if (rn == 31) { 3060 gen_check_sp_alignment(s); 3061 } 3062 3063 /* This is a single atomic access, despite the "pair". 
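 * The size + 1 below widens the MemOp: a pair of 32-bit values is
 * handled as one 64-bit cmpxchg, a pair of 64-bit values as one
 * 128-bit cmpxchg.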
*/ 3064 memop = check_atomic_align(s, rn, size + 1); 3065 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3066 3067 if (size == 2) { 3068 TCGv_i64 cmp = tcg_temp_new_i64(); 3069 TCGv_i64 val = tcg_temp_new_i64(); 3070 3071 if (s->be_data == MO_LE) { 3072 tcg_gen_concat32_i64(val, t1, t2); 3073 tcg_gen_concat32_i64(cmp, s1, s2); 3074 } else { 3075 tcg_gen_concat32_i64(val, t2, t1); 3076 tcg_gen_concat32_i64(cmp, s2, s1); 3077 } 3078 3079 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 3080 3081 if (s->be_data == MO_LE) { 3082 tcg_gen_extr32_i64(s1, s2, cmp); 3083 } else { 3084 tcg_gen_extr32_i64(s2, s1, cmp); 3085 } 3086 } else { 3087 TCGv_i128 cmp = tcg_temp_new_i128(); 3088 TCGv_i128 val = tcg_temp_new_i128(); 3089 3090 if (s->be_data == MO_LE) { 3091 tcg_gen_concat_i64_i128(val, t1, t2); 3092 tcg_gen_concat_i64_i128(cmp, s1, s2); 3093 } else { 3094 tcg_gen_concat_i64_i128(val, t2, t1); 3095 tcg_gen_concat_i64_i128(cmp, s2, s1); 3096 } 3097 3098 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 3099 3100 if (s->be_data == MO_LE) { 3101 tcg_gen_extr_i128_i64(s1, s2, cmp); 3102 } else { 3103 tcg_gen_extr_i128_i64(s2, s1, cmp); 3104 } 3105 } 3106 } 3107 3108 /* 3109 * Compute the ISS.SF bit for syndrome information if an exception 3110 * is taken on a load or store. This indicates whether the instruction 3111 * is accessing a 32-bit or 64-bit register. This logic is derived 3112 * from the ARMv8 specs for LDR (Shared decode for all encodings). 3113 */ 3114 static bool ldst_iss_sf(int size, bool sign, bool ext) 3115 { 3116 3117 if (sign) { 3118 /* 3119 * Signed loads are 64 bit results if we are not going to 3120 * do a zero-extend from 32 to 64 after the load. 3121 * (For a store, sign and ext are always false.) 3122 */ 3123 return !ext; 3124 } else { 3125 /* Unsigned loads/stores work at the specified size */ 3126 return size == MO_64; 3127 } 3128 } 3129 3130 static bool trans_STXR(DisasContext *s, arg_stxr *a) 3131 { 3132 if (a->rn == 31) { 3133 gen_check_sp_alignment(s); 3134 } 3135 if (a->lasr) { 3136 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3137 } 3138 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 3139 return true; 3140 } 3141 3142 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 3143 { 3144 if (a->rn == 31) { 3145 gen_check_sp_alignment(s); 3146 } 3147 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 3148 if (a->lasr) { 3149 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3150 } 3151 return true; 3152 } 3153 3154 static bool trans_STLR(DisasContext *s, arg_stlr *a) 3155 { 3156 TCGv_i64 clean_addr; 3157 MemOp memop; 3158 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3159 3160 /* 3161 * StoreLORelease is the same as Store-Release for QEMU, but 3162 * needs the feature-test. 3163 */ 3164 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3165 return false; 3166 } 3167 /* Generate ISS for non-exclusive accesses including LASR. 
*/ 3168 if (a->rn == 31) { 3169 gen_check_sp_alignment(s); 3170 } 3171 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3172 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3173 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3174 true, a->rn != 31, memop); 3175 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3176 iss_sf, a->lasr); 3177 return true; 3178 } 3179 3180 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3181 { 3182 TCGv_i64 clean_addr; 3183 MemOp memop; 3184 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3185 3186 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3187 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3188 return false; 3189 } 3190 /* Generate ISS for non-exclusive accesses including LASR. */ 3191 if (a->rn == 31) { 3192 gen_check_sp_alignment(s); 3193 } 3194 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3195 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3196 false, a->rn != 31, memop); 3197 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3198 a->rt, iss_sf, a->lasr); 3199 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3200 return true; 3201 } 3202 3203 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3204 { 3205 if (a->rn == 31) { 3206 gen_check_sp_alignment(s); 3207 } 3208 if (a->lasr) { 3209 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3210 } 3211 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3212 return true; 3213 } 3214 3215 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3216 { 3217 if (a->rn == 31) { 3218 gen_check_sp_alignment(s); 3219 } 3220 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3221 if (a->lasr) { 3222 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3223 } 3224 return true; 3225 } 3226 3227 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3228 { 3229 if (!dc_isar_feature(aa64_atomics, s)) { 3230 return false; 3231 } 3232 if (((a->rt | a->rs) & 1) != 0) { 3233 return false; 3234 } 3235 3236 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3237 return true; 3238 } 3239 3240 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3241 { 3242 if (!dc_isar_feature(aa64_atomics, s)) { 3243 return false; 3244 } 3245 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3246 return true; 3247 } 3248 3249 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3250 { 3251 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3252 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3253 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3254 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3255 3256 gen_pc_plus_diff(s, clean_addr, a->imm); 3257 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3258 false, true, a->rt, iss_sf, false); 3259 return true; 3260 } 3261 3262 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3263 { 3264 /* Load register (literal), vector version */ 3265 TCGv_i64 clean_addr; 3266 MemOp memop; 3267 3268 if (!fp_access_check(s)) { 3269 return true; 3270 } 3271 memop = finalize_memop_asimd(s, a->sz); 3272 clean_addr = tcg_temp_new_i64(); 3273 gen_pc_plus_diff(s, clean_addr, a->imm); 3274 do_fp_ld(s, a->rt, clean_addr, memop); 3275 return true; 3276 } 3277 3278 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3279 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3280 uint64_t offset, bool is_store, MemOp mop) 3281 { 3282 if (a->rn == 31) { 3283 gen_check_sp_alignment(s); 3284 } 3285 3286 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3287 if (!a->p) { 3288 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3289 } 3290 3291 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3292 (a->w || a->rn != 31), 2 << a->sz, mop); 3293 } 3294 3295 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3296 TCGv_i64 dirty_addr, uint64_t offset) 3297 { 3298 if (a->w) { 3299 if (a->p) { 3300 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3301 } 3302 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3303 } 3304 } 3305 3306 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3307 { 3308 uint64_t offset = a->imm << a->sz; 3309 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3310 MemOp mop = finalize_memop(s, a->sz); 3311 3312 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3313 tcg_rt = cpu_reg(s, a->rt); 3314 tcg_rt2 = cpu_reg(s, a->rt2); 3315 /* 3316 * We built mop above for the single logical access -- rebuild it 3317 * now for the paired operation. 3318 * 3319 * With LSE2, non-sign-extending pairs are treated atomically if 3320 * aligned, and if unaligned one of the pair will be completely 3321 * within a 16-byte block and that element will be atomic. 3322 * Otherwise each element is separately atomic. 3323 * In all cases, issue one operation with the correct atomicity. 3324 */ 3325 mop = a->sz + 1; 3326 if (s->align_mem) { 3327 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3328 } 3329 mop = finalize_memop_pair(s, mop); 3330 if (a->sz == 2) { 3331 TCGv_i64 tmp = tcg_temp_new_i64(); 3332 3333 if (s->be_data == MO_LE) { 3334 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3335 } else { 3336 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3337 } 3338 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3339 } else { 3340 TCGv_i128 tmp = tcg_temp_new_i128(); 3341 3342 if (s->be_data == MO_LE) { 3343 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3344 } else { 3345 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3346 } 3347 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3348 } 3349 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3350 return true; 3351 } 3352 3353 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3354 { 3355 uint64_t offset = a->imm << a->sz; 3356 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3357 MemOp mop = finalize_memop(s, a->sz); 3358 3359 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3360 tcg_rt = cpu_reg(s, a->rt); 3361 tcg_rt2 = cpu_reg(s, a->rt2); 3362 3363 /* 3364 * We built mop above for the single logical access -- rebuild it 3365 * now for the paired operation. 3366 * 3367 * With LSE2, non-sign-extending pairs are treated atomically if 3368 * aligned, and if unaligned one of the pair will be completely 3369 * within a 16-byte block and that element will be atomic. 3370 * Otherwise each element is separately atomic. 3371 * In all cases, issue one operation with the correct atomicity. 3372 * 3373 * This treats sign-extending loads like zero-extending loads, 3374 * since that reuses the most code below. 3375 */ 3376 mop = a->sz + 1; 3377 if (s->align_mem) { 3378 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3379 } 3380 mop = finalize_memop_pair(s, mop); 3381 if (a->sz == 2) { 3382 int o2 = s->be_data == MO_LE ? 
32 : 0; 3383 int o1 = o2 ^ 32; 3384 3385 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3386 if (a->sign) { 3387 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3388 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3389 } else { 3390 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3391 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3392 } 3393 } else { 3394 TCGv_i128 tmp = tcg_temp_new_i128(); 3395 3396 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3397 if (s->be_data == MO_LE) { 3398 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3399 } else { 3400 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3401 } 3402 } 3403 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3404 return true; 3405 } 3406 3407 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3408 { 3409 uint64_t offset = a->imm << a->sz; 3410 TCGv_i64 clean_addr, dirty_addr; 3411 MemOp mop; 3412 3413 if (!fp_access_check(s)) { 3414 return true; 3415 } 3416 3417 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3418 mop = finalize_memop_asimd(s, a->sz); 3419 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3420 do_fp_st(s, a->rt, clean_addr, mop); 3421 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3422 do_fp_st(s, a->rt2, clean_addr, mop); 3423 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3424 return true; 3425 } 3426 3427 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3428 { 3429 uint64_t offset = a->imm << a->sz; 3430 TCGv_i64 clean_addr, dirty_addr; 3431 MemOp mop; 3432 3433 if (!fp_access_check(s)) { 3434 return true; 3435 } 3436 3437 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3438 mop = finalize_memop_asimd(s, a->sz); 3439 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3440 do_fp_ld(s, a->rt, clean_addr, mop); 3441 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3442 do_fp_ld(s, a->rt2, clean_addr, mop); 3443 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3444 return true; 3445 } 3446 3447 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3448 { 3449 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3450 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3451 MemOp mop; 3452 TCGv_i128 tmp; 3453 3454 /* STGP only comes in one size. */ 3455 tcg_debug_assert(a->sz == MO_64); 3456 3457 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3458 return false; 3459 } 3460 3461 if (a->rn == 31) { 3462 gen_check_sp_alignment(s); 3463 } 3464 3465 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3466 if (!a->p) { 3467 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3468 } 3469 3470 clean_addr = clean_data_tbi(s, dirty_addr); 3471 tcg_rt = cpu_reg(s, a->rt); 3472 tcg_rt2 = cpu_reg(s, a->rt2); 3473 3474 /* 3475 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3476 * and one tag operation. We implement it as one single aligned 16-byte 3477 * memory operation for convenience. Note that the alignment ensures 3478 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3479 */ 3480 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3481 3482 tmp = tcg_temp_new_i128(); 3483 if (s->be_data == MO_LE) { 3484 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3485 } else { 3486 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3487 } 3488 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3489 3490 /* Perform the tag store, if tag access enabled. 
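 * dirty_addr is passed to the helper both as the address to tag and
 * as the source of the tag, so the granule is tagged from the
 * pointer's own allocation tag.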
*/ 3491 if (s->ata[0]) { 3492 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3493 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3494 } else { 3495 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3496 } 3497 } 3498 3499 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3500 return true; 3501 } 3502 3503 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3504 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3505 uint64_t offset, bool is_store, MemOp mop) 3506 { 3507 int memidx; 3508 3509 if (a->rn == 31) { 3510 gen_check_sp_alignment(s); 3511 } 3512 3513 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3514 if (!a->p) { 3515 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3516 } 3517 memidx = get_a64_user_mem_index(s, a->unpriv); 3518 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3519 a->w || a->rn != 31, 3520 mop, a->unpriv, memidx); 3521 } 3522 3523 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3524 TCGv_i64 dirty_addr, uint64_t offset) 3525 { 3526 if (a->w) { 3527 if (a->p) { 3528 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3529 } 3530 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3531 } 3532 } 3533 3534 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3535 { 3536 bool iss_sf, iss_valid = !a->w; 3537 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3538 int memidx = get_a64_user_mem_index(s, a->unpriv); 3539 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3540 3541 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3542 3543 tcg_rt = cpu_reg(s, a->rt); 3544 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3545 3546 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3547 iss_valid, a->rt, iss_sf, false); 3548 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3549 return true; 3550 } 3551 3552 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3553 { 3554 bool iss_sf, iss_valid = !a->w; 3555 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3556 int memidx = get_a64_user_mem_index(s, a->unpriv); 3557 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3558 3559 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3560 3561 tcg_rt = cpu_reg(s, a->rt); 3562 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3563 3564 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3565 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3566 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3567 return true; 3568 } 3569 3570 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3571 { 3572 TCGv_i64 clean_addr, dirty_addr; 3573 MemOp mop; 3574 3575 if (!fp_access_check(s)) { 3576 return true; 3577 } 3578 mop = finalize_memop_asimd(s, a->sz); 3579 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3580 do_fp_st(s, a->rt, clean_addr, mop); 3581 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3582 return true; 3583 } 3584 3585 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3586 { 3587 TCGv_i64 clean_addr, dirty_addr; 3588 MemOp mop; 3589 3590 if (!fp_access_check(s)) { 3591 return true; 3592 } 3593 mop = finalize_memop_asimd(s, a->sz); 3594 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3595 do_fp_ld(s, a->rt, clean_addr, mop); 3596 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3597 return true; 3598 } 3599 3600 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3601 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3602 bool is_store, MemOp memop) 3603 { 3604 TCGv_i64 tcg_rm; 3605 3606 if (a->rn == 31) { 3607 
gen_check_sp_alignment(s); 3608 } 3609 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3610 3611 tcg_rm = read_cpu_reg(s, a->rm, 1); 3612 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3613 3614 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3615 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3616 } 3617 3618 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3619 { 3620 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3621 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3622 MemOp memop; 3623 3624 if (extract32(a->opt, 1, 1) == 0) { 3625 return false; 3626 } 3627 3628 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3629 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3630 tcg_rt = cpu_reg(s, a->rt); 3631 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3632 a->ext, true, a->rt, iss_sf, false); 3633 return true; 3634 } 3635 3636 static bool trans_STR(DisasContext *s, arg_ldst *a) 3637 { 3638 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3639 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3640 MemOp memop; 3641 3642 if (extract32(a->opt, 1, 1) == 0) { 3643 return false; 3644 } 3645 3646 memop = finalize_memop(s, a->sz); 3647 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3648 tcg_rt = cpu_reg(s, a->rt); 3649 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3650 return true; 3651 } 3652 3653 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3654 { 3655 TCGv_i64 clean_addr, dirty_addr; 3656 MemOp memop; 3657 3658 if (extract32(a->opt, 1, 1) == 0) { 3659 return false; 3660 } 3661 3662 if (!fp_access_check(s)) { 3663 return true; 3664 } 3665 3666 memop = finalize_memop_asimd(s, a->sz); 3667 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3668 do_fp_ld(s, a->rt, clean_addr, memop); 3669 return true; 3670 } 3671 3672 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3673 { 3674 TCGv_i64 clean_addr, dirty_addr; 3675 MemOp memop; 3676 3677 if (extract32(a->opt, 1, 1) == 0) { 3678 return false; 3679 } 3680 3681 if (!fp_access_check(s)) { 3682 return true; 3683 } 3684 3685 memop = finalize_memop_asimd(s, a->sz); 3686 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3687 do_fp_st(s, a->rt, clean_addr, memop); 3688 return true; 3689 } 3690 3691 3692 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3693 int sign, bool invert) 3694 { 3695 MemOp mop = a->sz | sign; 3696 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3697 3698 if (a->rn == 31) { 3699 gen_check_sp_alignment(s); 3700 } 3701 mop = check_atomic_align(s, a->rn, mop); 3702 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3703 a->rn != 31, mop); 3704 tcg_rs = read_cpu_reg(s, a->rs, true); 3705 tcg_rt = cpu_reg(s, a->rt); 3706 if (invert) { 3707 tcg_gen_not_i64(tcg_rs, tcg_rs); 3708 } 3709 /* 3710 * The tcg atomic primitives are all full barriers. Therefore we 3711 * can ignore the Acquire and Release bits of this instruction. 
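 * For the signed min/max ops, MO_SIGN makes the primitive return the
 * old memory value sign-extended; the switch below zero-extends Rt
 * again so the architectural result is not sign-extended.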
3712 */ 3713 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3714 3715 if (mop & MO_SIGN) { 3716 switch (a->sz) { 3717 case MO_8: 3718 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3719 break; 3720 case MO_16: 3721 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3722 break; 3723 case MO_32: 3724 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3725 break; 3726 case MO_64: 3727 break; 3728 default: 3729 g_assert_not_reached(); 3730 } 3731 } 3732 return true; 3733 } 3734 3735 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3736 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3737 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3738 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3739 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3740 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3741 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3742 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3743 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3744 3745 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3746 { 3747 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3748 TCGv_i64 clean_addr; 3749 MemOp mop; 3750 3751 if (!dc_isar_feature(aa64_atomics, s) || 3752 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3753 return false; 3754 } 3755 if (a->rn == 31) { 3756 gen_check_sp_alignment(s); 3757 } 3758 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3759 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3760 a->rn != 31, mop); 3761 /* 3762 * LDAPR* are a special case because they are a simple load, not a 3763 * fetch-and-do-something op. 3764 * The architectural consistency requirements here are weaker than 3765 * full load-acquire (we only need "load-acquire processor consistent"), 3766 * but we choose to implement them as full LDAQ. 3767 */ 3768 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3769 true, a->rt, iss_sf, true); 3770 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3771 return true; 3772 } 3773 3774 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3775 { 3776 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3777 MemOp memop; 3778 3779 /* Load with pointer authentication */ 3780 if (!dc_isar_feature(aa64_pauth, s)) { 3781 return false; 3782 } 3783 3784 if (a->rn == 31) { 3785 gen_check_sp_alignment(s); 3786 } 3787 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3788 3789 if (s->pauth_active) { 3790 if (!a->m) { 3791 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3792 tcg_constant_i64(0)); 3793 } else { 3794 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3795 tcg_constant_i64(0)); 3796 } 3797 } 3798 3799 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3800 3801 memop = finalize_memop(s, MO_64); 3802 3803 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
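 * Any PAC in the pointer was already checked and removed above by the
 * autda/autdb helpers when pointer authentication is active.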
*/ 3804 clean_addr = gen_mte_check1(s, dirty_addr, false, 3805 a->w || a->rn != 31, memop); 3806 3807 tcg_rt = cpu_reg(s, a->rt); 3808 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3809 /* extend */ false, /* iss_valid */ !a->w, 3810 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3811 3812 if (a->w) { 3813 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3814 } 3815 return true; 3816 } 3817 3818 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3819 { 3820 TCGv_i64 clean_addr, dirty_addr; 3821 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3822 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3823 3824 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3825 return false; 3826 } 3827 3828 if (a->rn == 31) { 3829 gen_check_sp_alignment(s); 3830 } 3831 3832 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3833 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3834 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3835 clean_addr = clean_data_tbi(s, dirty_addr); 3836 3837 /* 3838 * Load-AcquirePC semantics; we implement as the slightly more 3839 * restrictive Load-Acquire. 3840 */ 3841 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3842 a->rt, iss_sf, true); 3843 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3844 return true; 3845 } 3846 3847 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3848 { 3849 TCGv_i64 clean_addr, dirty_addr; 3850 MemOp mop = a->sz; 3851 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3852 3853 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3854 return false; 3855 } 3856 3857 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3858 3859 if (a->rn == 31) { 3860 gen_check_sp_alignment(s); 3861 } 3862 3863 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3864 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3865 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3866 clean_addr = clean_data_tbi(s, dirty_addr); 3867 3868 /* Store-Release semantics */ 3869 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3870 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3871 return true; 3872 } 3873 3874 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3875 { 3876 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3877 MemOp endian, align, mop; 3878 3879 int total; /* total bytes */ 3880 int elements; /* elements per vector */ 3881 int r; 3882 int size = a->sz; 3883 3884 if (!a->p && a->rm != 0) { 3885 /* For non-postindexed accesses the Rm field must be 0 */ 3886 return false; 3887 } 3888 if (size == 3 && !a->q && a->selem != 1) { 3889 return false; 3890 } 3891 if (!fp_access_check(s)) { 3892 return true; 3893 } 3894 3895 if (a->rn == 31) { 3896 gen_check_sp_alignment(s); 3897 } 3898 3899 /* For our purposes, bytes are always little-endian. */ 3900 endian = s->be_data; 3901 if (size == 0) { 3902 endian = MO_LE; 3903 } 3904 3905 total = a->rpt * a->selem * (a->q ? 16 : 8); 3906 tcg_rn = cpu_reg_sp(s, a->rn); 3907 3908 /* 3909 * Issue the MTE check vs the logical repeat count, before we 3910 * promote consecutive little-endian elements below. 3911 */ 3912 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3913 finalize_memop_asimd(s, size)); 3914 3915 /* 3916 * Consecutive little-endian elements from a single register 3917 * can be promoted to a larger little-endian operation. 3918 */ 3919 align = MO_ALIGN; 3920 if (a->selem == 1 && endian == MO_LE) { 3921 align = pow2_align(size); 3922 size = 3; 3923 } 3924 if (!s->align_mem) { 3925 align = 0; 3926 } 3927 mop = endian | size | align; 3928 3929 elements = (a->q ? 
16 : 8) >> size; 3930 tcg_ebytes = tcg_constant_i64(1 << size); 3931 for (r = 0; r < a->rpt; r++) { 3932 int e; 3933 for (e = 0; e < elements; e++) { 3934 int xs; 3935 for (xs = 0; xs < a->selem; xs++) { 3936 int tt = (a->rt + r + xs) % 32; 3937 do_vec_ld(s, tt, e, clean_addr, mop); 3938 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3939 } 3940 } 3941 } 3942 3943 /* 3944 * For non-quad operations, setting a slice of the low 64 bits of 3945 * the register clears the high 64 bits (in the ARM ARM pseudocode 3946 * this is implicit in the fact that 'rval' is a 64 bit wide 3947 * variable). For quad operations, we might still need to zero 3948 * the high bits of SVE. 3949 */ 3950 for (r = 0; r < a->rpt * a->selem; r++) { 3951 int tt = (a->rt + r) % 32; 3952 clear_vec_high(s, a->q, tt); 3953 } 3954 3955 if (a->p) { 3956 if (a->rm == 31) { 3957 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3958 } else { 3959 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3960 } 3961 } 3962 return true; 3963 } 3964 3965 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3966 { 3967 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3968 MemOp endian, align, mop; 3969 3970 int total; /* total bytes */ 3971 int elements; /* elements per vector */ 3972 int r; 3973 int size = a->sz; 3974 3975 if (!a->p && a->rm != 0) { 3976 /* For non-postindexed accesses the Rm field must be 0 */ 3977 return false; 3978 } 3979 if (size == 3 && !a->q && a->selem != 1) { 3980 return false; 3981 } 3982 if (!fp_access_check(s)) { 3983 return true; 3984 } 3985 3986 if (a->rn == 31) { 3987 gen_check_sp_alignment(s); 3988 } 3989 3990 /* For our purposes, bytes are always little-endian. */ 3991 endian = s->be_data; 3992 if (size == 0) { 3993 endian = MO_LE; 3994 } 3995 3996 total = a->rpt * a->selem * (a->q ? 16 : 8); 3997 tcg_rn = cpu_reg_sp(s, a->rn); 3998 3999 /* 4000 * Issue the MTE check vs the logical repeat count, before we 4001 * promote consecutive little-endian elements below. 4002 */ 4003 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 4004 finalize_memop_asimd(s, size)); 4005 4006 /* 4007 * Consecutive little-endian elements from a single register 4008 * can be promoted to a larger little-endian operation. 4009 */ 4010 align = MO_ALIGN; 4011 if (a->selem == 1 && endian == MO_LE) { 4012 align = pow2_align(size); 4013 size = 3; 4014 } 4015 if (!s->align_mem) { 4016 align = 0; 4017 } 4018 mop = endian | size | align; 4019 4020 elements = (a->q ? 
16 : 8) >> size; 4021 tcg_ebytes = tcg_constant_i64(1 << size); 4022 for (r = 0; r < a->rpt; r++) { 4023 int e; 4024 for (e = 0; e < elements; e++) { 4025 int xs; 4026 for (xs = 0; xs < a->selem; xs++) { 4027 int tt = (a->rt + r + xs) % 32; 4028 do_vec_st(s, tt, e, clean_addr, mop); 4029 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4030 } 4031 } 4032 } 4033 4034 if (a->p) { 4035 if (a->rm == 31) { 4036 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4037 } else { 4038 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4039 } 4040 } 4041 return true; 4042 } 4043 4044 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 4045 { 4046 int xs, total, rt; 4047 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4048 MemOp mop; 4049 4050 if (!a->p && a->rm != 0) { 4051 return false; 4052 } 4053 if (!fp_access_check(s)) { 4054 return true; 4055 } 4056 4057 if (a->rn == 31) { 4058 gen_check_sp_alignment(s); 4059 } 4060 4061 total = a->selem << a->scale; 4062 tcg_rn = cpu_reg_sp(s, a->rn); 4063 4064 mop = finalize_memop_asimd(s, a->scale); 4065 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 4066 total, mop); 4067 4068 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4069 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4070 do_vec_st(s, rt, a->index, clean_addr, mop); 4071 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4072 } 4073 4074 if (a->p) { 4075 if (a->rm == 31) { 4076 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4077 } else { 4078 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4079 } 4080 } 4081 return true; 4082 } 4083 4084 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 4085 { 4086 int xs, total, rt; 4087 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4088 MemOp mop; 4089 4090 if (!a->p && a->rm != 0) { 4091 return false; 4092 } 4093 if (!fp_access_check(s)) { 4094 return true; 4095 } 4096 4097 if (a->rn == 31) { 4098 gen_check_sp_alignment(s); 4099 } 4100 4101 total = a->selem << a->scale; 4102 tcg_rn = cpu_reg_sp(s, a->rn); 4103 4104 mop = finalize_memop_asimd(s, a->scale); 4105 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4106 total, mop); 4107 4108 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4109 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4110 do_vec_ld(s, rt, a->index, clean_addr, mop); 4111 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4112 } 4113 4114 if (a->p) { 4115 if (a->rm == 31) { 4116 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4117 } else { 4118 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4119 } 4120 } 4121 return true; 4122 } 4123 4124 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 4125 { 4126 int xs, total, rt; 4127 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4128 MemOp mop; 4129 4130 if (!a->p && a->rm != 0) { 4131 return false; 4132 } 4133 if (!fp_access_check(s)) { 4134 return true; 4135 } 4136 4137 if (a->rn == 31) { 4138 gen_check_sp_alignment(s); 4139 } 4140 4141 total = a->selem << a->scale; 4142 tcg_rn = cpu_reg_sp(s, a->rn); 4143 4144 mop = finalize_memop_asimd(s, a->scale); 4145 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4146 total, mop); 4147 4148 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4149 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4150 /* Load and replicate to all elements */ 4151 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 4152 4153 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4154 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 4155 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 4156 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4157 } 4158 4159 if (a->p) { 4160 if (a->rm == 31) { 4161 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4162 } else { 4163 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4164 } 4165 } 4166 return true; 4167 } 4168 4169 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4170 { 4171 TCGv_i64 addr, clean_addr, tcg_rt; 4172 int size = 4 << s->dcz_blocksize; 4173 4174 if (!dc_isar_feature(aa64_mte, s)) { 4175 return false; 4176 } 4177 if (s->current_el == 0) { 4178 return false; 4179 } 4180 4181 if (a->rn == 31) { 4182 gen_check_sp_alignment(s); 4183 } 4184 4185 addr = read_cpu_reg_sp(s, a->rn, true); 4186 tcg_gen_addi_i64(addr, addr, a->imm); 4187 tcg_rt = cpu_reg(s, a->rt); 4188 4189 if (s->ata[0]) { 4190 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4191 } 4192 /* 4193 * The non-tags portion of STZGM is mostly like DC_ZVA, 4194 * except the alignment happens before the access. 4195 */ 4196 clean_addr = clean_data_tbi(s, addr); 4197 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4198 gen_helper_dc_zva(tcg_env, clean_addr); 4199 return true; 4200 } 4201 4202 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4203 { 4204 TCGv_i64 addr, clean_addr, tcg_rt; 4205 4206 if (!dc_isar_feature(aa64_mte, s)) { 4207 return false; 4208 } 4209 if (s->current_el == 0) { 4210 return false; 4211 } 4212 4213 if (a->rn == 31) { 4214 gen_check_sp_alignment(s); 4215 } 4216 4217 addr = read_cpu_reg_sp(s, a->rn, true); 4218 tcg_gen_addi_i64(addr, addr, a->imm); 4219 tcg_rt = cpu_reg(s, a->rt); 4220 4221 if (s->ata[0]) { 4222 gen_helper_stgm(tcg_env, addr, tcg_rt); 4223 } else { 4224 MMUAccessType acc = MMU_DATA_STORE; 4225 int size = 4 << s->gm_blocksize; 4226 4227 clean_addr = clean_data_tbi(s, addr); 4228 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4229 gen_probe_access(s, clean_addr, acc, size); 4230 } 4231 return true; 4232 } 4233 4234 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4235 { 4236 TCGv_i64 addr, clean_addr, tcg_rt; 4237 4238 if (!dc_isar_feature(aa64_mte, s)) { 4239 return false; 4240 } 4241 if (s->current_el == 0) { 4242 return false; 4243 } 4244 4245 if (a->rn == 31) { 4246 gen_check_sp_alignment(s); 4247 } 4248 4249 addr = read_cpu_reg_sp(s, a->rn, true); 4250 tcg_gen_addi_i64(addr, addr, a->imm); 4251 tcg_rt = cpu_reg(s, a->rt); 4252 4253 if (s->ata[0]) { 4254 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4255 } else { 4256 MMUAccessType acc = MMU_DATA_LOAD; 4257 int size = 4 << s->gm_blocksize; 4258 4259 clean_addr = clean_data_tbi(s, addr); 4260 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4261 gen_probe_access(s, clean_addr, acc, size); 4262 /* The result tags are zeros. */ 4263 tcg_gen_movi_i64(tcg_rt, 0); 4264 } 4265 return true; 4266 } 4267 4268 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4269 { 4270 TCGv_i64 addr, clean_addr, tcg_rt; 4271 4272 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4273 return false; 4274 } 4275 4276 if (a->rn == 31) { 4277 gen_check_sp_alignment(s); 4278 } 4279 4280 addr = read_cpu_reg_sp(s, a->rn, true); 4281 if (!a->p) { 4282 /* pre-index or signed offset */ 4283 tcg_gen_addi_i64(addr, addr, a->imm); 4284 } 4285 4286 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4287 tcg_rt = cpu_reg(s, a->rt); 4288 if (s->ata[0]) { 4289 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4290 } else { 4291 /* 4292 * Tag access disabled: we must check for aborts on the load 4293 * from [rn+offset], and then insert a 0 tag into rt.
4294 */ 4295 clean_addr = clean_data_tbi(s, addr); 4296 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4297 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4298 } 4299 4300 if (a->w) { 4301 /* pre-index or post-index */ 4302 if (a->p) { 4303 /* post-index */ 4304 tcg_gen_addi_i64(addr, addr, a->imm); 4305 } 4306 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4307 } 4308 return true; 4309 } 4310 4311 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4312 { 4313 TCGv_i64 addr, tcg_rt; 4314 4315 if (a->rn == 31) { 4316 gen_check_sp_alignment(s); 4317 } 4318 4319 addr = read_cpu_reg_sp(s, a->rn, true); 4320 if (!a->p) { 4321 /* pre-index or signed offset */ 4322 tcg_gen_addi_i64(addr, addr, a->imm); 4323 } 4324 tcg_rt = cpu_reg_sp(s, a->rt); 4325 if (!s->ata[0]) { 4326 /* 4327 * For STG and ST2G, we need to check alignment and probe memory. 4328 * TODO: For STZG and STZ2G, we could rely on the stores below, 4329 * at least for system mode; user-only won't enforce alignment. 4330 */ 4331 if (is_pair) { 4332 gen_helper_st2g_stub(tcg_env, addr); 4333 } else { 4334 gen_helper_stg_stub(tcg_env, addr); 4335 } 4336 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4337 if (is_pair) { 4338 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4339 } else { 4340 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4341 } 4342 } else { 4343 if (is_pair) { 4344 gen_helper_st2g(tcg_env, addr, tcg_rt); 4345 } else { 4346 gen_helper_stg(tcg_env, addr, tcg_rt); 4347 } 4348 } 4349 4350 if (is_zero) { 4351 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4352 TCGv_i64 zero64 = tcg_constant_i64(0); 4353 TCGv_i128 zero128 = tcg_temp_new_i128(); 4354 int mem_index = get_mem_index(s); 4355 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4356 4357 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4358 4359 /* This is 1 or 2 atomic 16-byte operations. */ 4360 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4361 if (is_pair) { 4362 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4363 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4364 } 4365 } 4366 4367 if (a->w) { 4368 /* pre-index or post-index */ 4369 if (a->p) { 4370 /* post-index */ 4371 tcg_gen_addi_i64(addr, addr, a->imm); 4372 } 4373 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4374 } 4375 return true; 4376 } 4377 4378 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4379 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4380 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4381 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4382 4383 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4384 4385 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4386 bool is_setg, SetFn fn) 4387 { 4388 int memidx; 4389 uint32_t syndrome, desc = 0; 4390 4391 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4392 return false; 4393 } 4394 4395 /* 4396 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4397 * us to pull this check before the CheckMOPSEnabled() test 4398 * (which we do in the helper function) 4399 */ 4400 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4401 a->rd == 31 || a->rn == 31) { 4402 return false; 4403 } 4404 4405 memidx = get_a64_user_mem_index(s, a->unpriv); 4406 4407 /* 4408 * We pass option_a == true, matching our implementation; 4409 * we pass wrong_option == false: helper function may set that bit. 
4410 */ 4411 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4412 is_epilogue, false, true, a->rd, a->rs, a->rn); 4413 4414 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4415 /* We may need to do MTE tag checking, so assemble the descriptor */ 4416 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4417 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4418 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4419 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4420 } 4421 /* The helper function always needs the memidx even with MTE disabled */ 4422 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4423 4424 /* 4425 * The helper needs the register numbers, but since they're in 4426 * the syndrome anyway, we let it extract them from there rather 4427 * than passing in an extra three integer arguments. 4428 */ 4429 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4430 return true; 4431 } 4432 4433 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4434 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4435 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4436 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4437 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4438 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4439 4440 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4441 4442 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4443 { 4444 int rmemidx, wmemidx; 4445 uint32_t syndrome, rdesc = 0, wdesc = 0; 4446 bool wunpriv = extract32(a->options, 0, 1); 4447 bool runpriv = extract32(a->options, 1, 1); 4448 4449 /* 4450 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4451 * us to pull this check before the CheckMOPSEnabled() test 4452 * (which we do in the helper function) 4453 */ 4454 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4455 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4456 return false; 4457 } 4458 4459 rmemidx = get_a64_user_mem_index(s, runpriv); 4460 wmemidx = get_a64_user_mem_index(s, wunpriv); 4461 4462 /* 4463 * We pass option_a == true, matching our implementation; 4464 * we pass wrong_option == false: helper function may set that bit. 4465 */ 4466 syndrome = syn_mop(false, false, a->options, is_epilogue, 4467 false, true, a->rd, a->rs, a->rn); 4468 4469 /* If we need to do MTE tag checking, assemble the descriptors */ 4470 if (s->mte_active[runpriv]) { 4471 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4472 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4473 } 4474 if (s->mte_active[wunpriv]) { 4475 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4476 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4477 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4478 } 4479 /* The helper function needs these parts of the descriptor regardless */ 4480 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4481 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4482 4483 /* 4484 * The helper needs the register numbers, but since they're in 4485 * the syndrome anyway, we let it extract them from there rather 4486 * than passing in an extra three integer arguments. 
4487 */ 4488 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4489 tcg_constant_i32(rdesc)); 4490 return true; 4491 } 4492 4493 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4494 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4495 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4496 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4497 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4498 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4499 4500 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4501 4502 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4503 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4504 { 4505 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4506 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4507 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4508 4509 fn(tcg_rd, tcg_rn, tcg_imm); 4510 if (!a->sf) { 4511 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4512 } 4513 return true; 4514 } 4515 4516 /* 4517 * PC-rel. addressing 4518 */ 4519 4520 static bool trans_ADR(DisasContext *s, arg_ri *a) 4521 { 4522 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4523 return true; 4524 } 4525 4526 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4527 { 4528 int64_t offset = (int64_t)a->imm << 12; 4529 4530 /* The page offset is ok for CF_PCREL. */ 4531 offset -= s->pc_curr & 0xfff; 4532 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4533 return true; 4534 } 4535 4536 /* 4537 * Add/subtract (immediate) 4538 */ 4539 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4540 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4541 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4542 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4543 4544 /* 4545 * Add/subtract (immediate, with tags) 4546 */ 4547 4548 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4549 bool sub_op) 4550 { 4551 TCGv_i64 tcg_rn, tcg_rd; 4552 int imm; 4553 4554 imm = a->uimm6 << LOG2_TAG_GRANULE; 4555 if (sub_op) { 4556 imm = -imm; 4557 } 4558 4559 tcg_rn = cpu_reg_sp(s, a->rn); 4560 tcg_rd = cpu_reg_sp(s, a->rd); 4561 4562 if (s->ata[0]) { 4563 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4564 tcg_constant_i32(imm), 4565 tcg_constant_i32(a->uimm4)); 4566 } else { 4567 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4568 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4569 } 4570 return true; 4571 } 4572 4573 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4574 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4575 4576 /* The input should be a value in the bottom e bits (with higher 4577 * bits zero); returns that value replicated into every element 4578 * of size e in a 64 bit integer. 4579 */ 4580 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4581 { 4582 assert(e != 0); 4583 while (e < 64) { 4584 mask |= mask << e; 4585 e *= 2; 4586 } 4587 return mask; 4588 } 4589 4590 /* 4591 * Logical (immediate) 4592 */ 4593 4594 /* 4595 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4596 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4597 * value (ie should cause a guest UNDEF exception), and true if they are 4598 * valid, in which case the decoded bit pattern is written to result. 
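* For example, immn = 0, imms = 0b111100, immr = 0 selects 2-bit elements each containing a single set bit with no rotation, which decodes to 0x5555555555555555.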
4599 */ 4600 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4601 unsigned int imms, unsigned int immr) 4602 { 4603 uint64_t mask; 4604 unsigned e, levels, s, r; 4605 int len; 4606 4607 assert(immn < 2 && imms < 64 && immr < 64); 4608 4609 /* The bit patterns we create here are 64 bit patterns which 4610 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4611 * 64 bits each. Each element contains the same value: a run 4612 * of between 1 and e-1 non-zero bits, rotated within the 4613 * element by between 0 and e-1 bits. 4614 * 4615 * The element size and run length are encoded into immn (1 bit) 4616 * and imms (6 bits) as follows: 4617 * 64 bit elements: immn = 1, imms = <length of run - 1> 4618 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4619 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4620 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4621 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4622 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4623 * Notice that immn = 0, imms = 11111x is the only combination 4624 * not covered by one of the above options; this is reserved. 4625 * Further, <length of run - 1> all-ones is a reserved pattern. 4626 * 4627 * In all cases the rotation is by immr % e (and immr is 6 bits). 4628 */ 4629 4630 /* First determine the element size */ 4631 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4632 if (len < 1) { 4633 /* This is the immn == 0, imms == 0x11111x case */ 4634 return false; 4635 } 4636 e = 1 << len; 4637 4638 levels = e - 1; 4639 s = imms & levels; 4640 r = immr & levels; 4641 4642 if (s == levels) { 4643 /* <length of run - 1> mustn't be all-ones. */ 4644 return false; 4645 } 4646 4647 /* Create the value of one element: s+1 set bits rotated 4648 * by r within the element (which is e bits wide)... 4649 */ 4650 mask = MAKE_64BIT_MASK(0, s + 1); 4651 if (r) { 4652 mask = (mask >> r) | (mask << (e - r)); 4653 mask &= MAKE_64BIT_MASK(0, e); 4654 } 4655 /* ...then replicate the element over the whole 64 bit value */ 4656 mask = bitfield_replicate(mask, e); 4657 *result = mask; 4658 return true; 4659 } 4660 4661 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4662 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4663 { 4664 TCGv_i64 tcg_rd, tcg_rn; 4665 uint64_t imm; 4666 4667 /* Some immediate field values are reserved. */ 4668 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4669 extract32(a->dbm, 0, 6), 4670 extract32(a->dbm, 6, 6))) { 4671 return false; 4672 } 4673 if (!a->sf) { 4674 imm &= 0xffffffffull; 4675 } 4676 4677 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4678 tcg_rn = cpu_reg(s, a->rn); 4679 4680 fn(tcg_rd, tcg_rn, imm); 4681 if (set_cc) { 4682 gen_logic_CC(a->sf, tcg_rd); 4683 } 4684 if (!a->sf) { 4685 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4686 } 4687 return true; 4688 } 4689 4690 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4691 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4692 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4693 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4694 4695 /* 4696 * Move wide (immediate) 4697 */ 4698 4699 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4700 { 4701 int pos = a->hw << 4; 4702 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4703 return true; 4704 } 4705 4706 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4707 { 4708 int pos = a->hw << 4; 4709 uint64_t imm = a->imm; 4710 4711 imm = ~(imm << pos); 4712 if (!a->sf) { 4713 imm = (uint32_t)imm; 4714 } 4715 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4716 return true; 4717 } 4718 4719 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4720 { 4721 int pos = a->hw << 4; 4722 TCGv_i64 tcg_rd, tcg_im; 4723 4724 tcg_rd = cpu_reg(s, a->rd); 4725 tcg_im = tcg_constant_i64(a->imm); 4726 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4727 if (!a->sf) { 4728 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4729 } 4730 return true; 4731 } 4732 4733 /* 4734 * Bitfield 4735 */ 4736 4737 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4738 { 4739 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4740 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4741 unsigned int bitsize = a->sf ? 64 : 32; 4742 unsigned int ri = a->immr; 4743 unsigned int si = a->imms; 4744 unsigned int pos, len; 4745 4746 if (si >= ri) { 4747 /* Wd<s-r:0> = Wn<s:r> */ 4748 len = (si - ri) + 1; 4749 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4750 if (!a->sf) { 4751 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4752 } 4753 } else { 4754 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4755 len = si + 1; 4756 pos = (bitsize - ri) & (bitsize - 1); 4757 4758 if (len < ri) { 4759 /* 4760 * Sign extend the destination field from len to fill the 4761 * balance of the word. Let the deposit below insert all 4762 * of those sign bits. 4763 */ 4764 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4765 len = ri; 4766 } 4767 4768 /* 4769 * We start with zero, and we haven't modified any bits outside 4770 * bitsize, therefore no final zero-extension is needed for !sf. 4771 */ 4772 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4773 } 4774 return true; 4775 } 4776 4777 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4778 { 4779 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4780 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4781 unsigned int bitsize = a->sf ? 64 : 32; 4782 unsigned int ri = a->immr; 4783 unsigned int si = a->imms; 4784 unsigned int pos, len; 4785 4786 tcg_rd = cpu_reg(s, a->rd); 4787 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4788 4789 if (si >= ri) { 4790 /* Wd<s-r:0> = Wn<s:r> */ 4791 len = (si - ri) + 1; 4792 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4793 } else { 4794 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4795 len = si + 1; 4796 pos = (bitsize - ri) & (bitsize - 1); 4797 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4798 } 4799 return true; 4800 } 4801 4802 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4803 { 4804 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4805 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4806 unsigned int bitsize = a->sf ?
64 : 32; 4807 unsigned int ri = a->immr; 4808 unsigned int si = a->imms; 4809 unsigned int pos, len; 4810 4811 tcg_rd = cpu_reg(s, a->rd); 4812 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4813 4814 if (si >= ri) { 4815 /* Wd<s-r:0> = Wn<s:r> */ 4816 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4817 len = (si - ri) + 1; 4818 pos = 0; 4819 } else { 4820 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4821 len = si + 1; 4822 pos = (bitsize - ri) & (bitsize - 1); 4823 } 4824 4825 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4826 if (!a->sf) { 4827 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4828 } 4829 return true; 4830 } 4831 4832 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4833 { 4834 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4835 4836 tcg_rd = cpu_reg(s, a->rd); 4837 4838 if (unlikely(a->imm == 0)) { 4839 /* 4840 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4841 * so an extract from bit 0 is a special case. 4842 */ 4843 if (a->sf) { 4844 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4845 } else { 4846 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4847 } 4848 } else { 4849 tcg_rm = cpu_reg(s, a->rm); 4850 tcg_rn = cpu_reg(s, a->rn); 4851 4852 if (a->sf) { 4853 /* Specialization to ROR happens in EXTRACT2. */ 4854 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4855 } else { 4856 TCGv_i32 t0 = tcg_temp_new_i32(); 4857 4858 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4859 if (a->rm == a->rn) { 4860 tcg_gen_rotri_i32(t0, t0, a->imm); 4861 } else { 4862 TCGv_i32 t1 = tcg_temp_new_i32(); 4863 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4864 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4865 } 4866 tcg_gen_extu_i32_i64(tcg_rd, t0); 4867 } 4868 } 4869 return true; 4870 } 4871 4872 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4873 { 4874 if (fp_access_check(s)) { 4875 int len = (a->len + 1) * 16; 4876 4877 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4878 vec_full_reg_offset(s, a->rm), tcg_env, 4879 a->q ? 16 : 8, vec_full_reg_size(s), 4880 (len << 6) | (a->tbx << 5) | a->rn, 4881 gen_helper_simd_tblx); 4882 } 4883 return true; 4884 } 4885 4886 typedef int simd_permute_idx_fn(int i, int part, int elements); 4887 4888 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4889 simd_permute_idx_fn *fn, int part) 4890 { 4891 MemOp esz = a->esz; 4892 int datasize = a->q ? 16 : 8; 4893 int elements = datasize >> esz; 4894 TCGv_i64 tcg_res[2], tcg_ele; 4895 4896 if (esz == MO_64 && !a->q) { 4897 return false; 4898 } 4899 if (!fp_access_check(s)) { 4900 return true; 4901 } 4902 4903 tcg_res[0] = tcg_temp_new_i64(); 4904 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4905 tcg_ele = tcg_temp_new_i64(); 4906 4907 for (int i = 0; i < elements; i++) { 4908 int o, w, idx; 4909 4910 idx = fn(i, part, elements); 4911 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4912 idx & (elements - 1), esz); 4913 4914 w = (i << (esz + 3)) / 64; 4915 o = (i << (esz + 3)) % 64; 4916 if (o == 0) { 4917 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4918 } else { 4919 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4920 } 4921 } 4922 4923 for (int i = a->q; i >= 0; --i) { 4924 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4925 } 4926 clear_vec_high(s, a->q, a->rd); 4927 return true; 4928 } 4929 4930 static int permute_load_uzp(int i, int part, int elements) 4931 { 4932 return 2 * i + part; 4933 } 4934 4935 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4936 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4937 4938 static int permute_load_trn(int i, int part, int elements) 4939 { 4940 return (i & 1) * elements + (i & ~1) + part; 4941 } 4942 4943 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4944 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4945 4946 static int permute_load_zip(int i, int part, int elements) 4947 { 4948 return (i & 1) * elements + ((part * elements + i) >> 1); 4949 } 4950 4951 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4952 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4953 4954 /* 4955 * Cryptographic AES, SHA, SHA512 4956 */ 4957 4958 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4959 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4960 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4961 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4962 4963 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4964 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4965 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4966 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4967 4968 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4969 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4970 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4971 4972 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4973 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4974 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4975 4976 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4977 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4978 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4979 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4980 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4981 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4982 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4983 4984 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4985 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4986 4987 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4988 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4989 4990 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4991 { 4992 if (!dc_isar_feature(aa64_sm3, s)) { 4993 return false; 4994 } 4995 if (fp_access_check(s)) { 4996 
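/* SS1 = ROL32(ROL32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7); the right-rotations by 20 and 25 below are the equivalent left-rotations by 12 and 7. */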
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4997 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4998 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 4999 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5000 5001 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 5002 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 5003 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 5004 5005 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 5006 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 5007 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 5008 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 5009 5010 /* Clear the whole register first, then store bits [127:96]. */ 5011 clear_vec(s, a->rd); 5012 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 5013 } 5014 return true; 5015 } 5016 5017 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 5018 { 5019 if (fp_access_check(s)) { 5020 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 5021 } 5022 return true; 5023 } 5024 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 5025 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 5026 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 5027 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 5028 5029 static bool trans_XAR(DisasContext *s, arg_XAR *a) 5030 { 5031 if (!dc_isar_feature(aa64_sha3, s)) { 5032 return false; 5033 } 5034 if (fp_access_check(s)) { 5035 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 5036 vec_full_reg_offset(s, a->rn), 5037 vec_full_reg_offset(s, a->rm), a->imm, 16, 5038 vec_full_reg_size(s)); 5039 } 5040 return true; 5041 } 5042 5043 /* 5044 * Advanced SIMD copy 5045 */ 5046 5047 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 5048 { 5049 unsigned esz = ctz32(imm); 5050 if (esz <= MO_64) { 5051 *pesz = esz; 5052 *pidx = imm >> (esz + 1); 5053 return true; 5054 } 5055 return false; 5056 } 5057 5058 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 5059 { 5060 MemOp esz; 5061 unsigned idx; 5062 5063 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5064 return false; 5065 } 5066 if (fp_access_check(s)) { 5067 /* 5068 * This instruction just extracts the specified element and 5069 * zero-extends it into the bottom of the destination register. 5070 */ 5071 TCGv_i64 tmp = tcg_temp_new_i64(); 5072 read_vec_element(s, tmp, a->rn, idx, esz); 5073 write_fp_dreg(s, a->rd, tmp); 5074 } 5075 return true; 5076 } 5077 5078 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 5079 { 5080 MemOp esz; 5081 unsigned idx; 5082 5083 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5084 return false; 5085 } 5086 if (esz == MO_64 && !a->q) { 5087 return false; 5088 } 5089 if (fp_access_check(s)) { 5090 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 5091 vec_reg_offset(s, a->rn, idx, esz), 5092 a->q ? 16 : 8, vec_full_reg_size(s)); 5093 } 5094 return true; 5095 } 5096 5097 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 5098 { 5099 MemOp esz; 5100 unsigned idx; 5101 5102 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5103 return false; 5104 } 5105 if (esz == MO_64 && !a->q) { 5106 return false; 5107 } 5108 if (fp_access_check(s)) { 5109 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5110 a->q ? 
16 : 8, vec_full_reg_size(s), 5111 cpu_reg(s, a->rn)); 5112 } 5113 return true; 5114 } 5115 5116 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 5117 { 5118 MemOp esz; 5119 unsigned idx; 5120 5121 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5122 return false; 5123 } 5124 if (is_signed) { 5125 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 5126 return false; 5127 } 5128 } else { 5129 if (esz == MO_64 ? !a->q : a->q) { 5130 return false; 5131 } 5132 } 5133 if (fp_access_check(s)) { 5134 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5135 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 5136 if (is_signed && !a->q) { 5137 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5138 } 5139 } 5140 return true; 5141 } 5142 5143 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 5144 TRANS(UMOV, do_smov_umov, a, 0) 5145 5146 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 5147 { 5148 MemOp esz; 5149 unsigned idx; 5150 5151 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5152 return false; 5153 } 5154 if (fp_access_check(s)) { 5155 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 5156 clear_vec_high(s, true, a->rd); 5157 } 5158 return true; 5159 } 5160 5161 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5162 { 5163 MemOp esz; 5164 unsigned didx, sidx; 5165 5166 if (!decode_esz_idx(a->di, &esz, &didx)) { 5167 return false; 5168 } 5169 sidx = a->si >> esz; 5170 if (fp_access_check(s)) { 5171 TCGv_i64 tmp = tcg_temp_new_i64(); 5172 5173 read_vec_element(s, tmp, a->rn, sidx, esz); 5174 write_vec_element(s, tmp, a->rd, didx, esz); 5175 5176 /* INS is considered a 128-bit write for SVE. */ 5177 clear_vec_high(s, true, a->rd); 5178 } 5179 return true; 5180 } 5181 5182 /* 5183 * Advanced SIMD three same 5184 */ 5185 5186 typedef struct FPScalar { 5187 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5188 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5189 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5190 } FPScalar; 5191 5192 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, 5193 const FPScalar *f, int mergereg, 5194 ARMFPStatusFlavour fpsttype) 5195 { 5196 switch (a->esz) { 5197 case MO_64: 5198 if (fp_access_check(s)) { 5199 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5200 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5201 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); 5202 write_fp_dreg_merging(s, a->rd, mergereg, t0); 5203 } 5204 break; 5205 case MO_32: 5206 if (fp_access_check(s)) { 5207 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5208 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5209 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); 5210 write_fp_sreg_merging(s, a->rd, mergereg, t0); 5211 } 5212 break; 5213 case MO_16: 5214 if (!dc_isar_feature(aa64_fp16, s)) { 5215 return false; 5216 } 5217 if (fp_access_check(s)) { 5218 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5219 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5220 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); 5221 write_fp_hreg_merging(s, a->rd, mergereg, t0); 5222 } 5223 break; 5224 default: 5225 return false; 5226 } 5227 return true; 5228 } 5229 5230 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, 5231 int mergereg) 5232 { 5233 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, 5234 a->esz == MO_16 ? 5235 FPST_A64_F16 : FPST_A64); 5236 } 5237 5238 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, 5239 const FPScalar *fnormal, const FPScalar *fah, 5240 int mergereg) 5241 { 5242 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? 
fah : fnormal, 5243 mergereg, select_ah_fpst(s, a->esz)); 5244 } 5245 5246 /* Some insns need to call different helpers when FPCR.AH == 1 */ 5247 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, 5248 const FPScalar *fnormal, 5249 const FPScalar *fah, 5250 int mergereg) 5251 { 5252 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); 5253 } 5254 5255 static const FPScalar f_scalar_fadd = { 5256 gen_helper_vfp_addh, 5257 gen_helper_vfp_adds, 5258 gen_helper_vfp_addd, 5259 }; 5260 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) 5261 5262 static const FPScalar f_scalar_fsub = { 5263 gen_helper_vfp_subh, 5264 gen_helper_vfp_subs, 5265 gen_helper_vfp_subd, 5266 }; 5267 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) 5268 5269 static const FPScalar f_scalar_fdiv = { 5270 gen_helper_vfp_divh, 5271 gen_helper_vfp_divs, 5272 gen_helper_vfp_divd, 5273 }; 5274 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) 5275 5276 static const FPScalar f_scalar_fmul = { 5277 gen_helper_vfp_mulh, 5278 gen_helper_vfp_muls, 5279 gen_helper_vfp_muld, 5280 }; 5281 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) 5282 5283 static const FPScalar f_scalar_fmax = { 5284 gen_helper_vfp_maxh, 5285 gen_helper_vfp_maxs, 5286 gen_helper_vfp_maxd, 5287 }; 5288 static const FPScalar f_scalar_fmax_ah = { 5289 gen_helper_vfp_ah_maxh, 5290 gen_helper_vfp_ah_maxs, 5291 gen_helper_vfp_ah_maxd, 5292 }; 5293 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) 5294 5295 static const FPScalar f_scalar_fmin = { 5296 gen_helper_vfp_minh, 5297 gen_helper_vfp_mins, 5298 gen_helper_vfp_mind, 5299 }; 5300 static const FPScalar f_scalar_fmin_ah = { 5301 gen_helper_vfp_ah_minh, 5302 gen_helper_vfp_ah_mins, 5303 gen_helper_vfp_ah_mind, 5304 }; 5305 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) 5306 5307 static const FPScalar f_scalar_fmaxnm = { 5308 gen_helper_vfp_maxnumh, 5309 gen_helper_vfp_maxnums, 5310 gen_helper_vfp_maxnumd, 5311 }; 5312 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) 5313 5314 static const FPScalar f_scalar_fminnm = { 5315 gen_helper_vfp_minnumh, 5316 gen_helper_vfp_minnums, 5317 gen_helper_vfp_minnumd, 5318 }; 5319 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) 5320 5321 static const FPScalar f_scalar_fmulx = { 5322 gen_helper_advsimd_mulxh, 5323 gen_helper_vfp_mulxs, 5324 gen_helper_vfp_mulxd, 5325 }; 5326 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) 5327 5328 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5329 { 5330 gen_helper_vfp_mulh(d, n, m, s); 5331 gen_vfp_negh(d, d); 5332 } 5333 5334 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5335 { 5336 gen_helper_vfp_muls(d, n, m, s); 5337 gen_vfp_negs(d, d); 5338 } 5339 5340 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5341 { 5342 gen_helper_vfp_muld(d, n, m, s); 5343 gen_vfp_negd(d, d); 5344 } 5345 5346 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5347 { 5348 gen_helper_vfp_mulh(d, n, m, s); 5349 gen_vfp_ah_negh(d, d); 5350 } 5351 5352 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5353 { 5354 gen_helper_vfp_muls(d, n, m, s); 5355 gen_vfp_ah_negs(d, d); 5356 } 5357 5358 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5359 { 5360 gen_helper_vfp_muld(d, n, m, s); 5361 gen_vfp_ah_negd(d, d); 5362 } 5363 5364 static const FPScalar f_scalar_fnmul = { 5365 gen_fnmul_h, 5366 
gen_fnmul_s, 5367 gen_fnmul_d, 5368 }; 5369 static const FPScalar f_scalar_ah_fnmul = { 5370 gen_fnmul_ah_h, 5371 gen_fnmul_ah_s, 5372 gen_fnmul_ah_d, 5373 }; 5374 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) 5375 5376 static const FPScalar f_scalar_fcmeq = { 5377 gen_helper_advsimd_ceq_f16, 5378 gen_helper_neon_ceq_f32, 5379 gen_helper_neon_ceq_f64, 5380 }; 5381 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) 5382 5383 static const FPScalar f_scalar_fcmge = { 5384 gen_helper_advsimd_cge_f16, 5385 gen_helper_neon_cge_f32, 5386 gen_helper_neon_cge_f64, 5387 }; 5388 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) 5389 5390 static const FPScalar f_scalar_fcmgt = { 5391 gen_helper_advsimd_cgt_f16, 5392 gen_helper_neon_cgt_f32, 5393 gen_helper_neon_cgt_f64, 5394 }; 5395 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) 5396 5397 static const FPScalar f_scalar_facge = { 5398 gen_helper_advsimd_acge_f16, 5399 gen_helper_neon_acge_f32, 5400 gen_helper_neon_acge_f64, 5401 }; 5402 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) 5403 5404 static const FPScalar f_scalar_facgt = { 5405 gen_helper_advsimd_acgt_f16, 5406 gen_helper_neon_acgt_f32, 5407 gen_helper_neon_acgt_f64, 5408 }; 5409 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) 5410 5411 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5412 { 5413 gen_helper_vfp_subh(d, n, m, s); 5414 gen_vfp_absh(d, d); 5415 } 5416 5417 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5418 { 5419 gen_helper_vfp_subs(d, n, m, s); 5420 gen_vfp_abss(d, d); 5421 } 5422 5423 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5424 { 5425 gen_helper_vfp_subd(d, n, m, s); 5426 gen_vfp_absd(d, d); 5427 } 5428 5429 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5430 { 5431 gen_helper_vfp_subh(d, n, m, s); 5432 gen_vfp_ah_absh(d, d); 5433 } 5434 5435 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5436 { 5437 gen_helper_vfp_subs(d, n, m, s); 5438 gen_vfp_ah_abss(d, d); 5439 } 5440 5441 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5442 { 5443 gen_helper_vfp_subd(d, n, m, s); 5444 gen_vfp_ah_absd(d, d); 5445 } 5446 5447 static const FPScalar f_scalar_fabd = { 5448 gen_fabd_h, 5449 gen_fabd_s, 5450 gen_fabd_d, 5451 }; 5452 static const FPScalar f_scalar_ah_fabd = { 5453 gen_fabd_ah_h, 5454 gen_fabd_ah_s, 5455 gen_fabd_ah_d, 5456 }; 5457 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) 5458 5459 static const FPScalar f_scalar_frecps = { 5460 gen_helper_recpsf_f16, 5461 gen_helper_recpsf_f32, 5462 gen_helper_recpsf_f64, 5463 }; 5464 static const FPScalar f_scalar_ah_frecps = { 5465 gen_helper_recpsf_ah_f16, 5466 gen_helper_recpsf_ah_f32, 5467 gen_helper_recpsf_ah_f64, 5468 }; 5469 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, 5470 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) 5471 5472 static const FPScalar f_scalar_frsqrts = { 5473 gen_helper_rsqrtsf_f16, 5474 gen_helper_rsqrtsf_f32, 5475 gen_helper_rsqrtsf_f64, 5476 }; 5477 static const FPScalar f_scalar_ah_frsqrts = { 5478 gen_helper_rsqrtsf_ah_f16, 5479 gen_helper_rsqrtsf_ah_f32, 5480 gen_helper_rsqrtsf_ah_f64, 5481 }; 5482 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, 5483 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) 5484 5485 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5486 const FPScalar *f, bool swap) 5487 { 5488 switch (a->esz) { 5489 case MO_64: 5490 if 
(fp_access_check(s)) { 5491 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5492 TCGv_i64 t1 = tcg_constant_i64(0); 5493 if (swap) { 5494 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5495 } else { 5496 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5497 } 5498 write_fp_dreg(s, a->rd, t0); 5499 } 5500 break; 5501 case MO_32: 5502 if (fp_access_check(s)) { 5503 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5504 TCGv_i32 t1 = tcg_constant_i32(0); 5505 if (swap) { 5506 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5507 } else { 5508 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5509 } 5510 write_fp_sreg(s, a->rd, t0); 5511 } 5512 break; 5513 case MO_16: 5514 if (!dc_isar_feature(aa64_fp16, s)) { 5515 return false; 5516 } 5517 if (fp_access_check(s)) { 5518 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5519 TCGv_i32 t1 = tcg_constant_i32(0); 5520 if (swap) { 5521 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 5522 } else { 5523 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5524 } 5525 write_fp_sreg(s, a->rd, t0); 5526 } 5527 break; 5528 default: 5529 return false; 5530 } 5531 return true; 5532 } 5533 5534 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5535 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5536 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5537 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5538 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5539 5540 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5541 MemOp sgn_n, MemOp sgn_m, 5542 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5543 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5544 { 5545 TCGv_i64 t0, t1, t2, qc; 5546 MemOp esz = a->esz; 5547 5548 if (!fp_access_check(s)) { 5549 return true; 5550 } 5551 5552 t0 = tcg_temp_new_i64(); 5553 t1 = tcg_temp_new_i64(); 5554 t2 = tcg_temp_new_i64(); 5555 qc = tcg_temp_new_i64(); 5556 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5557 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5558 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5559 5560 if (esz == MO_64) { 5561 gen_d(t0, qc, t1, t2); 5562 } else { 5563 gen_bhs(t0, qc, t1, t2, esz); 5564 tcg_gen_ext_i64(t0, t0, esz); 5565 } 5566 5567 write_fp_dreg(s, a->rd, t0); 5568 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5569 return true; 5570 } 5571 5572 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5573 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5574 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5575 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5576 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5577 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5578 5579 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5580 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5581 { 5582 if (fp_access_check(s)) { 5583 TCGv_i64 t0 = tcg_temp_new_i64(); 5584 TCGv_i64 t1 = tcg_temp_new_i64(); 5585 5586 read_vec_element(s, t0, a->rn, 0, MO_64); 5587 read_vec_element(s, t1, a->rm, 0, MO_64); 5588 fn(t0, t0, t1); 5589 write_fp_dreg(s, a->rd, t0); 5590 } 5591 return true; 5592 } 5593 5594 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5595 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5596 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5597 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5598 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5599 TRANS(SUB_s, do_int3_scalar_d, a, 
tcg_gen_sub_i64) 5600 5601 typedef struct ENVScalar2 { 5602 NeonGenTwoOpEnvFn *gen_bhs[3]; 5603 NeonGenTwo64OpEnvFn *gen_d; 5604 } ENVScalar2; 5605 5606 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5607 { 5608 if (!fp_access_check(s)) { 5609 return true; 5610 } 5611 if (a->esz == MO_64) { 5612 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5613 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5614 f->gen_d(t0, tcg_env, t0, t1); 5615 write_fp_dreg(s, a->rd, t0); 5616 } else { 5617 TCGv_i32 t0 = tcg_temp_new_i32(); 5618 TCGv_i32 t1 = tcg_temp_new_i32(); 5619 5620 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5621 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5622 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5623 write_fp_sreg(s, a->rd, t0); 5624 } 5625 return true; 5626 } 5627 5628 static const ENVScalar2 f_scalar_sqshl = { 5629 { gen_helper_neon_qshl_s8, 5630 gen_helper_neon_qshl_s16, 5631 gen_helper_neon_qshl_s32 }, 5632 gen_helper_neon_qshl_s64, 5633 }; 5634 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5635 5636 static const ENVScalar2 f_scalar_uqshl = { 5637 { gen_helper_neon_qshl_u8, 5638 gen_helper_neon_qshl_u16, 5639 gen_helper_neon_qshl_u32 }, 5640 gen_helper_neon_qshl_u64, 5641 }; 5642 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5643 5644 static const ENVScalar2 f_scalar_sqrshl = { 5645 { gen_helper_neon_qrshl_s8, 5646 gen_helper_neon_qrshl_s16, 5647 gen_helper_neon_qrshl_s32 }, 5648 gen_helper_neon_qrshl_s64, 5649 }; 5650 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5651 5652 static const ENVScalar2 f_scalar_uqrshl = { 5653 { gen_helper_neon_qrshl_u8, 5654 gen_helper_neon_qrshl_u16, 5655 gen_helper_neon_qrshl_u32 }, 5656 gen_helper_neon_qrshl_u64, 5657 }; 5658 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5659 5660 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5661 const ENVScalar2 *f) 5662 { 5663 if (a->esz == MO_16 || a->esz == MO_32) { 5664 return do_env_scalar2(s, a, f); 5665 } 5666 return false; 5667 } 5668 5669 static const ENVScalar2 f_scalar_sqdmulh = { 5670 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5671 }; 5672 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5673 5674 static const ENVScalar2 f_scalar_sqrdmulh = { 5675 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5676 }; 5677 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5678 5679 typedef struct ENVScalar3 { 5680 NeonGenThreeOpEnvFn *gen_hs[2]; 5681 } ENVScalar3; 5682 5683 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5684 const ENVScalar3 *f) 5685 { 5686 TCGv_i32 t0, t1, t2; 5687 5688 if (a->esz != MO_16 && a->esz != MO_32) { 5689 return false; 5690 } 5691 if (!fp_access_check(s)) { 5692 return true; 5693 } 5694 5695 t0 = tcg_temp_new_i32(); 5696 t1 = tcg_temp_new_i32(); 5697 t2 = tcg_temp_new_i32(); 5698 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5699 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5700 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5701 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5702 write_fp_sreg(s, a->rd, t0); 5703 return true; 5704 } 5705 5706 static const ENVScalar3 f_scalar_sqrdmlah = { 5707 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5708 }; 5709 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5710 5711 static const ENVScalar3 f_scalar_sqrdmlsh = { 5712 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5713 }; 5714 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5715 5716 
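/* Scalar integer compares produce all-ones or all-zeros in the 64-bit destination; tcg_gen_negsetcond_i64() computes exactly that, -(n cond m). */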
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5717 { 5718 if (fp_access_check(s)) { 5719 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5720 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5721 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5722 write_fp_dreg(s, a->rd, t0); 5723 } 5724 return true; 5725 } 5726 5727 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5728 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5729 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5730 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5731 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5732 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5733 5734 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, 5735 int data, 5736 gen_helper_gvec_3_ptr * const fns[3], 5737 ARMFPStatusFlavour fpsttype) 5738 { 5739 MemOp esz = a->esz; 5740 int check = fp_access_check_vector_hsd(s, a->q, esz); 5741 5742 if (check <= 0) { 5743 return check == 0; 5744 } 5745 5746 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, 5747 data, fns[esz - 1]); 5748 return true; 5749 } 5750 5751 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5752 gen_helper_gvec_3_ptr * const fns[3]) 5753 { 5754 return do_fp3_vector_with_fpsttype(s, a, data, fns, 5755 a->esz == MO_16 ? 5756 FPST_A64_F16 : FPST_A64); 5757 } 5758 5759 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5760 gen_helper_gvec_3_ptr * const fnormal[3], 5761 gen_helper_gvec_3_ptr * const fah[3]) 5762 { 5763 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); 5764 } 5765 5766 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5767 gen_helper_gvec_3_ptr * const fnormal[3], 5768 gen_helper_gvec_3_ptr * const fah[3]) 5769 { 5770 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? 
fah : fnormal, 5771 select_ah_fpst(s, a->esz)); 5772 } 5773 5774 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5775 gen_helper_gvec_fadd_h, 5776 gen_helper_gvec_fadd_s, 5777 gen_helper_gvec_fadd_d, 5778 }; 5779 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5780 5781 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5782 gen_helper_gvec_fsub_h, 5783 gen_helper_gvec_fsub_s, 5784 gen_helper_gvec_fsub_d, 5785 }; 5786 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5787 5788 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5789 gen_helper_gvec_fdiv_h, 5790 gen_helper_gvec_fdiv_s, 5791 gen_helper_gvec_fdiv_d, 5792 }; 5793 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5794 5795 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5796 gen_helper_gvec_fmul_h, 5797 gen_helper_gvec_fmul_s, 5798 gen_helper_gvec_fmul_d, 5799 }; 5800 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5801 5802 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5803 gen_helper_gvec_fmax_h, 5804 gen_helper_gvec_fmax_s, 5805 gen_helper_gvec_fmax_d, 5806 }; 5807 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { 5808 gen_helper_gvec_ah_fmax_h, 5809 gen_helper_gvec_ah_fmax_s, 5810 gen_helper_gvec_ah_fmax_d, 5811 }; 5812 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) 5813 5814 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5815 gen_helper_gvec_fmin_h, 5816 gen_helper_gvec_fmin_s, 5817 gen_helper_gvec_fmin_d, 5818 }; 5819 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { 5820 gen_helper_gvec_ah_fmin_h, 5821 gen_helper_gvec_ah_fmin_s, 5822 gen_helper_gvec_ah_fmin_d, 5823 }; 5824 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) 5825 5826 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5827 gen_helper_gvec_fmaxnum_h, 5828 gen_helper_gvec_fmaxnum_s, 5829 gen_helper_gvec_fmaxnum_d, 5830 }; 5831 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5832 5833 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5834 gen_helper_gvec_fminnum_h, 5835 gen_helper_gvec_fminnum_s, 5836 gen_helper_gvec_fminnum_d, 5837 }; 5838 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5839 5840 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5841 gen_helper_gvec_fmulx_h, 5842 gen_helper_gvec_fmulx_s, 5843 gen_helper_gvec_fmulx_d, 5844 }; 5845 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5846 5847 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5848 gen_helper_gvec_vfma_h, 5849 gen_helper_gvec_vfma_s, 5850 gen_helper_gvec_vfma_d, 5851 }; 5852 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5853 5854 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5855 gen_helper_gvec_vfms_h, 5856 gen_helper_gvec_vfms_s, 5857 gen_helper_gvec_vfms_d, 5858 }; 5859 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { 5860 gen_helper_gvec_ah_vfms_h, 5861 gen_helper_gvec_ah_vfms_s, 5862 gen_helper_gvec_ah_vfms_d, 5863 }; 5864 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) 5865 5866 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5867 gen_helper_gvec_fceq_h, 5868 gen_helper_gvec_fceq_s, 5869 gen_helper_gvec_fceq_d, 5870 }; 5871 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5872 5873 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5874 gen_helper_gvec_fcge_h, 5875 gen_helper_gvec_fcge_s, 5876 gen_helper_gvec_fcge_d, 5877 }; 5878 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5879 5880 static gen_helper_gvec_3_ptr * const 
f_vector_fcmgt[3] = { 5881 gen_helper_gvec_fcgt_h, 5882 gen_helper_gvec_fcgt_s, 5883 gen_helper_gvec_fcgt_d, 5884 }; 5885 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5886 5887 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5888 gen_helper_gvec_facge_h, 5889 gen_helper_gvec_facge_s, 5890 gen_helper_gvec_facge_d, 5891 }; 5892 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5893 5894 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5895 gen_helper_gvec_facgt_h, 5896 gen_helper_gvec_facgt_s, 5897 gen_helper_gvec_facgt_d, 5898 }; 5899 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5900 5901 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5902 gen_helper_gvec_fabd_h, 5903 gen_helper_gvec_fabd_s, 5904 gen_helper_gvec_fabd_d, 5905 }; 5906 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { 5907 gen_helper_gvec_ah_fabd_h, 5908 gen_helper_gvec_ah_fabd_s, 5909 gen_helper_gvec_ah_fabd_d, 5910 }; 5911 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) 5912 5913 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5914 gen_helper_gvec_recps_h, 5915 gen_helper_gvec_recps_s, 5916 gen_helper_gvec_recps_d, 5917 }; 5918 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { 5919 gen_helper_gvec_ah_recps_h, 5920 gen_helper_gvec_ah_recps_s, 5921 gen_helper_gvec_ah_recps_d, 5922 }; 5923 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) 5924 5925 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5926 gen_helper_gvec_rsqrts_h, 5927 gen_helper_gvec_rsqrts_s, 5928 gen_helper_gvec_rsqrts_d, 5929 }; 5930 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { 5931 gen_helper_gvec_ah_rsqrts_h, 5932 gen_helper_gvec_ah_rsqrts_s, 5933 gen_helper_gvec_ah_rsqrts_d, 5934 }; 5935 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) 5936 5937 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5938 gen_helper_gvec_faddp_h, 5939 gen_helper_gvec_faddp_s, 5940 gen_helper_gvec_faddp_d, 5941 }; 5942 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5943 5944 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5945 gen_helper_gvec_fmaxp_h, 5946 gen_helper_gvec_fmaxp_s, 5947 gen_helper_gvec_fmaxp_d, 5948 }; 5949 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { 5950 gen_helper_gvec_ah_fmaxp_h, 5951 gen_helper_gvec_ah_fmaxp_s, 5952 gen_helper_gvec_ah_fmaxp_d, 5953 }; 5954 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) 5955 5956 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5957 gen_helper_gvec_fminp_h, 5958 gen_helper_gvec_fminp_s, 5959 gen_helper_gvec_fminp_d, 5960 }; 5961 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { 5962 gen_helper_gvec_ah_fminp_h, 5963 gen_helper_gvec_ah_fminp_s, 5964 gen_helper_gvec_ah_fminp_d, 5965 }; 5966 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) 5967 5968 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5969 gen_helper_gvec_fmaxnump_h, 5970 gen_helper_gvec_fmaxnump_s, 5971 gen_helper_gvec_fmaxnump_d, 5972 }; 5973 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5974 5975 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5976 gen_helper_gvec_fminnump_h, 5977 gen_helper_gvec_fminnump_s, 5978 gen_helper_gvec_fminnump_d, 5979 }; 5980 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5981 5982 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5983 { 5984 if 
(fp_access_check(s)) { 5985 int data = (is_2 << 1) | is_s; 5986 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5987 vec_full_reg_offset(s, a->rn), 5988 vec_full_reg_offset(s, a->rm), tcg_env, 5989 a->q ? 16 : 8, vec_full_reg_size(s), 5990 data, gen_helper_gvec_fmlal_a64); 5991 } 5992 return true; 5993 } 5994 5995 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5996 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5997 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5998 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 5999 6000 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 6001 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 6002 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 6003 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 6004 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 6005 6006 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 6007 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 6008 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 6009 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 6010 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 6011 6012 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 6013 { 6014 if (fp_access_check(s)) { 6015 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 6016 } 6017 return true; 6018 } 6019 6020 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 6021 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 6022 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 6023 6024 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 6025 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 6026 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 6027 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 6028 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 6029 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 6030 6031 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 6032 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 6033 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 6034 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 6035 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 6036 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 6037 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 6038 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 6039 6040 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 6041 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 6042 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 6043 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 6044 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 6045 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 6046 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 6047 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 6048 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 6049 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 6050 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 6051 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 6052 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 6053 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 6054 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 6055 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 6056 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 6057 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 6058 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 6059 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 6060 6061 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 6062 { 6063 if (a->esz == MO_64 && !a->q) { 6064 
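        /*
         * 64-bit elements exist only as a full 2D vector (Q=1); there is
         * no 1D vector form of these compares, so reject size=MO_64
         * with Q=0 as an unallocated encoding.
         */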
return false; 6065 } 6066 if (fp_access_check(s)) { 6067 tcg_gen_gvec_cmp(cond, a->esz, 6068 vec_full_reg_offset(s, a->rd), 6069 vec_full_reg_offset(s, a->rn), 6070 vec_full_reg_offset(s, a->rm), 6071 a->q ? 16 : 8, vec_full_reg_size(s)); 6072 } 6073 return true; 6074 } 6075 6076 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 6077 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 6078 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 6079 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 6080 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 6081 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 6082 6083 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 6084 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 6085 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 6086 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 6087 6088 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 6089 gen_helper_gvec_4 *fn) 6090 { 6091 if (fp_access_check(s)) { 6092 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6093 } 6094 return true; 6095 } 6096 6097 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 6098 gen_helper_gvec_4_ptr *fn) 6099 { 6100 if (fp_access_check(s)) { 6101 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6102 } 6103 return true; 6104 } 6105 6106 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 6107 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 6108 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 6109 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 6110 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 6111 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 6112 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 6113 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 6114 6115 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 6116 { 6117 if (!dc_isar_feature(aa64_bf16, s)) { 6118 return false; 6119 } 6120 if (fp_access_check(s)) { 6121 /* Q bit selects BFMLALB vs BFMLALT. */ 6122 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6123 s->fpcr_ah ? FPST_AH : FPST_A64, a->q, 6124 gen_helper_gvec_bfmlal); 6125 } 6126 return true; 6127 } 6128 6129 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 6130 gen_helper_gvec_fcaddh, 6131 gen_helper_gvec_fcadds, 6132 gen_helper_gvec_fcaddd, 6133 }; 6134 /* 6135 * Encode FPCR.AH into the data so the helper knows whether the 6136 * negations it does should avoid flipping the sign bit on a NaN 6137 */ 6138 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), 6139 f_vector_fcadd) 6140 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), 6141 f_vector_fcadd) 6142 6143 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 6144 { 6145 static gen_helper_gvec_4_ptr * const fn[] = { 6146 [MO_16] = gen_helper_gvec_fcmlah, 6147 [MO_32] = gen_helper_gvec_fcmlas, 6148 [MO_64] = gen_helper_gvec_fcmlad, 6149 }; 6150 int check; 6151 6152 if (!dc_isar_feature(aa64_fcma, s)) { 6153 return false; 6154 } 6155 6156 check = fp_access_check_vector_hsd(s, a->q, a->esz); 6157 if (check <= 0) { 6158 return check == 0; 6159 } 6160 6161 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6162 a->esz == MO_16 ? 
                      FPST_A64_F16 : FPST_A64,
                      a->rot | (s->fpcr_ah << 2), fn[a->esz]);
    return true;
}

/*
 * Widening vector x vector/indexed.
 *
 * These read from the top or bottom half of a 128-bit vector.
 * After widening, optionally accumulate with a 128-bit vector.
 * Implement these inline, as the number of elements is limited
 * and the related SVE and SME operations on larger vectors use
 * even/odd elements instead of top/bottom half.
 *
 * If idx >= 0, operand 2 is indexed, otherwise vector.
 * If acc, operand 0 is loaded with rd.
 */

/* For low half, iterating up. */
static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
                            int rd, int rn, int rm, int idx,
                            NeonGenTwo64OpFn *fn, bool acc)
{
    TCGv_i64 tcg_op0 = tcg_temp_new_i64();
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
    MemOp esz = memop & MO_SIZE;
    int half = 8 >> esz;
    int top_swap, top_half;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    if (idx >= 0) {
        read_vec_element(s, tcg_op2, rm, idx, memop);
    }

    /*
     * For top half inputs, iterate forward; backward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     * Use top_swap to conditionally invert the forward iteration index.
     */
    top_swap = top ? 0 : half - 1;
    top_half = top ? half : 0;

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
        if (idx < 0) {
            read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
        }
        if (acc) {
            read_vec_element(s, tcg_op0, rd, elt, memop + 1);
        }
        fn(tcg_op0, tcg_op1, tcg_op2);
        write_vec_element(s, tcg_op0, rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, rd);
    return true;
}

static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_sub_i64(d, d, t);
}

TRANS(SMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(UMULL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(UMLAL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(SMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)
TRANS(UMLSL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)

TRANS(SMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(UMULL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(UMLAL_vi,
do_3op_widening, 6273 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6274 gen_muladd_i64, true) 6275 TRANS(SMLSL_vi, do_3op_widening, 6276 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6277 gen_mulsub_i64, true) 6278 TRANS(UMLSL_vi, do_3op_widening, 6279 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6280 gen_mulsub_i64, true) 6281 6282 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6283 { 6284 TCGv_i64 t1 = tcg_temp_new_i64(); 6285 TCGv_i64 t2 = tcg_temp_new_i64(); 6286 6287 tcg_gen_sub_i64(t1, n, m); 6288 tcg_gen_sub_i64(t2, m, n); 6289 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 6290 } 6291 6292 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6293 { 6294 TCGv_i64 t1 = tcg_temp_new_i64(); 6295 TCGv_i64 t2 = tcg_temp_new_i64(); 6296 6297 tcg_gen_sub_i64(t1, n, m); 6298 tcg_gen_sub_i64(t2, m, n); 6299 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 6300 } 6301 6302 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6303 { 6304 TCGv_i64 t = tcg_temp_new_i64(); 6305 gen_sabd_i64(t, n, m); 6306 tcg_gen_add_i64(d, d, t); 6307 } 6308 6309 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6310 { 6311 TCGv_i64 t = tcg_temp_new_i64(); 6312 gen_uabd_i64(t, n, m); 6313 tcg_gen_add_i64(d, d, t); 6314 } 6315 6316 TRANS(SADDL_v, do_3op_widening, 6317 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6318 tcg_gen_add_i64, false) 6319 TRANS(UADDL_v, do_3op_widening, 6320 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6321 tcg_gen_add_i64, false) 6322 TRANS(SSUBL_v, do_3op_widening, 6323 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6324 tcg_gen_sub_i64, false) 6325 TRANS(USUBL_v, do_3op_widening, 6326 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6327 tcg_gen_sub_i64, false) 6328 TRANS(SABDL_v, do_3op_widening, 6329 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6330 gen_sabd_i64, false) 6331 TRANS(UABDL_v, do_3op_widening, 6332 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6333 gen_uabd_i64, false) 6334 TRANS(SABAL_v, do_3op_widening, 6335 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6336 gen_saba_i64, true) 6337 TRANS(UABAL_v, do_3op_widening, 6338 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6339 gen_uaba_i64, true) 6340 6341 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6342 { 6343 tcg_gen_mul_i64(d, n, m); 6344 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 6345 } 6346 6347 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6348 { 6349 tcg_gen_mul_i64(d, n, m); 6350 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 6351 } 6352 6353 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6354 { 6355 TCGv_i64 t = tcg_temp_new_i64(); 6356 6357 tcg_gen_mul_i64(t, n, m); 6358 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6359 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6360 } 6361 6362 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6363 { 6364 TCGv_i64 t = tcg_temp_new_i64(); 6365 6366 tcg_gen_mul_i64(t, n, m); 6367 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6368 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6369 } 6370 6371 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6372 { 6373 TCGv_i64 t = tcg_temp_new_i64(); 6374 6375 tcg_gen_mul_i64(t, n, m); 6376 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6377 tcg_gen_neg_i64(t, t); 6378 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6379 } 6380 6381 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6382 { 6383 TCGv_i64 t = tcg_temp_new_i64(); 6384 6385 tcg_gen_mul_i64(t, n, m); 6386 
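    /*
     * SQDMLSL: the product is doubled via a saturating self-add,
     * negated, and then accumulated into the destination with a
     * second saturating add, i.e. a saturating multiply-double-subtract.
     */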
gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
    tcg_gen_neg_i64(t, t);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
}

TRANS(SQDMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

TRANS(SQDMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
                           MemOp sign, bool sub)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int top_swap = top ? 0 : half - 1;
    int top_half = top ? half : 0;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, a->rd);
    return true;
}

TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
TRANS(UADDW, do_addsub_wide, a, 0, false)
TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
TRANS(USUBW, do_addsub_wide, a, 0, true)

static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
                                 bool sub, bool round)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int ebits = 8 << esz;
    uint64_t rbit = 1ull << (ebits - 1);
    int top_swap, top_half;

    /* There are no 128x128->64 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    /*
     * For top half inputs, iterate backward; forward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     */
    top_swap = top ? half - 1 : 0;
    top_half = top ?
half : 0; 6480 6481 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6482 int elt = elt_fwd ^ top_swap; 6483 6484 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6485 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6486 if (sub) { 6487 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6488 } else { 6489 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6490 } 6491 if (round) { 6492 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6493 } 6494 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6495 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6496 } 6497 clear_vec_high(s, top, a->rd); 6498 return true; 6499 } 6500 6501 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6502 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6503 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6504 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6505 6506 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6507 { 6508 if (fp_access_check(s)) { 6509 /* The Q field specifies lo/hi half input for these insns. */ 6510 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6511 } 6512 return true; 6513 } 6514 6515 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6516 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6517 6518 /* 6519 * Advanced SIMD scalar/vector x indexed element 6520 */ 6521 6522 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6523 { 6524 switch (a->esz) { 6525 case MO_64: 6526 if (fp_access_check(s)) { 6527 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6528 TCGv_i64 t1 = tcg_temp_new_i64(); 6529 6530 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6531 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6532 write_fp_dreg_merging(s, a->rd, a->rn, t0); 6533 } 6534 break; 6535 case MO_32: 6536 if (fp_access_check(s)) { 6537 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6538 TCGv_i32 t1 = tcg_temp_new_i32(); 6539 6540 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6541 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6542 write_fp_sreg_merging(s, a->rd, a->rn, t0); 6543 } 6544 break; 6545 case MO_16: 6546 if (!dc_isar_feature(aa64_fp16, s)) { 6547 return false; 6548 } 6549 if (fp_access_check(s)) { 6550 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6551 TCGv_i32 t1 = tcg_temp_new_i32(); 6552 6553 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6554 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6555 write_fp_hreg_merging(s, a->rd, a->rn, t0); 6556 } 6557 break; 6558 default: 6559 g_assert_not_reached(); 6560 } 6561 return true; 6562 } 6563 6564 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6565 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6566 6567 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6568 { 6569 switch (a->esz) { 6570 case MO_64: 6571 if (fp_access_check(s)) { 6572 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6573 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6574 TCGv_i64 t2 = tcg_temp_new_i64(); 6575 6576 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6577 if (neg) { 6578 gen_vfp_maybe_ah_negd(s, t1, t1); 6579 } 6580 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6581 write_fp_dreg_merging(s, a->rd, a->rd, t0); 6582 } 6583 break; 6584 case MO_32: 6585 if (fp_access_check(s)) { 6586 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6587 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6588 TCGv_i32 t2 = tcg_temp_new_i32(); 6589 6590 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6591 if (neg) { 6592 gen_vfp_maybe_ah_negs(s, t1, t1); 6593 } 6594 gen_helper_vfp_muladds(t0, t1, t2, t0, 
fpstatus_ptr(FPST_A64)); 6595 write_fp_sreg_merging(s, a->rd, a->rd, t0); 6596 } 6597 break; 6598 case MO_16: 6599 if (!dc_isar_feature(aa64_fp16, s)) { 6600 return false; 6601 } 6602 if (fp_access_check(s)) { 6603 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6604 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6605 TCGv_i32 t2 = tcg_temp_new_i32(); 6606 6607 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6608 if (neg) { 6609 gen_vfp_maybe_ah_negh(s, t1, t1); 6610 } 6611 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6612 fpstatus_ptr(FPST_A64_F16)); 6613 write_fp_hreg_merging(s, a->rd, a->rd, t0); 6614 } 6615 break; 6616 default: 6617 g_assert_not_reached(); 6618 } 6619 return true; 6620 } 6621 6622 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6623 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6624 6625 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6626 const ENVScalar2 *f) 6627 { 6628 if (a->esz < MO_16 || a->esz > MO_32) { 6629 return false; 6630 } 6631 if (fp_access_check(s)) { 6632 TCGv_i32 t0 = tcg_temp_new_i32(); 6633 TCGv_i32 t1 = tcg_temp_new_i32(); 6634 6635 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6636 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6637 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6638 write_fp_sreg(s, a->rd, t0); 6639 } 6640 return true; 6641 } 6642 6643 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6644 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6645 6646 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6647 const ENVScalar3 *f) 6648 { 6649 if (a->esz < MO_16 || a->esz > MO_32) { 6650 return false; 6651 } 6652 if (fp_access_check(s)) { 6653 TCGv_i32 t0 = tcg_temp_new_i32(); 6654 TCGv_i32 t1 = tcg_temp_new_i32(); 6655 TCGv_i32 t2 = tcg_temp_new_i32(); 6656 6657 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6658 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6659 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6660 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6661 write_fp_sreg(s, a->rd, t0); 6662 } 6663 return true; 6664 } 6665 6666 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6667 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6668 6669 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6670 NeonGenTwo64OpFn *fn, bool acc) 6671 { 6672 if (fp_access_check(s)) { 6673 TCGv_i64 t0 = tcg_temp_new_i64(); 6674 TCGv_i64 t1 = tcg_temp_new_i64(); 6675 TCGv_i64 t2 = tcg_temp_new_i64(); 6676 6677 if (acc) { 6678 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6679 } 6680 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6681 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6682 fn(t0, t1, t2); 6683 6684 /* Clear the whole register first, then store scalar. */ 6685 clear_vec(s, a->rd); 6686 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6687 } 6688 return true; 6689 } 6690 6691 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6692 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6693 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6694 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6695 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6696 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6697 6698 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6699 gen_helper_gvec_3_ptr * const fns[3]) 6700 { 6701 MemOp esz = a->esz; 6702 int check = fp_access_check_vector_hsd(s, a->q, esz); 6703 6704 if (check <= 0) { 6705 return check == 0; 6706 } 6707 6708 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6709 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6710 a->idx, fns[esz - 1]); 6711 return true; 6712 } 6713 6714 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6715 gen_helper_gvec_fmul_idx_h, 6716 gen_helper_gvec_fmul_idx_s, 6717 gen_helper_gvec_fmul_idx_d, 6718 }; 6719 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6720 6721 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6722 gen_helper_gvec_fmulx_idx_h, 6723 gen_helper_gvec_fmulx_idx_s, 6724 gen_helper_gvec_fmulx_idx_d, 6725 }; 6726 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6727 6728 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6729 { 6730 static gen_helper_gvec_4_ptr * const fns[3][3] = { 6731 { gen_helper_gvec_fmla_idx_h, 6732 gen_helper_gvec_fmla_idx_s, 6733 gen_helper_gvec_fmla_idx_d }, 6734 { gen_helper_gvec_fmls_idx_h, 6735 gen_helper_gvec_fmls_idx_s, 6736 gen_helper_gvec_fmls_idx_d }, 6737 { gen_helper_gvec_ah_fmls_idx_h, 6738 gen_helper_gvec_ah_fmls_idx_s, 6739 gen_helper_gvec_ah_fmls_idx_d }, 6740 }; 6741 MemOp esz = a->esz; 6742 int check = fp_access_check_vector_hsd(s, a->q, esz); 6743 6744 if (check <= 0) { 6745 return check == 0; 6746 } 6747 6748 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6749 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6750 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); 6751 return true; 6752 } 6753 6754 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6755 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6756 6757 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6758 { 6759 if (fp_access_check(s)) { 6760 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6761 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6762 vec_full_reg_offset(s, a->rn), 6763 vec_full_reg_offset(s, a->rm), tcg_env, 6764 a->q ? 
16 : 8, vec_full_reg_size(s), 6765 data, gen_helper_gvec_fmlal_idx_a64); 6766 } 6767 return true; 6768 } 6769 6770 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6771 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6772 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6773 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6774 6775 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6776 gen_helper_gvec_3 * const fns[2]) 6777 { 6778 assert(a->esz == MO_16 || a->esz == MO_32); 6779 if (fp_access_check(s)) { 6780 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6781 } 6782 return true; 6783 } 6784 6785 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6786 gen_helper_gvec_mul_idx_h, 6787 gen_helper_gvec_mul_idx_s, 6788 }; 6789 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6790 6791 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6792 { 6793 static gen_helper_gvec_4 * const fns[2][2] = { 6794 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6795 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6796 }; 6797 6798 assert(a->esz == MO_16 || a->esz == MO_32); 6799 if (fp_access_check(s)) { 6800 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6801 a->idx, fns[a->esz - 1][sub]); 6802 } 6803 return true; 6804 } 6805 6806 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6807 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6808 6809 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6810 gen_helper_gvec_4 * const fns[2]) 6811 { 6812 assert(a->esz == MO_16 || a->esz == MO_32); 6813 if (fp_access_check(s)) { 6814 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6815 vec_full_reg_offset(s, a->rn), 6816 vec_full_reg_offset(s, a->rm), 6817 offsetof(CPUARMState, vfp.qc), 6818 a->q ? 
16 : 8, vec_full_reg_size(s), 6819 a->idx, fns[a->esz - 1]); 6820 } 6821 return true; 6822 } 6823 6824 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6825 gen_helper_neon_sqdmulh_idx_h, 6826 gen_helper_neon_sqdmulh_idx_s, 6827 }; 6828 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6829 6830 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6831 gen_helper_neon_sqrdmulh_idx_h, 6832 gen_helper_neon_sqrdmulh_idx_s, 6833 }; 6834 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6835 6836 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6837 gen_helper_neon_sqrdmlah_idx_h, 6838 gen_helper_neon_sqrdmlah_idx_s, 6839 }; 6840 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6841 f_vector_idx_sqrdmlah) 6842 6843 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6844 gen_helper_neon_sqrdmlsh_idx_h, 6845 gen_helper_neon_sqrdmlsh_idx_s, 6846 }; 6847 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6848 f_vector_idx_sqrdmlsh) 6849 6850 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6851 gen_helper_gvec_4 *fn) 6852 { 6853 if (fp_access_check(s)) { 6854 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6855 } 6856 return true; 6857 } 6858 6859 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6860 gen_helper_gvec_4_ptr *fn) 6861 { 6862 if (fp_access_check(s)) { 6863 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6864 } 6865 return true; 6866 } 6867 6868 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6869 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6870 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6871 gen_helper_gvec_sudot_idx_b) 6872 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6873 gen_helper_gvec_usdot_idx_b) 6874 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6875 gen_helper_gvec_bfdot_idx) 6876 6877 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6878 { 6879 if (!dc_isar_feature(aa64_bf16, s)) { 6880 return false; 6881 } 6882 if (fp_access_check(s)) { 6883 /* Q bit selects BFMLALB vs BFMLALT. */ 6884 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6885 s->fpcr_ah ? FPST_AH : FPST_A64, 6886 (a->idx << 1) | a->q, 6887 gen_helper_gvec_bfmlal_idx); 6888 } 6889 return true; 6890 } 6891 6892 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6893 { 6894 gen_helper_gvec_4_ptr *fn; 6895 6896 if (!dc_isar_feature(aa64_fcma, s)) { 6897 return false; 6898 } 6899 switch (a->esz) { 6900 case MO_16: 6901 if (!dc_isar_feature(aa64_fp16, s)) { 6902 return false; 6903 } 6904 fn = gen_helper_gvec_fcmlah_idx; 6905 break; 6906 case MO_32: 6907 fn = gen_helper_gvec_fcmlas_idx; 6908 break; 6909 default: 6910 g_assert_not_reached(); 6911 } 6912 if (fp_access_check(s)) { 6913 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6914 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 6915 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); 6916 } 6917 return true; 6918 } 6919 6920 /* 6921 * Advanced SIMD scalar pairwise 6922 */ 6923 6924 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6925 { 6926 switch (a->esz) { 6927 case MO_64: 6928 if (fp_access_check(s)) { 6929 TCGv_i64 t0 = tcg_temp_new_i64(); 6930 TCGv_i64 t1 = tcg_temp_new_i64(); 6931 6932 read_vec_element(s, t0, a->rn, 0, MO_64); 6933 read_vec_element(s, t1, a->rn, 1, MO_64); 6934 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6935 write_fp_dreg(s, a->rd, t0); 6936 } 6937 break; 6938 case MO_32: 6939 if (fp_access_check(s)) { 6940 TCGv_i32 t0 = tcg_temp_new_i32(); 6941 TCGv_i32 t1 = tcg_temp_new_i32(); 6942 6943 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6944 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6945 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6946 write_fp_sreg(s, a->rd, t0); 6947 } 6948 break; 6949 case MO_16: 6950 if (!dc_isar_feature(aa64_fp16, s)) { 6951 return false; 6952 } 6953 if (fp_access_check(s)) { 6954 TCGv_i32 t0 = tcg_temp_new_i32(); 6955 TCGv_i32 t1 = tcg_temp_new_i32(); 6956 6957 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6958 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6959 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6960 write_fp_sreg(s, a->rd, t0); 6961 } 6962 break; 6963 default: 6964 g_assert_not_reached(); 6965 } 6966 return true; 6967 } 6968 6969 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, 6970 const FPScalar *fnormal, 6971 const FPScalar *fah) 6972 { 6973 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); 6974 } 6975 6976 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6977 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) 6978 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) 6979 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6980 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6981 6982 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6983 { 6984 if (fp_access_check(s)) { 6985 TCGv_i64 t0 = tcg_temp_new_i64(); 6986 TCGv_i64 t1 = tcg_temp_new_i64(); 6987 6988 read_vec_element(s, t0, a->rn, 0, MO_64); 6989 read_vec_element(s, t1, a->rn, 1, MO_64); 6990 tcg_gen_add_i64(t0, t0, t1); 6991 write_fp_dreg(s, a->rd, t0); 6992 } 6993 return true; 6994 } 6995 6996 /* 6997 * Floating-point conditional select 6998 */ 6999 7000 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 7001 { 7002 TCGv_i64 t_true, t_false; 7003 DisasCompare64 c; 7004 int check = fp_access_check_scalar_hsd(s, a->esz); 7005 7006 if (check <= 0) { 7007 return check == 0; 7008 } 7009 7010 /* Zero extend sreg & hreg inputs to 64 bits now. */ 7011 t_true = tcg_temp_new_i64(); 7012 t_false = tcg_temp_new_i64(); 7013 read_vec_element(s, t_true, a->rn, 0, a->esz); 7014 read_vec_element(s, t_false, a->rm, 0, a->esz); 7015 7016 a64_test_cc(&c, a->cond); 7017 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 7018 t_true, t_false); 7019 7020 /* 7021 * Note that sregs & hregs write back zeros to the high bits, 7022 * and we've already done the zero-extension. 
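 * A single 64-bit write of the selected value therefore gives the
 * architecturally correct FCSEL result for all three element sizes.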
7023 */ 7024 write_fp_dreg(s, a->rd, t_true); 7025 return true; 7026 } 7027 7028 /* 7029 * Advanced SIMD Extract 7030 */ 7031 7032 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 7033 { 7034 if (fp_access_check(s)) { 7035 TCGv_i64 lo = read_fp_dreg(s, a->rn); 7036 if (a->imm != 0) { 7037 TCGv_i64 hi = read_fp_dreg(s, a->rm); 7038 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 7039 } 7040 write_fp_dreg(s, a->rd, lo); 7041 } 7042 return true; 7043 } 7044 7045 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 7046 { 7047 TCGv_i64 lo, hi; 7048 int pos = (a->imm & 7) * 8; 7049 int elt = a->imm >> 3; 7050 7051 if (!fp_access_check(s)) { 7052 return true; 7053 } 7054 7055 lo = tcg_temp_new_i64(); 7056 hi = tcg_temp_new_i64(); 7057 7058 read_vec_element(s, lo, a->rn, elt, MO_64); 7059 elt++; 7060 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 7061 elt++; 7062 7063 if (pos != 0) { 7064 TCGv_i64 hh = tcg_temp_new_i64(); 7065 tcg_gen_extract2_i64(lo, lo, hi, pos); 7066 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 7067 tcg_gen_extract2_i64(hi, hi, hh, pos); 7068 } 7069 7070 write_vec_element(s, lo, a->rd, 0, MO_64); 7071 write_vec_element(s, hi, a->rd, 1, MO_64); 7072 clear_vec_high(s, true, a->rd); 7073 return true; 7074 } 7075 7076 /* 7077 * Floating-point data-processing (3 source) 7078 */ 7079 7080 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 7081 { 7082 TCGv_ptr fpst; 7083 7084 /* 7085 * These are fused multiply-add. Note that doing the negations here 7086 * as separate steps is correct: an input NaN should come out with 7087 * its sign bit flipped if it is a negated-input. 7088 */ 7089 switch (a->esz) { 7090 case MO_64: 7091 if (fp_access_check(s)) { 7092 TCGv_i64 tn = read_fp_dreg(s, a->rn); 7093 TCGv_i64 tm = read_fp_dreg(s, a->rm); 7094 TCGv_i64 ta = read_fp_dreg(s, a->ra); 7095 7096 if (neg_a) { 7097 gen_vfp_maybe_ah_negd(s, ta, ta); 7098 } 7099 if (neg_n) { 7100 gen_vfp_maybe_ah_negd(s, tn, tn); 7101 } 7102 fpst = fpstatus_ptr(FPST_A64); 7103 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 7104 write_fp_dreg_merging(s, a->rd, a->ra, ta); 7105 } 7106 break; 7107 7108 case MO_32: 7109 if (fp_access_check(s)) { 7110 TCGv_i32 tn = read_fp_sreg(s, a->rn); 7111 TCGv_i32 tm = read_fp_sreg(s, a->rm); 7112 TCGv_i32 ta = read_fp_sreg(s, a->ra); 7113 7114 if (neg_a) { 7115 gen_vfp_maybe_ah_negs(s, ta, ta); 7116 } 7117 if (neg_n) { 7118 gen_vfp_maybe_ah_negs(s, tn, tn); 7119 } 7120 fpst = fpstatus_ptr(FPST_A64); 7121 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 7122 write_fp_sreg_merging(s, a->rd, a->ra, ta); 7123 } 7124 break; 7125 7126 case MO_16: 7127 if (!dc_isar_feature(aa64_fp16, s)) { 7128 return false; 7129 } 7130 if (fp_access_check(s)) { 7131 TCGv_i32 tn = read_fp_hreg(s, a->rn); 7132 TCGv_i32 tm = read_fp_hreg(s, a->rm); 7133 TCGv_i32 ta = read_fp_hreg(s, a->ra); 7134 7135 if (neg_a) { 7136 gen_vfp_maybe_ah_negh(s, ta, ta); 7137 } 7138 if (neg_n) { 7139 gen_vfp_maybe_ah_negh(s, tn, tn); 7140 } 7141 fpst = fpstatus_ptr(FPST_A64_F16); 7142 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 7143 write_fp_hreg_merging(s, a->rd, a->ra, ta); 7144 } 7145 break; 7146 7147 default: 7148 return false; 7149 } 7150 return true; 7151 } 7152 7153 TRANS(FMADD, do_fmadd, a, false, false) 7154 TRANS(FNMADD, do_fmadd, a, true, true) 7155 TRANS(FMSUB, do_fmadd, a, false, true) 7156 TRANS(FNMSUB, do_fmadd, a, true, false) 7157 7158 /* 7159 * Advanced SIMD Across Lanes 7160 */ 7161 7162 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool 
widen, 7163 MemOp src_sign, NeonGenTwo64OpFn *fn) 7164 { 7165 TCGv_i64 tcg_res, tcg_elt; 7166 MemOp src_mop = a->esz | src_sign; 7167 int elements = (a->q ? 16 : 8) >> a->esz; 7168 7169 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 7170 if (elements < 4) { 7171 return false; 7172 } 7173 if (!fp_access_check(s)) { 7174 return true; 7175 } 7176 7177 tcg_res = tcg_temp_new_i64(); 7178 tcg_elt = tcg_temp_new_i64(); 7179 7180 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 7181 for (int i = 1; i < elements; i++) { 7182 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 7183 fn(tcg_res, tcg_res, tcg_elt); 7184 } 7185 7186 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 7187 write_fp_dreg(s, a->rd, tcg_res); 7188 return true; 7189 } 7190 7191 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 7192 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 7193 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 7194 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 7195 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 7196 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 7197 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 7198 7199 /* 7200 * do_fp_reduction helper 7201 * 7202 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7203 * important for correct NaN propagation that we do these 7204 * operations in exactly the order specified by the pseudocode. 7205 * 7206 * This is a recursive function. 7207 */ 7208 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 7209 int ebase, int ecount, TCGv_ptr fpst, 7210 NeonGenTwoSingleOpFn *fn) 7211 { 7212 if (ecount == 1) { 7213 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 7214 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 7215 return tcg_elem; 7216 } else { 7217 int half = ecount >> 1; 7218 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7219 7220 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 7221 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 7222 tcg_res = tcg_temp_new_i32(); 7223 7224 fn(tcg_res, tcg_lo, tcg_hi, fpst); 7225 return tcg_res; 7226 } 7227 } 7228 7229 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 7230 NeonGenTwoSingleOpFn *fnormal, 7231 NeonGenTwoSingleOpFn *fah) 7232 { 7233 if (fp_access_check(s)) { 7234 MemOp esz = a->esz; 7235 int elts = (a->q ? 16 : 8) >> esz; 7236 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 7237 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, 7238 s->fpcr_ah ? 
fah : fnormal); 7239 write_fp_sreg(s, a->rd, res); 7240 } 7241 return true; 7242 } 7243 7244 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, 7245 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) 7246 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, 7247 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) 7248 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, 7249 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) 7250 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, 7251 gen_helper_vfp_minh, gen_helper_vfp_ah_minh) 7252 7253 TRANS(FMAXNMV_s, do_fp_reduction, a, 7254 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) 7255 TRANS(FMINNMV_s, do_fp_reduction, a, 7256 gen_helper_vfp_minnums, gen_helper_vfp_minnums) 7257 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) 7258 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) 7259 7260 /* 7261 * Floating-point Immediate 7262 */ 7263 7264 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 7265 { 7266 int check = fp_access_check_scalar_hsd(s, a->esz); 7267 uint64_t imm; 7268 7269 if (check <= 0) { 7270 return check == 0; 7271 } 7272 7273 imm = vfp_expand_imm(a->esz, a->imm); 7274 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 7275 return true; 7276 } 7277 7278 /* 7279 * Floating point compare, conditional compare 7280 */ 7281 7282 static void handle_fp_compare(DisasContext *s, int size, 7283 unsigned int rn, unsigned int rm, 7284 bool cmp_with_zero, bool signal_all_nans) 7285 { 7286 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 7287 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); 7288 7289 if (size == MO_64) { 7290 TCGv_i64 tcg_vn, tcg_vm; 7291 7292 tcg_vn = read_fp_dreg(s, rn); 7293 if (cmp_with_zero) { 7294 tcg_vm = tcg_constant_i64(0); 7295 } else { 7296 tcg_vm = read_fp_dreg(s, rm); 7297 } 7298 if (signal_all_nans) { 7299 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7300 } else { 7301 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7302 } 7303 } else { 7304 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 7305 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 7306 7307 read_vec_element_i32(s, tcg_vn, rn, 0, size); 7308 if (cmp_with_zero) { 7309 tcg_gen_movi_i32(tcg_vm, 0); 7310 } else { 7311 read_vec_element_i32(s, tcg_vm, rm, 0, size); 7312 } 7313 7314 switch (size) { 7315 case MO_32: 7316 if (signal_all_nans) { 7317 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7318 } else { 7319 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7320 } 7321 break; 7322 case MO_16: 7323 if (signal_all_nans) { 7324 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7325 } else { 7326 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7327 } 7328 break; 7329 default: 7330 g_assert_not_reached(); 7331 } 7332 } 7333 7334 gen_set_nzcv(tcg_flags); 7335 } 7336 7337 /* FCMP, FCMPE */ 7338 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 7339 { 7340 int check = fp_access_check_scalar_hsd(s, a->esz); 7341 7342 if (check <= 0) { 7343 return check == 0; 7344 } 7345 7346 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 7347 return true; 7348 } 7349 7350 /* FCCMP, FCCMPE */ 7351 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 7352 { 7353 TCGLabel *label_continue = NULL; 7354 int check = fp_access_check_scalar_hsd(s, a->esz); 7355 7356 if (check <= 0) { 7357 return check == 0; 7358 } 7359 7360 if (a->cond < 0x0e) { /* not always */ 7361 TCGLabel *label_match = gen_new_label(); 7362 label_continue = gen_new_label(); 7363 
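        /*
         * Conditional compare: branch to label_match when the condition
         * holds and perform the real FP compare there; on the fall-through
         * (no-match) path, load NZCV from the immediate and skip the
         * compare.  Roughly (illustrative sketch, not generated code):
         *
         *     if (cond) goto match;
         *     NZCV = a->nzcv;      // immediate flags
         *     goto done;
         * match:
         *     NZCV = FPCompare(Vn, Vm);
         * done:
         */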
arm_gen_test_cc(a->cond, label_match); 7364 /* nomatch: */ 7365 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7366 tcg_gen_br(label_continue); 7367 gen_set_label(label_match); 7368 } 7369 7370 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7371 7372 if (label_continue) { 7373 gen_set_label(label_continue); 7374 } 7375 return true; 7376 } 7377 7378 /* 7379 * Advanced SIMD Modified Immediate 7380 */ 7381 7382 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7383 { 7384 if (!dc_isar_feature(aa64_fp16, s)) { 7385 return false; 7386 } 7387 if (fp_access_check(s)) { 7388 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7389 a->q ? 16 : 8, vec_full_reg_size(s), 7390 vfp_expand_imm(MO_16, a->abcdefgh)); 7391 } 7392 return true; 7393 } 7394 7395 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7396 int64_t c, uint32_t oprsz, uint32_t maxsz) 7397 { 7398 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7399 } 7400 7401 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7402 { 7403 GVecGen2iFn *fn; 7404 7405 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7406 if ((a->cmode & 1) && a->cmode < 12) { 7407 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7408 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 7409 } else { 7410 /* There is one unallocated cmode/op combination in this space */ 7411 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 7412 return false; 7413 } 7414 fn = gen_movi; 7415 } 7416 7417 if (fp_access_check(s)) { 7418 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 7419 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 7420 } 7421 return true; 7422 } 7423 7424 /* 7425 * Advanced SIMD Shift by Immediate 7426 */ 7427 7428 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 7429 { 7430 if (fp_access_check(s)) { 7431 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 7432 } 7433 return true; 7434 } 7435 7436 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 7437 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 7438 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 7439 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 7440 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 7441 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 7442 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 7443 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 7444 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 7445 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 7446 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 7447 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 7448 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 7449 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 7450 7451 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 7452 { 7453 TCGv_i64 tcg_rn, tcg_rd; 7454 int esz = a->esz; 7455 int esize; 7456 7457 if (!fp_access_check(s)) { 7458 return true; 7459 } 7460 7461 /* 7462 * For the LL variants the store is larger than the load, 7463 * so if rd == rn we would overwrite parts of our input. 7464 * So load everything right now and use shifts in the main loop. 
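 * (For example, with esz == MO_8 all eight 8-bit inputs are read as a
 * single 64-bit value here, and extracted one at a time in the loop.)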
7465 */ 7466 tcg_rd = tcg_temp_new_i64(); 7467 tcg_rn = tcg_temp_new_i64(); 7468 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7469 7470 esize = 8 << esz; 7471 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7472 if (is_u) { 7473 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7474 } else { 7475 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7476 } 7477 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7478 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7479 } 7480 clear_vec_high(s, true, a->rd); 7481 return true; 7482 } 7483 7484 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7485 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7486 7487 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7488 { 7489 assert(shift >= 0 && shift <= 64); 7490 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7491 } 7492 7493 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7494 { 7495 assert(shift >= 0 && shift <= 64); 7496 if (shift == 64) { 7497 tcg_gen_movi_i64(dst, 0); 7498 } else { 7499 tcg_gen_shri_i64(dst, src, shift); 7500 } 7501 } 7502 7503 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7504 { 7505 gen_sshr_d(src, src, shift); 7506 tcg_gen_add_i64(dst, dst, src); 7507 } 7508 7509 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7510 { 7511 gen_ushr_d(src, src, shift); 7512 tcg_gen_add_i64(dst, dst, src); 7513 } 7514 7515 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7516 { 7517 assert(shift >= 0 && shift <= 32); 7518 if (shift) { 7519 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7520 tcg_gen_add_i64(dst, src, rnd); 7521 tcg_gen_sari_i64(dst, dst, shift); 7522 } else { 7523 tcg_gen_mov_i64(dst, src); 7524 } 7525 } 7526 7527 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7528 { 7529 assert(shift >= 0 && shift <= 32); 7530 if (shift) { 7531 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7532 tcg_gen_add_i64(dst, src, rnd); 7533 tcg_gen_shri_i64(dst, dst, shift); 7534 } else { 7535 tcg_gen_mov_i64(dst, src); 7536 } 7537 } 7538 7539 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7540 { 7541 assert(shift >= 0 && shift <= 64); 7542 if (shift == 0) { 7543 tcg_gen_mov_i64(dst, src); 7544 } else if (shift == 64) { 7545 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7546 tcg_gen_movi_i64(dst, 0); 7547 } else { 7548 TCGv_i64 rnd = tcg_temp_new_i64(); 7549 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7550 tcg_gen_sari_i64(dst, src, shift); 7551 tcg_gen_add_i64(dst, dst, rnd); 7552 } 7553 } 7554 7555 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7556 { 7557 assert(shift >= 0 && shift <= 64); 7558 if (shift == 0) { 7559 tcg_gen_mov_i64(dst, src); 7560 } else if (shift == 64) { 7561 /* Rounding will propagate bit 63 into bit 64. */ 7562 tcg_gen_shri_i64(dst, src, 63); 7563 } else { 7564 TCGv_i64 rnd = tcg_temp_new_i64(); 7565 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7566 tcg_gen_shri_i64(dst, src, shift); 7567 tcg_gen_add_i64(dst, dst, rnd); 7568 } 7569 } 7570 7571 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7572 { 7573 gen_srshr_d(src, src, shift); 7574 tcg_gen_add_i64(dst, dst, src); 7575 } 7576 7577 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7578 { 7579 gen_urshr_d(src, src, shift); 7580 tcg_gen_add_i64(dst, dst, src); 7581 } 7582 7583 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7584 { 7585 /* If shift is 64, dst is unchanged. 
*/ 7586 if (shift != 64) { 7587 tcg_gen_shri_i64(src, src, shift); 7588 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7589 } 7590 } 7591 7592 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7593 { 7594 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7595 } 7596 7597 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7598 WideShiftImmFn * const fns[3], MemOp sign) 7599 { 7600 TCGv_i64 tcg_rn, tcg_rd; 7601 int esz = a->esz; 7602 int esize; 7603 WideShiftImmFn *fn; 7604 7605 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7606 7607 if (!fp_access_check(s)) { 7608 return true; 7609 } 7610 7611 tcg_rn = tcg_temp_new_i64(); 7612 tcg_rd = tcg_temp_new_i64(); 7613 tcg_gen_movi_i64(tcg_rd, 0); 7614 7615 fn = fns[esz]; 7616 esize = 8 << esz; 7617 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7618 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7619 fn(tcg_rn, tcg_rn, a->imm); 7620 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7621 } 7622 7623 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7624 clear_vec_high(s, a->q, a->rd); 7625 return true; 7626 } 7627 7628 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7629 { 7630 tcg_gen_sari_i64(d, s, i); 7631 tcg_gen_ext16u_i64(d, d); 7632 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7633 } 7634 7635 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7636 { 7637 tcg_gen_sari_i64(d, s, i); 7638 tcg_gen_ext32u_i64(d, d); 7639 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7640 } 7641 7642 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7643 { 7644 gen_sshr_d(d, s, i); 7645 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7646 } 7647 7648 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7649 { 7650 tcg_gen_shri_i64(d, s, i); 7651 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7652 } 7653 7654 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7655 { 7656 tcg_gen_shri_i64(d, s, i); 7657 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7658 } 7659 7660 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7661 { 7662 gen_ushr_d(d, s, i); 7663 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7664 } 7665 7666 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7667 { 7668 tcg_gen_sari_i64(d, s, i); 7669 tcg_gen_ext16u_i64(d, d); 7670 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7671 } 7672 7673 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7674 { 7675 tcg_gen_sari_i64(d, s, i); 7676 tcg_gen_ext32u_i64(d, d); 7677 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7678 } 7679 7680 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7681 { 7682 gen_sshr_d(d, s, i); 7683 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7684 } 7685 7686 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7687 { 7688 gen_srshr_bhs(d, s, i); 7689 tcg_gen_ext16u_i64(d, d); 7690 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7691 } 7692 7693 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7694 { 7695 gen_srshr_bhs(d, s, i); 7696 tcg_gen_ext32u_i64(d, d); 7697 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7698 } 7699 7700 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7701 { 7702 gen_srshr_d(d, s, i); 7703 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7704 } 7705 7706 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7707 { 7708 gen_urshr_bhs(d, s, i); 7709 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7710 } 7711 7712 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7713 
{ 7714 gen_urshr_bhs(d, s, i); 7715 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7716 } 7717 7718 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7719 { 7720 gen_urshr_d(d, s, i); 7721 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7722 } 7723 7724 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7725 { 7726 gen_srshr_bhs(d, s, i); 7727 tcg_gen_ext16u_i64(d, d); 7728 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7729 } 7730 7731 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7732 { 7733 gen_srshr_bhs(d, s, i); 7734 tcg_gen_ext32u_i64(d, d); 7735 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7736 } 7737 7738 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7739 { 7740 gen_srshr_d(d, s, i); 7741 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7742 } 7743 7744 static WideShiftImmFn * const shrn_fns[] = { 7745 tcg_gen_shri_i64, 7746 tcg_gen_shri_i64, 7747 gen_ushr_d, 7748 }; 7749 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7750 7751 static WideShiftImmFn * const rshrn_fns[] = { 7752 gen_urshr_bhs, 7753 gen_urshr_bhs, 7754 gen_urshr_d, 7755 }; 7756 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7757 7758 static WideShiftImmFn * const sqshrn_fns[] = { 7759 gen_sqshrn_b, 7760 gen_sqshrn_h, 7761 gen_sqshrn_s, 7762 }; 7763 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7764 7765 static WideShiftImmFn * const uqshrn_fns[] = { 7766 gen_uqshrn_b, 7767 gen_uqshrn_h, 7768 gen_uqshrn_s, 7769 }; 7770 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7771 7772 static WideShiftImmFn * const sqshrun_fns[] = { 7773 gen_sqshrun_b, 7774 gen_sqshrun_h, 7775 gen_sqshrun_s, 7776 }; 7777 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7778 7779 static WideShiftImmFn * const sqrshrn_fns[] = { 7780 gen_sqrshrn_b, 7781 gen_sqrshrn_h, 7782 gen_sqrshrn_s, 7783 }; 7784 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7785 7786 static WideShiftImmFn * const uqrshrn_fns[] = { 7787 gen_uqrshrn_b, 7788 gen_uqrshrn_h, 7789 gen_uqrshrn_s, 7790 }; 7791 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7792 7793 static WideShiftImmFn * const sqrshrun_fns[] = { 7794 gen_sqrshrun_b, 7795 gen_sqrshrun_h, 7796 gen_sqrshrun_s, 7797 }; 7798 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7799 7800 /* 7801 * Advanced SIMD Scalar Shift by Immediate 7802 */ 7803 7804 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7805 WideShiftImmFn *fn, bool accumulate, 7806 MemOp sign) 7807 { 7808 if (fp_access_check(s)) { 7809 TCGv_i64 rd = tcg_temp_new_i64(); 7810 TCGv_i64 rn = tcg_temp_new_i64(); 7811 7812 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7813 if (accumulate) { 7814 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7815 } 7816 fn(rd, rn, a->imm); 7817 write_fp_dreg(s, a->rd, rd); 7818 } 7819 return true; 7820 } 7821 7822 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7823 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7824 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7825 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7826 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7827 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7828 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7829 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7830 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7831 7832 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7833 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7834 7835 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7836 NeonGenTwoOpEnvFn *fn) 7837 { 7838 TCGv_i32 t = tcg_temp_new_i32(); 7839 tcg_gen_extrl_i64_i32(t, s); 7840 fn(t, tcg_env, t, tcg_constant_i32(i)); 7841 tcg_gen_extu_i32_i64(d, t); 7842 } 7843 7844 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7845 { 7846 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7847 } 7848 7849 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7850 { 7851 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7852 } 7853 7854 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7855 { 7856 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7857 } 7858 7859 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7860 { 7861 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7862 } 7863 7864 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7865 { 7866 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7867 } 7868 7869 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7870 { 7871 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7872 } 7873 7874 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7875 { 7876 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7877 } 7878 7879 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7880 { 7881 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7882 } 7883 7884 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7885 { 7886 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7887 } 7888 7889 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7890 { 7891 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7892 } 7893 7894 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7895 { 7896 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7897 } 7898 7899 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7900 { 7901 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7902 } 7903 7904 static WideShiftImmFn * const f_scalar_sqshli[] = { 7905 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7906 }; 7907 7908 static WideShiftImmFn * const f_scalar_uqshli[] = { 7909 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7910 }; 7911 7912 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7913 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7914 }; 7915 7916 /* Note that the helpers sign-extend their inputs, so don't do it here. 
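 * (Hence the TRANS invocations below pass 0 rather than MO_SIGN for the element read.)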
*/ 7917 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7918 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7919 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7920 7921 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7922 WideShiftImmFn * const fns[3], 7923 MemOp sign, bool zext) 7924 { 7925 MemOp esz = a->esz; 7926 7927 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7928 7929 if (fp_access_check(s)) { 7930 TCGv_i64 rd = tcg_temp_new_i64(); 7931 TCGv_i64 rn = tcg_temp_new_i64(); 7932 7933 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7934 fns[esz](rd, rn, a->imm); 7935 if (zext) { 7936 tcg_gen_ext_i64(rd, rd, esz); 7937 } 7938 write_fp_dreg(s, a->rd, rd); 7939 } 7940 return true; 7941 } 7942 7943 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7944 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7945 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7946 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7947 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7948 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7949 7950 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7951 { 7952 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7953 tcg_rd = cpu_reg(s, a->rd); 7954 7955 if (!a->sf && is_signed) { 7956 tcg_n = tcg_temp_new_i64(); 7957 tcg_m = tcg_temp_new_i64(); 7958 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7959 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7960 } else { 7961 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7962 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7963 } 7964 7965 if (is_signed) { 7966 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7967 } else { 7968 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7969 } 7970 7971 if (!a->sf) { /* zero extend final result */ 7972 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7973 } 7974 return true; 7975 } 7976 7977 TRANS(SDIV, do_div, a, true) 7978 TRANS(UDIV, do_div, a, false) 7979 7980 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7981 * Note that it is the caller's responsibility to ensure that the 7982 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7983 * mandated semantics for out of range shifts. 7984 */ 7985 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7986 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7987 { 7988 switch (shift_type) { 7989 case A64_SHIFT_TYPE_LSL: 7990 tcg_gen_shl_i64(dst, src, shift_amount); 7991 break; 7992 case A64_SHIFT_TYPE_LSR: 7993 tcg_gen_shr_i64(dst, src, shift_amount); 7994 break; 7995 case A64_SHIFT_TYPE_ASR: 7996 if (!sf) { 7997 tcg_gen_ext32s_i64(dst, src); 7998 } 7999 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 8000 break; 8001 case A64_SHIFT_TYPE_ROR: 8002 if (sf) { 8003 tcg_gen_rotr_i64(dst, src, shift_amount); 8004 } else { 8005 TCGv_i32 t0, t1; 8006 t0 = tcg_temp_new_i32(); 8007 t1 = tcg_temp_new_i32(); 8008 tcg_gen_extrl_i64_i32(t0, src); 8009 tcg_gen_extrl_i64_i32(t1, shift_amount); 8010 tcg_gen_rotr_i32(t0, t0, t1); 8011 tcg_gen_extu_i32_i64(dst, t0); 8012 } 8013 break; 8014 default: 8015 assert(FALSE); /* all shift types should be handled */ 8016 break; 8017 } 8018 8019 if (!sf) { /* zero extend final result */ 8020 tcg_gen_ext32u_i64(dst, dst); 8021 } 8022 } 8023 8024 /* Shift a TCGv src by immediate, put result in dst. 
8025 * The shift amount must be in range (this should always be true as the 8026 * relevant instructions will UNDEF on bad shift immediates). 8027 */ 8028 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 8029 enum a64_shift_type shift_type, unsigned int shift_i) 8030 { 8031 assert(shift_i < (sf ? 64 : 32)); 8032 8033 if (shift_i == 0) { 8034 tcg_gen_mov_i64(dst, src); 8035 } else { 8036 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 8037 } 8038 } 8039 8040 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 8041 enum a64_shift_type shift_type) 8042 { 8043 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8044 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8045 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8046 8047 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 8048 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 8049 return true; 8050 } 8051 8052 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 8053 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 8054 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 8055 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 8056 8057 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 8058 { 8059 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 8060 TCGv_i32 tcg_bytes; 8061 8062 switch (a->esz) { 8063 case MO_8: 8064 case MO_16: 8065 case MO_32: 8066 tcg_val = tcg_temp_new_i64(); 8067 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 8068 break; 8069 case MO_64: 8070 tcg_val = cpu_reg(s, a->rm); 8071 break; 8072 default: 8073 g_assert_not_reached(); 8074 } 8075 tcg_acc = cpu_reg(s, a->rn); 8076 tcg_bytes = tcg_constant_i32(1 << a->esz); 8077 tcg_rd = cpu_reg(s, a->rd); 8078 8079 if (crc32c) { 8080 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8081 } else { 8082 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8083 } 8084 return true; 8085 } 8086 8087 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 8088 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 8089 8090 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 8091 { 8092 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 8093 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 8094 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 8095 8096 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 8097 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8098 8099 if (setflag) { 8100 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8101 } else { 8102 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8103 } 8104 return true; 8105 } 8106 8107 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 8108 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 8109 8110 static bool trans_IRG(DisasContext *s, arg_rrr *a) 8111 { 8112 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8113 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 8114 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 8115 8116 if (s->ata[0]) { 8117 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 8118 } else { 8119 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 8120 } 8121 return true; 8122 } 8123 return false; 8124 } 8125 8126 static bool trans_GMI(DisasContext *s, arg_rrr *a) 8127 { 8128 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8129 TCGv_i64 t = tcg_temp_new_i64(); 8130 8131 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 8132 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8133 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 8134 return true; 8135 } 8136 return false; 8137 } 8138 8139 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 8140 { 8141 if (dc_isar_feature(aa64_pauth, s)) { 8142 
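/*
 * PACGA: compute a pointer authentication code for Xn using the modifier
 * in Xm|SP and the generic key; the result lands in Xd[63:32] with
 * Xd[31:0] zeroed.
 */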
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 8143 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 8144 return true; 8145 } 8146 return false; 8147 } 8148 8149 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 8150 8151 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 8152 { 8153 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 8154 return true; 8155 } 8156 8157 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8158 { 8159 TCGv_i32 t32 = tcg_temp_new_i32(); 8160 8161 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8162 gen_helper_rbit(t32, t32); 8163 tcg_gen_extu_i32_i64(tcg_rd, t32); 8164 } 8165 8166 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 8167 { 8168 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8169 8170 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8171 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8172 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8173 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8174 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8175 } 8176 8177 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8178 { 8179 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 8180 } 8181 8182 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8183 { 8184 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 8185 } 8186 8187 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8188 { 8189 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8190 } 8191 8192 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8193 { 8194 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8195 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8196 } 8197 8198 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 8199 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 8200 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 8201 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 8202 8203 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8204 { 8205 TCGv_i32 t32 = tcg_temp_new_i32(); 8206 8207 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8208 tcg_gen_clzi_i32(t32, t32, 32); 8209 tcg_gen_extu_i32_i64(tcg_rd, t32); 8210 } 8211 8212 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8213 { 8214 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8215 } 8216 8217 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8218 { 8219 TCGv_i32 t32 = tcg_temp_new_i32(); 8220 8221 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8222 tcg_gen_clrsb_i32(t32, t32); 8223 tcg_gen_extu_i32_i64(tcg_rd, t32); 8224 } 8225 8226 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 8227 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) 8228 8229 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) 8230 { 8231 TCGv_i64 tcg_rd, tcg_rn; 8232 8233 if (a->z) { 8234 if (a->rn != 31) { 8235 return false; 8236 } 8237 tcg_rn = tcg_constant_i64(0); 8238 } else { 8239 tcg_rn = cpu_reg_sp(s, a->rn); 8240 } 8241 if (s->pauth_active) { 8242 tcg_rd = cpu_reg(s, a->rd); 8243 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); 8244 } 8245 return true; 8246 } 8247 8248 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) 8249 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) 8250 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) 8251 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb) 8252 8253 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) 8254 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) 8255 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) 8256 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) 8257 8258 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) 8259 { 8260 if (s->pauth_active) { 8261 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8262 fn(tcg_rd, tcg_env, tcg_rd); 8263 } 8264 return true; 8265 } 8266 8267 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) 8268 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) 8269 8270 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, 8271 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) 8272 { 8273 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 8274 8275 if (!a->sf && (a->sa & (1 << 5))) { 8276 return false; 8277 } 8278 8279 tcg_rd = cpu_reg(s, a->rd); 8280 tcg_rn = cpu_reg(s, a->rn); 8281 8282 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8283 if (a->sa) { 8284 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8285 } 8286 8287 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); 8288 if (!a->sf) { 8289 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8290 } 8291 if (setflags) { 8292 gen_logic_CC(a->sf, tcg_rd); 8293 } 8294 return true; 8295 } 8296 8297 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) 8298 { 8299 /* 8300 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for 8301 * register-register MOV and MVN, so it is worth special casing. 
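 * For example, MOV x0, x1 assembles as ORR x0, xzr, x1 and
 * MVN w0, w1 as ORN w0, wzr, w1.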
8302 */ 8303 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 8304 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8305 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8306 8307 if (a->n) { 8308 tcg_gen_not_i64(tcg_rd, tcg_rm); 8309 if (!a->sf) { 8310 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8311 } 8312 } else { 8313 if (a->sf) { 8314 tcg_gen_mov_i64(tcg_rd, tcg_rm); 8315 } else { 8316 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 8317 } 8318 } 8319 return true; 8320 } 8321 8322 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 8323 } 8324 8325 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 8326 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 8327 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 8328 8329 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 8330 bool sub_op, bool setflags) 8331 { 8332 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 8333 8334 if (a->sa > 4) { 8335 return false; 8336 } 8337 8338 /* non-flag setting ops may use SP */ 8339 if (!setflags) { 8340 tcg_rd = cpu_reg_sp(s, a->rd); 8341 } else { 8342 tcg_rd = cpu_reg(s, a->rd); 8343 } 8344 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 8345 8346 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8347 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 8348 8349 tcg_result = tcg_temp_new_i64(); 8350 if (!setflags) { 8351 if (sub_op) { 8352 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8353 } else { 8354 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8355 } 8356 } else { 8357 if (sub_op) { 8358 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8359 } else { 8360 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8361 } 8362 } 8363 8364 if (a->sf) { 8365 tcg_gen_mov_i64(tcg_rd, tcg_result); 8366 } else { 8367 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8368 } 8369 return true; 8370 } 8371 8372 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8373 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8374 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8375 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8376 8377 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8378 bool sub_op, bool setflags) 8379 { 8380 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8381 8382 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8383 return false; 8384 } 8385 8386 tcg_rd = cpu_reg(s, a->rd); 8387 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8388 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8389 8390 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8391 8392 tcg_result = tcg_temp_new_i64(); 8393 if (!setflags) { 8394 if (sub_op) { 8395 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8396 } else { 8397 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8398 } 8399 } else { 8400 if (sub_op) { 8401 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8402 } else { 8403 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8404 } 8405 } 8406 8407 if (a->sf) { 8408 tcg_gen_mov_i64(tcg_rd, tcg_result); 8409 } else { 8410 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8411 } 8412 return true; 8413 } 8414 8415 TRANS(ADD_r, do_addsub_reg, a, false, false) 8416 TRANS(SUB_r, do_addsub_reg, a, true, false) 8417 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8418 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8419 8420 static bool do_mulh(DisasContext *s, arg_rrr *a, 8421 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8422 { 8423 TCGv_i64 discard = tcg_temp_new_i64(); 8424 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8425 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8426 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8427 8428 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8429 return true; 8430 } 8431 
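/*
 * SMULH/UMULH return only the high 64 bits of the 128-bit product.
 * tcg_gen_muls2_i64/tcg_gen_mulu2_i64 produce both halves, so the low
 * half is written to the discard temporary above.
 */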
8432 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8433 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8434 8435 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8436 bool sf, bool is_sub, MemOp mop) 8437 { 8438 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8439 TCGv_i64 tcg_op1, tcg_op2; 8440 8441 if (mop == MO_64) { 8442 tcg_op1 = cpu_reg(s, a->rn); 8443 tcg_op2 = cpu_reg(s, a->rm); 8444 } else { 8445 tcg_op1 = tcg_temp_new_i64(); 8446 tcg_op2 = tcg_temp_new_i64(); 8447 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8448 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8449 } 8450 8451 if (a->ra == 31 && !is_sub) { 8452 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8453 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8454 } else { 8455 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8456 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8457 8458 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8459 if (is_sub) { 8460 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8461 } else { 8462 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8463 } 8464 } 8465 8466 if (!sf) { 8467 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8468 } 8469 return true; 8470 } 8471 8472 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8473 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8474 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8475 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8476 8477 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8478 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8479 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8480 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8481 8482 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8483 bool is_sub, bool setflags) 8484 { 8485 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8486 8487 tcg_rd = cpu_reg(s, a->rd); 8488 tcg_rn = cpu_reg(s, a->rn); 8489 8490 if (is_sub) { 8491 tcg_y = tcg_temp_new_i64(); 8492 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8493 } else { 8494 tcg_y = cpu_reg(s, a->rm); 8495 } 8496 8497 if (setflags) { 8498 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8499 } else { 8500 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8501 } 8502 return true; 8503 } 8504 8505 TRANS(ADC, do_adc_sbc, a, false, false) 8506 TRANS(SBC, do_adc_sbc, a, true, false) 8507 TRANS(ADCS, do_adc_sbc, a, false, true) 8508 TRANS(SBCS, do_adc_sbc, a, true, true) 8509 8510 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8511 { 8512 int mask = a->mask; 8513 TCGv_i64 tcg_rn; 8514 TCGv_i32 nzcv; 8515 8516 if (!dc_isar_feature(aa64_condm_4, s)) { 8517 return false; 8518 } 8519 8520 tcg_rn = read_cpu_reg(s, a->rn, 1); 8521 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8522 8523 nzcv = tcg_temp_new_i32(); 8524 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8525 8526 if (mask & 8) { /* N */ 8527 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8528 } 8529 if (mask & 4) { /* Z */ 8530 tcg_gen_not_i32(cpu_ZF, nzcv); 8531 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8532 } 8533 if (mask & 2) { /* C */ 8534 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8535 } 8536 if (mask & 1) { /* V */ 8537 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8538 } 8539 return true; 8540 } 8541 8542 static bool do_setf(DisasContext *s, int rn, int shift) 8543 { 8544 TCGv_i32 tmp = tcg_temp_new_i32(); 8545 8546 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8547 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8548 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8549 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8550 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8551 return true; 8552 } 8553 8554 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8555 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8556 8557 /* 
CCMP, CCMN */ 8558 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8559 { 8560 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8561 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8562 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8563 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8564 TCGv_i64 tcg_rn, tcg_y; 8565 DisasCompare c; 8566 unsigned nzcv; 8567 bool has_andc; 8568 8569 /* Set T0 = !COND. */ 8570 arm_test_cc(&c, a->cond); 8571 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8572 8573 /* Load the arguments for the new comparison. */ 8574 if (a->imm) { 8575 tcg_y = tcg_constant_i64(a->y); 8576 } else { 8577 tcg_y = cpu_reg(s, a->y); 8578 } 8579 tcg_rn = cpu_reg(s, a->rn); 8580 8581 /* Set the flags for the new comparison. */ 8582 if (a->op) { 8583 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8584 } else { 8585 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8586 } 8587 8588 /* 8589 * If COND was false, force the flags to #nzcv. Compute two masks 8590 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8591 * For tcg hosts that support ANDC, we can make do with just T1. 8592 * In either case, allow the tcg optimizer to delete any unused mask. 8593 */ 8594 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8595 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8596 8597 nzcv = a->nzcv; 8598 has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); 8599 if (nzcv & 8) { /* N */ 8600 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8601 } else { 8602 if (has_andc) { 8603 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8604 } else { 8605 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8606 } 8607 } 8608 if (nzcv & 4) { /* Z */ 8609 if (has_andc) { 8610 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8611 } else { 8612 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8613 } 8614 } else { 8615 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8616 } 8617 if (nzcv & 2) { /* C */ 8618 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8619 } else { 8620 if (has_andc) { 8621 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8622 } else { 8623 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8624 } 8625 } 8626 if (nzcv & 1) { /* V */ 8627 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8628 } else { 8629 if (has_andc) { 8630 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8631 } else { 8632 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8633 } 8634 } 8635 return true; 8636 } 8637 8638 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8639 { 8640 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8641 TCGv_i64 zero = tcg_constant_i64(0); 8642 DisasCompare64 c; 8643 8644 a64_test_cc(&c, a->cond); 8645 8646 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8647 /* CSET & CSETM. 
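 * CSET Rd, cond is an alias of CSINC Rd, ZR, ZR, invert(cond) and yields
 * 1 when the condition passes; CSETM aliases CSINV and yields all-ones.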
*/ 8648 if (a->else_inv) { 8649 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8650 tcg_rd, c.value, zero); 8651 } else { 8652 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8653 tcg_rd, c.value, zero); 8654 } 8655 } else { 8656 TCGv_i64 t_true = cpu_reg(s, a->rn); 8657 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8658 8659 if (a->else_inv && a->else_inc) { 8660 tcg_gen_neg_i64(t_false, t_false); 8661 } else if (a->else_inv) { 8662 tcg_gen_not_i64(t_false, t_false); 8663 } else if (a->else_inc) { 8664 tcg_gen_addi_i64(t_false, t_false, 1); 8665 } 8666 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8667 } 8668 8669 if (!a->sf) { 8670 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8671 } 8672 return true; 8673 } 8674 8675 typedef struct FPScalar1Int { 8676 void (*gen_h)(TCGv_i32, TCGv_i32); 8677 void (*gen_s)(TCGv_i32, TCGv_i32); 8678 void (*gen_d)(TCGv_i64, TCGv_i64); 8679 } FPScalar1Int; 8680 8681 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8682 const FPScalar1Int *f, 8683 bool merging) 8684 { 8685 switch (a->esz) { 8686 case MO_64: 8687 if (fp_access_check(s)) { 8688 TCGv_i64 t = read_fp_dreg(s, a->rn); 8689 f->gen_d(t, t); 8690 if (merging) { 8691 write_fp_dreg_merging(s, a->rd, a->rd, t); 8692 } else { 8693 write_fp_dreg(s, a->rd, t); 8694 } 8695 } 8696 break; 8697 case MO_32: 8698 if (fp_access_check(s)) { 8699 TCGv_i32 t = read_fp_sreg(s, a->rn); 8700 f->gen_s(t, t); 8701 if (merging) { 8702 write_fp_sreg_merging(s, a->rd, a->rd, t); 8703 } else { 8704 write_fp_sreg(s, a->rd, t); 8705 } 8706 } 8707 break; 8708 case MO_16: 8709 if (!dc_isar_feature(aa64_fp16, s)) { 8710 return false; 8711 } 8712 if (fp_access_check(s)) { 8713 TCGv_i32 t = read_fp_hreg(s, a->rn); 8714 f->gen_h(t, t); 8715 if (merging) { 8716 write_fp_hreg_merging(s, a->rd, a->rd, t); 8717 } else { 8718 write_fp_sreg(s, a->rd, t); 8719 } 8720 } 8721 break; 8722 default: 8723 return false; 8724 } 8725 return true; 8726 } 8727 8728 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, 8729 const FPScalar1Int *fnormal, 8730 const FPScalar1Int *fah) 8731 { 8732 return do_fp1_scalar_int(s, a, s->fpcr_ah ? 
fah : fnormal, true); 8733 } 8734 8735 static const FPScalar1Int f_scalar_fmov = { 8736 tcg_gen_mov_i32, 8737 tcg_gen_mov_i32, 8738 tcg_gen_mov_i64, 8739 }; 8740 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) 8741 8742 static const FPScalar1Int f_scalar_fabs = { 8743 gen_vfp_absh, 8744 gen_vfp_abss, 8745 gen_vfp_absd, 8746 }; 8747 static const FPScalar1Int f_scalar_ah_fabs = { 8748 gen_vfp_ah_absh, 8749 gen_vfp_ah_abss, 8750 gen_vfp_ah_absd, 8751 }; 8752 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) 8753 8754 static const FPScalar1Int f_scalar_fneg = { 8755 gen_vfp_negh, 8756 gen_vfp_negs, 8757 gen_vfp_negd, 8758 }; 8759 static const FPScalar1Int f_scalar_ah_fneg = { 8760 gen_vfp_ah_negh, 8761 gen_vfp_ah_negs, 8762 gen_vfp_ah_negd, 8763 }; 8764 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) 8765 8766 typedef struct FPScalar1 { 8767 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8768 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8769 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8770 } FPScalar1; 8771 8772 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, 8773 const FPScalar1 *f, int rmode, 8774 ARMFPStatusFlavour fpsttype) 8775 { 8776 TCGv_i32 tcg_rmode = NULL; 8777 TCGv_ptr fpst; 8778 TCGv_i64 t64; 8779 TCGv_i32 t32; 8780 int check = fp_access_check_scalar_hsd(s, a->esz); 8781 8782 if (check <= 0) { 8783 return check == 0; 8784 } 8785 8786 fpst = fpstatus_ptr(fpsttype); 8787 if (rmode >= 0) { 8788 tcg_rmode = gen_set_rmode(rmode, fpst); 8789 } 8790 8791 switch (a->esz) { 8792 case MO_64: 8793 t64 = read_fp_dreg(s, a->rn); 8794 f->gen_d(t64, t64, fpst); 8795 write_fp_dreg_merging(s, a->rd, a->rd, t64); 8796 break; 8797 case MO_32: 8798 t32 = read_fp_sreg(s, a->rn); 8799 f->gen_s(t32, t32, fpst); 8800 write_fp_sreg_merging(s, a->rd, a->rd, t32); 8801 break; 8802 case MO_16: 8803 t32 = read_fp_hreg(s, a->rn); 8804 f->gen_h(t32, t32, fpst); 8805 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8806 break; 8807 default: 8808 g_assert_not_reached(); 8809 } 8810 8811 if (rmode >= 0) { 8812 gen_restore_rmode(tcg_rmode, fpst); 8813 } 8814 return true; 8815 } 8816 8817 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8818 const FPScalar1 *f, int rmode) 8819 { 8820 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, 8821 a->esz == MO_16 ? 
8822 FPST_A64_F16 : FPST_A64); 8823 } 8824 8825 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, 8826 const FPScalar1 *f, int rmode) 8827 { 8828 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); 8829 } 8830 8831 static const FPScalar1 f_scalar_fsqrt = { 8832 gen_helper_vfp_sqrth, 8833 gen_helper_vfp_sqrts, 8834 gen_helper_vfp_sqrtd, 8835 }; 8836 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8837 8838 static const FPScalar1 f_scalar_frint = { 8839 gen_helper_advsimd_rinth, 8840 gen_helper_rints, 8841 gen_helper_rintd, 8842 }; 8843 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8844 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8845 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8846 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8847 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8848 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8849 8850 static const FPScalar1 f_scalar_frintx = { 8851 gen_helper_advsimd_rinth_exact, 8852 gen_helper_rints_exact, 8853 gen_helper_rintd_exact, 8854 }; 8855 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8856 8857 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) 8858 { 8859 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; 8860 TCGv_i32 t32; 8861 int check; 8862 8863 if (!dc_isar_feature(aa64_bf16, s)) { 8864 return false; 8865 } 8866 8867 check = fp_access_check_scalar_hsd(s, a->esz); 8868 8869 if (check <= 0) { 8870 return check == 0; 8871 } 8872 8873 t32 = read_fp_sreg(s, a->rn); 8874 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); 8875 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8876 return true; 8877 } 8878 8879 static const FPScalar1 f_scalar_frint32 = { 8880 NULL, 8881 gen_helper_frint32_s, 8882 gen_helper_frint32_d, 8883 }; 8884 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8885 &f_scalar_frint32, FPROUNDING_ZERO) 8886 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8887 8888 static const FPScalar1 f_scalar_frint64 = { 8889 NULL, 8890 gen_helper_frint64_s, 8891 gen_helper_frint64_d, 8892 }; 8893 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8894 &f_scalar_frint64, FPROUNDING_ZERO) 8895 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8896 8897 static const FPScalar1 f_scalar_frecpe = { 8898 gen_helper_recpe_f16, 8899 gen_helper_recpe_f32, 8900 gen_helper_recpe_f64, 8901 }; 8902 static const FPScalar1 f_scalar_frecpe_rpres = { 8903 gen_helper_recpe_f16, 8904 gen_helper_recpe_rpres_f32, 8905 gen_helper_recpe_f64, 8906 }; 8907 TRANS(FRECPE_s, do_fp1_scalar_ah, a, 8908 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 8909 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) 8910 8911 static const FPScalar1 f_scalar_frecpx = { 8912 gen_helper_frecpx_f16, 8913 gen_helper_frecpx_f32, 8914 gen_helper_frecpx_f64, 8915 }; 8916 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) 8917 8918 static const FPScalar1 f_scalar_frsqrte = { 8919 gen_helper_rsqrte_f16, 8920 gen_helper_rsqrte_f32, 8921 gen_helper_rsqrte_f64, 8922 }; 8923 static const FPScalar1 f_scalar_frsqrte_rpres = { 8924 gen_helper_rsqrte_f16, 8925 gen_helper_rsqrte_rpres_f32, 8926 gen_helper_rsqrte_f64, 8927 }; 8928 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, 8929 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
8930 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) 8931 8932 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8933 { 8934 if (fp_access_check(s)) { 8935 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8936 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8937 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8938 8939 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 8940 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 8941 } 8942 return true; 8943 } 8944 8945 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8946 { 8947 if (fp_access_check(s)) { 8948 TCGv_i32 tmp = read_fp_sreg(s, a->rn); 8949 TCGv_i32 ahp = get_ahp_flag(); 8950 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8951 8952 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 8953 /* write_fp_hreg_merging is OK here because top half of result is zero */ 8954 write_fp_hreg_merging(s, a->rd, a->rd, tmp); 8955 } 8956 return true; 8957 } 8958 8959 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 8960 { 8961 if (fp_access_check(s)) { 8962 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8963 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8964 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8965 8966 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); 8967 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 8968 } 8969 return true; 8970 } 8971 8972 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 8973 { 8974 if (fp_access_check(s)) { 8975 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8976 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8977 TCGv_i32 ahp = get_ahp_flag(); 8978 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8979 8980 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8981 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ 8982 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); 8983 } 8984 return true; 8985 } 8986 8987 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 8988 { 8989 if (fp_access_check(s)) { 8990 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8991 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8992 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 8993 TCGv_i32 tcg_ahp = get_ahp_flag(); 8994 8995 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8996 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 8997 } 8998 return true; 8999 } 9000 9001 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 9002 { 9003 if (fp_access_check(s)) { 9004 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9005 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9006 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9007 TCGv_i32 tcg_ahp = get_ahp_flag(); 9008 9009 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9010 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 9011 } 9012 return true; 9013 } 9014 9015 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 9016 TCGv_i64 tcg_int, bool is_signed) 9017 { 9018 TCGv_ptr tcg_fpstatus; 9019 TCGv_i32 tcg_shift, tcg_single; 9020 TCGv_i64 tcg_double; 9021 9022 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9023 tcg_shift = tcg_constant_i32(shift); 9024 9025 switch (esz) { 9026 case MO_64: 9027 tcg_double = tcg_temp_new_i64(); 9028 if (is_signed) { 9029 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9030 } else { 9031 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9032 } 9033 write_fp_dreg_merging(s, rd, rd, tcg_double); 9034 break; 9035 9036 case MO_32: 9037 tcg_single = tcg_temp_new_i32(); 9038 if (is_signed) { 9039 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9040 } else { 9041 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9042 } 9043 write_fp_sreg_merging(s, rd, rd, tcg_single); 9044 break; 9045 9046 case MO_16: 9047 tcg_single = tcg_temp_new_i32(); 9048 if (is_signed) { 9049 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9050 } else { 9051 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9052 } 9053 write_fp_hreg_merging(s, rd, rd, tcg_single); 9054 break; 9055 9056 default: 9057 g_assert_not_reached(); 9058 } 9059 return true; 9060 } 9061 9062 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 9063 { 9064 TCGv_i64 tcg_int; 9065 int check = fp_access_check_scalar_hsd(s, a->esz); 9066 9067 if (check <= 0) { 9068 return check == 0; 9069 } 9070 9071 if (a->sf) { 9072 tcg_int = cpu_reg(s, a->rn); 9073 } else { 9074 tcg_int = read_cpu_reg(s, a->rn, true); 9075 if (is_signed) { 9076 tcg_gen_ext32s_i64(tcg_int, tcg_int); 9077 } else { 9078 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9079 } 9080 } 9081 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9082 } 9083 9084 TRANS(SCVTF_g, do_cvtf_g, a, true) 9085 TRANS(UCVTF_g, do_cvtf_g, a, false) 9086 9087 /* 9088 * [US]CVTF (vector), scalar version. 9089 * Which sounds weird, but really just means input from fp register 9090 * instead of input from general register. Input and output element 9091 * size are always equal. 9092 */ 9093 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 9094 { 9095 TCGv_i64 tcg_int; 9096 int check = fp_access_check_scalar_hsd(s, a->esz); 9097 9098 if (check <= 0) { 9099 return check == 0; 9100 } 9101 9102 tcg_int = tcg_temp_new_i64(); 9103 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 9104 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9105 } 9106 9107 TRANS(SCVTF_f, do_cvtf_f, a, true) 9108 TRANS(UCVTF_f, do_cvtf_f, a, false) 9109 9110 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 9111 TCGv_i64 tcg_out, int shift, int rn, 9112 ARMFPRounding rmode) 9113 { 9114 TCGv_ptr tcg_fpstatus; 9115 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 9116 9117 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9118 tcg_shift = tcg_constant_i32(shift); 9119 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9120 9121 switch (esz) { 9122 case MO_64: 9123 read_vec_element(s, tcg_out, rn, 0, MO_64); 9124 switch (out) { 9125 case MO_64 | MO_SIGN: 9126 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9127 break; 9128 case MO_64: 9129 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9130 break; 9131 case MO_32 | MO_SIGN: 9132 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9133 break; 9134 case MO_32: 9135 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9136 break; 9137 default: 9138 g_assert_not_reached(); 9139 } 9140 break; 9141 9142 case MO_32: 9143 tcg_single = read_fp_sreg(s, rn); 9144 switch (out) { 9145 case MO_64 | MO_SIGN: 9146 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9147 break; 9148 case MO_64: 9149 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9150 break; 9151 case MO_32 | MO_SIGN: 9152 gen_helper_vfp_tosls(tcg_single, tcg_single, 9153 tcg_shift, tcg_fpstatus); 9154 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9155 break; 9156 case MO_32: 9157 gen_helper_vfp_touls(tcg_single, tcg_single, 9158 tcg_shift, tcg_fpstatus); 9159 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9160 break; 9161 default: 9162 g_assert_not_reached(); 9163 } 9164 break; 9165 9166 case MO_16: 9167 tcg_single = read_fp_hreg(s, rn); 9168 switch (out) { 9169 case MO_64 | MO_SIGN: 9170 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9171 break; 9172 case MO_64: 9173 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9174 break; 9175 case MO_32 | MO_SIGN: 9176 gen_helper_vfp_toslh(tcg_single, tcg_single, 9177 tcg_shift, tcg_fpstatus); 9178 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9179 break; 9180 case MO_32: 9181 gen_helper_vfp_toulh(tcg_single, tcg_single, 9182 tcg_shift, tcg_fpstatus); 9183 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9184 break; 9185 case MO_16 | MO_SIGN: 9186 gen_helper_vfp_toshh(tcg_single, tcg_single, 9187 tcg_shift, tcg_fpstatus); 9188 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9189 break; 9190 case MO_16: 9191 gen_helper_vfp_touhh(tcg_single, tcg_single, 9192 tcg_shift, tcg_fpstatus); 9193 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9194 break; 9195 default: 9196 g_assert_not_reached(); 9197 } 9198 break; 9199 9200 default: 9201 g_assert_not_reached(); 9202 } 9203 9204 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9205 } 9206 9207 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 9208 ARMFPRounding rmode, bool is_signed) 9209 { 9210 TCGv_i64 tcg_int; 9211 int check = fp_access_check_scalar_hsd(s, a->esz); 9212 9213 if (check <= 0) { 9214 return check == 0; 9215 } 9216 9217 tcg_int = cpu_reg(s, a->rd); 9218 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 9219 a->esz, tcg_int, a->shift, a->rn, rmode); 9220 9221 if (!a->sf) { 9222 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9223 } 9224 return true; 9225 } 9226 9227 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 9228 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 9229 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 9230 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 9231 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 9232 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 9233 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 9234 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 9235 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 9236 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 9237 9238 /* 9239 * FCVT* (vector), scalar version. 9240 * Which sounds weird, but really just means output to fp register 9241 * instead of output to general register. Input and output element 9242 * size are always equal. 9243 */ 9244 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 9245 ARMFPRounding rmode, bool is_signed) 9246 { 9247 TCGv_i64 tcg_int; 9248 int check = fp_access_check_scalar_hsd(s, a->esz); 9249 9250 if (check <= 0) { 9251 return check == 0; 9252 } 9253 9254 tcg_int = tcg_temp_new_i64(); 9255 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 9256 a->esz, tcg_int, a->shift, a->rn, rmode); 9257 9258 if (!s->fpcr_nep) { 9259 clear_vec(s, a->rd); 9260 } 9261 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 9262 return true; 9263 } 9264 9265 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 9266 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 9267 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 9268 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 9269 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 9270 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 9271 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 9272 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 9273 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 9274 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 9275 9276 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 9277 { 9278 if (!dc_isar_feature(aa64_jscvt, s)) { 9279 return false; 9280 } 9281 if (fp_access_check(s)) { 9282 TCGv_i64 t = read_fp_dreg(s, a->rn); 9283 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 9284 9285 gen_helper_fjcvtzs(t, t, fpstatus); 9286 9287 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 9288 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9289 tcg_gen_movi_i32(cpu_CF, 0); 9290 tcg_gen_movi_i32(cpu_NF, 0); 9291 tcg_gen_movi_i32(cpu_VF, 0); 9292 } 9293 return true; 9294 } 9295 9296 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 9297 { 9298 if (!dc_isar_feature(aa64_fp16, s)) { 9299 return false; 9300 } 9301 if (fp_access_check(s)) { 9302 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9303 TCGv_i64 tmp = tcg_temp_new_i64(); 9304 tcg_gen_ext16u_i64(tmp, tcg_rn); 9305 write_fp_dreg(s, a->rd, tmp); 9306 } 9307 return true; 9308 } 9309 9310 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 9311 { 9312 if (fp_access_check(s)) { 9313 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9314 TCGv_i64 tmp = tcg_temp_new_i64(); 9315 tcg_gen_ext32u_i64(tmp, tcg_rn); 9316 write_fp_dreg(s, a->rd, tmp); 9317 } 9318 return true; 9319 } 9320 9321 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 9322 { 9323 if (fp_access_check(s)) { 9324 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9325 write_fp_dreg(s, a->rd, tcg_rn); 9326 } 9327 return true; 9328 } 9329 9330 
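/*
 * FMOV Vd.D[1], Xn: write Xn into the upper 64 bits of the vector
 * register, leaving the low 64 bits untouched, so store directly rather
 * than going through write_fp_dreg (which would zero them).
 */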
static bool trans_FMOV_ux(DisasContext *s, arg_rr *a) 9331 { 9332 if (fp_access_check(s)) { 9333 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9334 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 9335 clear_vec_high(s, true, a->rd); 9336 } 9337 return true; 9338 } 9339 9340 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 9341 { 9342 if (!dc_isar_feature(aa64_fp16, s)) { 9343 return false; 9344 } 9345 if (fp_access_check(s)) { 9346 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9347 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 9348 } 9349 return true; 9350 } 9351 9352 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 9353 { 9354 if (fp_access_check(s)) { 9355 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9356 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 9357 } 9358 return true; 9359 } 9360 9361 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 9362 { 9363 if (fp_access_check(s)) { 9364 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9365 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 9366 } 9367 return true; 9368 } 9369 9370 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 9371 { 9372 if (fp_access_check(s)) { 9373 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9374 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 9375 } 9376 return true; 9377 } 9378 9379 typedef struct ENVScalar1 { 9380 NeonGenOneOpEnvFn *gen_bhs[3]; 9381 NeonGenOne64OpEnvFn *gen_d; 9382 } ENVScalar1; 9383 9384 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 9385 { 9386 if (!fp_access_check(s)) { 9387 return true; 9388 } 9389 if (a->esz == MO_64) { 9390 TCGv_i64 t = read_fp_dreg(s, a->rn); 9391 f->gen_d(t, tcg_env, t); 9392 write_fp_dreg(s, a->rd, t); 9393 } else { 9394 TCGv_i32 t = tcg_temp_new_i32(); 9395 9396 read_vec_element_i32(s, t, a->rn, 0, a->esz); 9397 f->gen_bhs[a->esz](t, tcg_env, t); 9398 write_fp_sreg(s, a->rd, t); 9399 } 9400 return true; 9401 } 9402 9403 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 9404 { 9405 if (a->esz == MO_64 && !a->q) { 9406 return false; 9407 } 9408 if (!fp_access_check(s)) { 9409 return true; 9410 } 9411 if (a->esz == MO_64) { 9412 TCGv_i64 t = tcg_temp_new_i64(); 9413 9414 for (int i = 0; i < 2; ++i) { 9415 read_vec_element(s, t, a->rn, i, MO_64); 9416 f->gen_d(t, tcg_env, t); 9417 write_vec_element(s, t, a->rd, i, MO_64); 9418 } 9419 } else { 9420 TCGv_i32 t = tcg_temp_new_i32(); 9421 int n = (a->q ? 
16 : 8) >> a->esz; 9422 9423 for (int i = 0; i < n; ++i) { 9424 read_vec_element_i32(s, t, a->rn, i, a->esz); 9425 f->gen_bhs[a->esz](t, tcg_env, t); 9426 write_vec_element_i32(s, t, a->rd, i, a->esz); 9427 } 9428 } 9429 clear_vec_high(s, a->q, a->rd); 9430 return true; 9431 } 9432 9433 static const ENVScalar1 f_scalar_sqabs = { 9434 { gen_helper_neon_qabs_s8, 9435 gen_helper_neon_qabs_s16, 9436 gen_helper_neon_qabs_s32 }, 9437 gen_helper_neon_qabs_s64, 9438 }; 9439 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9440 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9441 9442 static const ENVScalar1 f_scalar_sqneg = { 9443 { gen_helper_neon_qneg_s8, 9444 gen_helper_neon_qneg_s16, 9445 gen_helper_neon_qneg_s32 }, 9446 gen_helper_neon_qneg_s64, 9447 }; 9448 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9449 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9450 9451 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9452 { 9453 if (fp_access_check(s)) { 9454 TCGv_i64 t = read_fp_dreg(s, a->rn); 9455 f(t, t); 9456 write_fp_dreg(s, a->rd, t); 9457 } 9458 return true; 9459 } 9460 9461 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9462 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9463 9464 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9465 { 9466 if (fp_access_check(s)) { 9467 TCGv_i64 t = read_fp_dreg(s, a->rn); 9468 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9469 write_fp_dreg(s, a->rd, t); 9470 } 9471 return true; 9472 } 9473 9474 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9475 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9476 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9477 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9478 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9479 9480 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9481 ArithOneOp * const fn[3]) 9482 { 9483 if (a->esz == MO_64) { 9484 return false; 9485 } 9486 if (fp_access_check(s)) { 9487 TCGv_i64 t = tcg_temp_new_i64(); 9488 9489 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9490 fn[a->esz](t, t); 9491 clear_vec(s, a->rd); 9492 write_vec_element(s, t, a->rd, 0, a->esz); 9493 } 9494 return true; 9495 } 9496 9497 #define WRAP_ENV(NAME) \ 9498 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9499 { gen_helper_##NAME(d, tcg_env, n); } 9500 9501 WRAP_ENV(neon_unarrow_sat8) 9502 WRAP_ENV(neon_unarrow_sat16) 9503 WRAP_ENV(neon_unarrow_sat32) 9504 9505 static ArithOneOp * const f_scalar_sqxtun[] = { 9506 gen_neon_unarrow_sat8, 9507 gen_neon_unarrow_sat16, 9508 gen_neon_unarrow_sat32, 9509 }; 9510 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9511 9512 WRAP_ENV(neon_narrow_sat_s8) 9513 WRAP_ENV(neon_narrow_sat_s16) 9514 WRAP_ENV(neon_narrow_sat_s32) 9515 9516 static ArithOneOp * const f_scalar_sqxtn[] = { 9517 gen_neon_narrow_sat_s8, 9518 gen_neon_narrow_sat_s16, 9519 gen_neon_narrow_sat_s32, 9520 }; 9521 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9522 9523 WRAP_ENV(neon_narrow_sat_u8) 9524 WRAP_ENV(neon_narrow_sat_u16) 9525 WRAP_ENV(neon_narrow_sat_u32) 9526 9527 static ArithOneOp * const f_scalar_uqxtn[] = { 9528 gen_neon_narrow_sat_u8, 9529 gen_neon_narrow_sat_u16, 9530 gen_neon_narrow_sat_u32, 9531 }; 9532 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9533 9534 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) 9535 { 9536 if (fp_access_check(s)) { 9537 /* 9538 * 64 bit to 32 bit float conversion 9539 * with von Neumann rounding (round to odd) 9540 */ 9541 TCGv_i64 src = read_fp_dreg(s, a->rn); 9542 TCGv_i32 dst = 
tcg_temp_new_i32(); 9543 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); 9544 write_fp_sreg_merging(s, a->rd, a->rd, dst); 9545 } 9546 return true; 9547 } 9548 9549 #undef WRAP_ENV 9550 9551 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9552 { 9553 if (!a->q && a->esz == MO_64) { 9554 return false; 9555 } 9556 if (fp_access_check(s)) { 9557 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9558 } 9559 return true; 9560 } 9561 9562 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) 9563 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) 9564 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) 9565 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) 9566 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) 9567 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) 9568 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) 9569 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) 9570 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) 9571 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) 9572 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) 9573 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) 9574 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) 9575 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) 9576 9577 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9578 { 9579 if (a->esz == MO_64) { 9580 return false; 9581 } 9582 if (fp_access_check(s)) { 9583 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9584 } 9585 return true; 9586 } 9587 9588 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) 9589 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) 9590 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) 9591 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) 9592 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) 9593 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) 9594 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) 9595 9596 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, 9597 ArithOneOp * const fn[3]) 9598 { 9599 if (a->esz == MO_64) { 9600 return false; 9601 } 9602 if (fp_access_check(s)) { 9603 TCGv_i64 t0 = tcg_temp_new_i64(); 9604 TCGv_i64 t1 = tcg_temp_new_i64(); 9605 9606 read_vec_element(s, t0, a->rn, 0, MO_64); 9607 read_vec_element(s, t1, a->rn, 1, MO_64); 9608 fn[a->esz](t0, t0); 9609 fn[a->esz](t1, t1); 9610 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32); 9611 write_vec_element(s, t1, a->rd, a->q ? 
3 : 1, MO_32); 9612 clear_vec_high(s, a->q, a->rd); 9613 } 9614 return true; 9615 } 9616 9617 static ArithOneOp * const f_scalar_xtn[] = { 9618 gen_helper_neon_narrow_u8, 9619 gen_helper_neon_narrow_u16, 9620 tcg_gen_ext32u_i64, 9621 }; 9622 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9623 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9624 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9625 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9626 9627 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9628 { 9629 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9630 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9631 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9632 TCGv_i32 ahp = get_ahp_flag(); 9633 9634 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9635 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9636 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9637 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9638 tcg_gen_extu_i32_i64(d, tcg_lo); 9639 } 9640 9641 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9642 { 9643 TCGv_i32 tmp = tcg_temp_new_i32(); 9644 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9645 9646 gen_helper_vfp_fcvtsd(tmp, n, fpst); 9647 tcg_gen_extu_i32_i64(d, tmp); 9648 } 9649 9650 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9651 { 9652 /* 9653 * 64 bit to 32 bit float conversion 9654 * with von Neumann rounding (round to odd) 9655 */ 9656 TCGv_i32 tmp = tcg_temp_new_i32(); 9657 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); 9658 tcg_gen_extu_i32_i64(d, tmp); 9659 } 9660 9661 static ArithOneOp * const f_vector_fcvtn[] = { 9662 NULL, 9663 gen_fcvtn_hs, 9664 gen_fcvtn_sd, 9665 }; 9666 static ArithOneOp * const f_scalar_fcvtxn[] = { 9667 NULL, 9668 NULL, 9669 gen_fcvtxn_sd, 9670 }; 9671 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9672 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9673 9674 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9675 { 9676 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9677 TCGv_i32 tmp = tcg_temp_new_i32(); 9678 gen_helper_bfcvt_pair(tmp, n, fpst); 9679 tcg_gen_extu_i32_i64(d, tmp); 9680 } 9681 9682 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) 9683 { 9684 TCGv_ptr fpst = fpstatus_ptr(FPST_AH); 9685 TCGv_i32 tmp = tcg_temp_new_i32(); 9686 gen_helper_bfcvt_pair(tmp, n, fpst); 9687 tcg_gen_extu_i32_i64(d, tmp); 9688 } 9689 9690 static ArithOneOp * const f_vector_bfcvtn[2][3] = { 9691 { 9692 NULL, 9693 gen_bfcvtn_hs, 9694 NULL, 9695 }, { 9696 NULL, 9697 gen_bfcvtn_ah_hs, 9698 NULL, 9699 } 9700 }; 9701 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, 9702 f_vector_bfcvtn[s->fpcr_ah]) 9703 9704 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9705 { 9706 static NeonGenWidenFn * const widenfns[3] = { 9707 gen_helper_neon_widen_u8, 9708 gen_helper_neon_widen_u16, 9709 tcg_gen_extu_i32_i64, 9710 }; 9711 NeonGenWidenFn *widenfn; 9712 TCGv_i64 tcg_res[2]; 9713 TCGv_i32 tcg_op; 9714 int part, pass; 9715 9716 if (a->esz == MO_64) { 9717 return false; 9718 } 9719 if (!fp_access_check(s)) { 9720 return true; 9721 } 9722 9723 tcg_op = tcg_temp_new_i32(); 9724 widenfn = widenfns[a->esz]; 9725 part = a->q ? 
           2 : 0;

    for (pass = 0; pass < 2; pass++) {
        read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
        tcg_res[pass] = tcg_temp_new_i64();
        widenfn(tcg_res[pass], tcg_op);
        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
    }
    return true;
}

static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    return true;
}

TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)

static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
                          const FPScalar1 *f, int rmode)
{
    TCGv_i32 tcg_rmode = NULL;
    TCGv_ptr fpst;
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    if (rmode >= 0) {
        tcg_rmode = gen_set_rmode(rmode, fpst);
    }

    if (a->esz == MO_64) {
        TCGv_i64 t64 = tcg_temp_new_i64();

        for (int pass = 0; pass < 2; ++pass) {
            read_vec_element(s, t64, a->rn, pass, MO_64);
            f->gen_d(t64, t64, fpst);
            write_vec_element(s, t64, a->rd, pass, MO_64);
        }
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
            = (a->esz == MO_16 ? f->gen_h : f->gen_s);

        for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
            read_vec_element_i32(s, t32, a->rn, pass, a->esz);
            gen(t32, t32, fpst);
            write_vec_element_i32(s, t32, a->rd, pass, a->esz);
        }
    }
    clear_vec_high(s, a->q, a->rd);

    if (rmode >= 0) {
        gen_restore_rmode(tcg_rmode, fpst);
    }
    return true;
}

TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)

TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)

TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint32, FPROUNDING_ZERO)
TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint64, FPROUNDING_ZERO)
TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)

static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
                                           bool is_q, int rd, int rn, int data,
                                           gen_helper_gvec_2_ptr * const fns[3],
                                           ARMFPStatusFlavour fpsttype)
{
    int check = fp_access_check_vector_hsd(s, is_q, esz);
    TCGv_ptr fpst;

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn), fpst,
                       is_q ?
                              16 : 8, vec_full_reg_size(s),
                       data, fns[esz - 1]);
    return true;
}

static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
                             int rd, int rn, int data,
                             gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
                                          esz == MO_16 ? FPST_A64_F16 :
                                          FPST_A64);
}

static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
                                int rd, int rn, int data,
                                gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
                                          fns, select_ah_fpst(s, esz));
}

static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
    gen_helper_gvec_vcvt_sh,
    gen_helper_gvec_vcvt_sf,
    gen_helper_gvec_vcvt_sd,
};
TRANS(SCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
TRANS(SCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)

static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
    gen_helper_gvec_vcvt_uh,
    gen_helper_gvec_vcvt_uf,
    gen_helper_gvec_vcvt_ud,
};
TRANS(UCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
TRANS(UCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)

static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
    gen_helper_gvec_vcvt_rz_hs,
    gen_helper_gvec_vcvt_rz_fs,
    gen_helper_gvec_vcvt_rz_ds,
};
TRANS(FCVTZS_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)

static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
    gen_helper_gvec_vcvt_rz_hu,
    gen_helper_gvec_vcvt_rz_fu,
    gen_helper_gvec_vcvt_rz_du,
};
TRANS(FCVTZU_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)

static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
    gen_helper_gvec_vcvt_rm_sh,
    gen_helper_gvec_vcvt_rm_ss,
    gen_helper_gvec_vcvt_rm_sd,
};

static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
    gen_helper_gvec_vcvt_rm_uh,
    gen_helper_gvec_vcvt_rm_us,
    gen_helper_gvec_vcvt_rm_ud,
};

TRANS(FCVTNS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
TRANS(FCVTNU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
TRANS(FCVTPS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
TRANS(FCVTPU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
TRANS(FCVTMS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
TRANS(FCVTMU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
TRANS(FCVTZS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
TRANS(FCVTZU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
TRANS(FCVTAS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
TRANS(FCVTAU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)

static gen_helper_gvec_2_ptr * const f_fceq0[] = {
    gen_helper_gvec_fceq0_h,
    gen_helper_gvec_fceq0_s,
    gen_helper_gvec_fceq0_d,
};
TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)

static
gen_helper_gvec_2_ptr * const f_fcgt0[] = {
    gen_helper_gvec_fcgt0_h,
    gen_helper_gvec_fcgt0_s,
    gen_helper_gvec_fcgt0_d,
};
TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)

static gen_helper_gvec_2_ptr * const f_fcge0[] = {
    gen_helper_gvec_fcge0_h,
    gen_helper_gvec_fcge0_s,
    gen_helper_gvec_fcge0_d,
};
TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)

static gen_helper_gvec_2_ptr * const f_fclt0[] = {
    gen_helper_gvec_fclt0_h,
    gen_helper_gvec_fclt0_s,
    gen_helper_gvec_fclt0_d,
};
TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)

static gen_helper_gvec_2_ptr * const f_fcle0[] = {
    gen_helper_gvec_fcle0_h,
    gen_helper_gvec_fcle0_s,
    gen_helper_gvec_fcle0_d,
};
TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)

static gen_helper_gvec_2_ptr * const f_frecpe[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s,
    gen_helper_gvec_frecpe_d,
};
static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_rpres_s,
    gen_helper_gvec_frecpe_d,
};
TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)

static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s,
    gen_helper_gvec_frsqrte_d,
};
static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_rpres_s,
    gen_helper_gvec_frsqrte_d,
};
TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)

static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
{
    /*
     * Handle 2-reg-misc ops which are widening (so each size element
     * in the source becomes a 2*size element in the destination).
     * The only instruction like this is FCVTL.
     */
    int pass;
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return true;
    }

    if (a->esz == MO_64) {
        /* 32 -> 64 bit fp conversion */
        TCGv_i64 tcg_res[2];
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        int srcelt = a->q ? 2 : 0;

        fpst = fpstatus_ptr(FPST_A64);

        for (pass = 0; pass < 2; pass++) {
            tcg_res[pass] = tcg_temp_new_i64();
            read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
        }
        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
        }
    } else {
        /* 16 -> 32 bit fp conversion */
        int srcelt = a->q ?
                            4 : 0;
        TCGv_i32 tcg_res[4];
        TCGv_i32 ahp = get_ahp_flag();

        fpst = fpstatus_ptr(FPST_A64_F16);

        for (pass = 0; pass < 4; pass++) {
            tcg_res[pass] = tcg_temp_new_i32();
            read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
                                           fpst, ahp);
        }
        for (pass = 0; pass < 4; pass++) {
            write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
        }
    }
    clear_vec_high(s, true, a->rd);
    return true;
}

static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *   - branch target identifiers,
 *   - paciasp, pacibsp,
 *   - BRK insn
 *   - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ?
                                                     MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
    dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /*
     * Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /*
     * Bound the number of insns to execute to those left on the page.
     */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /*
         * Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault. This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = 0;
    s->sve_access_checked = 0;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else. This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0.
             */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /*
         * Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start = aarch64_tr_tb_start,
    .insn_start = aarch64_tr_insn_start,
    .translate_insn = aarch64_tr_translate_insn,
    .tb_stop = aarch64_tr_tb_stop,
};
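
/*
 * Usage sketch, for illustration only (the exact call site and signature
 * live in the target's generic translate code and may differ between QEMU
 * versions): this ops table is handed to the common translator loop,
 * roughly as
 *
 *     DisasContext dc = { };
 *     translator_loop(cs, tb, max_insns, pc, host_pc,
 *                     &aarch64_translator_ops, &dc.base);
 *
 * which calls init_disas_context() once per TB, then insn_start() and
 * translate_insn() for each instruction until the block ends, and finally
 * tb_stop() to emit the exit path selected above.
 */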