/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "exec/target_page.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}
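
/*
 * Worked example for the two decode helpers above (illustrative only):
 * for a 4-byte element the scale field holds log2 of the byte size,
 * i.e. 2, so an input of (3 << 3) | 2 decodes as uimm_scaled() ==
 * 3 << 2 == 12, a byte offset of 12.  Likewise
 * scale_by_log2_tag_granule(s, 2) yields 2 << LOG2_TAG_GRANULE == 32
 * for the 16-byte MTE tag granule.
 */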

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}
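
/*
 * Worked example for gen_top_byte_ignore() above (illustrative only):
 * with tbi == 1 (TBI0 set, TBI1 clear), an address with bit 55 clear
 * such as 0x5a00000012345678 becomes 0x0000000012345678, i.e. the tag
 * byte is replaced by the sign extension of bit 55; an address with
 * bit 55 set, e.g. 0x5aff800000000000, is left unchanged because in
 * that case the extension is only applied to the lower half of the
 * address space.
 */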

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register. But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access. This probes a single
 * address, the exact one specified. The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}
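
/*
 * Illustrative sketch of the descriptor packing above (the actual values
 * depend on the current translation state): a tag-checked 8-byte store
 * through gen_mte_check1() packs MIDX = get_mem_index(s), TBI = s->tbid,
 * TCMA = s->tcma, WRITE = 1, ALIGN from the MemOp and SIZEM1 = 7 into
 * 'desc', and gen_helper_mte_check() hands back the address to use for
 * the actual access, just as clean_data_tbi() does on the unchecked path.
 */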

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD,
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);

}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}
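
/*
 * Summary of the two checks above (illustrative): with FEAT_LSE2 a
 * 4-byte AccType_ATOMIC or AccType_ORDERED access need not be
 * 4-aligned, but check_lse2_align() raises an alignment fault when
 * ((addr + imm) % 16) + size > 16, i.e. when the access would cross a
 * 16-byte boundary; without FEAT_LSE2 the MemOp simply gains MO_ALIGN
 * and the access must be naturally aligned.
 */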

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments. For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path. A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/*
 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
 * - if FPCR.NEP == 0, clear the high elements of reg
 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
 *   (i.e. merge the result with those high elements)
 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
 */
static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i64 v)
{
    if (!s->fpcr_nep) {
        write_fp_dreg(s, reg, v);
        return;
    }

    /*
     * Move from mergereg to reg; this sets the high elements and
     * clears the bits above 128 as a side effect.
     */
    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
}

/*
 * Write a single-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 */
static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
}

/*
 * Write a half-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 * The caller must ensure that the top 16 bits of v are zero.
 */
static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, ARMFPStatusFlavour fpsttype, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, ARMFPStatusFlavour fpsttype,
                              int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
 * which for float32 is
 *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negh(chs_s, s);
    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, chs_s);
}

static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negs(chs_s, s);
    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, chs_s);
}

static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();

    gen_vfp_negd(chs_s, s);
    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, chs_s);
}
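
/*
 * Worked float32 example for the FPCR.AH == 1 negation above
 * (illustrative only): for s == 0x7fc00000 (a quiet NaN), abs(s) ==
 * 0x7fc00000 > 0x7f800000, so the movcond keeps s unchanged; for
 * s == 0x3f800000 (1.0f) the comparison fails and the sign-flipped
 * value 0xbf800000 (-1.0f) is selected, matching ordinary negation.
 */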

/*
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_abs(s)
 * which for float32 is
 *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, abs_s);
}

static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, abs_s);
}

static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64();

    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, abs_s);
}

static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negh(d, s);
    } else {
        gen_vfp_negh(d, s);
    }
}

static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negs(d, s);
    } else {
        gen_vfp_negs(d, s);
    }
}

static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negd(d, s);
    } else {
        gen_vfp_negd(d, s);
    }
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}
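
/*
 * Note on the overflow computation above (illustrative): for an
 * addition, signed overflow is the top bit of
 * (result ^ t0) & ~(t0 ^ t1), i.e. the operands had the same sign and
 * the result's sign differs.  For example, with
 * t0 == t1 == 0x4000000000000000 the result is 0x8000000000000000 and
 * that expression has bit 63 set, so VF is set while NF reflects the
 * (negative) result.
 */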

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = -1;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = 1;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/*
 * Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        bool ret;

        assert(dc_isar_feature(aa64_sme, s));
        ret = sme_sm_enabled_check(s);
        s->sve_access_checked = (ret ? 1 : -1);
        return ret;
    }
    if (s->sve_excp_el) {
        /* Assert that we only raise one exception per instruction. */
        assert(!s->sve_access_checked);
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        s->sve_access_checked = -1;
        return false;
    }
    s->sve_access_checked = 1;
    return fp_access_check(s);
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority. This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        bool ret = sme_access_check(s);
        s->fp_access_checked = (ret ? 1 : -1);
        return ret;
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}
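
/*
 * Example of the extension helper above (illustrative only): for an
 * operand such as "ADD X0, X1, W2, UXTW #2", option == 2 selects an
 * unsigned 32-bit extension (extsize == MO_32, is_signed == false), so
 * the low 32 bits of the source are zero-extended and the result is
 * then shifted left by 2.
 */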

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}


static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3. */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page. */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_ERET(DisasContext *s, arg_ERET *a)
{
    TCGv_i64 dst;

    if (s->current_el == 0) {
        return false;
    }
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_ERETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }
    /* The FGT trap takes precedence over an auth trap. */
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    dst = auth_branch_target(s, dst, cpu_X[31], !a->m);

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}

static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_YIELD;
    }
    return true;
}

static bool trans_WFI(DisasContext *s, arg_WFI *a)
{
    s->base.is_jmp = DISAS_WFI;
    return true;
}

static bool trans_WFE(DisasContext *s, arg_WFI *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_WFE;
    }
    return true;
}

static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
{
    if (!dc_isar_feature(aa64_wfxt, s)) {
        return false;
    }

    /*
     * Because we need to pass the register value to the helper,
     * it's easier to emit the code now, unlike trans_WFI which
     * defers it to aarch64_tr_tb_stop(). That means we need to
     * check ss_active so that single-stepping a WFIT doesn't halt.
1967 */ 1968 if (s->ss_active) { 1969 /* Act like a NOP under architectural singlestep */ 1970 return true; 1971 } 1972 1973 gen_a64_update_pc(s, 4); 1974 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1975 /* Go back to the main loop to check for interrupts */ 1976 s->base.is_jmp = DISAS_EXIT; 1977 return true; 1978 } 1979 1980 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1981 { 1982 if (!dc_isar_feature(aa64_wfxt, s)) { 1983 return false; 1984 } 1985 1986 /* 1987 * We rely here on our WFE implementation being a NOP, so we 1988 * don't need to do anything different to handle the WFET timeout 1989 * from what trans_WFE does. 1990 */ 1991 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1992 s->base.is_jmp = DISAS_WFE; 1993 } 1994 return true; 1995 } 1996 1997 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1998 { 1999 if (s->pauth_active) { 2000 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 2001 } 2002 return true; 2003 } 2004 2005 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 2006 { 2007 if (s->pauth_active) { 2008 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2009 } 2010 return true; 2011 } 2012 2013 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 2014 { 2015 if (s->pauth_active) { 2016 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2017 } 2018 return true; 2019 } 2020 2021 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 2022 { 2023 if (s->pauth_active) { 2024 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2025 } 2026 return true; 2027 } 2028 2029 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 2030 { 2031 if (s->pauth_active) { 2032 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2033 } 2034 return true; 2035 } 2036 2037 static bool trans_ESB(DisasContext *s, arg_ESB *a) 2038 { 2039 /* Without RAS, we must implement this as NOP. */ 2040 if (dc_isar_feature(aa64_ras, s)) { 2041 /* 2042 * QEMU does not have a source of physical SErrors, 2043 * so we are only concerned with virtual SErrors. 2044 * The pseudocode in the ARM for this case is 2045 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 2046 * AArch64.vESBOperation(); 2047 * Most of the condition can be evaluated at translation time. 2048 * Test for EL2 present, and defer test for SEL2 to runtime. 
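         * The remaining part of EL2Enabled(), i.e. whether EL2 is enabled
         * in the current Security state (SCR_EL3.EEL2 for Secure EL2), is
         * what gen_helper_vesb then checks at run time.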
2049 */ 2050 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 2051 gen_helper_vesb(tcg_env); 2052 } 2053 } 2054 return true; 2055 } 2056 2057 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 2058 { 2059 if (s->pauth_active) { 2060 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2061 } 2062 return true; 2063 } 2064 2065 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 2066 { 2067 if (s->pauth_active) { 2068 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2069 } 2070 return true; 2071 } 2072 2073 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 2074 { 2075 if (s->pauth_active) { 2076 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2077 } 2078 return true; 2079 } 2080 2081 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 2082 { 2083 if (s->pauth_active) { 2084 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2085 } 2086 return true; 2087 } 2088 2089 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 2090 { 2091 if (s->pauth_active) { 2092 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2093 } 2094 return true; 2095 } 2096 2097 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 2098 { 2099 if (s->pauth_active) { 2100 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2101 } 2102 return true; 2103 } 2104 2105 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 2106 { 2107 if (s->pauth_active) { 2108 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2109 } 2110 return true; 2111 } 2112 2113 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 2114 { 2115 if (s->pauth_active) { 2116 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2117 } 2118 return true; 2119 } 2120 2121 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 2122 { 2123 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2124 return true; 2125 } 2126 2127 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 2128 { 2129 /* We handle DSB and DMB the same way */ 2130 TCGBar bar; 2131 2132 switch (a->types) { 2133 case 1: /* MBReqTypes_Reads */ 2134 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 2135 break; 2136 case 2: /* MBReqTypes_Writes */ 2137 bar = TCG_BAR_SC | TCG_MO_ST_ST; 2138 break; 2139 default: /* MBReqTypes_All */ 2140 bar = TCG_BAR_SC | TCG_MO_ALL; 2141 break; 2142 } 2143 tcg_gen_mb(bar); 2144 return true; 2145 } 2146 2147 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 2148 { 2149 if (!dc_isar_feature(aa64_xs, s)) { 2150 return false; 2151 } 2152 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 2153 return true; 2154 } 2155 2156 static bool trans_ISB(DisasContext *s, arg_ISB *a) 2157 { 2158 /* 2159 * We need to break the TB after this insn to execute 2160 * self-modifying code correctly and also to take 2161 * any pending interrupts immediately. 2162 */ 2163 reset_btype(s); 2164 gen_goto_tb(s, 0, 4); 2165 return true; 2166 } 2167 2168 static bool trans_SB(DisasContext *s, arg_SB *a) 2169 { 2170 if (!dc_isar_feature(aa64_sb, s)) { 2171 return false; 2172 } 2173 /* 2174 * TODO: There is no speculation barrier opcode for TCG; 2175 * MB and end the TB instead. 
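     * Since TCG performs no speculative execution, a full barrier plus
     * ending the TB is a conservative but correct approximation of SB.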
2176 */ 2177 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2178 gen_goto_tb(s, 0, 4); 2179 return true; 2180 } 2181 2182 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2183 { 2184 if (!dc_isar_feature(aa64_condm_4, s)) { 2185 return false; 2186 } 2187 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2188 return true; 2189 } 2190 2191 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2192 { 2193 TCGv_i32 z; 2194 2195 if (!dc_isar_feature(aa64_condm_5, s)) { 2196 return false; 2197 } 2198 2199 z = tcg_temp_new_i32(); 2200 2201 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2202 2203 /* 2204 * (!C & !Z) << 31 2205 * (!(C | Z)) << 31 2206 * ~((C | Z) << 31) 2207 * ~-(C | Z) 2208 * (C | Z) - 1 2209 */ 2210 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2211 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2212 2213 /* !(Z & C) */ 2214 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2215 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2216 2217 /* (!C & Z) << 31 -> -(Z & ~C) */ 2218 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2219 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2220 2221 /* C | Z */ 2222 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2223 2224 return true; 2225 } 2226 2227 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2228 { 2229 if (!dc_isar_feature(aa64_condm_5, s)) { 2230 return false; 2231 } 2232 2233 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2234 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2235 2236 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2237 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2238 2239 tcg_gen_movi_i32(cpu_NF, 0); 2240 tcg_gen_movi_i32(cpu_VF, 0); 2241 2242 return true; 2243 } 2244 2245 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2246 { 2247 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2248 return false; 2249 } 2250 if (a->imm & 1) { 2251 set_pstate_bits(PSTATE_UAO); 2252 } else { 2253 clear_pstate_bits(PSTATE_UAO); 2254 } 2255 gen_rebuild_hflags(s); 2256 s->base.is_jmp = DISAS_TOO_MANY; 2257 return true; 2258 } 2259 2260 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2261 { 2262 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2263 return false; 2264 } 2265 if (a->imm & 1) { 2266 set_pstate_bits(PSTATE_PAN); 2267 } else { 2268 clear_pstate_bits(PSTATE_PAN); 2269 } 2270 gen_rebuild_hflags(s); 2271 s->base.is_jmp = DISAS_TOO_MANY; 2272 return true; 2273 } 2274 2275 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2276 { 2277 if (s->current_el == 0) { 2278 return false; 2279 } 2280 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2281 s->base.is_jmp = DISAS_TOO_MANY; 2282 return true; 2283 } 2284 2285 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2286 { 2287 if (!dc_isar_feature(aa64_ssbs, s)) { 2288 return false; 2289 } 2290 if (a->imm & 1) { 2291 set_pstate_bits(PSTATE_SSBS); 2292 } else { 2293 clear_pstate_bits(PSTATE_SSBS); 2294 } 2295 /* Don't need to rebuild hflags since SSBS is a nop */ 2296 s->base.is_jmp = DISAS_TOO_MANY; 2297 return true; 2298 } 2299 2300 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2301 { 2302 if (!dc_isar_feature(aa64_dit, s)) { 2303 return false; 2304 } 2305 if (a->imm & 1) { 2306 set_pstate_bits(PSTATE_DIT); 2307 } else { 2308 clear_pstate_bits(PSTATE_DIT); 2309 } 2310 /* There's no need to rebuild hflags because DIT is a nop */ 2311 s->base.is_jmp = DISAS_TOO_MANY; 2312 return true; 2313 } 2314 2315 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2316 { 2317 if (dc_isar_feature(aa64_mte, s)) { 2318 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2319 if (a->imm & 1) { 2320 set_pstate_bits(PSTATE_TCO); 2321 } else { 2322 clear_pstate_bits(PSTATE_TCO); 2323 } 2324 gen_rebuild_hflags(s); 2325 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2326 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2327 return true; 2328 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2329 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2330 return true; 2331 } else { 2332 /* Insn not present */ 2333 return false; 2334 } 2335 } 2336 2337 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2338 { 2339 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2340 s->base.is_jmp = DISAS_TOO_MANY; 2341 return true; 2342 } 2343 2344 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2345 { 2346 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2347 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2348 s->base.is_jmp = DISAS_UPDATE_EXIT; 2349 return true; 2350 } 2351 2352 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2353 { 2354 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2355 return false; 2356 } 2357 2358 if (a->imm == 0) { 2359 clear_pstate_bits(PSTATE_ALLINT); 2360 } else if (s->current_el > 1) { 2361 set_pstate_bits(PSTATE_ALLINT); 2362 } else { 2363 gen_helper_msr_set_allint_el1(tcg_env); 2364 } 2365 2366 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2367 s->base.is_jmp = DISAS_UPDATE_EXIT; 2368 return true; 2369 } 2370 2371 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2372 { 2373 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2374 return false; 2375 } 2376 if (sme_access_check(s)) { 2377 int old = s->pstate_sm | (s->pstate_za << 1); 2378 int new = a->imm * 3; 2379 2380 if ((old ^ new) & a->mask) { 2381 /* At least one bit changes. */ 2382 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2383 tcg_constant_i32(a->mask)); 2384 s->base.is_jmp = DISAS_TOO_MANY; 2385 } 2386 } 2387 return true; 2388 } 2389 2390 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2391 { 2392 TCGv_i32 tmp = tcg_temp_new_i32(); 2393 TCGv_i32 nzcv = tcg_temp_new_i32(); 2394 2395 /* build bit 31, N */ 2396 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2397 /* build bit 30, Z */ 2398 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2399 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2400 /* build bit 29, C */ 2401 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2402 /* build bit 28, V */ 2403 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2404 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2405 /* generate result */ 2406 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2407 } 2408 2409 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2410 { 2411 TCGv_i32 nzcv = tcg_temp_new_i32(); 2412 2413 /* take NZCV from R[t] */ 2414 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2415 2416 /* bit 31, N */ 2417 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2418 /* bit 30, Z */ 2419 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2420 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2421 /* bit 29, C */ 2422 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2423 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2424 /* bit 28, V */ 2425 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2426 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2427 } 2428 2429 static void gen_sysreg_undef(DisasContext *s, bool isread, 2430 uint8_t op0, uint8_t op1, uint8_t op2, 2431 uint8_t crn, uint8_t crm, uint8_t rt) 2432 { 2433 /* 2434 * Generate code to emit an UNDEF with correct syndrome 2435 * information for a failed system register access. 
2436 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2437 * but if FEAT_IDST is implemented then read accesses to registers 2438 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2439 * syndrome. 2440 */ 2441 uint32_t syndrome; 2442 2443 if (isread && dc_isar_feature(aa64_ids, s) && 2444 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2445 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2446 } else { 2447 syndrome = syn_uncategorized(); 2448 } 2449 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2450 } 2451 2452 /* MRS - move from system register 2453 * MSR (register) - move to system register 2454 * SYS 2455 * SYSL 2456 * These are all essentially the same insn in 'read' and 'write' 2457 * versions, with varying op0 fields. 2458 */ 2459 static void handle_sys(DisasContext *s, bool isread, 2460 unsigned int op0, unsigned int op1, unsigned int op2, 2461 unsigned int crn, unsigned int crm, unsigned int rt) 2462 { 2463 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2464 crn, crm, op0, op1, op2); 2465 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2466 bool need_exit_tb = false; 2467 bool nv_trap_to_el2 = false; 2468 bool nv_redirect_reg = false; 2469 bool skip_fp_access_checks = false; 2470 bool nv2_mem_redirect = false; 2471 TCGv_ptr tcg_ri = NULL; 2472 TCGv_i64 tcg_rt; 2473 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2474 2475 if (crn == 11 || crn == 15) { 2476 /* 2477 * Check for TIDCP trap, which must take precedence over 2478 * the UNDEF for "no such register" etc. 2479 */ 2480 switch (s->current_el) { 2481 case 0: 2482 if (dc_isar_feature(aa64_tidcp1, s)) { 2483 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2484 } 2485 break; 2486 case 1: 2487 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2488 break; 2489 } 2490 } 2491 2492 if (!ri) { 2493 /* Unknown register; this might be a guest error or a QEMU 2494 * unimplemented feature. 2495 */ 2496 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2497 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2498 isread ? "read" : "write", op0, op1, crn, crm, op2); 2499 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2500 return; 2501 } 2502 2503 if (s->nv2 && ri->nv2_redirect_offset) { 2504 /* 2505 * Some registers always redirect to memory; some only do so if 2506 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2507 * pairs which share an offset; see the table in R_CSRPQ). 2508 */ 2509 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2510 nv2_mem_redirect = s->nv1; 2511 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2512 nv2_mem_redirect = !s->nv1; 2513 } else { 2514 nv2_mem_redirect = true; 2515 } 2516 } 2517 2518 /* Check access permissions */ 2519 if (!cp_access_ok(s->current_el, ri, isread)) { 2520 /* 2521 * FEAT_NV/NV2 handling does not do the usual FP access checks 2522 * for registers only accessible at EL2 (though it *does* do them 2523 * for registers accessible at EL1). 2524 */ 2525 skip_fp_access_checks = true; 2526 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2527 /* 2528 * This is one of the few EL2 registers which should redirect 2529 * to the equivalent EL1 register. We do that after running 2530 * the EL2 register's accessfn. 2531 */ 2532 nv_redirect_reg = true; 2533 assert(!nv2_mem_redirect); 2534 } else if (nv2_mem_redirect) { 2535 /* 2536 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2537 * UNDEF to EL1. 
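             * Nothing is generated here; the redirected memory access
             * itself is emitted further down, once the remaining access
             * checks have been performed.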
2538 */ 2539 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2540 /* 2541 * This register / instruction exists and is an EL2 register, so 2542 * we must trap to EL2 if accessed in nested virtualization EL1 2543 * instead of UNDEFing. We'll do that after the usual access checks. 2544 * (This makes a difference only for a couple of registers like 2545 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2546 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2547 * an accessfn which does nothing when called from EL1, because 2548 * the trap-to-EL3 controls which would apply to that register 2549 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2550 */ 2551 nv_trap_to_el2 = true; 2552 } else { 2553 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2554 return; 2555 } 2556 } 2557 2558 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2559 /* Emit code to perform further access permissions checks at 2560 * runtime; this may result in an exception. 2561 */ 2562 gen_a64_update_pc(s, 0); 2563 tcg_ri = tcg_temp_new_ptr(); 2564 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2565 tcg_constant_i32(key), 2566 tcg_constant_i32(syndrome), 2567 tcg_constant_i32(isread)); 2568 } else if (ri->type & ARM_CP_RAISES_EXC) { 2569 /* 2570 * The readfn or writefn might raise an exception; 2571 * synchronize the CPU state in case it does. 2572 */ 2573 gen_a64_update_pc(s, 0); 2574 } 2575 2576 if (!skip_fp_access_checks) { 2577 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2578 return; 2579 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2580 return; 2581 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2582 return; 2583 } 2584 } 2585 2586 if (nv_trap_to_el2) { 2587 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2588 return; 2589 } 2590 2591 if (nv_redirect_reg) { 2592 /* 2593 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2594 * Conveniently in all cases the encoding of the EL1 register is 2595 * identical to the EL2 register except that opc1 is 0. 2596 * Get the reginfo for the EL1 register to use for the actual access. 2597 * We don't use the EL1 register's access function, and 2598 * fine-grained-traps on EL1 also do not apply here. 2599 */ 2600 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2601 crn, crm, op0, 0, op2); 2602 ri = get_arm_cp_reginfo(s->cp_regs, key); 2603 assert(ri); 2604 assert(cp_access_ok(s->current_el, ri, isread)); 2605 /* 2606 * We might not have done an update_pc earlier, so check we don't 2607 * need it. We could support this in future if necessary. 2608 */ 2609 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2610 } 2611 2612 if (nv2_mem_redirect) { 2613 /* 2614 * This system register is being redirected into an EL2 memory access. 2615 * This means it is not an IO operation, doesn't change hflags, 2616 * and need not end the TB, because it has no side effects. 2617 * 2618 * The access is 64-bit single copy atomic, guaranteed aligned because 2619 * of the definition of VCNR_EL2. Its endianness depends on 2620 * SCTLR_EL2.EE, not on the data endianness of EL1. 2621 * It is done under either the EL2 translation regime or the EL2&0 2622 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2623 * PSTATE.PAN is 0. 2624 */ 2625 TCGv_i64 ptr = tcg_temp_new_i64(); 2626 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2627 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2628 int memidx = arm_to_core_mmu_idx(armmemidx); 2629 uint32_t syn; 2630 2631 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2632 2633 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2634 tcg_gen_addi_i64(ptr, ptr, 2635 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2636 tcg_rt = cpu_reg(s, rt); 2637 2638 syn = syn_data_abort_vncr(0, !isread, 0); 2639 disas_set_insn_syndrome(s, syn); 2640 if (isread) { 2641 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2642 } else { 2643 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2644 } 2645 return; 2646 } 2647 2648 /* Handle special cases first */ 2649 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2650 case 0: 2651 break; 2652 case ARM_CP_NOP: 2653 return; 2654 case ARM_CP_NZCV: 2655 tcg_rt = cpu_reg(s, rt); 2656 if (isread) { 2657 gen_get_nzcv(tcg_rt); 2658 } else { 2659 gen_set_nzcv(tcg_rt); 2660 } 2661 return; 2662 case ARM_CP_CURRENTEL: 2663 { 2664 /* 2665 * Reads as current EL value from pstate, which is 2666 * guaranteed to be constant by the tb flags. 2667 * For nested virt we should report EL2. 2668 */ 2669 int el = s->nv ? 2 : s->current_el; 2670 tcg_rt = cpu_reg(s, rt); 2671 tcg_gen_movi_i64(tcg_rt, el << 2); 2672 return; 2673 } 2674 case ARM_CP_DC_ZVA: 2675 /* Writes clear the aligned block of memory which rt points into. */ 2676 if (s->mte_active[0]) { 2677 int desc = 0; 2678 2679 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2680 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2681 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2682 2683 tcg_rt = tcg_temp_new_i64(); 2684 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2685 tcg_constant_i32(desc), cpu_reg(s, rt)); 2686 } else { 2687 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2688 } 2689 gen_helper_dc_zva(tcg_env, tcg_rt); 2690 return; 2691 case ARM_CP_DC_GVA: 2692 { 2693 TCGv_i64 clean_addr, tag; 2694 2695 /* 2696 * DC_GVA, like DC_ZVA, requires that we supply the original 2697 * pointer for an invalid page. Probe that address first. 2698 */ 2699 tcg_rt = cpu_reg(s, rt); 2700 clean_addr = clean_data_tbi(s, tcg_rt); 2701 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2702 2703 if (s->ata[0]) { 2704 /* Extract the tag from the register to match STZGM. */ 2705 tag = tcg_temp_new_i64(); 2706 tcg_gen_shri_i64(tag, tcg_rt, 56); 2707 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2708 } 2709 } 2710 return; 2711 case ARM_CP_DC_GZVA: 2712 { 2713 TCGv_i64 clean_addr, tag; 2714 2715 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2716 tcg_rt = cpu_reg(s, rt); 2717 clean_addr = clean_data_tbi(s, tcg_rt); 2718 gen_helper_dc_zva(tcg_env, clean_addr); 2719 2720 if (s->ata[0]) { 2721 /* Extract the tag from the register to match STZGM. 
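             * The allocation tag lives in bits [59:56] of Xt; shifting
             * right by 56 brings it down to the low bits for the
             * stzgm_tags helper.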
*/ 2722 tag = tcg_temp_new_i64(); 2723 tcg_gen_shri_i64(tag, tcg_rt, 56); 2724 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2725 } 2726 } 2727 return; 2728 default: 2729 g_assert_not_reached(); 2730 } 2731 2732 if (ri->type & ARM_CP_IO) { 2733 /* I/O operations must end the TB here (whether read or write) */ 2734 need_exit_tb = translator_io_start(&s->base); 2735 } 2736 2737 tcg_rt = cpu_reg(s, rt); 2738 2739 if (isread) { 2740 if (ri->type & ARM_CP_CONST) { 2741 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2742 } else if (ri->readfn) { 2743 if (!tcg_ri) { 2744 tcg_ri = gen_lookup_cp_reg(key); 2745 } 2746 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2747 } else { 2748 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2749 } 2750 } else { 2751 if (ri->type & ARM_CP_CONST) { 2752 /* If not forbidden by access permissions, treat as WI */ 2753 return; 2754 } else if (ri->writefn) { 2755 if (!tcg_ri) { 2756 tcg_ri = gen_lookup_cp_reg(key); 2757 } 2758 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2759 } else { 2760 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2761 } 2762 } 2763 2764 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2765 /* 2766 * A write to any coprocessor register that ends a TB 2767 * must rebuild the hflags for the next TB. 2768 */ 2769 gen_rebuild_hflags(s); 2770 /* 2771 * We default to ending the TB on a coprocessor register write, 2772 * but allow this to be suppressed by the register definition 2773 * (usually only necessary to work around guest bugs). 2774 */ 2775 need_exit_tb = true; 2776 } 2777 if (need_exit_tb) { 2778 s->base.is_jmp = DISAS_UPDATE_EXIT; 2779 } 2780 } 2781 2782 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2783 { 2784 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2785 return true; 2786 } 2787 2788 static bool trans_SVC(DisasContext *s, arg_i *a) 2789 { 2790 /* 2791 * For SVC, HVC and SMC we advance the single-step state 2792 * machine before taking the exception. This is architecturally 2793 * mandated, to ensure that single-stepping a system call 2794 * instruction works properly. 2795 */ 2796 uint32_t syndrome = syn_aa64_svc(a->imm); 2797 if (s->fgt_svc) { 2798 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2799 return true; 2800 } 2801 gen_ss_advance(s); 2802 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2803 return true; 2804 } 2805 2806 static bool trans_HVC(DisasContext *s, arg_i *a) 2807 { 2808 int target_el = s->current_el == 3 ? 3 : 2; 2809 2810 if (s->current_el == 0) { 2811 unallocated_encoding(s); 2812 return true; 2813 } 2814 /* 2815 * The pre HVC helper handles cases when HVC gets trapped 2816 * as an undefined insn by runtime configuration. 
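     * (e.g. when HVC is disabled via SCR_EL3.HCE). Since the helper may
     * raise that exception itself, the PC must be up to date first.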
2817 */ 2818 gen_a64_update_pc(s, 0); 2819 gen_helper_pre_hvc(tcg_env); 2820 /* Architecture requires ss advance before we do the actual work */ 2821 gen_ss_advance(s); 2822 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2823 return true; 2824 } 2825 2826 static bool trans_SMC(DisasContext *s, arg_i *a) 2827 { 2828 if (s->current_el == 0) { 2829 unallocated_encoding(s); 2830 return true; 2831 } 2832 gen_a64_update_pc(s, 0); 2833 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2834 /* Architecture requires ss advance before we do the actual work */ 2835 gen_ss_advance(s); 2836 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2837 return true; 2838 } 2839 2840 static bool trans_BRK(DisasContext *s, arg_i *a) 2841 { 2842 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2843 return true; 2844 } 2845 2846 static bool trans_HLT(DisasContext *s, arg_i *a) 2847 { 2848 /* 2849 * HLT. This has two purposes. 2850 * Architecturally, it is an external halting debug instruction. 2851 * Since QEMU doesn't implement external debug, we treat this as 2852 * it is required for halting debug disabled: it will UNDEF. 2853 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2854 */ 2855 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2856 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2857 } else { 2858 unallocated_encoding(s); 2859 } 2860 return true; 2861 } 2862 2863 /* 2864 * Load/Store exclusive instructions are implemented by remembering 2865 * the value/address loaded, and seeing if these are the same 2866 * when the store is performed. This is not actually the architecturally 2867 * mandated semantics, but it works for typical guest code sequences 2868 * and avoids having to monitor regular stores. 2869 * 2870 * The store exclusive uses the atomic cmpxchg primitives to avoid 2871 * races in multi-threaded linux-user and when MTTCG softmmu is 2872 * enabled. 
2873 */ 2874 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2875 int size, bool is_pair) 2876 { 2877 int idx = get_mem_index(s); 2878 TCGv_i64 dirty_addr, clean_addr; 2879 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2880 2881 s->is_ldex = true; 2882 dirty_addr = cpu_reg_sp(s, rn); 2883 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2884 2885 g_assert(size <= 3); 2886 if (is_pair) { 2887 g_assert(size >= 2); 2888 if (size == 2) { 2889 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2890 if (s->be_data == MO_LE) { 2891 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2892 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2893 } else { 2894 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2895 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2896 } 2897 } else { 2898 TCGv_i128 t16 = tcg_temp_new_i128(); 2899 2900 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2901 2902 if (s->be_data == MO_LE) { 2903 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2904 cpu_exclusive_high, t16); 2905 } else { 2906 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2907 cpu_exclusive_val, t16); 2908 } 2909 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2910 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2911 } 2912 } else { 2913 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2914 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2915 } 2916 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2917 } 2918 2919 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2920 int rn, int size, int is_pair) 2921 { 2922 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2923 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2924 * [addr] = {Rt}; 2925 * if (is_pair) { 2926 * [addr + datasize] = {Rt2}; 2927 * } 2928 * {Rd} = 0; 2929 * } else { 2930 * {Rd} = 1; 2931 * } 2932 * env->exclusive_addr = -1; 2933 */ 2934 TCGLabel *fail_label = gen_new_label(); 2935 TCGLabel *done_label = gen_new_label(); 2936 TCGv_i64 tmp, clean_addr; 2937 MemOp memop; 2938 2939 /* 2940 * FIXME: We are out of spec here. We have recorded only the address 2941 * from load_exclusive, not the entire range, and we assume that the 2942 * size of the access on both sides match. The architecture allows the 2943 * store to be smaller than the load, so long as the stored bytes are 2944 * within the range recorded by the load. 2945 */ 2946 2947 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2948 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2949 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2950 2951 /* 2952 * The write, and any associated faults, only happen if the virtual 2953 * and physical addresses pass the exclusive monitor check. These 2954 * faults are exceedingly unlikely, because normally the guest uses 2955 * the exact same address register for the load_exclusive, and we 2956 * would have recognized these faults there. 2957 * 2958 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2959 * unaligned 4-byte write within the range of an aligned 8-byte load. 2960 * With LSE2, the store would need to cross a 16-byte boundary when the 2961 * load did not, which would mean the store is outside the range 2962 * recorded for the monitor, which would have failed a corrected monitor 2963 * check above. 
For now, we assume no size change and retain the 2964 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2965 * 2966 * It is possible to trigger an MTE fault, by performing the load with 2967 * a virtual address with a valid tag and performing the store with the 2968 * same virtual address and a different invalid tag. 2969 */ 2970 memop = size + is_pair; 2971 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2972 memop |= MO_ALIGN; 2973 } 2974 memop = finalize_memop(s, memop); 2975 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2976 2977 tmp = tcg_temp_new_i64(); 2978 if (is_pair) { 2979 if (size == 2) { 2980 if (s->be_data == MO_LE) { 2981 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2982 } else { 2983 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2984 } 2985 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2986 cpu_exclusive_val, tmp, 2987 get_mem_index(s), memop); 2988 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2989 } else { 2990 TCGv_i128 t16 = tcg_temp_new_i128(); 2991 TCGv_i128 c16 = tcg_temp_new_i128(); 2992 TCGv_i64 a, b; 2993 2994 if (s->be_data == MO_LE) { 2995 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2996 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2997 cpu_exclusive_high); 2998 } else { 2999 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 3000 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 3001 cpu_exclusive_val); 3002 } 3003 3004 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 3005 get_mem_index(s), memop); 3006 3007 a = tcg_temp_new_i64(); 3008 b = tcg_temp_new_i64(); 3009 if (s->be_data == MO_LE) { 3010 tcg_gen_extr_i128_i64(a, b, t16); 3011 } else { 3012 tcg_gen_extr_i128_i64(b, a, t16); 3013 } 3014 3015 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 3016 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 3017 tcg_gen_or_i64(tmp, a, b); 3018 3019 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 3020 } 3021 } else { 3022 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 3023 cpu_reg(s, rt), get_mem_index(s), memop); 3024 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 3025 } 3026 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 3027 tcg_gen_br(done_label); 3028 3029 gen_set_label(fail_label); 3030 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 3031 gen_set_label(done_label); 3032 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 3033 } 3034 3035 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 3036 int rn, int size) 3037 { 3038 TCGv_i64 tcg_rs = cpu_reg(s, rs); 3039 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3040 int memidx = get_mem_index(s); 3041 TCGv_i64 clean_addr; 3042 MemOp memop; 3043 3044 if (rn == 31) { 3045 gen_check_sp_alignment(s); 3046 } 3047 memop = check_atomic_align(s, rn, size); 3048 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3049 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 3050 memidx, memop); 3051 } 3052 3053 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 3054 int rn, int size) 3055 { 3056 TCGv_i64 s1 = cpu_reg(s, rs); 3057 TCGv_i64 s2 = cpu_reg(s, rs + 1); 3058 TCGv_i64 t1 = cpu_reg(s, rt); 3059 TCGv_i64 t2 = cpu_reg(s, rt + 1); 3060 TCGv_i64 clean_addr; 3061 int memidx = get_mem_index(s); 3062 MemOp memop; 3063 3064 if (rn == 31) { 3065 gen_check_sp_alignment(s); 3066 } 3067 3068 /* This is a single atomic access, despite the "pair". 
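     * A rough sketch, in the style of the pseudocode comment in
     * gen_store_exclusive() above (illustrative only):
     *
     *   cmp = {Rs+1:Rs};  val = {Rt+1:Rt};
     *   old = cmpxchg([Xn], cmp, val);     // one 2*size-bit atomic access
     *   {Rs+1:Rs} = old;
     *
     * with the byte order of each concatenation chosen by s->be_data,
     * as in the code below.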
*/ 3069 memop = check_atomic_align(s, rn, size + 1); 3070 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3071 3072 if (size == 2) { 3073 TCGv_i64 cmp = tcg_temp_new_i64(); 3074 TCGv_i64 val = tcg_temp_new_i64(); 3075 3076 if (s->be_data == MO_LE) { 3077 tcg_gen_concat32_i64(val, t1, t2); 3078 tcg_gen_concat32_i64(cmp, s1, s2); 3079 } else { 3080 tcg_gen_concat32_i64(val, t2, t1); 3081 tcg_gen_concat32_i64(cmp, s2, s1); 3082 } 3083 3084 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 3085 3086 if (s->be_data == MO_LE) { 3087 tcg_gen_extr32_i64(s1, s2, cmp); 3088 } else { 3089 tcg_gen_extr32_i64(s2, s1, cmp); 3090 } 3091 } else { 3092 TCGv_i128 cmp = tcg_temp_new_i128(); 3093 TCGv_i128 val = tcg_temp_new_i128(); 3094 3095 if (s->be_data == MO_LE) { 3096 tcg_gen_concat_i64_i128(val, t1, t2); 3097 tcg_gen_concat_i64_i128(cmp, s1, s2); 3098 } else { 3099 tcg_gen_concat_i64_i128(val, t2, t1); 3100 tcg_gen_concat_i64_i128(cmp, s2, s1); 3101 } 3102 3103 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 3104 3105 if (s->be_data == MO_LE) { 3106 tcg_gen_extr_i128_i64(s1, s2, cmp); 3107 } else { 3108 tcg_gen_extr_i128_i64(s2, s1, cmp); 3109 } 3110 } 3111 } 3112 3113 /* 3114 * Compute the ISS.SF bit for syndrome information if an exception 3115 * is taken on a load or store. This indicates whether the instruction 3116 * is accessing a 32-bit or 64-bit register. This logic is derived 3117 * from the ARMv8 specs for LDR (Shared decode for all encodings). 3118 */ 3119 static bool ldst_iss_sf(int size, bool sign, bool ext) 3120 { 3121 3122 if (sign) { 3123 /* 3124 * Signed loads are 64 bit results if we are not going to 3125 * do a zero-extend from 32 to 64 after the load. 3126 * (For a store, sign and ext are always false.) 3127 */ 3128 return !ext; 3129 } else { 3130 /* Unsigned loads/stores work at the specified size */ 3131 return size == MO_64; 3132 } 3133 } 3134 3135 static bool trans_STXR(DisasContext *s, arg_stxr *a) 3136 { 3137 if (a->rn == 31) { 3138 gen_check_sp_alignment(s); 3139 } 3140 if (a->lasr) { 3141 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3142 } 3143 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 3144 return true; 3145 } 3146 3147 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 3148 { 3149 if (a->rn == 31) { 3150 gen_check_sp_alignment(s); 3151 } 3152 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 3153 if (a->lasr) { 3154 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3155 } 3156 return true; 3157 } 3158 3159 static bool trans_STLR(DisasContext *s, arg_stlr *a) 3160 { 3161 TCGv_i64 clean_addr; 3162 MemOp memop; 3163 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3164 3165 /* 3166 * StoreLORelease is the same as Store-Release for QEMU, but 3167 * needs the feature-test. 3168 */ 3169 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3170 return false; 3171 } 3172 /* Generate ISS for non-exclusive accesses including LASR. 
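     * do_gpr_st below is passed iss_valid/iss_sf/iss_ar so that a data
     * abort reports the transfer register, its width and the
     * acquire/release flag in the syndrome.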
*/ 3173 if (a->rn == 31) { 3174 gen_check_sp_alignment(s); 3175 } 3176 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3177 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3178 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3179 true, a->rn != 31, memop); 3180 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3181 iss_sf, a->lasr); 3182 return true; 3183 } 3184 3185 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3186 { 3187 TCGv_i64 clean_addr; 3188 MemOp memop; 3189 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3190 3191 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3192 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3193 return false; 3194 } 3195 /* Generate ISS for non-exclusive accesses including LASR. */ 3196 if (a->rn == 31) { 3197 gen_check_sp_alignment(s); 3198 } 3199 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3200 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3201 false, a->rn != 31, memop); 3202 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3203 a->rt, iss_sf, a->lasr); 3204 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3205 return true; 3206 } 3207 3208 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3209 { 3210 if (a->rn == 31) { 3211 gen_check_sp_alignment(s); 3212 } 3213 if (a->lasr) { 3214 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3215 } 3216 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3217 return true; 3218 } 3219 3220 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3221 { 3222 if (a->rn == 31) { 3223 gen_check_sp_alignment(s); 3224 } 3225 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3226 if (a->lasr) { 3227 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3228 } 3229 return true; 3230 } 3231 3232 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3233 { 3234 if (!dc_isar_feature(aa64_atomics, s)) { 3235 return false; 3236 } 3237 if (((a->rt | a->rs) & 1) != 0) { 3238 return false; 3239 } 3240 3241 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3242 return true; 3243 } 3244 3245 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3246 { 3247 if (!dc_isar_feature(aa64_atomics, s)) { 3248 return false; 3249 } 3250 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3251 return true; 3252 } 3253 3254 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3255 { 3256 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3257 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3258 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3259 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3260 3261 gen_pc_plus_diff(s, clean_addr, a->imm); 3262 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3263 false, true, a->rt, iss_sf, false); 3264 return true; 3265 } 3266 3267 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3268 { 3269 /* Load register (literal), vector version */ 3270 TCGv_i64 clean_addr; 3271 MemOp memop; 3272 3273 if (!fp_access_check(s)) { 3274 return true; 3275 } 3276 memop = finalize_memop_asimd(s, a->sz); 3277 clean_addr = tcg_temp_new_i64(); 3278 gen_pc_plus_diff(s, clean_addr, a->imm); 3279 do_fp_ld(s, a->rt, clean_addr, memop); 3280 return true; 3281 } 3282 3283 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3284 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3285 uint64_t offset, bool is_store, MemOp mop) 3286 { 3287 if (a->rn == 31) { 3288 gen_check_sp_alignment(s); 3289 } 3290 3291 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3292 if (!a->p) { 3293 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3294 } 3295 3296 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3297 (a->w || a->rn != 31), 2 << a->sz, mop); 3298 } 3299 3300 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3301 TCGv_i64 dirty_addr, uint64_t offset) 3302 { 3303 if (a->w) { 3304 if (a->p) { 3305 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3306 } 3307 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3308 } 3309 } 3310 3311 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3312 { 3313 uint64_t offset = a->imm << a->sz; 3314 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3315 MemOp mop = finalize_memop(s, a->sz); 3316 3317 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3318 tcg_rt = cpu_reg(s, a->rt); 3319 tcg_rt2 = cpu_reg(s, a->rt2); 3320 /* 3321 * We built mop above for the single logical access -- rebuild it 3322 * now for the paired operation. 3323 * 3324 * With LSE2, non-sign-extending pairs are treated atomically if 3325 * aligned, and if unaligned one of the pair will be completely 3326 * within a 16-byte block and that element will be atomic. 3327 * Otherwise each element is separately atomic. 3328 * In all cases, issue one operation with the correct atomicity. 3329 */ 3330 mop = a->sz + 1; 3331 if (s->align_mem) { 3332 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3333 } 3334 mop = finalize_memop_pair(s, mop); 3335 if (a->sz == 2) { 3336 TCGv_i64 tmp = tcg_temp_new_i64(); 3337 3338 if (s->be_data == MO_LE) { 3339 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3340 } else { 3341 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3342 } 3343 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3344 } else { 3345 TCGv_i128 tmp = tcg_temp_new_i128(); 3346 3347 if (s->be_data == MO_LE) { 3348 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3349 } else { 3350 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3351 } 3352 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3353 } 3354 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3355 return true; 3356 } 3357 3358 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3359 { 3360 uint64_t offset = a->imm << a->sz; 3361 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3362 MemOp mop = finalize_memop(s, a->sz); 3363 3364 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3365 tcg_rt = cpu_reg(s, a->rt); 3366 tcg_rt2 = cpu_reg(s, a->rt2); 3367 3368 /* 3369 * We built mop above for the single logical access -- rebuild it 3370 * now for the paired operation. 3371 * 3372 * With LSE2, non-sign-extending pairs are treated atomically if 3373 * aligned, and if unaligned one of the pair will be completely 3374 * within a 16-byte block and that element will be atomic. 3375 * Otherwise each element is separately atomic. 3376 * In all cases, issue one operation with the correct atomicity. 3377 * 3378 * This treats sign-extending loads like zero-extending loads, 3379 * since that reuses the most code below. 3380 */ 3381 mop = a->sz + 1; 3382 if (s->align_mem) { 3383 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3384 } 3385 mop = finalize_memop_pair(s, mop); 3386 if (a->sz == 2) { 3387 int o2 = s->be_data == MO_LE ? 
32 : 0; 3388 int o1 = o2 ^ 32; 3389 3390 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3391 if (a->sign) { 3392 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3393 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3394 } else { 3395 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3396 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3397 } 3398 } else { 3399 TCGv_i128 tmp = tcg_temp_new_i128(); 3400 3401 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3402 if (s->be_data == MO_LE) { 3403 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3404 } else { 3405 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3406 } 3407 } 3408 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3409 return true; 3410 } 3411 3412 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3413 { 3414 uint64_t offset = a->imm << a->sz; 3415 TCGv_i64 clean_addr, dirty_addr; 3416 MemOp mop; 3417 3418 if (!fp_access_check(s)) { 3419 return true; 3420 } 3421 3422 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3423 mop = finalize_memop_asimd(s, a->sz); 3424 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3425 do_fp_st(s, a->rt, clean_addr, mop); 3426 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3427 do_fp_st(s, a->rt2, clean_addr, mop); 3428 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3429 return true; 3430 } 3431 3432 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3433 { 3434 uint64_t offset = a->imm << a->sz; 3435 TCGv_i64 clean_addr, dirty_addr; 3436 MemOp mop; 3437 3438 if (!fp_access_check(s)) { 3439 return true; 3440 } 3441 3442 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3443 mop = finalize_memop_asimd(s, a->sz); 3444 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3445 do_fp_ld(s, a->rt, clean_addr, mop); 3446 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3447 do_fp_ld(s, a->rt2, clean_addr, mop); 3448 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3449 return true; 3450 } 3451 3452 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3453 { 3454 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3455 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3456 MemOp mop; 3457 TCGv_i128 tmp; 3458 3459 /* STGP only comes in one size. */ 3460 tcg_debug_assert(a->sz == MO_64); 3461 3462 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3463 return false; 3464 } 3465 3466 if (a->rn == 31) { 3467 gen_check_sp_alignment(s); 3468 } 3469 3470 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3471 if (!a->p) { 3472 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3473 } 3474 3475 clean_addr = clean_data_tbi(s, dirty_addr); 3476 tcg_rt = cpu_reg(s, a->rt); 3477 tcg_rt2 = cpu_reg(s, a->rt2); 3478 3479 /* 3480 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3481 * and one tag operation. We implement it as one single aligned 16-byte 3482 * memory operation for convenience. Note that the alignment ensures 3483 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3484 */ 3485 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3486 3487 tmp = tcg_temp_new_i128(); 3488 if (s->be_data == MO_LE) { 3489 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3490 } else { 3491 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3492 } 3493 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3494 3495 /* Perform the tag store, if tag access enabled. 
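     * If allocation tag access is disabled (ata[0] clear), the tag write
     * is simply omitted and only the 16-byte data store above takes
     * effect.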
*/ 3496 if (s->ata[0]) { 3497 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3498 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3499 } else { 3500 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3501 } 3502 } 3503 3504 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3505 return true; 3506 } 3507 3508 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3509 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3510 uint64_t offset, bool is_store, MemOp mop) 3511 { 3512 int memidx; 3513 3514 if (a->rn == 31) { 3515 gen_check_sp_alignment(s); 3516 } 3517 3518 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3519 if (!a->p) { 3520 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3521 } 3522 memidx = get_a64_user_mem_index(s, a->unpriv); 3523 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3524 a->w || a->rn != 31, 3525 mop, a->unpriv, memidx); 3526 } 3527 3528 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3529 TCGv_i64 dirty_addr, uint64_t offset) 3530 { 3531 if (a->w) { 3532 if (a->p) { 3533 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3534 } 3535 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3536 } 3537 } 3538 3539 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3540 { 3541 bool iss_sf, iss_valid = !a->w; 3542 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3543 int memidx = get_a64_user_mem_index(s, a->unpriv); 3544 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3545 3546 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3547 3548 tcg_rt = cpu_reg(s, a->rt); 3549 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3550 3551 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3552 iss_valid, a->rt, iss_sf, false); 3553 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3554 return true; 3555 } 3556 3557 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3558 { 3559 bool iss_sf, iss_valid = !a->w; 3560 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3561 int memidx = get_a64_user_mem_index(s, a->unpriv); 3562 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3563 3564 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3565 3566 tcg_rt = cpu_reg(s, a->rt); 3567 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3568 3569 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3570 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3571 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3572 return true; 3573 } 3574 3575 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3576 { 3577 TCGv_i64 clean_addr, dirty_addr; 3578 MemOp mop; 3579 3580 if (!fp_access_check(s)) { 3581 return true; 3582 } 3583 mop = finalize_memop_asimd(s, a->sz); 3584 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3585 do_fp_st(s, a->rt, clean_addr, mop); 3586 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3587 return true; 3588 } 3589 3590 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3591 { 3592 TCGv_i64 clean_addr, dirty_addr; 3593 MemOp mop; 3594 3595 if (!fp_access_check(s)) { 3596 return true; 3597 } 3598 mop = finalize_memop_asimd(s, a->sz); 3599 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3600 do_fp_ld(s, a->rt, clean_addr, mop); 3601 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3602 return true; 3603 } 3604 3605 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3606 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3607 bool is_store, MemOp memop) 3608 { 3609 TCGv_i64 tcg_rm; 3610 3611 if (a->rn == 31) { 3612 
gen_check_sp_alignment(s); 3613 } 3614 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3615 3616 tcg_rm = read_cpu_reg(s, a->rm, 1); 3617 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3618 3619 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3620 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3621 } 3622 3623 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3624 { 3625 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3626 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3627 MemOp memop; 3628 3629 if (extract32(a->opt, 1, 1) == 0) { 3630 return false; 3631 } 3632 3633 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3634 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3635 tcg_rt = cpu_reg(s, a->rt); 3636 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3637 a->ext, true, a->rt, iss_sf, false); 3638 return true; 3639 } 3640 3641 static bool trans_STR(DisasContext *s, arg_ldst *a) 3642 { 3643 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3644 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3645 MemOp memop; 3646 3647 if (extract32(a->opt, 1, 1) == 0) { 3648 return false; 3649 } 3650 3651 memop = finalize_memop(s, a->sz); 3652 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3653 tcg_rt = cpu_reg(s, a->rt); 3654 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3655 return true; 3656 } 3657 3658 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3659 { 3660 TCGv_i64 clean_addr, dirty_addr; 3661 MemOp memop; 3662 3663 if (extract32(a->opt, 1, 1) == 0) { 3664 return false; 3665 } 3666 3667 if (!fp_access_check(s)) { 3668 return true; 3669 } 3670 3671 memop = finalize_memop_asimd(s, a->sz); 3672 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3673 do_fp_ld(s, a->rt, clean_addr, memop); 3674 return true; 3675 } 3676 3677 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3678 { 3679 TCGv_i64 clean_addr, dirty_addr; 3680 MemOp memop; 3681 3682 if (extract32(a->opt, 1, 1) == 0) { 3683 return false; 3684 } 3685 3686 if (!fp_access_check(s)) { 3687 return true; 3688 } 3689 3690 memop = finalize_memop_asimd(s, a->sz); 3691 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3692 do_fp_st(s, a->rt, clean_addr, memop); 3693 return true; 3694 } 3695 3696 3697 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3698 int sign, bool invert) 3699 { 3700 MemOp mop = a->sz | sign; 3701 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3702 3703 if (a->rn == 31) { 3704 gen_check_sp_alignment(s); 3705 } 3706 mop = check_atomic_align(s, a->rn, mop); 3707 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3708 a->rn != 31, mop); 3709 tcg_rs = read_cpu_reg(s, a->rs, true); 3710 tcg_rt = cpu_reg(s, a->rt); 3711 if (invert) { 3712 tcg_gen_not_i64(tcg_rs, tcg_rs); 3713 } 3714 /* 3715 * The tcg atomic primitives are all full barriers. Therefore we 3716 * can ignore the Acquire and Release bits of this instruction. 
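     *
     * In outline (illustrative only), each of these insns behaves as
     *
     *   old = [Xn];  [Xn] = op(old, Rs);  Rt = old;   // one atomic step
     *
     * where op comes from the TRANS_FEAT table below and Rs is
     * complemented first for LDCLR (invert == true).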
3717 */ 3718 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3719 3720 if (mop & MO_SIGN) { 3721 switch (a->sz) { 3722 case MO_8: 3723 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3724 break; 3725 case MO_16: 3726 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3727 break; 3728 case MO_32: 3729 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3730 break; 3731 case MO_64: 3732 break; 3733 default: 3734 g_assert_not_reached(); 3735 } 3736 } 3737 return true; 3738 } 3739 3740 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3741 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3742 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3743 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3744 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3745 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3746 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3747 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3748 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3749 3750 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3751 { 3752 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3753 TCGv_i64 clean_addr; 3754 MemOp mop; 3755 3756 if (!dc_isar_feature(aa64_atomics, s) || 3757 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3758 return false; 3759 } 3760 if (a->rn == 31) { 3761 gen_check_sp_alignment(s); 3762 } 3763 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3764 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3765 a->rn != 31, mop); 3766 /* 3767 * LDAPR* are a special case because they are a simple load, not a 3768 * fetch-and-do-something op. 3769 * The architectural consistency requirements here are weaker than 3770 * full load-acquire (we only need "load-acquire processor consistent"), 3771 * but we choose to implement them as full LDAQ. 3772 */ 3773 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3774 true, a->rt, iss_sf, true); 3775 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3776 return true; 3777 } 3778 3779 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3780 { 3781 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3782 MemOp memop; 3783 3784 /* Load with pointer authentication */ 3785 if (!dc_isar_feature(aa64_pauth, s)) { 3786 return false; 3787 } 3788 3789 if (a->rn == 31) { 3790 gen_check_sp_alignment(s); 3791 } 3792 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3793 3794 if (s->pauth_active) { 3795 if (!a->m) { 3796 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3797 tcg_constant_i64(0)); 3798 } else { 3799 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3800 tcg_constant_i64(0)); 3801 } 3802 } 3803 3804 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3805 3806 memop = finalize_memop(s, MO_64); 3807 3808 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
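     * When pointer authentication is active, the autd* helper above has
     * already dealt with any PAC in the register; gen_mte_check1 below
     * strips the TBI tag byte and performs the MTE check as needed.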
*/ 3809 clean_addr = gen_mte_check1(s, dirty_addr, false, 3810 a->w || a->rn != 31, memop); 3811 3812 tcg_rt = cpu_reg(s, a->rt); 3813 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3814 /* extend */ false, /* iss_valid */ !a->w, 3815 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3816 3817 if (a->w) { 3818 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3819 } 3820 return true; 3821 } 3822 3823 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3824 { 3825 TCGv_i64 clean_addr, dirty_addr; 3826 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3827 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3828 3829 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3830 return false; 3831 } 3832 3833 if (a->rn == 31) { 3834 gen_check_sp_alignment(s); 3835 } 3836 3837 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3838 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3839 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3840 clean_addr = clean_data_tbi(s, dirty_addr); 3841 3842 /* 3843 * Load-AcquirePC semantics; we implement as the slightly more 3844 * restrictive Load-Acquire. 3845 */ 3846 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3847 a->rt, iss_sf, true); 3848 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3849 return true; 3850 } 3851 3852 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3853 { 3854 TCGv_i64 clean_addr, dirty_addr; 3855 MemOp mop = a->sz; 3856 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3857 3858 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3859 return false; 3860 } 3861 3862 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3863 3864 if (a->rn == 31) { 3865 gen_check_sp_alignment(s); 3866 } 3867 3868 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3869 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3870 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3871 clean_addr = clean_data_tbi(s, dirty_addr); 3872 3873 /* Store-Release semantics */ 3874 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3875 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3876 return true; 3877 } 3878 3879 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3880 { 3881 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3882 MemOp endian, align, mop; 3883 3884 int total; /* total bytes */ 3885 int elements; /* elements per vector */ 3886 int r; 3887 int size = a->sz; 3888 3889 if (!a->p && a->rm != 0) { 3890 /* For non-postindexed accesses the Rm field must be 0 */ 3891 return false; 3892 } 3893 if (size == 3 && !a->q && a->selem != 1) { 3894 return false; 3895 } 3896 if (!fp_access_check(s)) { 3897 return true; 3898 } 3899 3900 if (a->rn == 31) { 3901 gen_check_sp_alignment(s); 3902 } 3903 3904 /* For our purposes, bytes are always little-endian. */ 3905 endian = s->be_data; 3906 if (size == 0) { 3907 endian = MO_LE; 3908 } 3909 3910 total = a->rpt * a->selem * (a->q ? 16 : 8); 3911 tcg_rn = cpu_reg_sp(s, a->rn); 3912 3913 /* 3914 * Issue the MTE check vs the logical repeat count, before we 3915 * promote consecutive little-endian elements below. 3916 */ 3917 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3918 finalize_memop_asimd(s, size)); 3919 3920 /* 3921 * Consecutive little-endian elements from a single register 3922 * can be promoted to a larger little-endian operation. 3923 */ 3924 align = MO_ALIGN; 3925 if (a->selem == 1 && endian == MO_LE) { 3926 align = pow2_align(size); 3927 size = 3; 3928 } 3929 if (!s->align_mem) { 3930 align = 0; 3931 } 3932 mop = endian | size | align; 3933 3934 elements = (a->q ? 
16 : 8) >> size; 3935 tcg_ebytes = tcg_constant_i64(1 << size); 3936 for (r = 0; r < a->rpt; r++) { 3937 int e; 3938 for (e = 0; e < elements; e++) { 3939 int xs; 3940 for (xs = 0; xs < a->selem; xs++) { 3941 int tt = (a->rt + r + xs) % 32; 3942 do_vec_ld(s, tt, e, clean_addr, mop); 3943 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3944 } 3945 } 3946 } 3947 3948 /* 3949 * For non-quad operations, setting a slice of the low 64 bits of 3950 * the register clears the high 64 bits (in the ARM ARM pseudocode 3951 * this is implicit in the fact that 'rval' is a 64 bit wide 3952 * variable). For quad operations, we might still need to zero 3953 * the high bits of SVE. 3954 */ 3955 for (r = 0; r < a->rpt * a->selem; r++) { 3956 int tt = (a->rt + r) % 32; 3957 clear_vec_high(s, a->q, tt); 3958 } 3959 3960 if (a->p) { 3961 if (a->rm == 31) { 3962 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3963 } else { 3964 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3965 } 3966 } 3967 return true; 3968 } 3969 3970 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3971 { 3972 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3973 MemOp endian, align, mop; 3974 3975 int total; /* total bytes */ 3976 int elements; /* elements per vector */ 3977 int r; 3978 int size = a->sz; 3979 3980 if (!a->p && a->rm != 0) { 3981 /* For non-postindexed accesses the Rm field must be 0 */ 3982 return false; 3983 } 3984 if (size == 3 && !a->q && a->selem != 1) { 3985 return false; 3986 } 3987 if (!fp_access_check(s)) { 3988 return true; 3989 } 3990 3991 if (a->rn == 31) { 3992 gen_check_sp_alignment(s); 3993 } 3994 3995 /* For our purposes, bytes are always little-endian. */ 3996 endian = s->be_data; 3997 if (size == 0) { 3998 endian = MO_LE; 3999 } 4000 4001 total = a->rpt * a->selem * (a->q ? 16 : 8); 4002 tcg_rn = cpu_reg_sp(s, a->rn); 4003 4004 /* 4005 * Issue the MTE check vs the logical repeat count, before we 4006 * promote consecutive little-endian elements below. 4007 */ 4008 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 4009 finalize_memop_asimd(s, size)); 4010 4011 /* 4012 * Consecutive little-endian elements from a single register 4013 * can be promoted to a larger little-endian operation. 4014 */ 4015 align = MO_ALIGN; 4016 if (a->selem == 1 && endian == MO_LE) { 4017 align = pow2_align(size); 4018 size = 3; 4019 } 4020 if (!s->align_mem) { 4021 align = 0; 4022 } 4023 mop = endian | size | align; 4024 4025 elements = (a->q ? 
16 : 8) >> size; 4026 tcg_ebytes = tcg_constant_i64(1 << size); 4027 for (r = 0; r < a->rpt; r++) { 4028 int e; 4029 for (e = 0; e < elements; e++) { 4030 int xs; 4031 for (xs = 0; xs < a->selem; xs++) { 4032 int tt = (a->rt + r + xs) % 32; 4033 do_vec_st(s, tt, e, clean_addr, mop); 4034 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4035 } 4036 } 4037 } 4038 4039 if (a->p) { 4040 if (a->rm == 31) { 4041 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4042 } else { 4043 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4044 } 4045 } 4046 return true; 4047 } 4048 4049 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 4050 { 4051 int xs, total, rt; 4052 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4053 MemOp mop; 4054 4055 if (!a->p && a->rm != 0) { 4056 return false; 4057 } 4058 if (!fp_access_check(s)) { 4059 return true; 4060 } 4061 4062 if (a->rn == 31) { 4063 gen_check_sp_alignment(s); 4064 } 4065 4066 total = a->selem << a->scale; 4067 tcg_rn = cpu_reg_sp(s, a->rn); 4068 4069 mop = finalize_memop_asimd(s, a->scale); 4070 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 4071 total, mop); 4072 4073 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4074 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4075 do_vec_st(s, rt, a->index, clean_addr, mop); 4076 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4077 } 4078 4079 if (a->p) { 4080 if (a->rm == 31) { 4081 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4082 } else { 4083 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4084 } 4085 } 4086 return true; 4087 } 4088 4089 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 4090 { 4091 int xs, total, rt; 4092 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4093 MemOp mop; 4094 4095 if (!a->p && a->rm != 0) { 4096 return false; 4097 } 4098 if (!fp_access_check(s)) { 4099 return true; 4100 } 4101 4102 if (a->rn == 31) { 4103 gen_check_sp_alignment(s); 4104 } 4105 4106 total = a->selem << a->scale; 4107 tcg_rn = cpu_reg_sp(s, a->rn); 4108 4109 mop = finalize_memop_asimd(s, a->scale); 4110 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4111 total, mop); 4112 4113 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4114 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4115 do_vec_ld(s, rt, a->index, clean_addr, mop); 4116 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4117 } 4118 4119 if (a->p) { 4120 if (a->rm == 31) { 4121 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4122 } else { 4123 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4124 } 4125 } 4126 return true; 4127 } 4128 4129 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 4130 { 4131 int xs, total, rt; 4132 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4133 MemOp mop; 4134 4135 if (!a->p && a->rm != 0) { 4136 return false; 4137 } 4138 if (!fp_access_check(s)) { 4139 return true; 4140 } 4141 4142 if (a->rn == 31) { 4143 gen_check_sp_alignment(s); 4144 } 4145 4146 total = a->selem << a->scale; 4147 tcg_rn = cpu_reg_sp(s, a->rn); 4148 4149 mop = finalize_memop_asimd(s, a->scale); 4150 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4151 total, mop); 4152 4153 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4154 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4155 /* Load and replicate to all elements */ 4156 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 4157 4158 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4159 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 4160 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 4161 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4162 } 4163 4164 if (a->p) { 4165 if (a->rm == 31) { 4166 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4167 } else { 4168 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4169 } 4170 } 4171 return true; 4172 } 4173 4174 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4175 { 4176 TCGv_i64 addr, clean_addr, tcg_rt; 4177 int size = 4 << s->dcz_blocksize; 4178 4179 if (!dc_isar_feature(aa64_mte, s)) { 4180 return false; 4181 } 4182 if (s->current_el == 0) { 4183 return false; 4184 } 4185 4186 if (a->rn == 31) { 4187 gen_check_sp_alignment(s); 4188 } 4189 4190 addr = read_cpu_reg_sp(s, a->rn, true); 4191 tcg_gen_addi_i64(addr, addr, a->imm); 4192 tcg_rt = cpu_reg(s, a->rt); 4193 4194 if (s->ata[0]) { 4195 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4196 } 4197 /* 4198 * The non-tags portion of STZGM is mostly like DC_ZVA, 4199 * except the alignment happens before the access. 4200 */ 4201 clean_addr = clean_data_tbi(s, addr); 4202 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4203 gen_helper_dc_zva(tcg_env, clean_addr); 4204 return true; 4205 } 4206 4207 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4208 { 4209 TCGv_i64 addr, clean_addr, tcg_rt; 4210 4211 if (!dc_isar_feature(aa64_mte, s)) { 4212 return false; 4213 } 4214 if (s->current_el == 0) { 4215 return false; 4216 } 4217 4218 if (a->rn == 31) { 4219 gen_check_sp_alignment(s); 4220 } 4221 4222 addr = read_cpu_reg_sp(s, a->rn, true); 4223 tcg_gen_addi_i64(addr, addr, a->imm); 4224 tcg_rt = cpu_reg(s, a->rt); 4225 4226 if (s->ata[0]) { 4227 gen_helper_stgm(tcg_env, addr, tcg_rt); 4228 } else { 4229 MMUAccessType acc = MMU_DATA_STORE; 4230 int size = 4 << s->gm_blocksize; 4231 4232 clean_addr = clean_data_tbi(s, addr); 4233 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4234 gen_probe_access(s, clean_addr, acc, size); 4235 } 4236 return true; 4237 } 4238 4239 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4240 { 4241 TCGv_i64 addr, clean_addr, tcg_rt; 4242 4243 if (!dc_isar_feature(aa64_mte, s)) { 4244 return false; 4245 } 4246 if (s->current_el == 0) { 4247 return false; 4248 } 4249 4250 if (a->rn == 31) { 4251 gen_check_sp_alignment(s); 4252 } 4253 4254 addr = read_cpu_reg_sp(s, a->rn, true); 4255 tcg_gen_addi_i64(addr, addr, a->imm); 4256 tcg_rt = cpu_reg(s, a->rt); 4257 4258 if (s->ata[0]) { 4259 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4260 } else { 4261 MMUAccessType acc = MMU_DATA_LOAD; 4262 int size = 4 << s->gm_blocksize; 4263 4264 clean_addr = clean_data_tbi(s, addr); 4265 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4266 gen_probe_access(s, clean_addr, acc, size); 4267 /* The result tags are zeros. */ 4268 tcg_gen_movi_i64(tcg_rt, 0); 4269 } 4270 return true; 4271 } 4272 4273 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4274 { 4275 TCGv_i64 addr, clean_addr, tcg_rt; 4276 4277 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4278 return false; 4279 } 4280 4281 if (a->rn == 31) { 4282 gen_check_sp_alignment(s); 4283 } 4284 4285 addr = read_cpu_reg_sp(s, a->rn, true); 4286 if (!a->p) { 4287 /* pre-index or signed offset */ 4288 tcg_gen_addi_i64(addr, addr, a->imm); 4289 } 4290 4291 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4292 tcg_rt = cpu_reg(s, a->rt); 4293 if (s->ata[0]) { 4294 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4295 } else { 4296 /* 4297 * Tag access disabled: we must check for aborts on the load 4298 * load from [rn+offset], and then insert a 0 tag into rt. 
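 * (gen_address_with_allocation_tag0() just clears the allocation-tag
 * field, bits [59:56], of the existing Xt value.)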
4299 */ 4300 clean_addr = clean_data_tbi(s, addr); 4301 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4302 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4303 } 4304 4305 if (a->w) { 4306 /* pre-index or post-index */ 4307 if (a->p) { 4308 /* post-index */ 4309 tcg_gen_addi_i64(addr, addr, a->imm); 4310 } 4311 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4312 } 4313 return true; 4314 } 4315 4316 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4317 { 4318 TCGv_i64 addr, tcg_rt; 4319 4320 if (a->rn == 31) { 4321 gen_check_sp_alignment(s); 4322 } 4323 4324 addr = read_cpu_reg_sp(s, a->rn, true); 4325 if (!a->p) { 4326 /* pre-index or signed offset */ 4327 tcg_gen_addi_i64(addr, addr, a->imm); 4328 } 4329 tcg_rt = cpu_reg_sp(s, a->rt); 4330 if (!s->ata[0]) { 4331 /* 4332 * For STG and ST2G, we need to check alignment and probe memory. 4333 * TODO: For STZG and STZ2G, we could rely on the stores below, 4334 * at least for system mode; user-only won't enforce alignment. 4335 */ 4336 if (is_pair) { 4337 gen_helper_st2g_stub(tcg_env, addr); 4338 } else { 4339 gen_helper_stg_stub(tcg_env, addr); 4340 } 4341 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4342 if (is_pair) { 4343 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4344 } else { 4345 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4346 } 4347 } else { 4348 if (is_pair) { 4349 gen_helper_st2g(tcg_env, addr, tcg_rt); 4350 } else { 4351 gen_helper_stg(tcg_env, addr, tcg_rt); 4352 } 4353 } 4354 4355 if (is_zero) { 4356 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4357 TCGv_i64 zero64 = tcg_constant_i64(0); 4358 TCGv_i128 zero128 = tcg_temp_new_i128(); 4359 int mem_index = get_mem_index(s); 4360 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4361 4362 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4363 4364 /* This is 1 or 2 atomic 16-byte operations. */ 4365 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4366 if (is_pair) { 4367 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4368 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4369 } 4370 } 4371 4372 if (a->w) { 4373 /* pre-index or post-index */ 4374 if (a->p) { 4375 /* post-index */ 4376 tcg_gen_addi_i64(addr, addr, a->imm); 4377 } 4378 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4379 } 4380 return true; 4381 } 4382 4383 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4384 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4385 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4386 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4387 4388 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4389 4390 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4391 bool is_setg, SetFn fn) 4392 { 4393 int memidx; 4394 uint32_t syndrome, desc = 0; 4395 4396 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4397 return false; 4398 } 4399 4400 /* 4401 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4402 * us to pull this check before the CheckMOPSEnabled() test 4403 * (which we do in the helper function) 4404 */ 4405 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4406 a->rd == 31 || a->rn == 31) { 4407 return false; 4408 } 4409 4410 memidx = get_a64_user_mem_index(s, a->unpriv); 4411 4412 /* 4413 * We pass option_a == true, matching our implementation; 4414 * we pass wrong_option == false: helper function may set that bit. 
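 * (QEMU's helpers always use the FEAT_MOPS "option A" algorithm; recording
 * the option in the syndrome lets an exception handler interpret the
 * in-progress register state if the sequence is continued on an
 * implementation that uses the other option.)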
4415 */ 4416 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4417 is_epilogue, false, true, a->rd, a->rs, a->rn); 4418 4419 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4420 /* We may need to do MTE tag checking, so assemble the descriptor */ 4421 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4422 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4423 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4424 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4425 } 4426 /* The helper function always needs the memidx even with MTE disabled */ 4427 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4428 4429 /* 4430 * The helper needs the register numbers, but since they're in 4431 * the syndrome anyway, we let it extract them from there rather 4432 * than passing in an extra three integer arguments. 4433 */ 4434 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4435 return true; 4436 } 4437 4438 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4439 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4440 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4441 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4442 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4443 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4444 4445 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4446 4447 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4448 { 4449 int rmemidx, wmemidx; 4450 uint32_t syndrome, rdesc = 0, wdesc = 0; 4451 bool wunpriv = extract32(a->options, 0, 1); 4452 bool runpriv = extract32(a->options, 1, 1); 4453 4454 /* 4455 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4456 * us to pull this check before the CheckMOPSEnabled() test 4457 * (which we do in the helper function) 4458 */ 4459 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4460 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4461 return false; 4462 } 4463 4464 rmemidx = get_a64_user_mem_index(s, runpriv); 4465 wmemidx = get_a64_user_mem_index(s, wunpriv); 4466 4467 /* 4468 * We pass option_a == true, matching our implementation; 4469 * we pass wrong_option == false: helper function may set that bit. 4470 */ 4471 syndrome = syn_mop(false, false, a->options, is_epilogue, 4472 false, true, a->rd, a->rs, a->rn); 4473 4474 /* If we need to do MTE tag checking, assemble the descriptors */ 4475 if (s->mte_active[runpriv]) { 4476 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4477 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4478 } 4479 if (s->mte_active[wunpriv]) { 4480 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4481 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4482 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4483 } 4484 /* The helper function needs these parts of the descriptor regardless */ 4485 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4486 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4487 4488 /* 4489 * The helper needs the register numbers, but since they're in 4490 * the syndrome anyway, we let it extract them from there rather 4491 * than passing in an extra three integer arguments. 
4492 */ 4493 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4494 tcg_constant_i32(rdesc)); 4495 return true; 4496 } 4497 4498 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4499 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4500 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4501 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4502 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4503 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4504 4505 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4506 4507 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4508 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4509 { 4510 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4511 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4512 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4513 4514 fn(tcg_rd, tcg_rn, tcg_imm); 4515 if (!a->sf) { 4516 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4517 } 4518 return true; 4519 } 4520 4521 /* 4522 * PC-rel. addressing 4523 */ 4524 4525 static bool trans_ADR(DisasContext *s, arg_ri *a) 4526 { 4527 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4528 return true; 4529 } 4530 4531 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4532 { 4533 int64_t offset = (int64_t)a->imm << 12; 4534 4535 /* The page offset is ok for CF_PCREL. */ 4536 offset -= s->pc_curr & 0xfff; 4537 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4538 return true; 4539 } 4540 4541 /* 4542 * Add/subtract (immediate) 4543 */ 4544 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4545 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4546 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4547 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4548 4549 /* 4550 * Add/subtract (immediate, with tags) 4551 */ 4552 4553 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4554 bool sub_op) 4555 { 4556 TCGv_i64 tcg_rn, tcg_rd; 4557 int imm; 4558 4559 imm = a->uimm6 << LOG2_TAG_GRANULE; 4560 if (sub_op) { 4561 imm = -imm; 4562 } 4563 4564 tcg_rn = cpu_reg_sp(s, a->rn); 4565 tcg_rd = cpu_reg_sp(s, a->rd); 4566 4567 if (s->ata[0]) { 4568 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4569 tcg_constant_i32(imm), 4570 tcg_constant_i32(a->uimm4)); 4571 } else { 4572 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4573 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4574 } 4575 return true; 4576 } 4577 4578 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4579 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4580 4581 /* The input should be a value in the bottom e bits (with higher 4582 * bits zero); returns that value replicated into every element 4583 * of size e in a 64 bit integer. 4584 */ 4585 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4586 { 4587 assert(e != 0); 4588 while (e < 64) { 4589 mask |= mask << e; 4590 e *= 2; 4591 } 4592 return mask; 4593 } 4594 4595 /* 4596 * Logical (immediate) 4597 */ 4598 4599 /* 4600 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4601 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4602 * value (ie should cause a guest UNDEF exception), and true if they are 4603 * valid, in which case the decoded bit pattern is written to result. 
4604 */ 4605 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4606 unsigned int imms, unsigned int immr) 4607 { 4608 uint64_t mask; 4609 unsigned e, levels, s, r; 4610 int len; 4611 4612 assert(immn < 2 && imms < 64 && immr < 64); 4613 4614 /* The bit patterns we create here are 64 bit patterns which 4615 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4616 * 64 bits each. Each element contains the same value: a run 4617 * of between 1 and e-1 non-zero bits, rotated within the 4618 * element by between 0 and e-1 bits. 4619 * 4620 * The element size and run length are encoded into immn (1 bit) 4621 * and imms (6 bits) as follows: 4622 * 64 bit elements: immn = 1, imms = <length of run - 1> 4623 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4624 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4625 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4626 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4627 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4628 * Notice that immn = 0, imms = 11111x is the only combination 4629 * not covered by one of the above options; this is reserved. 4630 * Further, <length of run - 1> all-ones is a reserved pattern. 4631 * 4632 * In all cases the rotation is by immr % e (and immr is 6 bits). 4633 */ 4634 4635 /* First determine the element size */ 4636 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4637 if (len < 1) { 4638 /* This is the immn == 0, imms == 0x11111x case */ 4639 return false; 4640 } 4641 e = 1 << len; 4642 4643 levels = e - 1; 4644 s = imms & levels; 4645 r = immr & levels; 4646 4647 if (s == levels) { 4648 /* <length of run - 1> mustn't be all-ones. */ 4649 return false; 4650 } 4651 4652 /* Create the value of one element: s+1 set bits rotated 4653 * by r within the element (which is e bits wide)... 4654 */ 4655 mask = MAKE_64BIT_MASK(0, s + 1); 4656 if (r) { 4657 mask = (mask >> r) | (mask << (e - r)); 4658 mask &= MAKE_64BIT_MASK(0, e); 4659 } 4660 /* ...then replicate the element over the whole 64 bit value */ 4661 mask = bitfield_replicate(mask, e); 4662 *result = mask; 4663 return true; 4664 } 4665 4666 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4667 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4668 { 4669 TCGv_i64 tcg_rd, tcg_rn; 4670 uint64_t imm; 4671 4672 /* Some immediate field values are reserved. */ 4673 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4674 extract32(a->dbm, 0, 6), 4675 extract32(a->dbm, 6, 6))) { 4676 return false; 4677 } 4678 if (!a->sf) { 4679 imm &= 0xffffffffull; 4680 } 4681 4682 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4683 tcg_rn = cpu_reg(s, a->rn); 4684 4685 fn(tcg_rd, tcg_rn, imm); 4686 if (set_cc) { 4687 gen_logic_CC(a->sf, tcg_rd); 4688 } 4689 if (!a->sf) { 4690 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4691 } 4692 return true; 4693 } 4694 4695 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4696 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4697 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4698 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4699 4700 /* 4701 * Move wide (immediate) 4702 */ 4703 4704 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4705 { 4706 int pos = a->hw << 4; 4707 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4708 return true; 4709 } 4710 4711 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4712 { 4713 int pos = a->hw << 4; 4714 uint64_t imm = a->imm; 4715 4716 imm = ~(imm << pos); 4717 if (!a->sf) { 4718 imm = (uint32_t)imm; 4719 } 4720 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4721 return true; 4722 } 4723 4724 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4725 { 4726 int pos = a->hw << 4; 4727 TCGv_i64 tcg_rd, tcg_im; 4728 4729 tcg_rd = cpu_reg(s, a->rd); 4730 tcg_im = tcg_constant_i64(a->imm); 4731 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4732 if (!a->sf) { 4733 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4734 } 4735 return true; 4736 } 4737 4738 /* 4739 * Bitfield 4740 */ 4741 4742 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4743 { 4744 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4745 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4746 unsigned int bitsize = a->sf ? 64 : 32; 4747 unsigned int ri = a->immr; 4748 unsigned int si = a->imms; 4749 unsigned int pos, len; 4750 4751 if (si >= ri) { 4752 /* Wd<s-r:0> = Wn<s:r> */ 4753 len = (si - ri) + 1; 4754 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4755 if (!a->sf) { 4756 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4757 } 4758 } else { 4759 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4760 len = si + 1; 4761 pos = (bitsize - ri) & (bitsize - 1); 4762 4763 if (len < ri) { 4764 /* 4765 * Sign extend the destination field from len to fill the 4766 * balance of the word. Let the deposit below insert all 4767 * of those sign bits. 4768 */ 4769 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4770 len = ri; 4771 } 4772 4773 /* 4774 * We start with zero, and we haven't modified any bits outside 4775 * bitsize, therefore no final zero-extension is unneeded for !sf. 4776 */ 4777 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4778 } 4779 return true; 4780 } 4781 4782 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4783 { 4784 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4785 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4786 unsigned int bitsize = a->sf ? 64 : 32; 4787 unsigned int ri = a->immr; 4788 unsigned int si = a->imms; 4789 unsigned int pos, len; 4790 4791 tcg_rd = cpu_reg(s, a->rd); 4792 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4793 4794 if (si >= ri) { 4795 /* Wd<s-r:0> = Wn<s:r> */ 4796 len = (si - ri) + 1; 4797 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4798 } else { 4799 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4800 len = si + 1; 4801 pos = (bitsize - ri) & (bitsize - 1); 4802 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4803 } 4804 return true; 4805 } 4806 4807 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4808 { 4809 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4810 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4811 unsigned int bitsize = a->sf ? 
64 : 32; 4812 unsigned int ri = a->immr; 4813 unsigned int si = a->imms; 4814 unsigned int pos, len; 4815 4816 tcg_rd = cpu_reg(s, a->rd); 4817 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4818 4819 if (si >= ri) { 4820 /* Wd<s-r:0> = Wn<s:r> */ 4821 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4822 len = (si - ri) + 1; 4823 pos = 0; 4824 } else { 4825 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4826 len = si + 1; 4827 pos = (bitsize - ri) & (bitsize - 1); 4828 } 4829 4830 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4831 if (!a->sf) { 4832 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4833 } 4834 return true; 4835 } 4836 4837 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4838 { 4839 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4840 4841 tcg_rd = cpu_reg(s, a->rd); 4842 4843 if (unlikely(a->imm == 0)) { 4844 /* 4845 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4846 * so an extract from bit 0 is a special case. 4847 */ 4848 if (a->sf) { 4849 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4850 } else { 4851 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4852 } 4853 } else { 4854 tcg_rm = cpu_reg(s, a->rm); 4855 tcg_rn = cpu_reg(s, a->rn); 4856 4857 if (a->sf) { 4858 /* Specialization to ROR happens in EXTRACT2. */ 4859 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4860 } else { 4861 TCGv_i32 t0 = tcg_temp_new_i32(); 4862 4863 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4864 if (a->rm == a->rn) { 4865 tcg_gen_rotri_i32(t0, t0, a->imm); 4866 } else { 4867 TCGv_i32 t1 = tcg_temp_new_i32(); 4868 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4869 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4870 } 4871 tcg_gen_extu_i32_i64(tcg_rd, t0); 4872 } 4873 } 4874 return true; 4875 } 4876 4877 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4878 { 4879 if (fp_access_check(s)) { 4880 int len = (a->len + 1) * 16; 4881 4882 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4883 vec_full_reg_offset(s, a->rm), tcg_env, 4884 a->q ? 16 : 8, vec_full_reg_size(s), 4885 (len << 6) | (a->tbx << 5) | a->rn, 4886 gen_helper_simd_tblx); 4887 } 4888 return true; 4889 } 4890 4891 typedef int simd_permute_idx_fn(int i, int part, int elements); 4892 4893 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4894 simd_permute_idx_fn *fn, int part) 4895 { 4896 MemOp esz = a->esz; 4897 int datasize = a->q ? 16 : 8; 4898 int elements = datasize >> esz; 4899 TCGv_i64 tcg_res[2], tcg_ele; 4900 4901 if (esz == MO_64 && !a->q) { 4902 return false; 4903 } 4904 if (!fp_access_check(s)) { 4905 return true; 4906 } 4907 4908 tcg_res[0] = tcg_temp_new_i64(); 4909 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4910 tcg_ele = tcg_temp_new_i64(); 4911 4912 for (int i = 0; i < elements; i++) { 4913 int o, w, idx; 4914 4915 idx = fn(i, part, elements); 4916 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4917 idx & (elements - 1), esz); 4918 4919 w = (i << (esz + 3)) / 64; 4920 o = (i << (esz + 3)) % 64; 4921 if (o == 0) { 4922 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4923 } else { 4924 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4925 } 4926 } 4927 4928 for (int i = a->q; i >= 0; --i) { 4929 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4930 } 4931 clear_vec_high(s, a->q, a->rd); 4932 return true; 4933 } 4934 4935 static int permute_load_uzp(int i, int part, int elements) 4936 { 4937 return 2 * i + part; 4938 } 4939 4940 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4941 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4942 4943 static int permute_load_trn(int i, int part, int elements) 4944 { 4945 return (i & 1) * elements + (i & ~1) + part; 4946 } 4947 4948 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4949 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4950 4951 static int permute_load_zip(int i, int part, int elements) 4952 { 4953 return (i & 1) * elements + ((part * elements + i) >> 1); 4954 } 4955 4956 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4957 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4958 4959 /* 4960 * Cryptographic AES, SHA, SHA512 4961 */ 4962 4963 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4964 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4965 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4966 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4967 4968 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4969 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4970 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4971 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4972 4973 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4974 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4975 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4976 4977 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4978 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4979 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4980 4981 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4982 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4983 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4984 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4985 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4986 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4987 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4988 4989 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4990 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4991 4992 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4993 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4994 4995 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4996 { 4997 if (!dc_isar_feature(aa64_sm3, s)) { 4998 return false; 4999 } 5000 if (fp_access_check(s)) { 5001 
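        /*
         * The result is ROL32(ROL32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7)
         * (SM3's SS1 value); the rotate-lefts by 12 and 7 are implemented
         * below as rotate-rights by 20 and 25.
         */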
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 5002 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 5003 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 5004 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5005 5006 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 5007 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 5008 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 5009 5010 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 5011 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 5012 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 5013 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 5014 5015 /* Clear the whole register first, then store bits [127:96]. */ 5016 clear_vec(s, a->rd); 5017 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 5018 } 5019 return true; 5020 } 5021 5022 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 5023 { 5024 if (fp_access_check(s)) { 5025 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 5026 } 5027 return true; 5028 } 5029 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 5030 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 5031 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 5032 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 5033 5034 static bool trans_XAR(DisasContext *s, arg_XAR *a) 5035 { 5036 if (!dc_isar_feature(aa64_sha3, s)) { 5037 return false; 5038 } 5039 if (fp_access_check(s)) { 5040 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 5041 vec_full_reg_offset(s, a->rn), 5042 vec_full_reg_offset(s, a->rm), a->imm, 16, 5043 vec_full_reg_size(s)); 5044 } 5045 return true; 5046 } 5047 5048 /* 5049 * Advanced SIMD copy 5050 */ 5051 5052 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 5053 { 5054 unsigned esz = ctz32(imm); 5055 if (esz <= MO_64) { 5056 *pesz = esz; 5057 *pidx = imm >> (esz + 1); 5058 return true; 5059 } 5060 return false; 5061 } 5062 5063 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 5064 { 5065 MemOp esz; 5066 unsigned idx; 5067 5068 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5069 return false; 5070 } 5071 if (fp_access_check(s)) { 5072 /* 5073 * This instruction just extracts the specified element and 5074 * zero-extends it into the bottom of the destination register. 5075 */ 5076 TCGv_i64 tmp = tcg_temp_new_i64(); 5077 read_vec_element(s, tmp, a->rn, idx, esz); 5078 write_fp_dreg(s, a->rd, tmp); 5079 } 5080 return true; 5081 } 5082 5083 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 5084 { 5085 MemOp esz; 5086 unsigned idx; 5087 5088 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5089 return false; 5090 } 5091 if (esz == MO_64 && !a->q) { 5092 return false; 5093 } 5094 if (fp_access_check(s)) { 5095 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 5096 vec_reg_offset(s, a->rn, idx, esz), 5097 a->q ? 16 : 8, vec_full_reg_size(s)); 5098 } 5099 return true; 5100 } 5101 5102 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 5103 { 5104 MemOp esz; 5105 unsigned idx; 5106 5107 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5108 return false; 5109 } 5110 if (esz == MO_64 && !a->q) { 5111 return false; 5112 } 5113 if (fp_access_check(s)) { 5114 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5115 a->q ? 
16 : 8, vec_full_reg_size(s), 5116 cpu_reg(s, a->rn)); 5117 } 5118 return true; 5119 } 5120 5121 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 5122 { 5123 MemOp esz; 5124 unsigned idx; 5125 5126 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5127 return false; 5128 } 5129 if (is_signed) { 5130 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 5131 return false; 5132 } 5133 } else { 5134 if (esz == MO_64 ? !a->q : a->q) { 5135 return false; 5136 } 5137 } 5138 if (fp_access_check(s)) { 5139 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5140 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 5141 if (is_signed && !a->q) { 5142 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5143 } 5144 } 5145 return true; 5146 } 5147 5148 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 5149 TRANS(UMOV, do_smov_umov, a, 0) 5150 5151 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 5152 { 5153 MemOp esz; 5154 unsigned idx; 5155 5156 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5157 return false; 5158 } 5159 if (fp_access_check(s)) { 5160 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 5161 clear_vec_high(s, true, a->rd); 5162 } 5163 return true; 5164 } 5165 5166 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5167 { 5168 MemOp esz; 5169 unsigned didx, sidx; 5170 5171 if (!decode_esz_idx(a->di, &esz, &didx)) { 5172 return false; 5173 } 5174 sidx = a->si >> esz; 5175 if (fp_access_check(s)) { 5176 TCGv_i64 tmp = tcg_temp_new_i64(); 5177 5178 read_vec_element(s, tmp, a->rn, sidx, esz); 5179 write_vec_element(s, tmp, a->rd, didx, esz); 5180 5181 /* INS is considered a 128-bit write for SVE. */ 5182 clear_vec_high(s, true, a->rd); 5183 } 5184 return true; 5185 } 5186 5187 /* 5188 * Advanced SIMD three same 5189 */ 5190 5191 typedef struct FPScalar { 5192 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5193 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5194 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5195 } FPScalar; 5196 5197 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, 5198 const FPScalar *f, int mergereg, 5199 ARMFPStatusFlavour fpsttype) 5200 { 5201 switch (a->esz) { 5202 case MO_64: 5203 if (fp_access_check(s)) { 5204 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5205 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5206 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); 5207 write_fp_dreg_merging(s, a->rd, mergereg, t0); 5208 } 5209 break; 5210 case MO_32: 5211 if (fp_access_check(s)) { 5212 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5213 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5214 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); 5215 write_fp_sreg_merging(s, a->rd, mergereg, t0); 5216 } 5217 break; 5218 case MO_16: 5219 if (!dc_isar_feature(aa64_fp16, s)) { 5220 return false; 5221 } 5222 if (fp_access_check(s)) { 5223 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5224 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5225 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); 5226 write_fp_hreg_merging(s, a->rd, mergereg, t0); 5227 } 5228 break; 5229 default: 5230 return false; 5231 } 5232 return true; 5233 } 5234 5235 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, 5236 int mergereg) 5237 { 5238 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, 5239 a->esz == MO_16 ? 5240 FPST_A64_F16 : FPST_A64); 5241 } 5242 5243 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, 5244 const FPScalar *fnormal, const FPScalar *fah, 5245 int mergereg) 5246 { 5247 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? 
fah : fnormal, 5248 mergereg, select_ah_fpst(s, a->esz)); 5249 } 5250 5251 /* Some insns need to call different helpers when FPCR.AH == 1 */ 5252 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, 5253 const FPScalar *fnormal, 5254 const FPScalar *fah, 5255 int mergereg) 5256 { 5257 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); 5258 } 5259 5260 static const FPScalar f_scalar_fadd = { 5261 gen_helper_vfp_addh, 5262 gen_helper_vfp_adds, 5263 gen_helper_vfp_addd, 5264 }; 5265 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) 5266 5267 static const FPScalar f_scalar_fsub = { 5268 gen_helper_vfp_subh, 5269 gen_helper_vfp_subs, 5270 gen_helper_vfp_subd, 5271 }; 5272 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) 5273 5274 static const FPScalar f_scalar_fdiv = { 5275 gen_helper_vfp_divh, 5276 gen_helper_vfp_divs, 5277 gen_helper_vfp_divd, 5278 }; 5279 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) 5280 5281 static const FPScalar f_scalar_fmul = { 5282 gen_helper_vfp_mulh, 5283 gen_helper_vfp_muls, 5284 gen_helper_vfp_muld, 5285 }; 5286 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) 5287 5288 static const FPScalar f_scalar_fmax = { 5289 gen_helper_vfp_maxh, 5290 gen_helper_vfp_maxs, 5291 gen_helper_vfp_maxd, 5292 }; 5293 static const FPScalar f_scalar_fmax_ah = { 5294 gen_helper_vfp_ah_maxh, 5295 gen_helper_vfp_ah_maxs, 5296 gen_helper_vfp_ah_maxd, 5297 }; 5298 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) 5299 5300 static const FPScalar f_scalar_fmin = { 5301 gen_helper_vfp_minh, 5302 gen_helper_vfp_mins, 5303 gen_helper_vfp_mind, 5304 }; 5305 static const FPScalar f_scalar_fmin_ah = { 5306 gen_helper_vfp_ah_minh, 5307 gen_helper_vfp_ah_mins, 5308 gen_helper_vfp_ah_mind, 5309 }; 5310 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) 5311 5312 static const FPScalar f_scalar_fmaxnm = { 5313 gen_helper_vfp_maxnumh, 5314 gen_helper_vfp_maxnums, 5315 gen_helper_vfp_maxnumd, 5316 }; 5317 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) 5318 5319 static const FPScalar f_scalar_fminnm = { 5320 gen_helper_vfp_minnumh, 5321 gen_helper_vfp_minnums, 5322 gen_helper_vfp_minnumd, 5323 }; 5324 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) 5325 5326 static const FPScalar f_scalar_fmulx = { 5327 gen_helper_advsimd_mulxh, 5328 gen_helper_vfp_mulxs, 5329 gen_helper_vfp_mulxd, 5330 }; 5331 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) 5332 5333 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5334 { 5335 gen_helper_vfp_mulh(d, n, m, s); 5336 gen_vfp_negh(d, d); 5337 } 5338 5339 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5340 { 5341 gen_helper_vfp_muls(d, n, m, s); 5342 gen_vfp_negs(d, d); 5343 } 5344 5345 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5346 { 5347 gen_helper_vfp_muld(d, n, m, s); 5348 gen_vfp_negd(d, d); 5349 } 5350 5351 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5352 { 5353 gen_helper_vfp_mulh(d, n, m, s); 5354 gen_vfp_ah_negh(d, d); 5355 } 5356 5357 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5358 { 5359 gen_helper_vfp_muls(d, n, m, s); 5360 gen_vfp_ah_negs(d, d); 5361 } 5362 5363 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5364 { 5365 gen_helper_vfp_muld(d, n, m, s); 5366 gen_vfp_ah_negd(d, d); 5367 } 5368 5369 static const FPScalar f_scalar_fnmul = { 5370 gen_fnmul_h, 5371 
gen_fnmul_s, 5372 gen_fnmul_d, 5373 }; 5374 static const FPScalar f_scalar_ah_fnmul = { 5375 gen_fnmul_ah_h, 5376 gen_fnmul_ah_s, 5377 gen_fnmul_ah_d, 5378 }; 5379 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) 5380 5381 static const FPScalar f_scalar_fcmeq = { 5382 gen_helper_advsimd_ceq_f16, 5383 gen_helper_neon_ceq_f32, 5384 gen_helper_neon_ceq_f64, 5385 }; 5386 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) 5387 5388 static const FPScalar f_scalar_fcmge = { 5389 gen_helper_advsimd_cge_f16, 5390 gen_helper_neon_cge_f32, 5391 gen_helper_neon_cge_f64, 5392 }; 5393 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) 5394 5395 static const FPScalar f_scalar_fcmgt = { 5396 gen_helper_advsimd_cgt_f16, 5397 gen_helper_neon_cgt_f32, 5398 gen_helper_neon_cgt_f64, 5399 }; 5400 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) 5401 5402 static const FPScalar f_scalar_facge = { 5403 gen_helper_advsimd_acge_f16, 5404 gen_helper_neon_acge_f32, 5405 gen_helper_neon_acge_f64, 5406 }; 5407 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) 5408 5409 static const FPScalar f_scalar_facgt = { 5410 gen_helper_advsimd_acgt_f16, 5411 gen_helper_neon_acgt_f32, 5412 gen_helper_neon_acgt_f64, 5413 }; 5414 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) 5415 5416 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5417 { 5418 gen_helper_vfp_subh(d, n, m, s); 5419 gen_vfp_absh(d, d); 5420 } 5421 5422 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5423 { 5424 gen_helper_vfp_subs(d, n, m, s); 5425 gen_vfp_abss(d, d); 5426 } 5427 5428 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5429 { 5430 gen_helper_vfp_subd(d, n, m, s); 5431 gen_vfp_absd(d, d); 5432 } 5433 5434 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5435 { 5436 gen_helper_vfp_subh(d, n, m, s); 5437 gen_vfp_ah_absh(d, d); 5438 } 5439 5440 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5441 { 5442 gen_helper_vfp_subs(d, n, m, s); 5443 gen_vfp_ah_abss(d, d); 5444 } 5445 5446 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5447 { 5448 gen_helper_vfp_subd(d, n, m, s); 5449 gen_vfp_ah_absd(d, d); 5450 } 5451 5452 static const FPScalar f_scalar_fabd = { 5453 gen_fabd_h, 5454 gen_fabd_s, 5455 gen_fabd_d, 5456 }; 5457 static const FPScalar f_scalar_ah_fabd = { 5458 gen_fabd_ah_h, 5459 gen_fabd_ah_s, 5460 gen_fabd_ah_d, 5461 }; 5462 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) 5463 5464 static const FPScalar f_scalar_frecps = { 5465 gen_helper_recpsf_f16, 5466 gen_helper_recpsf_f32, 5467 gen_helper_recpsf_f64, 5468 }; 5469 static const FPScalar f_scalar_ah_frecps = { 5470 gen_helper_recpsf_ah_f16, 5471 gen_helper_recpsf_ah_f32, 5472 gen_helper_recpsf_ah_f64, 5473 }; 5474 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, 5475 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) 5476 5477 static const FPScalar f_scalar_frsqrts = { 5478 gen_helper_rsqrtsf_f16, 5479 gen_helper_rsqrtsf_f32, 5480 gen_helper_rsqrtsf_f64, 5481 }; 5482 static const FPScalar f_scalar_ah_frsqrts = { 5483 gen_helper_rsqrtsf_ah_f16, 5484 gen_helper_rsqrtsf_ah_f32, 5485 gen_helper_rsqrtsf_ah_f64, 5486 }; 5487 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, 5488 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) 5489 5490 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5491 const FPScalar *f, bool swap) 5492 { 5493 switch (a->esz) { 5494 case MO_64: 5495 if 
(fp_access_check(s)) { 5496 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5497 TCGv_i64 t1 = tcg_constant_i64(0); 5498 if (swap) { 5499 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5500 } else { 5501 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5502 } 5503 write_fp_dreg(s, a->rd, t0); 5504 } 5505 break; 5506 case MO_32: 5507 if (fp_access_check(s)) { 5508 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5509 TCGv_i32 t1 = tcg_constant_i32(0); 5510 if (swap) { 5511 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5512 } else { 5513 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5514 } 5515 write_fp_sreg(s, a->rd, t0); 5516 } 5517 break; 5518 case MO_16: 5519 if (!dc_isar_feature(aa64_fp16, s)) { 5520 return false; 5521 } 5522 if (fp_access_check(s)) { 5523 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5524 TCGv_i32 t1 = tcg_constant_i32(0); 5525 if (swap) { 5526 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 5527 } else { 5528 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5529 } 5530 write_fp_sreg(s, a->rd, t0); 5531 } 5532 break; 5533 default: 5534 return false; 5535 } 5536 return true; 5537 } 5538 5539 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5540 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5541 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5542 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5543 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5544 5545 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5546 MemOp sgn_n, MemOp sgn_m, 5547 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5548 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5549 { 5550 TCGv_i64 t0, t1, t2, qc; 5551 MemOp esz = a->esz; 5552 5553 if (!fp_access_check(s)) { 5554 return true; 5555 } 5556 5557 t0 = tcg_temp_new_i64(); 5558 t1 = tcg_temp_new_i64(); 5559 t2 = tcg_temp_new_i64(); 5560 qc = tcg_temp_new_i64(); 5561 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5562 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5563 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5564 5565 if (esz == MO_64) { 5566 gen_d(t0, qc, t1, t2); 5567 } else { 5568 gen_bhs(t0, qc, t1, t2, esz); 5569 tcg_gen_ext_i64(t0, t0, esz); 5570 } 5571 5572 write_fp_dreg(s, a->rd, t0); 5573 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5574 return true; 5575 } 5576 5577 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5578 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5579 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5580 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5581 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5582 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5583 5584 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5585 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5586 { 5587 if (fp_access_check(s)) { 5588 TCGv_i64 t0 = tcg_temp_new_i64(); 5589 TCGv_i64 t1 = tcg_temp_new_i64(); 5590 5591 read_vec_element(s, t0, a->rn, 0, MO_64); 5592 read_vec_element(s, t1, a->rm, 0, MO_64); 5593 fn(t0, t0, t1); 5594 write_fp_dreg(s, a->rd, t0); 5595 } 5596 return true; 5597 } 5598 5599 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5600 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5601 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5602 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5603 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5604 TRANS(SUB_s, do_int3_scalar_d, a, 
tcg_gen_sub_i64) 5605 5606 typedef struct ENVScalar2 { 5607 NeonGenTwoOpEnvFn *gen_bhs[3]; 5608 NeonGenTwo64OpEnvFn *gen_d; 5609 } ENVScalar2; 5610 5611 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5612 { 5613 if (!fp_access_check(s)) { 5614 return true; 5615 } 5616 if (a->esz == MO_64) { 5617 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5618 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5619 f->gen_d(t0, tcg_env, t0, t1); 5620 write_fp_dreg(s, a->rd, t0); 5621 } else { 5622 TCGv_i32 t0 = tcg_temp_new_i32(); 5623 TCGv_i32 t1 = tcg_temp_new_i32(); 5624 5625 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5626 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5627 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5628 write_fp_sreg(s, a->rd, t0); 5629 } 5630 return true; 5631 } 5632 5633 static const ENVScalar2 f_scalar_sqshl = { 5634 { gen_helper_neon_qshl_s8, 5635 gen_helper_neon_qshl_s16, 5636 gen_helper_neon_qshl_s32 }, 5637 gen_helper_neon_qshl_s64, 5638 }; 5639 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5640 5641 static const ENVScalar2 f_scalar_uqshl = { 5642 { gen_helper_neon_qshl_u8, 5643 gen_helper_neon_qshl_u16, 5644 gen_helper_neon_qshl_u32 }, 5645 gen_helper_neon_qshl_u64, 5646 }; 5647 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5648 5649 static const ENVScalar2 f_scalar_sqrshl = { 5650 { gen_helper_neon_qrshl_s8, 5651 gen_helper_neon_qrshl_s16, 5652 gen_helper_neon_qrshl_s32 }, 5653 gen_helper_neon_qrshl_s64, 5654 }; 5655 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5656 5657 static const ENVScalar2 f_scalar_uqrshl = { 5658 { gen_helper_neon_qrshl_u8, 5659 gen_helper_neon_qrshl_u16, 5660 gen_helper_neon_qrshl_u32 }, 5661 gen_helper_neon_qrshl_u64, 5662 }; 5663 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5664 5665 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5666 const ENVScalar2 *f) 5667 { 5668 if (a->esz == MO_16 || a->esz == MO_32) { 5669 return do_env_scalar2(s, a, f); 5670 } 5671 return false; 5672 } 5673 5674 static const ENVScalar2 f_scalar_sqdmulh = { 5675 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5676 }; 5677 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5678 5679 static const ENVScalar2 f_scalar_sqrdmulh = { 5680 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5681 }; 5682 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5683 5684 typedef struct ENVScalar3 { 5685 NeonGenThreeOpEnvFn *gen_hs[2]; 5686 } ENVScalar3; 5687 5688 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5689 const ENVScalar3 *f) 5690 { 5691 TCGv_i32 t0, t1, t2; 5692 5693 if (a->esz != MO_16 && a->esz != MO_32) { 5694 return false; 5695 } 5696 if (!fp_access_check(s)) { 5697 return true; 5698 } 5699 5700 t0 = tcg_temp_new_i32(); 5701 t1 = tcg_temp_new_i32(); 5702 t2 = tcg_temp_new_i32(); 5703 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5704 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5705 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5706 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5707 write_fp_sreg(s, a->rd, t0); 5708 return true; 5709 } 5710 5711 static const ENVScalar3 f_scalar_sqrdmlah = { 5712 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5713 }; 5714 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5715 5716 static const ENVScalar3 f_scalar_sqrdmlsh = { 5717 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5718 }; 5719 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5720 5721 
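/*
 * Advanced SIMD scalar integer compares (64-bit only): the result element
 * is all-ones when the condition holds and zero otherwise, which is exactly
 * what tcg_gen_negsetcond_i64() (dst = -(cond ? 1 : 0)) produces; CMTST
 * uses the "test bits non-zero" condition.
 */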
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5722 { 5723 if (fp_access_check(s)) { 5724 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5725 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5726 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5727 write_fp_dreg(s, a->rd, t0); 5728 } 5729 return true; 5730 } 5731 5732 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5733 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5734 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5735 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5736 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5737 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5738 5739 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, 5740 int data, 5741 gen_helper_gvec_3_ptr * const fns[3], 5742 ARMFPStatusFlavour fpsttype) 5743 { 5744 MemOp esz = a->esz; 5745 int check = fp_access_check_vector_hsd(s, a->q, esz); 5746 5747 if (check <= 0) { 5748 return check == 0; 5749 } 5750 5751 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, 5752 data, fns[esz - 1]); 5753 return true; 5754 } 5755 5756 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5757 gen_helper_gvec_3_ptr * const fns[3]) 5758 { 5759 return do_fp3_vector_with_fpsttype(s, a, data, fns, 5760 a->esz == MO_16 ? 5761 FPST_A64_F16 : FPST_A64); 5762 } 5763 5764 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5765 gen_helper_gvec_3_ptr * const fnormal[3], 5766 gen_helper_gvec_3_ptr * const fah[3]) 5767 { 5768 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); 5769 } 5770 5771 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5772 gen_helper_gvec_3_ptr * const fnormal[3], 5773 gen_helper_gvec_3_ptr * const fah[3]) 5774 { 5775 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? 
fah : fnormal, 5776 select_ah_fpst(s, a->esz)); 5777 } 5778 5779 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5780 gen_helper_gvec_fadd_h, 5781 gen_helper_gvec_fadd_s, 5782 gen_helper_gvec_fadd_d, 5783 }; 5784 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5785 5786 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5787 gen_helper_gvec_fsub_h, 5788 gen_helper_gvec_fsub_s, 5789 gen_helper_gvec_fsub_d, 5790 }; 5791 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5792 5793 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5794 gen_helper_gvec_fdiv_h, 5795 gen_helper_gvec_fdiv_s, 5796 gen_helper_gvec_fdiv_d, 5797 }; 5798 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5799 5800 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5801 gen_helper_gvec_fmul_h, 5802 gen_helper_gvec_fmul_s, 5803 gen_helper_gvec_fmul_d, 5804 }; 5805 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5806 5807 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5808 gen_helper_gvec_fmax_h, 5809 gen_helper_gvec_fmax_s, 5810 gen_helper_gvec_fmax_d, 5811 }; 5812 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { 5813 gen_helper_gvec_ah_fmax_h, 5814 gen_helper_gvec_ah_fmax_s, 5815 gen_helper_gvec_ah_fmax_d, 5816 }; 5817 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) 5818 5819 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5820 gen_helper_gvec_fmin_h, 5821 gen_helper_gvec_fmin_s, 5822 gen_helper_gvec_fmin_d, 5823 }; 5824 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { 5825 gen_helper_gvec_ah_fmin_h, 5826 gen_helper_gvec_ah_fmin_s, 5827 gen_helper_gvec_ah_fmin_d, 5828 }; 5829 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) 5830 5831 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5832 gen_helper_gvec_fmaxnum_h, 5833 gen_helper_gvec_fmaxnum_s, 5834 gen_helper_gvec_fmaxnum_d, 5835 }; 5836 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5837 5838 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5839 gen_helper_gvec_fminnum_h, 5840 gen_helper_gvec_fminnum_s, 5841 gen_helper_gvec_fminnum_d, 5842 }; 5843 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5844 5845 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5846 gen_helper_gvec_fmulx_h, 5847 gen_helper_gvec_fmulx_s, 5848 gen_helper_gvec_fmulx_d, 5849 }; 5850 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5851 5852 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5853 gen_helper_gvec_vfma_h, 5854 gen_helper_gvec_vfma_s, 5855 gen_helper_gvec_vfma_d, 5856 }; 5857 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5858 5859 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5860 gen_helper_gvec_vfms_h, 5861 gen_helper_gvec_vfms_s, 5862 gen_helper_gvec_vfms_d, 5863 }; 5864 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { 5865 gen_helper_gvec_ah_vfms_h, 5866 gen_helper_gvec_ah_vfms_s, 5867 gen_helper_gvec_ah_vfms_d, 5868 }; 5869 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) 5870 5871 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5872 gen_helper_gvec_fceq_h, 5873 gen_helper_gvec_fceq_s, 5874 gen_helper_gvec_fceq_d, 5875 }; 5876 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5877 5878 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5879 gen_helper_gvec_fcge_h, 5880 gen_helper_gvec_fcge_s, 5881 gen_helper_gvec_fcge_d, 5882 }; 5883 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5884 5885 static gen_helper_gvec_3_ptr * const 
f_vector_fcmgt[3] = { 5886 gen_helper_gvec_fcgt_h, 5887 gen_helper_gvec_fcgt_s, 5888 gen_helper_gvec_fcgt_d, 5889 }; 5890 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5891 5892 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5893 gen_helper_gvec_facge_h, 5894 gen_helper_gvec_facge_s, 5895 gen_helper_gvec_facge_d, 5896 }; 5897 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5898 5899 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5900 gen_helper_gvec_facgt_h, 5901 gen_helper_gvec_facgt_s, 5902 gen_helper_gvec_facgt_d, 5903 }; 5904 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5905 5906 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5907 gen_helper_gvec_fabd_h, 5908 gen_helper_gvec_fabd_s, 5909 gen_helper_gvec_fabd_d, 5910 }; 5911 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { 5912 gen_helper_gvec_ah_fabd_h, 5913 gen_helper_gvec_ah_fabd_s, 5914 gen_helper_gvec_ah_fabd_d, 5915 }; 5916 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) 5917 5918 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5919 gen_helper_gvec_recps_h, 5920 gen_helper_gvec_recps_s, 5921 gen_helper_gvec_recps_d, 5922 }; 5923 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { 5924 gen_helper_gvec_ah_recps_h, 5925 gen_helper_gvec_ah_recps_s, 5926 gen_helper_gvec_ah_recps_d, 5927 }; 5928 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) 5929 5930 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5931 gen_helper_gvec_rsqrts_h, 5932 gen_helper_gvec_rsqrts_s, 5933 gen_helper_gvec_rsqrts_d, 5934 }; 5935 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { 5936 gen_helper_gvec_ah_rsqrts_h, 5937 gen_helper_gvec_ah_rsqrts_s, 5938 gen_helper_gvec_ah_rsqrts_d, 5939 }; 5940 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) 5941 5942 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5943 gen_helper_gvec_faddp_h, 5944 gen_helper_gvec_faddp_s, 5945 gen_helper_gvec_faddp_d, 5946 }; 5947 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5948 5949 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5950 gen_helper_gvec_fmaxp_h, 5951 gen_helper_gvec_fmaxp_s, 5952 gen_helper_gvec_fmaxp_d, 5953 }; 5954 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { 5955 gen_helper_gvec_ah_fmaxp_h, 5956 gen_helper_gvec_ah_fmaxp_s, 5957 gen_helper_gvec_ah_fmaxp_d, 5958 }; 5959 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) 5960 5961 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5962 gen_helper_gvec_fminp_h, 5963 gen_helper_gvec_fminp_s, 5964 gen_helper_gvec_fminp_d, 5965 }; 5966 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { 5967 gen_helper_gvec_ah_fminp_h, 5968 gen_helper_gvec_ah_fminp_s, 5969 gen_helper_gvec_ah_fminp_d, 5970 }; 5971 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) 5972 5973 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5974 gen_helper_gvec_fmaxnump_h, 5975 gen_helper_gvec_fmaxnump_s, 5976 gen_helper_gvec_fmaxnump_d, 5977 }; 5978 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5979 5980 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5981 gen_helper_gvec_fminnump_h, 5982 gen_helper_gvec_fminnump_s, 5983 gen_helper_gvec_fminnump_d, 5984 }; 5985 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5986 5987 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5988 { 5989 if 
(fp_access_check(s)) { 5990 int data = (is_2 << 1) | is_s; 5991 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5992 vec_full_reg_offset(s, a->rn), 5993 vec_full_reg_offset(s, a->rm), tcg_env, 5994 a->q ? 16 : 8, vec_full_reg_size(s), 5995 data, gen_helper_gvec_fmlal_a64); 5996 } 5997 return true; 5998 } 5999 6000 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 6001 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 6002 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 6003 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 6004 6005 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 6006 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 6007 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 6008 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 6009 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 6010 6011 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 6012 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 6013 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 6014 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 6015 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 6016 6017 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 6018 { 6019 if (fp_access_check(s)) { 6020 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 6021 } 6022 return true; 6023 } 6024 6025 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 6026 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 6027 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 6028 6029 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 6030 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 6031 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 6032 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 6033 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 6034 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 6035 6036 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 6037 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 6038 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 6039 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 6040 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 6041 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 6042 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 6043 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 6044 6045 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 6046 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 6047 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 6048 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 6049 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 6050 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 6051 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 6052 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 6053 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 6054 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 6055 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 6056 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 6057 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 6058 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 6059 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 6060 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 6061 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 6062 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 6063 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 6064 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 6065 6066 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 6067 { 6068 if (a->esz == MO_64 && !a->q) { 6069 
return false; 6070 } 6071 if (fp_access_check(s)) { 6072 tcg_gen_gvec_cmp(cond, a->esz, 6073 vec_full_reg_offset(s, a->rd), 6074 vec_full_reg_offset(s, a->rn), 6075 vec_full_reg_offset(s, a->rm), 6076 a->q ? 16 : 8, vec_full_reg_size(s)); 6077 } 6078 return true; 6079 } 6080 6081 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 6082 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 6083 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 6084 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 6085 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 6086 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 6087 6088 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 6089 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 6090 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 6091 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 6092 6093 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 6094 gen_helper_gvec_4 *fn) 6095 { 6096 if (fp_access_check(s)) { 6097 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6098 } 6099 return true; 6100 } 6101 6102 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 6103 gen_helper_gvec_4_ptr *fn) 6104 { 6105 if (fp_access_check(s)) { 6106 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6107 } 6108 return true; 6109 } 6110 6111 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 6112 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 6113 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 6114 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 6115 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 6116 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 6117 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 6118 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 6119 6120 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 6121 { 6122 if (!dc_isar_feature(aa64_bf16, s)) { 6123 return false; 6124 } 6125 if (fp_access_check(s)) { 6126 /* Q bit selects BFMLALB vs BFMLALT. */ 6127 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6128 s->fpcr_ah ? FPST_AH : FPST_A64, a->q, 6129 gen_helper_gvec_bfmlal); 6130 } 6131 return true; 6132 } 6133 6134 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 6135 gen_helper_gvec_fcaddh, 6136 gen_helper_gvec_fcadds, 6137 gen_helper_gvec_fcaddd, 6138 }; 6139 /* 6140 * Encode FPCR.AH into the data so the helper knows whether the 6141 * negations it does should avoid flipping the sign bit on a NaN 6142 */ 6143 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), 6144 f_vector_fcadd) 6145 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), 6146 f_vector_fcadd) 6147 6148 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 6149 { 6150 static gen_helper_gvec_4_ptr * const fn[] = { 6151 [MO_16] = gen_helper_gvec_fcmlah, 6152 [MO_32] = gen_helper_gvec_fcmlas, 6153 [MO_64] = gen_helper_gvec_fcmlad, 6154 }; 6155 int check; 6156 6157 if (!dc_isar_feature(aa64_fcma, s)) { 6158 return false; 6159 } 6160 6161 check = fp_access_check_vector_hsd(s, a->q, a->esz); 6162 if (check <= 0) { 6163 return check == 0; 6164 } 6165 6166 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6167 a->esz == MO_16 ? 
                      FPST_A64_F16 : FPST_A64,
                      a->rot | (s->fpcr_ah << 2), fn[a->esz]);
    return true;
}

/*
 * Widening vector x vector/indexed.
 *
 * These read from the top or bottom half of a 128-bit vector.
 * After widening, optionally accumulate with a 128-bit vector.
 * Implement these inline, as the number of elements is limited
 * and the related SVE and SME operations on larger vectors use
 * even/odd elements instead of top/bottom half.
 *
 * If idx >= 0, operand 2 is indexed, otherwise vector.
 * If acc, operand 0 is loaded with rd.
 */

/* For low half, iterating up. */
static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
                            int rd, int rn, int rm, int idx,
                            NeonGenTwo64OpFn *fn, bool acc)
{
    TCGv_i64 tcg_op0 = tcg_temp_new_i64();
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
    MemOp esz = memop & MO_SIZE;
    int half = 8 >> esz;
    int top_swap, top_half;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    if (idx >= 0) {
        read_vec_element(s, tcg_op2, rm, idx, memop);
    }

    /*
     * For top half inputs, iterate forward; backward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     * Use top_swap to conditionally invert the forward iteration index.
     */
    top_swap = top ? 0 : half - 1;
    top_half = top ? half : 0;

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
        if (idx < 0) {
            read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
        }
        if (acc) {
            read_vec_element(s, tcg_op0, rd, elt, memop + 1);
        }
        fn(tcg_op0, tcg_op1, tcg_op2);
        write_vec_element(s, tcg_op0, rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, rd);
    return true;
}
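/*
 * A concrete picture of the ordering used above, for esz == MO_16
 * (half == 4) with rd == rn: the bottom-half form (top == 0) visits
 * elt = 3, 2, 1, 0.  Writing the double-width result for elt == 3
 * clobbers only 16-bit source slots 6 and 7, which a bottom-half op
 * never reads; by the time elt == 0 overwrites slots 0 and 1, every
 * other input has already been consumed.  The top-half form (top == 1)
 * visits elt = 0, 1, 2, 3 and so reads source slots 4..7 before the
 * widened results grow into them.
 */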
static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_sub_i64(d, d, t);
}

TRANS(SMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(UMULL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(UMLAL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(SMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)
TRANS(UMLSL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)

TRANS(SMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(UMULL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(UMLAL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(SMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_mulsub_i64, true)
TRANS(UMLSL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_mulsub_i64, true)

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t1, n, m);
    tcg_gen_sub_i64(t2, m, n);
    tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t1, n, m);
    tcg_gen_sub_i64(t2, m, n);
    tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

TRANS(SADDL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_add_i64, false)
TRANS(UADDL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_add_i64, false)
TRANS(SSUBL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_sub_i64, false)
TRANS(USUBL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_sub_i64, false)
TRANS(SABDL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_sabd_i64, false)
TRANS(UABDL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_uabd_i64, false)
TRANS(SABAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_saba_i64, true)
TRANS(UABAL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_uaba_i64, true)
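/*
 * The SQDMULL/SQDMLAL/SQDMLSL expanders below realise "signed saturating
 * doubling multiply" as a plain multiply followed by a saturating
 * self-addition, since sat(2 * n * m) == sat(n * m + n * m).  The _h
 * forms handle 16x16->32 products saturated at 32 bits, the _s forms
 * 32x32->64 products saturated at 64 bits; the accumulating forms add a
 * second saturating step (preceded by a negation for SQDMLSL).  The
 * helpers take tcg_env so that saturation can set QC.
 */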
static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_mul_i64(d, n, m);
    gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
}

static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_mul_i64(d, n, m);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
}

static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
    gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
}

static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
}

static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
    tcg_gen_neg_i64(t, t);
    gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
}

static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
    tcg_gen_neg_i64(t, t);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
}

TRANS(SQDMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

TRANS(SQDMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
                           MemOp sign, bool sub)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int top_swap = top ? 0 : half - 1;
    int top_half = top ? half : 0;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, a->rd);
    return true;
}

TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
TRANS(UADDW, do_addsub_wide, a, 0, false)
TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
TRANS(USUBW, do_addsub_wide, a, 0, true)
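/*
 * The high-narrowing ops below (ADDHN and friends) compute the sum or
 * difference in double width and then keep only the high half of each
 * element: shift right by the element width and store a narrow result.
 * The rounding forms add 1 << (ebits - 1) first, so the result rounds
 * to nearest via a carry out of the discarded low half.
 */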
static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
                                 bool sub, bool round)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int ebits = 8 << esz;
    uint64_t rbit = 1ull << (ebits - 1);
    int top_swap, top_half;

    /* There are no 128x128->64 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    /*
     * For top half inputs, iterate backward; forward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     */
    top_swap = top ? half - 1 : 0;
    top_half = top ? half : 0;

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        if (round) {
            tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
        }
        tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
        write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
    }
    clear_vec_high(s, top, a->rd);
    return true;
}

TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)

static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
{
    if (fp_access_check(s)) {
        /* The Q field specifies lo/hi half input for these insns. */
        gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
    }
    return true;
}

TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)

/*
 * Advanced SIMD scalar/vector x indexed element
 */

static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
{
    switch (a->esz) {
    case MO_64:
        if (fp_access_check(s)) {
            TCGv_i64 t0 = read_fp_dreg(s, a->rn);
            TCGv_i64 t1 = tcg_temp_new_i64();

            read_vec_element(s, t1, a->rm, a->idx, MO_64);
            f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
            write_fp_dreg_merging(s, a->rd, a->rn, t0);
        }
        break;
    case MO_32:
        if (fp_access_check(s)) {
            TCGv_i32 t0 = read_fp_sreg(s, a->rn);
            TCGv_i32 t1 = tcg_temp_new_i32();

            read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
            f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
            write_fp_sreg_merging(s, a->rd, a->rn, t0);
        }
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return false;
        }
        if (fp_access_check(s)) {
            TCGv_i32 t0 = read_fp_hreg(s, a->rn);
            TCGv_i32 t1 = tcg_temp_new_i32();

            read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
            f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
            write_fp_hreg_merging(s, a->rd, a->rn, t0);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)

static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
{
    switch (a->esz) {
    case MO_64:
        if (fp_access_check(s)) {
            TCGv_i64 t0 = read_fp_dreg(s, a->rd);
            TCGv_i64 t1 = read_fp_dreg(s, a->rn);
            TCGv_i64 t2 = tcg_temp_new_i64();

            read_vec_element(s, t2, a->rm, a->idx, MO_64);
            if (neg) {
                gen_vfp_maybe_ah_negd(s, t1, t1);
            }
            gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
            write_fp_dreg_merging(s, a->rd, a->rd, t0);
        }
        break;
    case MO_32:
        if (fp_access_check(s)) {
            TCGv_i32 t0 = read_fp_sreg(s, a->rd);
            TCGv_i32 t1 = read_fp_sreg(s, a->rn);
            TCGv_i32 t2 = tcg_temp_new_i32();

            read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
            if (neg) {
                gen_vfp_maybe_ah_negs(s, t1, t1);
            }
            gen_helper_vfp_muladds(t0, t1, t2, t0,
fpstatus_ptr(FPST_A64)); 6600 write_fp_sreg_merging(s, a->rd, a->rd, t0); 6601 } 6602 break; 6603 case MO_16: 6604 if (!dc_isar_feature(aa64_fp16, s)) { 6605 return false; 6606 } 6607 if (fp_access_check(s)) { 6608 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6609 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6610 TCGv_i32 t2 = tcg_temp_new_i32(); 6611 6612 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6613 if (neg) { 6614 gen_vfp_maybe_ah_negh(s, t1, t1); 6615 } 6616 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6617 fpstatus_ptr(FPST_A64_F16)); 6618 write_fp_hreg_merging(s, a->rd, a->rd, t0); 6619 } 6620 break; 6621 default: 6622 g_assert_not_reached(); 6623 } 6624 return true; 6625 } 6626 6627 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6628 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6629 6630 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6631 const ENVScalar2 *f) 6632 { 6633 if (a->esz < MO_16 || a->esz > MO_32) { 6634 return false; 6635 } 6636 if (fp_access_check(s)) { 6637 TCGv_i32 t0 = tcg_temp_new_i32(); 6638 TCGv_i32 t1 = tcg_temp_new_i32(); 6639 6640 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6641 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6642 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6643 write_fp_sreg(s, a->rd, t0); 6644 } 6645 return true; 6646 } 6647 6648 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6649 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6650 6651 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6652 const ENVScalar3 *f) 6653 { 6654 if (a->esz < MO_16 || a->esz > MO_32) { 6655 return false; 6656 } 6657 if (fp_access_check(s)) { 6658 TCGv_i32 t0 = tcg_temp_new_i32(); 6659 TCGv_i32 t1 = tcg_temp_new_i32(); 6660 TCGv_i32 t2 = tcg_temp_new_i32(); 6661 6662 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6663 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6664 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6665 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6666 write_fp_sreg(s, a->rd, t0); 6667 } 6668 return true; 6669 } 6670 6671 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6672 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6673 6674 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6675 NeonGenTwo64OpFn *fn, bool acc) 6676 { 6677 if (fp_access_check(s)) { 6678 TCGv_i64 t0 = tcg_temp_new_i64(); 6679 TCGv_i64 t1 = tcg_temp_new_i64(); 6680 TCGv_i64 t2 = tcg_temp_new_i64(); 6681 6682 if (acc) { 6683 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6684 } 6685 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6686 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6687 fn(t0, t1, t2); 6688 6689 /* Clear the whole register first, then store scalar. */ 6690 clear_vec(s, a->rd); 6691 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6692 } 6693 return true; 6694 } 6695 6696 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6697 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6698 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6699 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6700 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6701 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6702 6703 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6704 gen_helper_gvec_3_ptr * const fns[3]) 6705 { 6706 MemOp esz = a->esz; 6707 int check = fp_access_check_vector_hsd(s, a->q, esz); 6708 6709 if (check <= 0) { 6710 return check == 0; 6711 } 6712 6713 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6714 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6715 a->idx, fns[esz - 1]); 6716 return true; 6717 } 6718 6719 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6720 gen_helper_gvec_fmul_idx_h, 6721 gen_helper_gvec_fmul_idx_s, 6722 gen_helper_gvec_fmul_idx_d, 6723 }; 6724 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6725 6726 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6727 gen_helper_gvec_fmulx_idx_h, 6728 gen_helper_gvec_fmulx_idx_s, 6729 gen_helper_gvec_fmulx_idx_d, 6730 }; 6731 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6732 6733 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6734 { 6735 static gen_helper_gvec_4_ptr * const fns[3][3] = { 6736 { gen_helper_gvec_fmla_idx_h, 6737 gen_helper_gvec_fmla_idx_s, 6738 gen_helper_gvec_fmla_idx_d }, 6739 { gen_helper_gvec_fmls_idx_h, 6740 gen_helper_gvec_fmls_idx_s, 6741 gen_helper_gvec_fmls_idx_d }, 6742 { gen_helper_gvec_ah_fmls_idx_h, 6743 gen_helper_gvec_ah_fmls_idx_s, 6744 gen_helper_gvec_ah_fmls_idx_d }, 6745 }; 6746 MemOp esz = a->esz; 6747 int check = fp_access_check_vector_hsd(s, a->q, esz); 6748 6749 if (check <= 0) { 6750 return check == 0; 6751 } 6752 6753 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6754 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6755 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); 6756 return true; 6757 } 6758 6759 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6760 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6761 6762 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6763 { 6764 if (fp_access_check(s)) { 6765 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6766 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6767 vec_full_reg_offset(s, a->rn), 6768 vec_full_reg_offset(s, a->rm), tcg_env, 6769 a->q ? 
16 : 8, vec_full_reg_size(s), 6770 data, gen_helper_gvec_fmlal_idx_a64); 6771 } 6772 return true; 6773 } 6774 6775 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6776 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6777 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6778 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6779 6780 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6781 gen_helper_gvec_3 * const fns[2]) 6782 { 6783 assert(a->esz == MO_16 || a->esz == MO_32); 6784 if (fp_access_check(s)) { 6785 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6786 } 6787 return true; 6788 } 6789 6790 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6791 gen_helper_gvec_mul_idx_h, 6792 gen_helper_gvec_mul_idx_s, 6793 }; 6794 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6795 6796 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6797 { 6798 static gen_helper_gvec_4 * const fns[2][2] = { 6799 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6800 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6801 }; 6802 6803 assert(a->esz == MO_16 || a->esz == MO_32); 6804 if (fp_access_check(s)) { 6805 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6806 a->idx, fns[a->esz - 1][sub]); 6807 } 6808 return true; 6809 } 6810 6811 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6812 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6813 6814 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6815 gen_helper_gvec_4 * const fns[2]) 6816 { 6817 assert(a->esz == MO_16 || a->esz == MO_32); 6818 if (fp_access_check(s)) { 6819 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6820 vec_full_reg_offset(s, a->rn), 6821 vec_full_reg_offset(s, a->rm), 6822 offsetof(CPUARMState, vfp.qc), 6823 a->q ? 
16 : 8, vec_full_reg_size(s), 6824 a->idx, fns[a->esz - 1]); 6825 } 6826 return true; 6827 } 6828 6829 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6830 gen_helper_neon_sqdmulh_idx_h, 6831 gen_helper_neon_sqdmulh_idx_s, 6832 }; 6833 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6834 6835 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6836 gen_helper_neon_sqrdmulh_idx_h, 6837 gen_helper_neon_sqrdmulh_idx_s, 6838 }; 6839 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6840 6841 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6842 gen_helper_neon_sqrdmlah_idx_h, 6843 gen_helper_neon_sqrdmlah_idx_s, 6844 }; 6845 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6846 f_vector_idx_sqrdmlah) 6847 6848 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6849 gen_helper_neon_sqrdmlsh_idx_h, 6850 gen_helper_neon_sqrdmlsh_idx_s, 6851 }; 6852 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6853 f_vector_idx_sqrdmlsh) 6854 6855 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6856 gen_helper_gvec_4 *fn) 6857 { 6858 if (fp_access_check(s)) { 6859 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6860 } 6861 return true; 6862 } 6863 6864 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6865 gen_helper_gvec_4_ptr *fn) 6866 { 6867 if (fp_access_check(s)) { 6868 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6869 } 6870 return true; 6871 } 6872 6873 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6874 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6875 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6876 gen_helper_gvec_sudot_idx_b) 6877 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6878 gen_helper_gvec_usdot_idx_b) 6879 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6880 gen_helper_gvec_bfdot_idx) 6881 6882 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6883 { 6884 if (!dc_isar_feature(aa64_bf16, s)) { 6885 return false; 6886 } 6887 if (fp_access_check(s)) { 6888 /* Q bit selects BFMLALB vs BFMLALT. */ 6889 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6890 s->fpcr_ah ? FPST_AH : FPST_A64, 6891 (a->idx << 1) | a->q, 6892 gen_helper_gvec_bfmlal_idx); 6893 } 6894 return true; 6895 } 6896 6897 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6898 { 6899 gen_helper_gvec_4_ptr *fn; 6900 6901 if (!dc_isar_feature(aa64_fcma, s)) { 6902 return false; 6903 } 6904 switch (a->esz) { 6905 case MO_16: 6906 if (!dc_isar_feature(aa64_fp16, s)) { 6907 return false; 6908 } 6909 fn = gen_helper_gvec_fcmlah_idx; 6910 break; 6911 case MO_32: 6912 fn = gen_helper_gvec_fcmlas_idx; 6913 break; 6914 default: 6915 g_assert_not_reached(); 6916 } 6917 if (fp_access_check(s)) { 6918 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6919 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 6920 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); 6921 } 6922 return true; 6923 } 6924 6925 /* 6926 * Advanced SIMD scalar pairwise 6927 */ 6928 6929 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6930 { 6931 switch (a->esz) { 6932 case MO_64: 6933 if (fp_access_check(s)) { 6934 TCGv_i64 t0 = tcg_temp_new_i64(); 6935 TCGv_i64 t1 = tcg_temp_new_i64(); 6936 6937 read_vec_element(s, t0, a->rn, 0, MO_64); 6938 read_vec_element(s, t1, a->rn, 1, MO_64); 6939 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6940 write_fp_dreg(s, a->rd, t0); 6941 } 6942 break; 6943 case MO_32: 6944 if (fp_access_check(s)) { 6945 TCGv_i32 t0 = tcg_temp_new_i32(); 6946 TCGv_i32 t1 = tcg_temp_new_i32(); 6947 6948 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6949 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6950 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6951 write_fp_sreg(s, a->rd, t0); 6952 } 6953 break; 6954 case MO_16: 6955 if (!dc_isar_feature(aa64_fp16, s)) { 6956 return false; 6957 } 6958 if (fp_access_check(s)) { 6959 TCGv_i32 t0 = tcg_temp_new_i32(); 6960 TCGv_i32 t1 = tcg_temp_new_i32(); 6961 6962 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6963 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6964 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6965 write_fp_sreg(s, a->rd, t0); 6966 } 6967 break; 6968 default: 6969 g_assert_not_reached(); 6970 } 6971 return true; 6972 } 6973 6974 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, 6975 const FPScalar *fnormal, 6976 const FPScalar *fah) 6977 { 6978 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); 6979 } 6980 6981 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6982 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) 6983 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) 6984 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6985 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6986 6987 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6988 { 6989 if (fp_access_check(s)) { 6990 TCGv_i64 t0 = tcg_temp_new_i64(); 6991 TCGv_i64 t1 = tcg_temp_new_i64(); 6992 6993 read_vec_element(s, t0, a->rn, 0, MO_64); 6994 read_vec_element(s, t1, a->rn, 1, MO_64); 6995 tcg_gen_add_i64(t0, t0, t1); 6996 write_fp_dreg(s, a->rd, t0); 6997 } 6998 return true; 6999 } 7000 7001 /* 7002 * Floating-point conditional select 7003 */ 7004 7005 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 7006 { 7007 TCGv_i64 t_true, t_false; 7008 DisasCompare64 c; 7009 int check = fp_access_check_scalar_hsd(s, a->esz); 7010 7011 if (check <= 0) { 7012 return check == 0; 7013 } 7014 7015 /* Zero extend sreg & hreg inputs to 64 bits now. */ 7016 t_true = tcg_temp_new_i64(); 7017 t_false = tcg_temp_new_i64(); 7018 read_vec_element(s, t_true, a->rn, 0, a->esz); 7019 read_vec_element(s, t_false, a->rm, 0, a->esz); 7020 7021 a64_test_cc(&c, a->cond); 7022 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 7023 t_true, t_false); 7024 7025 /* 7026 * Note that sregs & hregs write back zeros to the high bits, 7027 * and we've already done the zero-extension. 
7028 */ 7029 write_fp_dreg(s, a->rd, t_true); 7030 return true; 7031 } 7032 7033 /* 7034 * Advanced SIMD Extract 7035 */ 7036 7037 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 7038 { 7039 if (fp_access_check(s)) { 7040 TCGv_i64 lo = read_fp_dreg(s, a->rn); 7041 if (a->imm != 0) { 7042 TCGv_i64 hi = read_fp_dreg(s, a->rm); 7043 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 7044 } 7045 write_fp_dreg(s, a->rd, lo); 7046 } 7047 return true; 7048 } 7049 7050 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 7051 { 7052 TCGv_i64 lo, hi; 7053 int pos = (a->imm & 7) * 8; 7054 int elt = a->imm >> 3; 7055 7056 if (!fp_access_check(s)) { 7057 return true; 7058 } 7059 7060 lo = tcg_temp_new_i64(); 7061 hi = tcg_temp_new_i64(); 7062 7063 read_vec_element(s, lo, a->rn, elt, MO_64); 7064 elt++; 7065 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 7066 elt++; 7067 7068 if (pos != 0) { 7069 TCGv_i64 hh = tcg_temp_new_i64(); 7070 tcg_gen_extract2_i64(lo, lo, hi, pos); 7071 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 7072 tcg_gen_extract2_i64(hi, hi, hh, pos); 7073 } 7074 7075 write_vec_element(s, lo, a->rd, 0, MO_64); 7076 write_vec_element(s, hi, a->rd, 1, MO_64); 7077 clear_vec_high(s, true, a->rd); 7078 return true; 7079 } 7080 7081 /* 7082 * Floating-point data-processing (3 source) 7083 */ 7084 7085 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 7086 { 7087 TCGv_ptr fpst; 7088 7089 /* 7090 * These are fused multiply-add. Note that doing the negations here 7091 * as separate steps is correct: an input NaN should come out with 7092 * its sign bit flipped if it is a negated-input. 7093 */ 7094 switch (a->esz) { 7095 case MO_64: 7096 if (fp_access_check(s)) { 7097 TCGv_i64 tn = read_fp_dreg(s, a->rn); 7098 TCGv_i64 tm = read_fp_dreg(s, a->rm); 7099 TCGv_i64 ta = read_fp_dreg(s, a->ra); 7100 7101 if (neg_a) { 7102 gen_vfp_maybe_ah_negd(s, ta, ta); 7103 } 7104 if (neg_n) { 7105 gen_vfp_maybe_ah_negd(s, tn, tn); 7106 } 7107 fpst = fpstatus_ptr(FPST_A64); 7108 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 7109 write_fp_dreg_merging(s, a->rd, a->ra, ta); 7110 } 7111 break; 7112 7113 case MO_32: 7114 if (fp_access_check(s)) { 7115 TCGv_i32 tn = read_fp_sreg(s, a->rn); 7116 TCGv_i32 tm = read_fp_sreg(s, a->rm); 7117 TCGv_i32 ta = read_fp_sreg(s, a->ra); 7118 7119 if (neg_a) { 7120 gen_vfp_maybe_ah_negs(s, ta, ta); 7121 } 7122 if (neg_n) { 7123 gen_vfp_maybe_ah_negs(s, tn, tn); 7124 } 7125 fpst = fpstatus_ptr(FPST_A64); 7126 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 7127 write_fp_sreg_merging(s, a->rd, a->ra, ta); 7128 } 7129 break; 7130 7131 case MO_16: 7132 if (!dc_isar_feature(aa64_fp16, s)) { 7133 return false; 7134 } 7135 if (fp_access_check(s)) { 7136 TCGv_i32 tn = read_fp_hreg(s, a->rn); 7137 TCGv_i32 tm = read_fp_hreg(s, a->rm); 7138 TCGv_i32 ta = read_fp_hreg(s, a->ra); 7139 7140 if (neg_a) { 7141 gen_vfp_maybe_ah_negh(s, ta, ta); 7142 } 7143 if (neg_n) { 7144 gen_vfp_maybe_ah_negh(s, tn, tn); 7145 } 7146 fpst = fpstatus_ptr(FPST_A64_F16); 7147 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 7148 write_fp_hreg_merging(s, a->rd, a->ra, ta); 7149 } 7150 break; 7151 7152 default: 7153 return false; 7154 } 7155 return true; 7156 } 7157 7158 TRANS(FMADD, do_fmadd, a, false, false) 7159 TRANS(FNMADD, do_fmadd, a, true, true) 7160 TRANS(FMSUB, do_fmadd, a, false, true) 7161 TRANS(FNMSUB, do_fmadd, a, true, false) 7162 7163 /* 7164 * Advanced SIMD Across Lanes 7165 */ 7166 7167 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool 
widen, 7168 MemOp src_sign, NeonGenTwo64OpFn *fn) 7169 { 7170 TCGv_i64 tcg_res, tcg_elt; 7171 MemOp src_mop = a->esz | src_sign; 7172 int elements = (a->q ? 16 : 8) >> a->esz; 7173 7174 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 7175 if (elements < 4) { 7176 return false; 7177 } 7178 if (!fp_access_check(s)) { 7179 return true; 7180 } 7181 7182 tcg_res = tcg_temp_new_i64(); 7183 tcg_elt = tcg_temp_new_i64(); 7184 7185 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 7186 for (int i = 1; i < elements; i++) { 7187 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 7188 fn(tcg_res, tcg_res, tcg_elt); 7189 } 7190 7191 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 7192 write_fp_dreg(s, a->rd, tcg_res); 7193 return true; 7194 } 7195 7196 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 7197 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 7198 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 7199 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 7200 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 7201 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 7202 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 7203 7204 /* 7205 * do_fp_reduction helper 7206 * 7207 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7208 * important for correct NaN propagation that we do these 7209 * operations in exactly the order specified by the pseudocode. 7210 * 7211 * This is a recursive function. 7212 */ 7213 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 7214 int ebase, int ecount, TCGv_ptr fpst, 7215 NeonGenTwoSingleOpFn *fn) 7216 { 7217 if (ecount == 1) { 7218 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 7219 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 7220 return tcg_elem; 7221 } else { 7222 int half = ecount >> 1; 7223 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7224 7225 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 7226 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 7227 tcg_res = tcg_temp_new_i32(); 7228 7229 fn(tcg_res, tcg_lo, tcg_hi, fpst); 7230 return tcg_res; 7231 } 7232 } 7233 7234 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 7235 NeonGenTwoSingleOpFn *fnormal, 7236 NeonGenTwoSingleOpFn *fah) 7237 { 7238 if (fp_access_check(s)) { 7239 MemOp esz = a->esz; 7240 int elts = (a->q ? 16 : 8) >> esz; 7241 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 7242 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, 7243 s->fpcr_ah ? 
fah : fnormal); 7244 write_fp_sreg(s, a->rd, res); 7245 } 7246 return true; 7247 } 7248 7249 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, 7250 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) 7251 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, 7252 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) 7253 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, 7254 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) 7255 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, 7256 gen_helper_vfp_minh, gen_helper_vfp_ah_minh) 7257 7258 TRANS(FMAXNMV_s, do_fp_reduction, a, 7259 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) 7260 TRANS(FMINNMV_s, do_fp_reduction, a, 7261 gen_helper_vfp_minnums, gen_helper_vfp_minnums) 7262 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) 7263 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) 7264 7265 /* 7266 * Floating-point Immediate 7267 */ 7268 7269 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 7270 { 7271 int check = fp_access_check_scalar_hsd(s, a->esz); 7272 uint64_t imm; 7273 7274 if (check <= 0) { 7275 return check == 0; 7276 } 7277 7278 imm = vfp_expand_imm(a->esz, a->imm); 7279 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 7280 return true; 7281 } 7282 7283 /* 7284 * Floating point compare, conditional compare 7285 */ 7286 7287 static void handle_fp_compare(DisasContext *s, int size, 7288 unsigned int rn, unsigned int rm, 7289 bool cmp_with_zero, bool signal_all_nans) 7290 { 7291 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 7292 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); 7293 7294 if (size == MO_64) { 7295 TCGv_i64 tcg_vn, tcg_vm; 7296 7297 tcg_vn = read_fp_dreg(s, rn); 7298 if (cmp_with_zero) { 7299 tcg_vm = tcg_constant_i64(0); 7300 } else { 7301 tcg_vm = read_fp_dreg(s, rm); 7302 } 7303 if (signal_all_nans) { 7304 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7305 } else { 7306 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7307 } 7308 } else { 7309 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 7310 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 7311 7312 read_vec_element_i32(s, tcg_vn, rn, 0, size); 7313 if (cmp_with_zero) { 7314 tcg_gen_movi_i32(tcg_vm, 0); 7315 } else { 7316 read_vec_element_i32(s, tcg_vm, rm, 0, size); 7317 } 7318 7319 switch (size) { 7320 case MO_32: 7321 if (signal_all_nans) { 7322 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7323 } else { 7324 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7325 } 7326 break; 7327 case MO_16: 7328 if (signal_all_nans) { 7329 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7330 } else { 7331 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7332 } 7333 break; 7334 default: 7335 g_assert_not_reached(); 7336 } 7337 } 7338 7339 gen_set_nzcv(tcg_flags); 7340 } 7341 7342 /* FCMP, FCMPE */ 7343 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 7344 { 7345 int check = fp_access_check_scalar_hsd(s, a->esz); 7346 7347 if (check <= 0) { 7348 return check == 0; 7349 } 7350 7351 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 7352 return true; 7353 } 7354 7355 /* FCCMP, FCCMPE */ 7356 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 7357 { 7358 TCGLabel *label_continue = NULL; 7359 int check = fp_access_check_scalar_hsd(s, a->esz); 7360 7361 if (check <= 0) { 7362 return check == 0; 7363 } 7364 7365 if (a->cond < 0x0e) { /* not always */ 7366 TCGLabel *label_match = gen_new_label(); 7367 label_continue = gen_new_label(); 7368 
arm_gen_test_cc(a->cond, label_match); 7369 /* nomatch: */ 7370 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7371 tcg_gen_br(label_continue); 7372 gen_set_label(label_match); 7373 } 7374 7375 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7376 7377 if (label_continue) { 7378 gen_set_label(label_continue); 7379 } 7380 return true; 7381 } 7382 7383 /* 7384 * Advanced SIMD Modified Immediate 7385 */ 7386 7387 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7388 { 7389 if (!dc_isar_feature(aa64_fp16, s)) { 7390 return false; 7391 } 7392 if (fp_access_check(s)) { 7393 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7394 a->q ? 16 : 8, vec_full_reg_size(s), 7395 vfp_expand_imm(MO_16, a->abcdefgh)); 7396 } 7397 return true; 7398 } 7399 7400 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7401 int64_t c, uint32_t oprsz, uint32_t maxsz) 7402 { 7403 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7404 } 7405 7406 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7407 { 7408 GVecGen2iFn *fn; 7409 7410 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7411 if ((a->cmode & 1) && a->cmode < 12) { 7412 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7413 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 7414 } else { 7415 /* There is one unallocated cmode/op combination in this space */ 7416 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 7417 return false; 7418 } 7419 fn = gen_movi; 7420 } 7421 7422 if (fp_access_check(s)) { 7423 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 7424 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 7425 } 7426 return true; 7427 } 7428 7429 /* 7430 * Advanced SIMD Shift by Immediate 7431 */ 7432 7433 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 7434 { 7435 if (fp_access_check(s)) { 7436 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 7437 } 7438 return true; 7439 } 7440 7441 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 7442 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 7443 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 7444 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 7445 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 7446 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 7447 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 7448 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 7449 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 7450 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 7451 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 7452 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 7453 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 7454 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 7455 7456 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 7457 { 7458 TCGv_i64 tcg_rn, tcg_rd; 7459 int esz = a->esz; 7460 int esize; 7461 7462 if (!fp_access_check(s)) { 7463 return true; 7464 } 7465 7466 /* 7467 * For the LL variants the store is larger than the load, 7468 * so if rd == rn we would overwrite parts of our input. 7469 * So load everything right now and use shifts in the main loop. 
7470 */ 7471 tcg_rd = tcg_temp_new_i64(); 7472 tcg_rn = tcg_temp_new_i64(); 7473 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7474 7475 esize = 8 << esz; 7476 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7477 if (is_u) { 7478 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7479 } else { 7480 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7481 } 7482 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7483 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7484 } 7485 clear_vec_high(s, true, a->rd); 7486 return true; 7487 } 7488 7489 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7490 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7491 7492 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7493 { 7494 assert(shift >= 0 && shift <= 64); 7495 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7496 } 7497 7498 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7499 { 7500 assert(shift >= 0 && shift <= 64); 7501 if (shift == 64) { 7502 tcg_gen_movi_i64(dst, 0); 7503 } else { 7504 tcg_gen_shri_i64(dst, src, shift); 7505 } 7506 } 7507 7508 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7509 { 7510 gen_sshr_d(src, src, shift); 7511 tcg_gen_add_i64(dst, dst, src); 7512 } 7513 7514 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7515 { 7516 gen_ushr_d(src, src, shift); 7517 tcg_gen_add_i64(dst, dst, src); 7518 } 7519 7520 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7521 { 7522 assert(shift >= 0 && shift <= 32); 7523 if (shift) { 7524 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7525 tcg_gen_add_i64(dst, src, rnd); 7526 tcg_gen_sari_i64(dst, dst, shift); 7527 } else { 7528 tcg_gen_mov_i64(dst, src); 7529 } 7530 } 7531 7532 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7533 { 7534 assert(shift >= 0 && shift <= 32); 7535 if (shift) { 7536 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7537 tcg_gen_add_i64(dst, src, rnd); 7538 tcg_gen_shri_i64(dst, dst, shift); 7539 } else { 7540 tcg_gen_mov_i64(dst, src); 7541 } 7542 } 7543 7544 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7545 { 7546 assert(shift >= 0 && shift <= 64); 7547 if (shift == 0) { 7548 tcg_gen_mov_i64(dst, src); 7549 } else if (shift == 64) { 7550 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7551 tcg_gen_movi_i64(dst, 0); 7552 } else { 7553 TCGv_i64 rnd = tcg_temp_new_i64(); 7554 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7555 tcg_gen_sari_i64(dst, src, shift); 7556 tcg_gen_add_i64(dst, dst, rnd); 7557 } 7558 } 7559 7560 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7561 { 7562 assert(shift >= 0 && shift <= 64); 7563 if (shift == 0) { 7564 tcg_gen_mov_i64(dst, src); 7565 } else if (shift == 64) { 7566 /* Rounding will propagate bit 63 into bit 64. */ 7567 tcg_gen_shri_i64(dst, src, 63); 7568 } else { 7569 TCGv_i64 rnd = tcg_temp_new_i64(); 7570 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7571 tcg_gen_shri_i64(dst, src, shift); 7572 tcg_gen_add_i64(dst, dst, rnd); 7573 } 7574 } 7575 7576 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7577 { 7578 gen_srshr_d(src, src, shift); 7579 tcg_gen_add_i64(dst, dst, src); 7580 } 7581 7582 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7583 { 7584 gen_urshr_d(src, src, shift); 7585 tcg_gen_add_i64(dst, dst, src); 7586 } 7587 7588 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7589 { 7590 /* If shift is 64, dst is unchanged. 
*/ 7591 if (shift != 64) { 7592 tcg_gen_shri_i64(src, src, shift); 7593 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7594 } 7595 } 7596 7597 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7598 { 7599 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7600 } 7601 7602 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7603 WideShiftImmFn * const fns[3], MemOp sign) 7604 { 7605 TCGv_i64 tcg_rn, tcg_rd; 7606 int esz = a->esz; 7607 int esize; 7608 WideShiftImmFn *fn; 7609 7610 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7611 7612 if (!fp_access_check(s)) { 7613 return true; 7614 } 7615 7616 tcg_rn = tcg_temp_new_i64(); 7617 tcg_rd = tcg_temp_new_i64(); 7618 tcg_gen_movi_i64(tcg_rd, 0); 7619 7620 fn = fns[esz]; 7621 esize = 8 << esz; 7622 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7623 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7624 fn(tcg_rn, tcg_rn, a->imm); 7625 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7626 } 7627 7628 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7629 clear_vec_high(s, a->q, a->rd); 7630 return true; 7631 } 7632 7633 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7634 { 7635 tcg_gen_sari_i64(d, s, i); 7636 tcg_gen_ext16u_i64(d, d); 7637 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7638 } 7639 7640 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7641 { 7642 tcg_gen_sari_i64(d, s, i); 7643 tcg_gen_ext32u_i64(d, d); 7644 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7645 } 7646 7647 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7648 { 7649 gen_sshr_d(d, s, i); 7650 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7651 } 7652 7653 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7654 { 7655 tcg_gen_shri_i64(d, s, i); 7656 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7657 } 7658 7659 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7660 { 7661 tcg_gen_shri_i64(d, s, i); 7662 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7663 } 7664 7665 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7666 { 7667 gen_ushr_d(d, s, i); 7668 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7669 } 7670 7671 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7672 { 7673 tcg_gen_sari_i64(d, s, i); 7674 tcg_gen_ext16u_i64(d, d); 7675 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7676 } 7677 7678 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7679 { 7680 tcg_gen_sari_i64(d, s, i); 7681 tcg_gen_ext32u_i64(d, d); 7682 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7683 } 7684 7685 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7686 { 7687 gen_sshr_d(d, s, i); 7688 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7689 } 7690 7691 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7692 { 7693 gen_srshr_bhs(d, s, i); 7694 tcg_gen_ext16u_i64(d, d); 7695 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7696 } 7697 7698 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7699 { 7700 gen_srshr_bhs(d, s, i); 7701 tcg_gen_ext32u_i64(d, d); 7702 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7703 } 7704 7705 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7706 { 7707 gen_srshr_d(d, s, i); 7708 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7709 } 7710 7711 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7712 { 7713 gen_urshr_bhs(d, s, i); 7714 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7715 } 7716 7717 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7718 
{ 7719 gen_urshr_bhs(d, s, i); 7720 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7721 } 7722 7723 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7724 { 7725 gen_urshr_d(d, s, i); 7726 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7727 } 7728 7729 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7730 { 7731 gen_srshr_bhs(d, s, i); 7732 tcg_gen_ext16u_i64(d, d); 7733 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7734 } 7735 7736 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7737 { 7738 gen_srshr_bhs(d, s, i); 7739 tcg_gen_ext32u_i64(d, d); 7740 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7741 } 7742 7743 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7744 { 7745 gen_srshr_d(d, s, i); 7746 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7747 } 7748 7749 static WideShiftImmFn * const shrn_fns[] = { 7750 tcg_gen_shri_i64, 7751 tcg_gen_shri_i64, 7752 gen_ushr_d, 7753 }; 7754 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7755 7756 static WideShiftImmFn * const rshrn_fns[] = { 7757 gen_urshr_bhs, 7758 gen_urshr_bhs, 7759 gen_urshr_d, 7760 }; 7761 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7762 7763 static WideShiftImmFn * const sqshrn_fns[] = { 7764 gen_sqshrn_b, 7765 gen_sqshrn_h, 7766 gen_sqshrn_s, 7767 }; 7768 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7769 7770 static WideShiftImmFn * const uqshrn_fns[] = { 7771 gen_uqshrn_b, 7772 gen_uqshrn_h, 7773 gen_uqshrn_s, 7774 }; 7775 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7776 7777 static WideShiftImmFn * const sqshrun_fns[] = { 7778 gen_sqshrun_b, 7779 gen_sqshrun_h, 7780 gen_sqshrun_s, 7781 }; 7782 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7783 7784 static WideShiftImmFn * const sqrshrn_fns[] = { 7785 gen_sqrshrn_b, 7786 gen_sqrshrn_h, 7787 gen_sqrshrn_s, 7788 }; 7789 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7790 7791 static WideShiftImmFn * const uqrshrn_fns[] = { 7792 gen_uqrshrn_b, 7793 gen_uqrshrn_h, 7794 gen_uqrshrn_s, 7795 }; 7796 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7797 7798 static WideShiftImmFn * const sqrshrun_fns[] = { 7799 gen_sqrshrun_b, 7800 gen_sqrshrun_h, 7801 gen_sqrshrun_s, 7802 }; 7803 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7804 7805 /* 7806 * Advanced SIMD Scalar Shift by Immediate 7807 */ 7808 7809 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7810 WideShiftImmFn *fn, bool accumulate, 7811 MemOp sign) 7812 { 7813 if (fp_access_check(s)) { 7814 TCGv_i64 rd = tcg_temp_new_i64(); 7815 TCGv_i64 rn = tcg_temp_new_i64(); 7816 7817 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7818 if (accumulate) { 7819 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7820 } 7821 fn(rd, rn, a->imm); 7822 write_fp_dreg(s, a->rd, rd); 7823 } 7824 return true; 7825 } 7826 7827 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7828 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7829 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7830 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7831 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7832 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7833 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7834 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7835 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7836 7837 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7838 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7839 7840 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7841 NeonGenTwoOpEnvFn *fn) 7842 { 7843 TCGv_i32 t = tcg_temp_new_i32(); 7844 tcg_gen_extrl_i64_i32(t, s); 7845 fn(t, tcg_env, t, tcg_constant_i32(i)); 7846 tcg_gen_extu_i32_i64(d, t); 7847 } 7848 7849 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7850 { 7851 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7852 } 7853 7854 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7855 { 7856 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7857 } 7858 7859 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7860 { 7861 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7862 } 7863 7864 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7865 { 7866 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7867 } 7868 7869 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7870 { 7871 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7872 } 7873 7874 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7875 { 7876 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7877 } 7878 7879 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7880 { 7881 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7882 } 7883 7884 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7885 { 7886 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7887 } 7888 7889 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7890 { 7891 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7892 } 7893 7894 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7895 { 7896 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7897 } 7898 7899 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7900 { 7901 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7902 } 7903 7904 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7905 { 7906 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7907 } 7908 7909 static WideShiftImmFn * const f_scalar_sqshli[] = { 7910 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7911 }; 7912 7913 static WideShiftImmFn * const f_scalar_uqshli[] = { 7914 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7915 }; 7916 7917 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7918 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7919 }; 7920 7921 /* Note that the helpers sign-extend their inputs, so don't do it here. 
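 * (Hence the TRANS invocations below pass 0 rather than MO_SIGN as the
 * sign argument to do_scalar_shift_imm.)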
*/ 7922 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7923 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7924 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7925 7926 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7927 WideShiftImmFn * const fns[3], 7928 MemOp sign, bool zext) 7929 { 7930 MemOp esz = a->esz; 7931 7932 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7933 7934 if (fp_access_check(s)) { 7935 TCGv_i64 rd = tcg_temp_new_i64(); 7936 TCGv_i64 rn = tcg_temp_new_i64(); 7937 7938 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7939 fns[esz](rd, rn, a->imm); 7940 if (zext) { 7941 tcg_gen_ext_i64(rd, rd, esz); 7942 } 7943 write_fp_dreg(s, a->rd, rd); 7944 } 7945 return true; 7946 } 7947 7948 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7949 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7950 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7951 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7952 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7953 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7954 7955 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7956 { 7957 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7958 tcg_rd = cpu_reg(s, a->rd); 7959 7960 if (!a->sf && is_signed) { 7961 tcg_n = tcg_temp_new_i64(); 7962 tcg_m = tcg_temp_new_i64(); 7963 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7964 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7965 } else { 7966 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7967 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7968 } 7969 7970 if (is_signed) { 7971 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7972 } else { 7973 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7974 } 7975 7976 if (!a->sf) { /* zero extend final result */ 7977 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7978 } 7979 return true; 7980 } 7981 7982 TRANS(SDIV, do_div, a, true) 7983 TRANS(UDIV, do_div, a, false) 7984 7985 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7986 * Note that it is the caller's responsibility to ensure that the 7987 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7988 * mandated semantics for out of range shifts. 7989 */ 7990 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7991 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7992 { 7993 switch (shift_type) { 7994 case A64_SHIFT_TYPE_LSL: 7995 tcg_gen_shl_i64(dst, src, shift_amount); 7996 break; 7997 case A64_SHIFT_TYPE_LSR: 7998 tcg_gen_shr_i64(dst, src, shift_amount); 7999 break; 8000 case A64_SHIFT_TYPE_ASR: 8001 if (!sf) { 8002 tcg_gen_ext32s_i64(dst, src); 8003 } 8004 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 8005 break; 8006 case A64_SHIFT_TYPE_ROR: 8007 if (sf) { 8008 tcg_gen_rotr_i64(dst, src, shift_amount); 8009 } else { 8010 TCGv_i32 t0, t1; 8011 t0 = tcg_temp_new_i32(); 8012 t1 = tcg_temp_new_i32(); 8013 tcg_gen_extrl_i64_i32(t0, src); 8014 tcg_gen_extrl_i64_i32(t1, shift_amount); 8015 tcg_gen_rotr_i32(t0, t0, t1); 8016 tcg_gen_extu_i32_i64(dst, t0); 8017 } 8018 break; 8019 default: 8020 assert(FALSE); /* all shift types should be handled */ 8021 break; 8022 } 8023 8024 if (!sf) { /* zero extend final result */ 8025 tcg_gen_ext32u_i64(dst, dst); 8026 } 8027 } 8028 8029 /* Shift a TCGv src by immediate, put result in dst. 
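 * A zero shift amount is special-cased below as a plain register move,
 * so shift_reg itself only ever sees a non-zero constant.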
8030 * The shift amount must be in range (this should always be true as the 8031 * relevant instructions will UNDEF on bad shift immediates). 8032 */ 8033 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 8034 enum a64_shift_type shift_type, unsigned int shift_i) 8035 { 8036 assert(shift_i < (sf ? 64 : 32)); 8037 8038 if (shift_i == 0) { 8039 tcg_gen_mov_i64(dst, src); 8040 } else { 8041 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 8042 } 8043 } 8044 8045 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 8046 enum a64_shift_type shift_type) 8047 { 8048 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8049 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8050 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8051 8052 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 8053 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 8054 return true; 8055 } 8056 8057 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 8058 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 8059 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 8060 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 8061 8062 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 8063 { 8064 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 8065 TCGv_i32 tcg_bytes; 8066 8067 switch (a->esz) { 8068 case MO_8: 8069 case MO_16: 8070 case MO_32: 8071 tcg_val = tcg_temp_new_i64(); 8072 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 8073 break; 8074 case MO_64: 8075 tcg_val = cpu_reg(s, a->rm); 8076 break; 8077 default: 8078 g_assert_not_reached(); 8079 } 8080 tcg_acc = cpu_reg(s, a->rn); 8081 tcg_bytes = tcg_constant_i32(1 << a->esz); 8082 tcg_rd = cpu_reg(s, a->rd); 8083 8084 if (crc32c) { 8085 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8086 } else { 8087 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8088 } 8089 return true; 8090 } 8091 8092 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 8093 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 8094 8095 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 8096 { 8097 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 8098 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 8099 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 8100 8101 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 8102 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8103 8104 if (setflag) { 8105 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8106 } else { 8107 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8108 } 8109 return true; 8110 } 8111 8112 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 8113 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 8114 8115 static bool trans_IRG(DisasContext *s, arg_rrr *a) 8116 { 8117 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8118 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 8119 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 8120 8121 if (s->ata[0]) { 8122 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 8123 } else { 8124 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 8125 } 8126 return true; 8127 } 8128 return false; 8129 } 8130 8131 static bool trans_GMI(DisasContext *s, arg_rrr *a) 8132 { 8133 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8134 TCGv_i64 t = tcg_temp_new_i64(); 8135 8136 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 8137 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8138 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 8139 return true; 8140 } 8141 return false; 8142 } 8143 8144 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 8145 { 8146 if (dc_isar_feature(aa64_pauth, s)) { 8147 
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 8148 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 8149 return true; 8150 } 8151 return false; 8152 } 8153 8154 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 8155 8156 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 8157 { 8158 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 8159 return true; 8160 } 8161 8162 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8163 { 8164 TCGv_i32 t32 = tcg_temp_new_i32(); 8165 8166 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8167 gen_helper_rbit(t32, t32); 8168 tcg_gen_extu_i32_i64(tcg_rd, t32); 8169 } 8170 8171 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 8172 { 8173 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8174 8175 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8176 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8177 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8178 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8179 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8180 } 8181 8182 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8183 { 8184 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 8185 } 8186 8187 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8188 { 8189 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 8190 } 8191 8192 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8193 { 8194 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8195 } 8196 8197 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8198 { 8199 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8200 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8201 } 8202 8203 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 8204 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 8205 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 8206 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 8207 8208 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8209 { 8210 TCGv_i32 t32 = tcg_temp_new_i32(); 8211 8212 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8213 tcg_gen_clzi_i32(t32, t32, 32); 8214 tcg_gen_extu_i32_i64(tcg_rd, t32); 8215 } 8216 8217 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8218 { 8219 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8220 } 8221 8222 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8223 { 8224 TCGv_i32 t32 = tcg_temp_new_i32(); 8225 8226 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8227 tcg_gen_clrsb_i32(t32, t32); 8228 tcg_gen_extu_i32_i64(tcg_rd, t32); 8229 } 8230 8231 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 8232 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) 8233 8234 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) 8235 { 8236 TCGv_i64 tcg_rd, tcg_rn; 8237 8238 if (a->z) { 8239 if (a->rn != 31) { 8240 return false; 8241 } 8242 tcg_rn = tcg_constant_i64(0); 8243 } else { 8244 tcg_rn = cpu_reg_sp(s, a->rn); 8245 } 8246 if (s->pauth_active) { 8247 tcg_rd = cpu_reg(s, a->rd); 8248 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); 8249 } 8250 return true; 8251 } 8252 8253 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) 8254 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) 8255 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) 8256 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb) 8257 8258 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) 8259 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) 8260 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) 8261 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) 8262 8263 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) 8264 { 8265 if (s->pauth_active) { 8266 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8267 fn(tcg_rd, tcg_env, tcg_rd); 8268 } 8269 return true; 8270 } 8271 8272 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) 8273 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) 8274 8275 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, 8276 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) 8277 { 8278 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 8279 8280 if (!a->sf && (a->sa & (1 << 5))) { 8281 return false; 8282 } 8283 8284 tcg_rd = cpu_reg(s, a->rd); 8285 tcg_rn = cpu_reg(s, a->rn); 8286 8287 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8288 if (a->sa) { 8289 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8290 } 8291 8292 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); 8293 if (!a->sf) { 8294 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8295 } 8296 if (setflags) { 8297 gen_logic_CC(a->sf, tcg_rd); 8298 } 8299 return true; 8300 } 8301 8302 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) 8303 { 8304 /* 8305 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for 8306 * register-register MOV and MVN, so it is worth special casing. 
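 * For example, "MOV Xd, Xm" is encoded as "ORR Xd, XZR, Xm" and
 * "MVN Wd, Wm" as "ORN Wd, WZR, Wm".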
8307 */ 8308 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 8309 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8310 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8311 8312 if (a->n) { 8313 tcg_gen_not_i64(tcg_rd, tcg_rm); 8314 if (!a->sf) { 8315 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8316 } 8317 } else { 8318 if (a->sf) { 8319 tcg_gen_mov_i64(tcg_rd, tcg_rm); 8320 } else { 8321 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 8322 } 8323 } 8324 return true; 8325 } 8326 8327 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 8328 } 8329 8330 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 8331 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 8332 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 8333 8334 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 8335 bool sub_op, bool setflags) 8336 { 8337 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 8338 8339 if (a->sa > 4) { 8340 return false; 8341 } 8342 8343 /* non-flag setting ops may use SP */ 8344 if (!setflags) { 8345 tcg_rd = cpu_reg_sp(s, a->rd); 8346 } else { 8347 tcg_rd = cpu_reg(s, a->rd); 8348 } 8349 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 8350 8351 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8352 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 8353 8354 tcg_result = tcg_temp_new_i64(); 8355 if (!setflags) { 8356 if (sub_op) { 8357 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8358 } else { 8359 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8360 } 8361 } else { 8362 if (sub_op) { 8363 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8364 } else { 8365 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8366 } 8367 } 8368 8369 if (a->sf) { 8370 tcg_gen_mov_i64(tcg_rd, tcg_result); 8371 } else { 8372 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8373 } 8374 return true; 8375 } 8376 8377 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8378 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8379 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8380 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8381 8382 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8383 bool sub_op, bool setflags) 8384 { 8385 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8386 8387 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8388 return false; 8389 } 8390 8391 tcg_rd = cpu_reg(s, a->rd); 8392 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8393 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8394 8395 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8396 8397 tcg_result = tcg_temp_new_i64(); 8398 if (!setflags) { 8399 if (sub_op) { 8400 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8401 } else { 8402 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8403 } 8404 } else { 8405 if (sub_op) { 8406 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8407 } else { 8408 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8409 } 8410 } 8411 8412 if (a->sf) { 8413 tcg_gen_mov_i64(tcg_rd, tcg_result); 8414 } else { 8415 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8416 } 8417 return true; 8418 } 8419 8420 TRANS(ADD_r, do_addsub_reg, a, false, false) 8421 TRANS(SUB_r, do_addsub_reg, a, true, false) 8422 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8423 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8424 8425 static bool do_mulh(DisasContext *s, arg_rrr *a, 8426 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8427 { 8428 TCGv_i64 discard = tcg_temp_new_i64(); 8429 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8430 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8431 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8432 8433 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8434 return true; 8435 } 8436 
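/*
 * SMULH and UMULH write only the high 64 bits of the 128-bit product
 * to Rd; tcg_gen_muls2_i64/tcg_gen_mulu2_i64 compute both halves, so
 * do_mulh above discards the low half into a scratch temporary.
 */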
8437 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8438 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8439 8440 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8441 bool sf, bool is_sub, MemOp mop) 8442 { 8443 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8444 TCGv_i64 tcg_op1, tcg_op2; 8445 8446 if (mop == MO_64) { 8447 tcg_op1 = cpu_reg(s, a->rn); 8448 tcg_op2 = cpu_reg(s, a->rm); 8449 } else { 8450 tcg_op1 = tcg_temp_new_i64(); 8451 tcg_op2 = tcg_temp_new_i64(); 8452 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8453 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8454 } 8455 8456 if (a->ra == 31 && !is_sub) { 8457 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8458 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8459 } else { 8460 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8461 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8462 8463 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8464 if (is_sub) { 8465 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8466 } else { 8467 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8468 } 8469 } 8470 8471 if (!sf) { 8472 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8473 } 8474 return true; 8475 } 8476 8477 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8478 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8479 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8480 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8481 8482 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8483 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8484 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8485 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8486 8487 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8488 bool is_sub, bool setflags) 8489 { 8490 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8491 8492 tcg_rd = cpu_reg(s, a->rd); 8493 tcg_rn = cpu_reg(s, a->rn); 8494 8495 if (is_sub) { 8496 tcg_y = tcg_temp_new_i64(); 8497 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8498 } else { 8499 tcg_y = cpu_reg(s, a->rm); 8500 } 8501 8502 if (setflags) { 8503 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8504 } else { 8505 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8506 } 8507 return true; 8508 } 8509 8510 TRANS(ADC, do_adc_sbc, a, false, false) 8511 TRANS(SBC, do_adc_sbc, a, true, false) 8512 TRANS(ADCS, do_adc_sbc, a, false, true) 8513 TRANS(SBCS, do_adc_sbc, a, true, true) 8514 8515 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8516 { 8517 int mask = a->mask; 8518 TCGv_i64 tcg_rn; 8519 TCGv_i32 nzcv; 8520 8521 if (!dc_isar_feature(aa64_condm_4, s)) { 8522 return false; 8523 } 8524 8525 tcg_rn = read_cpu_reg(s, a->rn, 1); 8526 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8527 8528 nzcv = tcg_temp_new_i32(); 8529 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8530 8531 if (mask & 8) { /* N */ 8532 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8533 } 8534 if (mask & 4) { /* Z */ 8535 tcg_gen_not_i32(cpu_ZF, nzcv); 8536 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8537 } 8538 if (mask & 2) { /* C */ 8539 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8540 } 8541 if (mask & 1) { /* V */ 8542 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8543 } 8544 return true; 8545 } 8546 8547 static bool do_setf(DisasContext *s, int rn, int shift) 8548 { 8549 TCGv_i32 tmp = tcg_temp_new_i32(); 8550 8551 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8552 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8553 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8554 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8555 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8556 return true; 8557 } 8558 8559 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8560 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8561 8562 /* 
CCMP, CCMN */ 8563 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8564 { 8565 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8566 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8567 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8568 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8569 TCGv_i64 tcg_rn, tcg_y; 8570 DisasCompare c; 8571 unsigned nzcv; 8572 bool has_andc; 8573 8574 /* Set T0 = !COND. */ 8575 arm_test_cc(&c, a->cond); 8576 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8577 8578 /* Load the arguments for the new comparison. */ 8579 if (a->imm) { 8580 tcg_y = tcg_constant_i64(a->y); 8581 } else { 8582 tcg_y = cpu_reg(s, a->y); 8583 } 8584 tcg_rn = cpu_reg(s, a->rn); 8585 8586 /* Set the flags for the new comparison. */ 8587 if (a->op) { 8588 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8589 } else { 8590 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8591 } 8592 8593 /* 8594 * If COND was false, force the flags to #nzcv. Compute two masks 8595 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8596 * For tcg hosts that support ANDC, we can make do with just T1. 8597 * In either case, allow the tcg optimizer to delete any unused mask. 8598 */ 8599 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8600 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8601 8602 nzcv = a->nzcv; 8603 has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0); 8604 if (nzcv & 8) { /* N */ 8605 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8606 } else { 8607 if (has_andc) { 8608 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8609 } else { 8610 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8611 } 8612 } 8613 if (nzcv & 4) { /* Z */ 8614 if (has_andc) { 8615 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8616 } else { 8617 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8618 } 8619 } else { 8620 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8621 } 8622 if (nzcv & 2) { /* C */ 8623 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8624 } else { 8625 if (has_andc) { 8626 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8627 } else { 8628 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8629 } 8630 } 8631 if (nzcv & 1) { /* V */ 8632 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8633 } else { 8634 if (has_andc) { 8635 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8636 } else { 8637 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8638 } 8639 } 8640 return true; 8641 } 8642 8643 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8644 { 8645 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8646 TCGv_i64 zero = tcg_constant_i64(0); 8647 DisasCompare64 c; 8648 8649 a64_test_cc(&c, a->cond); 8650 8651 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8652 /* CSET & CSETM. 
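 * CSET Rd, cond is an alias of CSINC Rd, ZR, ZR, invert(cond), and
 * CSETM Rd, cond of CSINV Rd, ZR, ZR, invert(cond); with both sources
 * zero these reduce to a setcond/negsetcond on the inverted condition.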
*/ 8653 if (a->else_inv) { 8654 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8655 tcg_rd, c.value, zero); 8656 } else { 8657 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8658 tcg_rd, c.value, zero); 8659 } 8660 } else { 8661 TCGv_i64 t_true = cpu_reg(s, a->rn); 8662 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8663 8664 if (a->else_inv && a->else_inc) { 8665 tcg_gen_neg_i64(t_false, t_false); 8666 } else if (a->else_inv) { 8667 tcg_gen_not_i64(t_false, t_false); 8668 } else if (a->else_inc) { 8669 tcg_gen_addi_i64(t_false, t_false, 1); 8670 } 8671 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8672 } 8673 8674 if (!a->sf) { 8675 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8676 } 8677 return true; 8678 } 8679 8680 typedef struct FPScalar1Int { 8681 void (*gen_h)(TCGv_i32, TCGv_i32); 8682 void (*gen_s)(TCGv_i32, TCGv_i32); 8683 void (*gen_d)(TCGv_i64, TCGv_i64); 8684 } FPScalar1Int; 8685 8686 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8687 const FPScalar1Int *f, 8688 bool merging) 8689 { 8690 switch (a->esz) { 8691 case MO_64: 8692 if (fp_access_check(s)) { 8693 TCGv_i64 t = read_fp_dreg(s, a->rn); 8694 f->gen_d(t, t); 8695 if (merging) { 8696 write_fp_dreg_merging(s, a->rd, a->rd, t); 8697 } else { 8698 write_fp_dreg(s, a->rd, t); 8699 } 8700 } 8701 break; 8702 case MO_32: 8703 if (fp_access_check(s)) { 8704 TCGv_i32 t = read_fp_sreg(s, a->rn); 8705 f->gen_s(t, t); 8706 if (merging) { 8707 write_fp_sreg_merging(s, a->rd, a->rd, t); 8708 } else { 8709 write_fp_sreg(s, a->rd, t); 8710 } 8711 } 8712 break; 8713 case MO_16: 8714 if (!dc_isar_feature(aa64_fp16, s)) { 8715 return false; 8716 } 8717 if (fp_access_check(s)) { 8718 TCGv_i32 t = read_fp_hreg(s, a->rn); 8719 f->gen_h(t, t); 8720 if (merging) { 8721 write_fp_hreg_merging(s, a->rd, a->rd, t); 8722 } else { 8723 write_fp_sreg(s, a->rd, t); 8724 } 8725 } 8726 break; 8727 default: 8728 return false; 8729 } 8730 return true; 8731 } 8732 8733 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, 8734 const FPScalar1Int *fnormal, 8735 const FPScalar1Int *fah) 8736 { 8737 return do_fp1_scalar_int(s, a, s->fpcr_ah ? 
fah : fnormal, true); 8738 } 8739 8740 static const FPScalar1Int f_scalar_fmov = { 8741 tcg_gen_mov_i32, 8742 tcg_gen_mov_i32, 8743 tcg_gen_mov_i64, 8744 }; 8745 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) 8746 8747 static const FPScalar1Int f_scalar_fabs = { 8748 gen_vfp_absh, 8749 gen_vfp_abss, 8750 gen_vfp_absd, 8751 }; 8752 static const FPScalar1Int f_scalar_ah_fabs = { 8753 gen_vfp_ah_absh, 8754 gen_vfp_ah_abss, 8755 gen_vfp_ah_absd, 8756 }; 8757 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) 8758 8759 static const FPScalar1Int f_scalar_fneg = { 8760 gen_vfp_negh, 8761 gen_vfp_negs, 8762 gen_vfp_negd, 8763 }; 8764 static const FPScalar1Int f_scalar_ah_fneg = { 8765 gen_vfp_ah_negh, 8766 gen_vfp_ah_negs, 8767 gen_vfp_ah_negd, 8768 }; 8769 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) 8770 8771 typedef struct FPScalar1 { 8772 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8773 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8774 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8775 } FPScalar1; 8776 8777 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, 8778 const FPScalar1 *f, int rmode, 8779 ARMFPStatusFlavour fpsttype) 8780 { 8781 TCGv_i32 tcg_rmode = NULL; 8782 TCGv_ptr fpst; 8783 TCGv_i64 t64; 8784 TCGv_i32 t32; 8785 int check = fp_access_check_scalar_hsd(s, a->esz); 8786 8787 if (check <= 0) { 8788 return check == 0; 8789 } 8790 8791 fpst = fpstatus_ptr(fpsttype); 8792 if (rmode >= 0) { 8793 tcg_rmode = gen_set_rmode(rmode, fpst); 8794 } 8795 8796 switch (a->esz) { 8797 case MO_64: 8798 t64 = read_fp_dreg(s, a->rn); 8799 f->gen_d(t64, t64, fpst); 8800 write_fp_dreg_merging(s, a->rd, a->rd, t64); 8801 break; 8802 case MO_32: 8803 t32 = read_fp_sreg(s, a->rn); 8804 f->gen_s(t32, t32, fpst); 8805 write_fp_sreg_merging(s, a->rd, a->rd, t32); 8806 break; 8807 case MO_16: 8808 t32 = read_fp_hreg(s, a->rn); 8809 f->gen_h(t32, t32, fpst); 8810 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8811 break; 8812 default: 8813 g_assert_not_reached(); 8814 } 8815 8816 if (rmode >= 0) { 8817 gen_restore_rmode(tcg_rmode, fpst); 8818 } 8819 return true; 8820 } 8821 8822 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8823 const FPScalar1 *f, int rmode) 8824 { 8825 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, 8826 a->esz == MO_16 ? 
8827 FPST_A64_F16 : FPST_A64); 8828 } 8829 8830 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, 8831 const FPScalar1 *f, int rmode) 8832 { 8833 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); 8834 } 8835 8836 static const FPScalar1 f_scalar_fsqrt = { 8837 gen_helper_vfp_sqrth, 8838 gen_helper_vfp_sqrts, 8839 gen_helper_vfp_sqrtd, 8840 }; 8841 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8842 8843 static const FPScalar1 f_scalar_frint = { 8844 gen_helper_advsimd_rinth, 8845 gen_helper_rints, 8846 gen_helper_rintd, 8847 }; 8848 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8849 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8850 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8851 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8852 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8853 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8854 8855 static const FPScalar1 f_scalar_frintx = { 8856 gen_helper_advsimd_rinth_exact, 8857 gen_helper_rints_exact, 8858 gen_helper_rintd_exact, 8859 }; 8860 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8861 8862 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) 8863 { 8864 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; 8865 TCGv_i32 t32; 8866 int check; 8867 8868 if (!dc_isar_feature(aa64_bf16, s)) { 8869 return false; 8870 } 8871 8872 check = fp_access_check_scalar_hsd(s, a->esz); 8873 8874 if (check <= 0) { 8875 return check == 0; 8876 } 8877 8878 t32 = read_fp_sreg(s, a->rn); 8879 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); 8880 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8881 return true; 8882 } 8883 8884 static const FPScalar1 f_scalar_frint32 = { 8885 NULL, 8886 gen_helper_frint32_s, 8887 gen_helper_frint32_d, 8888 }; 8889 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8890 &f_scalar_frint32, FPROUNDING_ZERO) 8891 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8892 8893 static const FPScalar1 f_scalar_frint64 = { 8894 NULL, 8895 gen_helper_frint64_s, 8896 gen_helper_frint64_d, 8897 }; 8898 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8899 &f_scalar_frint64, FPROUNDING_ZERO) 8900 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8901 8902 static const FPScalar1 f_scalar_frecpe = { 8903 gen_helper_recpe_f16, 8904 gen_helper_recpe_f32, 8905 gen_helper_recpe_f64, 8906 }; 8907 static const FPScalar1 f_scalar_frecpe_rpres = { 8908 gen_helper_recpe_f16, 8909 gen_helper_recpe_rpres_f32, 8910 gen_helper_recpe_f64, 8911 }; 8912 TRANS(FRECPE_s, do_fp1_scalar_ah, a, 8913 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 8914 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) 8915 8916 static const FPScalar1 f_scalar_frecpx = { 8917 gen_helper_frecpx_f16, 8918 gen_helper_frecpx_f32, 8919 gen_helper_frecpx_f64, 8920 }; 8921 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) 8922 8923 static const FPScalar1 f_scalar_frsqrte = { 8924 gen_helper_rsqrte_f16, 8925 gen_helper_rsqrte_f32, 8926 gen_helper_rsqrte_f64, 8927 }; 8928 static const FPScalar1 f_scalar_frsqrte_rpres = { 8929 gen_helper_rsqrte_f16, 8930 gen_helper_rsqrte_rpres_f32, 8931 gen_helper_rsqrte_f64, 8932 }; 8933 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, 8934 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
8935 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) 8936 8937 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8938 { 8939 if (fp_access_check(s)) { 8940 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8941 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8942 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8943 8944 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 8945 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 8946 } 8947 return true; 8948 } 8949 8950 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8951 { 8952 if (fp_access_check(s)) { 8953 TCGv_i32 tmp = read_fp_sreg(s, a->rn); 8954 TCGv_i32 ahp = get_ahp_flag(); 8955 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8956 8957 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 8958 /* write_fp_hreg_merging is OK here because top half of result is zero */ 8959 write_fp_hreg_merging(s, a->rd, a->rd, tmp); 8960 } 8961 return true; 8962 } 8963 8964 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 8965 { 8966 if (fp_access_check(s)) { 8967 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8968 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8969 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8970 8971 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); 8972 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 8973 } 8974 return true; 8975 } 8976 8977 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 8978 { 8979 if (fp_access_check(s)) { 8980 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8981 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8982 TCGv_i32 ahp = get_ahp_flag(); 8983 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8984 8985 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8986 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ 8987 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); 8988 } 8989 return true; 8990 } 8991 8992 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 8993 { 8994 if (fp_access_check(s)) { 8995 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8996 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8997 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 8998 TCGv_i32 tcg_ahp = get_ahp_flag(); 8999 9000 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9001 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 9002 } 9003 return true; 9004 } 9005 9006 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 9007 { 9008 if (fp_access_check(s)) { 9009 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9010 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9011 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9012 TCGv_i32 tcg_ahp = get_ahp_flag(); 9013 9014 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9015 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 9016 } 9017 return true; 9018 } 9019 9020 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 9021 TCGv_i64 tcg_int, bool is_signed) 9022 { 9023 TCGv_ptr tcg_fpstatus; 9024 TCGv_i32 tcg_shift, tcg_single; 9025 TCGv_i64 tcg_double; 9026 9027 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9028 tcg_shift = tcg_constant_i32(shift); 9029 9030 switch (esz) { 9031 case MO_64: 9032 tcg_double = tcg_temp_new_i64(); 9033 if (is_signed) { 9034 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9035 } else { 9036 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9037 } 9038 write_fp_dreg_merging(s, rd, rd, tcg_double); 9039 break; 9040 9041 case MO_32: 9042 tcg_single = tcg_temp_new_i32(); 9043 if (is_signed) { 9044 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9045 } else { 9046 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9047 } 9048 write_fp_sreg_merging(s, rd, rd, tcg_single); 9049 break; 9050 9051 case MO_16: 9052 tcg_single = tcg_temp_new_i32(); 9053 if (is_signed) { 9054 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9055 } else { 9056 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9057 } 9058 write_fp_hreg_merging(s, rd, rd, tcg_single); 9059 break; 9060 9061 default: 9062 g_assert_not_reached(); 9063 } 9064 return true; 9065 } 9066 9067 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 9068 { 9069 TCGv_i64 tcg_int; 9070 int check = fp_access_check_scalar_hsd(s, a->esz); 9071 9072 if (check <= 0) { 9073 return check == 0; 9074 } 9075 9076 if (a->sf) { 9077 tcg_int = cpu_reg(s, a->rn); 9078 } else { 9079 tcg_int = read_cpu_reg(s, a->rn, true); 9080 if (is_signed) { 9081 tcg_gen_ext32s_i64(tcg_int, tcg_int); 9082 } else { 9083 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9084 } 9085 } 9086 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9087 } 9088 9089 TRANS(SCVTF_g, do_cvtf_g, a, true) 9090 TRANS(UCVTF_g, do_cvtf_g, a, false) 9091 9092 /* 9093 * [US]CVTF (vector), scalar version. 9094 * Which sounds weird, but really just means input from fp register 9095 * instead of input from general register. Input and output element 9096 * size are always equal. 9097 */ 9098 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 9099 { 9100 TCGv_i64 tcg_int; 9101 int check = fp_access_check_scalar_hsd(s, a->esz); 9102 9103 if (check <= 0) { 9104 return check == 0; 9105 } 9106 9107 tcg_int = tcg_temp_new_i64(); 9108 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 9109 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9110 } 9111 9112 TRANS(SCVTF_f, do_cvtf_f, a, true) 9113 TRANS(UCVTF_f, do_cvtf_f, a, false) 9114 9115 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 9116 TCGv_i64 tcg_out, int shift, int rn, 9117 ARMFPRounding rmode) 9118 { 9119 TCGv_ptr tcg_fpstatus; 9120 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 9121 9122 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9123 tcg_shift = tcg_constant_i32(shift); 9124 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9125 9126 switch (esz) { 9127 case MO_64: 9128 read_vec_element(s, tcg_out, rn, 0, MO_64); 9129 switch (out) { 9130 case MO_64 | MO_SIGN: 9131 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9132 break; 9133 case MO_64: 9134 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9135 break; 9136 case MO_32 | MO_SIGN: 9137 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9138 break; 9139 case MO_32: 9140 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9141 break; 9142 default: 9143 g_assert_not_reached(); 9144 } 9145 break; 9146 9147 case MO_32: 9148 tcg_single = read_fp_sreg(s, rn); 9149 switch (out) { 9150 case MO_64 | MO_SIGN: 9151 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9152 break; 9153 case MO_64: 9154 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9155 break; 9156 case MO_32 | MO_SIGN: 9157 gen_helper_vfp_tosls(tcg_single, tcg_single, 9158 tcg_shift, tcg_fpstatus); 9159 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9160 break; 9161 case MO_32: 9162 gen_helper_vfp_touls(tcg_single, tcg_single, 9163 tcg_shift, tcg_fpstatus); 9164 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9165 break; 9166 default: 9167 g_assert_not_reached(); 9168 } 9169 break; 9170 9171 case MO_16: 9172 tcg_single = read_fp_hreg(s, rn); 9173 switch (out) { 9174 case MO_64 | MO_SIGN: 9175 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9176 break; 9177 case MO_64: 9178 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9179 break; 9180 case MO_32 | MO_SIGN: 9181 gen_helper_vfp_toslh(tcg_single, tcg_single, 9182 tcg_shift, tcg_fpstatus); 9183 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9184 break; 9185 case MO_32: 9186 gen_helper_vfp_toulh(tcg_single, tcg_single, 9187 tcg_shift, tcg_fpstatus); 9188 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9189 break; 9190 case MO_16 | MO_SIGN: 9191 gen_helper_vfp_toshh(tcg_single, tcg_single, 9192 tcg_shift, tcg_fpstatus); 9193 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9194 break; 9195 case MO_16: 9196 gen_helper_vfp_touhh(tcg_single, tcg_single, 9197 tcg_shift, tcg_fpstatus); 9198 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9199 break; 9200 default: 9201 g_assert_not_reached(); 9202 } 9203 break; 9204 9205 default: 9206 g_assert_not_reached(); 9207 } 9208 9209 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9210 } 9211 9212 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 9213 ARMFPRounding rmode, bool is_signed) 9214 { 9215 TCGv_i64 tcg_int; 9216 int check = fp_access_check_scalar_hsd(s, a->esz); 9217 9218 if (check <= 0) { 9219 return check == 0; 9220 } 9221 9222 tcg_int = cpu_reg(s, a->rd); 9223 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 9224 a->esz, tcg_int, a->shift, a->rn, rmode); 9225 9226 if (!a->sf) { 9227 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9228 } 9229 return true; 9230 } 9231 9232 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 9233 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 9234 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 9235 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 9236 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 9237 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 9238 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 9239 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 9240 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 9241 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 9242 9243 /* 9244 * FCVT* (vector), scalar version. 9245 * Which sounds weird, but really just means output to fp register 9246 * instead of output to general register. Input and output element 9247 * size are always equal. 9248 */ 9249 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 9250 ARMFPRounding rmode, bool is_signed) 9251 { 9252 TCGv_i64 tcg_int; 9253 int check = fp_access_check_scalar_hsd(s, a->esz); 9254 9255 if (check <= 0) { 9256 return check == 0; 9257 } 9258 9259 tcg_int = tcg_temp_new_i64(); 9260 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 9261 a->esz, tcg_int, a->shift, a->rn, rmode); 9262 9263 if (!s->fpcr_nep) { 9264 clear_vec(s, a->rd); 9265 } 9266 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 9267 return true; 9268 } 9269 9270 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 9271 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 9272 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 9273 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 9274 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 9275 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 9276 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 9277 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 9278 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 9279 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 9280 9281 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 9282 { 9283 if (!dc_isar_feature(aa64_jscvt, s)) { 9284 return false; 9285 } 9286 if (fp_access_check(s)) { 9287 TCGv_i64 t = read_fp_dreg(s, a->rn); 9288 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 9289 9290 gen_helper_fjcvtzs(t, t, fpstatus); 9291 9292 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 9293 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9294 tcg_gen_movi_i32(cpu_CF, 0); 9295 tcg_gen_movi_i32(cpu_NF, 0); 9296 tcg_gen_movi_i32(cpu_VF, 0); 9297 } 9298 return true; 9299 } 9300 9301 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 9302 { 9303 if (!dc_isar_feature(aa64_fp16, s)) { 9304 return false; 9305 } 9306 if (fp_access_check(s)) { 9307 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9308 TCGv_i64 tmp = tcg_temp_new_i64(); 9309 tcg_gen_ext16u_i64(tmp, tcg_rn); 9310 write_fp_dreg(s, a->rd, tmp); 9311 } 9312 return true; 9313 } 9314 9315 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 9316 { 9317 if (fp_access_check(s)) { 9318 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9319 TCGv_i64 tmp = tcg_temp_new_i64(); 9320 tcg_gen_ext32u_i64(tmp, tcg_rn); 9321 write_fp_dreg(s, a->rd, tmp); 9322 } 9323 return true; 9324 } 9325 9326 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 9327 { 9328 if (fp_access_check(s)) { 9329 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9330 write_fp_dreg(s, a->rd, tcg_rn); 9331 } 9332 return true; 9333 } 9334 9335 
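/*
 * FMOV Vd.D[1], Xn: write only the upper 64 bits of the vector register,
 * leaving bits [63:0] intact, hence the store to fp_reg_hi_offset()
 * rather than write_fp_dreg().
 */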
static bool trans_FMOV_ux(DisasContext *s, arg_rr *a) 9336 { 9337 if (fp_access_check(s)) { 9338 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9339 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 9340 clear_vec_high(s, true, a->rd); 9341 } 9342 return true; 9343 } 9344 9345 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 9346 { 9347 if (!dc_isar_feature(aa64_fp16, s)) { 9348 return false; 9349 } 9350 if (fp_access_check(s)) { 9351 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9352 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 9353 } 9354 return true; 9355 } 9356 9357 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 9358 { 9359 if (fp_access_check(s)) { 9360 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9361 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 9362 } 9363 return true; 9364 } 9365 9366 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 9367 { 9368 if (fp_access_check(s)) { 9369 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9370 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 9371 } 9372 return true; 9373 } 9374 9375 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 9376 { 9377 if (fp_access_check(s)) { 9378 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9379 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 9380 } 9381 return true; 9382 } 9383 9384 typedef struct ENVScalar1 { 9385 NeonGenOneOpEnvFn *gen_bhs[3]; 9386 NeonGenOne64OpEnvFn *gen_d; 9387 } ENVScalar1; 9388 9389 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 9390 { 9391 if (!fp_access_check(s)) { 9392 return true; 9393 } 9394 if (a->esz == MO_64) { 9395 TCGv_i64 t = read_fp_dreg(s, a->rn); 9396 f->gen_d(t, tcg_env, t); 9397 write_fp_dreg(s, a->rd, t); 9398 } else { 9399 TCGv_i32 t = tcg_temp_new_i32(); 9400 9401 read_vec_element_i32(s, t, a->rn, 0, a->esz); 9402 f->gen_bhs[a->esz](t, tcg_env, t); 9403 write_fp_sreg(s, a->rd, t); 9404 } 9405 return true; 9406 } 9407 9408 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 9409 { 9410 if (a->esz == MO_64 && !a->q) { 9411 return false; 9412 } 9413 if (!fp_access_check(s)) { 9414 return true; 9415 } 9416 if (a->esz == MO_64) { 9417 TCGv_i64 t = tcg_temp_new_i64(); 9418 9419 for (int i = 0; i < 2; ++i) { 9420 read_vec_element(s, t, a->rn, i, MO_64); 9421 f->gen_d(t, tcg_env, t); 9422 write_vec_element(s, t, a->rd, i, MO_64); 9423 } 9424 } else { 9425 TCGv_i32 t = tcg_temp_new_i32(); 9426 int n = (a->q ? 
16 : 8) >> a->esz; 9427 9428 for (int i = 0; i < n; ++i) { 9429 read_vec_element_i32(s, t, a->rn, i, a->esz); 9430 f->gen_bhs[a->esz](t, tcg_env, t); 9431 write_vec_element_i32(s, t, a->rd, i, a->esz); 9432 } 9433 } 9434 clear_vec_high(s, a->q, a->rd); 9435 return true; 9436 } 9437 9438 static const ENVScalar1 f_scalar_sqabs = { 9439 { gen_helper_neon_qabs_s8, 9440 gen_helper_neon_qabs_s16, 9441 gen_helper_neon_qabs_s32 }, 9442 gen_helper_neon_qabs_s64, 9443 }; 9444 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9445 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9446 9447 static const ENVScalar1 f_scalar_sqneg = { 9448 { gen_helper_neon_qneg_s8, 9449 gen_helper_neon_qneg_s16, 9450 gen_helper_neon_qneg_s32 }, 9451 gen_helper_neon_qneg_s64, 9452 }; 9453 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9454 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9455 9456 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9457 { 9458 if (fp_access_check(s)) { 9459 TCGv_i64 t = read_fp_dreg(s, a->rn); 9460 f(t, t); 9461 write_fp_dreg(s, a->rd, t); 9462 } 9463 return true; 9464 } 9465 9466 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9467 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9468 9469 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9470 { 9471 if (fp_access_check(s)) { 9472 TCGv_i64 t = read_fp_dreg(s, a->rn); 9473 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9474 write_fp_dreg(s, a->rd, t); 9475 } 9476 return true; 9477 } 9478 9479 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9480 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9481 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9482 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9483 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9484 9485 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9486 ArithOneOp * const fn[3]) 9487 { 9488 if (a->esz == MO_64) { 9489 return false; 9490 } 9491 if (fp_access_check(s)) { 9492 TCGv_i64 t = tcg_temp_new_i64(); 9493 9494 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9495 fn[a->esz](t, t); 9496 clear_vec(s, a->rd); 9497 write_vec_element(s, t, a->rd, 0, a->esz); 9498 } 9499 return true; 9500 } 9501 9502 #define WRAP_ENV(NAME) \ 9503 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9504 { gen_helper_##NAME(d, tcg_env, n); } 9505 9506 WRAP_ENV(neon_unarrow_sat8) 9507 WRAP_ENV(neon_unarrow_sat16) 9508 WRAP_ENV(neon_unarrow_sat32) 9509 9510 static ArithOneOp * const f_scalar_sqxtun[] = { 9511 gen_neon_unarrow_sat8, 9512 gen_neon_unarrow_sat16, 9513 gen_neon_unarrow_sat32, 9514 }; 9515 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9516 9517 WRAP_ENV(neon_narrow_sat_s8) 9518 WRAP_ENV(neon_narrow_sat_s16) 9519 WRAP_ENV(neon_narrow_sat_s32) 9520 9521 static ArithOneOp * const f_scalar_sqxtn[] = { 9522 gen_neon_narrow_sat_s8, 9523 gen_neon_narrow_sat_s16, 9524 gen_neon_narrow_sat_s32, 9525 }; 9526 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9527 9528 WRAP_ENV(neon_narrow_sat_u8) 9529 WRAP_ENV(neon_narrow_sat_u16) 9530 WRAP_ENV(neon_narrow_sat_u32) 9531 9532 static ArithOneOp * const f_scalar_uqxtn[] = { 9533 gen_neon_narrow_sat_u8, 9534 gen_neon_narrow_sat_u16, 9535 gen_neon_narrow_sat_u32, 9536 }; 9537 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9538 9539 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) 9540 { 9541 if (fp_access_check(s)) { 9542 /* 9543 * 64 bit to 32 bit float conversion 9544 * with von Neumann rounding (round to odd) 9545 */ 9546 TCGv_i64 src = read_fp_dreg(s, a->rn); 9547 TCGv_i32 dst = 
tcg_temp_new_i32(); 9548 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); 9549 write_fp_sreg_merging(s, a->rd, a->rd, dst); 9550 } 9551 return true; 9552 } 9553 9554 #undef WRAP_ENV 9555 9556 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9557 { 9558 if (!a->q && a->esz == MO_64) { 9559 return false; 9560 } 9561 if (fp_access_check(s)) { 9562 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9563 } 9564 return true; 9565 } 9566 9567 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) 9568 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) 9569 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) 9570 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) 9571 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) 9572 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) 9573 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) 9574 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) 9575 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) 9576 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) 9577 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) 9578 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) 9579 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) 9580 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) 9581 9582 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9583 { 9584 if (a->esz == MO_64) { 9585 return false; 9586 } 9587 if (fp_access_check(s)) { 9588 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9589 } 9590 return true; 9591 } 9592 9593 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) 9594 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) 9595 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) 9596 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) 9597 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) 9598 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) 9599 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) 9600 9601 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, 9602 ArithOneOp * const fn[3]) 9603 { 9604 if (a->esz == MO_64) { 9605 return false; 9606 } 9607 if (fp_access_check(s)) { 9608 TCGv_i64 t0 = tcg_temp_new_i64(); 9609 TCGv_i64 t1 = tcg_temp_new_i64(); 9610 9611 read_vec_element(s, t0, a->rn, 0, MO_64); 9612 read_vec_element(s, t1, a->rn, 1, MO_64); 9613 fn[a->esz](t0, t0); 9614 fn[a->esz](t1, t1); 9615 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32); 9616 write_vec_element(s, t1, a->rd, a->q ? 
3 : 1, MO_32); 9617 clear_vec_high(s, a->q, a->rd); 9618 } 9619 return true; 9620 } 9621 9622 static ArithOneOp * const f_scalar_xtn[] = { 9623 gen_helper_neon_narrow_u8, 9624 gen_helper_neon_narrow_u16, 9625 tcg_gen_ext32u_i64, 9626 }; 9627 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9628 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9629 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9630 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9631 9632 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9633 { 9634 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9635 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9636 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9637 TCGv_i32 ahp = get_ahp_flag(); 9638 9639 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9640 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9641 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9642 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9643 tcg_gen_extu_i32_i64(d, tcg_lo); 9644 } 9645 9646 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9647 { 9648 TCGv_i32 tmp = tcg_temp_new_i32(); 9649 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9650 9651 gen_helper_vfp_fcvtsd(tmp, n, fpst); 9652 tcg_gen_extu_i32_i64(d, tmp); 9653 } 9654 9655 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9656 { 9657 /* 9658 * 64 bit to 32 bit float conversion 9659 * with von Neumann rounding (round to odd) 9660 */ 9661 TCGv_i32 tmp = tcg_temp_new_i32(); 9662 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); 9663 tcg_gen_extu_i32_i64(d, tmp); 9664 } 9665 9666 static ArithOneOp * const f_vector_fcvtn[] = { 9667 NULL, 9668 gen_fcvtn_hs, 9669 gen_fcvtn_sd, 9670 }; 9671 static ArithOneOp * const f_scalar_fcvtxn[] = { 9672 NULL, 9673 NULL, 9674 gen_fcvtxn_sd, 9675 }; 9676 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9677 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9678 9679 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9680 { 9681 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9682 TCGv_i32 tmp = tcg_temp_new_i32(); 9683 gen_helper_bfcvt_pair(tmp, n, fpst); 9684 tcg_gen_extu_i32_i64(d, tmp); 9685 } 9686 9687 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) 9688 { 9689 TCGv_ptr fpst = fpstatus_ptr(FPST_AH); 9690 TCGv_i32 tmp = tcg_temp_new_i32(); 9691 gen_helper_bfcvt_pair(tmp, n, fpst); 9692 tcg_gen_extu_i32_i64(d, tmp); 9693 } 9694 9695 static ArithOneOp * const f_vector_bfcvtn[2][3] = { 9696 { 9697 NULL, 9698 gen_bfcvtn_hs, 9699 NULL, 9700 }, { 9701 NULL, 9702 gen_bfcvtn_ah_hs, 9703 NULL, 9704 } 9705 }; 9706 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, 9707 f_vector_bfcvtn[s->fpcr_ah]) 9708 9709 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9710 { 9711 static NeonGenWidenFn * const widenfns[3] = { 9712 gen_helper_neon_widen_u8, 9713 gen_helper_neon_widen_u16, 9714 tcg_gen_extu_i32_i64, 9715 }; 9716 NeonGenWidenFn *widenfn; 9717 TCGv_i64 tcg_res[2]; 9718 TCGv_i32 tcg_op; 9719 int part, pass; 9720 9721 if (a->esz == MO_64) { 9722 return false; 9723 } 9724 if (!fp_access_check(s)) { 9725 return true; 9726 } 9727 9728 tcg_op = tcg_temp_new_i32(); 9729 widenfn = widenfns[a->esz]; 9730 part = a->q ? 
static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
{
    static NeonGenWidenFn * const widenfns[3] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    NeonGenWidenFn *widenfn;
    TCGv_i64 tcg_res[2];
    TCGv_i32 tcg_op;
    int part, pass;

    if (a->esz == MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    tcg_op = tcg_temp_new_i32();
    widenfn = widenfns[a->esz];
    part = a->q ? 2 : 0;

    for (pass = 0; pass < 2; pass++) {
        read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
        tcg_res[pass] = tcg_temp_new_i64();
        widenfn(tcg_res[pass], tcg_op);
        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
    }
    return true;
}

static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    return true;
}

TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)

static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
                          const FPScalar1 *f, int rmode)
{
    TCGv_i32 tcg_rmode = NULL;
    TCGv_ptr fpst;
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    if (rmode >= 0) {
        tcg_rmode = gen_set_rmode(rmode, fpst);
    }

    if (a->esz == MO_64) {
        TCGv_i64 t64 = tcg_temp_new_i64();

        for (int pass = 0; pass < 2; ++pass) {
            read_vec_element(s, t64, a->rn, pass, MO_64);
            f->gen_d(t64, t64, fpst);
            write_vec_element(s, t64, a->rd, pass, MO_64);
        }
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
            = (a->esz == MO_16 ? f->gen_h : f->gen_s);

        for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
            read_vec_element_i32(s, t32, a->rn, pass, a->esz);
            gen(t32, t32, fpst);
            write_vec_element_i32(s, t32, a->rd, pass, a->esz);
        }
    }
    clear_vec_high(s, a->q, a->rd);

    if (rmode >= 0) {
        gen_restore_rmode(tcg_rmode, fpst);
    }
    return true;
}

TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)

TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)

TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint32, FPROUNDING_ZERO)
TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint64, FPROUNDING_ZERO)
TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
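
/*
 * Common expansion for two-operand gvec FP ops: element sizes are
 * restricted to half/single/double (see fp_access_check_vector_hsd),
 * so the fns[] tables hold the H, S and D helpers and are indexed
 * with esz - 1.
 */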
static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
                                           bool is_q, int rd, int rn, int data,
                                           gen_helper_gvec_2_ptr * const fns[3],
                                           ARMFPStatusFlavour fpsttype)
{
    int check = fp_access_check_vector_hsd(s, is_q, esz);
    TCGv_ptr fpst;

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       data, fns[esz - 1]);
    return true;
}

static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
                             int rd, int rn, int data,
                             gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
                                          esz == MO_16 ? FPST_A64_F16 :
                                          FPST_A64);
}

static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
                                int rd, int rn, int data,
                                gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
                                          fns, select_ah_fpst(s, esz));
}

static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
    gen_helper_gvec_vcvt_sh,
    gen_helper_gvec_vcvt_sf,
    gen_helper_gvec_vcvt_sd,
};
TRANS(SCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
TRANS(SCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)

static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
    gen_helper_gvec_vcvt_uh,
    gen_helper_gvec_vcvt_uf,
    gen_helper_gvec_vcvt_ud,
};
TRANS(UCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
TRANS(UCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)

static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
    gen_helper_gvec_vcvt_rz_hs,
    gen_helper_gvec_vcvt_rz_fs,
    gen_helper_gvec_vcvt_rz_ds,
};
TRANS(FCVTZS_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)

static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
    gen_helper_gvec_vcvt_rz_hu,
    gen_helper_gvec_vcvt_rz_fu,
    gen_helper_gvec_vcvt_rz_du,
};
TRANS(FCVTZU_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
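
/*
 * For the FCVT<rm><sign>_vi group below, the gvec 'data' argument is a
 * float_round_* value; the vcvt_rm_* helpers are expected to install
 * that rounding mode around the conversion.
 */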
static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
    gen_helper_gvec_vcvt_rm_sh,
    gen_helper_gvec_vcvt_rm_ss,
    gen_helper_gvec_vcvt_rm_sd,
};

static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
    gen_helper_gvec_vcvt_rm_uh,
    gen_helper_gvec_vcvt_rm_us,
    gen_helper_gvec_vcvt_rm_ud,
};

TRANS(FCVTNS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
TRANS(FCVTNU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
TRANS(FCVTPS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
TRANS(FCVTPU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
TRANS(FCVTMS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
TRANS(FCVTMU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
TRANS(FCVTZS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
TRANS(FCVTZU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
TRANS(FCVTAS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
TRANS(FCVTAU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)

static gen_helper_gvec_2_ptr * const f_fceq0[] = {
    gen_helper_gvec_fceq0_h,
    gen_helper_gvec_fceq0_s,
    gen_helper_gvec_fceq0_d,
};
TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)

static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
    gen_helper_gvec_fcgt0_h,
    gen_helper_gvec_fcgt0_s,
    gen_helper_gvec_fcgt0_d,
};
TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)

static gen_helper_gvec_2_ptr * const f_fcge0[] = {
    gen_helper_gvec_fcge0_h,
    gen_helper_gvec_fcge0_s,
    gen_helper_gvec_fcge0_d,
};
TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)

static gen_helper_gvec_2_ptr * const f_fclt0[] = {
    gen_helper_gvec_fclt0_h,
    gen_helper_gvec_fclt0_s,
    gen_helper_gvec_fclt0_d,
};
TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)

static gen_helper_gvec_2_ptr * const f_fcle0[] = {
    gen_helper_gvec_fcle0_h,
    gen_helper_gvec_fcle0_s,
    gen_helper_gvec_fcle0_d,
};
TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
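
/*
 * FRECPE/FRSQRTE: when FPCR.AH is set and FEAT_RPRES is implemented,
 * the single-precision case uses the increased-precision estimate
 * helpers instead of the standard ones.
 */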
static gen_helper_gvec_2_ptr * const f_frecpe[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s,
    gen_helper_gvec_frecpe_d,
};
static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_rpres_s,
    gen_helper_gvec_frecpe_d,
};
TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)

static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s,
    gen_helper_gvec_frsqrte_d,
};
static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_rpres_s,
    gen_helper_gvec_frsqrte_d,
};
TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)

static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
{
    /* Handle 2-reg-misc ops which are widening (so each size element
     * in the source becomes a 2*size element in the destination).
     * The only instruction like this is FCVTL.
     */
    int pass;
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return true;
    }

    if (a->esz == MO_64) {
        /* 32 -> 64 bit fp conversion */
        TCGv_i64 tcg_res[2];
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        int srcelt = a->q ? 2 : 0;

        fpst = fpstatus_ptr(FPST_A64);

        for (pass = 0; pass < 2; pass++) {
            tcg_res[pass] = tcg_temp_new_i64();
            read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
        }
        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
        }
    } else {
        /* 16 -> 32 bit fp conversion */
        int srcelt = a->q ? 4 : 0;
        TCGv_i32 tcg_res[4];
        TCGv_i32 ahp = get_ahp_flag();

        fpst = fpstatus_ptr(FPST_A64_F16);

        for (pass = 0; pass < 4; pass++) {
            tcg_res[pass] = tcg_temp_new_i32();
            read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
                                           fpst, ahp);
        }
        for (pass = 0; pass < 4; pass++) {
            write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
        }
    }
    clear_vec_high(s, true, a->rd);
    return true;
}
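
/*
 * Stubs for the decode-sme-fa64 decoder: instructions matched as FAIL
 * are flagged as non-streaming, so that the later FP/SVE access checks
 * can raise the SME streaming-mode trap; OK leaves the flag clear.
 */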
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn
 *  - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
    dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;
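
    /*
     * The page-crossing bound below works on the negated page offset,
     * e.g. (illustrative, assuming 4KiB target pages) pc_first ==
     * 0x...0ff8 gives -(pc_first | TARGET_PAGE_MASK) == 8 and hence
     * a bound of 2 insns.
     */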
    /* Bound the number of insns to execute to those left on the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = 0;
    s->sve_access_checked = 0;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }
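
    /*
     * Try the decoders in turn: base A64 first, then SME, then SVE;
     * if none of them claims the instruction it is an unallocated
     * encoding.
     */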
    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};