/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "exec/target_page.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register. But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
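 *
 * For example, when TBI applies to an address, bits [63:56] are ignored
 * for translation, so 0x0f00_0000_1234_5678 and 0x0000_0000_1234_5678
 * name the same memory location; only the tag byte differs.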
 */
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access. This probes a single
 * address, the exact one specified. The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
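 *
 * For example, a 4-byte access whose address has 0xd in its low four bits
 * spans bytes 13..16 of the 16-byte granule and must fault, while the
 * same access at offset 0xc ends exactly on the boundary and is allowed.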
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments. For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path. A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/*
 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
 * - if FPCR.NEP == 0, clear the high elements of reg
 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
 *   (i.e. merge the result with those high elements)
 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
 */
static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i64 v)
{
    if (!s->fpcr_nep) {
        write_fp_dreg(s, reg, v);
        return;
    }

    /*
     * Move from mergereg to reg; this sets the high elements and
     * clears the bits above 128 as a side effect.
     */
    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
}

/*
 * Write a single-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 */
static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
}

/*
 * Write a half-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 * The caller must ensure that the top 16 bits of v are zero.
 */
static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, ARMFPStatusFlavour fpsttype, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, ARMFPStatusFlavour fpsttype,
                              int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
 * which for float32 is
 *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negh(chs_s, s);
    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, chs_s);
}

static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negs(chs_s, s);
    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, chs_s);
}

static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();

    gen_vfp_negd(chs_s, s);
    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, chs_s);
}

/*
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_abs(s)
 * which for float32 is
 *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, abs_s);
}

static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, abs_s);
}

static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64();

    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, abs_s);
}

static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negh(d, s);
    } else {
        gen_vfp_negh(d, s);
    }
}

static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negs(d, s);
    } else {
        gen_vfp_negs(d, s);
    }
}

static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negd(d, s);
    } else {
        gen_vfp_negd(d, s);
    }
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

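    /*
     * C is set when no borrow occurs, i.e. t0 >= t1 unsigned (the setcond
     * below); V is set when t0 and t1 have differing signs and the sign
     * of the result differs from t0 (the xor/and sequence below).
     */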
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
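 * When iss_valid is set we also record an ISS (instruction specific
 * syndrome) for the access, so that a data abort taken on it can report
 * the faulting register number and access size.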
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = -1;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = 1;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/*
 * Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        bool ret;

        assert(dc_isar_feature(aa64_sme, s));
        ret = sme_sm_enabled_check(s);
        s->sve_access_checked = (ret ? 1 : -1);
        return ret;
    }
    if (s->sve_excp_el) {
        /* Assert that we only raise one exception per instruction. */
        assert(!s->sve_access_checked);
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        s->sve_access_checked = -1;
        return false;
    }
    s->sve_access_checked = 1;
    return fp_access_check(s);
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority. This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        bool ret = sme_access_check(s);
        s->fp_access_checked = (ret ? 1 : -1);
        return ret;
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}


static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3. */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.
         */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_ERET(DisasContext *s, arg_ERET *a)
{
    TCGv_i64 dst;

    if (s->current_el == 0) {
        return false;
    }
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_ERETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }
    /* The FGT trap takes precedence over an auth trap. */
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    dst = auth_branch_target(s, dst, cpu_X[31], !a->m);

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}

static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_YIELD;
    }
    return true;
}

static bool trans_WFI(DisasContext *s, arg_WFI *a)
{
    s->base.is_jmp = DISAS_WFI;
    return true;
}

static bool trans_WFE(DisasContext *s, arg_WFI *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_WFE;
    }
    return true;
}

static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
{
    if (!dc_isar_feature(aa64_wfxt, s)) {
        return false;
    }

    /*
     * Because we need to pass the register value to the helper,
     * it's easier to emit the code now, unlike trans_WFI which
     * defers it to aarch64_tr_tb_stop(). That means we need to
     * check ss_active so that single-stepping a WFIT doesn't halt.
1963 */ 1964 if (s->ss_active) { 1965 /* Act like a NOP under architectural singlestep */ 1966 return true; 1967 } 1968 1969 gen_a64_update_pc(s, 4); 1970 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1971 /* Go back to the main loop to check for interrupts */ 1972 s->base.is_jmp = DISAS_EXIT; 1973 return true; 1974 } 1975 1976 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1977 { 1978 if (!dc_isar_feature(aa64_wfxt, s)) { 1979 return false; 1980 } 1981 1982 /* 1983 * We rely here on our WFE implementation being a NOP, so we 1984 * don't need to do anything different to handle the WFET timeout 1985 * from what trans_WFE does. 1986 */ 1987 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1988 s->base.is_jmp = DISAS_WFE; 1989 } 1990 return true; 1991 } 1992 1993 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1994 { 1995 if (s->pauth_active) { 1996 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1997 } 1998 return true; 1999 } 2000 2001 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 2002 { 2003 if (s->pauth_active) { 2004 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2005 } 2006 return true; 2007 } 2008 2009 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 2010 { 2011 if (s->pauth_active) { 2012 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2013 } 2014 return true; 2015 } 2016 2017 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 2018 { 2019 if (s->pauth_active) { 2020 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2021 } 2022 return true; 2023 } 2024 2025 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 2026 { 2027 if (s->pauth_active) { 2028 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2029 } 2030 return true; 2031 } 2032 2033 static bool trans_ESB(DisasContext *s, arg_ESB *a) 2034 { 2035 /* Without RAS, we must implement this as NOP. */ 2036 if (dc_isar_feature(aa64_ras, s)) { 2037 /* 2038 * QEMU does not have a source of physical SErrors, 2039 * so we are only concerned with virtual SErrors. 2040 * The pseudocode in the ARM for this case is 2041 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 2042 * AArch64.vESBOperation(); 2043 * Most of the condition can be evaluated at translation time. 2044 * Test for EL2 present, and defer test for SEL2 to runtime. 
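         * (The vesb helper performs those deferred checks, including
         *  whether Secure EL2 is enabled, at runtime.)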
2045 */ 2046 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 2047 gen_helper_vesb(tcg_env); 2048 } 2049 } 2050 return true; 2051 } 2052 2053 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 2054 { 2055 if (s->pauth_active) { 2056 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2057 } 2058 return true; 2059 } 2060 2061 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 2062 { 2063 if (s->pauth_active) { 2064 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2065 } 2066 return true; 2067 } 2068 2069 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 2070 { 2071 if (s->pauth_active) { 2072 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2073 } 2074 return true; 2075 } 2076 2077 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 2078 { 2079 if (s->pauth_active) { 2080 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2081 } 2082 return true; 2083 } 2084 2085 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 2086 { 2087 if (s->pauth_active) { 2088 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2089 } 2090 return true; 2091 } 2092 2093 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 2094 { 2095 if (s->pauth_active) { 2096 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2097 } 2098 return true; 2099 } 2100 2101 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 2102 { 2103 if (s->pauth_active) { 2104 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2105 } 2106 return true; 2107 } 2108 2109 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 2110 { 2111 if (s->pauth_active) { 2112 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2113 } 2114 return true; 2115 } 2116 2117 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 2118 { 2119 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2120 return true; 2121 } 2122 2123 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 2124 { 2125 /* We handle DSB and DMB the same way */ 2126 TCGBar bar; 2127 2128 switch (a->types) { 2129 case 1: /* MBReqTypes_Reads */ 2130 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 2131 break; 2132 case 2: /* MBReqTypes_Writes */ 2133 bar = TCG_BAR_SC | TCG_MO_ST_ST; 2134 break; 2135 default: /* MBReqTypes_All */ 2136 bar = TCG_BAR_SC | TCG_MO_ALL; 2137 break; 2138 } 2139 tcg_gen_mb(bar); 2140 return true; 2141 } 2142 2143 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 2144 { 2145 if (!dc_isar_feature(aa64_xs, s)) { 2146 return false; 2147 } 2148 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 2149 return true; 2150 } 2151 2152 static bool trans_ISB(DisasContext *s, arg_ISB *a) 2153 { 2154 /* 2155 * We need to break the TB after this insn to execute 2156 * self-modifying code correctly and also to take 2157 * any pending interrupts immediately. 2158 */ 2159 reset_btype(s); 2160 gen_goto_tb(s, 0, 4); 2161 return true; 2162 } 2163 2164 static bool trans_SB(DisasContext *s, arg_SB *a) 2165 { 2166 if (!dc_isar_feature(aa64_sb, s)) { 2167 return false; 2168 } 2169 /* 2170 * TODO: There is no speculation barrier opcode for TCG; 2171 * MB and end the TB instead. 
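     * Since TCG does not perform speculative execution, this is stronger
     * than strictly required but keeps the behaviour obviously correct.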
2172 */ 2173 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2174 gen_goto_tb(s, 0, 4); 2175 return true; 2176 } 2177 2178 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2179 { 2180 if (!dc_isar_feature(aa64_condm_4, s)) { 2181 return false; 2182 } 2183 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2184 return true; 2185 } 2186 2187 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2188 { 2189 TCGv_i32 z; 2190 2191 if (!dc_isar_feature(aa64_condm_5, s)) { 2192 return false; 2193 } 2194 2195 z = tcg_temp_new_i32(); 2196 2197 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2198 2199 /* 2200 * (!C & !Z) << 31 2201 * (!(C | Z)) << 31 2202 * ~((C | Z) << 31) 2203 * ~-(C | Z) 2204 * (C | Z) - 1 2205 */ 2206 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2207 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2208 2209 /* !(Z & C) */ 2210 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2211 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2212 2213 /* (!C & Z) << 31 -> -(Z & ~C) */ 2214 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2215 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2216 2217 /* C | Z */ 2218 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2219 2220 return true; 2221 } 2222 2223 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2224 { 2225 if (!dc_isar_feature(aa64_condm_5, s)) { 2226 return false; 2227 } 2228 2229 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2230 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2231 2232 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2233 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2234 2235 tcg_gen_movi_i32(cpu_NF, 0); 2236 tcg_gen_movi_i32(cpu_VF, 0); 2237 2238 return true; 2239 } 2240 2241 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2242 { 2243 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2244 return false; 2245 } 2246 if (a->imm & 1) { 2247 set_pstate_bits(PSTATE_UAO); 2248 } else { 2249 clear_pstate_bits(PSTATE_UAO); 2250 } 2251 gen_rebuild_hflags(s); 2252 s->base.is_jmp = DISAS_TOO_MANY; 2253 return true; 2254 } 2255 2256 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2257 { 2258 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2259 return false; 2260 } 2261 if (a->imm & 1) { 2262 set_pstate_bits(PSTATE_PAN); 2263 } else { 2264 clear_pstate_bits(PSTATE_PAN); 2265 } 2266 gen_rebuild_hflags(s); 2267 s->base.is_jmp = DISAS_TOO_MANY; 2268 return true; 2269 } 2270 2271 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2272 { 2273 if (s->current_el == 0) { 2274 return false; 2275 } 2276 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2277 s->base.is_jmp = DISAS_TOO_MANY; 2278 return true; 2279 } 2280 2281 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2282 { 2283 if (!dc_isar_feature(aa64_ssbs, s)) { 2284 return false; 2285 } 2286 if (a->imm & 1) { 2287 set_pstate_bits(PSTATE_SSBS); 2288 } else { 2289 clear_pstate_bits(PSTATE_SSBS); 2290 } 2291 /* Don't need to rebuild hflags since SSBS is a nop */ 2292 s->base.is_jmp = DISAS_TOO_MANY; 2293 return true; 2294 } 2295 2296 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2297 { 2298 if (!dc_isar_feature(aa64_dit, s)) { 2299 return false; 2300 } 2301 if (a->imm & 1) { 2302 set_pstate_bits(PSTATE_DIT); 2303 } else { 2304 clear_pstate_bits(PSTATE_DIT); 2305 } 2306 /* There's no need to rebuild hflags because DIT is a nop */ 2307 s->base.is_jmp = DISAS_TOO_MANY; 2308 return true; 2309 } 2310 2311 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2312 { 2313 if (dc_isar_feature(aa64_mte, s)) { 2314 /* Full MTE is enabled -- set the TCO bit as directed. 
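         * (PSTATE.TCO is the Tag Check Override bit: while it is set,
         *  loads and stores are not tag checked.)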
*/ 2315 if (a->imm & 1) { 2316 set_pstate_bits(PSTATE_TCO); 2317 } else { 2318 clear_pstate_bits(PSTATE_TCO); 2319 } 2320 gen_rebuild_hflags(s); 2321 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2322 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2323 return true; 2324 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2325 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2326 return true; 2327 } else { 2328 /* Insn not present */ 2329 return false; 2330 } 2331 } 2332 2333 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2334 { 2335 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2336 s->base.is_jmp = DISAS_TOO_MANY; 2337 return true; 2338 } 2339 2340 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2341 { 2342 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2343 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2344 s->base.is_jmp = DISAS_UPDATE_EXIT; 2345 return true; 2346 } 2347 2348 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2349 { 2350 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2351 return false; 2352 } 2353 2354 if (a->imm == 0) { 2355 clear_pstate_bits(PSTATE_ALLINT); 2356 } else if (s->current_el > 1) { 2357 set_pstate_bits(PSTATE_ALLINT); 2358 } else { 2359 gen_helper_msr_set_allint_el1(tcg_env); 2360 } 2361 2362 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2363 s->base.is_jmp = DISAS_UPDATE_EXIT; 2364 return true; 2365 } 2366 2367 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2368 { 2369 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2370 return false; 2371 } 2372 if (sme_access_check(s)) { 2373 int old = s->pstate_sm | (s->pstate_za << 1); 2374 int new = a->imm * 3; 2375 2376 if ((old ^ new) & a->mask) { 2377 /* At least one bit changes. */ 2378 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2379 tcg_constant_i32(a->mask)); 2380 s->base.is_jmp = DISAS_TOO_MANY; 2381 } 2382 } 2383 return true; 2384 } 2385 2386 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2387 { 2388 TCGv_i32 tmp = tcg_temp_new_i32(); 2389 TCGv_i32 nzcv = tcg_temp_new_i32(); 2390 2391 /* build bit 31, N */ 2392 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2393 /* build bit 30, Z */ 2394 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2395 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2396 /* build bit 29, C */ 2397 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2398 /* build bit 28, V */ 2399 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2400 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2401 /* generate result */ 2402 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2403 } 2404 2405 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2406 { 2407 TCGv_i32 nzcv = tcg_temp_new_i32(); 2408 2409 /* take NZCV from R[t] */ 2410 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2411 2412 /* bit 31, N */ 2413 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2414 /* bit 30, Z */ 2415 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2416 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2417 /* bit 29, C */ 2418 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2419 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2420 /* bit 28, V */ 2421 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2422 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2423 } 2424 2425 static void gen_sysreg_undef(DisasContext *s, bool isread, 2426 uint8_t op0, uint8_t op1, uint8_t op2, 2427 uint8_t crn, uint8_t crm, uint8_t rt) 2428 { 2429 /* 2430 * Generate code to emit an UNDEF with correct syndrome 2431 * information for a failed system register access. 
2432 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2433 * but if FEAT_IDST is implemented then read accesses to registers 2434 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2435 * syndrome. 2436 */ 2437 uint32_t syndrome; 2438 2439 if (isread && dc_isar_feature(aa64_ids, s) && 2440 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2441 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2442 } else { 2443 syndrome = syn_uncategorized(); 2444 } 2445 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2446 } 2447 2448 /* MRS - move from system register 2449 * MSR (register) - move to system register 2450 * SYS 2451 * SYSL 2452 * These are all essentially the same insn in 'read' and 'write' 2453 * versions, with varying op0 fields. 2454 */ 2455 static void handle_sys(DisasContext *s, bool isread, 2456 unsigned int op0, unsigned int op1, unsigned int op2, 2457 unsigned int crn, unsigned int crm, unsigned int rt) 2458 { 2459 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2460 crn, crm, op0, op1, op2); 2461 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2462 bool need_exit_tb = false; 2463 bool nv_trap_to_el2 = false; 2464 bool nv_redirect_reg = false; 2465 bool skip_fp_access_checks = false; 2466 bool nv2_mem_redirect = false; 2467 TCGv_ptr tcg_ri = NULL; 2468 TCGv_i64 tcg_rt; 2469 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2470 2471 if (crn == 11 || crn == 15) { 2472 /* 2473 * Check for TIDCP trap, which must take precedence over 2474 * the UNDEF for "no such register" etc. 2475 */ 2476 switch (s->current_el) { 2477 case 0: 2478 if (dc_isar_feature(aa64_tidcp1, s)) { 2479 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2480 } 2481 break; 2482 case 1: 2483 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2484 break; 2485 } 2486 } 2487 2488 if (!ri) { 2489 /* Unknown register; this might be a guest error or a QEMU 2490 * unimplemented feature. 2491 */ 2492 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2493 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2494 isread ? "read" : "write", op0, op1, crn, crm, op2); 2495 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2496 return; 2497 } 2498 2499 if (s->nv2 && ri->nv2_redirect_offset) { 2500 /* 2501 * Some registers always redirect to memory; some only do so if 2502 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2503 * pairs which share an offset; see the table in R_CSRPQ). 2504 */ 2505 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2506 nv2_mem_redirect = s->nv1; 2507 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2508 nv2_mem_redirect = !s->nv1; 2509 } else { 2510 nv2_mem_redirect = true; 2511 } 2512 } 2513 2514 /* Check access permissions */ 2515 if (!cp_access_ok(s->current_el, ri, isread)) { 2516 /* 2517 * FEAT_NV/NV2 handling does not do the usual FP access checks 2518 * for registers only accessible at EL2 (though it *does* do them 2519 * for registers accessible at EL1). 2520 */ 2521 skip_fp_access_checks = true; 2522 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2523 /* 2524 * This is one of the few EL2 registers which should redirect 2525 * to the equivalent EL1 register. We do that after running 2526 * the EL2 register's accessfn. 2527 */ 2528 nv_redirect_reg = true; 2529 assert(!nv2_mem_redirect); 2530 } else if (nv2_mem_redirect) { 2531 /* 2532 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2533 * UNDEF to EL1. 
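             * (Nothing to emit here; the actual redirection to memory is
             *  generated further down, after the runtime access checks.)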
2534 */ 2535 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2536 /* 2537 * This register / instruction exists and is an EL2 register, so 2538 * we must trap to EL2 if accessed in nested virtualization EL1 2539 * instead of UNDEFing. We'll do that after the usual access checks. 2540 * (This makes a difference only for a couple of registers like 2541 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2542 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2543 * an accessfn which does nothing when called from EL1, because 2544 * the trap-to-EL3 controls which would apply to that register 2545 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2546 */ 2547 nv_trap_to_el2 = true; 2548 } else { 2549 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2550 return; 2551 } 2552 } 2553 2554 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2555 /* Emit code to perform further access permissions checks at 2556 * runtime; this may result in an exception. 2557 */ 2558 gen_a64_update_pc(s, 0); 2559 tcg_ri = tcg_temp_new_ptr(); 2560 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2561 tcg_constant_i32(key), 2562 tcg_constant_i32(syndrome), 2563 tcg_constant_i32(isread)); 2564 } else if (ri->type & ARM_CP_RAISES_EXC) { 2565 /* 2566 * The readfn or writefn might raise an exception; 2567 * synchronize the CPU state in case it does. 2568 */ 2569 gen_a64_update_pc(s, 0); 2570 } 2571 2572 if (!skip_fp_access_checks) { 2573 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2574 return; 2575 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2576 return; 2577 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2578 return; 2579 } 2580 } 2581 2582 if (nv_trap_to_el2) { 2583 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2584 return; 2585 } 2586 2587 if (nv_redirect_reg) { 2588 /* 2589 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2590 * Conveniently in all cases the encoding of the EL1 register is 2591 * identical to the EL2 register except that opc1 is 0. 2592 * Get the reginfo for the EL1 register to use for the actual access. 2593 * We don't use the EL1 register's access function, and 2594 * fine-grained-traps on EL1 also do not apply here. 2595 */ 2596 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2597 crn, crm, op0, 0, op2); 2598 ri = get_arm_cp_reginfo(s->cp_regs, key); 2599 assert(ri); 2600 assert(cp_access_ok(s->current_el, ri, isread)); 2601 /* 2602 * We might not have done an update_pc earlier, so check we don't 2603 * need it. We could support this in future if necessary. 2604 */ 2605 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2606 } 2607 2608 if (nv2_mem_redirect) { 2609 /* 2610 * This system register is being redirected into an EL2 memory access. 2611 * This means it is not an IO operation, doesn't change hflags, 2612 * and need not end the TB, because it has no side effects. 2613 * 2614 * The access is 64-bit single copy atomic, guaranteed aligned because 2615 * of the definition of VCNR_EL2. Its endianness depends on 2616 * SCTLR_EL2.EE, not on the data endianness of EL1. 2617 * It is done under either the EL2 translation regime or the EL2&0 2618 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2619 * PSTATE.PAN is 0. 2620 */ 2621 TCGv_i64 ptr = tcg_temp_new_i64(); 2622 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2623 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2624 int memidx = arm_to_core_mmu_idx(armmemidx); 2625 uint32_t syn; 2626 2627 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2628 2629 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2630 tcg_gen_addi_i64(ptr, ptr, 2631 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2632 tcg_rt = cpu_reg(s, rt); 2633 2634 syn = syn_data_abort_vncr(0, !isread, 0); 2635 disas_set_insn_syndrome(s, syn); 2636 if (isread) { 2637 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2638 } else { 2639 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2640 } 2641 return; 2642 } 2643 2644 /* Handle special cases first */ 2645 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2646 case 0: 2647 break; 2648 case ARM_CP_NOP: 2649 return; 2650 case ARM_CP_NZCV: 2651 tcg_rt = cpu_reg(s, rt); 2652 if (isread) { 2653 gen_get_nzcv(tcg_rt); 2654 } else { 2655 gen_set_nzcv(tcg_rt); 2656 } 2657 return; 2658 case ARM_CP_CURRENTEL: 2659 { 2660 /* 2661 * Reads as current EL value from pstate, which is 2662 * guaranteed to be constant by the tb flags. 2663 * For nested virt we should report EL2. 2664 */ 2665 int el = s->nv ? 2 : s->current_el; 2666 tcg_rt = cpu_reg(s, rt); 2667 tcg_gen_movi_i64(tcg_rt, el << 2); 2668 return; 2669 } 2670 case ARM_CP_DC_ZVA: 2671 /* Writes clear the aligned block of memory which rt points into. */ 2672 if (s->mte_active[0]) { 2673 int desc = 0; 2674 2675 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2676 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2677 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2678 2679 tcg_rt = tcg_temp_new_i64(); 2680 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2681 tcg_constant_i32(desc), cpu_reg(s, rt)); 2682 } else { 2683 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2684 } 2685 gen_helper_dc_zva(tcg_env, tcg_rt); 2686 return; 2687 case ARM_CP_DC_GVA: 2688 { 2689 TCGv_i64 clean_addr, tag; 2690 2691 /* 2692 * DC_GVA, like DC_ZVA, requires that we supply the original 2693 * pointer for an invalid page. Probe that address first. 2694 */ 2695 tcg_rt = cpu_reg(s, rt); 2696 clean_addr = clean_data_tbi(s, tcg_rt); 2697 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2698 2699 if (s->ata[0]) { 2700 /* Extract the tag from the register to match STZGM. */ 2701 tag = tcg_temp_new_i64(); 2702 tcg_gen_shri_i64(tag, tcg_rt, 56); 2703 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2704 } 2705 } 2706 return; 2707 case ARM_CP_DC_GZVA: 2708 { 2709 TCGv_i64 clean_addr, tag; 2710 2711 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2712 tcg_rt = cpu_reg(s, rt); 2713 clean_addr = clean_data_tbi(s, tcg_rt); 2714 gen_helper_dc_zva(tcg_env, clean_addr); 2715 2716 if (s->ata[0]) { 2717 /* Extract the tag from the register to match STZGM. 
*/ 2718 tag = tcg_temp_new_i64(); 2719 tcg_gen_shri_i64(tag, tcg_rt, 56); 2720 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2721 } 2722 } 2723 return; 2724 default: 2725 g_assert_not_reached(); 2726 } 2727 2728 if (ri->type & ARM_CP_IO) { 2729 /* I/O operations must end the TB here (whether read or write) */ 2730 need_exit_tb = translator_io_start(&s->base); 2731 } 2732 2733 tcg_rt = cpu_reg(s, rt); 2734 2735 if (isread) { 2736 if (ri->type & ARM_CP_CONST) { 2737 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2738 } else if (ri->readfn) { 2739 if (!tcg_ri) { 2740 tcg_ri = gen_lookup_cp_reg(key); 2741 } 2742 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2743 } else { 2744 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2745 } 2746 } else { 2747 if (ri->type & ARM_CP_CONST) { 2748 /* If not forbidden by access permissions, treat as WI */ 2749 return; 2750 } else if (ri->writefn) { 2751 if (!tcg_ri) { 2752 tcg_ri = gen_lookup_cp_reg(key); 2753 } 2754 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2755 } else { 2756 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2757 } 2758 } 2759 2760 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2761 /* 2762 * A write to any coprocessor register that ends a TB 2763 * must rebuild the hflags for the next TB. 2764 */ 2765 gen_rebuild_hflags(s); 2766 /* 2767 * We default to ending the TB on a coprocessor register write, 2768 * but allow this to be suppressed by the register definition 2769 * (usually only necessary to work around guest bugs). 2770 */ 2771 need_exit_tb = true; 2772 } 2773 if (need_exit_tb) { 2774 s->base.is_jmp = DISAS_UPDATE_EXIT; 2775 } 2776 } 2777 2778 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2779 { 2780 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2781 return true; 2782 } 2783 2784 static bool trans_SVC(DisasContext *s, arg_i *a) 2785 { 2786 /* 2787 * For SVC, HVC and SMC we advance the single-step state 2788 * machine before taking the exception. This is architecturally 2789 * mandated, to ensure that single-stepping a system call 2790 * instruction works properly. 2791 */ 2792 uint32_t syndrome = syn_aa64_svc(a->imm); 2793 if (s->fgt_svc) { 2794 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2795 return true; 2796 } 2797 gen_ss_advance(s); 2798 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2799 return true; 2800 } 2801 2802 static bool trans_HVC(DisasContext *s, arg_i *a) 2803 { 2804 int target_el = s->current_el == 3 ? 3 : 2; 2805 2806 if (s->current_el == 0) { 2807 unallocated_encoding(s); 2808 return true; 2809 } 2810 /* 2811 * The pre HVC helper handles cases when HVC gets trapped 2812 * as an undefined insn by runtime configuration. 
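     * (e.g. HVC disabled via SCR_EL3.HCE or HCR_EL2.HCD; the helper
     *  raises the UNDEF exception itself in those cases.)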
2813 */ 2814 gen_a64_update_pc(s, 0); 2815 gen_helper_pre_hvc(tcg_env); 2816 /* Architecture requires ss advance before we do the actual work */ 2817 gen_ss_advance(s); 2818 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2819 return true; 2820 } 2821 2822 static bool trans_SMC(DisasContext *s, arg_i *a) 2823 { 2824 if (s->current_el == 0) { 2825 unallocated_encoding(s); 2826 return true; 2827 } 2828 gen_a64_update_pc(s, 0); 2829 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2830 /* Architecture requires ss advance before we do the actual work */ 2831 gen_ss_advance(s); 2832 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2833 return true; 2834 } 2835 2836 static bool trans_BRK(DisasContext *s, arg_i *a) 2837 { 2838 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2839 return true; 2840 } 2841 2842 static bool trans_HLT(DisasContext *s, arg_i *a) 2843 { 2844 /* 2845 * HLT. This has two purposes. 2846 * Architecturally, it is an external halting debug instruction. 2847 * Since QEMU doesn't implement external debug, we treat this as 2848 * it is required for halting debug disabled: it will UNDEF. 2849 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2850 */ 2851 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2852 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2853 } else { 2854 unallocated_encoding(s); 2855 } 2856 return true; 2857 } 2858 2859 /* 2860 * Load/Store exclusive instructions are implemented by remembering 2861 * the value/address loaded, and seeing if these are the same 2862 * when the store is performed. This is not actually the architecturally 2863 * mandated semantics, but it works for typical guest code sequences 2864 * and avoids having to monitor regular stores. 2865 * 2866 * The store exclusive uses the atomic cmpxchg primitives to avoid 2867 * races in multi-threaded linux-user and when MTTCG softmmu is 2868 * enabled. 
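 * In effect the exclusive monitor is modelled as the (address, value)
 * pair recorded by the load, and the store succeeds only if a cmpxchg
 * against that recorded value still matches.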
2869 */ 2870 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2871 int size, bool is_pair) 2872 { 2873 int idx = get_mem_index(s); 2874 TCGv_i64 dirty_addr, clean_addr; 2875 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2876 2877 s->is_ldex = true; 2878 dirty_addr = cpu_reg_sp(s, rn); 2879 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2880 2881 g_assert(size <= 3); 2882 if (is_pair) { 2883 g_assert(size >= 2); 2884 if (size == 2) { 2885 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2886 if (s->be_data == MO_LE) { 2887 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2888 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2889 } else { 2890 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2891 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2892 } 2893 } else { 2894 TCGv_i128 t16 = tcg_temp_new_i128(); 2895 2896 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2897 2898 if (s->be_data == MO_LE) { 2899 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2900 cpu_exclusive_high, t16); 2901 } else { 2902 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2903 cpu_exclusive_val, t16); 2904 } 2905 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2906 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2907 } 2908 } else { 2909 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2910 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2911 } 2912 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2913 } 2914 2915 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2916 int rn, int size, int is_pair) 2917 { 2918 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2919 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2920 * [addr] = {Rt}; 2921 * if (is_pair) { 2922 * [addr + datasize] = {Rt2}; 2923 * } 2924 * {Rd} = 0; 2925 * } else { 2926 * {Rd} = 1; 2927 * } 2928 * env->exclusive_addr = -1; 2929 */ 2930 TCGLabel *fail_label = gen_new_label(); 2931 TCGLabel *done_label = gen_new_label(); 2932 TCGv_i64 tmp, clean_addr; 2933 MemOp memop; 2934 2935 /* 2936 * FIXME: We are out of spec here. We have recorded only the address 2937 * from load_exclusive, not the entire range, and we assume that the 2938 * size of the access on both sides match. The architecture allows the 2939 * store to be smaller than the load, so long as the stored bytes are 2940 * within the range recorded by the load. 2941 */ 2942 2943 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2944 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2945 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2946 2947 /* 2948 * The write, and any associated faults, only happen if the virtual 2949 * and physical addresses pass the exclusive monitor check. These 2950 * faults are exceedingly unlikely, because normally the guest uses 2951 * the exact same address register for the load_exclusive, and we 2952 * would have recognized these faults there. 2953 * 2954 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2955 * unaligned 4-byte write within the range of an aligned 8-byte load. 2956 * With LSE2, the store would need to cross a 16-byte boundary when the 2957 * load did not, which would mean the store is outside the range 2958 * recorded for the monitor, which would have failed a corrected monitor 2959 * check above. 
For now, we assume no size change and retain the 2960 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2961 * 2962 * It is possible to trigger an MTE fault, by performing the load with 2963 * a virtual address with a valid tag and performing the store with the 2964 * same virtual address and a different invalid tag. 2965 */ 2966 memop = size + is_pair; 2967 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2968 memop |= MO_ALIGN; 2969 } 2970 memop = finalize_memop(s, memop); 2971 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2972 2973 tmp = tcg_temp_new_i64(); 2974 if (is_pair) { 2975 if (size == 2) { 2976 if (s->be_data == MO_LE) { 2977 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2978 } else { 2979 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2980 } 2981 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2982 cpu_exclusive_val, tmp, 2983 get_mem_index(s), memop); 2984 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2985 } else { 2986 TCGv_i128 t16 = tcg_temp_new_i128(); 2987 TCGv_i128 c16 = tcg_temp_new_i128(); 2988 TCGv_i64 a, b; 2989 2990 if (s->be_data == MO_LE) { 2991 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2992 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2993 cpu_exclusive_high); 2994 } else { 2995 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2996 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2997 cpu_exclusive_val); 2998 } 2999 3000 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 3001 get_mem_index(s), memop); 3002 3003 a = tcg_temp_new_i64(); 3004 b = tcg_temp_new_i64(); 3005 if (s->be_data == MO_LE) { 3006 tcg_gen_extr_i128_i64(a, b, t16); 3007 } else { 3008 tcg_gen_extr_i128_i64(b, a, t16); 3009 } 3010 3011 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 3012 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 3013 tcg_gen_or_i64(tmp, a, b); 3014 3015 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 3016 } 3017 } else { 3018 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 3019 cpu_reg(s, rt), get_mem_index(s), memop); 3020 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 3021 } 3022 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 3023 tcg_gen_br(done_label); 3024 3025 gen_set_label(fail_label); 3026 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 3027 gen_set_label(done_label); 3028 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 3029 } 3030 3031 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 3032 int rn, int size) 3033 { 3034 TCGv_i64 tcg_rs = cpu_reg(s, rs); 3035 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3036 int memidx = get_mem_index(s); 3037 TCGv_i64 clean_addr; 3038 MemOp memop; 3039 3040 if (rn == 31) { 3041 gen_check_sp_alignment(s); 3042 } 3043 memop = check_atomic_align(s, rn, size); 3044 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3045 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 3046 memidx, memop); 3047 } 3048 3049 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 3050 int rn, int size) 3051 { 3052 TCGv_i64 s1 = cpu_reg(s, rs); 3053 TCGv_i64 s2 = cpu_reg(s, rs + 1); 3054 TCGv_i64 t1 = cpu_reg(s, rt); 3055 TCGv_i64 t2 = cpu_reg(s, rt + 1); 3056 TCGv_i64 clean_addr; 3057 int memidx = get_mem_index(s); 3058 MemOp memop; 3059 3060 if (rn == 31) { 3061 gen_check_sp_alignment(s); 3062 } 3063 3064 /* This is a single atomic access, despite the "pair". 
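     * (size + 1 widens the MemOp, so a CASP of two 32-bit values becomes
     *  one 64-bit access and a pair of 64-bit values one 128-bit access.)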
*/ 3065 memop = check_atomic_align(s, rn, size + 1); 3066 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3067 3068 if (size == 2) { 3069 TCGv_i64 cmp = tcg_temp_new_i64(); 3070 TCGv_i64 val = tcg_temp_new_i64(); 3071 3072 if (s->be_data == MO_LE) { 3073 tcg_gen_concat32_i64(val, t1, t2); 3074 tcg_gen_concat32_i64(cmp, s1, s2); 3075 } else { 3076 tcg_gen_concat32_i64(val, t2, t1); 3077 tcg_gen_concat32_i64(cmp, s2, s1); 3078 } 3079 3080 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 3081 3082 if (s->be_data == MO_LE) { 3083 tcg_gen_extr32_i64(s1, s2, cmp); 3084 } else { 3085 tcg_gen_extr32_i64(s2, s1, cmp); 3086 } 3087 } else { 3088 TCGv_i128 cmp = tcg_temp_new_i128(); 3089 TCGv_i128 val = tcg_temp_new_i128(); 3090 3091 if (s->be_data == MO_LE) { 3092 tcg_gen_concat_i64_i128(val, t1, t2); 3093 tcg_gen_concat_i64_i128(cmp, s1, s2); 3094 } else { 3095 tcg_gen_concat_i64_i128(val, t2, t1); 3096 tcg_gen_concat_i64_i128(cmp, s2, s1); 3097 } 3098 3099 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 3100 3101 if (s->be_data == MO_LE) { 3102 tcg_gen_extr_i128_i64(s1, s2, cmp); 3103 } else { 3104 tcg_gen_extr_i128_i64(s2, s1, cmp); 3105 } 3106 } 3107 } 3108 3109 /* 3110 * Compute the ISS.SF bit for syndrome information if an exception 3111 * is taken on a load or store. This indicates whether the instruction 3112 * is accessing a 32-bit or 64-bit register. This logic is derived 3113 * from the ARMv8 specs for LDR (Shared decode for all encodings). 3114 */ 3115 static bool ldst_iss_sf(int size, bool sign, bool ext) 3116 { 3117 3118 if (sign) { 3119 /* 3120 * Signed loads are 64 bit results if we are not going to 3121 * do a zero-extend from 32 to 64 after the load. 3122 * (For a store, sign and ext are always false.) 3123 */ 3124 return !ext; 3125 } else { 3126 /* Unsigned loads/stores work at the specified size */ 3127 return size == MO_64; 3128 } 3129 } 3130 3131 static bool trans_STXR(DisasContext *s, arg_stxr *a) 3132 { 3133 if (a->rn == 31) { 3134 gen_check_sp_alignment(s); 3135 } 3136 if (a->lasr) { 3137 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3138 } 3139 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 3140 return true; 3141 } 3142 3143 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 3144 { 3145 if (a->rn == 31) { 3146 gen_check_sp_alignment(s); 3147 } 3148 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 3149 if (a->lasr) { 3150 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3151 } 3152 return true; 3153 } 3154 3155 static bool trans_STLR(DisasContext *s, arg_stlr *a) 3156 { 3157 TCGv_i64 clean_addr; 3158 MemOp memop; 3159 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3160 3161 /* 3162 * StoreLORelease is the same as Store-Release for QEMU, but 3163 * needs the feature-test. 3164 */ 3165 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3166 return false; 3167 } 3168 /* Generate ISS for non-exclusive accesses including LASR. 
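     * (The iss_sf and a->lasr values passed to do_gpr_st below populate
     *  the ISS syndrome fields used if the store takes a data abort.)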
*/ 3169 if (a->rn == 31) { 3170 gen_check_sp_alignment(s); 3171 } 3172 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3173 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3174 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3175 true, a->rn != 31, memop); 3176 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3177 iss_sf, a->lasr); 3178 return true; 3179 } 3180 3181 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3182 { 3183 TCGv_i64 clean_addr; 3184 MemOp memop; 3185 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3186 3187 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3188 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3189 return false; 3190 } 3191 /* Generate ISS for non-exclusive accesses including LASR. */ 3192 if (a->rn == 31) { 3193 gen_check_sp_alignment(s); 3194 } 3195 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3196 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3197 false, a->rn != 31, memop); 3198 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3199 a->rt, iss_sf, a->lasr); 3200 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3201 return true; 3202 } 3203 3204 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3205 { 3206 if (a->rn == 31) { 3207 gen_check_sp_alignment(s); 3208 } 3209 if (a->lasr) { 3210 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3211 } 3212 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3213 return true; 3214 } 3215 3216 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3217 { 3218 if (a->rn == 31) { 3219 gen_check_sp_alignment(s); 3220 } 3221 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3222 if (a->lasr) { 3223 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3224 } 3225 return true; 3226 } 3227 3228 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3229 { 3230 if (!dc_isar_feature(aa64_atomics, s)) { 3231 return false; 3232 } 3233 if (((a->rt | a->rs) & 1) != 0) { 3234 return false; 3235 } 3236 3237 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3238 return true; 3239 } 3240 3241 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3242 { 3243 if (!dc_isar_feature(aa64_atomics, s)) { 3244 return false; 3245 } 3246 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3247 return true; 3248 } 3249 3250 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3251 { 3252 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3253 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3254 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3255 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3256 3257 gen_pc_plus_diff(s, clean_addr, a->imm); 3258 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3259 false, true, a->rt, iss_sf, false); 3260 return true; 3261 } 3262 3263 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3264 { 3265 /* Load register (literal), vector version */ 3266 TCGv_i64 clean_addr; 3267 MemOp memop; 3268 3269 if (!fp_access_check(s)) { 3270 return true; 3271 } 3272 memop = finalize_memop_asimd(s, a->sz); 3273 clean_addr = tcg_temp_new_i64(); 3274 gen_pc_plus_diff(s, clean_addr, a->imm); 3275 do_fp_ld(s, a->rt, clean_addr, memop); 3276 return true; 3277 } 3278 3279 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3280 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3281 uint64_t offset, bool is_store, MemOp mop) 3282 { 3283 if (a->rn == 31) { 3284 gen_check_sp_alignment(s); 3285 } 3286 3287 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3288 if (!a->p) { 3289 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3290 } 3291 3292 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3293 (a->w || a->rn != 31), 2 << a->sz, mop); 3294 } 3295 3296 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3297 TCGv_i64 dirty_addr, uint64_t offset) 3298 { 3299 if (a->w) { 3300 if (a->p) { 3301 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3302 } 3303 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3304 } 3305 } 3306 3307 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3308 { 3309 uint64_t offset = a->imm << a->sz; 3310 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3311 MemOp mop = finalize_memop(s, a->sz); 3312 3313 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3314 tcg_rt = cpu_reg(s, a->rt); 3315 tcg_rt2 = cpu_reg(s, a->rt2); 3316 /* 3317 * We built mop above for the single logical access -- rebuild it 3318 * now for the paired operation. 3319 * 3320 * With LSE2, non-sign-extending pairs are treated atomically if 3321 * aligned, and if unaligned one of the pair will be completely 3322 * within a 16-byte block and that element will be atomic. 3323 * Otherwise each element is separately atomic. 3324 * In all cases, issue one operation with the correct atomicity. 3325 */ 3326 mop = a->sz + 1; 3327 if (s->align_mem) { 3328 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3329 } 3330 mop = finalize_memop_pair(s, mop); 3331 if (a->sz == 2) { 3332 TCGv_i64 tmp = tcg_temp_new_i64(); 3333 3334 if (s->be_data == MO_LE) { 3335 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3336 } else { 3337 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3338 } 3339 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3340 } else { 3341 TCGv_i128 tmp = tcg_temp_new_i128(); 3342 3343 if (s->be_data == MO_LE) { 3344 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3345 } else { 3346 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3347 } 3348 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3349 } 3350 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3351 return true; 3352 } 3353 3354 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3355 { 3356 uint64_t offset = a->imm << a->sz; 3357 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3358 MemOp mop = finalize_memop(s, a->sz); 3359 3360 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3361 tcg_rt = cpu_reg(s, a->rt); 3362 tcg_rt2 = cpu_reg(s, a->rt2); 3363 3364 /* 3365 * We built mop above for the single logical access -- rebuild it 3366 * now for the paired operation. 3367 * 3368 * With LSE2, non-sign-extending pairs are treated atomically if 3369 * aligned, and if unaligned one of the pair will be completely 3370 * within a 16-byte block and that element will be atomic. 3371 * Otherwise each element is separately atomic. 3372 * In all cases, issue one operation with the correct atomicity. 3373 * 3374 * This treats sign-extending loads like zero-extending loads, 3375 * since that reuses the most code below. 3376 */ 3377 mop = a->sz + 1; 3378 if (s->align_mem) { 3379 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3380 } 3381 mop = finalize_memop_pair(s, mop); 3382 if (a->sz == 2) { 3383 int o2 = s->be_data == MO_LE ? 
32 : 0; 3384 int o1 = o2 ^ 32; 3385 3386 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3387 if (a->sign) { 3388 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3389 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3390 } else { 3391 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3392 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3393 } 3394 } else { 3395 TCGv_i128 tmp = tcg_temp_new_i128(); 3396 3397 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3398 if (s->be_data == MO_LE) { 3399 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3400 } else { 3401 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3402 } 3403 } 3404 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3405 return true; 3406 } 3407 3408 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3409 { 3410 uint64_t offset = a->imm << a->sz; 3411 TCGv_i64 clean_addr, dirty_addr; 3412 MemOp mop; 3413 3414 if (!fp_access_check(s)) { 3415 return true; 3416 } 3417 3418 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3419 mop = finalize_memop_asimd(s, a->sz); 3420 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3421 do_fp_st(s, a->rt, clean_addr, mop); 3422 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3423 do_fp_st(s, a->rt2, clean_addr, mop); 3424 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3425 return true; 3426 } 3427 3428 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3429 { 3430 uint64_t offset = a->imm << a->sz; 3431 TCGv_i64 clean_addr, dirty_addr; 3432 MemOp mop; 3433 3434 if (!fp_access_check(s)) { 3435 return true; 3436 } 3437 3438 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3439 mop = finalize_memop_asimd(s, a->sz); 3440 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3441 do_fp_ld(s, a->rt, clean_addr, mop); 3442 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3443 do_fp_ld(s, a->rt2, clean_addr, mop); 3444 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3445 return true; 3446 } 3447 3448 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3449 { 3450 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3451 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3452 MemOp mop; 3453 TCGv_i128 tmp; 3454 3455 /* STGP only comes in one size. */ 3456 tcg_debug_assert(a->sz == MO_64); 3457 3458 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3459 return false; 3460 } 3461 3462 if (a->rn == 31) { 3463 gen_check_sp_alignment(s); 3464 } 3465 3466 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3467 if (!a->p) { 3468 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3469 } 3470 3471 clean_addr = clean_data_tbi(s, dirty_addr); 3472 tcg_rt = cpu_reg(s, a->rt); 3473 tcg_rt2 = cpu_reg(s, a->rt2); 3474 3475 /* 3476 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3477 * and one tag operation. We implement it as one single aligned 16-byte 3478 * memory operation for convenience. Note that the alignment ensures 3479 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3480 */ 3481 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3482 3483 tmp = tcg_temp_new_i128(); 3484 if (s->be_data == MO_LE) { 3485 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3486 } else { 3487 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3488 } 3489 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3490 3491 /* Perform the tag store, if tag access enabled. 
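     * (If allocation tag access is disabled, the data store above has
     *  already been emitted and the tag write is simply skipped.)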
*/ 3492 if (s->ata[0]) { 3493 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3494 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3495 } else { 3496 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3497 } 3498 } 3499 3500 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3501 return true; 3502 } 3503 3504 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3505 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3506 uint64_t offset, bool is_store, MemOp mop) 3507 { 3508 int memidx; 3509 3510 if (a->rn == 31) { 3511 gen_check_sp_alignment(s); 3512 } 3513 3514 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3515 if (!a->p) { 3516 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3517 } 3518 memidx = get_a64_user_mem_index(s, a->unpriv); 3519 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3520 a->w || a->rn != 31, 3521 mop, a->unpriv, memidx); 3522 } 3523 3524 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3525 TCGv_i64 dirty_addr, uint64_t offset) 3526 { 3527 if (a->w) { 3528 if (a->p) { 3529 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3530 } 3531 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3532 } 3533 } 3534 3535 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3536 { 3537 bool iss_sf, iss_valid = !a->w; 3538 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3539 int memidx = get_a64_user_mem_index(s, a->unpriv); 3540 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3541 3542 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3543 3544 tcg_rt = cpu_reg(s, a->rt); 3545 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3546 3547 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3548 iss_valid, a->rt, iss_sf, false); 3549 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3550 return true; 3551 } 3552 3553 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3554 { 3555 bool iss_sf, iss_valid = !a->w; 3556 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3557 int memidx = get_a64_user_mem_index(s, a->unpriv); 3558 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3559 3560 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3561 3562 tcg_rt = cpu_reg(s, a->rt); 3563 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3564 3565 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3566 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3567 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3568 return true; 3569 } 3570 3571 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3572 { 3573 TCGv_i64 clean_addr, dirty_addr; 3574 MemOp mop; 3575 3576 if (!fp_access_check(s)) { 3577 return true; 3578 } 3579 mop = finalize_memop_asimd(s, a->sz); 3580 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3581 do_fp_st(s, a->rt, clean_addr, mop); 3582 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3583 return true; 3584 } 3585 3586 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3587 { 3588 TCGv_i64 clean_addr, dirty_addr; 3589 MemOp mop; 3590 3591 if (!fp_access_check(s)) { 3592 return true; 3593 } 3594 mop = finalize_memop_asimd(s, a->sz); 3595 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3596 do_fp_ld(s, a->rt, clean_addr, mop); 3597 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3598 return true; 3599 } 3600 3601 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3602 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3603 bool is_store, MemOp memop) 3604 { 3605 TCGv_i64 tcg_rm; 3606 3607 if (a->rn == 31) { 3608 
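        /* Rn == 31 selects SP as the base register here, not XZR. */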
gen_check_sp_alignment(s); 3609 } 3610 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3611 3612 tcg_rm = read_cpu_reg(s, a->rm, 1); 3613 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3614 3615 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3616 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3617 } 3618 3619 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3620 { 3621 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3622 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3623 MemOp memop; 3624 3625 if (extract32(a->opt, 1, 1) == 0) { 3626 return false; 3627 } 3628 3629 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3630 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3631 tcg_rt = cpu_reg(s, a->rt); 3632 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3633 a->ext, true, a->rt, iss_sf, false); 3634 return true; 3635 } 3636 3637 static bool trans_STR(DisasContext *s, arg_ldst *a) 3638 { 3639 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3640 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3641 MemOp memop; 3642 3643 if (extract32(a->opt, 1, 1) == 0) { 3644 return false; 3645 } 3646 3647 memop = finalize_memop(s, a->sz); 3648 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3649 tcg_rt = cpu_reg(s, a->rt); 3650 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3651 return true; 3652 } 3653 3654 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3655 { 3656 TCGv_i64 clean_addr, dirty_addr; 3657 MemOp memop; 3658 3659 if (extract32(a->opt, 1, 1) == 0) { 3660 return false; 3661 } 3662 3663 if (!fp_access_check(s)) { 3664 return true; 3665 } 3666 3667 memop = finalize_memop_asimd(s, a->sz); 3668 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3669 do_fp_ld(s, a->rt, clean_addr, memop); 3670 return true; 3671 } 3672 3673 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3674 { 3675 TCGv_i64 clean_addr, dirty_addr; 3676 MemOp memop; 3677 3678 if (extract32(a->opt, 1, 1) == 0) { 3679 return false; 3680 } 3681 3682 if (!fp_access_check(s)) { 3683 return true; 3684 } 3685 3686 memop = finalize_memop_asimd(s, a->sz); 3687 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3688 do_fp_st(s, a->rt, clean_addr, memop); 3689 return true; 3690 } 3691 3692 3693 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3694 int sign, bool invert) 3695 { 3696 MemOp mop = a->sz | sign; 3697 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3698 3699 if (a->rn == 31) { 3700 gen_check_sp_alignment(s); 3701 } 3702 mop = check_atomic_align(s, a->rn, mop); 3703 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3704 a->rn != 31, mop); 3705 tcg_rs = read_cpu_reg(s, a->rs, true); 3706 tcg_rt = cpu_reg(s, a->rt); 3707 if (invert) { 3708 tcg_gen_not_i64(tcg_rs, tcg_rs); 3709 } 3710 /* 3711 * The tcg atomic primitives are all full barriers. Therefore we 3712 * can ignore the Acquire and Release bits of this instruction. 
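     * (A full barrier is strictly stronger than either acquire or release
     *  ordering, so this is conservative but correct.)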
3713 */ 3714 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3715 3716 if (mop & MO_SIGN) { 3717 switch (a->sz) { 3718 case MO_8: 3719 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3720 break; 3721 case MO_16: 3722 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3723 break; 3724 case MO_32: 3725 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3726 break; 3727 case MO_64: 3728 break; 3729 default: 3730 g_assert_not_reached(); 3731 } 3732 } 3733 return true; 3734 } 3735 3736 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3737 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3738 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3739 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3740 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3741 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3742 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3743 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3744 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3745 3746 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3747 { 3748 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3749 TCGv_i64 clean_addr; 3750 MemOp mop; 3751 3752 if (!dc_isar_feature(aa64_atomics, s) || 3753 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3754 return false; 3755 } 3756 if (a->rn == 31) { 3757 gen_check_sp_alignment(s); 3758 } 3759 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3760 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3761 a->rn != 31, mop); 3762 /* 3763 * LDAPR* are a special case because they are a simple load, not a 3764 * fetch-and-do-something op. 3765 * The architectural consistency requirements here are weaker than 3766 * full load-acquire (we only need "load-acquire processor consistent"), 3767 * but we choose to implement them as full LDAQ. 3768 */ 3769 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3770 true, a->rt, iss_sf, true); 3771 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3772 return true; 3773 } 3774 3775 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3776 { 3777 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3778 MemOp memop; 3779 3780 /* Load with pointer authentication */ 3781 if (!dc_isar_feature(aa64_pauth, s)) { 3782 return false; 3783 } 3784 3785 if (a->rn == 31) { 3786 gen_check_sp_alignment(s); 3787 } 3788 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3789 3790 if (s->pauth_active) { 3791 if (!a->m) { 3792 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3793 tcg_constant_i64(0)); 3794 } else { 3795 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3796 tcg_constant_i64(0)); 3797 } 3798 } 3799 3800 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3801 3802 memop = finalize_memop(s, MO_64); 3803 3804 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
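     * (The pointer authentication code has already been dealt with by the
     *  autda/autdb helpers above; gen_mte_check1 only cares about the
     *  top-byte tag.)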
*/ 3805 clean_addr = gen_mte_check1(s, dirty_addr, false, 3806 a->w || a->rn != 31, memop); 3807 3808 tcg_rt = cpu_reg(s, a->rt); 3809 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3810 /* extend */ false, /* iss_valid */ !a->w, 3811 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3812 3813 if (a->w) { 3814 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3815 } 3816 return true; 3817 } 3818 3819 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3820 { 3821 TCGv_i64 clean_addr, dirty_addr; 3822 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3823 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3824 3825 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3826 return false; 3827 } 3828 3829 if (a->rn == 31) { 3830 gen_check_sp_alignment(s); 3831 } 3832 3833 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3834 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3835 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3836 clean_addr = clean_data_tbi(s, dirty_addr); 3837 3838 /* 3839 * Load-AcquirePC semantics; we implement as the slightly more 3840 * restrictive Load-Acquire. 3841 */ 3842 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3843 a->rt, iss_sf, true); 3844 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3845 return true; 3846 } 3847 3848 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3849 { 3850 TCGv_i64 clean_addr, dirty_addr; 3851 MemOp mop = a->sz; 3852 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3853 3854 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3855 return false; 3856 } 3857 3858 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3859 3860 if (a->rn == 31) { 3861 gen_check_sp_alignment(s); 3862 } 3863 3864 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3865 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3866 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3867 clean_addr = clean_data_tbi(s, dirty_addr); 3868 3869 /* Store-Release semantics */ 3870 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3871 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3872 return true; 3873 } 3874 3875 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3876 { 3877 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3878 MemOp endian, align, mop; 3879 3880 int total; /* total bytes */ 3881 int elements; /* elements per vector */ 3882 int r; 3883 int size = a->sz; 3884 3885 if (!a->p && a->rm != 0) { 3886 /* For non-postindexed accesses the Rm field must be 0 */ 3887 return false; 3888 } 3889 if (size == 3 && !a->q && a->selem != 1) { 3890 return false; 3891 } 3892 if (!fp_access_check(s)) { 3893 return true; 3894 } 3895 3896 if (a->rn == 31) { 3897 gen_check_sp_alignment(s); 3898 } 3899 3900 /* For our purposes, bytes are always little-endian. */ 3901 endian = s->be_data; 3902 if (size == 0) { 3903 endian = MO_LE; 3904 } 3905 3906 total = a->rpt * a->selem * (a->q ? 16 : 8); 3907 tcg_rn = cpu_reg_sp(s, a->rn); 3908 3909 /* 3910 * Issue the MTE check vs the logical repeat count, before we 3911 * promote consecutive little-endian elements below. 3912 */ 3913 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3914 finalize_memop_asimd(s, size)); 3915 3916 /* 3917 * Consecutive little-endian elements from a single register 3918 * can be promoted to a larger little-endian operation. 3919 */ 3920 align = MO_ALIGN; 3921 if (a->selem == 1 && endian == MO_LE) { 3922 align = pow2_align(size); 3923 size = 3; 3924 } 3925 if (!s->align_mem) { 3926 align = 0; 3927 } 3928 mop = endian | size | align; 3929 3930 elements = (a->q ? 
16 : 8) >> size; 3931 tcg_ebytes = tcg_constant_i64(1 << size); 3932 for (r = 0; r < a->rpt; r++) { 3933 int e; 3934 for (e = 0; e < elements; e++) { 3935 int xs; 3936 for (xs = 0; xs < a->selem; xs++) { 3937 int tt = (a->rt + r + xs) % 32; 3938 do_vec_ld(s, tt, e, clean_addr, mop); 3939 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3940 } 3941 } 3942 } 3943 3944 /* 3945 * For non-quad operations, setting a slice of the low 64 bits of 3946 * the register clears the high 64 bits (in the ARM ARM pseudocode 3947 * this is implicit in the fact that 'rval' is a 64 bit wide 3948 * variable). For quad operations, we might still need to zero 3949 * the high bits of SVE. 3950 */ 3951 for (r = 0; r < a->rpt * a->selem; r++) { 3952 int tt = (a->rt + r) % 32; 3953 clear_vec_high(s, a->q, tt); 3954 } 3955 3956 if (a->p) { 3957 if (a->rm == 31) { 3958 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3959 } else { 3960 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3961 } 3962 } 3963 return true; 3964 } 3965 3966 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3967 { 3968 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3969 MemOp endian, align, mop; 3970 3971 int total; /* total bytes */ 3972 int elements; /* elements per vector */ 3973 int r; 3974 int size = a->sz; 3975 3976 if (!a->p && a->rm != 0) { 3977 /* For non-postindexed accesses the Rm field must be 0 */ 3978 return false; 3979 } 3980 if (size == 3 && !a->q && a->selem != 1) { 3981 return false; 3982 } 3983 if (!fp_access_check(s)) { 3984 return true; 3985 } 3986 3987 if (a->rn == 31) { 3988 gen_check_sp_alignment(s); 3989 } 3990 3991 /* For our purposes, bytes are always little-endian. */ 3992 endian = s->be_data; 3993 if (size == 0) { 3994 endian = MO_LE; 3995 } 3996 3997 total = a->rpt * a->selem * (a->q ? 16 : 8); 3998 tcg_rn = cpu_reg_sp(s, a->rn); 3999 4000 /* 4001 * Issue the MTE check vs the logical repeat count, before we 4002 * promote consecutive little-endian elements below. 4003 */ 4004 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 4005 finalize_memop_asimd(s, size)); 4006 4007 /* 4008 * Consecutive little-endian elements from a single register 4009 * can be promoted to a larger little-endian operation. 4010 */ 4011 align = MO_ALIGN; 4012 if (a->selem == 1 && endian == MO_LE) { 4013 align = pow2_align(size); 4014 size = 3; 4015 } 4016 if (!s->align_mem) { 4017 align = 0; 4018 } 4019 mop = endian | size | align; 4020 4021 elements = (a->q ? 
16 : 8) >> size; 4022 tcg_ebytes = tcg_constant_i64(1 << size); 4023 for (r = 0; r < a->rpt; r++) { 4024 int e; 4025 for (e = 0; e < elements; e++) { 4026 int xs; 4027 for (xs = 0; xs < a->selem; xs++) { 4028 int tt = (a->rt + r + xs) % 32; 4029 do_vec_st(s, tt, e, clean_addr, mop); 4030 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4031 } 4032 } 4033 } 4034 4035 if (a->p) { 4036 if (a->rm == 31) { 4037 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4038 } else { 4039 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4040 } 4041 } 4042 return true; 4043 } 4044 4045 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 4046 { 4047 int xs, total, rt; 4048 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4049 MemOp mop; 4050 4051 if (!a->p && a->rm != 0) { 4052 return false; 4053 } 4054 if (!fp_access_check(s)) { 4055 return true; 4056 } 4057 4058 if (a->rn == 31) { 4059 gen_check_sp_alignment(s); 4060 } 4061 4062 total = a->selem << a->scale; 4063 tcg_rn = cpu_reg_sp(s, a->rn); 4064 4065 mop = finalize_memop_asimd(s, a->scale); 4066 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 4067 total, mop); 4068 4069 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4070 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4071 do_vec_st(s, rt, a->index, clean_addr, mop); 4072 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4073 } 4074 4075 if (a->p) { 4076 if (a->rm == 31) { 4077 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4078 } else { 4079 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4080 } 4081 } 4082 return true; 4083 } 4084 4085 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 4086 { 4087 int xs, total, rt; 4088 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4089 MemOp mop; 4090 4091 if (!a->p && a->rm != 0) { 4092 return false; 4093 } 4094 if (!fp_access_check(s)) { 4095 return true; 4096 } 4097 4098 if (a->rn == 31) { 4099 gen_check_sp_alignment(s); 4100 } 4101 4102 total = a->selem << a->scale; 4103 tcg_rn = cpu_reg_sp(s, a->rn); 4104 4105 mop = finalize_memop_asimd(s, a->scale); 4106 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4107 total, mop); 4108 4109 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4110 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4111 do_vec_ld(s, rt, a->index, clean_addr, mop); 4112 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4113 } 4114 4115 if (a->p) { 4116 if (a->rm == 31) { 4117 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4118 } else { 4119 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4120 } 4121 } 4122 return true; 4123 } 4124 4125 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 4126 { 4127 int xs, total, rt; 4128 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4129 MemOp mop; 4130 4131 if (!a->p && a->rm != 0) { 4132 return false; 4133 } 4134 if (!fp_access_check(s)) { 4135 return true; 4136 } 4137 4138 if (a->rn == 31) { 4139 gen_check_sp_alignment(s); 4140 } 4141 4142 total = a->selem << a->scale; 4143 tcg_rn = cpu_reg_sp(s, a->rn); 4144 4145 mop = finalize_memop_asimd(s, a->scale); 4146 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4147 total, mop); 4148 4149 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4150 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4151 /* Load and replicate to all elements */ 4152 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 4153 4154 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4155 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 4156 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 4157 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4158 } 4159 4160 if (a->p) { 4161 if (a->rm == 31) { 4162 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4163 } else { 4164 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4165 } 4166 } 4167 return true; 4168 } 4169 4170 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4171 { 4172 TCGv_i64 addr, clean_addr, tcg_rt; 4173 int size = 4 << s->dcz_blocksize; 4174 4175 if (!dc_isar_feature(aa64_mte, s)) { 4176 return false; 4177 } 4178 if (s->current_el == 0) { 4179 return false; 4180 } 4181 4182 if (a->rn == 31) { 4183 gen_check_sp_alignment(s); 4184 } 4185 4186 addr = read_cpu_reg_sp(s, a->rn, true); 4187 tcg_gen_addi_i64(addr, addr, a->imm); 4188 tcg_rt = cpu_reg(s, a->rt); 4189 4190 if (s->ata[0]) { 4191 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4192 } 4193 /* 4194 * The non-tags portion of STZGM is mostly like DC_ZVA, 4195 * except the alignment happens before the access. 4196 */ 4197 clean_addr = clean_data_tbi(s, addr); 4198 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4199 gen_helper_dc_zva(tcg_env, clean_addr); 4200 return true; 4201 } 4202 4203 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4204 { 4205 TCGv_i64 addr, clean_addr, tcg_rt; 4206 4207 if (!dc_isar_feature(aa64_mte, s)) { 4208 return false; 4209 } 4210 if (s->current_el == 0) { 4211 return false; 4212 } 4213 4214 if (a->rn == 31) { 4215 gen_check_sp_alignment(s); 4216 } 4217 4218 addr = read_cpu_reg_sp(s, a->rn, true); 4219 tcg_gen_addi_i64(addr, addr, a->imm); 4220 tcg_rt = cpu_reg(s, a->rt); 4221 4222 if (s->ata[0]) { 4223 gen_helper_stgm(tcg_env, addr, tcg_rt); 4224 } else { 4225 MMUAccessType acc = MMU_DATA_STORE; 4226 int size = 4 << s->gm_blocksize; 4227 4228 clean_addr = clean_data_tbi(s, addr); 4229 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4230 gen_probe_access(s, clean_addr, acc, size); 4231 } 4232 return true; 4233 } 4234 4235 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4236 { 4237 TCGv_i64 addr, clean_addr, tcg_rt; 4238 4239 if (!dc_isar_feature(aa64_mte, s)) { 4240 return false; 4241 } 4242 if (s->current_el == 0) { 4243 return false; 4244 } 4245 4246 if (a->rn == 31) { 4247 gen_check_sp_alignment(s); 4248 } 4249 4250 addr = read_cpu_reg_sp(s, a->rn, true); 4251 tcg_gen_addi_i64(addr, addr, a->imm); 4252 tcg_rt = cpu_reg(s, a->rt); 4253 4254 if (s->ata[0]) { 4255 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4256 } else { 4257 MMUAccessType acc = MMU_DATA_LOAD; 4258 int size = 4 << s->gm_blocksize; 4259 4260 clean_addr = clean_data_tbi(s, addr); 4261 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4262 gen_probe_access(s, clean_addr, acc, size); 4263 /* The result tags are zeros. */ 4264 tcg_gen_movi_i64(tcg_rt, 0); 4265 } 4266 return true; 4267 } 4268 4269 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4270 { 4271 TCGv_i64 addr, clean_addr, tcg_rt; 4272 4273 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4274 return false; 4275 } 4276 4277 if (a->rn == 31) { 4278 gen_check_sp_alignment(s); 4279 } 4280 4281 addr = read_cpu_reg_sp(s, a->rn, true); 4282 if (!a->p) { 4283 /* pre-index or signed offset */ 4284 tcg_gen_addi_i64(addr, addr, a->imm); 4285 } 4286 4287 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4288 tcg_rt = cpu_reg(s, a->rt); 4289 if (s->ata[0]) { 4290 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4291 } else { 4292 /* 4293 * Tag access disabled: we must check for aborts on the load 4294 * load from [rn+offset], and then insert a 0 tag into rt. 
4295 */ 4296 clean_addr = clean_data_tbi(s, addr); 4297 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4298 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4299 } 4300 4301 if (a->w) { 4302 /* pre-index or post-index */ 4303 if (a->p) { 4304 /* post-index */ 4305 tcg_gen_addi_i64(addr, addr, a->imm); 4306 } 4307 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4308 } 4309 return true; 4310 } 4311 4312 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4313 { 4314 TCGv_i64 addr, tcg_rt; 4315 4316 if (a->rn == 31) { 4317 gen_check_sp_alignment(s); 4318 } 4319 4320 addr = read_cpu_reg_sp(s, a->rn, true); 4321 if (!a->p) { 4322 /* pre-index or signed offset */ 4323 tcg_gen_addi_i64(addr, addr, a->imm); 4324 } 4325 tcg_rt = cpu_reg_sp(s, a->rt); 4326 if (!s->ata[0]) { 4327 /* 4328 * For STG and ST2G, we need to check alignment and probe memory. 4329 * TODO: For STZG and STZ2G, we could rely on the stores below, 4330 * at least for system mode; user-only won't enforce alignment. 4331 */ 4332 if (is_pair) { 4333 gen_helper_st2g_stub(tcg_env, addr); 4334 } else { 4335 gen_helper_stg_stub(tcg_env, addr); 4336 } 4337 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4338 if (is_pair) { 4339 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4340 } else { 4341 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4342 } 4343 } else { 4344 if (is_pair) { 4345 gen_helper_st2g(tcg_env, addr, tcg_rt); 4346 } else { 4347 gen_helper_stg(tcg_env, addr, tcg_rt); 4348 } 4349 } 4350 4351 if (is_zero) { 4352 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4353 TCGv_i64 zero64 = tcg_constant_i64(0); 4354 TCGv_i128 zero128 = tcg_temp_new_i128(); 4355 int mem_index = get_mem_index(s); 4356 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4357 4358 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4359 4360 /* This is 1 or 2 atomic 16-byte operations. */ 4361 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4362 if (is_pair) { 4363 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4364 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4365 } 4366 } 4367 4368 if (a->w) { 4369 /* pre-index or post-index */ 4370 if (a->p) { 4371 /* post-index */ 4372 tcg_gen_addi_i64(addr, addr, a->imm); 4373 } 4374 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4375 } 4376 return true; 4377 } 4378 4379 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4380 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4381 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4382 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4383 4384 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4385 4386 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4387 bool is_setg, SetFn fn) 4388 { 4389 int memidx; 4390 uint32_t syndrome, desc = 0; 4391 4392 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4393 return false; 4394 } 4395 4396 /* 4397 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4398 * us to pull this check before the CheckMOPSEnabled() test 4399 * (which we do in the helper function) 4400 */ 4401 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4402 a->rd == 31 || a->rn == 31) { 4403 return false; 4404 } 4405 4406 memidx = get_a64_user_mem_index(s, a->unpriv); 4407 4408 /* 4409 * We pass option_a == true, matching our implementation; 4410 * we pass wrong_option == false: helper function may set that bit. 
4411 */ 4412 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4413 is_epilogue, false, true, a->rd, a->rs, a->rn); 4414 4415 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4416 /* We may need to do MTE tag checking, so assemble the descriptor */ 4417 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4418 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4419 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4420 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4421 } 4422 /* The helper function always needs the memidx even with MTE disabled */ 4423 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4424 4425 /* 4426 * The helper needs the register numbers, but since they're in 4427 * the syndrome anyway, we let it extract them from there rather 4428 * than passing in an extra three integer arguments. 4429 */ 4430 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4431 return true; 4432 } 4433 4434 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4435 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4436 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4437 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4438 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4439 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4440 4441 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4442 4443 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4444 { 4445 int rmemidx, wmemidx; 4446 uint32_t syndrome, rdesc = 0, wdesc = 0; 4447 bool wunpriv = extract32(a->options, 0, 1); 4448 bool runpriv = extract32(a->options, 1, 1); 4449 4450 /* 4451 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4452 * us to pull this check before the CheckMOPSEnabled() test 4453 * (which we do in the helper function) 4454 */ 4455 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4456 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4457 return false; 4458 } 4459 4460 rmemidx = get_a64_user_mem_index(s, runpriv); 4461 wmemidx = get_a64_user_mem_index(s, wunpriv); 4462 4463 /* 4464 * We pass option_a == true, matching our implementation; 4465 * we pass wrong_option == false: helper function may set that bit. 4466 */ 4467 syndrome = syn_mop(false, false, a->options, is_epilogue, 4468 false, true, a->rd, a->rs, a->rn); 4469 4470 /* If we need to do MTE tag checking, assemble the descriptors */ 4471 if (s->mte_active[runpriv]) { 4472 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4473 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4474 } 4475 if (s->mte_active[wunpriv]) { 4476 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4477 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4478 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4479 } 4480 /* The helper function needs these parts of the descriptor regardless */ 4481 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4482 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4483 4484 /* 4485 * The helper needs the register numbers, but since they're in 4486 * the syndrome anyway, we let it extract them from there rather 4487 * than passing in an extra three integer arguments. 
4488 */ 4489 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4490 tcg_constant_i32(rdesc)); 4491 return true; 4492 } 4493 4494 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4495 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4496 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4497 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4498 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4499 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4500 4501 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4502 4503 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4504 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4505 { 4506 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4507 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4508 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4509 4510 fn(tcg_rd, tcg_rn, tcg_imm); 4511 if (!a->sf) { 4512 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4513 } 4514 return true; 4515 } 4516 4517 /* 4518 * PC-rel. addressing 4519 */ 4520 4521 static bool trans_ADR(DisasContext *s, arg_ri *a) 4522 { 4523 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4524 return true; 4525 } 4526 4527 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4528 { 4529 int64_t offset = (int64_t)a->imm << 12; 4530 4531 /* The page offset is ok for CF_PCREL. */ 4532 offset -= s->pc_curr & 0xfff; 4533 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4534 return true; 4535 } 4536 4537 /* 4538 * Add/subtract (immediate) 4539 */ 4540 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4541 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4542 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4543 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4544 4545 /* 4546 * Add/subtract (immediate, with tags) 4547 */ 4548 4549 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4550 bool sub_op) 4551 { 4552 TCGv_i64 tcg_rn, tcg_rd; 4553 int imm; 4554 4555 imm = a->uimm6 << LOG2_TAG_GRANULE; 4556 if (sub_op) { 4557 imm = -imm; 4558 } 4559 4560 tcg_rn = cpu_reg_sp(s, a->rn); 4561 tcg_rd = cpu_reg_sp(s, a->rd); 4562 4563 if (s->ata[0]) { 4564 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4565 tcg_constant_i32(imm), 4566 tcg_constant_i32(a->uimm4)); 4567 } else { 4568 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4569 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4570 } 4571 return true; 4572 } 4573 4574 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4575 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4576 4577 /* The input should be a value in the bottom e bits (with higher 4578 * bits zero); returns that value replicated into every element 4579 * of size e in a 64 bit integer. 4580 */ 4581 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4582 { 4583 assert(e != 0); 4584 while (e < 64) { 4585 mask |= mask << e; 4586 e *= 2; 4587 } 4588 return mask; 4589 } 4590 4591 /* 4592 * Logical (immediate) 4593 */ 4594 4595 /* 4596 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4597 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4598 * value (ie should cause a guest UNDEF exception), and true if they are 4599 * valid, in which case the decoded bit pattern is written to result. 
*/
4600
4601 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4602                             unsigned int imms, unsigned int immr)
4603 {
4604     uint64_t mask;
4605     unsigned e, levels, s, r;
4606     int len;
4607
4608     assert(immn < 2 && imms < 64 && immr < 64);
4609
4610     /* The bit patterns we create here are 64 bit patterns which
4611      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4612      * 64 bits each. Each element contains the same value: a run
4613      * of between 1 and e-1 non-zero bits, rotated within the
4614      * element by between 0 and e-1 bits.
4615      *
4616      * The element size and run length are encoded into immn (1 bit)
4617      * and imms (6 bits) as follows:
4618      * 64 bit elements: immn = 1, imms = <length of run - 1>
4619      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4620      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4621      * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4622      * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4623      * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4624      * Notice that immn = 0, imms = 11111x is the only combination
4625      * not covered by one of the above options; this is reserved.
4626      * Further, <length of run - 1> all-ones is a reserved pattern.
4627      *
4628      * In all cases the rotation is by immr % e (and immr is 6 bits).
4629      */
4630
4631     /* First determine the element size */
4632     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4633     if (len < 1) {
4634         /* This is the immn == 0, imms == 0x11111x case */
4635         return false;
4636     }
4637     e = 1 << len;
4638
4639     levels = e - 1;
4640     s = imms & levels;
4641     r = immr & levels;
4642
4643     if (s == levels) {
4644         /* <length of run - 1> mustn't be all-ones. */
4645         return false;
4646     }
4647
4648     /* Create the value of one element: s+1 set bits rotated
4649      * by r within the element (which is e bits wide)...
4650      */
4651     mask = MAKE_64BIT_MASK(0, s + 1);
4652     if (r) {
4653         mask = (mask >> r) | (mask << (e - r));
4654         mask &= MAKE_64BIT_MASK(0, e);
4655     }
4656     /* ...then replicate the element over the whole 64 bit value */
4657     mask = bitfield_replicate(mask, e);
4658     *result = mask;
4659     return true;
4660 }
4661
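/*
 * Worked example (illustrative): immn = 0, imms = 0b001111 (0x0f), immr = 0
 * gives len = 31 - clz32(0b110000) = 5, so e = 32, levels = 31, s = 15 and
 * r = 0.  The element mask is MAKE_64BIT_MASK(0, 16) = 0xffff, and
 * replicating it across 32-bit elements yields 0x0000ffff0000ffff.
 * Likewise immn = 0, imms = 0b111100, immr = 0 selects 2-bit elements with
 * a one-bit run and produces 0x5555555555555555.  A hypothetical caller:
 *
 *     uint64_t wmask;
 *     if (!logic_imm_decode_wmask(&wmask, 0, 0x0f, 0)) {
 *         // reserved encoding: UNDEF
 *     }
 *     // wmask == 0x0000ffff0000ffff
 */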
4662 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4663                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4664 {
4665     TCGv_i64 tcg_rd, tcg_rn;
4666     uint64_t imm;
4667
4668     /* Some immediate field values are reserved. */
4669     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4670                                 extract32(a->dbm, 0, 6),
4671                                 extract32(a->dbm, 6, 6))) {
4672         return false;
4673     }
4674     if (!a->sf) {
4675         imm &= 0xffffffffull;
4676     }
4677
4678     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4679     tcg_rn = cpu_reg(s, a->rn);
4680
4681     fn(tcg_rd, tcg_rn, imm);
4682     if (set_cc) {
4683         gen_logic_CC(a->sf, tcg_rd);
4684     }
4685     if (!a->sf) {
4686         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4687     }
4688     return true;
4689 }
4690
4691 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4692 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4693 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4694 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4695
4696 /*
4697  * Move wide (immediate)
4698  */
4699
4700 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4701 {
4702     int pos = a->hw << 4;
4703     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4704     return true;
4705 }
4706
4707 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4708 {
4709     int pos = a->hw << 4;
4710     uint64_t imm = a->imm;
4711
4712     imm = ~(imm << pos);
4713     if (!a->sf) {
4714         imm = (uint32_t)imm;
4715     }
4716     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4717     return true;
4718 }
4719
4720 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4721 {
4722     int pos = a->hw << 4;
4723     TCGv_i64 tcg_rd, tcg_im;
4724
4725     tcg_rd = cpu_reg(s, a->rd);
4726     tcg_im = tcg_constant_i64(a->imm);
4727     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4728     if (!a->sf) {
4729         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4730     }
4731     return true;
4732 }
4733
4734 /*
4735  * Bitfield
4736  */
4737
4738 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4739 {
4740     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4741     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4742     unsigned int bitsize = a->sf ? 64 : 32;
4743     unsigned int ri = a->immr;
4744     unsigned int si = a->imms;
4745     unsigned int pos, len;
4746
4747     if (si >= ri) {
4748         /* Wd<s-r:0> = Wn<s:r> */
4749         len = (si - ri) + 1;
4750         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4751         if (!a->sf) {
4752             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4753         }
4754     } else {
4755         /* Wd<32+s-r,32-r> = Wn<s:0> */
4756         len = si + 1;
4757         pos = (bitsize - ri) & (bitsize - 1);
4758
4759         if (len < ri) {
4760             /*
4761              * Sign extend the destination field from len to fill the
4762              * balance of the word. Let the deposit below insert all
4763              * of those sign bits.
4764              */
4765             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4766             len = ri;
4767         }
4768
4769         /*
4770          * We start with zero, and we haven't modified any bits outside
4771          * bitsize, therefore no final zero-extension is needed for !sf.
4772          */
4773         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4774     }
4775     return true;
4776 }
4777
4778 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4779 {
4780     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4781     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4782     unsigned int bitsize = a->sf ? 64 : 32;
4783     unsigned int ri = a->immr;
4784     unsigned int si = a->imms;
4785     unsigned int pos, len;
4786
4790     if (si >= ri) {
4791         /* Wd<s-r:0> = Wn<s:r> */
4792         len = (si - ri) + 1;
4793         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4794     } else {
4795         /* Wd<32+s-r,32-r> = Wn<s:0> */
4796         len = si + 1;
4797         pos = (bitsize - ri) & (bitsize - 1);
4798         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4799     }
4800     return true;
4801 }
4802
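/*
 * Illustrative mapping of the common UBFM aliases onto the immr/imms
 * fields handled above (32-bit forms shown, following the ARM ARM alias
 * definitions):
 *     UBFX Wd, Wn, #lsb, #width  ->  immr = lsb, imms = lsb + width - 1
 *         (si >= ri: a plain <width>-bit extract starting at bit <lsb>)
 *     LSR  Wd, Wn, #sh           ->  immr = sh, imms = 31
 *         (si >= ri: extract of the top 32 - sh bits)
 *     LSL  Wd, Wn, #sh           ->  immr = (32 - sh) % 32, imms = 31 - sh
 *         (si < ri: deposit into zero at pos = sh, len = 32 - sh)
 */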
4803 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4804 {
4805     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4806     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4807     unsigned int bitsize = a->sf ? 64 : 32;
4808     unsigned int ri = a->immr;
4809     unsigned int si = a->imms;
4810     unsigned int pos, len;
4811
4815     if (si >= ri) {
4816         /* Wd<s-r:0> = Wn<s:r> */
4817         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4818         len = (si - ri) + 1;
4819         pos = 0;
4820     } else {
4821         /* Wd<32+s-r,32-r> = Wn<s:0> */
4822         len = si + 1;
4823         pos = (bitsize - ri) & (bitsize - 1);
4824     }
4825
4826     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4827     if (!a->sf) {
4828         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4829     }
4830     return true;
4831 }
4832
4833 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4834 {
4835     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4836
4837     tcg_rd = cpu_reg(s, a->rd);
4838
4839     if (unlikely(a->imm == 0)) {
4840         /*
4841          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4842          * so an extract from bit 0 is a special case.
4843          */
4844         if (a->sf) {
4845             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4846         } else {
4847             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4848         }
4849     } else {
4850         tcg_rm = cpu_reg(s, a->rm);
4851         tcg_rn = cpu_reg(s, a->rn);
4852
4853         if (a->sf) {
4854             /* Specialization to ROR happens in EXTRACT2. */
4855             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4856         } else {
4857             TCGv_i32 t0 = tcg_temp_new_i32();
4858
4859             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4860             if (a->rm == a->rn) {
4861                 tcg_gen_rotri_i32(t0, t0, a->imm);
4862             } else {
4863                 TCGv_i32 t1 = tcg_temp_new_i32();
4864                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4865                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4866             }
4867             tcg_gen_extu_i32_i64(tcg_rd, t0);
4868         }
4869     }
4870     return true;
4871 }
4872
4873 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4874 {
4875     if (fp_access_check(s)) {
4876         int len = (a->len + 1) * 16;
4877
4878         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4879                            vec_full_reg_offset(s, a->rm), tcg_env,
4880                            a->q ? 16 : 8, vec_full_reg_size(s),
4881                            (len << 6) | (a->tbx << 5) | a->rn,
4882                            gen_helper_simd_tblx);
4883     }
4884     return true;
4885 }
4886
4887 typedef int simd_permute_idx_fn(int i, int part, int elements);
4888
4889 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4890                             simd_permute_idx_fn *fn, int part)
4891 {
4892     MemOp esz = a->esz;
4893     int datasize = a->q ? 16 : 8;
4894     int elements = datasize >> esz;
4895     TCGv_i64 tcg_res[2], tcg_ele;
4896
4897     if (esz == MO_64 && !a->q) {
4898         return false;
4899     }
4900     if (!fp_access_check(s)) {
4901         return true;
4902     }
4903
4904     tcg_res[0] = tcg_temp_new_i64();
4905     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4906     tcg_ele = tcg_temp_new_i64();
4907
4908     for (int i = 0; i < elements; i++) {
4909         int o, w, idx;
4910
4911         idx = fn(i, part, elements);
4912         read_vec_element(s, tcg_ele, (idx & elements ?
a->rm : a->rn), 4913 idx & (elements - 1), esz); 4914 4915 w = (i << (esz + 3)) / 64; 4916 o = (i << (esz + 3)) % 64; 4917 if (o == 0) { 4918 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4919 } else { 4920 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4921 } 4922 } 4923 4924 for (int i = a->q; i >= 0; --i) { 4925 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4926 } 4927 clear_vec_high(s, a->q, a->rd); 4928 return true; 4929 } 4930 4931 static int permute_load_uzp(int i, int part, int elements) 4932 { 4933 return 2 * i + part; 4934 } 4935 4936 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4937 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4938 4939 static int permute_load_trn(int i, int part, int elements) 4940 { 4941 return (i & 1) * elements + (i & ~1) + part; 4942 } 4943 4944 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4945 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4946 4947 static int permute_load_zip(int i, int part, int elements) 4948 { 4949 return (i & 1) * elements + ((part * elements + i) >> 1); 4950 } 4951 4952 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4953 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4954 4955 /* 4956 * Cryptographic AES, SHA, SHA512 4957 */ 4958 4959 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4960 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4961 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4962 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4963 4964 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4965 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4966 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4967 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4968 4969 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4970 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4971 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4972 4973 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4974 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4975 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4976 4977 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4978 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4979 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4980 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4981 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4982 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4983 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4984 4985 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4986 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4987 4988 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4989 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4990 4991 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4992 { 4993 if (!dc_isar_feature(aa64_sm3, s)) { 4994 return false; 4995 } 4996 if (fp_access_check(s)) { 4997 
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4998 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4999 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 5000 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5001 5002 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 5003 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 5004 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 5005 5006 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 5007 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 5008 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 5009 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 5010 5011 /* Clear the whole register first, then store bits [127:96]. */ 5012 clear_vec(s, a->rd); 5013 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 5014 } 5015 return true; 5016 } 5017 5018 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 5019 { 5020 if (fp_access_check(s)) { 5021 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 5022 } 5023 return true; 5024 } 5025 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 5026 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 5027 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 5028 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 5029 5030 static bool trans_XAR(DisasContext *s, arg_XAR *a) 5031 { 5032 if (!dc_isar_feature(aa64_sha3, s)) { 5033 return false; 5034 } 5035 if (fp_access_check(s)) { 5036 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 5037 vec_full_reg_offset(s, a->rn), 5038 vec_full_reg_offset(s, a->rm), a->imm, 16, 5039 vec_full_reg_size(s)); 5040 } 5041 return true; 5042 } 5043 5044 /* 5045 * Advanced SIMD copy 5046 */ 5047 5048 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 5049 { 5050 unsigned esz = ctz32(imm); 5051 if (esz <= MO_64) { 5052 *pesz = esz; 5053 *pidx = imm >> (esz + 1); 5054 return true; 5055 } 5056 return false; 5057 } 5058 5059 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 5060 { 5061 MemOp esz; 5062 unsigned idx; 5063 5064 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5065 return false; 5066 } 5067 if (fp_access_check(s)) { 5068 /* 5069 * This instruction just extracts the specified element and 5070 * zero-extends it into the bottom of the destination register. 5071 */ 5072 TCGv_i64 tmp = tcg_temp_new_i64(); 5073 read_vec_element(s, tmp, a->rn, idx, esz); 5074 write_fp_dreg(s, a->rd, tmp); 5075 } 5076 return true; 5077 } 5078 5079 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 5080 { 5081 MemOp esz; 5082 unsigned idx; 5083 5084 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5085 return false; 5086 } 5087 if (esz == MO_64 && !a->q) { 5088 return false; 5089 } 5090 if (fp_access_check(s)) { 5091 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 5092 vec_reg_offset(s, a->rn, idx, esz), 5093 a->q ? 16 : 8, vec_full_reg_size(s)); 5094 } 5095 return true; 5096 } 5097 5098 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 5099 { 5100 MemOp esz; 5101 unsigned idx; 5102 5103 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5104 return false; 5105 } 5106 if (esz == MO_64 && !a->q) { 5107 return false; 5108 } 5109 if (fp_access_check(s)) { 5110 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5111 a->q ? 
16 : 8, vec_full_reg_size(s), 5112 cpu_reg(s, a->rn)); 5113 } 5114 return true; 5115 } 5116 5117 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 5118 { 5119 MemOp esz; 5120 unsigned idx; 5121 5122 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5123 return false; 5124 } 5125 if (is_signed) { 5126 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 5127 return false; 5128 } 5129 } else { 5130 if (esz == MO_64 ? !a->q : a->q) { 5131 return false; 5132 } 5133 } 5134 if (fp_access_check(s)) { 5135 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5136 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 5137 if (is_signed && !a->q) { 5138 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5139 } 5140 } 5141 return true; 5142 } 5143 5144 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 5145 TRANS(UMOV, do_smov_umov, a, 0) 5146 5147 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 5148 { 5149 MemOp esz; 5150 unsigned idx; 5151 5152 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5153 return false; 5154 } 5155 if (fp_access_check(s)) { 5156 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 5157 clear_vec_high(s, true, a->rd); 5158 } 5159 return true; 5160 } 5161 5162 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5163 { 5164 MemOp esz; 5165 unsigned didx, sidx; 5166 5167 if (!decode_esz_idx(a->di, &esz, &didx)) { 5168 return false; 5169 } 5170 sidx = a->si >> esz; 5171 if (fp_access_check(s)) { 5172 TCGv_i64 tmp = tcg_temp_new_i64(); 5173 5174 read_vec_element(s, tmp, a->rn, sidx, esz); 5175 write_vec_element(s, tmp, a->rd, didx, esz); 5176 5177 /* INS is considered a 128-bit write for SVE. */ 5178 clear_vec_high(s, true, a->rd); 5179 } 5180 return true; 5181 } 5182 5183 /* 5184 * Advanced SIMD three same 5185 */ 5186 5187 typedef struct FPScalar { 5188 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5189 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5190 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5191 } FPScalar; 5192 5193 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, 5194 const FPScalar *f, int mergereg, 5195 ARMFPStatusFlavour fpsttype) 5196 { 5197 switch (a->esz) { 5198 case MO_64: 5199 if (fp_access_check(s)) { 5200 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5201 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5202 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); 5203 write_fp_dreg_merging(s, a->rd, mergereg, t0); 5204 } 5205 break; 5206 case MO_32: 5207 if (fp_access_check(s)) { 5208 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5209 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5210 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); 5211 write_fp_sreg_merging(s, a->rd, mergereg, t0); 5212 } 5213 break; 5214 case MO_16: 5215 if (!dc_isar_feature(aa64_fp16, s)) { 5216 return false; 5217 } 5218 if (fp_access_check(s)) { 5219 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5220 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5221 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); 5222 write_fp_hreg_merging(s, a->rd, mergereg, t0); 5223 } 5224 break; 5225 default: 5226 return false; 5227 } 5228 return true; 5229 } 5230 5231 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, 5232 int mergereg) 5233 { 5234 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, 5235 a->esz == MO_16 ? 5236 FPST_A64_F16 : FPST_A64); 5237 } 5238 5239 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, 5240 const FPScalar *fnormal, const FPScalar *fah, 5241 int mergereg) 5242 { 5243 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? 
fah : fnormal, 5244 mergereg, select_ah_fpst(s, a->esz)); 5245 } 5246 5247 /* Some insns need to call different helpers when FPCR.AH == 1 */ 5248 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, 5249 const FPScalar *fnormal, 5250 const FPScalar *fah, 5251 int mergereg) 5252 { 5253 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); 5254 } 5255 5256 static const FPScalar f_scalar_fadd = { 5257 gen_helper_vfp_addh, 5258 gen_helper_vfp_adds, 5259 gen_helper_vfp_addd, 5260 }; 5261 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) 5262 5263 static const FPScalar f_scalar_fsub = { 5264 gen_helper_vfp_subh, 5265 gen_helper_vfp_subs, 5266 gen_helper_vfp_subd, 5267 }; 5268 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) 5269 5270 static const FPScalar f_scalar_fdiv = { 5271 gen_helper_vfp_divh, 5272 gen_helper_vfp_divs, 5273 gen_helper_vfp_divd, 5274 }; 5275 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) 5276 5277 static const FPScalar f_scalar_fmul = { 5278 gen_helper_vfp_mulh, 5279 gen_helper_vfp_muls, 5280 gen_helper_vfp_muld, 5281 }; 5282 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) 5283 5284 static const FPScalar f_scalar_fmax = { 5285 gen_helper_vfp_maxh, 5286 gen_helper_vfp_maxs, 5287 gen_helper_vfp_maxd, 5288 }; 5289 static const FPScalar f_scalar_fmax_ah = { 5290 gen_helper_vfp_ah_maxh, 5291 gen_helper_vfp_ah_maxs, 5292 gen_helper_vfp_ah_maxd, 5293 }; 5294 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) 5295 5296 static const FPScalar f_scalar_fmin = { 5297 gen_helper_vfp_minh, 5298 gen_helper_vfp_mins, 5299 gen_helper_vfp_mind, 5300 }; 5301 static const FPScalar f_scalar_fmin_ah = { 5302 gen_helper_vfp_ah_minh, 5303 gen_helper_vfp_ah_mins, 5304 gen_helper_vfp_ah_mind, 5305 }; 5306 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) 5307 5308 static const FPScalar f_scalar_fmaxnm = { 5309 gen_helper_vfp_maxnumh, 5310 gen_helper_vfp_maxnums, 5311 gen_helper_vfp_maxnumd, 5312 }; 5313 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) 5314 5315 static const FPScalar f_scalar_fminnm = { 5316 gen_helper_vfp_minnumh, 5317 gen_helper_vfp_minnums, 5318 gen_helper_vfp_minnumd, 5319 }; 5320 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) 5321 5322 static const FPScalar f_scalar_fmulx = { 5323 gen_helper_advsimd_mulxh, 5324 gen_helper_vfp_mulxs, 5325 gen_helper_vfp_mulxd, 5326 }; 5327 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) 5328 5329 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5330 { 5331 gen_helper_vfp_mulh(d, n, m, s); 5332 gen_vfp_negh(d, d); 5333 } 5334 5335 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5336 { 5337 gen_helper_vfp_muls(d, n, m, s); 5338 gen_vfp_negs(d, d); 5339 } 5340 5341 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5342 { 5343 gen_helper_vfp_muld(d, n, m, s); 5344 gen_vfp_negd(d, d); 5345 } 5346 5347 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5348 { 5349 gen_helper_vfp_mulh(d, n, m, s); 5350 gen_vfp_ah_negh(d, d); 5351 } 5352 5353 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5354 { 5355 gen_helper_vfp_muls(d, n, m, s); 5356 gen_vfp_ah_negs(d, d); 5357 } 5358 5359 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5360 { 5361 gen_helper_vfp_muld(d, n, m, s); 5362 gen_vfp_ah_negd(d, d); 5363 } 5364 5365 static const FPScalar f_scalar_fnmul = { 5366 gen_fnmul_h, 5367 
gen_fnmul_s, 5368 gen_fnmul_d, 5369 }; 5370 static const FPScalar f_scalar_ah_fnmul = { 5371 gen_fnmul_ah_h, 5372 gen_fnmul_ah_s, 5373 gen_fnmul_ah_d, 5374 }; 5375 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) 5376 5377 static const FPScalar f_scalar_fcmeq = { 5378 gen_helper_advsimd_ceq_f16, 5379 gen_helper_neon_ceq_f32, 5380 gen_helper_neon_ceq_f64, 5381 }; 5382 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) 5383 5384 static const FPScalar f_scalar_fcmge = { 5385 gen_helper_advsimd_cge_f16, 5386 gen_helper_neon_cge_f32, 5387 gen_helper_neon_cge_f64, 5388 }; 5389 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) 5390 5391 static const FPScalar f_scalar_fcmgt = { 5392 gen_helper_advsimd_cgt_f16, 5393 gen_helper_neon_cgt_f32, 5394 gen_helper_neon_cgt_f64, 5395 }; 5396 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) 5397 5398 static const FPScalar f_scalar_facge = { 5399 gen_helper_advsimd_acge_f16, 5400 gen_helper_neon_acge_f32, 5401 gen_helper_neon_acge_f64, 5402 }; 5403 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) 5404 5405 static const FPScalar f_scalar_facgt = { 5406 gen_helper_advsimd_acgt_f16, 5407 gen_helper_neon_acgt_f32, 5408 gen_helper_neon_acgt_f64, 5409 }; 5410 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) 5411 5412 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5413 { 5414 gen_helper_vfp_subh(d, n, m, s); 5415 gen_vfp_absh(d, d); 5416 } 5417 5418 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5419 { 5420 gen_helper_vfp_subs(d, n, m, s); 5421 gen_vfp_abss(d, d); 5422 } 5423 5424 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5425 { 5426 gen_helper_vfp_subd(d, n, m, s); 5427 gen_vfp_absd(d, d); 5428 } 5429 5430 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5431 { 5432 gen_helper_vfp_subh(d, n, m, s); 5433 gen_vfp_ah_absh(d, d); 5434 } 5435 5436 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5437 { 5438 gen_helper_vfp_subs(d, n, m, s); 5439 gen_vfp_ah_abss(d, d); 5440 } 5441 5442 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5443 { 5444 gen_helper_vfp_subd(d, n, m, s); 5445 gen_vfp_ah_absd(d, d); 5446 } 5447 5448 static const FPScalar f_scalar_fabd = { 5449 gen_fabd_h, 5450 gen_fabd_s, 5451 gen_fabd_d, 5452 }; 5453 static const FPScalar f_scalar_ah_fabd = { 5454 gen_fabd_ah_h, 5455 gen_fabd_ah_s, 5456 gen_fabd_ah_d, 5457 }; 5458 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) 5459 5460 static const FPScalar f_scalar_frecps = { 5461 gen_helper_recpsf_f16, 5462 gen_helper_recpsf_f32, 5463 gen_helper_recpsf_f64, 5464 }; 5465 static const FPScalar f_scalar_ah_frecps = { 5466 gen_helper_recpsf_ah_f16, 5467 gen_helper_recpsf_ah_f32, 5468 gen_helper_recpsf_ah_f64, 5469 }; 5470 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, 5471 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) 5472 5473 static const FPScalar f_scalar_frsqrts = { 5474 gen_helper_rsqrtsf_f16, 5475 gen_helper_rsqrtsf_f32, 5476 gen_helper_rsqrtsf_f64, 5477 }; 5478 static const FPScalar f_scalar_ah_frsqrts = { 5479 gen_helper_rsqrtsf_ah_f16, 5480 gen_helper_rsqrtsf_ah_f32, 5481 gen_helper_rsqrtsf_ah_f64, 5482 }; 5483 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, 5484 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) 5485 5486 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5487 const FPScalar *f, bool swap) 5488 { 5489 switch (a->esz) { 5490 case MO_64: 5491 if 
(fp_access_check(s)) { 5492 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5493 TCGv_i64 t1 = tcg_constant_i64(0); 5494 if (swap) { 5495 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5496 } else { 5497 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5498 } 5499 write_fp_dreg(s, a->rd, t0); 5500 } 5501 break; 5502 case MO_32: 5503 if (fp_access_check(s)) { 5504 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5505 TCGv_i32 t1 = tcg_constant_i32(0); 5506 if (swap) { 5507 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5508 } else { 5509 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5510 } 5511 write_fp_sreg(s, a->rd, t0); 5512 } 5513 break; 5514 case MO_16: 5515 if (!dc_isar_feature(aa64_fp16, s)) { 5516 return false; 5517 } 5518 if (fp_access_check(s)) { 5519 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5520 TCGv_i32 t1 = tcg_constant_i32(0); 5521 if (swap) { 5522 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 5523 } else { 5524 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5525 } 5526 write_fp_sreg(s, a->rd, t0); 5527 } 5528 break; 5529 default: 5530 return false; 5531 } 5532 return true; 5533 } 5534 5535 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5536 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5537 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5538 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5539 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5540 5541 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5542 MemOp sgn_n, MemOp sgn_m, 5543 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5544 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5545 { 5546 TCGv_i64 t0, t1, t2, qc; 5547 MemOp esz = a->esz; 5548 5549 if (!fp_access_check(s)) { 5550 return true; 5551 } 5552 5553 t0 = tcg_temp_new_i64(); 5554 t1 = tcg_temp_new_i64(); 5555 t2 = tcg_temp_new_i64(); 5556 qc = tcg_temp_new_i64(); 5557 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5558 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5559 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5560 5561 if (esz == MO_64) { 5562 gen_d(t0, qc, t1, t2); 5563 } else { 5564 gen_bhs(t0, qc, t1, t2, esz); 5565 tcg_gen_ext_i64(t0, t0, esz); 5566 } 5567 5568 write_fp_dreg(s, a->rd, t0); 5569 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5570 return true; 5571 } 5572 5573 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5574 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5575 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5576 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5577 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5578 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5579 5580 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5581 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5582 { 5583 if (fp_access_check(s)) { 5584 TCGv_i64 t0 = tcg_temp_new_i64(); 5585 TCGv_i64 t1 = tcg_temp_new_i64(); 5586 5587 read_vec_element(s, t0, a->rn, 0, MO_64); 5588 read_vec_element(s, t1, a->rm, 0, MO_64); 5589 fn(t0, t0, t1); 5590 write_fp_dreg(s, a->rd, t0); 5591 } 5592 return true; 5593 } 5594 5595 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5596 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5597 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5598 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5599 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5600 TRANS(SUB_s, do_int3_scalar_d, a, 
tcg_gen_sub_i64) 5601 5602 typedef struct ENVScalar2 { 5603 NeonGenTwoOpEnvFn *gen_bhs[3]; 5604 NeonGenTwo64OpEnvFn *gen_d; 5605 } ENVScalar2; 5606 5607 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5608 { 5609 if (!fp_access_check(s)) { 5610 return true; 5611 } 5612 if (a->esz == MO_64) { 5613 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5614 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5615 f->gen_d(t0, tcg_env, t0, t1); 5616 write_fp_dreg(s, a->rd, t0); 5617 } else { 5618 TCGv_i32 t0 = tcg_temp_new_i32(); 5619 TCGv_i32 t1 = tcg_temp_new_i32(); 5620 5621 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5622 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5623 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5624 write_fp_sreg(s, a->rd, t0); 5625 } 5626 return true; 5627 } 5628 5629 static const ENVScalar2 f_scalar_sqshl = { 5630 { gen_helper_neon_qshl_s8, 5631 gen_helper_neon_qshl_s16, 5632 gen_helper_neon_qshl_s32 }, 5633 gen_helper_neon_qshl_s64, 5634 }; 5635 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5636 5637 static const ENVScalar2 f_scalar_uqshl = { 5638 { gen_helper_neon_qshl_u8, 5639 gen_helper_neon_qshl_u16, 5640 gen_helper_neon_qshl_u32 }, 5641 gen_helper_neon_qshl_u64, 5642 }; 5643 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5644 5645 static const ENVScalar2 f_scalar_sqrshl = { 5646 { gen_helper_neon_qrshl_s8, 5647 gen_helper_neon_qrshl_s16, 5648 gen_helper_neon_qrshl_s32 }, 5649 gen_helper_neon_qrshl_s64, 5650 }; 5651 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5652 5653 static const ENVScalar2 f_scalar_uqrshl = { 5654 { gen_helper_neon_qrshl_u8, 5655 gen_helper_neon_qrshl_u16, 5656 gen_helper_neon_qrshl_u32 }, 5657 gen_helper_neon_qrshl_u64, 5658 }; 5659 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5660 5661 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5662 const ENVScalar2 *f) 5663 { 5664 if (a->esz == MO_16 || a->esz == MO_32) { 5665 return do_env_scalar2(s, a, f); 5666 } 5667 return false; 5668 } 5669 5670 static const ENVScalar2 f_scalar_sqdmulh = { 5671 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5672 }; 5673 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5674 5675 static const ENVScalar2 f_scalar_sqrdmulh = { 5676 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5677 }; 5678 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5679 5680 typedef struct ENVScalar3 { 5681 NeonGenThreeOpEnvFn *gen_hs[2]; 5682 } ENVScalar3; 5683 5684 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5685 const ENVScalar3 *f) 5686 { 5687 TCGv_i32 t0, t1, t2; 5688 5689 if (a->esz != MO_16 && a->esz != MO_32) { 5690 return false; 5691 } 5692 if (!fp_access_check(s)) { 5693 return true; 5694 } 5695 5696 t0 = tcg_temp_new_i32(); 5697 t1 = tcg_temp_new_i32(); 5698 t2 = tcg_temp_new_i32(); 5699 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5700 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5701 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5702 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5703 write_fp_sreg(s, a->rd, t0); 5704 return true; 5705 } 5706 5707 static const ENVScalar3 f_scalar_sqrdmlah = { 5708 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5709 }; 5710 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5711 5712 static const ENVScalar3 f_scalar_sqrdmlsh = { 5713 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5714 }; 5715 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5716 5717 
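/*
 * The scalar integer compares below (CMGT, CMHI, CMGE, CMHS, CMEQ, CMTST)
 * produce an all-ones result when the condition holds and zero otherwise.
 * tcg_gen_negsetcond_i64() computes dest = -(n <cond> m), i.e. 0 or -1,
 * which is exactly that mask, so no separate select step is needed.
 */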
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5718 { 5719 if (fp_access_check(s)) { 5720 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5721 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5722 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5723 write_fp_dreg(s, a->rd, t0); 5724 } 5725 return true; 5726 } 5727 5728 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5729 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5730 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5731 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5732 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5733 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5734 5735 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, 5736 int data, 5737 gen_helper_gvec_3_ptr * const fns[3], 5738 ARMFPStatusFlavour fpsttype) 5739 { 5740 MemOp esz = a->esz; 5741 int check = fp_access_check_vector_hsd(s, a->q, esz); 5742 5743 if (check <= 0) { 5744 return check == 0; 5745 } 5746 5747 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, 5748 data, fns[esz - 1]); 5749 return true; 5750 } 5751 5752 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5753 gen_helper_gvec_3_ptr * const fns[3]) 5754 { 5755 return do_fp3_vector_with_fpsttype(s, a, data, fns, 5756 a->esz == MO_16 ? 5757 FPST_A64_F16 : FPST_A64); 5758 } 5759 5760 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5761 gen_helper_gvec_3_ptr * const fnormal[3], 5762 gen_helper_gvec_3_ptr * const fah[3]) 5763 { 5764 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); 5765 } 5766 5767 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5768 gen_helper_gvec_3_ptr * const fnormal[3], 5769 gen_helper_gvec_3_ptr * const fah[3]) 5770 { 5771 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? 
fah : fnormal, 5772 select_ah_fpst(s, a->esz)); 5773 } 5774 5775 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5776 gen_helper_gvec_fadd_h, 5777 gen_helper_gvec_fadd_s, 5778 gen_helper_gvec_fadd_d, 5779 }; 5780 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5781 5782 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5783 gen_helper_gvec_fsub_h, 5784 gen_helper_gvec_fsub_s, 5785 gen_helper_gvec_fsub_d, 5786 }; 5787 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5788 5789 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5790 gen_helper_gvec_fdiv_h, 5791 gen_helper_gvec_fdiv_s, 5792 gen_helper_gvec_fdiv_d, 5793 }; 5794 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5795 5796 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5797 gen_helper_gvec_fmul_h, 5798 gen_helper_gvec_fmul_s, 5799 gen_helper_gvec_fmul_d, 5800 }; 5801 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5802 5803 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5804 gen_helper_gvec_fmax_h, 5805 gen_helper_gvec_fmax_s, 5806 gen_helper_gvec_fmax_d, 5807 }; 5808 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { 5809 gen_helper_gvec_ah_fmax_h, 5810 gen_helper_gvec_ah_fmax_s, 5811 gen_helper_gvec_ah_fmax_d, 5812 }; 5813 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) 5814 5815 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5816 gen_helper_gvec_fmin_h, 5817 gen_helper_gvec_fmin_s, 5818 gen_helper_gvec_fmin_d, 5819 }; 5820 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { 5821 gen_helper_gvec_ah_fmin_h, 5822 gen_helper_gvec_ah_fmin_s, 5823 gen_helper_gvec_ah_fmin_d, 5824 }; 5825 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) 5826 5827 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5828 gen_helper_gvec_fmaxnum_h, 5829 gen_helper_gvec_fmaxnum_s, 5830 gen_helper_gvec_fmaxnum_d, 5831 }; 5832 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5833 5834 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5835 gen_helper_gvec_fminnum_h, 5836 gen_helper_gvec_fminnum_s, 5837 gen_helper_gvec_fminnum_d, 5838 }; 5839 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5840 5841 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5842 gen_helper_gvec_fmulx_h, 5843 gen_helper_gvec_fmulx_s, 5844 gen_helper_gvec_fmulx_d, 5845 }; 5846 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5847 5848 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5849 gen_helper_gvec_vfma_h, 5850 gen_helper_gvec_vfma_s, 5851 gen_helper_gvec_vfma_d, 5852 }; 5853 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5854 5855 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5856 gen_helper_gvec_vfms_h, 5857 gen_helper_gvec_vfms_s, 5858 gen_helper_gvec_vfms_d, 5859 }; 5860 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { 5861 gen_helper_gvec_ah_vfms_h, 5862 gen_helper_gvec_ah_vfms_s, 5863 gen_helper_gvec_ah_vfms_d, 5864 }; 5865 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) 5866 5867 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5868 gen_helper_gvec_fceq_h, 5869 gen_helper_gvec_fceq_s, 5870 gen_helper_gvec_fceq_d, 5871 }; 5872 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5873 5874 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5875 gen_helper_gvec_fcge_h, 5876 gen_helper_gvec_fcge_s, 5877 gen_helper_gvec_fcge_d, 5878 }; 5879 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5880 5881 static gen_helper_gvec_3_ptr * const 
f_vector_fcmgt[3] = { 5882 gen_helper_gvec_fcgt_h, 5883 gen_helper_gvec_fcgt_s, 5884 gen_helper_gvec_fcgt_d, 5885 }; 5886 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5887 5888 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5889 gen_helper_gvec_facge_h, 5890 gen_helper_gvec_facge_s, 5891 gen_helper_gvec_facge_d, 5892 }; 5893 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5894 5895 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5896 gen_helper_gvec_facgt_h, 5897 gen_helper_gvec_facgt_s, 5898 gen_helper_gvec_facgt_d, 5899 }; 5900 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5901 5902 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5903 gen_helper_gvec_fabd_h, 5904 gen_helper_gvec_fabd_s, 5905 gen_helper_gvec_fabd_d, 5906 }; 5907 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { 5908 gen_helper_gvec_ah_fabd_h, 5909 gen_helper_gvec_ah_fabd_s, 5910 gen_helper_gvec_ah_fabd_d, 5911 }; 5912 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) 5913 5914 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5915 gen_helper_gvec_recps_h, 5916 gen_helper_gvec_recps_s, 5917 gen_helper_gvec_recps_d, 5918 }; 5919 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { 5920 gen_helper_gvec_ah_recps_h, 5921 gen_helper_gvec_ah_recps_s, 5922 gen_helper_gvec_ah_recps_d, 5923 }; 5924 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) 5925 5926 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5927 gen_helper_gvec_rsqrts_h, 5928 gen_helper_gvec_rsqrts_s, 5929 gen_helper_gvec_rsqrts_d, 5930 }; 5931 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { 5932 gen_helper_gvec_ah_rsqrts_h, 5933 gen_helper_gvec_ah_rsqrts_s, 5934 gen_helper_gvec_ah_rsqrts_d, 5935 }; 5936 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) 5937 5938 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5939 gen_helper_gvec_faddp_h, 5940 gen_helper_gvec_faddp_s, 5941 gen_helper_gvec_faddp_d, 5942 }; 5943 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5944 5945 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5946 gen_helper_gvec_fmaxp_h, 5947 gen_helper_gvec_fmaxp_s, 5948 gen_helper_gvec_fmaxp_d, 5949 }; 5950 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { 5951 gen_helper_gvec_ah_fmaxp_h, 5952 gen_helper_gvec_ah_fmaxp_s, 5953 gen_helper_gvec_ah_fmaxp_d, 5954 }; 5955 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) 5956 5957 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5958 gen_helper_gvec_fminp_h, 5959 gen_helper_gvec_fminp_s, 5960 gen_helper_gvec_fminp_d, 5961 }; 5962 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { 5963 gen_helper_gvec_ah_fminp_h, 5964 gen_helper_gvec_ah_fminp_s, 5965 gen_helper_gvec_ah_fminp_d, 5966 }; 5967 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) 5968 5969 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5970 gen_helper_gvec_fmaxnump_h, 5971 gen_helper_gvec_fmaxnump_s, 5972 gen_helper_gvec_fmaxnump_d, 5973 }; 5974 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5975 5976 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5977 gen_helper_gvec_fminnump_h, 5978 gen_helper_gvec_fminnump_s, 5979 gen_helper_gvec_fminnump_d, 5980 }; 5981 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5982 5983 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5984 { 5985 if 
(fp_access_check(s)) { 5986 int data = (is_2 << 1) | is_s; 5987 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5988 vec_full_reg_offset(s, a->rn), 5989 vec_full_reg_offset(s, a->rm), tcg_env, 5990 a->q ? 16 : 8, vec_full_reg_size(s), 5991 data, gen_helper_gvec_fmlal_a64); 5992 } 5993 return true; 5994 } 5995 5996 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5997 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5998 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5999 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 6000 6001 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 6002 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 6003 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 6004 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 6005 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 6006 6007 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 6008 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 6009 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 6010 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 6011 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 6012 6013 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 6014 { 6015 if (fp_access_check(s)) { 6016 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 6017 } 6018 return true; 6019 } 6020 6021 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 6022 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 6023 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 6024 6025 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 6026 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 6027 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 6028 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 6029 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 6030 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 6031 6032 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 6033 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 6034 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 6035 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 6036 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 6037 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 6038 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 6039 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 6040 6041 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 6042 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 6043 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 6044 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 6045 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 6046 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 6047 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 6048 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 6049 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 6050 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 6051 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 6052 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 6053 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 6054 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 6055 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 6056 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 6057 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 6058 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 6059 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 6060 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 6061 6062 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 6063 { 6064 if (a->esz == MO_64 && !a->q) { 6065 
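        /* Vectors of 64-bit elements only exist in the Q=1 (128-bit) form. */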
return false; 6066 } 6067 if (fp_access_check(s)) { 6068 tcg_gen_gvec_cmp(cond, a->esz, 6069 vec_full_reg_offset(s, a->rd), 6070 vec_full_reg_offset(s, a->rn), 6071 vec_full_reg_offset(s, a->rm), 6072 a->q ? 16 : 8, vec_full_reg_size(s)); 6073 } 6074 return true; 6075 } 6076 6077 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 6078 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 6079 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 6080 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 6081 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 6082 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 6083 6084 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 6085 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 6086 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 6087 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 6088 6089 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 6090 gen_helper_gvec_4 *fn) 6091 { 6092 if (fp_access_check(s)) { 6093 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6094 } 6095 return true; 6096 } 6097 6098 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 6099 gen_helper_gvec_4_ptr *fn) 6100 { 6101 if (fp_access_check(s)) { 6102 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6103 } 6104 return true; 6105 } 6106 6107 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 6108 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 6109 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 6110 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 6111 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 6112 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 6113 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 6114 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 6115 6116 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 6117 { 6118 if (!dc_isar_feature(aa64_bf16, s)) { 6119 return false; 6120 } 6121 if (fp_access_check(s)) { 6122 /* Q bit selects BFMLALB vs BFMLALT. */ 6123 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6124 s->fpcr_ah ? FPST_AH : FPST_A64, a->q, 6125 gen_helper_gvec_bfmlal); 6126 } 6127 return true; 6128 } 6129 6130 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 6131 gen_helper_gvec_fcaddh, 6132 gen_helper_gvec_fcadds, 6133 gen_helper_gvec_fcaddd, 6134 }; 6135 /* 6136 * Encode FPCR.AH into the data so the helper knows whether the 6137 * negations it does should avoid flipping the sign bit on a NaN 6138 */ 6139 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), 6140 f_vector_fcadd) 6141 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), 6142 f_vector_fcadd) 6143 6144 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 6145 { 6146 static gen_helper_gvec_4_ptr * const fn[] = { 6147 [MO_16] = gen_helper_gvec_fcmlah, 6148 [MO_32] = gen_helper_gvec_fcmlas, 6149 [MO_64] = gen_helper_gvec_fcmlad, 6150 }; 6151 int check; 6152 6153 if (!dc_isar_feature(aa64_fcma, s)) { 6154 return false; 6155 } 6156 6157 check = fp_access_check_vector_hsd(s, a->q, a->esz); 6158 if (check <= 0) { 6159 return check == 0; 6160 } 6161 6162 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6163 a->esz == MO_16 ? 
                      FPST_A64_F16 : FPST_A64,
                      a->rot | (s->fpcr_ah << 2), fn[a->esz]);
    return true;
}

/*
 * Widening vector x vector/indexed.
 *
 * These read from the top or bottom half of a 128-bit vector.
 * After widening, optionally accumulate with a 128-bit vector.
 * Implement these inline, as the number of elements is limited
 * and the related SVE and SME operations on larger vectors use
 * even/odd elements instead of top/bottom half.
 *
 * If idx >= 0, operand 2 is indexed, otherwise vector.
 * If acc, operand 0 is loaded with rd.
 */

/* For low half, iterating up. */
static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
                            int rd, int rn, int rm, int idx,
                            NeonGenTwo64OpFn *fn, bool acc)
{
    TCGv_i64 tcg_op0 = tcg_temp_new_i64();
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
    MemOp esz = memop & MO_SIZE;
    int half = 8 >> esz;
    int top_swap, top_half;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    if (idx >= 0) {
        read_vec_element(s, tcg_op2, rm, idx, memop);
    }

    /*
     * For top half inputs, iterate forward; backward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     * Use top_swap to conditionally invert the forward iteration index.
     */
    top_swap = top ? 0 : half - 1;
    top_half = top ? half : 0;

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
        if (idx < 0) {
            read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
        }
        if (acc) {
            read_vec_element(s, tcg_op0, rd, elt, memop + 1);
        }
        fn(tcg_op0, tcg_op1, tcg_op2);
        write_vec_element(s, tcg_op0, rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, rd);
    return true;
}

static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_sub_i64(d, d, t);
}

TRANS(SMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(UMULL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(UMLAL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(SMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)
TRANS(UMLSL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)

TRANS(SMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(UMULL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(UMLAL_vi,
do_3op_widening, 6274 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6275 gen_muladd_i64, true) 6276 TRANS(SMLSL_vi, do_3op_widening, 6277 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6278 gen_mulsub_i64, true) 6279 TRANS(UMLSL_vi, do_3op_widening, 6280 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6281 gen_mulsub_i64, true) 6282 6283 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6284 { 6285 TCGv_i64 t1 = tcg_temp_new_i64(); 6286 TCGv_i64 t2 = tcg_temp_new_i64(); 6287 6288 tcg_gen_sub_i64(t1, n, m); 6289 tcg_gen_sub_i64(t2, m, n); 6290 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 6291 } 6292 6293 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6294 { 6295 TCGv_i64 t1 = tcg_temp_new_i64(); 6296 TCGv_i64 t2 = tcg_temp_new_i64(); 6297 6298 tcg_gen_sub_i64(t1, n, m); 6299 tcg_gen_sub_i64(t2, m, n); 6300 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 6301 } 6302 6303 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6304 { 6305 TCGv_i64 t = tcg_temp_new_i64(); 6306 gen_sabd_i64(t, n, m); 6307 tcg_gen_add_i64(d, d, t); 6308 } 6309 6310 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6311 { 6312 TCGv_i64 t = tcg_temp_new_i64(); 6313 gen_uabd_i64(t, n, m); 6314 tcg_gen_add_i64(d, d, t); 6315 } 6316 6317 TRANS(SADDL_v, do_3op_widening, 6318 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6319 tcg_gen_add_i64, false) 6320 TRANS(UADDL_v, do_3op_widening, 6321 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6322 tcg_gen_add_i64, false) 6323 TRANS(SSUBL_v, do_3op_widening, 6324 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6325 tcg_gen_sub_i64, false) 6326 TRANS(USUBL_v, do_3op_widening, 6327 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6328 tcg_gen_sub_i64, false) 6329 TRANS(SABDL_v, do_3op_widening, 6330 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6331 gen_sabd_i64, false) 6332 TRANS(UABDL_v, do_3op_widening, 6333 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6334 gen_uabd_i64, false) 6335 TRANS(SABAL_v, do_3op_widening, 6336 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6337 gen_saba_i64, true) 6338 TRANS(UABAL_v, do_3op_widening, 6339 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6340 gen_uaba_i64, true) 6341 6342 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6343 { 6344 tcg_gen_mul_i64(d, n, m); 6345 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 6346 } 6347 6348 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6349 { 6350 tcg_gen_mul_i64(d, n, m); 6351 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 6352 } 6353 6354 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6355 { 6356 TCGv_i64 t = tcg_temp_new_i64(); 6357 6358 tcg_gen_mul_i64(t, n, m); 6359 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6360 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6361 } 6362 6363 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6364 { 6365 TCGv_i64 t = tcg_temp_new_i64(); 6366 6367 tcg_gen_mul_i64(t, n, m); 6368 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6369 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6370 } 6371 6372 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6373 { 6374 TCGv_i64 t = tcg_temp_new_i64(); 6375 6376 tcg_gen_mul_i64(t, n, m); 6377 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6378 tcg_gen_neg_i64(t, t); 6379 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6380 } 6381 6382 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6383 { 6384 TCGv_i64 t = tcg_temp_new_i64(); 6385 6386 tcg_gen_mul_i64(t, n, m); 6387 
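    /* Double the product with a saturating add, negate, then saturating-accumulate into d. */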
    gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
    tcg_gen_neg_i64(t, t);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
}

TRANS(SQDMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

TRANS(SQDMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
                           MemOp sign, bool sub)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int top_swap = top ? 0 : half - 1;
    int top_half = top ? half : 0;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, a->rd);
    return true;
}

TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
TRANS(UADDW, do_addsub_wide, a, 0, false)
TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
TRANS(USUBW, do_addsub_wide, a, 0, true)

static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
                                 bool sub, bool round)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int ebits = 8 << esz;
    uint64_t rbit = 1ull << (ebits - 1);
    int top_swap, top_half;

    /* There are no 128x128->64 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    /*
     * For top half inputs, iterate backward; forward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     */
    top_swap = top ? half - 1 : 0;
    top_half = top ?
half : 0; 6481 6482 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6483 int elt = elt_fwd ^ top_swap; 6484 6485 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6486 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6487 if (sub) { 6488 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6489 } else { 6490 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6491 } 6492 if (round) { 6493 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6494 } 6495 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6496 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6497 } 6498 clear_vec_high(s, top, a->rd); 6499 return true; 6500 } 6501 6502 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6503 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6504 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6505 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6506 6507 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6508 { 6509 if (fp_access_check(s)) { 6510 /* The Q field specifies lo/hi half input for these insns. */ 6511 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6512 } 6513 return true; 6514 } 6515 6516 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6517 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6518 6519 /* 6520 * Advanced SIMD scalar/vector x indexed element 6521 */ 6522 6523 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6524 { 6525 switch (a->esz) { 6526 case MO_64: 6527 if (fp_access_check(s)) { 6528 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6529 TCGv_i64 t1 = tcg_temp_new_i64(); 6530 6531 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6532 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6533 write_fp_dreg_merging(s, a->rd, a->rn, t0); 6534 } 6535 break; 6536 case MO_32: 6537 if (fp_access_check(s)) { 6538 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6539 TCGv_i32 t1 = tcg_temp_new_i32(); 6540 6541 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6542 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6543 write_fp_sreg_merging(s, a->rd, a->rn, t0); 6544 } 6545 break; 6546 case MO_16: 6547 if (!dc_isar_feature(aa64_fp16, s)) { 6548 return false; 6549 } 6550 if (fp_access_check(s)) { 6551 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6552 TCGv_i32 t1 = tcg_temp_new_i32(); 6553 6554 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6555 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6556 write_fp_hreg_merging(s, a->rd, a->rn, t0); 6557 } 6558 break; 6559 default: 6560 g_assert_not_reached(); 6561 } 6562 return true; 6563 } 6564 6565 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6566 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6567 6568 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6569 { 6570 switch (a->esz) { 6571 case MO_64: 6572 if (fp_access_check(s)) { 6573 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6574 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6575 TCGv_i64 t2 = tcg_temp_new_i64(); 6576 6577 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6578 if (neg) { 6579 gen_vfp_maybe_ah_negd(s, t1, t1); 6580 } 6581 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6582 write_fp_dreg_merging(s, a->rd, a->rd, t0); 6583 } 6584 break; 6585 case MO_32: 6586 if (fp_access_check(s)) { 6587 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6588 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6589 TCGv_i32 t2 = tcg_temp_new_i32(); 6590 6591 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6592 if (neg) { 6593 gen_vfp_maybe_ah_negs(s, t1, t1); 6594 } 6595 gen_helper_vfp_muladds(t0, t1, t2, t0, 
fpstatus_ptr(FPST_A64)); 6596 write_fp_sreg_merging(s, a->rd, a->rd, t0); 6597 } 6598 break; 6599 case MO_16: 6600 if (!dc_isar_feature(aa64_fp16, s)) { 6601 return false; 6602 } 6603 if (fp_access_check(s)) { 6604 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6605 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6606 TCGv_i32 t2 = tcg_temp_new_i32(); 6607 6608 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6609 if (neg) { 6610 gen_vfp_maybe_ah_negh(s, t1, t1); 6611 } 6612 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6613 fpstatus_ptr(FPST_A64_F16)); 6614 write_fp_hreg_merging(s, a->rd, a->rd, t0); 6615 } 6616 break; 6617 default: 6618 g_assert_not_reached(); 6619 } 6620 return true; 6621 } 6622 6623 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6624 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6625 6626 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6627 const ENVScalar2 *f) 6628 { 6629 if (a->esz < MO_16 || a->esz > MO_32) { 6630 return false; 6631 } 6632 if (fp_access_check(s)) { 6633 TCGv_i32 t0 = tcg_temp_new_i32(); 6634 TCGv_i32 t1 = tcg_temp_new_i32(); 6635 6636 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6637 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6638 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6639 write_fp_sreg(s, a->rd, t0); 6640 } 6641 return true; 6642 } 6643 6644 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6645 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6646 6647 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6648 const ENVScalar3 *f) 6649 { 6650 if (a->esz < MO_16 || a->esz > MO_32) { 6651 return false; 6652 } 6653 if (fp_access_check(s)) { 6654 TCGv_i32 t0 = tcg_temp_new_i32(); 6655 TCGv_i32 t1 = tcg_temp_new_i32(); 6656 TCGv_i32 t2 = tcg_temp_new_i32(); 6657 6658 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6659 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6660 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6661 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6662 write_fp_sreg(s, a->rd, t0); 6663 } 6664 return true; 6665 } 6666 6667 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6668 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6669 6670 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6671 NeonGenTwo64OpFn *fn, bool acc) 6672 { 6673 if (fp_access_check(s)) { 6674 TCGv_i64 t0 = tcg_temp_new_i64(); 6675 TCGv_i64 t1 = tcg_temp_new_i64(); 6676 TCGv_i64 t2 = tcg_temp_new_i64(); 6677 6678 if (acc) { 6679 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6680 } 6681 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6682 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6683 fn(t0, t1, t2); 6684 6685 /* Clear the whole register first, then store scalar. */ 6686 clear_vec(s, a->rd); 6687 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6688 } 6689 return true; 6690 } 6691 6692 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6693 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6694 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6695 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6696 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6697 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6698 6699 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6700 gen_helper_gvec_3_ptr * const fns[3]) 6701 { 6702 MemOp esz = a->esz; 6703 int check = fp_access_check_vector_hsd(s, a->q, esz); 6704 6705 if (check <= 0) { 6706 return check == 0; 6707 } 6708 6709 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6710 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6711 a->idx, fns[esz - 1]); 6712 return true; 6713 } 6714 6715 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6716 gen_helper_gvec_fmul_idx_h, 6717 gen_helper_gvec_fmul_idx_s, 6718 gen_helper_gvec_fmul_idx_d, 6719 }; 6720 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6721 6722 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6723 gen_helper_gvec_fmulx_idx_h, 6724 gen_helper_gvec_fmulx_idx_s, 6725 gen_helper_gvec_fmulx_idx_d, 6726 }; 6727 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6728 6729 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6730 { 6731 static gen_helper_gvec_4_ptr * const fns[3][3] = { 6732 { gen_helper_gvec_fmla_idx_h, 6733 gen_helper_gvec_fmla_idx_s, 6734 gen_helper_gvec_fmla_idx_d }, 6735 { gen_helper_gvec_fmls_idx_h, 6736 gen_helper_gvec_fmls_idx_s, 6737 gen_helper_gvec_fmls_idx_d }, 6738 { gen_helper_gvec_ah_fmls_idx_h, 6739 gen_helper_gvec_ah_fmls_idx_s, 6740 gen_helper_gvec_ah_fmls_idx_d }, 6741 }; 6742 MemOp esz = a->esz; 6743 int check = fp_access_check_vector_hsd(s, a->q, esz); 6744 6745 if (check <= 0) { 6746 return check == 0; 6747 } 6748 6749 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6750 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6751 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); 6752 return true; 6753 } 6754 6755 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6756 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6757 6758 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6759 { 6760 if (fp_access_check(s)) { 6761 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6762 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6763 vec_full_reg_offset(s, a->rn), 6764 vec_full_reg_offset(s, a->rm), tcg_env, 6765 a->q ? 
16 : 8, vec_full_reg_size(s), 6766 data, gen_helper_gvec_fmlal_idx_a64); 6767 } 6768 return true; 6769 } 6770 6771 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6772 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6773 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6774 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6775 6776 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6777 gen_helper_gvec_3 * const fns[2]) 6778 { 6779 assert(a->esz == MO_16 || a->esz == MO_32); 6780 if (fp_access_check(s)) { 6781 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6782 } 6783 return true; 6784 } 6785 6786 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6787 gen_helper_gvec_mul_idx_h, 6788 gen_helper_gvec_mul_idx_s, 6789 }; 6790 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6791 6792 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6793 { 6794 static gen_helper_gvec_4 * const fns[2][2] = { 6795 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6796 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6797 }; 6798 6799 assert(a->esz == MO_16 || a->esz == MO_32); 6800 if (fp_access_check(s)) { 6801 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6802 a->idx, fns[a->esz - 1][sub]); 6803 } 6804 return true; 6805 } 6806 6807 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6808 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6809 6810 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6811 gen_helper_gvec_4 * const fns[2]) 6812 { 6813 assert(a->esz == MO_16 || a->esz == MO_32); 6814 if (fp_access_check(s)) { 6815 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6816 vec_full_reg_offset(s, a->rn), 6817 vec_full_reg_offset(s, a->rm), 6818 offsetof(CPUARMState, vfp.qc), 6819 a->q ? 
16 : 8, vec_full_reg_size(s), 6820 a->idx, fns[a->esz - 1]); 6821 } 6822 return true; 6823 } 6824 6825 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6826 gen_helper_neon_sqdmulh_idx_h, 6827 gen_helper_neon_sqdmulh_idx_s, 6828 }; 6829 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6830 6831 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6832 gen_helper_neon_sqrdmulh_idx_h, 6833 gen_helper_neon_sqrdmulh_idx_s, 6834 }; 6835 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6836 6837 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6838 gen_helper_neon_sqrdmlah_idx_h, 6839 gen_helper_neon_sqrdmlah_idx_s, 6840 }; 6841 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6842 f_vector_idx_sqrdmlah) 6843 6844 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6845 gen_helper_neon_sqrdmlsh_idx_h, 6846 gen_helper_neon_sqrdmlsh_idx_s, 6847 }; 6848 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6849 f_vector_idx_sqrdmlsh) 6850 6851 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6852 gen_helper_gvec_4 *fn) 6853 { 6854 if (fp_access_check(s)) { 6855 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6856 } 6857 return true; 6858 } 6859 6860 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6861 gen_helper_gvec_4_ptr *fn) 6862 { 6863 if (fp_access_check(s)) { 6864 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6865 } 6866 return true; 6867 } 6868 6869 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6870 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6871 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6872 gen_helper_gvec_sudot_idx_b) 6873 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6874 gen_helper_gvec_usdot_idx_b) 6875 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6876 gen_helper_gvec_bfdot_idx) 6877 6878 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6879 { 6880 if (!dc_isar_feature(aa64_bf16, s)) { 6881 return false; 6882 } 6883 if (fp_access_check(s)) { 6884 /* Q bit selects BFMLALB vs BFMLALT. */ 6885 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6886 s->fpcr_ah ? FPST_AH : FPST_A64, 6887 (a->idx << 1) | a->q, 6888 gen_helper_gvec_bfmlal_idx); 6889 } 6890 return true; 6891 } 6892 6893 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6894 { 6895 gen_helper_gvec_4_ptr *fn; 6896 6897 if (!dc_isar_feature(aa64_fcma, s)) { 6898 return false; 6899 } 6900 switch (a->esz) { 6901 case MO_16: 6902 if (!dc_isar_feature(aa64_fp16, s)) { 6903 return false; 6904 } 6905 fn = gen_helper_gvec_fcmlah_idx; 6906 break; 6907 case MO_32: 6908 fn = gen_helper_gvec_fcmlas_idx; 6909 break; 6910 default: 6911 g_assert_not_reached(); 6912 } 6913 if (fp_access_check(s)) { 6914 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6915 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 6916 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); 6917 } 6918 return true; 6919 } 6920 6921 /* 6922 * Advanced SIMD scalar pairwise 6923 */ 6924 6925 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6926 { 6927 switch (a->esz) { 6928 case MO_64: 6929 if (fp_access_check(s)) { 6930 TCGv_i64 t0 = tcg_temp_new_i64(); 6931 TCGv_i64 t1 = tcg_temp_new_i64(); 6932 6933 read_vec_element(s, t0, a->rn, 0, MO_64); 6934 read_vec_element(s, t1, a->rn, 1, MO_64); 6935 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6936 write_fp_dreg(s, a->rd, t0); 6937 } 6938 break; 6939 case MO_32: 6940 if (fp_access_check(s)) { 6941 TCGv_i32 t0 = tcg_temp_new_i32(); 6942 TCGv_i32 t1 = tcg_temp_new_i32(); 6943 6944 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6945 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6946 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6947 write_fp_sreg(s, a->rd, t0); 6948 } 6949 break; 6950 case MO_16: 6951 if (!dc_isar_feature(aa64_fp16, s)) { 6952 return false; 6953 } 6954 if (fp_access_check(s)) { 6955 TCGv_i32 t0 = tcg_temp_new_i32(); 6956 TCGv_i32 t1 = tcg_temp_new_i32(); 6957 6958 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6959 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6960 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6961 write_fp_sreg(s, a->rd, t0); 6962 } 6963 break; 6964 default: 6965 g_assert_not_reached(); 6966 } 6967 return true; 6968 } 6969 6970 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, 6971 const FPScalar *fnormal, 6972 const FPScalar *fah) 6973 { 6974 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); 6975 } 6976 6977 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6978 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) 6979 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) 6980 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6981 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6982 6983 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6984 { 6985 if (fp_access_check(s)) { 6986 TCGv_i64 t0 = tcg_temp_new_i64(); 6987 TCGv_i64 t1 = tcg_temp_new_i64(); 6988 6989 read_vec_element(s, t0, a->rn, 0, MO_64); 6990 read_vec_element(s, t1, a->rn, 1, MO_64); 6991 tcg_gen_add_i64(t0, t0, t1); 6992 write_fp_dreg(s, a->rd, t0); 6993 } 6994 return true; 6995 } 6996 6997 /* 6998 * Floating-point conditional select 6999 */ 7000 7001 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 7002 { 7003 TCGv_i64 t_true, t_false; 7004 DisasCompare64 c; 7005 int check = fp_access_check_scalar_hsd(s, a->esz); 7006 7007 if (check <= 0) { 7008 return check == 0; 7009 } 7010 7011 /* Zero extend sreg & hreg inputs to 64 bits now. */ 7012 t_true = tcg_temp_new_i64(); 7013 t_false = tcg_temp_new_i64(); 7014 read_vec_element(s, t_true, a->rn, 0, a->esz); 7015 read_vec_element(s, t_false, a->rm, 0, a->esz); 7016 7017 a64_test_cc(&c, a->cond); 7018 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 7019 t_true, t_false); 7020 7021 /* 7022 * Note that sregs & hregs write back zeros to the high bits, 7023 * and we've already done the zero-extension. 
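     * Writing back the full 64-bit D register therefore gives the required
     * zeros in the high bits for the sreg and hreg cases.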
7024 */ 7025 write_fp_dreg(s, a->rd, t_true); 7026 return true; 7027 } 7028 7029 /* 7030 * Advanced SIMD Extract 7031 */ 7032 7033 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 7034 { 7035 if (fp_access_check(s)) { 7036 TCGv_i64 lo = read_fp_dreg(s, a->rn); 7037 if (a->imm != 0) { 7038 TCGv_i64 hi = read_fp_dreg(s, a->rm); 7039 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 7040 } 7041 write_fp_dreg(s, a->rd, lo); 7042 } 7043 return true; 7044 } 7045 7046 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 7047 { 7048 TCGv_i64 lo, hi; 7049 int pos = (a->imm & 7) * 8; 7050 int elt = a->imm >> 3; 7051 7052 if (!fp_access_check(s)) { 7053 return true; 7054 } 7055 7056 lo = tcg_temp_new_i64(); 7057 hi = tcg_temp_new_i64(); 7058 7059 read_vec_element(s, lo, a->rn, elt, MO_64); 7060 elt++; 7061 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 7062 elt++; 7063 7064 if (pos != 0) { 7065 TCGv_i64 hh = tcg_temp_new_i64(); 7066 tcg_gen_extract2_i64(lo, lo, hi, pos); 7067 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 7068 tcg_gen_extract2_i64(hi, hi, hh, pos); 7069 } 7070 7071 write_vec_element(s, lo, a->rd, 0, MO_64); 7072 write_vec_element(s, hi, a->rd, 1, MO_64); 7073 clear_vec_high(s, true, a->rd); 7074 return true; 7075 } 7076 7077 /* 7078 * Floating-point data-processing (3 source) 7079 */ 7080 7081 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 7082 { 7083 TCGv_ptr fpst; 7084 7085 /* 7086 * These are fused multiply-add. Note that doing the negations here 7087 * as separate steps is correct: an input NaN should come out with 7088 * its sign bit flipped if it is a negated-input. 7089 */ 7090 switch (a->esz) { 7091 case MO_64: 7092 if (fp_access_check(s)) { 7093 TCGv_i64 tn = read_fp_dreg(s, a->rn); 7094 TCGv_i64 tm = read_fp_dreg(s, a->rm); 7095 TCGv_i64 ta = read_fp_dreg(s, a->ra); 7096 7097 if (neg_a) { 7098 gen_vfp_maybe_ah_negd(s, ta, ta); 7099 } 7100 if (neg_n) { 7101 gen_vfp_maybe_ah_negd(s, tn, tn); 7102 } 7103 fpst = fpstatus_ptr(FPST_A64); 7104 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 7105 write_fp_dreg_merging(s, a->rd, a->ra, ta); 7106 } 7107 break; 7108 7109 case MO_32: 7110 if (fp_access_check(s)) { 7111 TCGv_i32 tn = read_fp_sreg(s, a->rn); 7112 TCGv_i32 tm = read_fp_sreg(s, a->rm); 7113 TCGv_i32 ta = read_fp_sreg(s, a->ra); 7114 7115 if (neg_a) { 7116 gen_vfp_maybe_ah_negs(s, ta, ta); 7117 } 7118 if (neg_n) { 7119 gen_vfp_maybe_ah_negs(s, tn, tn); 7120 } 7121 fpst = fpstatus_ptr(FPST_A64); 7122 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 7123 write_fp_sreg_merging(s, a->rd, a->ra, ta); 7124 } 7125 break; 7126 7127 case MO_16: 7128 if (!dc_isar_feature(aa64_fp16, s)) { 7129 return false; 7130 } 7131 if (fp_access_check(s)) { 7132 TCGv_i32 tn = read_fp_hreg(s, a->rn); 7133 TCGv_i32 tm = read_fp_hreg(s, a->rm); 7134 TCGv_i32 ta = read_fp_hreg(s, a->ra); 7135 7136 if (neg_a) { 7137 gen_vfp_maybe_ah_negh(s, ta, ta); 7138 } 7139 if (neg_n) { 7140 gen_vfp_maybe_ah_negh(s, tn, tn); 7141 } 7142 fpst = fpstatus_ptr(FPST_A64_F16); 7143 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 7144 write_fp_hreg_merging(s, a->rd, a->ra, ta); 7145 } 7146 break; 7147 7148 default: 7149 return false; 7150 } 7151 return true; 7152 } 7153 7154 TRANS(FMADD, do_fmadd, a, false, false) 7155 TRANS(FNMADD, do_fmadd, a, true, true) 7156 TRANS(FMSUB, do_fmadd, a, false, true) 7157 TRANS(FNMSUB, do_fmadd, a, true, false) 7158 7159 /* 7160 * Advanced SIMD Across Lanes 7161 */ 7162 7163 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool 
widen, 7164 MemOp src_sign, NeonGenTwo64OpFn *fn) 7165 { 7166 TCGv_i64 tcg_res, tcg_elt; 7167 MemOp src_mop = a->esz | src_sign; 7168 int elements = (a->q ? 16 : 8) >> a->esz; 7169 7170 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 7171 if (elements < 4) { 7172 return false; 7173 } 7174 if (!fp_access_check(s)) { 7175 return true; 7176 } 7177 7178 tcg_res = tcg_temp_new_i64(); 7179 tcg_elt = tcg_temp_new_i64(); 7180 7181 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 7182 for (int i = 1; i < elements; i++) { 7183 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 7184 fn(tcg_res, tcg_res, tcg_elt); 7185 } 7186 7187 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 7188 write_fp_dreg(s, a->rd, tcg_res); 7189 return true; 7190 } 7191 7192 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 7193 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 7194 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 7195 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 7196 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 7197 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 7198 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 7199 7200 /* 7201 * do_fp_reduction helper 7202 * 7203 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7204 * important for correct NaN propagation that we do these 7205 * operations in exactly the order specified by the pseudocode. 7206 * 7207 * This is a recursive function. 7208 */ 7209 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 7210 int ebase, int ecount, TCGv_ptr fpst, 7211 NeonGenTwoSingleOpFn *fn) 7212 { 7213 if (ecount == 1) { 7214 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 7215 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 7216 return tcg_elem; 7217 } else { 7218 int half = ecount >> 1; 7219 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7220 7221 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 7222 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 7223 tcg_res = tcg_temp_new_i32(); 7224 7225 fn(tcg_res, tcg_lo, tcg_hi, fpst); 7226 return tcg_res; 7227 } 7228 } 7229 7230 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 7231 NeonGenTwoSingleOpFn *fnormal, 7232 NeonGenTwoSingleOpFn *fah) 7233 { 7234 if (fp_access_check(s)) { 7235 MemOp esz = a->esz; 7236 int elts = (a->q ? 16 : 8) >> esz; 7237 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 7238 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, 7239 s->fpcr_ah ? 
fah : fnormal); 7240 write_fp_sreg(s, a->rd, res); 7241 } 7242 return true; 7243 } 7244 7245 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, 7246 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) 7247 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, 7248 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) 7249 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, 7250 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) 7251 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, 7252 gen_helper_vfp_minh, gen_helper_vfp_ah_minh) 7253 7254 TRANS(FMAXNMV_s, do_fp_reduction, a, 7255 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) 7256 TRANS(FMINNMV_s, do_fp_reduction, a, 7257 gen_helper_vfp_minnums, gen_helper_vfp_minnums) 7258 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) 7259 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) 7260 7261 /* 7262 * Floating-point Immediate 7263 */ 7264 7265 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 7266 { 7267 int check = fp_access_check_scalar_hsd(s, a->esz); 7268 uint64_t imm; 7269 7270 if (check <= 0) { 7271 return check == 0; 7272 } 7273 7274 imm = vfp_expand_imm(a->esz, a->imm); 7275 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 7276 return true; 7277 } 7278 7279 /* 7280 * Floating point compare, conditional compare 7281 */ 7282 7283 static void handle_fp_compare(DisasContext *s, int size, 7284 unsigned int rn, unsigned int rm, 7285 bool cmp_with_zero, bool signal_all_nans) 7286 { 7287 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 7288 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); 7289 7290 if (size == MO_64) { 7291 TCGv_i64 tcg_vn, tcg_vm; 7292 7293 tcg_vn = read_fp_dreg(s, rn); 7294 if (cmp_with_zero) { 7295 tcg_vm = tcg_constant_i64(0); 7296 } else { 7297 tcg_vm = read_fp_dreg(s, rm); 7298 } 7299 if (signal_all_nans) { 7300 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7301 } else { 7302 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7303 } 7304 } else { 7305 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 7306 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 7307 7308 read_vec_element_i32(s, tcg_vn, rn, 0, size); 7309 if (cmp_with_zero) { 7310 tcg_gen_movi_i32(tcg_vm, 0); 7311 } else { 7312 read_vec_element_i32(s, tcg_vm, rm, 0, size); 7313 } 7314 7315 switch (size) { 7316 case MO_32: 7317 if (signal_all_nans) { 7318 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7319 } else { 7320 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7321 } 7322 break; 7323 case MO_16: 7324 if (signal_all_nans) { 7325 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7326 } else { 7327 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7328 } 7329 break; 7330 default: 7331 g_assert_not_reached(); 7332 } 7333 } 7334 7335 gen_set_nzcv(tcg_flags); 7336 } 7337 7338 /* FCMP, FCMPE */ 7339 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 7340 { 7341 int check = fp_access_check_scalar_hsd(s, a->esz); 7342 7343 if (check <= 0) { 7344 return check == 0; 7345 } 7346 7347 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 7348 return true; 7349 } 7350 7351 /* FCCMP, FCCMPE */ 7352 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 7353 { 7354 TCGLabel *label_continue = NULL; 7355 int check = fp_access_check_scalar_hsd(s, a->esz); 7356 7357 if (check <= 0) { 7358 return check == 0; 7359 } 7360 7361 if (a->cond < 0x0e) { /* not always */ 7362 TCGLabel *label_match = gen_new_label(); 7363 label_continue = gen_new_label(); 7364 
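        /*
         * Branch to the comparison if the condition holds; otherwise set
         * NZCV directly from the immediate and skip the comparison.
         */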
arm_gen_test_cc(a->cond, label_match); 7365 /* nomatch: */ 7366 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7367 tcg_gen_br(label_continue); 7368 gen_set_label(label_match); 7369 } 7370 7371 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7372 7373 if (label_continue) { 7374 gen_set_label(label_continue); 7375 } 7376 return true; 7377 } 7378 7379 /* 7380 * Advanced SIMD Modified Immediate 7381 */ 7382 7383 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7384 { 7385 if (!dc_isar_feature(aa64_fp16, s)) { 7386 return false; 7387 } 7388 if (fp_access_check(s)) { 7389 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7390 a->q ? 16 : 8, vec_full_reg_size(s), 7391 vfp_expand_imm(MO_16, a->abcdefgh)); 7392 } 7393 return true; 7394 } 7395 7396 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7397 int64_t c, uint32_t oprsz, uint32_t maxsz) 7398 { 7399 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7400 } 7401 7402 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7403 { 7404 GVecGen2iFn *fn; 7405 7406 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7407 if ((a->cmode & 1) && a->cmode < 12) { 7408 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7409 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 7410 } else { 7411 /* There is one unallocated cmode/op combination in this space */ 7412 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 7413 return false; 7414 } 7415 fn = gen_movi; 7416 } 7417 7418 if (fp_access_check(s)) { 7419 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 7420 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 7421 } 7422 return true; 7423 } 7424 7425 /* 7426 * Advanced SIMD Shift by Immediate 7427 */ 7428 7429 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 7430 { 7431 if (fp_access_check(s)) { 7432 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 7433 } 7434 return true; 7435 } 7436 7437 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 7438 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 7439 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 7440 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 7441 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 7442 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 7443 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 7444 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 7445 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 7446 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 7447 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 7448 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 7449 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 7450 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 7451 7452 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 7453 { 7454 TCGv_i64 tcg_rn, tcg_rd; 7455 int esz = a->esz; 7456 int esize; 7457 7458 if (!fp_access_check(s)) { 7459 return true; 7460 } 7461 7462 /* 7463 * For the LL variants the store is larger than the load, 7464 * so if rd == rn we would overwrite parts of our input. 7465 * So load everything right now and use shifts in the main loop. 
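     * (A single 64-bit read of the Q-selected input half is widened
     * element by element below.)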
7466 */ 7467 tcg_rd = tcg_temp_new_i64(); 7468 tcg_rn = tcg_temp_new_i64(); 7469 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7470 7471 esize = 8 << esz; 7472 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7473 if (is_u) { 7474 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7475 } else { 7476 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7477 } 7478 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7479 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7480 } 7481 clear_vec_high(s, true, a->rd); 7482 return true; 7483 } 7484 7485 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7486 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7487 7488 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7489 { 7490 assert(shift >= 0 && shift <= 64); 7491 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7492 } 7493 7494 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7495 { 7496 assert(shift >= 0 && shift <= 64); 7497 if (shift == 64) { 7498 tcg_gen_movi_i64(dst, 0); 7499 } else { 7500 tcg_gen_shri_i64(dst, src, shift); 7501 } 7502 } 7503 7504 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7505 { 7506 gen_sshr_d(src, src, shift); 7507 tcg_gen_add_i64(dst, dst, src); 7508 } 7509 7510 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7511 { 7512 gen_ushr_d(src, src, shift); 7513 tcg_gen_add_i64(dst, dst, src); 7514 } 7515 7516 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7517 { 7518 assert(shift >= 0 && shift <= 32); 7519 if (shift) { 7520 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7521 tcg_gen_add_i64(dst, src, rnd); 7522 tcg_gen_sari_i64(dst, dst, shift); 7523 } else { 7524 tcg_gen_mov_i64(dst, src); 7525 } 7526 } 7527 7528 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7529 { 7530 assert(shift >= 0 && shift <= 32); 7531 if (shift) { 7532 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7533 tcg_gen_add_i64(dst, src, rnd); 7534 tcg_gen_shri_i64(dst, dst, shift); 7535 } else { 7536 tcg_gen_mov_i64(dst, src); 7537 } 7538 } 7539 7540 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7541 { 7542 assert(shift >= 0 && shift <= 64); 7543 if (shift == 0) { 7544 tcg_gen_mov_i64(dst, src); 7545 } else if (shift == 64) { 7546 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7547 tcg_gen_movi_i64(dst, 0); 7548 } else { 7549 TCGv_i64 rnd = tcg_temp_new_i64(); 7550 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7551 tcg_gen_sari_i64(dst, src, shift); 7552 tcg_gen_add_i64(dst, dst, rnd); 7553 } 7554 } 7555 7556 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7557 { 7558 assert(shift >= 0 && shift <= 64); 7559 if (shift == 0) { 7560 tcg_gen_mov_i64(dst, src); 7561 } else if (shift == 64) { 7562 /* Rounding will propagate bit 63 into bit 64. */ 7563 tcg_gen_shri_i64(dst, src, 63); 7564 } else { 7565 TCGv_i64 rnd = tcg_temp_new_i64(); 7566 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7567 tcg_gen_shri_i64(dst, src, shift); 7568 tcg_gen_add_i64(dst, dst, rnd); 7569 } 7570 } 7571 7572 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7573 { 7574 gen_srshr_d(src, src, shift); 7575 tcg_gen_add_i64(dst, dst, src); 7576 } 7577 7578 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7579 { 7580 gen_urshr_d(src, src, shift); 7581 tcg_gen_add_i64(dst, dst, src); 7582 } 7583 7584 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7585 { 7586 /* If shift is 64, dst is unchanged. 
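     * Inserting zero bits from src would be a no-op, so skip the deposit.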
*/ 7587 if (shift != 64) { 7588 tcg_gen_shri_i64(src, src, shift); 7589 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7590 } 7591 } 7592 7593 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7594 { 7595 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7596 } 7597 7598 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7599 WideShiftImmFn * const fns[3], MemOp sign) 7600 { 7601 TCGv_i64 tcg_rn, tcg_rd; 7602 int esz = a->esz; 7603 int esize; 7604 WideShiftImmFn *fn; 7605 7606 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7607 7608 if (!fp_access_check(s)) { 7609 return true; 7610 } 7611 7612 tcg_rn = tcg_temp_new_i64(); 7613 tcg_rd = tcg_temp_new_i64(); 7614 tcg_gen_movi_i64(tcg_rd, 0); 7615 7616 fn = fns[esz]; 7617 esize = 8 << esz; 7618 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7619 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7620 fn(tcg_rn, tcg_rn, a->imm); 7621 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7622 } 7623 7624 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7625 clear_vec_high(s, a->q, a->rd); 7626 return true; 7627 } 7628 7629 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7630 { 7631 tcg_gen_sari_i64(d, s, i); 7632 tcg_gen_ext16u_i64(d, d); 7633 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7634 } 7635 7636 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7637 { 7638 tcg_gen_sari_i64(d, s, i); 7639 tcg_gen_ext32u_i64(d, d); 7640 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7641 } 7642 7643 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7644 { 7645 gen_sshr_d(d, s, i); 7646 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7647 } 7648 7649 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7650 { 7651 tcg_gen_shri_i64(d, s, i); 7652 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7653 } 7654 7655 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7656 { 7657 tcg_gen_shri_i64(d, s, i); 7658 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7659 } 7660 7661 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7662 { 7663 gen_ushr_d(d, s, i); 7664 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7665 } 7666 7667 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7668 { 7669 tcg_gen_sari_i64(d, s, i); 7670 tcg_gen_ext16u_i64(d, d); 7671 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7672 } 7673 7674 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7675 { 7676 tcg_gen_sari_i64(d, s, i); 7677 tcg_gen_ext32u_i64(d, d); 7678 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7679 } 7680 7681 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7682 { 7683 gen_sshr_d(d, s, i); 7684 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7685 } 7686 7687 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7688 { 7689 gen_srshr_bhs(d, s, i); 7690 tcg_gen_ext16u_i64(d, d); 7691 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7692 } 7693 7694 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7695 { 7696 gen_srshr_bhs(d, s, i); 7697 tcg_gen_ext32u_i64(d, d); 7698 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7699 } 7700 7701 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7702 { 7703 gen_srshr_d(d, s, i); 7704 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7705 } 7706 7707 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7708 { 7709 gen_urshr_bhs(d, s, i); 7710 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7711 } 7712 7713 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7714 
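/* Rounding shift right, then narrow with unsigned saturation to 16 bits. */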
{ 7715 gen_urshr_bhs(d, s, i); 7716 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7717 } 7718 7719 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7720 { 7721 gen_urshr_d(d, s, i); 7722 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7723 } 7724 7725 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7726 { 7727 gen_srshr_bhs(d, s, i); 7728 tcg_gen_ext16u_i64(d, d); 7729 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7730 } 7731 7732 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7733 { 7734 gen_srshr_bhs(d, s, i); 7735 tcg_gen_ext32u_i64(d, d); 7736 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7737 } 7738 7739 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7740 { 7741 gen_srshr_d(d, s, i); 7742 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7743 } 7744 7745 static WideShiftImmFn * const shrn_fns[] = { 7746 tcg_gen_shri_i64, 7747 tcg_gen_shri_i64, 7748 gen_ushr_d, 7749 }; 7750 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7751 7752 static WideShiftImmFn * const rshrn_fns[] = { 7753 gen_urshr_bhs, 7754 gen_urshr_bhs, 7755 gen_urshr_d, 7756 }; 7757 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7758 7759 static WideShiftImmFn * const sqshrn_fns[] = { 7760 gen_sqshrn_b, 7761 gen_sqshrn_h, 7762 gen_sqshrn_s, 7763 }; 7764 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7765 7766 static WideShiftImmFn * const uqshrn_fns[] = { 7767 gen_uqshrn_b, 7768 gen_uqshrn_h, 7769 gen_uqshrn_s, 7770 }; 7771 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7772 7773 static WideShiftImmFn * const sqshrun_fns[] = { 7774 gen_sqshrun_b, 7775 gen_sqshrun_h, 7776 gen_sqshrun_s, 7777 }; 7778 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7779 7780 static WideShiftImmFn * const sqrshrn_fns[] = { 7781 gen_sqrshrn_b, 7782 gen_sqrshrn_h, 7783 gen_sqrshrn_s, 7784 }; 7785 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7786 7787 static WideShiftImmFn * const uqrshrn_fns[] = { 7788 gen_uqrshrn_b, 7789 gen_uqrshrn_h, 7790 gen_uqrshrn_s, 7791 }; 7792 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7793 7794 static WideShiftImmFn * const sqrshrun_fns[] = { 7795 gen_sqrshrun_b, 7796 gen_sqrshrun_h, 7797 gen_sqrshrun_s, 7798 }; 7799 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7800 7801 /* 7802 * Advanced SIMD Scalar Shift by Immediate 7803 */ 7804 7805 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7806 WideShiftImmFn *fn, bool accumulate, 7807 MemOp sign) 7808 { 7809 if (fp_access_check(s)) { 7810 TCGv_i64 rd = tcg_temp_new_i64(); 7811 TCGv_i64 rn = tcg_temp_new_i64(); 7812 7813 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7814 if (accumulate) { 7815 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7816 } 7817 fn(rd, rn, a->imm); 7818 write_fp_dreg(s, a->rd, rd); 7819 } 7820 return true; 7821 } 7822 7823 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7824 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7825 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7826 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7827 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7828 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7829 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7830 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7831 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7832 7833 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7834 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7835 7836 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7837 NeonGenTwoOpEnvFn *fn) 7838 { 7839 TCGv_i32 t = tcg_temp_new_i32(); 7840 tcg_gen_extrl_i64_i32(t, s); 7841 fn(t, tcg_env, t, tcg_constant_i32(i)); 7842 tcg_gen_extu_i32_i64(d, t); 7843 } 7844 7845 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7846 { 7847 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7848 } 7849 7850 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7851 { 7852 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7853 } 7854 7855 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7856 { 7857 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7858 } 7859 7860 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7861 { 7862 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7863 } 7864 7865 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7866 { 7867 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7868 } 7869 7870 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7871 { 7872 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7873 } 7874 7875 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7876 { 7877 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7878 } 7879 7880 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7881 { 7882 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7883 } 7884 7885 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7886 { 7887 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7888 } 7889 7890 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7891 { 7892 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7893 } 7894 7895 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7896 { 7897 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7898 } 7899 7900 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7901 { 7902 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7903 } 7904 7905 static WideShiftImmFn * const f_scalar_sqshli[] = { 7906 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7907 }; 7908 7909 static WideShiftImmFn * const f_scalar_uqshli[] = { 7910 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7911 }; 7912 7913 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7914 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7915 }; 7916 7917 /* Note that the helpers sign-extend their inputs, so don't do it here. 
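Hence the TRANS lines below pass 0, not MO_SIGN, as the sign argument to do_scalar_shift_imm.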
*/ 7918 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7919 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7920 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7921 7922 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7923 WideShiftImmFn * const fns[3], 7924 MemOp sign, bool zext) 7925 { 7926 MemOp esz = a->esz; 7927 7928 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7929 7930 if (fp_access_check(s)) { 7931 TCGv_i64 rd = tcg_temp_new_i64(); 7932 TCGv_i64 rn = tcg_temp_new_i64(); 7933 7934 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7935 fns[esz](rd, rn, a->imm); 7936 if (zext) { 7937 tcg_gen_ext_i64(rd, rd, esz); 7938 } 7939 write_fp_dreg(s, a->rd, rd); 7940 } 7941 return true; 7942 } 7943 7944 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7945 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7946 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7947 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7948 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7949 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7950 7951 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7952 { 7953 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7954 tcg_rd = cpu_reg(s, a->rd); 7955 7956 if (!a->sf && is_signed) { 7957 tcg_n = tcg_temp_new_i64(); 7958 tcg_m = tcg_temp_new_i64(); 7959 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7960 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7961 } else { 7962 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7963 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7964 } 7965 7966 if (is_signed) { 7967 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7968 } else { 7969 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7970 } 7971 7972 if (!a->sf) { /* zero extend final result */ 7973 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7974 } 7975 return true; 7976 } 7977 7978 TRANS(SDIV, do_div, a, true) 7979 TRANS(UDIV, do_div, a, false) 7980 7981 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7982 * Note that it is the caller's responsibility to ensure that the 7983 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7984 * mandated semantics for out of range shifts. 7985 */ 7986 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7987 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7988 { 7989 switch (shift_type) { 7990 case A64_SHIFT_TYPE_LSL: 7991 tcg_gen_shl_i64(dst, src, shift_amount); 7992 break; 7993 case A64_SHIFT_TYPE_LSR: 7994 tcg_gen_shr_i64(dst, src, shift_amount); 7995 break; 7996 case A64_SHIFT_TYPE_ASR: 7997 if (!sf) { 7998 tcg_gen_ext32s_i64(dst, src); 7999 } 8000 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 8001 break; 8002 case A64_SHIFT_TYPE_ROR: 8003 if (sf) { 8004 tcg_gen_rotr_i64(dst, src, shift_amount); 8005 } else { 8006 TCGv_i32 t0, t1; 8007 t0 = tcg_temp_new_i32(); 8008 t1 = tcg_temp_new_i32(); 8009 tcg_gen_extrl_i64_i32(t0, src); 8010 tcg_gen_extrl_i64_i32(t1, shift_amount); 8011 tcg_gen_rotr_i32(t0, t0, t1); 8012 tcg_gen_extu_i32_i64(dst, t0); 8013 } 8014 break; 8015 default: 8016 assert(FALSE); /* all shift types should be handled */ 8017 break; 8018 } 8019 8020 if (!sf) { /* zero extend final result */ 8021 tcg_gen_ext32u_i64(dst, dst); 8022 } 8023 } 8024 8025 /* Shift a TCGv src by immediate, put result in dst. 
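 * A zero immediate is special-cased as a plain move, so no TCG shift op is emitted for it.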
8026 * The shift amount must be in range (this should always be true as the 8027 * relevant instructions will UNDEF on bad shift immediates). 8028 */ 8029 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 8030 enum a64_shift_type shift_type, unsigned int shift_i) 8031 { 8032 assert(shift_i < (sf ? 64 : 32)); 8033 8034 if (shift_i == 0) { 8035 tcg_gen_mov_i64(dst, src); 8036 } else { 8037 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 8038 } 8039 } 8040 8041 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 8042 enum a64_shift_type shift_type) 8043 { 8044 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8045 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8046 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8047 8048 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 8049 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 8050 return true; 8051 } 8052 8053 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 8054 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 8055 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 8056 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 8057 8058 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 8059 { 8060 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 8061 TCGv_i32 tcg_bytes; 8062 8063 switch (a->esz) { 8064 case MO_8: 8065 case MO_16: 8066 case MO_32: 8067 tcg_val = tcg_temp_new_i64(); 8068 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 8069 break; 8070 case MO_64: 8071 tcg_val = cpu_reg(s, a->rm); 8072 break; 8073 default: 8074 g_assert_not_reached(); 8075 } 8076 tcg_acc = cpu_reg(s, a->rn); 8077 tcg_bytes = tcg_constant_i32(1 << a->esz); 8078 tcg_rd = cpu_reg(s, a->rd); 8079 8080 if (crc32c) { 8081 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8082 } else { 8083 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8084 } 8085 return true; 8086 } 8087 8088 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 8089 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 8090 8091 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 8092 { 8093 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 8094 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 8095 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 8096 8097 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 8098 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8099 8100 if (setflag) { 8101 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8102 } else { 8103 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8104 } 8105 return true; 8106 } 8107 8108 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 8109 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 8110 8111 static bool trans_IRG(DisasContext *s, arg_rrr *a) 8112 { 8113 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8114 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 8115 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 8116 8117 if (s->ata[0]) { 8118 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 8119 } else { 8120 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 8121 } 8122 return true; 8123 } 8124 return false; 8125 } 8126 8127 static bool trans_GMI(DisasContext *s, arg_rrr *a) 8128 { 8129 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8130 TCGv_i64 t = tcg_temp_new_i64(); 8131 8132 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 8133 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8134 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 8135 return true; 8136 } 8137 return false; 8138 } 8139 8140 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 8141 { 8142 if (dc_isar_feature(aa64_pauth, s)) { 8143 
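        /*
         * PACGA: compute a generic authentication code for Xn using Xm|SP
         * as the modifier; architecturally the code occupies the top 32
         * bits of Xd and the low 32 bits are zero.
         */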
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 8144 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 8145 return true; 8146 } 8147 return false; 8148 } 8149 8150 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 8151 8152 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 8153 { 8154 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 8155 return true; 8156 } 8157 8158 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8159 { 8160 TCGv_i32 t32 = tcg_temp_new_i32(); 8161 8162 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8163 gen_helper_rbit(t32, t32); 8164 tcg_gen_extu_i32_i64(tcg_rd, t32); 8165 } 8166 8167 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 8168 { 8169 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8170 8171 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8172 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8173 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8174 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8175 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8176 } 8177 8178 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8179 { 8180 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 8181 } 8182 8183 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8184 { 8185 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 8186 } 8187 8188 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8189 { 8190 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8191 } 8192 8193 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8194 { 8195 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8196 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8197 } 8198 8199 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 8200 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 8201 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 8202 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 8203 8204 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8205 { 8206 TCGv_i32 t32 = tcg_temp_new_i32(); 8207 8208 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8209 tcg_gen_clzi_i32(t32, t32, 32); 8210 tcg_gen_extu_i32_i64(tcg_rd, t32); 8211 } 8212 8213 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8214 { 8215 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8216 } 8217 8218 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8219 { 8220 TCGv_i32 t32 = tcg_temp_new_i32(); 8221 8222 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8223 tcg_gen_clrsb_i32(t32, t32); 8224 tcg_gen_extu_i32_i64(tcg_rd, t32); 8225 } 8226 8227 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 8228 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32)

static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
{
    TCGv_i64 tcg_rd, tcg_rn;

    if (a->z) {
        if (a->rn != 31) {
            return false;
        }
        tcg_rn = tcg_constant_i64(0);
    } else {
        tcg_rn = cpu_reg_sp(s, a->rn);
    }
    if (s->pauth_active) {
        tcg_rd = cpu_reg(s, a->rd);
        fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
    }
    return true;
}

TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)

TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)

static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
{
    if (s->pauth_active) {
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        fn(tcg_rd, tcg_env, tcg_rd);
    }
    return true;
}

TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)

static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
                         ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
{
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;

    if (!a->sf && (a->sa & (1 << 5))) {
        return false;
    }

    tcg_rd = cpu_reg(s, a->rd);
    tcg_rn = cpu_reg(s, a->rn);

    tcg_rm = read_cpu_reg(s, a->rm, a->sf);
    if (a->sa) {
        shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
    }

    (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    if (setflags) {
        gen_logic_CC(a->sf, tcg_rd);
    }
    return true;
}

static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
{
    /*
     * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
     * register-register MOV and MVN, so it is worth special casing.
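     * With rn == ZR the operand reads as zero, so ORR collapses to a copy
     * of rm and ORN to its bitwise NOT.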
8303 */ 8304 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 8305 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8306 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8307 8308 if (a->n) { 8309 tcg_gen_not_i64(tcg_rd, tcg_rm); 8310 if (!a->sf) { 8311 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8312 } 8313 } else { 8314 if (a->sf) { 8315 tcg_gen_mov_i64(tcg_rd, tcg_rm); 8316 } else { 8317 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 8318 } 8319 } 8320 return true; 8321 } 8322 8323 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 8324 } 8325 8326 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 8327 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 8328 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 8329 8330 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 8331 bool sub_op, bool setflags) 8332 { 8333 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 8334 8335 if (a->sa > 4) { 8336 return false; 8337 } 8338 8339 /* non-flag setting ops may use SP */ 8340 if (!setflags) { 8341 tcg_rd = cpu_reg_sp(s, a->rd); 8342 } else { 8343 tcg_rd = cpu_reg(s, a->rd); 8344 } 8345 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 8346 8347 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8348 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 8349 8350 tcg_result = tcg_temp_new_i64(); 8351 if (!setflags) { 8352 if (sub_op) { 8353 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8354 } else { 8355 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8356 } 8357 } else { 8358 if (sub_op) { 8359 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8360 } else { 8361 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8362 } 8363 } 8364 8365 if (a->sf) { 8366 tcg_gen_mov_i64(tcg_rd, tcg_result); 8367 } else { 8368 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8369 } 8370 return true; 8371 } 8372 8373 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8374 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8375 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8376 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8377 8378 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8379 bool sub_op, bool setflags) 8380 { 8381 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8382 8383 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8384 return false; 8385 } 8386 8387 tcg_rd = cpu_reg(s, a->rd); 8388 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8389 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8390 8391 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8392 8393 tcg_result = tcg_temp_new_i64(); 8394 if (!setflags) { 8395 if (sub_op) { 8396 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8397 } else { 8398 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8399 } 8400 } else { 8401 if (sub_op) { 8402 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8403 } else { 8404 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8405 } 8406 } 8407 8408 if (a->sf) { 8409 tcg_gen_mov_i64(tcg_rd, tcg_result); 8410 } else { 8411 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8412 } 8413 return true; 8414 } 8415 8416 TRANS(ADD_r, do_addsub_reg, a, false, false) 8417 TRANS(SUB_r, do_addsub_reg, a, true, false) 8418 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8419 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8420 8421 static bool do_mulh(DisasContext *s, arg_rrr *a, 8422 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8423 { 8424 TCGv_i64 discard = tcg_temp_new_i64(); 8425 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8426 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8427 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8428 8429 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8430 return true; 8431 } 8432 
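/*
 * tcg_gen_muls2/mulu2 produce the full 128-bit product as a (low, high)
 * pair; only the high half is kept in Rd, the low half goes to a
 * discarded temporary.
 */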
8433 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8434 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8435 8436 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8437 bool sf, bool is_sub, MemOp mop) 8438 { 8439 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8440 TCGv_i64 tcg_op1, tcg_op2; 8441 8442 if (mop == MO_64) { 8443 tcg_op1 = cpu_reg(s, a->rn); 8444 tcg_op2 = cpu_reg(s, a->rm); 8445 } else { 8446 tcg_op1 = tcg_temp_new_i64(); 8447 tcg_op2 = tcg_temp_new_i64(); 8448 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8449 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8450 } 8451 8452 if (a->ra == 31 && !is_sub) { 8453 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8454 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8455 } else { 8456 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8457 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8458 8459 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8460 if (is_sub) { 8461 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8462 } else { 8463 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8464 } 8465 } 8466 8467 if (!sf) { 8468 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8469 } 8470 return true; 8471 } 8472 8473 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8474 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8475 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8476 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8477 8478 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8479 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8480 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8481 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8482 8483 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8484 bool is_sub, bool setflags) 8485 { 8486 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8487 8488 tcg_rd = cpu_reg(s, a->rd); 8489 tcg_rn = cpu_reg(s, a->rn); 8490 8491 if (is_sub) { 8492 tcg_y = tcg_temp_new_i64(); 8493 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8494 } else { 8495 tcg_y = cpu_reg(s, a->rm); 8496 } 8497 8498 if (setflags) { 8499 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8500 } else { 8501 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8502 } 8503 return true; 8504 } 8505 8506 TRANS(ADC, do_adc_sbc, a, false, false) 8507 TRANS(SBC, do_adc_sbc, a, true, false) 8508 TRANS(ADCS, do_adc_sbc, a, false, true) 8509 TRANS(SBCS, do_adc_sbc, a, true, true) 8510 8511 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8512 { 8513 int mask = a->mask; 8514 TCGv_i64 tcg_rn; 8515 TCGv_i32 nzcv; 8516 8517 if (!dc_isar_feature(aa64_condm_4, s)) { 8518 return false; 8519 } 8520 8521 tcg_rn = read_cpu_reg(s, a->rn, 1); 8522 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8523 8524 nzcv = tcg_temp_new_i32(); 8525 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8526 8527 if (mask & 8) { /* N */ 8528 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8529 } 8530 if (mask & 4) { /* Z */ 8531 tcg_gen_not_i32(cpu_ZF, nzcv); 8532 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8533 } 8534 if (mask & 2) { /* C */ 8535 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8536 } 8537 if (mask & 1) { /* V */ 8538 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8539 } 8540 return true; 8541 } 8542 8543 static bool do_setf(DisasContext *s, int rn, int shift) 8544 { 8545 TCGv_i32 tmp = tcg_temp_new_i32(); 8546 8547 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8548 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8549 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8550 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8551 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8552 return true; 8553 } 8554 8555 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8556 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8557 8558 /* 
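 * Conditional compare: the flags are set from the comparison when the
 * condition holds, and forced to the #nzcv immediate when it does not: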
CCMP, CCMN */ 8559 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8560 { 8561 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8562 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8563 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8564 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8565 TCGv_i64 tcg_rn, tcg_y; 8566 DisasCompare c; 8567 unsigned nzcv; 8568 bool has_andc; 8569 8570 /* Set T0 = !COND. */ 8571 arm_test_cc(&c, a->cond); 8572 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8573 8574 /* Load the arguments for the new comparison. */ 8575 if (a->imm) { 8576 tcg_y = tcg_constant_i64(a->y); 8577 } else { 8578 tcg_y = cpu_reg(s, a->y); 8579 } 8580 tcg_rn = cpu_reg(s, a->rn); 8581 8582 /* Set the flags for the new comparison. */ 8583 if (a->op) { 8584 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8585 } else { 8586 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8587 } 8588 8589 /* 8590 * If COND was false, force the flags to #nzcv. Compute two masks 8591 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8592 * For tcg hosts that support ANDC, we can make do with just T1. 8593 * In either case, allow the tcg optimizer to delete any unused mask. 8594 */ 8595 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8596 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8597 8598 nzcv = a->nzcv; 8599 has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); 8600 if (nzcv & 8) { /* N */ 8601 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8602 } else { 8603 if (has_andc) { 8604 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8605 } else { 8606 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8607 } 8608 } 8609 if (nzcv & 4) { /* Z */ 8610 if (has_andc) { 8611 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8612 } else { 8613 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8614 } 8615 } else { 8616 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8617 } 8618 if (nzcv & 2) { /* C */ 8619 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8620 } else { 8621 if (has_andc) { 8622 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8623 } else { 8624 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8625 } 8626 } 8627 if (nzcv & 1) { /* V */ 8628 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8629 } else { 8630 if (has_andc) { 8631 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8632 } else { 8633 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8634 } 8635 } 8636 return true; 8637 } 8638 8639 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8640 { 8641 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8642 TCGv_i64 zero = tcg_constant_i64(0); 8643 DisasCompare64 c; 8644 8645 a64_test_cc(&c, a->cond); 8646 8647 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8648 /* CSET & CSETM. 
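These are CSINC/CSINV with both sources ZR, which reduces to a plain setcond/negsetcond on the inverted condition.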
*/ 8649 if (a->else_inv) { 8650 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8651 tcg_rd, c.value, zero); 8652 } else { 8653 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8654 tcg_rd, c.value, zero); 8655 } 8656 } else { 8657 TCGv_i64 t_true = cpu_reg(s, a->rn); 8658 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8659 8660 if (a->else_inv && a->else_inc) { 8661 tcg_gen_neg_i64(t_false, t_false); 8662 } else if (a->else_inv) { 8663 tcg_gen_not_i64(t_false, t_false); 8664 } else if (a->else_inc) { 8665 tcg_gen_addi_i64(t_false, t_false, 1); 8666 } 8667 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8668 } 8669 8670 if (!a->sf) { 8671 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8672 } 8673 return true; 8674 } 8675 8676 typedef struct FPScalar1Int { 8677 void (*gen_h)(TCGv_i32, TCGv_i32); 8678 void (*gen_s)(TCGv_i32, TCGv_i32); 8679 void (*gen_d)(TCGv_i64, TCGv_i64); 8680 } FPScalar1Int; 8681 8682 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8683 const FPScalar1Int *f, 8684 bool merging) 8685 { 8686 switch (a->esz) { 8687 case MO_64: 8688 if (fp_access_check(s)) { 8689 TCGv_i64 t = read_fp_dreg(s, a->rn); 8690 f->gen_d(t, t); 8691 if (merging) { 8692 write_fp_dreg_merging(s, a->rd, a->rd, t); 8693 } else { 8694 write_fp_dreg(s, a->rd, t); 8695 } 8696 } 8697 break; 8698 case MO_32: 8699 if (fp_access_check(s)) { 8700 TCGv_i32 t = read_fp_sreg(s, a->rn); 8701 f->gen_s(t, t); 8702 if (merging) { 8703 write_fp_sreg_merging(s, a->rd, a->rd, t); 8704 } else { 8705 write_fp_sreg(s, a->rd, t); 8706 } 8707 } 8708 break; 8709 case MO_16: 8710 if (!dc_isar_feature(aa64_fp16, s)) { 8711 return false; 8712 } 8713 if (fp_access_check(s)) { 8714 TCGv_i32 t = read_fp_hreg(s, a->rn); 8715 f->gen_h(t, t); 8716 if (merging) { 8717 write_fp_hreg_merging(s, a->rd, a->rd, t); 8718 } else { 8719 write_fp_sreg(s, a->rd, t); 8720 } 8721 } 8722 break; 8723 default: 8724 return false; 8725 } 8726 return true; 8727 } 8728 8729 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, 8730 const FPScalar1Int *fnormal, 8731 const FPScalar1Int *fah) 8732 { 8733 return do_fp1_scalar_int(s, a, s->fpcr_ah ? 
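/* FPCR.AH == 1 selects the alternate-handling variants */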
fah : fnormal, true); 8734 } 8735 8736 static const FPScalar1Int f_scalar_fmov = { 8737 tcg_gen_mov_i32, 8738 tcg_gen_mov_i32, 8739 tcg_gen_mov_i64, 8740 }; 8741 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) 8742 8743 static const FPScalar1Int f_scalar_fabs = { 8744 gen_vfp_absh, 8745 gen_vfp_abss, 8746 gen_vfp_absd, 8747 }; 8748 static const FPScalar1Int f_scalar_ah_fabs = { 8749 gen_vfp_ah_absh, 8750 gen_vfp_ah_abss, 8751 gen_vfp_ah_absd, 8752 }; 8753 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) 8754 8755 static const FPScalar1Int f_scalar_fneg = { 8756 gen_vfp_negh, 8757 gen_vfp_negs, 8758 gen_vfp_negd, 8759 }; 8760 static const FPScalar1Int f_scalar_ah_fneg = { 8761 gen_vfp_ah_negh, 8762 gen_vfp_ah_negs, 8763 gen_vfp_ah_negd, 8764 }; 8765 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) 8766 8767 typedef struct FPScalar1 { 8768 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8769 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8770 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8771 } FPScalar1; 8772 8773 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, 8774 const FPScalar1 *f, int rmode, 8775 ARMFPStatusFlavour fpsttype) 8776 { 8777 TCGv_i32 tcg_rmode = NULL; 8778 TCGv_ptr fpst; 8779 TCGv_i64 t64; 8780 TCGv_i32 t32; 8781 int check = fp_access_check_scalar_hsd(s, a->esz); 8782 8783 if (check <= 0) { 8784 return check == 0; 8785 } 8786 8787 fpst = fpstatus_ptr(fpsttype); 8788 if (rmode >= 0) { 8789 tcg_rmode = gen_set_rmode(rmode, fpst); 8790 } 8791 8792 switch (a->esz) { 8793 case MO_64: 8794 t64 = read_fp_dreg(s, a->rn); 8795 f->gen_d(t64, t64, fpst); 8796 write_fp_dreg_merging(s, a->rd, a->rd, t64); 8797 break; 8798 case MO_32: 8799 t32 = read_fp_sreg(s, a->rn); 8800 f->gen_s(t32, t32, fpst); 8801 write_fp_sreg_merging(s, a->rd, a->rd, t32); 8802 break; 8803 case MO_16: 8804 t32 = read_fp_hreg(s, a->rn); 8805 f->gen_h(t32, t32, fpst); 8806 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8807 break; 8808 default: 8809 g_assert_not_reached(); 8810 } 8811 8812 if (rmode >= 0) { 8813 gen_restore_rmode(tcg_rmode, fpst); 8814 } 8815 return true; 8816 } 8817 8818 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8819 const FPScalar1 *f, int rmode) 8820 { 8821 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, 8822 a->esz == MO_16 ? 
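/* half-precision ops use the dedicated FP16 float_status */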
8823 FPST_A64_F16 : FPST_A64); 8824 } 8825 8826 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, 8827 const FPScalar1 *f, int rmode) 8828 { 8829 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); 8830 } 8831 8832 static const FPScalar1 f_scalar_fsqrt = { 8833 gen_helper_vfp_sqrth, 8834 gen_helper_vfp_sqrts, 8835 gen_helper_vfp_sqrtd, 8836 }; 8837 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8838 8839 static const FPScalar1 f_scalar_frint = { 8840 gen_helper_advsimd_rinth, 8841 gen_helper_rints, 8842 gen_helper_rintd, 8843 }; 8844 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8845 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8846 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8847 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8848 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8849 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8850 8851 static const FPScalar1 f_scalar_frintx = { 8852 gen_helper_advsimd_rinth_exact, 8853 gen_helper_rints_exact, 8854 gen_helper_rintd_exact, 8855 }; 8856 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8857 8858 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) 8859 { 8860 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; 8861 TCGv_i32 t32; 8862 int check; 8863 8864 if (!dc_isar_feature(aa64_bf16, s)) { 8865 return false; 8866 } 8867 8868 check = fp_access_check_scalar_hsd(s, a->esz); 8869 8870 if (check <= 0) { 8871 return check == 0; 8872 } 8873 8874 t32 = read_fp_sreg(s, a->rn); 8875 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); 8876 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8877 return true; 8878 } 8879 8880 static const FPScalar1 f_scalar_frint32 = { 8881 NULL, 8882 gen_helper_frint32_s, 8883 gen_helper_frint32_d, 8884 }; 8885 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8886 &f_scalar_frint32, FPROUNDING_ZERO) 8887 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8888 8889 static const FPScalar1 f_scalar_frint64 = { 8890 NULL, 8891 gen_helper_frint64_s, 8892 gen_helper_frint64_d, 8893 }; 8894 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8895 &f_scalar_frint64, FPROUNDING_ZERO) 8896 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8897 8898 static const FPScalar1 f_scalar_frecpe = { 8899 gen_helper_recpe_f16, 8900 gen_helper_recpe_f32, 8901 gen_helper_recpe_f64, 8902 }; 8903 static const FPScalar1 f_scalar_frecpe_rpres = { 8904 gen_helper_recpe_f16, 8905 gen_helper_recpe_rpres_f32, 8906 gen_helper_recpe_f64, 8907 }; 8908 TRANS(FRECPE_s, do_fp1_scalar_ah, a, 8909 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 8910 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) 8911 8912 static const FPScalar1 f_scalar_frecpx = { 8913 gen_helper_frecpx_f16, 8914 gen_helper_frecpx_f32, 8915 gen_helper_frecpx_f64, 8916 }; 8917 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) 8918 8919 static const FPScalar1 f_scalar_frsqrte = { 8920 gen_helper_rsqrte_f16, 8921 gen_helper_rsqrte_f32, 8922 gen_helper_rsqrte_f64, 8923 }; 8924 static const FPScalar1 f_scalar_frsqrte_rpres = { 8925 gen_helper_rsqrte_f16, 8926 gen_helper_rsqrte_rpres_f32, 8927 gen_helper_rsqrte_f64, 8928 }; 8929 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, 8930 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
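/* FEAT_RPRES: higher-precision estimate when FPCR.AH == 1 */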
8931 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) 8932 8933 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8934 { 8935 if (fp_access_check(s)) { 8936 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8937 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8938 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8939 8940 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 8941 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 8942 } 8943 return true; 8944 } 8945 8946 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8947 { 8948 if (fp_access_check(s)) { 8949 TCGv_i32 tmp = read_fp_sreg(s, a->rn); 8950 TCGv_i32 ahp = get_ahp_flag(); 8951 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8952 8953 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 8954 /* write_fp_hreg_merging is OK here because top half of result is zero */ 8955 write_fp_hreg_merging(s, a->rd, a->rd, tmp); 8956 } 8957 return true; 8958 } 8959 8960 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 8961 { 8962 if (fp_access_check(s)) { 8963 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8964 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8965 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8966 8967 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); 8968 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 8969 } 8970 return true; 8971 } 8972 8973 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 8974 { 8975 if (fp_access_check(s)) { 8976 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8977 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8978 TCGv_i32 ahp = get_ahp_flag(); 8979 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8980 8981 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8982 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ 8983 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); 8984 } 8985 return true; 8986 } 8987 8988 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 8989 { 8990 if (fp_access_check(s)) { 8991 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8992 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8993 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 8994 TCGv_i32 tcg_ahp = get_ahp_flag(); 8995 8996 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8997 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 8998 } 8999 return true; 9000 } 9001 9002 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 9003 { 9004 if (fp_access_check(s)) { 9005 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9006 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9007 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9008 TCGv_i32 tcg_ahp = get_ahp_flag(); 9009 9010 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9011 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 9012 } 9013 return true; 9014 } 9015 9016 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 9017 TCGv_i64 tcg_int, bool is_signed) 9018 { 9019 TCGv_ptr tcg_fpstatus; 9020 TCGv_i32 tcg_shift, tcg_single; 9021 TCGv_i64 tcg_double; 9022 9023 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9024 tcg_shift = tcg_constant_i32(shift); 9025 9026 switch (esz) { 9027 case MO_64: 9028 tcg_double = tcg_temp_new_i64(); 9029 if (is_signed) { 9030 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9031 } else { 9032 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9033 } 9034 write_fp_dreg_merging(s, rd, rd, tcg_double); 9035 break; 9036 9037 case MO_32: 9038 tcg_single = tcg_temp_new_i32(); 9039 if (is_signed) { 9040 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9041 } else { 9042 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9043 } 9044 write_fp_sreg_merging(s, rd, rd, tcg_single); 9045 break; 9046 9047 case MO_16: 9048 tcg_single = tcg_temp_new_i32(); 9049 if (is_signed) { 9050 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9051 } else { 9052 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9053 } 9054 write_fp_hreg_merging(s, rd, rd, tcg_single); 9055 break; 9056 9057 default: 9058 g_assert_not_reached(); 9059 } 9060 return true; 9061 } 9062 9063 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 9064 { 9065 TCGv_i64 tcg_int; 9066 int check = fp_access_check_scalar_hsd(s, a->esz); 9067 9068 if (check <= 0) { 9069 return check == 0; 9070 } 9071 9072 if (a->sf) { 9073 tcg_int = cpu_reg(s, a->rn); 9074 } else { 9075 tcg_int = read_cpu_reg(s, a->rn, true); 9076 if (is_signed) { 9077 tcg_gen_ext32s_i64(tcg_int, tcg_int); 9078 } else { 9079 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9080 } 9081 } 9082 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9083 } 9084 9085 TRANS(SCVTF_g, do_cvtf_g, a, true) 9086 TRANS(UCVTF_g, do_cvtf_g, a, false) 9087 9088 /* 9089 * [US]CVTF (vector), scalar version. 9090 * Which sounds weird, but really just means input from fp register 9091 * instead of input from general register. Input and output element 9092 * size are always equal. 9093 */ 9094 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 9095 { 9096 TCGv_i64 tcg_int; 9097 int check = fp_access_check_scalar_hsd(s, a->esz); 9098 9099 if (check <= 0) { 9100 return check == 0; 9101 } 9102 9103 tcg_int = tcg_temp_new_i64(); 9104 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 9105 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9106 } 9107 9108 TRANS(SCVTF_f, do_cvtf_f, a, true) 9109 TRANS(UCVTF_f, do_cvtf_f, a, false) 9110 9111 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 9112 TCGv_i64 tcg_out, int shift, int rn, 9113 ARMFPRounding rmode) 9114 { 9115 TCGv_ptr tcg_fpstatus; 9116 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 9117 9118 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9119 tcg_shift = tcg_constant_i32(shift); 9120 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9121 9122 switch (esz) { 9123 case MO_64: 9124 read_vec_element(s, tcg_out, rn, 0, MO_64); 9125 switch (out) { 9126 case MO_64 | MO_SIGN: 9127 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9128 break; 9129 case MO_64: 9130 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9131 break; 9132 case MO_32 | MO_SIGN: 9133 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9134 break; 9135 case MO_32: 9136 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9137 break; 9138 default: 9139 g_assert_not_reached(); 9140 } 9141 break; 9142 9143 case MO_32: 9144 tcg_single = read_fp_sreg(s, rn); 9145 switch (out) { 9146 case MO_64 | MO_SIGN: 9147 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9148 break; 9149 case MO_64: 9150 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9151 break; 9152 case MO_32 | MO_SIGN: 9153 gen_helper_vfp_tosls(tcg_single, tcg_single, 9154 tcg_shift, tcg_fpstatus); 9155 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9156 break; 9157 case MO_32: 9158 gen_helper_vfp_touls(tcg_single, tcg_single, 9159 tcg_shift, tcg_fpstatus); 9160 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9161 break; 9162 default: 9163 g_assert_not_reached(); 9164 } 9165 break; 9166 9167 case MO_16: 9168 tcg_single = read_fp_hreg(s, rn); 9169 switch (out) { 9170 case MO_64 | MO_SIGN: 9171 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9172 break; 9173 case MO_64: 9174 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9175 break; 9176 case MO_32 | MO_SIGN: 9177 gen_helper_vfp_toslh(tcg_single, tcg_single, 9178 tcg_shift, tcg_fpstatus); 9179 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9180 break; 9181 case MO_32: 9182 gen_helper_vfp_toulh(tcg_single, tcg_single, 9183 tcg_shift, tcg_fpstatus); 9184 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9185 break; 9186 case MO_16 | MO_SIGN: 9187 gen_helper_vfp_toshh(tcg_single, tcg_single, 9188 tcg_shift, tcg_fpstatus); 9189 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9190 break; 9191 case MO_16: 9192 gen_helper_vfp_touhh(tcg_single, tcg_single, 9193 tcg_shift, tcg_fpstatus); 9194 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9195 break; 9196 default: 9197 g_assert_not_reached(); 9198 } 9199 break; 9200 9201 default: 9202 g_assert_not_reached(); 9203 } 9204 9205 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9206 } 9207 9208 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 9209 ARMFPRounding rmode, bool is_signed) 9210 { 9211 TCGv_i64 tcg_int; 9212 int check = fp_access_check_scalar_hsd(s, a->esz); 9213 9214 if (check <= 0) { 9215 return check == 0; 9216 } 9217 9218 tcg_int = cpu_reg(s, a->rd); 9219 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 9220 a->esz, tcg_int, a->shift, a->rn, rmode); 9221 9222 if (!a->sf) { 9223 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9224 } 9225 return true; 9226 } 9227 9228 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 9229 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 9230 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 9231 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 9232 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 9233 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 9234 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 9235 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 9236 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 9237 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 9238 9239 /* 9240 * FCVT* (vector), scalar version. 9241 * Which sounds weird, but really just means output to fp register 9242 * instead of output to general register. Input and output element 9243 * size are always equal. 9244 */ 9245 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 9246 ARMFPRounding rmode, bool is_signed) 9247 { 9248 TCGv_i64 tcg_int; 9249 int check = fp_access_check_scalar_hsd(s, a->esz); 9250 9251 if (check <= 0) { 9252 return check == 0; 9253 } 9254 9255 tcg_int = tcg_temp_new_i64(); 9256 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 9257 a->esz, tcg_int, a->shift, a->rn, rmode); 9258 9259 if (!s->fpcr_nep) { 9260 clear_vec(s, a->rd); 9261 } 9262 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 9263 return true; 9264 } 9265 9266 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 9267 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 9268 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 9269 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 9270 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 9271 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 9272 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 9273 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 9274 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 9275 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 9276 9277 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 9278 { 9279 if (!dc_isar_feature(aa64_jscvt, s)) { 9280 return false; 9281 } 9282 if (fp_access_check(s)) { 9283 TCGv_i64 t = read_fp_dreg(s, a->rn); 9284 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 9285 9286 gen_helper_fjcvtzs(t, t, fpstatus); 9287 9288 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 9289 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9290 tcg_gen_movi_i32(cpu_CF, 0); 9291 tcg_gen_movi_i32(cpu_NF, 0); 9292 tcg_gen_movi_i32(cpu_VF, 0); 9293 } 9294 return true; 9295 } 9296 9297 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 9298 { 9299 if (!dc_isar_feature(aa64_fp16, s)) { 9300 return false; 9301 } 9302 if (fp_access_check(s)) { 9303 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9304 TCGv_i64 tmp = tcg_temp_new_i64(); 9305 tcg_gen_ext16u_i64(tmp, tcg_rn); 9306 write_fp_dreg(s, a->rd, tmp); 9307 } 9308 return true; 9309 } 9310 9311 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 9312 { 9313 if (fp_access_check(s)) { 9314 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9315 TCGv_i64 tmp = tcg_temp_new_i64(); 9316 tcg_gen_ext32u_i64(tmp, tcg_rn); 9317 write_fp_dreg(s, a->rd, tmp); 9318 } 9319 return true; 9320 } 9321 9322 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 9323 { 9324 if (fp_access_check(s)) { 9325 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9326 write_fp_dreg(s, a->rd, tcg_rn); 9327 } 9328 return true; 9329 } 9330 9331 
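/*
 * FMOV Vd.D[1], Xn: copy the general register into the high doubleword
 * of the vector register.
 */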
static bool trans_FMOV_ux(DisasContext *s, arg_rr *a) 9332 { 9333 if (fp_access_check(s)) { 9334 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9335 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 9336 clear_vec_high(s, true, a->rd); 9337 } 9338 return true; 9339 } 9340 9341 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 9342 { 9343 if (!dc_isar_feature(aa64_fp16, s)) { 9344 return false; 9345 } 9346 if (fp_access_check(s)) { 9347 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9348 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 9349 } 9350 return true; 9351 } 9352 9353 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 9354 { 9355 if (fp_access_check(s)) { 9356 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9357 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 9358 } 9359 return true; 9360 } 9361 9362 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 9363 { 9364 if (fp_access_check(s)) { 9365 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9366 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 9367 } 9368 return true; 9369 } 9370 9371 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 9372 { 9373 if (fp_access_check(s)) { 9374 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9375 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 9376 } 9377 return true; 9378 } 9379 9380 typedef struct ENVScalar1 { 9381 NeonGenOneOpEnvFn *gen_bhs[3]; 9382 NeonGenOne64OpEnvFn *gen_d; 9383 } ENVScalar1; 9384 9385 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 9386 { 9387 if (!fp_access_check(s)) { 9388 return true; 9389 } 9390 if (a->esz == MO_64) { 9391 TCGv_i64 t = read_fp_dreg(s, a->rn); 9392 f->gen_d(t, tcg_env, t); 9393 write_fp_dreg(s, a->rd, t); 9394 } else { 9395 TCGv_i32 t = tcg_temp_new_i32(); 9396 9397 read_vec_element_i32(s, t, a->rn, 0, a->esz); 9398 f->gen_bhs[a->esz](t, tcg_env, t); 9399 write_fp_sreg(s, a->rd, t); 9400 } 9401 return true; 9402 } 9403 9404 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 9405 { 9406 if (a->esz == MO_64 && !a->q) { 9407 return false; 9408 } 9409 if (!fp_access_check(s)) { 9410 return true; 9411 } 9412 if (a->esz == MO_64) { 9413 TCGv_i64 t = tcg_temp_new_i64(); 9414 9415 for (int i = 0; i < 2; ++i) { 9416 read_vec_element(s, t, a->rn, i, MO_64); 9417 f->gen_d(t, tcg_env, t); 9418 write_vec_element(s, t, a->rd, i, MO_64); 9419 } 9420 } else { 9421 TCGv_i32 t = tcg_temp_new_i32(); 9422 int n = (a->q ? 
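/* vector length in bytes; >> esz gives the element count */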
16 : 8) >> a->esz; 9423 9424 for (int i = 0; i < n; ++i) { 9425 read_vec_element_i32(s, t, a->rn, i, a->esz); 9426 f->gen_bhs[a->esz](t, tcg_env, t); 9427 write_vec_element_i32(s, t, a->rd, i, a->esz); 9428 } 9429 } 9430 clear_vec_high(s, a->q, a->rd); 9431 return true; 9432 } 9433 9434 static const ENVScalar1 f_scalar_sqabs = { 9435 { gen_helper_neon_qabs_s8, 9436 gen_helper_neon_qabs_s16, 9437 gen_helper_neon_qabs_s32 }, 9438 gen_helper_neon_qabs_s64, 9439 }; 9440 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9441 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9442 9443 static const ENVScalar1 f_scalar_sqneg = { 9444 { gen_helper_neon_qneg_s8, 9445 gen_helper_neon_qneg_s16, 9446 gen_helper_neon_qneg_s32 }, 9447 gen_helper_neon_qneg_s64, 9448 }; 9449 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9450 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9451 9452 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9453 { 9454 if (fp_access_check(s)) { 9455 TCGv_i64 t = read_fp_dreg(s, a->rn); 9456 f(t, t); 9457 write_fp_dreg(s, a->rd, t); 9458 } 9459 return true; 9460 } 9461 9462 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9463 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9464 9465 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9466 { 9467 if (fp_access_check(s)) { 9468 TCGv_i64 t = read_fp_dreg(s, a->rn); 9469 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9470 write_fp_dreg(s, a->rd, t); 9471 } 9472 return true; 9473 } 9474 9475 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9476 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9477 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9478 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9479 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9480 9481 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9482 ArithOneOp * const fn[3]) 9483 { 9484 if (a->esz == MO_64) { 9485 return false; 9486 } 9487 if (fp_access_check(s)) { 9488 TCGv_i64 t = tcg_temp_new_i64(); 9489 9490 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9491 fn[a->esz](t, t); 9492 clear_vec(s, a->rd); 9493 write_vec_element(s, t, a->rd, 0, a->esz); 9494 } 9495 return true; 9496 } 9497 9498 #define WRAP_ENV(NAME) \ 9499 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9500 { gen_helper_##NAME(d, tcg_env, n); } 9501 9502 WRAP_ENV(neon_unarrow_sat8) 9503 WRAP_ENV(neon_unarrow_sat16) 9504 WRAP_ENV(neon_unarrow_sat32) 9505 9506 static ArithOneOp * const f_scalar_sqxtun[] = { 9507 gen_neon_unarrow_sat8, 9508 gen_neon_unarrow_sat16, 9509 gen_neon_unarrow_sat32, 9510 }; 9511 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9512 9513 WRAP_ENV(neon_narrow_sat_s8) 9514 WRAP_ENV(neon_narrow_sat_s16) 9515 WRAP_ENV(neon_narrow_sat_s32) 9516 9517 static ArithOneOp * const f_scalar_sqxtn[] = { 9518 gen_neon_narrow_sat_s8, 9519 gen_neon_narrow_sat_s16, 9520 gen_neon_narrow_sat_s32, 9521 }; 9522 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9523 9524 WRAP_ENV(neon_narrow_sat_u8) 9525 WRAP_ENV(neon_narrow_sat_u16) 9526 WRAP_ENV(neon_narrow_sat_u32) 9527 9528 static ArithOneOp * const f_scalar_uqxtn[] = { 9529 gen_neon_narrow_sat_u8, 9530 gen_neon_narrow_sat_u16, 9531 gen_neon_narrow_sat_u32, 9532 }; 9533 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9534 9535 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) 9536 { 9537 if (fp_access_check(s)) { 9538 /* 9539 * 64 bit to 32 bit float conversion 9540 * with von Neumann rounding (round to odd) 9541 */ 9542 TCGv_i64 src = read_fp_dreg(s, a->rn); 9543 TCGv_i32 dst = 
tcg_temp_new_i32();
        gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
        write_fp_sreg_merging(s, a->rd, a->rd, dst);
    }
    return true;
}

#undef WRAP_ENV

static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    }
    return true;
}

TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)

static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    }
    return true;
}

TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)

static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
                                   ArithOneOp * const fn[3])
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();

        read_vec_element(s, t0, a->rn, 0, MO_64);
        read_vec_element(s, t1, a->rn, 1, MO_64);
        fn[a->esz](t0, t0);
        fn[a->esz](t1, t1);
        write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
        write_vec_element(s, t1, a->rd, a->q ?
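/* the "2" (upper-half) forms write elements 2 and 3 of Vd */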
3 : 1, MO_32); 9613 clear_vec_high(s, a->q, a->rd); 9614 } 9615 return true; 9616 } 9617 9618 static ArithOneOp * const f_scalar_xtn[] = { 9619 gen_helper_neon_narrow_u8, 9620 gen_helper_neon_narrow_u16, 9621 tcg_gen_ext32u_i64, 9622 }; 9623 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9624 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9625 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9626 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9627 9628 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9629 { 9630 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9631 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9632 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9633 TCGv_i32 ahp = get_ahp_flag(); 9634 9635 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9636 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9637 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9638 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9639 tcg_gen_extu_i32_i64(d, tcg_lo); 9640 } 9641 9642 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9643 { 9644 TCGv_i32 tmp = tcg_temp_new_i32(); 9645 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9646 9647 gen_helper_vfp_fcvtsd(tmp, n, fpst); 9648 tcg_gen_extu_i32_i64(d, tmp); 9649 } 9650 9651 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9652 { 9653 /* 9654 * 64 bit to 32 bit float conversion 9655 * with von Neumann rounding (round to odd) 9656 */ 9657 TCGv_i32 tmp = tcg_temp_new_i32(); 9658 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); 9659 tcg_gen_extu_i32_i64(d, tmp); 9660 } 9661 9662 static ArithOneOp * const f_vector_fcvtn[] = { 9663 NULL, 9664 gen_fcvtn_hs, 9665 gen_fcvtn_sd, 9666 }; 9667 static ArithOneOp * const f_scalar_fcvtxn[] = { 9668 NULL, 9669 NULL, 9670 gen_fcvtxn_sd, 9671 }; 9672 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9673 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9674 9675 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9676 { 9677 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9678 TCGv_i32 tmp = tcg_temp_new_i32(); 9679 gen_helper_bfcvt_pair(tmp, n, fpst); 9680 tcg_gen_extu_i32_i64(d, tmp); 9681 } 9682 9683 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) 9684 { 9685 TCGv_ptr fpst = fpstatus_ptr(FPST_AH); 9686 TCGv_i32 tmp = tcg_temp_new_i32(); 9687 gen_helper_bfcvt_pair(tmp, n, fpst); 9688 tcg_gen_extu_i32_i64(d, tmp); 9689 } 9690 9691 static ArithOneOp * const f_vector_bfcvtn[2][3] = { 9692 { 9693 NULL, 9694 gen_bfcvtn_hs, 9695 NULL, 9696 }, { 9697 NULL, 9698 gen_bfcvtn_ah_hs, 9699 NULL, 9700 } 9701 }; 9702 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, 9703 f_vector_bfcvtn[s->fpcr_ah]) 9704 9705 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9706 { 9707 static NeonGenWidenFn * const widenfns[3] = { 9708 gen_helper_neon_widen_u8, 9709 gen_helper_neon_widen_u16, 9710 tcg_gen_extu_i32_i64, 9711 }; 9712 NeonGenWidenFn *widenfn; 9713 TCGv_i64 tcg_res[2]; 9714 TCGv_i32 tcg_op; 9715 int part, pass; 9716 9717 if (a->esz == MO_64) { 9718 return false; 9719 } 9720 if (!fp_access_check(s)) { 9721 return true; 9722 } 9723 9724 tcg_op = tcg_temp_new_i32(); 9725 widenfn = widenfns[a->esz]; 9726 part = a->q ? 
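/* SHLL2: widen the upper half of the source vector */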
2 : 0; 9727 9728 for (pass = 0; pass < 2; pass++) { 9729 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32); 9730 tcg_res[pass] = tcg_temp_new_i64(); 9731 widenfn(tcg_res[pass], tcg_op); 9732 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz); 9733 } 9734 9735 for (pass = 0; pass < 2; pass++) { 9736 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 9737 } 9738 return true; 9739 } 9740 9741 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9742 { 9743 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9744 9745 if (check <= 0) { 9746 return check == 0; 9747 } 9748 9749 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9750 return true; 9751 } 9752 9753 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs) 9754 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg) 9755 9756 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, 9757 const FPScalar1 *f, int rmode) 9758 { 9759 TCGv_i32 tcg_rmode = NULL; 9760 TCGv_ptr fpst; 9761 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9762 9763 if (check <= 0) { 9764 return check == 0; 9765 } 9766 9767 fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 9768 if (rmode >= 0) { 9769 tcg_rmode = gen_set_rmode(rmode, fpst); 9770 } 9771 9772 if (a->esz == MO_64) { 9773 TCGv_i64 t64 = tcg_temp_new_i64(); 9774 9775 for (int pass = 0; pass < 2; ++pass) { 9776 read_vec_element(s, t64, a->rn, pass, MO_64); 9777 f->gen_d(t64, t64, fpst); 9778 write_vec_element(s, t64, a->rd, pass, MO_64); 9779 } 9780 } else { 9781 TCGv_i32 t32 = tcg_temp_new_i32(); 9782 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr) 9783 = (a->esz == MO_16 ? f->gen_h : f->gen_s); 9784 9785 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) { 9786 read_vec_element_i32(s, t32, a->rn, pass, a->esz); 9787 gen(t32, t32, fpst); 9788 write_vec_element_i32(s, t32, a->rd, pass, a->esz); 9789 } 9790 } 9791 clear_vec_high(s, a->q, a->rd); 9792 9793 if (rmode >= 0) { 9794 gen_restore_rmode(tcg_rmode, fpst); 9795 } 9796 return true; 9797 } 9798 9799 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1) 9800 9801 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 9802 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF) 9803 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF) 9804 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO) 9805 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 9806 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1) 9807 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1) 9808 9809 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a, 9810 &f_scalar_frint32, FPROUNDING_ZERO) 9811 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1) 9812 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, 9813 &f_scalar_frint64, FPROUNDING_ZERO) 9814 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) 9815 9816 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, 9817 bool is_q, int rd, int rn, int data, 9818 gen_helper_gvec_2_ptr * const fns[3], 9819 ARMFPStatusFlavour fpsttype) 9820 { 9821 int check = fp_access_check_vector_hsd(s, is_q, esz); 9822 TCGv_ptr fpst; 9823 9824 if (check <= 0) { 9825 return check == 0; 9826 } 9827 9828 fpst = fpstatus_ptr(fpsttype); 9829 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 9830 vec_full_reg_offset(s, rn), fpst, 9831 is_q ? 
16 : 8, vec_full_reg_size(s), 9832 data, fns[esz - 1]); 9833 return true; 9834 } 9835 9836 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, 9837 int rd, int rn, int data, 9838 gen_helper_gvec_2_ptr * const fns[3]) 9839 { 9840 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, 9841 esz == MO_16 ? FPST_A64_F16 : 9842 FPST_A64); 9843 } 9844 9845 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, 9846 int rd, int rn, int data, 9847 gen_helper_gvec_2_ptr * const fns[3]) 9848 { 9849 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, 9850 fns, select_ah_fpst(s, esz)); 9851 } 9852 9853 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { 9854 gen_helper_gvec_vcvt_sh, 9855 gen_helper_gvec_vcvt_sf, 9856 gen_helper_gvec_vcvt_sd, 9857 }; 9858 TRANS(SCVTF_vi, do_gvec_op2_fpst, 9859 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v) 9860 TRANS(SCVTF_vf, do_gvec_op2_fpst, 9861 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v) 9862 9863 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = { 9864 gen_helper_gvec_vcvt_uh, 9865 gen_helper_gvec_vcvt_uf, 9866 gen_helper_gvec_vcvt_ud, 9867 }; 9868 TRANS(UCVTF_vi, do_gvec_op2_fpst, 9869 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v) 9870 TRANS(UCVTF_vf, do_gvec_op2_fpst, 9871 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v) 9872 9873 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = { 9874 gen_helper_gvec_vcvt_rz_hs, 9875 gen_helper_gvec_vcvt_rz_fs, 9876 gen_helper_gvec_vcvt_rz_ds, 9877 }; 9878 TRANS(FCVTZS_vf, do_gvec_op2_fpst, 9879 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf) 9880 9881 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = { 9882 gen_helper_gvec_vcvt_rz_hu, 9883 gen_helper_gvec_vcvt_rz_fu, 9884 gen_helper_gvec_vcvt_rz_du, 9885 }; 9886 TRANS(FCVTZU_vf, do_gvec_op2_fpst, 9887 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf) 9888 9889 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = { 9890 gen_helper_gvec_vcvt_rm_sh, 9891 gen_helper_gvec_vcvt_rm_ss, 9892 gen_helper_gvec_vcvt_rm_sd, 9893 }; 9894 9895 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = { 9896 gen_helper_gvec_vcvt_rm_uh, 9897 gen_helper_gvec_vcvt_rm_us, 9898 gen_helper_gvec_vcvt_rm_ud, 9899 }; 9900 9901 TRANS(FCVTNS_vi, do_gvec_op2_fpst, 9902 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi) 9903 TRANS(FCVTNU_vi, do_gvec_op2_fpst, 9904 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi) 9905 TRANS(FCVTPS_vi, do_gvec_op2_fpst, 9906 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi) 9907 TRANS(FCVTPU_vi, do_gvec_op2_fpst, 9908 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi) 9909 TRANS(FCVTMS_vi, do_gvec_op2_fpst, 9910 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi) 9911 TRANS(FCVTMU_vi, do_gvec_op2_fpst, 9912 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi) 9913 TRANS(FCVTZS_vi, do_gvec_op2_fpst, 9914 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi) 9915 TRANS(FCVTZU_vi, do_gvec_op2_fpst, 9916 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi) 9917 TRANS(FCVTAS_vi, do_gvec_op2_fpst, 9918 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi) 9919 TRANS(FCVTAU_vi, do_gvec_op2_fpst, 9920 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi) 9921 9922 static gen_helper_gvec_2_ptr * const f_fceq0[] = { 9923 gen_helper_gvec_fceq0_h, 9924 gen_helper_gvec_fceq0_s, 9925 gen_helper_gvec_fceq0_d, 9926 }; 9927 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0) 9928 9929 static 
gen_helper_gvec_2_ptr * const f_fcgt0[] = { 9930 gen_helper_gvec_fcgt0_h, 9931 gen_helper_gvec_fcgt0_s, 9932 gen_helper_gvec_fcgt0_d, 9933 }; 9934 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0) 9935 9936 static gen_helper_gvec_2_ptr * const f_fcge0[] = { 9937 gen_helper_gvec_fcge0_h, 9938 gen_helper_gvec_fcge0_s, 9939 gen_helper_gvec_fcge0_d, 9940 }; 9941 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0) 9942 9943 static gen_helper_gvec_2_ptr * const f_fclt0[] = { 9944 gen_helper_gvec_fclt0_h, 9945 gen_helper_gvec_fclt0_s, 9946 gen_helper_gvec_fclt0_d, 9947 }; 9948 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0) 9949 9950 static gen_helper_gvec_2_ptr * const f_fcle0[] = { 9951 gen_helper_gvec_fcle0_h, 9952 gen_helper_gvec_fcle0_s, 9953 gen_helper_gvec_fcle0_d, 9954 }; 9955 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0) 9956 9957 static gen_helper_gvec_2_ptr * const f_frecpe[] = { 9958 gen_helper_gvec_frecpe_h, 9959 gen_helper_gvec_frecpe_s, 9960 gen_helper_gvec_frecpe_d, 9961 }; 9962 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { 9963 gen_helper_gvec_frecpe_h, 9964 gen_helper_gvec_frecpe_rpres_s, 9965 gen_helper_gvec_frecpe_d, 9966 }; 9967 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, 9968 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) 9969 9970 static gen_helper_gvec_2_ptr * const f_frsqrte[] = { 9971 gen_helper_gvec_frsqrte_h, 9972 gen_helper_gvec_frsqrte_s, 9973 gen_helper_gvec_frsqrte_d, 9974 }; 9975 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { 9976 gen_helper_gvec_frsqrte_h, 9977 gen_helper_gvec_frsqrte_rpres_s, 9978 gen_helper_gvec_frsqrte_d, 9979 }; 9980 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, 9981 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) 9982 9983 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) 9984 { 9985 /* Handle 2-reg-misc ops which are widening (so each size element 9986 * in the source becomes a 2*size element in the destination. 9987 * The only instruction like this is FCVTL. 9988 */ 9989 int pass; 9990 TCGv_ptr fpst; 9991 9992 if (!fp_access_check(s)) { 9993 return true; 9994 } 9995 9996 if (a->esz == MO_64) { 9997 /* 32 -> 64 bit fp conversion */ 9998 TCGv_i64 tcg_res[2]; 9999 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10000 int srcelt = a->q ? 2 : 0; 10001 10002 fpst = fpstatus_ptr(FPST_A64); 10003 10004 for (pass = 0; pass < 2; pass++) { 10005 tcg_res[pass] = tcg_temp_new_i64(); 10006 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32); 10007 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst); 10008 } 10009 for (pass = 0; pass < 2; pass++) { 10010 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 10011 } 10012 } else { 10013 /* 16 -> 32 bit fp conversion */ 10014 int srcelt = a->q ? 
4 : 0; 10015 TCGv_i32 tcg_res[4]; 10016 TCGv_i32 ahp = get_ahp_flag(); 10017 10018 fpst = fpstatus_ptr(FPST_A64_F16); 10019 10020 for (pass = 0; pass < 4; pass++) { 10021 tcg_res[pass] = tcg_temp_new_i32(); 10022 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16); 10023 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 10024 fpst, ahp); 10025 } 10026 for (pass = 0; pass < 4; pass++) { 10027 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32); 10028 } 10029 } 10030 clear_vec_high(s, true, a->rd); 10031 return true; 10032 } 10033 10034 static bool trans_OK(DisasContext *s, arg_OK *a) 10035 { 10036 return true; 10037 } 10038 10039 static bool trans_FAIL(DisasContext *s, arg_OK *a) 10040 { 10041 s->is_nonstreaming = true; 10042 return true; 10043 } 10044 10045 /** 10046 * btype_destination_ok: 10047 * @insn: The instruction at the branch destination 10048 * @bt: SCTLR_ELx.BT 10049 * @btype: PSTATE.BTYPE, and is non-zero 10050 * 10051 * On a guarded page, there are a limited number of insns 10052 * that may be present at the branch target: 10053 * - branch target identifiers, 10054 * - paciasp, pacibsp, 10055 * - BRK insn 10056 * - HLT insn 10057 * Anything else causes a Branch Target Exception. 10058 * 10059 * Return true if the branch is compatible, false to raise BTITRAP. 10060 */ 10061 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 10062 { 10063 if ((insn & 0xfffff01fu) == 0xd503201fu) { 10064 /* HINT space */ 10065 switch (extract32(insn, 5, 7)) { 10066 case 0b011001: /* PACIASP */ 10067 case 0b011011: /* PACIBSP */ 10068 /* 10069 * If SCTLR_ELx.BT, then PACI*SP are not compatible 10070 * with btype == 3. Otherwise all btype are ok. 10071 */ 10072 return !bt || btype != 3; 10073 case 0b100000: /* BTI */ 10074 /* Not compatible with any btype. */ 10075 return false; 10076 case 0b100010: /* BTI c */ 10077 /* Not compatible with btype == 3 */ 10078 return btype != 3; 10079 case 0b100100: /* BTI j */ 10080 /* Not compatible with btype == 2 */ 10081 return btype != 2; 10082 case 0b100110: /* BTI jc */ 10083 /* Compatible with any btype. */ 10084 return true; 10085 } 10086 } else { 10087 switch (insn & 0xffe0001fu) { 10088 case 0xd4200000u: /* BRK */ 10089 case 0xd4400000u: /* HLT */ 10090 /* Give priority to the breakpoint exception. */ 10091 return true; 10092 } 10093 } 10094 return false; 10095 } 10096 10097 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 10098 CPUState *cpu) 10099 { 10100 DisasContext *dc = container_of(dcbase, DisasContext, base); 10101 CPUARMState *env = cpu_env(cpu); 10102 ARMCPU *arm_cpu = env_archcpu(env); 10103 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 10104 int bound, core_mmu_idx; 10105 10106 dc->isar = &arm_cpu->isar; 10107 dc->condjmp = 0; 10108 dc->pc_save = dc->base.pc_first; 10109 dc->aarch64 = true; 10110 dc->thumb = false; 10111 dc->sctlr_b = 0; 10112 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? 
MO_BE : MO_LE; 10113 dc->condexec_mask = 0; 10114 dc->condexec_cond = 0; 10115 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 10116 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 10117 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 10118 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 10119 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 10120 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 10121 #if !defined(CONFIG_USER_ONLY) 10122 dc->user = (dc->current_el == 0); 10123 #endif 10124 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 10125 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 10126 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 10127 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 10128 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 10129 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); 10130 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 10131 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 10132 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 10133 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 10134 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 10135 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 10136 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 10137 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 10138 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); 10139 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); 10140 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 10141 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 10142 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 10143 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 10144 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 10145 dc->naa = EX_TBFLAG_A64(tb_flags, NAA); 10146 dc->nv = EX_TBFLAG_A64(tb_flags, NV); 10147 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); 10148 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); 10149 dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); 10150 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); 10151 dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); 10152 dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); 10153 dc->vec_len = 0; 10154 dc->vec_stride = 0; 10155 dc->cp_regs = arm_cpu->cp_regs; 10156 dc->features = env->features; 10157 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 10158 dc->gm_blocksize = arm_cpu->gm_blocksize; 10159 10160 #ifdef CONFIG_USER_ONLY 10161 /* In sve_probe_page, we assume TBI is enabled. */ 10162 tcg_debug_assert(dc->tbid & 1); 10163 #endif 10164 10165 dc->lse2 = dc_isar_feature(aa64_lse2, dc); 10166 10167 /* Single step state. The code-generation logic here is: 10168 * SS_ACTIVE == 0: 10169 * generate code with no special handling for single-stepping (except 10170 * that anything that can make us go to SS_ACTIVE == 1 must end the TB; 10171 * this happens anyway because those changes are all system register or 10172 * PSTATE writes). 10173 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) 10174 * emit code for one insn 10175 * emit code to clear PSTATE.SS 10176 * emit code to generate software step exception for completed step 10177 * end TB (as usual for having generated an exception) 10178 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) 10179 * emit code to generate a software step exception 10180 * end the TB 10181 */ 10182 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); 10183 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); 10184 dc->is_ldex = false; 10185 10186 /* Bound the number of insns to execute to those left on the page. 
*/ 10187 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; 10188 10189 /* If architectural single step active, limit to 1. */ 10190 if (dc->ss_active) { 10191 bound = 1; 10192 } 10193 dc->base.max_insns = MIN(dc->base.max_insns, bound); 10194 } 10195 10196 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) 10197 { 10198 } 10199 10200 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 10201 { 10202 DisasContext *dc = container_of(dcbase, DisasContext, base); 10203 target_ulong pc_arg = dc->base.pc_next; 10204 10205 if (tb_cflags(dcbase->tb) & CF_PCREL) { 10206 pc_arg &= ~TARGET_PAGE_MASK; 10207 } 10208 tcg_gen_insn_start(pc_arg, 0, 0); 10209 dc->insn_start_updated = false; 10210 } 10211 10212 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) 10213 { 10214 DisasContext *s = container_of(dcbase, DisasContext, base); 10215 CPUARMState *env = cpu_env(cpu); 10216 uint64_t pc = s->base.pc_next; 10217 uint32_t insn; 10218 10219 /* Singlestep exceptions have the highest priority. */ 10220 if (s->ss_active && !s->pstate_ss) { 10221 /* Singlestep state is Active-pending. 10222 * If we're in this state at the start of a TB then either 10223 * a) we just took an exception to an EL which is being debugged 10224 * and this is the first insn in the exception handler 10225 * b) debug exceptions were masked and we just unmasked them 10226 * without changing EL (eg by clearing PSTATE.D) 10227 * In either case we're going to take a swstep exception in the 10228 * "did not step an insn" case, and so the syndrome ISV and EX 10229 * bits should be zero. 10230 */ 10231 assert(s->base.num_insns == 1); 10232 gen_swstep_exception(s, 0, 0); 10233 s->base.is_jmp = DISAS_NORETURN; 10234 s->base.pc_next = pc + 4; 10235 return; 10236 } 10237 10238 if (pc & 3) { 10239 /* 10240 * PC alignment fault. This has priority over the instruction abort 10241 * that we would receive from a translation fault via arm_ldl_code. 10242 * This should only be possible after an indirect branch, at the 10243 * start of the TB. 10244 */ 10245 assert(s->base.num_insns == 1); 10246 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); 10247 s->base.is_jmp = DISAS_NORETURN; 10248 s->base.pc_next = QEMU_ALIGN_UP(pc, 4); 10249 return; 10250 } 10251 10252 s->pc_curr = pc; 10253 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b); 10254 s->insn = insn; 10255 s->base.pc_next = pc + 4; 10256 10257 s->fp_access_checked = 0; 10258 s->sve_access_checked = 0; 10259 10260 if (s->pstate_il) { 10261 /* 10262 * Illegal execution state. This has priority over BTI 10263 * exceptions, but comes after instruction abort exceptions. 10264 */ 10265 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate()); 10266 return; 10267 } 10268 10269 if (dc_isar_feature(aa64_bti, s)) { 10270 if (s->base.num_insns == 1) { 10271 /* First insn can have btype set to non-zero. */ 10272 tcg_debug_assert(s->btype >= 0); 10273 10274 /* 10275 * Note that the Branch Target Exception has fairly high 10276 * priority -- below debugging exceptions but above most 10277 * everything else. This allows us to handle this now 10278 * instead of waiting until the insn is otherwise decoded. 10279 * 10280 * We can check all but the guarded page check here; 10281 * defer the latter to a helper. 10282 */ 10283 if (s->btype != 0 10284 && !btype_destination_ok(insn, s->bt, s->btype)) { 10285 gen_helper_guarded_page_check(tcg_env); 10286 } 10287 } else { 10288 /* Not the first insn: btype must be 0. 
*/ 10289 tcg_debug_assert(s->btype == 0); 10290 } 10291 } 10292 10293 s->is_nonstreaming = false; 10294 if (s->sme_trap_nonstreaming) { 10295 disas_sme_fa64(s, insn); 10296 } 10297 10298 if (!disas_a64(s, insn) && 10299 !disas_sme(s, insn) && 10300 !disas_sve(s, insn)) { 10301 unallocated_encoding(s); 10302 } 10303 10304 /* 10305 * After execution of most insns, btype is reset to 0. 10306 * Note that we set btype == -1 when the insn sets btype. 10307 */ 10308 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) { 10309 reset_btype(s); 10310 } 10311 } 10312 10313 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 10314 { 10315 DisasContext *dc = container_of(dcbase, DisasContext, base); 10316 10317 if (unlikely(dc->ss_active)) { 10318 /* Note that this means single stepping WFI doesn't halt the CPU. 10319 * For conditional branch insns this is harmless unreachable code as 10320 * gen_goto_tb() has already handled emitting the debug exception 10321 * (and thus a tb-jump is not possible when singlestepping). 10322 */ 10323 switch (dc->base.is_jmp) { 10324 default: 10325 gen_a64_update_pc(dc, 4); 10326 /* fall through */ 10327 case DISAS_EXIT: 10328 case DISAS_JUMP: 10329 gen_step_complete_exception(dc); 10330 break; 10331 case DISAS_NORETURN: 10332 break; 10333 } 10334 } else { 10335 switch (dc->base.is_jmp) { 10336 case DISAS_NEXT: 10337 case DISAS_TOO_MANY: 10338 gen_goto_tb(dc, 1, 4); 10339 break; 10340 default: 10341 case DISAS_UPDATE_EXIT: 10342 gen_a64_update_pc(dc, 4); 10343 /* fall through */ 10344 case DISAS_EXIT: 10345 tcg_gen_exit_tb(NULL, 0); 10346 break; 10347 case DISAS_UPDATE_NOCHAIN: 10348 gen_a64_update_pc(dc, 4); 10349 /* fall through */ 10350 case DISAS_JUMP: 10351 tcg_gen_lookup_and_goto_ptr(); 10352 break; 10353 case DISAS_NORETURN: 10354 case DISAS_SWI: 10355 break; 10356 case DISAS_WFE: 10357 gen_a64_update_pc(dc, 4); 10358 gen_helper_wfe(tcg_env); 10359 break; 10360 case DISAS_YIELD: 10361 gen_a64_update_pc(dc, 4); 10362 gen_helper_yield(tcg_env); 10363 break; 10364 case DISAS_WFI: 10365 /* 10366 * This is a special case because we don't want to just halt 10367 * the CPU if trying to debug across a WFI. 10368 */ 10369 gen_a64_update_pc(dc, 4); 10370 gen_helper_wfi(tcg_env, tcg_constant_i32(4)); 10371 /* 10372 * The helper doesn't necessarily throw an exception, but we 10373 * must go back to the main loop to check for interrupts anyway. 10374 */ 10375 tcg_gen_exit_tb(NULL, 0); 10376 break; 10377 } 10378 } 10379 } 10380 10381 const TranslatorOps aarch64_translator_ops = { 10382 .init_disas_context = aarch64_tr_init_disas_context, 10383 .tb_start = aarch64_tr_tb_start, 10384 .insn_start = aarch64_tr_insn_start, 10385 .translate_insn = aarch64_tr_translate_insn, 10386 .tb_stop = aarch64_tr_tb_stop, 10387 }; 10388
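/*
 * Usage sketch (illustrative only, not part of this file's build):
 * aarch64_translator_ops is handed to QEMU's generic translator loop,
 * which then drives init_disas_context, insn_start, translate_insn and
 * tb_stop for each instruction in a translation block.  The entry-point
 * name and the exact translator_loop() prototype differ between QEMU
 * versions, so the function name and signatures below are assumptions
 * for illustration, not the definitive interface.
 */
#if 0
static void example_translate_code(CPUState *cpu, TranslationBlock *tb,
                                   int *max_insns, vaddr pc, void *host_pc)
{
    DisasContext dc = { };

    /* Hand this target's per-insn hooks to the common translator loop. */
    translator_loop(cpu, tb, max_insns, pc, host_pc,
                    &aarch64_translator_ops, &dc.base);
}
#endif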