/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
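         * (That flag is only set for regimes which have a distinct EL0
         * mmu_idx, i.e. exactly the cases handled below.)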
118 * Therefore we should never get here with a mmu_idx for 119 * which we do not know the corresponding user mmu_idx. 120 */ 121 switch (useridx) { 122 case ARMMMUIdx_E10_1: 123 case ARMMMUIdx_E10_1_PAN: 124 useridx = ARMMMUIdx_E10_0; 125 break; 126 case ARMMMUIdx_E20_2: 127 case ARMMMUIdx_E20_2_PAN: 128 useridx = ARMMMUIdx_E20_0; 129 break; 130 default: 131 g_assert_not_reached(); 132 } 133 } 134 return arm_to_core_mmu_idx(useridx); 135 } 136 137 static void set_btype_raw(int val) 138 { 139 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 140 offsetof(CPUARMState, btype)); 141 } 142 143 static void set_btype(DisasContext *s, int val) 144 { 145 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 146 tcg_debug_assert(val >= 1 && val <= 3); 147 set_btype_raw(val); 148 s->btype = -1; 149 } 150 151 static void reset_btype(DisasContext *s) 152 { 153 if (s->btype != 0) { 154 set_btype_raw(0); 155 s->btype = 0; 156 } 157 } 158 159 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 160 { 161 assert(s->pc_save != -1); 162 if (tb_cflags(s->base.tb) & CF_PCREL) { 163 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 164 } else { 165 tcg_gen_movi_i64(dest, s->pc_curr + diff); 166 } 167 } 168 169 void gen_a64_update_pc(DisasContext *s, target_long diff) 170 { 171 gen_pc_plus_diff(s, cpu_pc, diff); 172 s->pc_save = s->pc_curr + diff; 173 } 174 175 /* 176 * Handle Top Byte Ignore (TBI) bits. 177 * 178 * If address tagging is enabled via the TCR TBI bits: 179 * + for EL2 and EL3 there is only one TBI bit, and if it is set 180 * then the address is zero-extended, clearing bits [63:56] 181 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 182 * and TBI1 controls addresses with bit 55 == 1. 183 * If the appropriate TBI bit is set for the address then 184 * the address is sign-extended from bit 55 into bits [63:56] 185 * 186 * Here We have concatenated TBI{1,0} into tbi. 187 */ 188 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 189 TCGv_i64 src, int tbi) 190 { 191 if (tbi == 0) { 192 /* Load unmodified address */ 193 tcg_gen_mov_i64(dst, src); 194 } else if (!regime_has_2_ranges(s->mmu_idx)) { 195 /* Force tag byte to all zero */ 196 tcg_gen_extract_i64(dst, src, 0, 56); 197 } else { 198 /* Sign-extend from bit 55. */ 199 tcg_gen_sextract_i64(dst, src, 0, 56); 200 201 switch (tbi) { 202 case 1: 203 /* tbi0 but !tbi1: only use the extension if positive */ 204 tcg_gen_and_i64(dst, dst, src); 205 break; 206 case 2: 207 /* !tbi0 but tbi1: only use the extension if negative */ 208 tcg_gen_or_i64(dst, dst, src); 209 break; 210 case 3: 211 /* tbi0 and tbi1: always use the extension */ 212 break; 213 default: 214 g_assert_not_reached(); 215 } 216 } 217 } 218 219 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 220 { 221 /* 222 * If address tagging is enabled for instructions via the TCR TBI bits, 223 * then loading an address into the PC will clear out any tag. 224 */ 225 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 226 s->pc_save = -1; 227 } 228 229 /* 230 * Handle MTE and/or TBI. 231 * 232 * For TBI, ideally, we would do nothing. Proper behaviour on fault is 233 * for the tag to be present in the FAR_ELx register. But for user-only 234 * mode we do not have a TLB with which to implement this, so we must 235 * remove the top byte now. 236 * 237 * Always return a fresh temporary that we can increment independently 238 * of the write-back address. 
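 * (In system mode the address is left unmodified here; the softmmu
 * path deals with the tag bits itself.)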
239 */ 240 241 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 242 { 243 TCGv_i64 clean = tcg_temp_new_i64(); 244 #ifdef CONFIG_USER_ONLY 245 gen_top_byte_ignore(s, clean, addr, s->tbid); 246 #else 247 tcg_gen_mov_i64(clean, addr); 248 #endif 249 return clean; 250 } 251 252 /* Insert a zero tag into src, with the result at dst. */ 253 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 254 { 255 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 256 } 257 258 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 259 MMUAccessType acc, int log2_size) 260 { 261 gen_helper_probe_access(tcg_env, ptr, 262 tcg_constant_i32(acc), 263 tcg_constant_i32(get_mem_index(s)), 264 tcg_constant_i32(1 << log2_size)); 265 } 266 267 /* 268 * For MTE, check a single logical or atomic access. This probes a single 269 * address, the exact one specified. The size and alignment of the access 270 * is not relevant to MTE, per se, but watchpoints do require the size, 271 * and we want to recognize those before making any other changes to state. 272 */ 273 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 274 bool is_write, bool tag_checked, 275 MemOp memop, bool is_unpriv, 276 int core_idx) 277 { 278 if (tag_checked && s->mte_active[is_unpriv]) { 279 TCGv_i64 ret; 280 int desc = 0; 281 282 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 283 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 284 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 285 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 286 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop)); 287 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 288 289 ret = tcg_temp_new_i64(); 290 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 291 292 return ret; 293 } 294 return clean_data_tbi(s, addr); 295 } 296 297 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 298 bool tag_checked, MemOp memop) 299 { 300 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 301 false, get_mem_index(s)); 302 } 303 304 /* 305 * For MTE, check multiple logical sequential accesses. 306 */ 307 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 308 bool tag_checked, int total_size, MemOp single_mop) 309 { 310 if (tag_checked && s->mte_active[0]) { 311 TCGv_i64 ret; 312 int desc = 0; 313 314 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 315 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 316 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 317 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 318 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop)); 319 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 320 321 ret = tcg_temp_new_i64(); 322 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 323 324 return ret; 325 } 326 return clean_data_tbi(s, addr); 327 } 328 329 /* 330 * Generate the special alignment check that applies to AccType_ATOMIC 331 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 332 * naturally aligned, but it must not cross a 16-byte boundary. 333 * See AArch64.CheckAlignment(). 
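 * For example, an 8-byte ordered access to an address with
 * (address % 16) == 12 spans two quadwords and so must be reported
 * as an alignment fault.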
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /*
     * We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
461 * This corresponds to making the (IMPDEF) choice to prioritize 462 * swstep exceptions over asynchronous exceptions taken to an exception 463 * level where debug is disabled. This choice has the advantage that 464 * we do not need to maintain internal state corresponding to the 465 * ISV/EX syndrome bits between completion of the step and generation 466 * of the exception, and our syndrome information is always correct. 467 */ 468 gen_ss_advance(s); 469 gen_swstep_exception(s, 1, s->is_ldex); 470 s->base.is_jmp = DISAS_NORETURN; 471 } 472 473 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 474 { 475 if (s->ss_active) { 476 return false; 477 } 478 return translator_use_goto_tb(&s->base, dest); 479 } 480 481 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 482 { 483 if (use_goto_tb(s, s->pc_curr + diff)) { 484 /* 485 * For pcrel, the pc must always be up-to-date on entry to 486 * the linked TB, so that it can use simple additions for all 487 * further adjustments. For !pcrel, the linked TB is compiled 488 * to know its full virtual address, so we can delay the 489 * update to pc to the unlinked path. A long chain of links 490 * can thus avoid many updates to the PC. 491 */ 492 if (tb_cflags(s->base.tb) & CF_PCREL) { 493 gen_a64_update_pc(s, diff); 494 tcg_gen_goto_tb(n); 495 } else { 496 tcg_gen_goto_tb(n); 497 gen_a64_update_pc(s, diff); 498 } 499 tcg_gen_exit_tb(s->base.tb, n); 500 s->base.is_jmp = DISAS_NORETURN; 501 } else { 502 gen_a64_update_pc(s, diff); 503 if (s->ss_active) { 504 gen_step_complete_exception(s); 505 } else { 506 tcg_gen_lookup_and_goto_ptr(); 507 s->base.is_jmp = DISAS_NORETURN; 508 } 509 } 510 } 511 512 /* 513 * Register access functions 514 * 515 * These functions are used for directly accessing a register in where 516 * changes to the final register value are likely to be made. If you 517 * need to use a register for temporary calculation (e.g. index type 518 * operations) use the read_* form. 519 * 520 * B1.2.1 Register mappings 521 * 522 * In instruction register encoding 31 can refer to ZR (zero register) or 523 * the SP (stack pointer) depending on context. In QEMU's case we map SP 524 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 525 * This is the point of the _sp forms. 526 */ 527 TCGv_i64 cpu_reg(DisasContext *s, int reg) 528 { 529 if (reg == 31) { 530 TCGv_i64 t = tcg_temp_new_i64(); 531 tcg_gen_movi_i64(t, 0); 532 return t; 533 } else { 534 return cpu_X[reg]; 535 } 536 } 537 538 /* register access for when 31 == SP */ 539 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 540 { 541 return cpu_X[reg]; 542 } 543 544 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 545 * representing the register contents. This TCGv is an auto-freed 546 * temporary so it need not be explicitly freed, and may be modified. 547 */ 548 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 549 { 550 TCGv_i64 v = tcg_temp_new_i64(); 551 if (reg != 31) { 552 if (sf) { 553 tcg_gen_mov_i64(v, cpu_X[reg]); 554 } else { 555 tcg_gen_ext32u_i64(v, cpu_X[reg]); 556 } 557 } else { 558 tcg_gen_movi_i64(v, 0); 559 } 560 return v; 561 } 562 563 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 564 { 565 TCGv_i64 v = tcg_temp_new_i64(); 566 if (sf) { 567 tcg_gen_mov_i64(v, cpu_X[reg]); 568 } else { 569 tcg_gen_ext32u_i64(v, cpu_X[reg]); 570 } 571 return v; 572 } 573 574 /* Return the offset into CPUARMState of a slice (from 575 * the least significant end) of FP register Qn (ie 576 * Dn, Sn, Hn or Bn). 
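 * (i.e. element 0 of the vector register, at the given MemOp size).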
577 * (Note that this is not the same mapping as for A32; see cpu.h) 578 */ 579 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 580 { 581 return vec_reg_offset(s, regno, 0, size); 582 } 583 584 /* Offset of the high half of the 128 bit vector Qn */ 585 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 586 { 587 return vec_reg_offset(s, regno, 1, MO_64); 588 } 589 590 /* Convenience accessors for reading and writing single and double 591 * FP registers. Writing clears the upper parts of the associated 592 * 128 bit vector register, as required by the architecture. 593 * Note that unlike the GP register accessors, the values returned 594 * by the read functions must be manually freed. 595 */ 596 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 597 { 598 TCGv_i64 v = tcg_temp_new_i64(); 599 600 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 601 return v; 602 } 603 604 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 605 { 606 TCGv_i32 v = tcg_temp_new_i32(); 607 608 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 609 return v; 610 } 611 612 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 613 { 614 TCGv_i32 v = tcg_temp_new_i32(); 615 616 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 617 return v; 618 } 619 620 static void clear_vec(DisasContext *s, int rd) 621 { 622 unsigned ofs = fp_reg_offset(s, rd, MO_64); 623 unsigned vsz = vec_full_reg_size(s); 624 625 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0); 626 } 627 628 /* 629 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 630 * If SVE is not enabled, then there are only 128 bits in the vector. 631 */ 632 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 633 { 634 unsigned ofs = fp_reg_offset(s, rd, MO_64); 635 unsigned vsz = vec_full_reg_size(s); 636 637 /* Nop move, with side effect of clearing the tail. */ 638 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 639 } 640 641 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 642 { 643 unsigned ofs = fp_reg_offset(s, reg, MO_64); 644 645 tcg_gen_st_i64(v, tcg_env, ofs); 646 clear_vec_high(s, false, reg); 647 } 648 649 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 650 { 651 TCGv_i64 tmp = tcg_temp_new_i64(); 652 653 tcg_gen_extu_i32_i64(tmp, v); 654 write_fp_dreg(s, reg, tmp); 655 } 656 657 /* 658 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: 659 * - if FPCR.NEP == 0, clear the high elements of reg 660 * - if FPCR.NEP == 1, set the high elements of reg from mergereg 661 * (i.e. merge the result with those high elements) 662 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). 663 */ 664 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, 665 TCGv_i64 v) 666 { 667 if (!s->fpcr_nep) { 668 write_fp_dreg(s, reg, v); 669 return; 670 } 671 672 /* 673 * Move from mergereg to reg; this sets the high elements and 674 * clears the bits above 128 as a side effect. 675 */ 676 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), 677 vec_full_reg_offset(s, mergereg), 678 16, vec_full_reg_size(s)); 679 tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); 680 } 681 682 /* 683 * Write a single-prec result, but only clear the higher elements 684 * of the destination register if FPCR.NEP is 0; otherwise preserve them. 
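 * (With FPCR.NEP == 1 the high elements are merged in from mergereg,
 * as for write_fp_dreg_merging above.)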
685 */ 686 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, 687 TCGv_i32 v) 688 { 689 if (!s->fpcr_nep) { 690 write_fp_sreg(s, reg, v); 691 return; 692 } 693 694 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), 695 vec_full_reg_offset(s, mergereg), 696 16, vec_full_reg_size(s)); 697 tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 698 } 699 700 /* 701 * Write a half-prec result, but only clear the higher elements 702 * of the destination register if FPCR.NEP is 0; otherwise preserve them. 703 * The caller must ensure that the top 16 bits of v are zero. 704 */ 705 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, 706 TCGv_i32 v) 707 { 708 if (!s->fpcr_nep) { 709 write_fp_sreg(s, reg, v); 710 return; 711 } 712 713 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), 714 vec_full_reg_offset(s, mergereg), 715 16, vec_full_reg_size(s)); 716 tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 717 } 718 719 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 720 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 721 GVecGen2Fn *gvec_fn, int vece) 722 { 723 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 724 is_q ? 16 : 8, vec_full_reg_size(s)); 725 } 726 727 /* Expand a 2-operand + immediate AdvSIMD vector operation using 728 * an expander function. 729 */ 730 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 731 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 732 { 733 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 734 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 735 } 736 737 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 738 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 739 GVecGen3Fn *gvec_fn, int vece) 740 { 741 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 742 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 743 } 744 745 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */ 746 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 747 int rx, GVecGen4Fn *gvec_fn, int vece) 748 { 749 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 750 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 751 is_q ? 16 : 8, vec_full_reg_size(s)); 752 } 753 754 /* Expand a 2-operand operation using an out-of-line helper. */ 755 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 756 int rn, int data, gen_helper_gvec_2 *fn) 757 { 758 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 759 vec_full_reg_offset(s, rn), 760 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 761 } 762 763 /* Expand a 3-operand operation using an out-of-line helper. */ 764 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 765 int rn, int rm, int data, gen_helper_gvec_3 *fn) 766 { 767 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 768 vec_full_reg_offset(s, rn), 769 vec_full_reg_offset(s, rm), 770 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 771 } 772 773 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 774 * an out-of-line helper. 
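 * The fpsttype argument selects which float_status in CPUARMState
 * the helper will operate on.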
775 */ 776 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 777 int rm, ARMFPStatusFlavour fpsttype, int data, 778 gen_helper_gvec_3_ptr *fn) 779 { 780 TCGv_ptr fpst = fpstatus_ptr(fpsttype); 781 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 782 vec_full_reg_offset(s, rn), 783 vec_full_reg_offset(s, rm), fpst, 784 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 785 } 786 787 /* Expand a 4-operand operation using an out-of-line helper. */ 788 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 789 int rm, int ra, int data, gen_helper_gvec_4 *fn) 790 { 791 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 792 vec_full_reg_offset(s, rn), 793 vec_full_reg_offset(s, rm), 794 vec_full_reg_offset(s, ra), 795 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 796 } 797 798 /* 799 * Expand a 4-operand operation using an out-of-line helper that takes 800 * a pointer to the CPU env. 801 */ 802 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, 803 int rm, int ra, int data, 804 gen_helper_gvec_4_ptr *fn) 805 { 806 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 807 vec_full_reg_offset(s, rn), 808 vec_full_reg_offset(s, rm), 809 vec_full_reg_offset(s, ra), 810 tcg_env, 811 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 812 } 813 814 /* 815 * Expand a 4-operand + fpstatus pointer + simd data value operation using 816 * an out-of-line helper. 817 */ 818 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 819 int rm, int ra, ARMFPStatusFlavour fpsttype, 820 int data, 821 gen_helper_gvec_4_ptr *fn) 822 { 823 TCGv_ptr fpst = fpstatus_ptr(fpsttype); 824 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 825 vec_full_reg_offset(s, rn), 826 vec_full_reg_offset(s, rm), 827 vec_full_reg_offset(s, ra), fpst, 828 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 829 } 830 831 /* 832 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. 833 * These functions implement 834 * d = floatN_is_any_nan(s) ? s : floatN_chs(s) 835 * which for float32 is 836 * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) 837 * and similarly for the other float sizes. 838 */ 839 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) 840 { 841 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); 842 843 gen_vfp_negh(chs_s, s); 844 gen_vfp_absh(abs_s, s); 845 tcg_gen_movcond_i32(TCG_COND_GTU, d, 846 abs_s, tcg_constant_i32(0x7c00), 847 s, chs_s); 848 } 849 850 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) 851 { 852 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); 853 854 gen_vfp_negs(chs_s, s); 855 gen_vfp_abss(abs_s, s); 856 tcg_gen_movcond_i32(TCG_COND_GTU, d, 857 abs_s, tcg_constant_i32(0x7f800000UL), 858 s, chs_s); 859 } 860 861 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) 862 { 863 TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); 864 865 gen_vfp_negd(chs_s, s); 866 gen_vfp_absd(abs_s, s); 867 tcg_gen_movcond_i64(TCG_COND_GTU, d, 868 abs_s, tcg_constant_i64(0x7ff0000000000000ULL), 869 s, chs_s); 870 } 871 872 /* 873 * These functions implement 874 * d = floatN_is_any_nan(s) ? s : floatN_abs(s) 875 * which for float32 is 876 * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) 877 * and similarly for the other float sizes. 
878 */ 879 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) 880 { 881 TCGv_i32 abs_s = tcg_temp_new_i32(); 882 883 gen_vfp_absh(abs_s, s); 884 tcg_gen_movcond_i32(TCG_COND_GTU, d, 885 abs_s, tcg_constant_i32(0x7c00), 886 s, abs_s); 887 } 888 889 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) 890 { 891 TCGv_i32 abs_s = tcg_temp_new_i32(); 892 893 gen_vfp_abss(abs_s, s); 894 tcg_gen_movcond_i32(TCG_COND_GTU, d, 895 abs_s, tcg_constant_i32(0x7f800000UL), 896 s, abs_s); 897 } 898 899 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) 900 { 901 TCGv_i64 abs_s = tcg_temp_new_i64(); 902 903 gen_vfp_absd(abs_s, s); 904 tcg_gen_movcond_i64(TCG_COND_GTU, d, 905 abs_s, tcg_constant_i64(0x7ff0000000000000ULL), 906 s, abs_s); 907 } 908 909 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) 910 { 911 if (dc->fpcr_ah) { 912 gen_vfp_ah_negh(d, s); 913 } else { 914 gen_vfp_negh(d, s); 915 } 916 } 917 918 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) 919 { 920 if (dc->fpcr_ah) { 921 gen_vfp_ah_negs(d, s); 922 } else { 923 gen_vfp_negs(d, s); 924 } 925 } 926 927 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) 928 { 929 if (dc->fpcr_ah) { 930 gen_vfp_ah_negd(d, s); 931 } else { 932 gen_vfp_negd(d, s); 933 } 934 } 935 936 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 937 * than the 32 bit equivalent. 938 */ 939 static inline void gen_set_NZ64(TCGv_i64 result) 940 { 941 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 942 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 943 } 944 945 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */ 946 static inline void gen_logic_CC(int sf, TCGv_i64 result) 947 { 948 if (sf) { 949 gen_set_NZ64(result); 950 } else { 951 tcg_gen_extrl_i64_i32(cpu_ZF, result); 952 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 953 } 954 tcg_gen_movi_i32(cpu_CF, 0); 955 tcg_gen_movi_i32(cpu_VF, 0); 956 } 957 958 /* dest = T0 + T1; compute C, N, V and Z flags */ 959 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 960 { 961 TCGv_i64 result, flag, tmp; 962 result = tcg_temp_new_i64(); 963 flag = tcg_temp_new_i64(); 964 tmp = tcg_temp_new_i64(); 965 966 tcg_gen_movi_i64(tmp, 0); 967 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 968 969 tcg_gen_extrl_i64_i32(cpu_CF, flag); 970 971 gen_set_NZ64(result); 972 973 tcg_gen_xor_i64(flag, result, t0); 974 tcg_gen_xor_i64(tmp, t0, t1); 975 tcg_gen_andc_i64(flag, flag, tmp); 976 tcg_gen_extrh_i64_i32(cpu_VF, flag); 977 978 tcg_gen_mov_i64(dest, result); 979 } 980 981 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 982 { 983 TCGv_i32 t0_32 = tcg_temp_new_i32(); 984 TCGv_i32 t1_32 = tcg_temp_new_i32(); 985 TCGv_i32 tmp = tcg_temp_new_i32(); 986 987 tcg_gen_movi_i32(tmp, 0); 988 tcg_gen_extrl_i64_i32(t0_32, t0); 989 tcg_gen_extrl_i64_i32(t1_32, t1); 990 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 991 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 992 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 993 tcg_gen_xor_i32(tmp, t0_32, t1_32); 994 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 995 tcg_gen_extu_i32_i64(dest, cpu_NF); 996 } 997 998 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 999 { 1000 if (sf) { 1001 gen_add64_CC(dest, t0, t1); 1002 } else { 1003 gen_add32_CC(dest, t0, t1); 1004 } 1005 } 1006 1007 /* dest = T0 - T1; compute C, N, V and Z flags */ 1008 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1009 { 1010 /* 64 bit arithmetic */ 1011 TCGv_i64 result, flag, tmp; 1012 1013 
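    /* For subtraction, C is NOT(borrow): set when t0 >= t1, unsigned. */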
result = tcg_temp_new_i64(); 1014 flag = tcg_temp_new_i64(); 1015 tcg_gen_sub_i64(result, t0, t1); 1016 1017 gen_set_NZ64(result); 1018 1019 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 1020 tcg_gen_extrl_i64_i32(cpu_CF, flag); 1021 1022 tcg_gen_xor_i64(flag, result, t0); 1023 tmp = tcg_temp_new_i64(); 1024 tcg_gen_xor_i64(tmp, t0, t1); 1025 tcg_gen_and_i64(flag, flag, tmp); 1026 tcg_gen_extrh_i64_i32(cpu_VF, flag); 1027 tcg_gen_mov_i64(dest, result); 1028 } 1029 1030 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1031 { 1032 /* 32 bit arithmetic */ 1033 TCGv_i32 t0_32 = tcg_temp_new_i32(); 1034 TCGv_i32 t1_32 = tcg_temp_new_i32(); 1035 TCGv_i32 tmp; 1036 1037 tcg_gen_extrl_i64_i32(t0_32, t0); 1038 tcg_gen_extrl_i64_i32(t1_32, t1); 1039 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 1040 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1041 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 1042 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 1043 tmp = tcg_temp_new_i32(); 1044 tcg_gen_xor_i32(tmp, t0_32, t1_32); 1045 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 1046 tcg_gen_extu_i32_i64(dest, cpu_NF); 1047 } 1048 1049 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1050 { 1051 if (sf) { 1052 gen_sub64_CC(dest, t0, t1); 1053 } else { 1054 gen_sub32_CC(dest, t0, t1); 1055 } 1056 } 1057 1058 /* dest = T0 + T1 + CF; do not compute flags. */ 1059 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1060 { 1061 TCGv_i64 flag = tcg_temp_new_i64(); 1062 tcg_gen_extu_i32_i64(flag, cpu_CF); 1063 tcg_gen_add_i64(dest, t0, t1); 1064 tcg_gen_add_i64(dest, dest, flag); 1065 1066 if (!sf) { 1067 tcg_gen_ext32u_i64(dest, dest); 1068 } 1069 } 1070 1071 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 1072 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1073 { 1074 if (sf) { 1075 TCGv_i64 result = tcg_temp_new_i64(); 1076 TCGv_i64 cf_64 = tcg_temp_new_i64(); 1077 TCGv_i64 vf_64 = tcg_temp_new_i64(); 1078 TCGv_i64 tmp = tcg_temp_new_i64(); 1079 TCGv_i64 zero = tcg_constant_i64(0); 1080 1081 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 1082 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 1083 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 1084 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 1085 gen_set_NZ64(result); 1086 1087 tcg_gen_xor_i64(vf_64, result, t0); 1088 tcg_gen_xor_i64(tmp, t0, t1); 1089 tcg_gen_andc_i64(vf_64, vf_64, tmp); 1090 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 1091 1092 tcg_gen_mov_i64(dest, result); 1093 } else { 1094 TCGv_i32 t0_32 = tcg_temp_new_i32(); 1095 TCGv_i32 t1_32 = tcg_temp_new_i32(); 1096 TCGv_i32 tmp = tcg_temp_new_i32(); 1097 TCGv_i32 zero = tcg_constant_i32(0); 1098 1099 tcg_gen_extrl_i64_i32(t0_32, t0); 1100 tcg_gen_extrl_i64_i32(t1_32, t1); 1101 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 1102 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 1103 1104 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1105 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 1106 tcg_gen_xor_i32(tmp, t0_32, t1_32); 1107 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 1108 tcg_gen_extu_i32_i64(dest, cpu_NF); 1109 } 1110 } 1111 1112 /* 1113 * Load/Store generators 1114 */ 1115 1116 /* 1117 * Store from GPR register to memory. 
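 * If iss_valid, also record the instruction specific syndrome (ISS)
 * fields so that a data abort on this access reports a useful syndrome.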
1118 */ 1119 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 1120 TCGv_i64 tcg_addr, MemOp memop, int memidx, 1121 bool iss_valid, 1122 unsigned int iss_srt, 1123 bool iss_sf, bool iss_ar) 1124 { 1125 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 1126 1127 if (iss_valid) { 1128 uint32_t syn; 1129 1130 syn = syn_data_abort_with_iss(0, 1131 (memop & MO_SIZE), 1132 false, 1133 iss_srt, 1134 iss_sf, 1135 iss_ar, 1136 0, 0, 0, 0, 0, false); 1137 disas_set_insn_syndrome(s, syn); 1138 } 1139 } 1140 1141 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 1142 TCGv_i64 tcg_addr, MemOp memop, 1143 bool iss_valid, 1144 unsigned int iss_srt, 1145 bool iss_sf, bool iss_ar) 1146 { 1147 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 1148 iss_valid, iss_srt, iss_sf, iss_ar); 1149 } 1150 1151 /* 1152 * Load from memory to GPR register 1153 */ 1154 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1155 MemOp memop, bool extend, int memidx, 1156 bool iss_valid, unsigned int iss_srt, 1157 bool iss_sf, bool iss_ar) 1158 { 1159 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 1160 1161 if (extend && (memop & MO_SIGN)) { 1162 g_assert((memop & MO_SIZE) <= MO_32); 1163 tcg_gen_ext32u_i64(dest, dest); 1164 } 1165 1166 if (iss_valid) { 1167 uint32_t syn; 1168 1169 syn = syn_data_abort_with_iss(0, 1170 (memop & MO_SIZE), 1171 (memop & MO_SIGN) != 0, 1172 iss_srt, 1173 iss_sf, 1174 iss_ar, 1175 0, 0, 0, 0, 0, false); 1176 disas_set_insn_syndrome(s, syn); 1177 } 1178 } 1179 1180 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1181 MemOp memop, bool extend, 1182 bool iss_valid, unsigned int iss_srt, 1183 bool iss_sf, bool iss_ar) 1184 { 1185 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1186 iss_valid, iss_srt, iss_sf, iss_ar); 1187 } 1188 1189 /* 1190 * Store from FP register to memory 1191 */ 1192 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1193 { 1194 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1195 TCGv_i64 tmplo = tcg_temp_new_i64(); 1196 1197 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1198 1199 if ((mop & MO_SIZE) < MO_128) { 1200 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1201 } else { 1202 TCGv_i64 tmphi = tcg_temp_new_i64(); 1203 TCGv_i128 t16 = tcg_temp_new_i128(); 1204 1205 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1206 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1207 1208 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1209 } 1210 } 1211 1212 /* 1213 * Load from memory to FP register 1214 */ 1215 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1216 { 1217 /* This always zero-extends and writes to a full 128 bit wide vector */ 1218 TCGv_i64 tmplo = tcg_temp_new_i64(); 1219 TCGv_i64 tmphi = NULL; 1220 1221 if ((mop & MO_SIZE) < MO_128) { 1222 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1223 } else { 1224 TCGv_i128 t16 = tcg_temp_new_i128(); 1225 1226 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1227 1228 tmphi = tcg_temp_new_i64(); 1229 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1230 } 1231 1232 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1233 1234 if (tmphi) { 1235 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1236 } 1237 clear_vec_high(s, tmphi != NULL, destidx); 1238 } 1239 1240 /* 1241 * Vector load/store helpers. 
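 * (These are used for the element-wise accesses, eg LD1/ST1 to a
 * single lane.)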
1242 * 1243 * The principal difference between this and a FP load is that we don't 1244 * zero extend as we are filling a partial chunk of the vector register. 1245 * These functions don't support 128 bit loads/stores, which would be 1246 * normal load/store operations. 1247 * 1248 * The _i32 versions are useful when operating on 32 bit quantities 1249 * (eg for floating point single or using Neon helper functions). 1250 */ 1251 1252 /* Get value of an element within a vector register */ 1253 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1254 int element, MemOp memop) 1255 { 1256 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1257 switch ((unsigned)memop) { 1258 case MO_8: 1259 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1260 break; 1261 case MO_16: 1262 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1263 break; 1264 case MO_32: 1265 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1266 break; 1267 case MO_8|MO_SIGN: 1268 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1269 break; 1270 case MO_16|MO_SIGN: 1271 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1272 break; 1273 case MO_32|MO_SIGN: 1274 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1275 break; 1276 case MO_64: 1277 case MO_64|MO_SIGN: 1278 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1279 break; 1280 default: 1281 g_assert_not_reached(); 1282 } 1283 } 1284 1285 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1286 int element, MemOp memop) 1287 { 1288 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1289 switch (memop) { 1290 case MO_8: 1291 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1292 break; 1293 case MO_16: 1294 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1295 break; 1296 case MO_8|MO_SIGN: 1297 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1298 break; 1299 case MO_16|MO_SIGN: 1300 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1301 break; 1302 case MO_32: 1303 case MO_32|MO_SIGN: 1304 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1305 break; 1306 default: 1307 g_assert_not_reached(); 1308 } 1309 } 1310 1311 /* Set value of an element within a vector register */ 1312 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1313 int element, MemOp memop) 1314 { 1315 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1316 switch (memop) { 1317 case MO_8: 1318 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1319 break; 1320 case MO_16: 1321 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1322 break; 1323 case MO_32: 1324 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1325 break; 1326 case MO_64: 1327 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1328 break; 1329 default: 1330 g_assert_not_reached(); 1331 } 1332 } 1333 1334 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1335 int destidx, int element, MemOp memop) 1336 { 1337 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1338 switch (memop) { 1339 case MO_8: 1340 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1341 break; 1342 case MO_16: 1343 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1344 break; 1345 case MO_32: 1346 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1347 break; 1348 default: 1349 g_assert_not_reached(); 1350 } 1351 } 1352 1353 /* Store from vector register to memory */ 1354 static void do_vec_st(DisasContext *s, int srcidx, int element, 1355 TCGv_i64 tcg_addr, MemOp mop) 1356 { 1357 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1358 1359 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1360 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1361 } 1362 1363 /* Load from memory to vector register */ 1364 static void do_vec_ld(DisasContext *s, int destidx, int element, 1365 TCGv_i64 tcg_addr, MemOp mop) 1366 { 1367 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1368 1369 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1370 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1371 } 1372 1373 /* Check that FP/Neon access is enabled. If it is, return 1374 * true. If not, emit code to generate an appropriate exception, 1375 * and return false; the caller should not emit any code for 1376 * the instruction. Note that this check must happen after all 1377 * unallocated-encoding checks (otherwise the syndrome information 1378 * for the resulting exception will be incorrect). 1379 */ 1380 static bool fp_access_check_only(DisasContext *s) 1381 { 1382 if (s->fp_excp_el) { 1383 assert(!s->fp_access_checked); 1384 s->fp_access_checked = true; 1385 1386 gen_exception_insn_el(s, 0, EXCP_UDEF, 1387 syn_fp_access_trap(1, 0xe, false, 0), 1388 s->fp_excp_el); 1389 return false; 1390 } 1391 s->fp_access_checked = true; 1392 return true; 1393 } 1394 1395 static bool fp_access_check(DisasContext *s) 1396 { 1397 if (!fp_access_check_only(s)) { 1398 return false; 1399 } 1400 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1401 gen_exception_insn(s, 0, EXCP_UDEF, 1402 syn_smetrap(SME_ET_Streaming, false)); 1403 return false; 1404 } 1405 return true; 1406 } 1407 1408 /* 1409 * Return <0 for non-supported element sizes, with MO_16 controlled by 1410 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. 1411 */ 1412 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz) 1413 { 1414 switch (esz) { 1415 case MO_64: 1416 case MO_32: 1417 break; 1418 case MO_16: 1419 if (!dc_isar_feature(aa64_fp16, s)) { 1420 return -1; 1421 } 1422 break; 1423 default: 1424 return -1; 1425 } 1426 return fp_access_check(s); 1427 } 1428 1429 /* Likewise, but vector MO_64 must have two elements. */ 1430 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) 1431 { 1432 switch (esz) { 1433 case MO_64: 1434 if (!is_q) { 1435 return -1; 1436 } 1437 break; 1438 case MO_32: 1439 break; 1440 case MO_16: 1441 if (!dc_isar_feature(aa64_fp16, s)) { 1442 return -1; 1443 } 1444 break; 1445 default: 1446 return -1; 1447 } 1448 return fp_access_check(s); 1449 } 1450 1451 /* 1452 * Check that SVE access is enabled. If it is, return true. 1453 * If not, emit code to generate an appropriate exception and return false. 1454 * This function corresponds to CheckSVEEnabled(). 1455 */ 1456 bool sve_access_check(DisasContext *s) 1457 { 1458 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1459 assert(dc_isar_feature(aa64_sme, s)); 1460 if (!sme_sm_enabled_check(s)) { 1461 goto fail_exit; 1462 } 1463 } else if (s->sve_excp_el) { 1464 gen_exception_insn_el(s, 0, EXCP_UDEF, 1465 syn_sve_access_trap(), s->sve_excp_el); 1466 goto fail_exit; 1467 } 1468 s->sve_access_checked = true; 1469 return fp_access_check(s); 1470 1471 fail_exit: 1472 /* Assert that we only raise one exception per instruction. */ 1473 assert(!s->sve_access_checked); 1474 s->sve_access_checked = true; 1475 return false; 1476 } 1477 1478 /* 1479 * Check that SME access is enabled, raise an exception if not. 1480 * Note that this function corresponds to CheckSMEAccess and is 1481 * only used directly for cpregs. 
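 * (Other SME enablement checks are routed through sme_enabled_check
 * below.)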
1482 */ 1483 static bool sme_access_check(DisasContext *s) 1484 { 1485 if (s->sme_excp_el) { 1486 gen_exception_insn_el(s, 0, EXCP_UDEF, 1487 syn_smetrap(SME_ET_AccessTrap, false), 1488 s->sme_excp_el); 1489 return false; 1490 } 1491 return true; 1492 } 1493 1494 /* This function corresponds to CheckSMEEnabled. */ 1495 bool sme_enabled_check(DisasContext *s) 1496 { 1497 /* 1498 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1499 * to be zero when fp_excp_el has priority. This is because we need 1500 * sme_excp_el by itself for cpregs access checks. 1501 */ 1502 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1503 s->fp_access_checked = true; 1504 return sme_access_check(s); 1505 } 1506 return fp_access_check_only(s); 1507 } 1508 1509 /* Common subroutine for CheckSMEAnd*Enabled. */ 1510 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1511 { 1512 if (!sme_enabled_check(s)) { 1513 return false; 1514 } 1515 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1516 gen_exception_insn(s, 0, EXCP_UDEF, 1517 syn_smetrap(SME_ET_NotStreaming, false)); 1518 return false; 1519 } 1520 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1521 gen_exception_insn(s, 0, EXCP_UDEF, 1522 syn_smetrap(SME_ET_InactiveZA, false)); 1523 return false; 1524 } 1525 return true; 1526 } 1527 1528 /* 1529 * Expanders for AdvSIMD translation functions. 1530 */ 1531 1532 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, 1533 gen_helper_gvec_2 *fn) 1534 { 1535 if (!a->q && a->esz == MO_64) { 1536 return false; 1537 } 1538 if (fp_access_check(s)) { 1539 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); 1540 } 1541 return true; 1542 } 1543 1544 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, 1545 gen_helper_gvec_3 *fn) 1546 { 1547 if (!a->q && a->esz == MO_64) { 1548 return false; 1549 } 1550 if (fp_access_check(s)) { 1551 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); 1552 } 1553 return true; 1554 } 1555 1556 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1557 { 1558 if (!a->q && a->esz == MO_64) { 1559 return false; 1560 } 1561 if (fp_access_check(s)) { 1562 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1563 } 1564 return true; 1565 } 1566 1567 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1568 { 1569 if (a->esz == MO_64) { 1570 return false; 1571 } 1572 if (fp_access_check(s)) { 1573 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1574 } 1575 return true; 1576 } 1577 1578 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1579 { 1580 if (a->esz == MO_8) { 1581 return false; 1582 } 1583 return do_gvec_fn3_no64(s, a, fn); 1584 } 1585 1586 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) 1587 { 1588 if (!a->q && a->esz == MO_64) { 1589 return false; 1590 } 1591 if (fp_access_check(s)) { 1592 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); 1593 } 1594 return true; 1595 } 1596 1597 /* 1598 * This utility function is for doing register extension with an 1599 * optional shift. You will likely want to pass a temporary for the 1600 * destination register. See DecodeRegExtend() in the ARM ARM. 1601 */ 1602 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1603 int option, unsigned int shift) 1604 { 1605 int extsize = extract32(option, 0, 2); 1606 bool is_signed = extract32(option, 2, 1); 1607 1608 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? 
MO_SIGN : 0)); 1609 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1610 } 1611 1612 static inline void gen_check_sp_alignment(DisasContext *s) 1613 { 1614 /* The AArch64 architecture mandates that (if enabled via PSTATE 1615 * or SCTLR bits) there is a check that SP is 16-aligned on every 1616 * SP-relative load or store (with an exception generated if it is not). 1617 * In line with general QEMU practice regarding misaligned accesses, 1618 * we omit these checks for the sake of guest program performance. 1619 * This function is provided as a hook so we can more easily add these 1620 * checks in future (possibly as a "favour catching guest program bugs 1621 * over speed" user selectable option). 1622 */ 1623 } 1624 1625 /* 1626 * The instruction disassembly implemented here matches 1627 * the instruction encoding classifications in chapter C4 1628 * of the ARM Architecture Reference Manual (DDI0487B_a); 1629 * classification names and decode diagrams here should generally 1630 * match up with those in the manual. 1631 */ 1632 1633 static bool trans_B(DisasContext *s, arg_i *a) 1634 { 1635 reset_btype(s); 1636 gen_goto_tb(s, 0, a->imm); 1637 return true; 1638 } 1639 1640 static bool trans_BL(DisasContext *s, arg_i *a) 1641 { 1642 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1643 reset_btype(s); 1644 gen_goto_tb(s, 0, a->imm); 1645 return true; 1646 } 1647 1648 1649 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1650 { 1651 DisasLabel match; 1652 TCGv_i64 tcg_cmp; 1653 1654 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1655 reset_btype(s); 1656 1657 match = gen_disas_label(s); 1658 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1659 tcg_cmp, 0, match.label); 1660 gen_goto_tb(s, 0, 4); 1661 set_disas_label(s, match); 1662 gen_goto_tb(s, 1, a->imm); 1663 return true; 1664 } 1665 1666 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1667 { 1668 DisasLabel match; 1669 TCGv_i64 tcg_cmp; 1670 1671 tcg_cmp = tcg_temp_new_i64(); 1672 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1673 1674 reset_btype(s); 1675 1676 match = gen_disas_label(s); 1677 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1678 tcg_cmp, 0, match.label); 1679 gen_goto_tb(s, 0, 4); 1680 set_disas_label(s, match); 1681 gen_goto_tb(s, 1, a->imm); 1682 return true; 1683 } 1684 1685 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1686 { 1687 /* BC.cond is only present with FEAT_HBC */ 1688 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1689 return false; 1690 } 1691 reset_btype(s); 1692 if (a->cond < 0x0e) { 1693 /* genuinely conditional branches */ 1694 DisasLabel match = gen_disas_label(s); 1695 arm_gen_test_cc(a->cond, match.label); 1696 gen_goto_tb(s, 0, 4); 1697 set_disas_label(s, match); 1698 gen_goto_tb(s, 1, a->imm); 1699 } else { 1700 /* 0xe and 0xf are both "always" conditions */ 1701 gen_goto_tb(s, 0, a->imm); 1702 } 1703 return true; 1704 } 1705 1706 static void set_btype_for_br(DisasContext *s, int rn) 1707 { 1708 if (dc_isar_feature(aa64_bti, s)) { 1709 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1710 if (rn == 16 || rn == 17) { 1711 set_btype(s, 1); 1712 } else { 1713 TCGv_i64 pc = tcg_temp_new_i64(); 1714 gen_pc_plus_diff(s, pc, 0); 1715 gen_helper_guarded_page_br(tcg_env, pc); 1716 s->btype = -1; 1717 } 1718 } 1719 } 1720 1721 static void set_btype_for_blr(DisasContext *s) 1722 { 1723 if (dc_isar_feature(aa64_bti, s)) { 1724 /* BLR sets BTYPE to 2, regardless of source guarded page. 
*/ 1725 set_btype(s, 2); 1726 } 1727 } 1728 1729 static bool trans_BR(DisasContext *s, arg_r *a) 1730 { 1731 set_btype_for_br(s, a->rn); 1732 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1733 s->base.is_jmp = DISAS_JUMP; 1734 return true; 1735 } 1736 1737 static bool trans_BLR(DisasContext *s, arg_r *a) 1738 { 1739 TCGv_i64 dst = cpu_reg(s, a->rn); 1740 TCGv_i64 lr = cpu_reg(s, 30); 1741 if (dst == lr) { 1742 TCGv_i64 tmp = tcg_temp_new_i64(); 1743 tcg_gen_mov_i64(tmp, dst); 1744 dst = tmp; 1745 } 1746 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1747 gen_a64_set_pc(s, dst); 1748 set_btype_for_blr(s); 1749 s->base.is_jmp = DISAS_JUMP; 1750 return true; 1751 } 1752 1753 static bool trans_RET(DisasContext *s, arg_r *a) 1754 { 1755 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1756 s->base.is_jmp = DISAS_JUMP; 1757 return true; 1758 } 1759 1760 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1761 TCGv_i64 modifier, bool use_key_a) 1762 { 1763 TCGv_i64 truedst; 1764 /* 1765 * Return the branch target for a BRAA/RETA/etc, which is either 1766 * just the destination dst, or that value with the pauth check 1767 * done and the code removed from the high bits. 1768 */ 1769 if (!s->pauth_active) { 1770 return dst; 1771 } 1772 1773 truedst = tcg_temp_new_i64(); 1774 if (use_key_a) { 1775 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1776 } else { 1777 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1778 } 1779 return truedst; 1780 } 1781 1782 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1783 { 1784 TCGv_i64 dst; 1785 1786 if (!dc_isar_feature(aa64_pauth, s)) { 1787 return false; 1788 } 1789 1790 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1791 set_btype_for_br(s, a->rn); 1792 gen_a64_set_pc(s, dst); 1793 s->base.is_jmp = DISAS_JUMP; 1794 return true; 1795 } 1796 1797 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1798 { 1799 TCGv_i64 dst, lr; 1800 1801 if (!dc_isar_feature(aa64_pauth, s)) { 1802 return false; 1803 } 1804 1805 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1806 lr = cpu_reg(s, 30); 1807 if (dst == lr) { 1808 TCGv_i64 tmp = tcg_temp_new_i64(); 1809 tcg_gen_mov_i64(tmp, dst); 1810 dst = tmp; 1811 } 1812 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1813 gen_a64_set_pc(s, dst); 1814 set_btype_for_blr(s); 1815 s->base.is_jmp = DISAS_JUMP; 1816 return true; 1817 } 1818 1819 static bool trans_RETA(DisasContext *s, arg_reta *a) 1820 { 1821 TCGv_i64 dst; 1822 1823 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1824 gen_a64_set_pc(s, dst); 1825 s->base.is_jmp = DISAS_JUMP; 1826 return true; 1827 } 1828 1829 static bool trans_BRA(DisasContext *s, arg_bra *a) 1830 { 1831 TCGv_i64 dst; 1832 1833 if (!dc_isar_feature(aa64_pauth, s)) { 1834 return false; 1835 } 1836 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1837 gen_a64_set_pc(s, dst); 1838 set_btype_for_br(s, a->rn); 1839 s->base.is_jmp = DISAS_JUMP; 1840 return true; 1841 } 1842 1843 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1844 { 1845 TCGv_i64 dst, lr; 1846 1847 if (!dc_isar_feature(aa64_pauth, s)) { 1848 return false; 1849 } 1850 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1851 lr = cpu_reg(s, 30); 1852 if (dst == lr) { 1853 TCGv_i64 tmp = tcg_temp_new_i64(); 1854 tcg_gen_mov_i64(tmp, dst); 1855 dst = tmp; 1856 } 1857 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1858 gen_a64_set_pc(s, dst); 1859 set_btype_for_blr(s); 1860 s->base.is_jmp = DISAS_JUMP; 1861 
return true; 1862 } 1863 1864 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1865 { 1866 TCGv_i64 dst; 1867 1868 if (s->current_el == 0) { 1869 return false; 1870 } 1871 if (s->trap_eret) { 1872 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1873 return true; 1874 } 1875 dst = tcg_temp_new_i64(); 1876 tcg_gen_ld_i64(dst, tcg_env, 1877 offsetof(CPUARMState, elr_el[s->current_el])); 1878 1879 translator_io_start(&s->base); 1880 1881 gen_helper_exception_return(tcg_env, dst); 1882 /* Must exit loop to check un-masked IRQs */ 1883 s->base.is_jmp = DISAS_EXIT; 1884 return true; 1885 } 1886 1887 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1888 { 1889 TCGv_i64 dst; 1890 1891 if (!dc_isar_feature(aa64_pauth, s)) { 1892 return false; 1893 } 1894 if (s->current_el == 0) { 1895 return false; 1896 } 1897 /* The FGT trap takes precedence over an auth trap. */ 1898 if (s->trap_eret) { 1899 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1900 return true; 1901 } 1902 dst = tcg_temp_new_i64(); 1903 tcg_gen_ld_i64(dst, tcg_env, 1904 offsetof(CPUARMState, elr_el[s->current_el])); 1905 1906 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1907 1908 translator_io_start(&s->base); 1909 1910 gen_helper_exception_return(tcg_env, dst); 1911 /* Must exit loop to check un-masked IRQs */ 1912 s->base.is_jmp = DISAS_EXIT; 1913 return true; 1914 } 1915 1916 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1917 { 1918 return true; 1919 } 1920 1921 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1922 { 1923 /* 1924 * When running in MTTCG we don't generate jumps to the yield and 1925 * WFE helpers as it won't affect the scheduling of other vCPUs. 1926 * If we wanted to more completely model WFE/SEV so we don't busy 1927 * spin unnecessarily we would need to do something more involved. 1928 */ 1929 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1930 s->base.is_jmp = DISAS_YIELD; 1931 } 1932 return true; 1933 } 1934 1935 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1936 { 1937 s->base.is_jmp = DISAS_WFI; 1938 return true; 1939 } 1940 1941 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1942 { 1943 /* 1944 * When running in MTTCG we don't generate jumps to the yield and 1945 * WFE helpers as it won't affect the scheduling of other vCPUs. 1946 * If we wanted to more completely model WFE/SEV so we don't busy 1947 * spin unnecessarily we would need to do something more involved. 1948 */ 1949 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1950 s->base.is_jmp = DISAS_WFE; 1951 } 1952 return true; 1953 } 1954 1955 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 1956 { 1957 if (!dc_isar_feature(aa64_wfxt, s)) { 1958 return false; 1959 } 1960 1961 /* 1962 * Because we need to pass the register value to the helper, 1963 * it's easier to emit the code now, unlike trans_WFI which 1964 * defers it to aarch64_tr_tb_stop(). That means we need to 1965 * check ss_active so that single-stepping a WFIT doesn't halt. 
1966 */ 1967 if (s->ss_active) { 1968 /* Act like a NOP under architectural singlestep */ 1969 return true; 1970 } 1971 1972 gen_a64_update_pc(s, 4); 1973 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1974 /* Go back to the main loop to check for interrupts */ 1975 s->base.is_jmp = DISAS_EXIT; 1976 return true; 1977 } 1978 1979 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1980 { 1981 if (!dc_isar_feature(aa64_wfxt, s)) { 1982 return false; 1983 } 1984 1985 /* 1986 * We rely here on our WFE implementation being a NOP, so we 1987 * don't need to do anything different to handle the WFET timeout 1988 * from what trans_WFE does. 1989 */ 1990 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1991 s->base.is_jmp = DISAS_WFE; 1992 } 1993 return true; 1994 } 1995 1996 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1997 { 1998 if (s->pauth_active) { 1999 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 2000 } 2001 return true; 2002 } 2003 2004 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 2005 { 2006 if (s->pauth_active) { 2007 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2008 } 2009 return true; 2010 } 2011 2012 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 2013 { 2014 if (s->pauth_active) { 2015 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2016 } 2017 return true; 2018 } 2019 2020 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 2021 { 2022 if (s->pauth_active) { 2023 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2024 } 2025 return true; 2026 } 2027 2028 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 2029 { 2030 if (s->pauth_active) { 2031 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2032 } 2033 return true; 2034 } 2035 2036 static bool trans_ESB(DisasContext *s, arg_ESB *a) 2037 { 2038 /* Without RAS, we must implement this as NOP. */ 2039 if (dc_isar_feature(aa64_ras, s)) { 2040 /* 2041 * QEMU does not have a source of physical SErrors, 2042 * so we are only concerned with virtual SErrors. 2043 * The pseudocode in the ARM for this case is 2044 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 2045 * AArch64.vESBOperation(); 2046 * Most of the condition can be evaluated at translation time. 2047 * Test for EL2 present, and defer test for SEL2 to runtime. 
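         * The vesb helper then performs the remaining runtime checks and,
         * per AArch64.vESBOperation, defers any pending (and masked)
         * virtual SError into VDISR_EL2.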
2048 */ 2049 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 2050 gen_helper_vesb(tcg_env); 2051 } 2052 } 2053 return true; 2054 } 2055 2056 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 2057 { 2058 if (s->pauth_active) { 2059 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2060 } 2061 return true; 2062 } 2063 2064 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 2065 { 2066 if (s->pauth_active) { 2067 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2068 } 2069 return true; 2070 } 2071 2072 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 2073 { 2074 if (s->pauth_active) { 2075 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2076 } 2077 return true; 2078 } 2079 2080 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 2081 { 2082 if (s->pauth_active) { 2083 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2084 } 2085 return true; 2086 } 2087 2088 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 2089 { 2090 if (s->pauth_active) { 2091 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2092 } 2093 return true; 2094 } 2095 2096 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 2097 { 2098 if (s->pauth_active) { 2099 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2100 } 2101 return true; 2102 } 2103 2104 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 2105 { 2106 if (s->pauth_active) { 2107 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2108 } 2109 return true; 2110 } 2111 2112 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 2113 { 2114 if (s->pauth_active) { 2115 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2116 } 2117 return true; 2118 } 2119 2120 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 2121 { 2122 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2123 return true; 2124 } 2125 2126 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 2127 { 2128 /* We handle DSB and DMB the same way */ 2129 TCGBar bar; 2130 2131 switch (a->types) { 2132 case 1: /* MBReqTypes_Reads */ 2133 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 2134 break; 2135 case 2: /* MBReqTypes_Writes */ 2136 bar = TCG_BAR_SC | TCG_MO_ST_ST; 2137 break; 2138 default: /* MBReqTypes_All */ 2139 bar = TCG_BAR_SC | TCG_MO_ALL; 2140 break; 2141 } 2142 tcg_gen_mb(bar); 2143 return true; 2144 } 2145 2146 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 2147 { 2148 if (!dc_isar_feature(aa64_xs, s)) { 2149 return false; 2150 } 2151 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 2152 return true; 2153 } 2154 2155 static bool trans_ISB(DisasContext *s, arg_ISB *a) 2156 { 2157 /* 2158 * We need to break the TB after this insn to execute 2159 * self-modifying code correctly and also to take 2160 * any pending interrupts immediately. 2161 */ 2162 reset_btype(s); 2163 gen_goto_tb(s, 0, 4); 2164 return true; 2165 } 2166 2167 static bool trans_SB(DisasContext *s, arg_SB *a) 2168 { 2169 if (!dc_isar_feature(aa64_sb, s)) { 2170 return false; 2171 } 2172 /* 2173 * TODO: There is no speculation barrier opcode for TCG; 2174 * MB and end the TB instead. 
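     * TCG does not speculatively execute guest code, so a full memory
     * barrier plus ending the TB is a sufficient approximation.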
2175 */ 2176 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2177 gen_goto_tb(s, 0, 4); 2178 return true; 2179 } 2180 2181 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2182 { 2183 if (!dc_isar_feature(aa64_condm_4, s)) { 2184 return false; 2185 } 2186 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2187 return true; 2188 } 2189 2190 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2191 { 2192 TCGv_i32 z; 2193 2194 if (!dc_isar_feature(aa64_condm_5, s)) { 2195 return false; 2196 } 2197 2198 z = tcg_temp_new_i32(); 2199 2200 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2201 2202 /* 2203 * (!C & !Z) << 31 2204 * (!(C | Z)) << 31 2205 * ~((C | Z) << 31) 2206 * ~-(C | Z) 2207 * (C | Z) - 1 2208 */ 2209 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2210 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2211 2212 /* !(Z & C) */ 2213 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2214 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2215 2216 /* (!C & Z) << 31 -> -(Z & ~C) */ 2217 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2218 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2219 2220 /* C | Z */ 2221 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2222 2223 return true; 2224 } 2225 2226 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2227 { 2228 if (!dc_isar_feature(aa64_condm_5, s)) { 2229 return false; 2230 } 2231 2232 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2233 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2234 2235 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2236 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2237 2238 tcg_gen_movi_i32(cpu_NF, 0); 2239 tcg_gen_movi_i32(cpu_VF, 0); 2240 2241 return true; 2242 } 2243 2244 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2245 { 2246 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2247 return false; 2248 } 2249 if (a->imm & 1) { 2250 set_pstate_bits(PSTATE_UAO); 2251 } else { 2252 clear_pstate_bits(PSTATE_UAO); 2253 } 2254 gen_rebuild_hflags(s); 2255 s->base.is_jmp = DISAS_TOO_MANY; 2256 return true; 2257 } 2258 2259 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2260 { 2261 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2262 return false; 2263 } 2264 if (a->imm & 1) { 2265 set_pstate_bits(PSTATE_PAN); 2266 } else { 2267 clear_pstate_bits(PSTATE_PAN); 2268 } 2269 gen_rebuild_hflags(s); 2270 s->base.is_jmp = DISAS_TOO_MANY; 2271 return true; 2272 } 2273 2274 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2275 { 2276 if (s->current_el == 0) { 2277 return false; 2278 } 2279 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2280 s->base.is_jmp = DISAS_TOO_MANY; 2281 return true; 2282 } 2283 2284 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2285 { 2286 if (!dc_isar_feature(aa64_ssbs, s)) { 2287 return false; 2288 } 2289 if (a->imm & 1) { 2290 set_pstate_bits(PSTATE_SSBS); 2291 } else { 2292 clear_pstate_bits(PSTATE_SSBS); 2293 } 2294 /* Don't need to rebuild hflags since SSBS is a nop */ 2295 s->base.is_jmp = DISAS_TOO_MANY; 2296 return true; 2297 } 2298 2299 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2300 { 2301 if (!dc_isar_feature(aa64_dit, s)) { 2302 return false; 2303 } 2304 if (a->imm & 1) { 2305 set_pstate_bits(PSTATE_DIT); 2306 } else { 2307 clear_pstate_bits(PSTATE_DIT); 2308 } 2309 /* There's no need to rebuild hflags because DIT is a nop */ 2310 s->base.is_jmp = DISAS_TOO_MANY; 2311 return true; 2312 } 2313 2314 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2315 { 2316 if (dc_isar_feature(aa64_mte, s)) { 2317 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2318 if (a->imm & 1) { 2319 set_pstate_bits(PSTATE_TCO); 2320 } else { 2321 clear_pstate_bits(PSTATE_TCO); 2322 } 2323 gen_rebuild_hflags(s); 2324 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2325 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2326 return true; 2327 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2328 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2329 return true; 2330 } else { 2331 /* Insn not present */ 2332 return false; 2333 } 2334 } 2335 2336 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2337 { 2338 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2339 s->base.is_jmp = DISAS_TOO_MANY; 2340 return true; 2341 } 2342 2343 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2344 { 2345 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2346 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2347 s->base.is_jmp = DISAS_UPDATE_EXIT; 2348 return true; 2349 } 2350 2351 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2352 { 2353 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2354 return false; 2355 } 2356 2357 if (a->imm == 0) { 2358 clear_pstate_bits(PSTATE_ALLINT); 2359 } else if (s->current_el > 1) { 2360 set_pstate_bits(PSTATE_ALLINT); 2361 } else { 2362 gen_helper_msr_set_allint_el1(tcg_env); 2363 } 2364 2365 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2366 s->base.is_jmp = DISAS_UPDATE_EXIT; 2367 return true; 2368 } 2369 2370 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2371 { 2372 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2373 return false; 2374 } 2375 if (sme_access_check(s)) { 2376 int old = s->pstate_sm | (s->pstate_za << 1); 2377 int new = a->imm * 3; 2378 2379 if ((old ^ new) & a->mask) { 2380 /* At least one bit changes. */ 2381 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2382 tcg_constant_i32(a->mask)); 2383 s->base.is_jmp = DISAS_TOO_MANY; 2384 } 2385 } 2386 return true; 2387 } 2388 2389 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2390 { 2391 TCGv_i32 tmp = tcg_temp_new_i32(); 2392 TCGv_i32 nzcv = tcg_temp_new_i32(); 2393 2394 /* build bit 31, N */ 2395 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2396 /* build bit 30, Z */ 2397 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2398 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2399 /* build bit 29, C */ 2400 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2401 /* build bit 28, V */ 2402 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2403 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2404 /* generate result */ 2405 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2406 } 2407 2408 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2409 { 2410 TCGv_i32 nzcv = tcg_temp_new_i32(); 2411 2412 /* take NZCV from R[t] */ 2413 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2414 2415 /* bit 31, N */ 2416 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2417 /* bit 30, Z */ 2418 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2419 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2420 /* bit 29, C */ 2421 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2422 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2423 /* bit 28, V */ 2424 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2425 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2426 } 2427 2428 static void gen_sysreg_undef(DisasContext *s, bool isread, 2429 uint8_t op0, uint8_t op1, uint8_t op2, 2430 uint8_t crn, uint8_t crm, uint8_t rt) 2431 { 2432 /* 2433 * Generate code to emit an UNDEF with correct syndrome 2434 * information for a failed system register access. 
2435 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2436 * but if FEAT_IDST is implemented then read accesses to registers 2437 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2438 * syndrome. 2439 */ 2440 uint32_t syndrome; 2441 2442 if (isread && dc_isar_feature(aa64_ids, s) && 2443 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2444 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2445 } else { 2446 syndrome = syn_uncategorized(); 2447 } 2448 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2449 } 2450 2451 /* MRS - move from system register 2452 * MSR (register) - move to system register 2453 * SYS 2454 * SYSL 2455 * These are all essentially the same insn in 'read' and 'write' 2456 * versions, with varying op0 fields. 2457 */ 2458 static void handle_sys(DisasContext *s, bool isread, 2459 unsigned int op0, unsigned int op1, unsigned int op2, 2460 unsigned int crn, unsigned int crm, unsigned int rt) 2461 { 2462 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2463 crn, crm, op0, op1, op2); 2464 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2465 bool need_exit_tb = false; 2466 bool nv_trap_to_el2 = false; 2467 bool nv_redirect_reg = false; 2468 bool skip_fp_access_checks = false; 2469 bool nv2_mem_redirect = false; 2470 TCGv_ptr tcg_ri = NULL; 2471 TCGv_i64 tcg_rt; 2472 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2473 2474 if (crn == 11 || crn == 15) { 2475 /* 2476 * Check for TIDCP trap, which must take precedence over 2477 * the UNDEF for "no such register" etc. 2478 */ 2479 switch (s->current_el) { 2480 case 0: 2481 if (dc_isar_feature(aa64_tidcp1, s)) { 2482 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2483 } 2484 break; 2485 case 1: 2486 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2487 break; 2488 } 2489 } 2490 2491 if (!ri) { 2492 /* Unknown register; this might be a guest error or a QEMU 2493 * unimplemented feature. 2494 */ 2495 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2496 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2497 isread ? "read" : "write", op0, op1, crn, crm, op2); 2498 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2499 return; 2500 } 2501 2502 if (s->nv2 && ri->nv2_redirect_offset) { 2503 /* 2504 * Some registers always redirect to memory; some only do so if 2505 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2506 * pairs which share an offset; see the table in R_CSRPQ). 2507 */ 2508 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2509 nv2_mem_redirect = s->nv1; 2510 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2511 nv2_mem_redirect = !s->nv1; 2512 } else { 2513 nv2_mem_redirect = true; 2514 } 2515 } 2516 2517 /* Check access permissions */ 2518 if (!cp_access_ok(s->current_el, ri, isread)) { 2519 /* 2520 * FEAT_NV/NV2 handling does not do the usual FP access checks 2521 * for registers only accessible at EL2 (though it *does* do them 2522 * for registers accessible at EL1). 2523 */ 2524 skip_fp_access_checks = true; 2525 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2526 /* 2527 * This is one of the few EL2 registers which should redirect 2528 * to the equivalent EL1 register. We do that after running 2529 * the EL2 register's accessfn. 2530 */ 2531 nv_redirect_reg = true; 2532 assert(!nv2_mem_redirect); 2533 } else if (nv2_mem_redirect) { 2534 /* 2535 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2536 * UNDEF to EL1. 
2537 */ 2538 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2539 /* 2540 * This register / instruction exists and is an EL2 register, so 2541 * we must trap to EL2 if accessed in nested virtualization EL1 2542 * instead of UNDEFing. We'll do that after the usual access checks. 2543 * (This makes a difference only for a couple of registers like 2544 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2545 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2546 * an accessfn which does nothing when called from EL1, because 2547 * the trap-to-EL3 controls which would apply to that register 2548 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2549 */ 2550 nv_trap_to_el2 = true; 2551 } else { 2552 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2553 return; 2554 } 2555 } 2556 2557 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2558 /* Emit code to perform further access permissions checks at 2559 * runtime; this may result in an exception. 2560 */ 2561 gen_a64_update_pc(s, 0); 2562 tcg_ri = tcg_temp_new_ptr(); 2563 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2564 tcg_constant_i32(key), 2565 tcg_constant_i32(syndrome), 2566 tcg_constant_i32(isread)); 2567 } else if (ri->type & ARM_CP_RAISES_EXC) { 2568 /* 2569 * The readfn or writefn might raise an exception; 2570 * synchronize the CPU state in case it does. 2571 */ 2572 gen_a64_update_pc(s, 0); 2573 } 2574 2575 if (!skip_fp_access_checks) { 2576 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2577 return; 2578 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2579 return; 2580 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2581 return; 2582 } 2583 } 2584 2585 if (nv_trap_to_el2) { 2586 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2587 return; 2588 } 2589 2590 if (nv_redirect_reg) { 2591 /* 2592 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2593 * Conveniently in all cases the encoding of the EL1 register is 2594 * identical to the EL2 register except that opc1 is 0. 2595 * Get the reginfo for the EL1 register to use for the actual access. 2596 * We don't use the EL1 register's access function, and 2597 * fine-grained-traps on EL1 also do not apply here. 2598 */ 2599 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2600 crn, crm, op0, 0, op2); 2601 ri = get_arm_cp_reginfo(s->cp_regs, key); 2602 assert(ri); 2603 assert(cp_access_ok(s->current_el, ri, isread)); 2604 /* 2605 * We might not have done an update_pc earlier, so check we don't 2606 * need it. We could support this in future if necessary. 2607 */ 2608 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2609 } 2610 2611 if (nv2_mem_redirect) { 2612 /* 2613 * This system register is being redirected into an EL2 memory access. 2614 * This means it is not an IO operation, doesn't change hflags, 2615 * and need not end the TB, because it has no side effects. 2616 * 2617 * The access is 64-bit single copy atomic, guaranteed aligned because 2618 * of the definition of VCNR_EL2. Its endianness depends on 2619 * SCTLR_EL2.EE, not on the data endianness of EL1. 2620 * It is done under either the EL2 translation regime or the EL2&0 2621 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2622 * PSTATE.PAN is 0. 2623 */ 2624 TCGv_i64 ptr = tcg_temp_new_i64(); 2625 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2626 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2627 int memidx = arm_to_core_mmu_idx(armmemidx); 2628 uint32_t syn; 2629 2630 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2631 2632 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2633 tcg_gen_addi_i64(ptr, ptr, 2634 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2635 tcg_rt = cpu_reg(s, rt); 2636 2637 syn = syn_data_abort_vncr(0, !isread, 0); 2638 disas_set_insn_syndrome(s, syn); 2639 if (isread) { 2640 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2641 } else { 2642 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2643 } 2644 return; 2645 } 2646 2647 /* Handle special cases first */ 2648 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2649 case 0: 2650 break; 2651 case ARM_CP_NOP: 2652 return; 2653 case ARM_CP_NZCV: 2654 tcg_rt = cpu_reg(s, rt); 2655 if (isread) { 2656 gen_get_nzcv(tcg_rt); 2657 } else { 2658 gen_set_nzcv(tcg_rt); 2659 } 2660 return; 2661 case ARM_CP_CURRENTEL: 2662 { 2663 /* 2664 * Reads as current EL value from pstate, which is 2665 * guaranteed to be constant by the tb flags. 2666 * For nested virt we should report EL2. 2667 */ 2668 int el = s->nv ? 2 : s->current_el; 2669 tcg_rt = cpu_reg(s, rt); 2670 tcg_gen_movi_i64(tcg_rt, el << 2); 2671 return; 2672 } 2673 case ARM_CP_DC_ZVA: 2674 /* Writes clear the aligned block of memory which rt points into. */ 2675 if (s->mte_active[0]) { 2676 int desc = 0; 2677 2678 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2679 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2680 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2681 2682 tcg_rt = tcg_temp_new_i64(); 2683 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2684 tcg_constant_i32(desc), cpu_reg(s, rt)); 2685 } else { 2686 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2687 } 2688 gen_helper_dc_zva(tcg_env, tcg_rt); 2689 return; 2690 case ARM_CP_DC_GVA: 2691 { 2692 TCGv_i64 clean_addr, tag; 2693 2694 /* 2695 * DC_GVA, like DC_ZVA, requires that we supply the original 2696 * pointer for an invalid page. Probe that address first. 2697 */ 2698 tcg_rt = cpu_reg(s, rt); 2699 clean_addr = clean_data_tbi(s, tcg_rt); 2700 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2701 2702 if (s->ata[0]) { 2703 /* Extract the tag from the register to match STZGM. */ 2704 tag = tcg_temp_new_i64(); 2705 tcg_gen_shri_i64(tag, tcg_rt, 56); 2706 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2707 } 2708 } 2709 return; 2710 case ARM_CP_DC_GZVA: 2711 { 2712 TCGv_i64 clean_addr, tag; 2713 2714 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2715 tcg_rt = cpu_reg(s, rt); 2716 clean_addr = clean_data_tbi(s, tcg_rt); 2717 gen_helper_dc_zva(tcg_env, clean_addr); 2718 2719 if (s->ata[0]) { 2720 /* Extract the tag from the register to match STZGM. 
*/ 2721 tag = tcg_temp_new_i64(); 2722 tcg_gen_shri_i64(tag, tcg_rt, 56); 2723 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2724 } 2725 } 2726 return; 2727 default: 2728 g_assert_not_reached(); 2729 } 2730 2731 if (ri->type & ARM_CP_IO) { 2732 /* I/O operations must end the TB here (whether read or write) */ 2733 need_exit_tb = translator_io_start(&s->base); 2734 } 2735 2736 tcg_rt = cpu_reg(s, rt); 2737 2738 if (isread) { 2739 if (ri->type & ARM_CP_CONST) { 2740 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2741 } else if (ri->readfn) { 2742 if (!tcg_ri) { 2743 tcg_ri = gen_lookup_cp_reg(key); 2744 } 2745 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2746 } else { 2747 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2748 } 2749 } else { 2750 if (ri->type & ARM_CP_CONST) { 2751 /* If not forbidden by access permissions, treat as WI */ 2752 return; 2753 } else if (ri->writefn) { 2754 if (!tcg_ri) { 2755 tcg_ri = gen_lookup_cp_reg(key); 2756 } 2757 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2758 } else { 2759 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2760 } 2761 } 2762 2763 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2764 /* 2765 * A write to any coprocessor register that ends a TB 2766 * must rebuild the hflags for the next TB. 2767 */ 2768 gen_rebuild_hflags(s); 2769 /* 2770 * We default to ending the TB on a coprocessor register write, 2771 * but allow this to be suppressed by the register definition 2772 * (usually only necessary to work around guest bugs). 2773 */ 2774 need_exit_tb = true; 2775 } 2776 if (need_exit_tb) { 2777 s->base.is_jmp = DISAS_UPDATE_EXIT; 2778 } 2779 } 2780 2781 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2782 { 2783 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2784 return true; 2785 } 2786 2787 static bool trans_SVC(DisasContext *s, arg_i *a) 2788 { 2789 /* 2790 * For SVC, HVC and SMC we advance the single-step state 2791 * machine before taking the exception. This is architecturally 2792 * mandated, to ensure that single-stepping a system call 2793 * instruction works properly. 2794 */ 2795 uint32_t syndrome = syn_aa64_svc(a->imm); 2796 if (s->fgt_svc) { 2797 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2798 return true; 2799 } 2800 gen_ss_advance(s); 2801 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2802 return true; 2803 } 2804 2805 static bool trans_HVC(DisasContext *s, arg_i *a) 2806 { 2807 int target_el = s->current_el == 3 ? 3 : 2; 2808 2809 if (s->current_el == 0) { 2810 unallocated_encoding(s); 2811 return true; 2812 } 2813 /* 2814 * The pre HVC helper handles cases when HVC gets trapped 2815 * as an undefined insn by runtime configuration. 
2816 */ 2817 gen_a64_update_pc(s, 0); 2818 gen_helper_pre_hvc(tcg_env); 2819 /* Architecture requires ss advance before we do the actual work */ 2820 gen_ss_advance(s); 2821 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2822 return true; 2823 } 2824 2825 static bool trans_SMC(DisasContext *s, arg_i *a) 2826 { 2827 if (s->current_el == 0) { 2828 unallocated_encoding(s); 2829 return true; 2830 } 2831 gen_a64_update_pc(s, 0); 2832 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2833 /* Architecture requires ss advance before we do the actual work */ 2834 gen_ss_advance(s); 2835 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2836 return true; 2837 } 2838 2839 static bool trans_BRK(DisasContext *s, arg_i *a) 2840 { 2841 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2842 return true; 2843 } 2844 2845 static bool trans_HLT(DisasContext *s, arg_i *a) 2846 { 2847 /* 2848 * HLT. This has two purposes. 2849 * Architecturally, it is an external halting debug instruction. 2850 * Since QEMU doesn't implement external debug, we treat this as 2851 * it is required for halting debug disabled: it will UNDEF. 2852 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2853 */ 2854 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2855 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2856 } else { 2857 unallocated_encoding(s); 2858 } 2859 return true; 2860 } 2861 2862 /* 2863 * Load/Store exclusive instructions are implemented by remembering 2864 * the value/address loaded, and seeing if these are the same 2865 * when the store is performed. This is not actually the architecturally 2866 * mandated semantics, but it works for typical guest code sequences 2867 * and avoids having to monitor regular stores. 2868 * 2869 * The store exclusive uses the atomic cmpxchg primitives to avoid 2870 * races in multi-threaded linux-user and when MTTCG softmmu is 2871 * enabled. 
2872 */ 2873 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2874 int size, bool is_pair) 2875 { 2876 int idx = get_mem_index(s); 2877 TCGv_i64 dirty_addr, clean_addr; 2878 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2879 2880 s->is_ldex = true; 2881 dirty_addr = cpu_reg_sp(s, rn); 2882 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2883 2884 g_assert(size <= 3); 2885 if (is_pair) { 2886 g_assert(size >= 2); 2887 if (size == 2) { 2888 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2889 if (s->be_data == MO_LE) { 2890 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2891 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2892 } else { 2893 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2894 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2895 } 2896 } else { 2897 TCGv_i128 t16 = tcg_temp_new_i128(); 2898 2899 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2900 2901 if (s->be_data == MO_LE) { 2902 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2903 cpu_exclusive_high, t16); 2904 } else { 2905 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2906 cpu_exclusive_val, t16); 2907 } 2908 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2909 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2910 } 2911 } else { 2912 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2913 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2914 } 2915 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2916 } 2917 2918 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2919 int rn, int size, int is_pair) 2920 { 2921 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2922 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2923 * [addr] = {Rt}; 2924 * if (is_pair) { 2925 * [addr + datasize] = {Rt2}; 2926 * } 2927 * {Rd} = 0; 2928 * } else { 2929 * {Rd} = 1; 2930 * } 2931 * env->exclusive_addr = -1; 2932 */ 2933 TCGLabel *fail_label = gen_new_label(); 2934 TCGLabel *done_label = gen_new_label(); 2935 TCGv_i64 tmp, clean_addr; 2936 MemOp memop; 2937 2938 /* 2939 * FIXME: We are out of spec here. We have recorded only the address 2940 * from load_exclusive, not the entire range, and we assume that the 2941 * size of the access on both sides match. The architecture allows the 2942 * store to be smaller than the load, so long as the stored bytes are 2943 * within the range recorded by the load. 2944 */ 2945 2946 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2947 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2948 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2949 2950 /* 2951 * The write, and any associated faults, only happen if the virtual 2952 * and physical addresses pass the exclusive monitor check. These 2953 * faults are exceedingly unlikely, because normally the guest uses 2954 * the exact same address register for the load_exclusive, and we 2955 * would have recognized these faults there. 2956 * 2957 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2958 * unaligned 4-byte write within the range of an aligned 8-byte load. 2959 * With LSE2, the store would need to cross a 16-byte boundary when the 2960 * load did not, which would mean the store is outside the range 2961 * recorded for the monitor, which would have failed a corrected monitor 2962 * check above. 
For now, we assume no size change and retain the 2963 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2964 * 2965 * It is possible to trigger an MTE fault, by performing the load with 2966 * a virtual address with a valid tag and performing the store with the 2967 * same virtual address and a different invalid tag. 2968 */ 2969 memop = size + is_pair; 2970 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2971 memop |= MO_ALIGN; 2972 } 2973 memop = finalize_memop(s, memop); 2974 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2975 2976 tmp = tcg_temp_new_i64(); 2977 if (is_pair) { 2978 if (size == 2) { 2979 if (s->be_data == MO_LE) { 2980 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2981 } else { 2982 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2983 } 2984 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2985 cpu_exclusive_val, tmp, 2986 get_mem_index(s), memop); 2987 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2988 } else { 2989 TCGv_i128 t16 = tcg_temp_new_i128(); 2990 TCGv_i128 c16 = tcg_temp_new_i128(); 2991 TCGv_i64 a, b; 2992 2993 if (s->be_data == MO_LE) { 2994 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2995 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2996 cpu_exclusive_high); 2997 } else { 2998 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2999 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 3000 cpu_exclusive_val); 3001 } 3002 3003 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 3004 get_mem_index(s), memop); 3005 3006 a = tcg_temp_new_i64(); 3007 b = tcg_temp_new_i64(); 3008 if (s->be_data == MO_LE) { 3009 tcg_gen_extr_i128_i64(a, b, t16); 3010 } else { 3011 tcg_gen_extr_i128_i64(b, a, t16); 3012 } 3013 3014 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 3015 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 3016 tcg_gen_or_i64(tmp, a, b); 3017 3018 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 3019 } 3020 } else { 3021 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 3022 cpu_reg(s, rt), get_mem_index(s), memop); 3023 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 3024 } 3025 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 3026 tcg_gen_br(done_label); 3027 3028 gen_set_label(fail_label); 3029 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 3030 gen_set_label(done_label); 3031 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 3032 } 3033 3034 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 3035 int rn, int size) 3036 { 3037 TCGv_i64 tcg_rs = cpu_reg(s, rs); 3038 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3039 int memidx = get_mem_index(s); 3040 TCGv_i64 clean_addr; 3041 MemOp memop; 3042 3043 if (rn == 31) { 3044 gen_check_sp_alignment(s); 3045 } 3046 memop = check_atomic_align(s, rn, size); 3047 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3048 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 3049 memidx, memop); 3050 } 3051 3052 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 3053 int rn, int size) 3054 { 3055 TCGv_i64 s1 = cpu_reg(s, rs); 3056 TCGv_i64 s2 = cpu_reg(s, rs + 1); 3057 TCGv_i64 t1 = cpu_reg(s, rt); 3058 TCGv_i64 t2 = cpu_reg(s, rt + 1); 3059 TCGv_i64 clean_addr; 3060 int memidx = get_mem_index(s); 3061 MemOp memop; 3062 3063 if (rn == 31) { 3064 gen_check_sp_alignment(s); 3065 } 3066 3067 /* This is a single atomic access, despite the "pair". 
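     * (For W-register pairs this is one 64-bit cmpxchg; for X-register
     * pairs it is one 128-bit cmpxchg.)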
*/ 3068 memop = check_atomic_align(s, rn, size + 1); 3069 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3070 3071 if (size == 2) { 3072 TCGv_i64 cmp = tcg_temp_new_i64(); 3073 TCGv_i64 val = tcg_temp_new_i64(); 3074 3075 if (s->be_data == MO_LE) { 3076 tcg_gen_concat32_i64(val, t1, t2); 3077 tcg_gen_concat32_i64(cmp, s1, s2); 3078 } else { 3079 tcg_gen_concat32_i64(val, t2, t1); 3080 tcg_gen_concat32_i64(cmp, s2, s1); 3081 } 3082 3083 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 3084 3085 if (s->be_data == MO_LE) { 3086 tcg_gen_extr32_i64(s1, s2, cmp); 3087 } else { 3088 tcg_gen_extr32_i64(s2, s1, cmp); 3089 } 3090 } else { 3091 TCGv_i128 cmp = tcg_temp_new_i128(); 3092 TCGv_i128 val = tcg_temp_new_i128(); 3093 3094 if (s->be_data == MO_LE) { 3095 tcg_gen_concat_i64_i128(val, t1, t2); 3096 tcg_gen_concat_i64_i128(cmp, s1, s2); 3097 } else { 3098 tcg_gen_concat_i64_i128(val, t2, t1); 3099 tcg_gen_concat_i64_i128(cmp, s2, s1); 3100 } 3101 3102 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 3103 3104 if (s->be_data == MO_LE) { 3105 tcg_gen_extr_i128_i64(s1, s2, cmp); 3106 } else { 3107 tcg_gen_extr_i128_i64(s2, s1, cmp); 3108 } 3109 } 3110 } 3111 3112 /* 3113 * Compute the ISS.SF bit for syndrome information if an exception 3114 * is taken on a load or store. This indicates whether the instruction 3115 * is accessing a 32-bit or 64-bit register. This logic is derived 3116 * from the ARMv8 specs for LDR (Shared decode for all encodings). 3117 */ 3118 static bool ldst_iss_sf(int size, bool sign, bool ext) 3119 { 3120 3121 if (sign) { 3122 /* 3123 * Signed loads are 64 bit results if we are not going to 3124 * do a zero-extend from 32 to 64 after the load. 3125 * (For a store, sign and ext are always false.) 3126 */ 3127 return !ext; 3128 } else { 3129 /* Unsigned loads/stores work at the specified size */ 3130 return size == MO_64; 3131 } 3132 } 3133 3134 static bool trans_STXR(DisasContext *s, arg_stxr *a) 3135 { 3136 if (a->rn == 31) { 3137 gen_check_sp_alignment(s); 3138 } 3139 if (a->lasr) { 3140 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3141 } 3142 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 3143 return true; 3144 } 3145 3146 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 3147 { 3148 if (a->rn == 31) { 3149 gen_check_sp_alignment(s); 3150 } 3151 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 3152 if (a->lasr) { 3153 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3154 } 3155 return true; 3156 } 3157 3158 static bool trans_STLR(DisasContext *s, arg_stlr *a) 3159 { 3160 TCGv_i64 clean_addr; 3161 MemOp memop; 3162 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3163 3164 /* 3165 * StoreLORelease is the same as Store-Release for QEMU, but 3166 * needs the feature-test. 3167 */ 3168 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3169 return false; 3170 } 3171 /* Generate ISS for non-exclusive accesses including LASR. 
*/ 3172 if (a->rn == 31) { 3173 gen_check_sp_alignment(s); 3174 } 3175 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3176 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3177 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3178 true, a->rn != 31, memop); 3179 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3180 iss_sf, a->lasr); 3181 return true; 3182 } 3183 3184 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3185 { 3186 TCGv_i64 clean_addr; 3187 MemOp memop; 3188 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3189 3190 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3191 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3192 return false; 3193 } 3194 /* Generate ISS for non-exclusive accesses including LASR. */ 3195 if (a->rn == 31) { 3196 gen_check_sp_alignment(s); 3197 } 3198 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3199 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3200 false, a->rn != 31, memop); 3201 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3202 a->rt, iss_sf, a->lasr); 3203 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3204 return true; 3205 } 3206 3207 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3208 { 3209 if (a->rn == 31) { 3210 gen_check_sp_alignment(s); 3211 } 3212 if (a->lasr) { 3213 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3214 } 3215 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3216 return true; 3217 } 3218 3219 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3220 { 3221 if (a->rn == 31) { 3222 gen_check_sp_alignment(s); 3223 } 3224 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3225 if (a->lasr) { 3226 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3227 } 3228 return true; 3229 } 3230 3231 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3232 { 3233 if (!dc_isar_feature(aa64_atomics, s)) { 3234 return false; 3235 } 3236 if (((a->rt | a->rs) & 1) != 0) { 3237 return false; 3238 } 3239 3240 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3241 return true; 3242 } 3243 3244 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3245 { 3246 if (!dc_isar_feature(aa64_atomics, s)) { 3247 return false; 3248 } 3249 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3250 return true; 3251 } 3252 3253 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3254 { 3255 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3256 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3257 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3258 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3259 3260 gen_pc_plus_diff(s, clean_addr, a->imm); 3261 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3262 false, true, a->rt, iss_sf, false); 3263 return true; 3264 } 3265 3266 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3267 { 3268 /* Load register (literal), vector version */ 3269 TCGv_i64 clean_addr; 3270 MemOp memop; 3271 3272 if (!fp_access_check(s)) { 3273 return true; 3274 } 3275 memop = finalize_memop_asimd(s, a->sz); 3276 clean_addr = tcg_temp_new_i64(); 3277 gen_pc_plus_diff(s, clean_addr, a->imm); 3278 do_fp_ld(s, a->rt, clean_addr, memop); 3279 return true; 3280 } 3281 3282 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3283 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3284 uint64_t offset, bool is_store, MemOp mop) 3285 { 3286 if (a->rn == 31) { 3287 gen_check_sp_alignment(s); 3288 } 3289 3290 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3291 if (!a->p) { 3292 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3293 } 3294 3295 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3296 (a->w || a->rn != 31), 2 << a->sz, mop); 3297 } 3298 3299 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3300 TCGv_i64 dirty_addr, uint64_t offset) 3301 { 3302 if (a->w) { 3303 if (a->p) { 3304 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3305 } 3306 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3307 } 3308 } 3309 3310 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3311 { 3312 uint64_t offset = a->imm << a->sz; 3313 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3314 MemOp mop = finalize_memop(s, a->sz); 3315 3316 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3317 tcg_rt = cpu_reg(s, a->rt); 3318 tcg_rt2 = cpu_reg(s, a->rt2); 3319 /* 3320 * We built mop above for the single logical access -- rebuild it 3321 * now for the paired operation. 3322 * 3323 * With LSE2, non-sign-extending pairs are treated atomically if 3324 * aligned, and if unaligned one of the pair will be completely 3325 * within a 16-byte block and that element will be atomic. 3326 * Otherwise each element is separately atomic. 3327 * In all cases, issue one operation with the correct atomicity. 3328 */ 3329 mop = a->sz + 1; 3330 if (s->align_mem) { 3331 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3332 } 3333 mop = finalize_memop_pair(s, mop); 3334 if (a->sz == 2) { 3335 TCGv_i64 tmp = tcg_temp_new_i64(); 3336 3337 if (s->be_data == MO_LE) { 3338 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3339 } else { 3340 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3341 } 3342 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3343 } else { 3344 TCGv_i128 tmp = tcg_temp_new_i128(); 3345 3346 if (s->be_data == MO_LE) { 3347 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3348 } else { 3349 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3350 } 3351 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3352 } 3353 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3354 return true; 3355 } 3356 3357 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3358 { 3359 uint64_t offset = a->imm << a->sz; 3360 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3361 MemOp mop = finalize_memop(s, a->sz); 3362 3363 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3364 tcg_rt = cpu_reg(s, a->rt); 3365 tcg_rt2 = cpu_reg(s, a->rt2); 3366 3367 /* 3368 * We built mop above for the single logical access -- rebuild it 3369 * now for the paired operation. 3370 * 3371 * With LSE2, non-sign-extending pairs are treated atomically if 3372 * aligned, and if unaligned one of the pair will be completely 3373 * within a 16-byte block and that element will be atomic. 3374 * Otherwise each element is separately atomic. 3375 * In all cases, issue one operation with the correct atomicity. 3376 * 3377 * This treats sign-extending loads like zero-extending loads, 3378 * since that reuses the most code below. 3379 */ 3380 mop = a->sz + 1; 3381 if (s->align_mem) { 3382 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3383 } 3384 mop = finalize_memop_pair(s, mop); 3385 if (a->sz == 2) { 3386 int o2 = s->be_data == MO_LE ? 
32 : 0; 3387 int o1 = o2 ^ 32; 3388 3389 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3390 if (a->sign) { 3391 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3392 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3393 } else { 3394 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3395 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3396 } 3397 } else { 3398 TCGv_i128 tmp = tcg_temp_new_i128(); 3399 3400 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3401 if (s->be_data == MO_LE) { 3402 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3403 } else { 3404 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3405 } 3406 } 3407 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3408 return true; 3409 } 3410 3411 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3412 { 3413 uint64_t offset = a->imm << a->sz; 3414 TCGv_i64 clean_addr, dirty_addr; 3415 MemOp mop; 3416 3417 if (!fp_access_check(s)) { 3418 return true; 3419 } 3420 3421 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3422 mop = finalize_memop_asimd(s, a->sz); 3423 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3424 do_fp_st(s, a->rt, clean_addr, mop); 3425 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3426 do_fp_st(s, a->rt2, clean_addr, mop); 3427 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3428 return true; 3429 } 3430 3431 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3432 { 3433 uint64_t offset = a->imm << a->sz; 3434 TCGv_i64 clean_addr, dirty_addr; 3435 MemOp mop; 3436 3437 if (!fp_access_check(s)) { 3438 return true; 3439 } 3440 3441 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3442 mop = finalize_memop_asimd(s, a->sz); 3443 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3444 do_fp_ld(s, a->rt, clean_addr, mop); 3445 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3446 do_fp_ld(s, a->rt2, clean_addr, mop); 3447 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3448 return true; 3449 } 3450 3451 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3452 { 3453 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3454 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3455 MemOp mop; 3456 TCGv_i128 tmp; 3457 3458 /* STGP only comes in one size. */ 3459 tcg_debug_assert(a->sz == MO_64); 3460 3461 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3462 return false; 3463 } 3464 3465 if (a->rn == 31) { 3466 gen_check_sp_alignment(s); 3467 } 3468 3469 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3470 if (!a->p) { 3471 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3472 } 3473 3474 clean_addr = clean_data_tbi(s, dirty_addr); 3475 tcg_rt = cpu_reg(s, a->rt); 3476 tcg_rt2 = cpu_reg(s, a->rt2); 3477 3478 /* 3479 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3480 * and one tag operation. We implement it as one single aligned 16-byte 3481 * memory operation for convenience. Note that the alignment ensures 3482 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3483 */ 3484 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3485 3486 tmp = tcg_temp_new_i128(); 3487 if (s->be_data == MO_LE) { 3488 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3489 } else { 3490 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3491 } 3492 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3493 3494 /* Perform the tag store, if tag access enabled. 
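     * Under MTTCG (CF_PARALLEL) the parallel helper is used so that the
     * allocation-tag update is atomic with respect to other vCPUs.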
*/ 3495 if (s->ata[0]) { 3496 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3497 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3498 } else { 3499 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3500 } 3501 } 3502 3503 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3504 return true; 3505 } 3506 3507 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3508 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3509 uint64_t offset, bool is_store, MemOp mop) 3510 { 3511 int memidx; 3512 3513 if (a->rn == 31) { 3514 gen_check_sp_alignment(s); 3515 } 3516 3517 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3518 if (!a->p) { 3519 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3520 } 3521 memidx = get_a64_user_mem_index(s, a->unpriv); 3522 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3523 a->w || a->rn != 31, 3524 mop, a->unpriv, memidx); 3525 } 3526 3527 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3528 TCGv_i64 dirty_addr, uint64_t offset) 3529 { 3530 if (a->w) { 3531 if (a->p) { 3532 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3533 } 3534 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3535 } 3536 } 3537 3538 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3539 { 3540 bool iss_sf, iss_valid = !a->w; 3541 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3542 int memidx = get_a64_user_mem_index(s, a->unpriv); 3543 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3544 3545 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3546 3547 tcg_rt = cpu_reg(s, a->rt); 3548 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3549 3550 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3551 iss_valid, a->rt, iss_sf, false); 3552 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3553 return true; 3554 } 3555 3556 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3557 { 3558 bool iss_sf, iss_valid = !a->w; 3559 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3560 int memidx = get_a64_user_mem_index(s, a->unpriv); 3561 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3562 3563 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3564 3565 tcg_rt = cpu_reg(s, a->rt); 3566 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3567 3568 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3569 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3570 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3571 return true; 3572 } 3573 3574 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3575 { 3576 TCGv_i64 clean_addr, dirty_addr; 3577 MemOp mop; 3578 3579 if (!fp_access_check(s)) { 3580 return true; 3581 } 3582 mop = finalize_memop_asimd(s, a->sz); 3583 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3584 do_fp_st(s, a->rt, clean_addr, mop); 3585 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3586 return true; 3587 } 3588 3589 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3590 { 3591 TCGv_i64 clean_addr, dirty_addr; 3592 MemOp mop; 3593 3594 if (!fp_access_check(s)) { 3595 return true; 3596 } 3597 mop = finalize_memop_asimd(s, a->sz); 3598 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3599 do_fp_ld(s, a->rt, clean_addr, mop); 3600 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3601 return true; 3602 } 3603 3604 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3605 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3606 bool is_store, MemOp memop) 3607 { 3608 TCGv_i64 tcg_rm; 3609 3610 if (a->rn == 31) { 3611 
gen_check_sp_alignment(s); 3612 } 3613 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3614 3615 tcg_rm = read_cpu_reg(s, a->rm, 1); 3616 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3617 3618 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3619 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3620 } 3621 3622 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3623 { 3624 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3625 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3626 MemOp memop; 3627 3628 if (extract32(a->opt, 1, 1) == 0) { 3629 return false; 3630 } 3631 3632 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3633 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3634 tcg_rt = cpu_reg(s, a->rt); 3635 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3636 a->ext, true, a->rt, iss_sf, false); 3637 return true; 3638 } 3639 3640 static bool trans_STR(DisasContext *s, arg_ldst *a) 3641 { 3642 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3643 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3644 MemOp memop; 3645 3646 if (extract32(a->opt, 1, 1) == 0) { 3647 return false; 3648 } 3649 3650 memop = finalize_memop(s, a->sz); 3651 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3652 tcg_rt = cpu_reg(s, a->rt); 3653 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3654 return true; 3655 } 3656 3657 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3658 { 3659 TCGv_i64 clean_addr, dirty_addr; 3660 MemOp memop; 3661 3662 if (extract32(a->opt, 1, 1) == 0) { 3663 return false; 3664 } 3665 3666 if (!fp_access_check(s)) { 3667 return true; 3668 } 3669 3670 memop = finalize_memop_asimd(s, a->sz); 3671 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3672 do_fp_ld(s, a->rt, clean_addr, memop); 3673 return true; 3674 } 3675 3676 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3677 { 3678 TCGv_i64 clean_addr, dirty_addr; 3679 MemOp memop; 3680 3681 if (extract32(a->opt, 1, 1) == 0) { 3682 return false; 3683 } 3684 3685 if (!fp_access_check(s)) { 3686 return true; 3687 } 3688 3689 memop = finalize_memop_asimd(s, a->sz); 3690 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3691 do_fp_st(s, a->rt, clean_addr, memop); 3692 return true; 3693 } 3694 3695 3696 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3697 int sign, bool invert) 3698 { 3699 MemOp mop = a->sz | sign; 3700 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3701 3702 if (a->rn == 31) { 3703 gen_check_sp_alignment(s); 3704 } 3705 mop = check_atomic_align(s, a->rn, mop); 3706 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3707 a->rn != 31, mop); 3708 tcg_rs = read_cpu_reg(s, a->rs, true); 3709 tcg_rt = cpu_reg(s, a->rt); 3710 if (invert) { 3711 tcg_gen_not_i64(tcg_rs, tcg_rs); 3712 } 3713 /* 3714 * The tcg atomic primitives are all full barriers. Therefore we 3715 * can ignore the Acquire and Release bits of this instruction. 
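     * (For example, LDADDAL needs no extra fence beyond the atomic
     * fetch-add emitted below.)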
3716 */ 3717 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3718 3719 if (mop & MO_SIGN) { 3720 switch (a->sz) { 3721 case MO_8: 3722 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3723 break; 3724 case MO_16: 3725 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3726 break; 3727 case MO_32: 3728 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3729 break; 3730 case MO_64: 3731 break; 3732 default: 3733 g_assert_not_reached(); 3734 } 3735 } 3736 return true; 3737 } 3738 3739 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3740 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3741 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3742 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3743 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3744 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3745 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3746 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3747 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3748 3749 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3750 { 3751 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3752 TCGv_i64 clean_addr; 3753 MemOp mop; 3754 3755 if (!dc_isar_feature(aa64_atomics, s) || 3756 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3757 return false; 3758 } 3759 if (a->rn == 31) { 3760 gen_check_sp_alignment(s); 3761 } 3762 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3763 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3764 a->rn != 31, mop); 3765 /* 3766 * LDAPR* are a special case because they are a simple load, not a 3767 * fetch-and-do-something op. 3768 * The architectural consistency requirements here are weaker than 3769 * full load-acquire (we only need "load-acquire processor consistent"), 3770 * but we choose to implement them as full LDAQ. 3771 */ 3772 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3773 true, a->rt, iss_sf, true); 3774 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3775 return true; 3776 } 3777 3778 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3779 { 3780 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3781 MemOp memop; 3782 3783 /* Load with pointer authentication */ 3784 if (!dc_isar_feature(aa64_pauth, s)) { 3785 return false; 3786 } 3787 3788 if (a->rn == 31) { 3789 gen_check_sp_alignment(s); 3790 } 3791 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3792 3793 if (s->pauth_active) { 3794 if (!a->m) { 3795 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3796 tcg_constant_i64(0)); 3797 } else { 3798 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3799 tcg_constant_i64(0)); 3800 } 3801 } 3802 3803 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3804 3805 memop = finalize_memop(s, MO_64); 3806 3807 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
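     * Any pointer authentication was already handled by the autd* helper
     * above when pauth is active.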
*/ 3808 clean_addr = gen_mte_check1(s, dirty_addr, false, 3809 a->w || a->rn != 31, memop); 3810 3811 tcg_rt = cpu_reg(s, a->rt); 3812 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3813 /* extend */ false, /* iss_valid */ !a->w, 3814 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3815 3816 if (a->w) { 3817 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3818 } 3819 return true; 3820 } 3821 3822 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3823 { 3824 TCGv_i64 clean_addr, dirty_addr; 3825 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3826 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3827 3828 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3829 return false; 3830 } 3831 3832 if (a->rn == 31) { 3833 gen_check_sp_alignment(s); 3834 } 3835 3836 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3837 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3838 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3839 clean_addr = clean_data_tbi(s, dirty_addr); 3840 3841 /* 3842 * Load-AcquirePC semantics; we implement as the slightly more 3843 * restrictive Load-Acquire. 3844 */ 3845 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3846 a->rt, iss_sf, true); 3847 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3848 return true; 3849 } 3850 3851 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3852 { 3853 TCGv_i64 clean_addr, dirty_addr; 3854 MemOp mop = a->sz; 3855 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3856 3857 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3858 return false; 3859 } 3860 3861 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3862 3863 if (a->rn == 31) { 3864 gen_check_sp_alignment(s); 3865 } 3866 3867 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3868 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3869 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3870 clean_addr = clean_data_tbi(s, dirty_addr); 3871 3872 /* Store-Release semantics */ 3873 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3874 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3875 return true; 3876 } 3877 3878 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3879 { 3880 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3881 MemOp endian, align, mop; 3882 3883 int total; /* total bytes */ 3884 int elements; /* elements per vector */ 3885 int r; 3886 int size = a->sz; 3887 3888 if (!a->p && a->rm != 0) { 3889 /* For non-postindexed accesses the Rm field must be 0 */ 3890 return false; 3891 } 3892 if (size == 3 && !a->q && a->selem != 1) { 3893 return false; 3894 } 3895 if (!fp_access_check(s)) { 3896 return true; 3897 } 3898 3899 if (a->rn == 31) { 3900 gen_check_sp_alignment(s); 3901 } 3902 3903 /* For our purposes, bytes are always little-endian. */ 3904 endian = s->be_data; 3905 if (size == 0) { 3906 endian = MO_LE; 3907 } 3908 3909 total = a->rpt * a->selem * (a->q ? 16 : 8); 3910 tcg_rn = cpu_reg_sp(s, a->rn); 3911 3912 /* 3913 * Issue the MTE check vs the logical repeat count, before we 3914 * promote consecutive little-endian elements below. 3915 */ 3916 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3917 finalize_memop_asimd(s, size)); 3918 3919 /* 3920 * Consecutive little-endian elements from a single register 3921 * can be promoted to a larger little-endian operation. 3922 */ 3923 align = MO_ALIGN; 3924 if (a->selem == 1 && endian == MO_LE) { 3925 align = pow2_align(size); 3926 size = 3; 3927 } 3928 if (!s->align_mem) { 3929 align = 0; 3930 } 3931 mop = endian | size | align; 3932 3933 elements = (a->q ? 
16 : 8) >> size; 3934 tcg_ebytes = tcg_constant_i64(1 << size); 3935 for (r = 0; r < a->rpt; r++) { 3936 int e; 3937 for (e = 0; e < elements; e++) { 3938 int xs; 3939 for (xs = 0; xs < a->selem; xs++) { 3940 int tt = (a->rt + r + xs) % 32; 3941 do_vec_ld(s, tt, e, clean_addr, mop); 3942 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3943 } 3944 } 3945 } 3946 3947 /* 3948 * For non-quad operations, setting a slice of the low 64 bits of 3949 * the register clears the high 64 bits (in the ARM ARM pseudocode 3950 * this is implicit in the fact that 'rval' is a 64 bit wide 3951 * variable). For quad operations, we might still need to zero 3952 * the high bits of SVE. 3953 */ 3954 for (r = 0; r < a->rpt * a->selem; r++) { 3955 int tt = (a->rt + r) % 32; 3956 clear_vec_high(s, a->q, tt); 3957 } 3958 3959 if (a->p) { 3960 if (a->rm == 31) { 3961 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3962 } else { 3963 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3964 } 3965 } 3966 return true; 3967 } 3968 3969 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3970 { 3971 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3972 MemOp endian, align, mop; 3973 3974 int total; /* total bytes */ 3975 int elements; /* elements per vector */ 3976 int r; 3977 int size = a->sz; 3978 3979 if (!a->p && a->rm != 0) { 3980 /* For non-postindexed accesses the Rm field must be 0 */ 3981 return false; 3982 } 3983 if (size == 3 && !a->q && a->selem != 1) { 3984 return false; 3985 } 3986 if (!fp_access_check(s)) { 3987 return true; 3988 } 3989 3990 if (a->rn == 31) { 3991 gen_check_sp_alignment(s); 3992 } 3993 3994 /* For our purposes, bytes are always little-endian. */ 3995 endian = s->be_data; 3996 if (size == 0) { 3997 endian = MO_LE; 3998 } 3999 4000 total = a->rpt * a->selem * (a->q ? 16 : 8); 4001 tcg_rn = cpu_reg_sp(s, a->rn); 4002 4003 /* 4004 * Issue the MTE check vs the logical repeat count, before we 4005 * promote consecutive little-endian elements below. 4006 */ 4007 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 4008 finalize_memop_asimd(s, size)); 4009 4010 /* 4011 * Consecutive little-endian elements from a single register 4012 * can be promoted to a larger little-endian operation. 4013 */ 4014 align = MO_ALIGN; 4015 if (a->selem == 1 && endian == MO_LE) { 4016 align = pow2_align(size); 4017 size = 3; 4018 } 4019 if (!s->align_mem) { 4020 align = 0; 4021 } 4022 mop = endian | size | align; 4023 4024 elements = (a->q ? 
16 : 8) >> size; 4025 tcg_ebytes = tcg_constant_i64(1 << size); 4026 for (r = 0; r < a->rpt; r++) { 4027 int e; 4028 for (e = 0; e < elements; e++) { 4029 int xs; 4030 for (xs = 0; xs < a->selem; xs++) { 4031 int tt = (a->rt + r + xs) % 32; 4032 do_vec_st(s, tt, e, clean_addr, mop); 4033 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4034 } 4035 } 4036 } 4037 4038 if (a->p) { 4039 if (a->rm == 31) { 4040 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4041 } else { 4042 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4043 } 4044 } 4045 return true; 4046 } 4047 4048 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 4049 { 4050 int xs, total, rt; 4051 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4052 MemOp mop; 4053 4054 if (!a->p && a->rm != 0) { 4055 return false; 4056 } 4057 if (!fp_access_check(s)) { 4058 return true; 4059 } 4060 4061 if (a->rn == 31) { 4062 gen_check_sp_alignment(s); 4063 } 4064 4065 total = a->selem << a->scale; 4066 tcg_rn = cpu_reg_sp(s, a->rn); 4067 4068 mop = finalize_memop_asimd(s, a->scale); 4069 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 4070 total, mop); 4071 4072 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4073 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4074 do_vec_st(s, rt, a->index, clean_addr, mop); 4075 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4076 } 4077 4078 if (a->p) { 4079 if (a->rm == 31) { 4080 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4081 } else { 4082 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4083 } 4084 } 4085 return true; 4086 } 4087 4088 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 4089 { 4090 int xs, total, rt; 4091 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4092 MemOp mop; 4093 4094 if (!a->p && a->rm != 0) { 4095 return false; 4096 } 4097 if (!fp_access_check(s)) { 4098 return true; 4099 } 4100 4101 if (a->rn == 31) { 4102 gen_check_sp_alignment(s); 4103 } 4104 4105 total = a->selem << a->scale; 4106 tcg_rn = cpu_reg_sp(s, a->rn); 4107 4108 mop = finalize_memop_asimd(s, a->scale); 4109 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4110 total, mop); 4111 4112 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4113 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4114 do_vec_ld(s, rt, a->index, clean_addr, mop); 4115 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4116 } 4117 4118 if (a->p) { 4119 if (a->rm == 31) { 4120 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4121 } else { 4122 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4123 } 4124 } 4125 return true; 4126 } 4127 4128 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 4129 { 4130 int xs, total, rt; 4131 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4132 MemOp mop; 4133 4134 if (!a->p && a->rm != 0) { 4135 return false; 4136 } 4137 if (!fp_access_check(s)) { 4138 return true; 4139 } 4140 4141 if (a->rn == 31) { 4142 gen_check_sp_alignment(s); 4143 } 4144 4145 total = a->selem << a->scale; 4146 tcg_rn = cpu_reg_sp(s, a->rn); 4147 4148 mop = finalize_memop_asimd(s, a->scale); 4149 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4150 total, mop); 4151 4152 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4153 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4154 /* Load and replicate to all elements */ 4155 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 4156 4157 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4158 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 4159 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 4160 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4161 } 4162 4163 if (a->p) { 4164 if (a->rm == 31) { 4165 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4166 } else { 4167 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4168 } 4169 } 4170 return true; 4171 } 4172 4173 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4174 { 4175 TCGv_i64 addr, clean_addr, tcg_rt; 4176 int size = 4 << s->dcz_blocksize; 4177 4178 if (!dc_isar_feature(aa64_mte, s)) { 4179 return false; 4180 } 4181 if (s->current_el == 0) { 4182 return false; 4183 } 4184 4185 if (a->rn == 31) { 4186 gen_check_sp_alignment(s); 4187 } 4188 4189 addr = read_cpu_reg_sp(s, a->rn, true); 4190 tcg_gen_addi_i64(addr, addr, a->imm); 4191 tcg_rt = cpu_reg(s, a->rt); 4192 4193 if (s->ata[0]) { 4194 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4195 } 4196 /* 4197 * The non-tags portion of STZGM is mostly like DC_ZVA, 4198 * except the alignment happens before the access. 4199 */ 4200 clean_addr = clean_data_tbi(s, addr); 4201 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4202 gen_helper_dc_zva(tcg_env, clean_addr); 4203 return true; 4204 } 4205 4206 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4207 { 4208 TCGv_i64 addr, clean_addr, tcg_rt; 4209 4210 if (!dc_isar_feature(aa64_mte, s)) { 4211 return false; 4212 } 4213 if (s->current_el == 0) { 4214 return false; 4215 } 4216 4217 if (a->rn == 31) { 4218 gen_check_sp_alignment(s); 4219 } 4220 4221 addr = read_cpu_reg_sp(s, a->rn, true); 4222 tcg_gen_addi_i64(addr, addr, a->imm); 4223 tcg_rt = cpu_reg(s, a->rt); 4224 4225 if (s->ata[0]) { 4226 gen_helper_stgm(tcg_env, addr, tcg_rt); 4227 } else { 4228 MMUAccessType acc = MMU_DATA_STORE; 4229 int size = 4 << s->gm_blocksize; 4230 4231 clean_addr = clean_data_tbi(s, addr); 4232 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4233 gen_probe_access(s, clean_addr, acc, size); 4234 } 4235 return true; 4236 } 4237 4238 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4239 { 4240 TCGv_i64 addr, clean_addr, tcg_rt; 4241 4242 if (!dc_isar_feature(aa64_mte, s)) { 4243 return false; 4244 } 4245 if (s->current_el == 0) { 4246 return false; 4247 } 4248 4249 if (a->rn == 31) { 4250 gen_check_sp_alignment(s); 4251 } 4252 4253 addr = read_cpu_reg_sp(s, a->rn, true); 4254 tcg_gen_addi_i64(addr, addr, a->imm); 4255 tcg_rt = cpu_reg(s, a->rt); 4256 4257 if (s->ata[0]) { 4258 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4259 } else { 4260 MMUAccessType acc = MMU_DATA_LOAD; 4261 int size = 4 << s->gm_blocksize; 4262 4263 clean_addr = clean_data_tbi(s, addr); 4264 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4265 gen_probe_access(s, clean_addr, acc, size); 4266 /* The result tags are zeros. */ 4267 tcg_gen_movi_i64(tcg_rt, 0); 4268 } 4269 return true; 4270 } 4271 4272 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4273 { 4274 TCGv_i64 addr, clean_addr, tcg_rt; 4275 4276 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4277 return false; 4278 } 4279 4280 if (a->rn == 31) { 4281 gen_check_sp_alignment(s); 4282 } 4283 4284 addr = read_cpu_reg_sp(s, a->rn, true); 4285 if (!a->p) { 4286 /* pre-index or signed offset */ 4287 tcg_gen_addi_i64(addr, addr, a->imm); 4288 } 4289 4290 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4291 tcg_rt = cpu_reg(s, a->rt); 4292 if (s->ata[0]) { 4293 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4294 } else { 4295 /* 4296 * Tag access disabled: we must check for aborts on the load 4297 * load from [rn+offset], and then insert a 0 tag into rt. 
4298 */ 4299 clean_addr = clean_data_tbi(s, addr); 4300 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4301 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4302 } 4303 4304 if (a->w) { 4305 /* pre-index or post-index */ 4306 if (a->p) { 4307 /* post-index */ 4308 tcg_gen_addi_i64(addr, addr, a->imm); 4309 } 4310 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4311 } 4312 return true; 4313 } 4314 4315 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4316 { 4317 TCGv_i64 addr, tcg_rt; 4318 4319 if (a->rn == 31) { 4320 gen_check_sp_alignment(s); 4321 } 4322 4323 addr = read_cpu_reg_sp(s, a->rn, true); 4324 if (!a->p) { 4325 /* pre-index or signed offset */ 4326 tcg_gen_addi_i64(addr, addr, a->imm); 4327 } 4328 tcg_rt = cpu_reg_sp(s, a->rt); 4329 if (!s->ata[0]) { 4330 /* 4331 * For STG and ST2G, we need to check alignment and probe memory. 4332 * TODO: For STZG and STZ2G, we could rely on the stores below, 4333 * at least for system mode; user-only won't enforce alignment. 4334 */ 4335 if (is_pair) { 4336 gen_helper_st2g_stub(tcg_env, addr); 4337 } else { 4338 gen_helper_stg_stub(tcg_env, addr); 4339 } 4340 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4341 if (is_pair) { 4342 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4343 } else { 4344 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4345 } 4346 } else { 4347 if (is_pair) { 4348 gen_helper_st2g(tcg_env, addr, tcg_rt); 4349 } else { 4350 gen_helper_stg(tcg_env, addr, tcg_rt); 4351 } 4352 } 4353 4354 if (is_zero) { 4355 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4356 TCGv_i64 zero64 = tcg_constant_i64(0); 4357 TCGv_i128 zero128 = tcg_temp_new_i128(); 4358 int mem_index = get_mem_index(s); 4359 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4360 4361 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4362 4363 /* This is 1 or 2 atomic 16-byte operations. */ 4364 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4365 if (is_pair) { 4366 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4367 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4368 } 4369 } 4370 4371 if (a->w) { 4372 /* pre-index or post-index */ 4373 if (a->p) { 4374 /* post-index */ 4375 tcg_gen_addi_i64(addr, addr, a->imm); 4376 } 4377 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4378 } 4379 return true; 4380 } 4381 4382 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4383 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4384 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4385 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4386 4387 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4388 4389 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4390 bool is_setg, SetFn fn) 4391 { 4392 int memidx; 4393 uint32_t syndrome, desc = 0; 4394 4395 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4396 return false; 4397 } 4398 4399 /* 4400 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4401 * us to pull this check before the CheckMOPSEnabled() test 4402 * (which we do in the helper function) 4403 */ 4404 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4405 a->rd == 31 || a->rn == 31) { 4406 return false; 4407 } 4408 4409 memidx = get_a64_user_mem_index(s, a->unpriv); 4410 4411 /* 4412 * We pass option_a == true, matching our implementation; 4413 * we pass wrong_option == false: helper function may set that bit. 
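 * (FEAT_MOPS permits an implementation to use one of two algorithms,
 * "option A" or "option B"; the exception syndrome records which one is in
 * use. QEMU's MOPS helpers implement option A.)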
4414 */ 4415 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4416 is_epilogue, false, true, a->rd, a->rs, a->rn); 4417 4418 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4419 /* We may need to do MTE tag checking, so assemble the descriptor */ 4420 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4421 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4422 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4423 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4424 } 4425 /* The helper function always needs the memidx even with MTE disabled */ 4426 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4427 4428 /* 4429 * The helper needs the register numbers, but since they're in 4430 * the syndrome anyway, we let it extract them from there rather 4431 * than passing in an extra three integer arguments. 4432 */ 4433 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4434 return true; 4435 } 4436 4437 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4438 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4439 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4440 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4441 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4442 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4443 4444 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4445 4446 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4447 { 4448 int rmemidx, wmemidx; 4449 uint32_t syndrome, rdesc = 0, wdesc = 0; 4450 bool wunpriv = extract32(a->options, 0, 1); 4451 bool runpriv = extract32(a->options, 1, 1); 4452 4453 /* 4454 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4455 * us to pull this check before the CheckMOPSEnabled() test 4456 * (which we do in the helper function) 4457 */ 4458 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4459 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4460 return false; 4461 } 4462 4463 rmemidx = get_a64_user_mem_index(s, runpriv); 4464 wmemidx = get_a64_user_mem_index(s, wunpriv); 4465 4466 /* 4467 * We pass option_a == true, matching our implementation; 4468 * we pass wrong_option == false: helper function may set that bit. 4469 */ 4470 syndrome = syn_mop(false, false, a->options, is_epilogue, 4471 false, true, a->rd, a->rs, a->rn); 4472 4473 /* If we need to do MTE tag checking, assemble the descriptors */ 4474 if (s->mte_active[runpriv]) { 4475 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4476 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4477 } 4478 if (s->mte_active[wunpriv]) { 4479 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4480 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4481 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4482 } 4483 /* The helper function needs these parts of the descriptor regardless */ 4484 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4485 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4486 4487 /* 4488 * The helper needs the register numbers, but since they're in 4489 * the syndrome anyway, we let it extract them from there rather 4490 * than passing in an extra three integer arguments. 
4491 */ 4492 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4493 tcg_constant_i32(rdesc)); 4494 return true; 4495 } 4496 4497 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4498 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4499 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4500 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4501 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4502 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4503 4504 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4505 4506 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4507 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4508 { 4509 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4510 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4511 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4512 4513 fn(tcg_rd, tcg_rn, tcg_imm); 4514 if (!a->sf) { 4515 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4516 } 4517 return true; 4518 } 4519 4520 /* 4521 * PC-rel. addressing 4522 */ 4523 4524 static bool trans_ADR(DisasContext *s, arg_ri *a) 4525 { 4526 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4527 return true; 4528 } 4529 4530 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4531 { 4532 int64_t offset = (int64_t)a->imm << 12; 4533 4534 /* The page offset is ok for CF_PCREL. */ 4535 offset -= s->pc_curr & 0xfff; 4536 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4537 return true; 4538 } 4539 4540 /* 4541 * Add/subtract (immediate) 4542 */ 4543 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4544 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4545 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4546 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4547 4548 /* 4549 * Add/subtract (immediate, with tags) 4550 */ 4551 4552 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4553 bool sub_op) 4554 { 4555 TCGv_i64 tcg_rn, tcg_rd; 4556 int imm; 4557 4558 imm = a->uimm6 << LOG2_TAG_GRANULE; 4559 if (sub_op) { 4560 imm = -imm; 4561 } 4562 4563 tcg_rn = cpu_reg_sp(s, a->rn); 4564 tcg_rd = cpu_reg_sp(s, a->rd); 4565 4566 if (s->ata[0]) { 4567 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4568 tcg_constant_i32(imm), 4569 tcg_constant_i32(a->uimm4)); 4570 } else { 4571 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4572 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4573 } 4574 return true; 4575 } 4576 4577 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4578 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4579 4580 /* The input should be a value in the bottom e bits (with higher 4581 * bits zero); returns that value replicated into every element 4582 * of size e in a 64 bit integer. 4583 */ 4584 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4585 { 4586 assert(e != 0); 4587 while (e < 64) { 4588 mask |= mask << e; 4589 e *= 2; 4590 } 4591 return mask; 4592 } 4593 4594 /* 4595 * Logical (immediate) 4596 */ 4597 4598 /* 4599 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4600 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4601 * value (ie should cause a guest UNDEF exception), and true if they are 4602 * valid, in which case the decoded bit pattern is written to result. 
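 *
 * As a worked example: immn = 0, imms = 0b111100, immr = 0 selects 2-bit
 * elements (e = 2) each containing a single set bit, i.e. the pattern
 * 0x5555555555555555; with immr = 1 each element is instead rotated right
 * by one bit, giving 0xaaaaaaaaaaaaaaaa.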
4603 */ 4604 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4605 unsigned int imms, unsigned int immr) 4606 { 4607 uint64_t mask; 4608 unsigned e, levels, s, r; 4609 int len; 4610 4611 assert(immn < 2 && imms < 64 && immr < 64); 4612 4613 /* The bit patterns we create here are 64 bit patterns which 4614 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4615 * 64 bits each. Each element contains the same value: a run 4616 * of between 1 and e-1 non-zero bits, rotated within the 4617 * element by between 0 and e-1 bits. 4618 * 4619 * The element size and run length are encoded into immn (1 bit) 4620 * and imms (6 bits) as follows: 4621 * 64 bit elements: immn = 1, imms = <length of run - 1> 4622 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4623 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4624 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4625 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4626 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4627 * Notice that immn = 0, imms = 11111x is the only combination 4628 * not covered by one of the above options; this is reserved. 4629 * Further, <length of run - 1> all-ones is a reserved pattern. 4630 * 4631 * In all cases the rotation is by immr % e (and immr is 6 bits). 4632 */ 4633 4634 /* First determine the element size */ 4635 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4636 if (len < 1) { 4637 /* This is the immn == 0, imms == 0x11111x case */ 4638 return false; 4639 } 4640 e = 1 << len; 4641 4642 levels = e - 1; 4643 s = imms & levels; 4644 r = immr & levels; 4645 4646 if (s == levels) { 4647 /* <length of run - 1> mustn't be all-ones. */ 4648 return false; 4649 } 4650 4651 /* Create the value of one element: s+1 set bits rotated 4652 * by r within the element (which is e bits wide)... 4653 */ 4654 mask = MAKE_64BIT_MASK(0, s + 1); 4655 if (r) { 4656 mask = (mask >> r) | (mask << (e - r)); 4657 mask &= MAKE_64BIT_MASK(0, e); 4658 } 4659 /* ...then replicate the element over the whole 64 bit value */ 4660 mask = bitfield_replicate(mask, e); 4661 *result = mask; 4662 return true; 4663 } 4664 4665 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4666 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4667 { 4668 TCGv_i64 tcg_rd, tcg_rn; 4669 uint64_t imm; 4670 4671 /* Some immediate field values are reserved. */ 4672 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4673 extract32(a->dbm, 0, 6), 4674 extract32(a->dbm, 6, 6))) { 4675 return false; 4676 } 4677 if (!a->sf) { 4678 imm &= 0xffffffffull; 4679 } 4680 4681 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4682 tcg_rn = cpu_reg(s, a->rn); 4683 4684 fn(tcg_rd, tcg_rn, imm); 4685 if (set_cc) { 4686 gen_logic_CC(a->sf, tcg_rd); 4687 } 4688 if (!a->sf) { 4689 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4690 } 4691 return true; 4692 } 4693 4694 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4695 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4696 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4697 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4698 4699 /* 4700 * Move wide (immediate) 4701 */ 4702 4703 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4704 { 4705 int pos = a->hw << 4; 4706 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4707 return true; 4708 } 4709 4710 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4711 { 4712 int pos = a->hw << 4; 4713 uint64_t imm = a->imm; 4714 4715 imm = ~(imm << pos); 4716 if (!a->sf) { 4717 imm = (uint32_t)imm; 4718 } 4719 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4720 return true; 4721 } 4722 4723 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4724 { 4725 int pos = a->hw << 4; 4726 TCGv_i64 tcg_rd, tcg_im; 4727 4728 tcg_rd = cpu_reg(s, a->rd); 4729 tcg_im = tcg_constant_i64(a->imm); 4730 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4731 if (!a->sf) { 4732 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4733 } 4734 return true; 4735 } 4736 4737 /* 4738 * Bitfield 4739 */ 4740 4741 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4742 { 4743 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4744 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4745 unsigned int bitsize = a->sf ? 64 : 32; 4746 unsigned int ri = a->immr; 4747 unsigned int si = a->imms; 4748 unsigned int pos, len; 4749 4750 if (si >= ri) { 4751 /* Wd<s-r:0> = Wn<s:r> */ 4752 len = (si - ri) + 1; 4753 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4754 if (!a->sf) { 4755 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4756 } 4757 } else { 4758 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4759 len = si + 1; 4760 pos = (bitsize - ri) & (bitsize - 1); 4761 4762 if (len < ri) { 4763 /* 4764 * Sign extend the destination field from len to fill the 4765 * balance of the word. Let the deposit below insert all 4766 * of those sign bits. 4767 */ 4768 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4769 len = ri; 4770 } 4771 4772 /* 4773 * We start with zero, and we haven't modified any bits outside 4774 * bitsize, therefore no final zero-extension is needed for !sf. 4775 */ 4776 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4777 } 4778 return true; 4779 } 4780 4781 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4782 { 4783 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4784 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4785 unsigned int bitsize = a->sf ? 64 : 32; 4786 unsigned int ri = a->immr; 4787 unsigned int si = a->imms; 4788 unsigned int pos, len; 4789 4790 tcg_rd = cpu_reg(s, a->rd); 4791 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4792 4793 if (si >= ri) { 4794 /* Wd<s-r:0> = Wn<s:r> */ 4795 len = (si - ri) + 1; 4796 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4797 } else { 4798 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4799 len = si + 1; 4800 pos = (bitsize - ri) & (bitsize - 1); 4801 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4802 } 4803 return true; 4804 } 4805 4806 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4807 { 4808 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4809 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4810 unsigned int bitsize = a->sf ? 
64 : 32; 4811 unsigned int ri = a->immr; 4812 unsigned int si = a->imms; 4813 unsigned int pos, len; 4814 4815 tcg_rd = cpu_reg(s, a->rd); 4816 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4817 4818 if (si >= ri) { 4819 /* Wd<s-r:0> = Wn<s:r> */ 4820 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4821 len = (si - ri) + 1; 4822 pos = 0; 4823 } else { 4824 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4825 len = si + 1; 4826 pos = (bitsize - ri) & (bitsize - 1); 4827 } 4828 4829 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4830 if (!a->sf) { 4831 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4832 } 4833 return true; 4834 } 4835 4836 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4837 { 4838 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4839 4840 tcg_rd = cpu_reg(s, a->rd); 4841 4842 if (unlikely(a->imm == 0)) { 4843 /* 4844 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4845 * so an extract from bit 0 is a special case. 4846 */ 4847 if (a->sf) { 4848 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4849 } else { 4850 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4851 } 4852 } else { 4853 tcg_rm = cpu_reg(s, a->rm); 4854 tcg_rn = cpu_reg(s, a->rn); 4855 4856 if (a->sf) { 4857 /* Specialization to ROR happens in EXTRACT2. */ 4858 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4859 } else { 4860 TCGv_i32 t0 = tcg_temp_new_i32(); 4861 4862 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4863 if (a->rm == a->rn) { 4864 tcg_gen_rotri_i32(t0, t0, a->imm); 4865 } else { 4866 TCGv_i32 t1 = tcg_temp_new_i32(); 4867 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4868 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4869 } 4870 tcg_gen_extu_i32_i64(tcg_rd, t0); 4871 } 4872 } 4873 return true; 4874 } 4875 4876 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4877 { 4878 if (fp_access_check(s)) { 4879 int len = (a->len + 1) * 16; 4880 4881 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4882 vec_full_reg_offset(s, a->rm), tcg_env, 4883 a->q ? 16 : 8, vec_full_reg_size(s), 4884 (len << 6) | (a->tbx << 5) | a->rn, 4885 gen_helper_simd_tblx); 4886 } 4887 return true; 4888 } 4889 4890 typedef int simd_permute_idx_fn(int i, int part, int elements); 4891 4892 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4893 simd_permute_idx_fn *fn, int part) 4894 { 4895 MemOp esz = a->esz; 4896 int datasize = a->q ? 16 : 8; 4897 int elements = datasize >> esz; 4898 TCGv_i64 tcg_res[2], tcg_ele; 4899 4900 if (esz == MO_64 && !a->q) { 4901 return false; 4902 } 4903 if (!fp_access_check(s)) { 4904 return true; 4905 } 4906 4907 tcg_res[0] = tcg_temp_new_i64(); 4908 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4909 tcg_ele = tcg_temp_new_i64(); 4910 4911 for (int i = 0; i < elements; i++) { 4912 int o, w, idx; 4913 4914 idx = fn(i, part, elements); 4915 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4916 idx & (elements - 1), esz); 4917 4918 w = (i << (esz + 3)) / 64; 4919 o = (i << (esz + 3)) % 64; 4920 if (o == 0) { 4921 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4922 } else { 4923 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4924 } 4925 } 4926 4927 for (int i = a->q; i >= 0; --i) { 4928 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4929 } 4930 clear_vec_high(s, a->q, a->rd); 4931 return true; 4932 } 4933 4934 static int permute_load_uzp(int i, int part, int elements) 4935 { 4936 return 2 * i + part; 4937 } 4938 4939 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4940 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4941 4942 static int permute_load_trn(int i, int part, int elements) 4943 { 4944 return (i & 1) * elements + (i & ~1) + part; 4945 } 4946 4947 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4948 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4949 4950 static int permute_load_zip(int i, int part, int elements) 4951 { 4952 return (i & 1) * elements + ((part * elements + i) >> 1); 4953 } 4954 4955 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4956 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4957 4958 /* 4959 * Cryptographic AES, SHA, SHA512 4960 */ 4961 4962 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4963 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4964 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4965 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4966 4967 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4968 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4969 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4970 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4971 4972 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4973 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4974 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4975 4976 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4977 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4978 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4979 4980 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4981 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4982 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4983 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4984 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4985 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4986 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4987 4988 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4989 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4990 4991 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4992 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4993 4994 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4995 { 4996 if (!dc_isar_feature(aa64_sm3, s)) { 4997 return false; 4998 } 4999 if (fp_access_check(s)) { 5000 
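/*
 * SM3SS1 operates only on the top 32-bit lanes:
 *   Vd<127:96> = ROL32(ROL32(Vn<127:96>, 12) + Vm<127:96> + Va<127:96>, 7)
 * with all other lanes of Vd zeroed. The rotate-rights by 20 and 25 below
 * are the equivalent left-rotates by 12 and 7.
 */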
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 5001 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 5002 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 5003 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5004 5005 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 5006 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 5007 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 5008 5009 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 5010 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 5011 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 5012 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 5013 5014 /* Clear the whole register first, then store bits [127:96]. */ 5015 clear_vec(s, a->rd); 5016 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 5017 } 5018 return true; 5019 } 5020 5021 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 5022 { 5023 if (fp_access_check(s)) { 5024 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 5025 } 5026 return true; 5027 } 5028 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 5029 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 5030 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 5031 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 5032 5033 static bool trans_XAR(DisasContext *s, arg_XAR *a) 5034 { 5035 if (!dc_isar_feature(aa64_sha3, s)) { 5036 return false; 5037 } 5038 if (fp_access_check(s)) { 5039 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 5040 vec_full_reg_offset(s, a->rn), 5041 vec_full_reg_offset(s, a->rm), a->imm, 16, 5042 vec_full_reg_size(s)); 5043 } 5044 return true; 5045 } 5046 5047 /* 5048 * Advanced SIMD copy 5049 */ 5050 5051 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 5052 { 5053 unsigned esz = ctz32(imm); 5054 if (esz <= MO_64) { 5055 *pesz = esz; 5056 *pidx = imm >> (esz + 1); 5057 return true; 5058 } 5059 return false; 5060 } 5061 5062 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 5063 { 5064 MemOp esz; 5065 unsigned idx; 5066 5067 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5068 return false; 5069 } 5070 if (fp_access_check(s)) { 5071 /* 5072 * This instruction just extracts the specified element and 5073 * zero-extends it into the bottom of the destination register. 5074 */ 5075 TCGv_i64 tmp = tcg_temp_new_i64(); 5076 read_vec_element(s, tmp, a->rn, idx, esz); 5077 write_fp_dreg(s, a->rd, tmp); 5078 } 5079 return true; 5080 } 5081 5082 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 5083 { 5084 MemOp esz; 5085 unsigned idx; 5086 5087 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5088 return false; 5089 } 5090 if (esz == MO_64 && !a->q) { 5091 return false; 5092 } 5093 if (fp_access_check(s)) { 5094 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 5095 vec_reg_offset(s, a->rn, idx, esz), 5096 a->q ? 16 : 8, vec_full_reg_size(s)); 5097 } 5098 return true; 5099 } 5100 5101 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 5102 { 5103 MemOp esz; 5104 unsigned idx; 5105 5106 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5107 return false; 5108 } 5109 if (esz == MO_64 && !a->q) { 5110 return false; 5111 } 5112 if (fp_access_check(s)) { 5113 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5114 a->q ? 
16 : 8, vec_full_reg_size(s), 5115 cpu_reg(s, a->rn)); 5116 } 5117 return true; 5118 } 5119 5120 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 5121 { 5122 MemOp esz; 5123 unsigned idx; 5124 5125 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5126 return false; 5127 } 5128 if (is_signed) { 5129 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 5130 return false; 5131 } 5132 } else { 5133 if (esz == MO_64 ? !a->q : a->q) { 5134 return false; 5135 } 5136 } 5137 if (fp_access_check(s)) { 5138 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5139 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 5140 if (is_signed && !a->q) { 5141 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5142 } 5143 } 5144 return true; 5145 } 5146 5147 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 5148 TRANS(UMOV, do_smov_umov, a, 0) 5149 5150 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 5151 { 5152 MemOp esz; 5153 unsigned idx; 5154 5155 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5156 return false; 5157 } 5158 if (fp_access_check(s)) { 5159 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 5160 clear_vec_high(s, true, a->rd); 5161 } 5162 return true; 5163 } 5164 5165 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5166 { 5167 MemOp esz; 5168 unsigned didx, sidx; 5169 5170 if (!decode_esz_idx(a->di, &esz, &didx)) { 5171 return false; 5172 } 5173 sidx = a->si >> esz; 5174 if (fp_access_check(s)) { 5175 TCGv_i64 tmp = tcg_temp_new_i64(); 5176 5177 read_vec_element(s, tmp, a->rn, sidx, esz); 5178 write_vec_element(s, tmp, a->rd, didx, esz); 5179 5180 /* INS is considered a 128-bit write for SVE. */ 5181 clear_vec_high(s, true, a->rd); 5182 } 5183 return true; 5184 } 5185 5186 /* 5187 * Advanced SIMD three same 5188 */ 5189 5190 typedef struct FPScalar { 5191 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5192 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5193 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5194 } FPScalar; 5195 5196 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, 5197 const FPScalar *f, int mergereg, 5198 ARMFPStatusFlavour fpsttype) 5199 { 5200 switch (a->esz) { 5201 case MO_64: 5202 if (fp_access_check(s)) { 5203 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5204 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5205 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); 5206 write_fp_dreg_merging(s, a->rd, mergereg, t0); 5207 } 5208 break; 5209 case MO_32: 5210 if (fp_access_check(s)) { 5211 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5212 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5213 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); 5214 write_fp_sreg_merging(s, a->rd, mergereg, t0); 5215 } 5216 break; 5217 case MO_16: 5218 if (!dc_isar_feature(aa64_fp16, s)) { 5219 return false; 5220 } 5221 if (fp_access_check(s)) { 5222 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5223 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5224 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); 5225 write_fp_hreg_merging(s, a->rd, mergereg, t0); 5226 } 5227 break; 5228 default: 5229 return false; 5230 } 5231 return true; 5232 } 5233 5234 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, 5235 int mergereg) 5236 { 5237 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, 5238 a->esz == MO_16 ? 5239 FPST_A64_F16 : FPST_A64); 5240 } 5241 5242 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, 5243 const FPScalar *fnormal, const FPScalar *fah, 5244 int mergereg) 5245 { 5246 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? 
fah : fnormal, 5247 mergereg, select_ah_fpst(s, a->esz)); 5248 } 5249 5250 /* Some insns need to call different helpers when FPCR.AH == 1 */ 5251 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, 5252 const FPScalar *fnormal, 5253 const FPScalar *fah, 5254 int mergereg) 5255 { 5256 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); 5257 } 5258 5259 static const FPScalar f_scalar_fadd = { 5260 gen_helper_vfp_addh, 5261 gen_helper_vfp_adds, 5262 gen_helper_vfp_addd, 5263 }; 5264 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) 5265 5266 static const FPScalar f_scalar_fsub = { 5267 gen_helper_vfp_subh, 5268 gen_helper_vfp_subs, 5269 gen_helper_vfp_subd, 5270 }; 5271 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) 5272 5273 static const FPScalar f_scalar_fdiv = { 5274 gen_helper_vfp_divh, 5275 gen_helper_vfp_divs, 5276 gen_helper_vfp_divd, 5277 }; 5278 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) 5279 5280 static const FPScalar f_scalar_fmul = { 5281 gen_helper_vfp_mulh, 5282 gen_helper_vfp_muls, 5283 gen_helper_vfp_muld, 5284 }; 5285 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) 5286 5287 static const FPScalar f_scalar_fmax = { 5288 gen_helper_vfp_maxh, 5289 gen_helper_vfp_maxs, 5290 gen_helper_vfp_maxd, 5291 }; 5292 static const FPScalar f_scalar_fmax_ah = { 5293 gen_helper_vfp_ah_maxh, 5294 gen_helper_vfp_ah_maxs, 5295 gen_helper_vfp_ah_maxd, 5296 }; 5297 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) 5298 5299 static const FPScalar f_scalar_fmin = { 5300 gen_helper_vfp_minh, 5301 gen_helper_vfp_mins, 5302 gen_helper_vfp_mind, 5303 }; 5304 static const FPScalar f_scalar_fmin_ah = { 5305 gen_helper_vfp_ah_minh, 5306 gen_helper_vfp_ah_mins, 5307 gen_helper_vfp_ah_mind, 5308 }; 5309 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) 5310 5311 static const FPScalar f_scalar_fmaxnm = { 5312 gen_helper_vfp_maxnumh, 5313 gen_helper_vfp_maxnums, 5314 gen_helper_vfp_maxnumd, 5315 }; 5316 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) 5317 5318 static const FPScalar f_scalar_fminnm = { 5319 gen_helper_vfp_minnumh, 5320 gen_helper_vfp_minnums, 5321 gen_helper_vfp_minnumd, 5322 }; 5323 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) 5324 5325 static const FPScalar f_scalar_fmulx = { 5326 gen_helper_advsimd_mulxh, 5327 gen_helper_vfp_mulxs, 5328 gen_helper_vfp_mulxd, 5329 }; 5330 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) 5331 5332 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5333 { 5334 gen_helper_vfp_mulh(d, n, m, s); 5335 gen_vfp_negh(d, d); 5336 } 5337 5338 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5339 { 5340 gen_helper_vfp_muls(d, n, m, s); 5341 gen_vfp_negs(d, d); 5342 } 5343 5344 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5345 { 5346 gen_helper_vfp_muld(d, n, m, s); 5347 gen_vfp_negd(d, d); 5348 } 5349 5350 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5351 { 5352 gen_helper_vfp_mulh(d, n, m, s); 5353 gen_vfp_ah_negh(d, d); 5354 } 5355 5356 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5357 { 5358 gen_helper_vfp_muls(d, n, m, s); 5359 gen_vfp_ah_negs(d, d); 5360 } 5361 5362 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5363 { 5364 gen_helper_vfp_muld(d, n, m, s); 5365 gen_vfp_ah_negd(d, d); 5366 } 5367 5368 static const FPScalar f_scalar_fnmul = { 5369 gen_fnmul_h, 5370 
gen_fnmul_s, 5371 gen_fnmul_d, 5372 }; 5373 static const FPScalar f_scalar_ah_fnmul = { 5374 gen_fnmul_ah_h, 5375 gen_fnmul_ah_s, 5376 gen_fnmul_ah_d, 5377 }; 5378 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) 5379 5380 static const FPScalar f_scalar_fcmeq = { 5381 gen_helper_advsimd_ceq_f16, 5382 gen_helper_neon_ceq_f32, 5383 gen_helper_neon_ceq_f64, 5384 }; 5385 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) 5386 5387 static const FPScalar f_scalar_fcmge = { 5388 gen_helper_advsimd_cge_f16, 5389 gen_helper_neon_cge_f32, 5390 gen_helper_neon_cge_f64, 5391 }; 5392 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) 5393 5394 static const FPScalar f_scalar_fcmgt = { 5395 gen_helper_advsimd_cgt_f16, 5396 gen_helper_neon_cgt_f32, 5397 gen_helper_neon_cgt_f64, 5398 }; 5399 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) 5400 5401 static const FPScalar f_scalar_facge = { 5402 gen_helper_advsimd_acge_f16, 5403 gen_helper_neon_acge_f32, 5404 gen_helper_neon_acge_f64, 5405 }; 5406 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) 5407 5408 static const FPScalar f_scalar_facgt = { 5409 gen_helper_advsimd_acgt_f16, 5410 gen_helper_neon_acgt_f32, 5411 gen_helper_neon_acgt_f64, 5412 }; 5413 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) 5414 5415 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5416 { 5417 gen_helper_vfp_subh(d, n, m, s); 5418 gen_vfp_absh(d, d); 5419 } 5420 5421 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5422 { 5423 gen_helper_vfp_subs(d, n, m, s); 5424 gen_vfp_abss(d, d); 5425 } 5426 5427 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5428 { 5429 gen_helper_vfp_subd(d, n, m, s); 5430 gen_vfp_absd(d, d); 5431 } 5432 5433 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5434 { 5435 gen_helper_vfp_subh(d, n, m, s); 5436 gen_vfp_ah_absh(d, d); 5437 } 5438 5439 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5440 { 5441 gen_helper_vfp_subs(d, n, m, s); 5442 gen_vfp_ah_abss(d, d); 5443 } 5444 5445 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5446 { 5447 gen_helper_vfp_subd(d, n, m, s); 5448 gen_vfp_ah_absd(d, d); 5449 } 5450 5451 static const FPScalar f_scalar_fabd = { 5452 gen_fabd_h, 5453 gen_fabd_s, 5454 gen_fabd_d, 5455 }; 5456 static const FPScalar f_scalar_ah_fabd = { 5457 gen_fabd_ah_h, 5458 gen_fabd_ah_s, 5459 gen_fabd_ah_d, 5460 }; 5461 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) 5462 5463 static const FPScalar f_scalar_frecps = { 5464 gen_helper_recpsf_f16, 5465 gen_helper_recpsf_f32, 5466 gen_helper_recpsf_f64, 5467 }; 5468 static const FPScalar f_scalar_ah_frecps = { 5469 gen_helper_recpsf_ah_f16, 5470 gen_helper_recpsf_ah_f32, 5471 gen_helper_recpsf_ah_f64, 5472 }; 5473 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, 5474 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) 5475 5476 static const FPScalar f_scalar_frsqrts = { 5477 gen_helper_rsqrtsf_f16, 5478 gen_helper_rsqrtsf_f32, 5479 gen_helper_rsqrtsf_f64, 5480 }; 5481 static const FPScalar f_scalar_ah_frsqrts = { 5482 gen_helper_rsqrtsf_ah_f16, 5483 gen_helper_rsqrtsf_ah_f32, 5484 gen_helper_rsqrtsf_ah_f64, 5485 }; 5486 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, 5487 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) 5488 5489 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5490 const FPScalar *f, bool swap) 5491 { 5492 switch (a->esz) { 5493 case MO_64: 5494 if 
(fp_access_check(s)) { 5495 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5496 TCGv_i64 t1 = tcg_constant_i64(0); 5497 if (swap) { 5498 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5499 } else { 5500 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5501 } 5502 write_fp_dreg(s, a->rd, t0); 5503 } 5504 break; 5505 case MO_32: 5506 if (fp_access_check(s)) { 5507 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5508 TCGv_i32 t1 = tcg_constant_i32(0); 5509 if (swap) { 5510 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5511 } else { 5512 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5513 } 5514 write_fp_sreg(s, a->rd, t0); 5515 } 5516 break; 5517 case MO_16: 5518 if (!dc_isar_feature(aa64_fp16, s)) { 5519 return false; 5520 } 5521 if (fp_access_check(s)) { 5522 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5523 TCGv_i32 t1 = tcg_constant_i32(0); 5524 if (swap) { 5525 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 5526 } else { 5527 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5528 } 5529 write_fp_sreg(s, a->rd, t0); 5530 } 5531 break; 5532 default: 5533 return false; 5534 } 5535 return true; 5536 } 5537 5538 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5539 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5540 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5541 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5542 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5543 5544 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5545 MemOp sgn_n, MemOp sgn_m, 5546 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5547 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5548 { 5549 TCGv_i64 t0, t1, t2, qc; 5550 MemOp esz = a->esz; 5551 5552 if (!fp_access_check(s)) { 5553 return true; 5554 } 5555 5556 t0 = tcg_temp_new_i64(); 5557 t1 = tcg_temp_new_i64(); 5558 t2 = tcg_temp_new_i64(); 5559 qc = tcg_temp_new_i64(); 5560 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5561 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5562 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5563 5564 if (esz == MO_64) { 5565 gen_d(t0, qc, t1, t2); 5566 } else { 5567 gen_bhs(t0, qc, t1, t2, esz); 5568 tcg_gen_ext_i64(t0, t0, esz); 5569 } 5570 5571 write_fp_dreg(s, a->rd, t0); 5572 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5573 return true; 5574 } 5575 5576 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5577 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5578 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5579 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5580 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5581 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5582 5583 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5584 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5585 { 5586 if (fp_access_check(s)) { 5587 TCGv_i64 t0 = tcg_temp_new_i64(); 5588 TCGv_i64 t1 = tcg_temp_new_i64(); 5589 5590 read_vec_element(s, t0, a->rn, 0, MO_64); 5591 read_vec_element(s, t1, a->rm, 0, MO_64); 5592 fn(t0, t0, t1); 5593 write_fp_dreg(s, a->rd, t0); 5594 } 5595 return true; 5596 } 5597 5598 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5599 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5600 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5601 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5602 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5603 TRANS(SUB_s, do_int3_scalar_d, a, 
tcg_gen_sub_i64) 5604 5605 typedef struct ENVScalar2 { 5606 NeonGenTwoOpEnvFn *gen_bhs[3]; 5607 NeonGenTwo64OpEnvFn *gen_d; 5608 } ENVScalar2; 5609 5610 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5611 { 5612 if (!fp_access_check(s)) { 5613 return true; 5614 } 5615 if (a->esz == MO_64) { 5616 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5617 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5618 f->gen_d(t0, tcg_env, t0, t1); 5619 write_fp_dreg(s, a->rd, t0); 5620 } else { 5621 TCGv_i32 t0 = tcg_temp_new_i32(); 5622 TCGv_i32 t1 = tcg_temp_new_i32(); 5623 5624 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5625 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5626 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5627 write_fp_sreg(s, a->rd, t0); 5628 } 5629 return true; 5630 } 5631 5632 static const ENVScalar2 f_scalar_sqshl = { 5633 { gen_helper_neon_qshl_s8, 5634 gen_helper_neon_qshl_s16, 5635 gen_helper_neon_qshl_s32 }, 5636 gen_helper_neon_qshl_s64, 5637 }; 5638 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5639 5640 static const ENVScalar2 f_scalar_uqshl = { 5641 { gen_helper_neon_qshl_u8, 5642 gen_helper_neon_qshl_u16, 5643 gen_helper_neon_qshl_u32 }, 5644 gen_helper_neon_qshl_u64, 5645 }; 5646 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5647 5648 static const ENVScalar2 f_scalar_sqrshl = { 5649 { gen_helper_neon_qrshl_s8, 5650 gen_helper_neon_qrshl_s16, 5651 gen_helper_neon_qrshl_s32 }, 5652 gen_helper_neon_qrshl_s64, 5653 }; 5654 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5655 5656 static const ENVScalar2 f_scalar_uqrshl = { 5657 { gen_helper_neon_qrshl_u8, 5658 gen_helper_neon_qrshl_u16, 5659 gen_helper_neon_qrshl_u32 }, 5660 gen_helper_neon_qrshl_u64, 5661 }; 5662 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5663 5664 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5665 const ENVScalar2 *f) 5666 { 5667 if (a->esz == MO_16 || a->esz == MO_32) { 5668 return do_env_scalar2(s, a, f); 5669 } 5670 return false; 5671 } 5672 5673 static const ENVScalar2 f_scalar_sqdmulh = { 5674 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5675 }; 5676 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5677 5678 static const ENVScalar2 f_scalar_sqrdmulh = { 5679 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5680 }; 5681 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5682 5683 typedef struct ENVScalar3 { 5684 NeonGenThreeOpEnvFn *gen_hs[2]; 5685 } ENVScalar3; 5686 5687 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5688 const ENVScalar3 *f) 5689 { 5690 TCGv_i32 t0, t1, t2; 5691 5692 if (a->esz != MO_16 && a->esz != MO_32) { 5693 return false; 5694 } 5695 if (!fp_access_check(s)) { 5696 return true; 5697 } 5698 5699 t0 = tcg_temp_new_i32(); 5700 t1 = tcg_temp_new_i32(); 5701 t2 = tcg_temp_new_i32(); 5702 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5703 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5704 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5705 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5706 write_fp_sreg(s, a->rd, t0); 5707 return true; 5708 } 5709 5710 static const ENVScalar3 f_scalar_sqrdmlah = { 5711 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5712 }; 5713 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5714 5715 static const ENVScalar3 f_scalar_sqrdmlsh = { 5716 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5717 }; 5718 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5719 5720 
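/*
 * Scalar integer compares, 64-bit only. The AdvSIMD compare insns produce
 * an all-ones (true) or all-zeroes (false) result, so do_cmop_d uses
 * tcg_gen_negsetcond_i64: setcond yields 0 or 1, and negating that gives
 * the required 0 / -1 mask.
 */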
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5721 { 5722 if (fp_access_check(s)) { 5723 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5724 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5725 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5726 write_fp_dreg(s, a->rd, t0); 5727 } 5728 return true; 5729 } 5730 5731 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5732 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5733 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5734 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5735 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5736 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5737 5738 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, 5739 int data, 5740 gen_helper_gvec_3_ptr * const fns[3], 5741 ARMFPStatusFlavour fpsttype) 5742 { 5743 MemOp esz = a->esz; 5744 int check = fp_access_check_vector_hsd(s, a->q, esz); 5745 5746 if (check <= 0) { 5747 return check == 0; 5748 } 5749 5750 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, 5751 data, fns[esz - 1]); 5752 return true; 5753 } 5754 5755 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5756 gen_helper_gvec_3_ptr * const fns[3]) 5757 { 5758 return do_fp3_vector_with_fpsttype(s, a, data, fns, 5759 a->esz == MO_16 ? 5760 FPST_A64_F16 : FPST_A64); 5761 } 5762 5763 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5764 gen_helper_gvec_3_ptr * const fnormal[3], 5765 gen_helper_gvec_3_ptr * const fah[3]) 5766 { 5767 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); 5768 } 5769 5770 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5771 gen_helper_gvec_3_ptr * const fnormal[3], 5772 gen_helper_gvec_3_ptr * const fah[3]) 5773 { 5774 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? 
fah : fnormal, 5775 select_ah_fpst(s, a->esz)); 5776 } 5777 5778 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5779 gen_helper_gvec_fadd_h, 5780 gen_helper_gvec_fadd_s, 5781 gen_helper_gvec_fadd_d, 5782 }; 5783 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5784 5785 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5786 gen_helper_gvec_fsub_h, 5787 gen_helper_gvec_fsub_s, 5788 gen_helper_gvec_fsub_d, 5789 }; 5790 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5791 5792 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5793 gen_helper_gvec_fdiv_h, 5794 gen_helper_gvec_fdiv_s, 5795 gen_helper_gvec_fdiv_d, 5796 }; 5797 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5798 5799 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5800 gen_helper_gvec_fmul_h, 5801 gen_helper_gvec_fmul_s, 5802 gen_helper_gvec_fmul_d, 5803 }; 5804 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5805 5806 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5807 gen_helper_gvec_fmax_h, 5808 gen_helper_gvec_fmax_s, 5809 gen_helper_gvec_fmax_d, 5810 }; 5811 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { 5812 gen_helper_gvec_ah_fmax_h, 5813 gen_helper_gvec_ah_fmax_s, 5814 gen_helper_gvec_ah_fmax_d, 5815 }; 5816 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) 5817 5818 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5819 gen_helper_gvec_fmin_h, 5820 gen_helper_gvec_fmin_s, 5821 gen_helper_gvec_fmin_d, 5822 }; 5823 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { 5824 gen_helper_gvec_ah_fmin_h, 5825 gen_helper_gvec_ah_fmin_s, 5826 gen_helper_gvec_ah_fmin_d, 5827 }; 5828 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) 5829 5830 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5831 gen_helper_gvec_fmaxnum_h, 5832 gen_helper_gvec_fmaxnum_s, 5833 gen_helper_gvec_fmaxnum_d, 5834 }; 5835 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5836 5837 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5838 gen_helper_gvec_fminnum_h, 5839 gen_helper_gvec_fminnum_s, 5840 gen_helper_gvec_fminnum_d, 5841 }; 5842 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5843 5844 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5845 gen_helper_gvec_fmulx_h, 5846 gen_helper_gvec_fmulx_s, 5847 gen_helper_gvec_fmulx_d, 5848 }; 5849 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5850 5851 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5852 gen_helper_gvec_vfma_h, 5853 gen_helper_gvec_vfma_s, 5854 gen_helper_gvec_vfma_d, 5855 }; 5856 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5857 5858 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5859 gen_helper_gvec_vfms_h, 5860 gen_helper_gvec_vfms_s, 5861 gen_helper_gvec_vfms_d, 5862 }; 5863 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { 5864 gen_helper_gvec_ah_vfms_h, 5865 gen_helper_gvec_ah_vfms_s, 5866 gen_helper_gvec_ah_vfms_d, 5867 }; 5868 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) 5869 5870 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5871 gen_helper_gvec_fceq_h, 5872 gen_helper_gvec_fceq_s, 5873 gen_helper_gvec_fceq_d, 5874 }; 5875 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5876 5877 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5878 gen_helper_gvec_fcge_h, 5879 gen_helper_gvec_fcge_s, 5880 gen_helper_gvec_fcge_d, 5881 }; 5882 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5883 5884 static gen_helper_gvec_3_ptr * const 
f_vector_fcmgt[3] = { 5885 gen_helper_gvec_fcgt_h, 5886 gen_helper_gvec_fcgt_s, 5887 gen_helper_gvec_fcgt_d, 5888 }; 5889 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5890 5891 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5892 gen_helper_gvec_facge_h, 5893 gen_helper_gvec_facge_s, 5894 gen_helper_gvec_facge_d, 5895 }; 5896 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5897 5898 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5899 gen_helper_gvec_facgt_h, 5900 gen_helper_gvec_facgt_s, 5901 gen_helper_gvec_facgt_d, 5902 }; 5903 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5904 5905 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5906 gen_helper_gvec_fabd_h, 5907 gen_helper_gvec_fabd_s, 5908 gen_helper_gvec_fabd_d, 5909 }; 5910 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { 5911 gen_helper_gvec_ah_fabd_h, 5912 gen_helper_gvec_ah_fabd_s, 5913 gen_helper_gvec_ah_fabd_d, 5914 }; 5915 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) 5916 5917 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5918 gen_helper_gvec_recps_h, 5919 gen_helper_gvec_recps_s, 5920 gen_helper_gvec_recps_d, 5921 }; 5922 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { 5923 gen_helper_gvec_ah_recps_h, 5924 gen_helper_gvec_ah_recps_s, 5925 gen_helper_gvec_ah_recps_d, 5926 }; 5927 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) 5928 5929 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5930 gen_helper_gvec_rsqrts_h, 5931 gen_helper_gvec_rsqrts_s, 5932 gen_helper_gvec_rsqrts_d, 5933 }; 5934 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { 5935 gen_helper_gvec_ah_rsqrts_h, 5936 gen_helper_gvec_ah_rsqrts_s, 5937 gen_helper_gvec_ah_rsqrts_d, 5938 }; 5939 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) 5940 5941 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5942 gen_helper_gvec_faddp_h, 5943 gen_helper_gvec_faddp_s, 5944 gen_helper_gvec_faddp_d, 5945 }; 5946 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5947 5948 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5949 gen_helper_gvec_fmaxp_h, 5950 gen_helper_gvec_fmaxp_s, 5951 gen_helper_gvec_fmaxp_d, 5952 }; 5953 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { 5954 gen_helper_gvec_ah_fmaxp_h, 5955 gen_helper_gvec_ah_fmaxp_s, 5956 gen_helper_gvec_ah_fmaxp_d, 5957 }; 5958 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) 5959 5960 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5961 gen_helper_gvec_fminp_h, 5962 gen_helper_gvec_fminp_s, 5963 gen_helper_gvec_fminp_d, 5964 }; 5965 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { 5966 gen_helper_gvec_ah_fminp_h, 5967 gen_helper_gvec_ah_fminp_s, 5968 gen_helper_gvec_ah_fminp_d, 5969 }; 5970 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) 5971 5972 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5973 gen_helper_gvec_fmaxnump_h, 5974 gen_helper_gvec_fmaxnump_s, 5975 gen_helper_gvec_fmaxnump_d, 5976 }; 5977 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5978 5979 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5980 gen_helper_gvec_fminnump_h, 5981 gen_helper_gvec_fminnump_s, 5982 gen_helper_gvec_fminnump_d, 5983 }; 5984 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5985 5986 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5987 { 5988 if 
(fp_access_check(s)) { 5989 int data = (is_2 << 1) | is_s; 5990 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5991 vec_full_reg_offset(s, a->rn), 5992 vec_full_reg_offset(s, a->rm), tcg_env, 5993 a->q ? 16 : 8, vec_full_reg_size(s), 5994 data, gen_helper_gvec_fmlal_a64); 5995 } 5996 return true; 5997 } 5998 5999 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 6000 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 6001 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 6002 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 6003 6004 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 6005 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 6006 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 6007 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 6008 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 6009 6010 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 6011 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 6012 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 6013 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 6014 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 6015 6016 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 6017 { 6018 if (fp_access_check(s)) { 6019 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 6020 } 6021 return true; 6022 } 6023 6024 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 6025 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 6026 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 6027 6028 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 6029 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 6030 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 6031 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 6032 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 6033 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 6034 6035 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 6036 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 6037 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 6038 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 6039 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 6040 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 6041 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 6042 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 6043 6044 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 6045 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 6046 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 6047 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 6048 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 6049 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 6050 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 6051 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 6052 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 6053 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 6054 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 6055 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 6056 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 6057 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 6058 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 6059 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 6060 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 6061 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 6062 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 6063 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 6064 6065 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 6066 { 6067 if (a->esz == MO_64 && !a->q) { 6068 
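/* 64-bit elements exist only in the 128-bit (Q=1) form; the non-Q encoding is reserved. */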
return false; 6069 } 6070 if (fp_access_check(s)) { 6071 tcg_gen_gvec_cmp(cond, a->esz, 6072 vec_full_reg_offset(s, a->rd), 6073 vec_full_reg_offset(s, a->rn), 6074 vec_full_reg_offset(s, a->rm), 6075 a->q ? 16 : 8, vec_full_reg_size(s)); 6076 } 6077 return true; 6078 } 6079 6080 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 6081 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 6082 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 6083 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 6084 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 6085 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 6086 6087 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 6088 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 6089 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 6090 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 6091 6092 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 6093 gen_helper_gvec_4 *fn) 6094 { 6095 if (fp_access_check(s)) { 6096 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6097 } 6098 return true; 6099 } 6100 6101 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 6102 gen_helper_gvec_4_ptr *fn) 6103 { 6104 if (fp_access_check(s)) { 6105 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6106 } 6107 return true; 6108 } 6109 6110 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 6111 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 6112 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 6113 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 6114 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 6115 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 6116 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 6117 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 6118 6119 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 6120 { 6121 if (!dc_isar_feature(aa64_bf16, s)) { 6122 return false; 6123 } 6124 if (fp_access_check(s)) { 6125 /* Q bit selects BFMLALB vs BFMLALT. */ 6126 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6127 s->fpcr_ah ? FPST_AH : FPST_A64, a->q, 6128 gen_helper_gvec_bfmlal); 6129 } 6130 return true; 6131 } 6132 6133 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 6134 gen_helper_gvec_fcaddh, 6135 gen_helper_gvec_fcadds, 6136 gen_helper_gvec_fcaddd, 6137 }; 6138 /* 6139 * Encode FPCR.AH into the data so the helper knows whether the 6140 * negations it does should avoid flipping the sign bit on a NaN 6141 */ 6142 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), 6143 f_vector_fcadd) 6144 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), 6145 f_vector_fcadd) 6146 6147 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 6148 { 6149 static gen_helper_gvec_4_ptr * const fn[] = { 6150 [MO_16] = gen_helper_gvec_fcmlah, 6151 [MO_32] = gen_helper_gvec_fcmlas, 6152 [MO_64] = gen_helper_gvec_fcmlad, 6153 }; 6154 int check; 6155 6156 if (!dc_isar_feature(aa64_fcma, s)) { 6157 return false; 6158 } 6159 6160 check = fp_access_check_vector_hsd(s, a->q, a->esz); 6161 if (check <= 0) { 6162 return check == 0; 6163 } 6164 6165 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6166 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 6167 a->rot | (s->fpcr_ah << 2), fn[a->esz]); 6168 return true; 6169 } 6170 6171 /* 6172 * Widening vector x vector/indexed. 6173 * 6174 * These read from the top or bottom half of a 128-bit vector. 6175 * After widening, optionally accumulate with a 128-bit vector. 6176 * Implement these inline, as the number of elements is limited 6177 * and the related SVE and SME operations on larger vectors use 6178 * even/odd elements instead of top/bottom half. 6179 * 6180 * If idx >= 0, operand 2 is indexed, otherwise vector. 6181 * If acc, operand 0 is loaded with rd. 6182 */ 6183 6184 /* For low half, iterating up. */ 6185 static bool do_3op_widening(DisasContext *s, MemOp memop, int top, 6186 int rd, int rn, int rm, int idx, 6187 NeonGenTwo64OpFn *fn, bool acc) 6188 { 6189 TCGv_i64 tcg_op0 = tcg_temp_new_i64(); 6190 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 6191 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 6192 MemOp esz = memop & MO_SIZE; 6193 int half = 8 >> esz; 6194 int top_swap, top_half; 6195 6196 /* There are no 64x64->128 bit operations. */ 6197 if (esz >= MO_64) { 6198 return false; 6199 } 6200 if (!fp_access_check(s)) { 6201 return true; 6202 } 6203 6204 if (idx >= 0) { 6205 read_vec_element(s, tcg_op2, rm, idx, memop); 6206 } 6207 6208 /* 6209 * For top half inputs, iterate forward; backward for bottom half. 6210 * This means the store to the destination will not occur until 6211 * overlapping inputs are consumed. 6212 * Use top_swap to conditionally invert the forward iteration index. 6213 */ 6214 top_swap = top ? 0 : half - 1; 6215 top_half = top ? half : 0; 6216 6217 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6218 int elt = elt_fwd ^ top_swap; 6219 6220 read_vec_element(s, tcg_op1, rn, elt + top_half, memop); 6221 if (idx < 0) { 6222 read_vec_element(s, tcg_op2, rm, elt + top_half, memop); 6223 } 6224 if (acc) { 6225 read_vec_element(s, tcg_op0, rd, elt, memop + 1); 6226 } 6227 fn(tcg_op0, tcg_op1, tcg_op2); 6228 write_vec_element(s, tcg_op0, rd, elt, esz + 1); 6229 } 6230 clear_vec_high(s, 1, rd); 6231 return true; 6232 } 6233 6234 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6235 { 6236 TCGv_i64 t = tcg_temp_new_i64(); 6237 tcg_gen_mul_i64(t, n, m); 6238 tcg_gen_add_i64(d, d, t); 6239 } 6240 6241 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6242 { 6243 TCGv_i64 t = tcg_temp_new_i64(); 6244 tcg_gen_mul_i64(t, n, m); 6245 tcg_gen_sub_i64(d, d, t); 6246 } 6247 6248 TRANS(SMULL_v, do_3op_widening, 6249 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6250 tcg_gen_mul_i64, false) 6251 TRANS(UMULL_v, do_3op_widening, 6252 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6253 tcg_gen_mul_i64, false) 6254 TRANS(SMLAL_v, do_3op_widening, 6255 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6256 gen_muladd_i64, true) 6257 TRANS(UMLAL_v, do_3op_widening, 6258 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6259 gen_muladd_i64, true) 6260 TRANS(SMLSL_v, do_3op_widening, 6261 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6262 gen_mulsub_i64, true) 6263 TRANS(UMLSL_v, do_3op_widening, 6264 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6265 gen_mulsub_i64, true) 6266 6267 TRANS(SMULL_vi, do_3op_widening, 6268 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6269 tcg_gen_mul_i64, false) 6270 TRANS(UMULL_vi, do_3op_widening, 6271 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6272 tcg_gen_mul_i64, false) 6273 TRANS(SMLAL_vi, do_3op_widening, 6274 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6275 gen_muladd_i64, true) 6276 TRANS(UMLAL_vi,
do_3op_widening, 6277 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6278 gen_muladd_i64, true) 6279 TRANS(SMLSL_vi, do_3op_widening, 6280 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6281 gen_mulsub_i64, true) 6282 TRANS(UMLSL_vi, do_3op_widening, 6283 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6284 gen_mulsub_i64, true) 6285 6286 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6287 { 6288 TCGv_i64 t1 = tcg_temp_new_i64(); 6289 TCGv_i64 t2 = tcg_temp_new_i64(); 6290 6291 tcg_gen_sub_i64(t1, n, m); 6292 tcg_gen_sub_i64(t2, m, n); 6293 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 6294 } 6295 6296 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6297 { 6298 TCGv_i64 t1 = tcg_temp_new_i64(); 6299 TCGv_i64 t2 = tcg_temp_new_i64(); 6300 6301 tcg_gen_sub_i64(t1, n, m); 6302 tcg_gen_sub_i64(t2, m, n); 6303 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 6304 } 6305 6306 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6307 { 6308 TCGv_i64 t = tcg_temp_new_i64(); 6309 gen_sabd_i64(t, n, m); 6310 tcg_gen_add_i64(d, d, t); 6311 } 6312 6313 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6314 { 6315 TCGv_i64 t = tcg_temp_new_i64(); 6316 gen_uabd_i64(t, n, m); 6317 tcg_gen_add_i64(d, d, t); 6318 } 6319 6320 TRANS(SADDL_v, do_3op_widening, 6321 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6322 tcg_gen_add_i64, false) 6323 TRANS(UADDL_v, do_3op_widening, 6324 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6325 tcg_gen_add_i64, false) 6326 TRANS(SSUBL_v, do_3op_widening, 6327 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6328 tcg_gen_sub_i64, false) 6329 TRANS(USUBL_v, do_3op_widening, 6330 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6331 tcg_gen_sub_i64, false) 6332 TRANS(SABDL_v, do_3op_widening, 6333 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6334 gen_sabd_i64, false) 6335 TRANS(UABDL_v, do_3op_widening, 6336 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6337 gen_uabd_i64, false) 6338 TRANS(SABAL_v, do_3op_widening, 6339 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6340 gen_saba_i64, true) 6341 TRANS(UABAL_v, do_3op_widening, 6342 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6343 gen_uaba_i64, true) 6344 6345 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6346 { 6347 tcg_gen_mul_i64(d, n, m); 6348 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 6349 } 6350 6351 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6352 { 6353 tcg_gen_mul_i64(d, n, m); 6354 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 6355 } 6356 6357 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6358 { 6359 TCGv_i64 t = tcg_temp_new_i64(); 6360 6361 tcg_gen_mul_i64(t, n, m); 6362 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6363 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6364 } 6365 6366 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6367 { 6368 TCGv_i64 t = tcg_temp_new_i64(); 6369 6370 tcg_gen_mul_i64(t, n, m); 6371 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6372 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6373 } 6374 6375 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6376 { 6377 TCGv_i64 t = tcg_temp_new_i64(); 6378 6379 tcg_gen_mul_i64(t, n, m); 6380 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6381 tcg_gen_neg_i64(t, t); 6382 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6383 } 6384 6385 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6386 { 6387 TCGv_i64 t = tcg_temp_new_i64(); 6388 6389 tcg_gen_mul_i64(t, n, m); 6390 
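/* Saturating doubling of the product; the negation below turns the final saturating accumulate into a subtraction. */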
gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6391 tcg_gen_neg_i64(t, t); 6392 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6393 } 6394 6395 TRANS(SQDMULL_v, do_3op_widening, 6396 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6397 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6398 TRANS(SQDMLAL_v, do_3op_widening, 6399 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6400 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6401 TRANS(SQDMLSL_v, do_3op_widening, 6402 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6403 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6404 6405 TRANS(SQDMULL_vi, do_3op_widening, 6406 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6407 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6408 TRANS(SQDMLAL_vi, do_3op_widening, 6409 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6410 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6411 TRANS(SQDMLSL_vi, do_3op_widening, 6412 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6413 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6414 6415 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, 6416 MemOp sign, bool sub) 6417 { 6418 TCGv_i64 tcg_op0, tcg_op1; 6419 MemOp esz = a->esz; 6420 int half = 8 >> esz; 6421 bool top = a->q; 6422 int top_swap = top ? 0 : half - 1; 6423 int top_half = top ? half : 0; 6424 6425 /* There are no 64x64->128 bit operations. */ 6426 if (esz >= MO_64) { 6427 return false; 6428 } 6429 if (!fp_access_check(s)) { 6430 return true; 6431 } 6432 tcg_op0 = tcg_temp_new_i64(); 6433 tcg_op1 = tcg_temp_new_i64(); 6434 6435 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6436 int elt = elt_fwd ^ top_swap; 6437 6438 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign); 6439 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6440 if (sub) { 6441 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6442 } else { 6443 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6444 } 6445 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1); 6446 } 6447 clear_vec_high(s, 1, a->rd); 6448 return true; 6449 } 6450 6451 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false) 6452 TRANS(UADDW, do_addsub_wide, a, 0, false) 6453 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true) 6454 TRANS(USUBW, do_addsub_wide, a, 0, true) 6455 6456 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a, 6457 bool sub, bool round) 6458 { 6459 TCGv_i64 tcg_op0, tcg_op1; 6460 MemOp esz = a->esz; 6461 int half = 8 >> esz; 6462 bool top = a->q; 6463 int ebits = 8 << esz; 6464 uint64_t rbit = 1ull << (ebits - 1); 6465 int top_swap, top_half; 6466 6467 /* There are no 128x128->64 bit operations. */ 6468 if (esz >= MO_64) { 6469 return false; 6470 } 6471 if (!fp_access_check(s)) { 6472 return true; 6473 } 6474 tcg_op0 = tcg_temp_new_i64(); 6475 tcg_op1 = tcg_temp_new_i64(); 6476 6477 /* 6478 * For top half inputs, iterate backward; forward for bottom half. 6479 * This means the store to the destination will not occur until 6480 * overlapping inputs are consumed. 6481 */ 6482 top_swap = top ? half - 1 : 0; 6483 top_half = top ?
half : 0; 6484 6485 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6486 int elt = elt_fwd ^ top_swap; 6487 6488 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6489 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6490 if (sub) { 6491 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6492 } else { 6493 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6494 } 6495 if (round) { 6496 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6497 } 6498 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6499 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6500 } 6501 clear_vec_high(s, top, a->rd); 6502 return true; 6503 } 6504 6505 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6506 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6507 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6508 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6509 6510 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6511 { 6512 if (fp_access_check(s)) { 6513 /* The Q field specifies lo/hi half input for these insns. */ 6514 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6515 } 6516 return true; 6517 } 6518 6519 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6520 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6521 6522 /* 6523 * Advanced SIMD scalar/vector x indexed element 6524 */ 6525 6526 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6527 { 6528 switch (a->esz) { 6529 case MO_64: 6530 if (fp_access_check(s)) { 6531 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6532 TCGv_i64 t1 = tcg_temp_new_i64(); 6533 6534 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6535 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6536 write_fp_dreg_merging(s, a->rd, a->rn, t0); 6537 } 6538 break; 6539 case MO_32: 6540 if (fp_access_check(s)) { 6541 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6542 TCGv_i32 t1 = tcg_temp_new_i32(); 6543 6544 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6545 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6546 write_fp_sreg_merging(s, a->rd, a->rn, t0); 6547 } 6548 break; 6549 case MO_16: 6550 if (!dc_isar_feature(aa64_fp16, s)) { 6551 return false; 6552 } 6553 if (fp_access_check(s)) { 6554 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6555 TCGv_i32 t1 = tcg_temp_new_i32(); 6556 6557 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6558 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6559 write_fp_hreg_merging(s, a->rd, a->rn, t0); 6560 } 6561 break; 6562 default: 6563 g_assert_not_reached(); 6564 } 6565 return true; 6566 } 6567 6568 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6569 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6570 6571 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6572 { 6573 switch (a->esz) { 6574 case MO_64: 6575 if (fp_access_check(s)) { 6576 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6577 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6578 TCGv_i64 t2 = tcg_temp_new_i64(); 6579 6580 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6581 if (neg) { 6582 gen_vfp_maybe_ah_negd(s, t1, t1); 6583 } 6584 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6585 write_fp_dreg_merging(s, a->rd, a->rd, t0); 6586 } 6587 break; 6588 case MO_32: 6589 if (fp_access_check(s)) { 6590 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6591 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6592 TCGv_i32 t2 = tcg_temp_new_i32(); 6593 6594 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6595 if (neg) { 6596 gen_vfp_maybe_ah_negs(s, t1, t1); 6597 } 6598 gen_helper_vfp_muladds(t0, t1, t2, t0, 
fpstatus_ptr(FPST_A64)); 6599 write_fp_sreg_merging(s, a->rd, a->rd, t0); 6600 } 6601 break; 6602 case MO_16: 6603 if (!dc_isar_feature(aa64_fp16, s)) { 6604 return false; 6605 } 6606 if (fp_access_check(s)) { 6607 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6608 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6609 TCGv_i32 t2 = tcg_temp_new_i32(); 6610 6611 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6612 if (neg) { 6613 gen_vfp_maybe_ah_negh(s, t1, t1); 6614 } 6615 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6616 fpstatus_ptr(FPST_A64_F16)); 6617 write_fp_hreg_merging(s, a->rd, a->rd, t0); 6618 } 6619 break; 6620 default: 6621 g_assert_not_reached(); 6622 } 6623 return true; 6624 } 6625 6626 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6627 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6628 6629 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6630 const ENVScalar2 *f) 6631 { 6632 if (a->esz < MO_16 || a->esz > MO_32) { 6633 return false; 6634 } 6635 if (fp_access_check(s)) { 6636 TCGv_i32 t0 = tcg_temp_new_i32(); 6637 TCGv_i32 t1 = tcg_temp_new_i32(); 6638 6639 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6640 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6641 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6642 write_fp_sreg(s, a->rd, t0); 6643 } 6644 return true; 6645 } 6646 6647 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6648 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6649 6650 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6651 const ENVScalar3 *f) 6652 { 6653 if (a->esz < MO_16 || a->esz > MO_32) { 6654 return false; 6655 } 6656 if (fp_access_check(s)) { 6657 TCGv_i32 t0 = tcg_temp_new_i32(); 6658 TCGv_i32 t1 = tcg_temp_new_i32(); 6659 TCGv_i32 t2 = tcg_temp_new_i32(); 6660 6661 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6662 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6663 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6664 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6665 write_fp_sreg(s, a->rd, t0); 6666 } 6667 return true; 6668 } 6669 6670 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6671 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6672 6673 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6674 NeonGenTwo64OpFn *fn, bool acc) 6675 { 6676 if (fp_access_check(s)) { 6677 TCGv_i64 t0 = tcg_temp_new_i64(); 6678 TCGv_i64 t1 = tcg_temp_new_i64(); 6679 TCGv_i64 t2 = tcg_temp_new_i64(); 6680 6681 if (acc) { 6682 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6683 } 6684 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6685 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6686 fn(t0, t1, t2); 6687 6688 /* Clear the whole register first, then store scalar. */ 6689 clear_vec(s, a->rd); 6690 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6691 } 6692 return true; 6693 } 6694 6695 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6696 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6697 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6698 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6699 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6700 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6701 6702 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6703 gen_helper_gvec_3_ptr * const fns[3]) 6704 { 6705 MemOp esz = a->esz; 6706 int check = fp_access_check_vector_hsd(s, a->q, esz); 6707 6708 if (check <= 0) { 6709 return check == 0; 6710 } 6711 6712 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6713 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6714 a->idx, fns[esz - 1]); 6715 return true; 6716 } 6717 6718 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6719 gen_helper_gvec_fmul_idx_h, 6720 gen_helper_gvec_fmul_idx_s, 6721 gen_helper_gvec_fmul_idx_d, 6722 }; 6723 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6724 6725 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6726 gen_helper_gvec_fmulx_idx_h, 6727 gen_helper_gvec_fmulx_idx_s, 6728 gen_helper_gvec_fmulx_idx_d, 6729 }; 6730 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6731 6732 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6733 { 6734 static gen_helper_gvec_4_ptr * const fns[3][3] = { 6735 { gen_helper_gvec_fmla_idx_h, 6736 gen_helper_gvec_fmla_idx_s, 6737 gen_helper_gvec_fmla_idx_d }, 6738 { gen_helper_gvec_fmls_idx_h, 6739 gen_helper_gvec_fmls_idx_s, 6740 gen_helper_gvec_fmls_idx_d }, 6741 { gen_helper_gvec_ah_fmls_idx_h, 6742 gen_helper_gvec_ah_fmls_idx_s, 6743 gen_helper_gvec_ah_fmls_idx_d }, 6744 }; 6745 MemOp esz = a->esz; 6746 int check = fp_access_check_vector_hsd(s, a->q, esz); 6747 6748 if (check <= 0) { 6749 return check == 0; 6750 } 6751 6752 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6753 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6754 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); 6755 return true; 6756 } 6757 6758 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6759 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6760 6761 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6762 { 6763 if (fp_access_check(s)) { 6764 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6765 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6766 vec_full_reg_offset(s, a->rn), 6767 vec_full_reg_offset(s, a->rm), tcg_env, 6768 a->q ? 
16 : 8, vec_full_reg_size(s), 6769 data, gen_helper_gvec_fmlal_idx_a64); 6770 } 6771 return true; 6772 } 6773 6774 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6775 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6776 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6777 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6778 6779 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6780 gen_helper_gvec_3 * const fns[2]) 6781 { 6782 assert(a->esz == MO_16 || a->esz == MO_32); 6783 if (fp_access_check(s)) { 6784 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6785 } 6786 return true; 6787 } 6788 6789 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6790 gen_helper_gvec_mul_idx_h, 6791 gen_helper_gvec_mul_idx_s, 6792 }; 6793 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6794 6795 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6796 { 6797 static gen_helper_gvec_4 * const fns[2][2] = { 6798 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6799 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6800 }; 6801 6802 assert(a->esz == MO_16 || a->esz == MO_32); 6803 if (fp_access_check(s)) { 6804 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6805 a->idx, fns[a->esz - 1][sub]); 6806 } 6807 return true; 6808 } 6809 6810 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6811 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6812 6813 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6814 gen_helper_gvec_4 * const fns[2]) 6815 { 6816 assert(a->esz == MO_16 || a->esz == MO_32); 6817 if (fp_access_check(s)) { 6818 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6819 vec_full_reg_offset(s, a->rn), 6820 vec_full_reg_offset(s, a->rm), 6821 offsetof(CPUARMState, vfp.qc), 6822 a->q ? 
16 : 8, vec_full_reg_size(s), 6823 a->idx, fns[a->esz - 1]); 6824 } 6825 return true; 6826 } 6827 6828 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6829 gen_helper_neon_sqdmulh_idx_h, 6830 gen_helper_neon_sqdmulh_idx_s, 6831 }; 6832 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6833 6834 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6835 gen_helper_neon_sqrdmulh_idx_h, 6836 gen_helper_neon_sqrdmulh_idx_s, 6837 }; 6838 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6839 6840 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6841 gen_helper_neon_sqrdmlah_idx_h, 6842 gen_helper_neon_sqrdmlah_idx_s, 6843 }; 6844 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6845 f_vector_idx_sqrdmlah) 6846 6847 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6848 gen_helper_neon_sqrdmlsh_idx_h, 6849 gen_helper_neon_sqrdmlsh_idx_s, 6850 }; 6851 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6852 f_vector_idx_sqrdmlsh) 6853 6854 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6855 gen_helper_gvec_4 *fn) 6856 { 6857 if (fp_access_check(s)) { 6858 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6859 } 6860 return true; 6861 } 6862 6863 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6864 gen_helper_gvec_4_ptr *fn) 6865 { 6866 if (fp_access_check(s)) { 6867 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6868 } 6869 return true; 6870 } 6871 6872 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6873 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6874 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6875 gen_helper_gvec_sudot_idx_b) 6876 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6877 gen_helper_gvec_usdot_idx_b) 6878 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6879 gen_helper_gvec_bfdot_idx) 6880 6881 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6882 { 6883 if (!dc_isar_feature(aa64_bf16, s)) { 6884 return false; 6885 } 6886 if (fp_access_check(s)) { 6887 /* Q bit selects BFMLALB vs BFMLALT. */ 6888 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6889 s->fpcr_ah ? FPST_AH : FPST_A64, 6890 (a->idx << 1) | a->q, 6891 gen_helper_gvec_bfmlal_idx); 6892 } 6893 return true; 6894 } 6895 6896 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6897 { 6898 gen_helper_gvec_4_ptr *fn; 6899 6900 if (!dc_isar_feature(aa64_fcma, s)) { 6901 return false; 6902 } 6903 switch (a->esz) { 6904 case MO_16: 6905 if (!dc_isar_feature(aa64_fp16, s)) { 6906 return false; 6907 } 6908 fn = gen_helper_gvec_fcmlah_idx; 6909 break; 6910 case MO_32: 6911 fn = gen_helper_gvec_fcmlas_idx; 6912 break; 6913 default: 6914 g_assert_not_reached(); 6915 } 6916 if (fp_access_check(s)) { 6917 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6918 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 6919 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); 6920 } 6921 return true; 6922 } 6923 6924 /* 6925 * Advanced SIMD scalar pairwise 6926 */ 6927 6928 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6929 { 6930 switch (a->esz) { 6931 case MO_64: 6932 if (fp_access_check(s)) { 6933 TCGv_i64 t0 = tcg_temp_new_i64(); 6934 TCGv_i64 t1 = tcg_temp_new_i64(); 6935 6936 read_vec_element(s, t0, a->rn, 0, MO_64); 6937 read_vec_element(s, t1, a->rn, 1, MO_64); 6938 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6939 write_fp_dreg(s, a->rd, t0); 6940 } 6941 break; 6942 case MO_32: 6943 if (fp_access_check(s)) { 6944 TCGv_i32 t0 = tcg_temp_new_i32(); 6945 TCGv_i32 t1 = tcg_temp_new_i32(); 6946 6947 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6948 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6949 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6950 write_fp_sreg(s, a->rd, t0); 6951 } 6952 break; 6953 case MO_16: 6954 if (!dc_isar_feature(aa64_fp16, s)) { 6955 return false; 6956 } 6957 if (fp_access_check(s)) { 6958 TCGv_i32 t0 = tcg_temp_new_i32(); 6959 TCGv_i32 t1 = tcg_temp_new_i32(); 6960 6961 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6962 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6963 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6964 write_fp_sreg(s, a->rd, t0); 6965 } 6966 break; 6967 default: 6968 g_assert_not_reached(); 6969 } 6970 return true; 6971 } 6972 6973 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, 6974 const FPScalar *fnormal, 6975 const FPScalar *fah) 6976 { 6977 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); 6978 } 6979 6980 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6981 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) 6982 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) 6983 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6984 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6985 6986 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6987 { 6988 if (fp_access_check(s)) { 6989 TCGv_i64 t0 = tcg_temp_new_i64(); 6990 TCGv_i64 t1 = tcg_temp_new_i64(); 6991 6992 read_vec_element(s, t0, a->rn, 0, MO_64); 6993 read_vec_element(s, t1, a->rn, 1, MO_64); 6994 tcg_gen_add_i64(t0, t0, t1); 6995 write_fp_dreg(s, a->rd, t0); 6996 } 6997 return true; 6998 } 6999 7000 /* 7001 * Floating-point conditional select 7002 */ 7003 7004 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 7005 { 7006 TCGv_i64 t_true, t_false; 7007 DisasCompare64 c; 7008 int check = fp_access_check_scalar_hsd(s, a->esz); 7009 7010 if (check <= 0) { 7011 return check == 0; 7012 } 7013 7014 /* Zero extend sreg & hreg inputs to 64 bits now. */ 7015 t_true = tcg_temp_new_i64(); 7016 t_false = tcg_temp_new_i64(); 7017 read_vec_element(s, t_true, a->rn, 0, a->esz); 7018 read_vec_element(s, t_false, a->rm, 0, a->esz); 7019 7020 a64_test_cc(&c, a->cond); 7021 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 7022 t_true, t_false); 7023 7024 /* 7025 * Note that sregs & hregs write back zeros to the high bits, 7026 * and we've already done the zero-extension. 
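* A single 64-bit register write therefore gives the correct result for every element size.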
7027 */ 7028 write_fp_dreg(s, a->rd, t_true); 7029 return true; 7030 } 7031 7032 /* 7033 * Advanced SIMD Extract 7034 */ 7035 7036 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 7037 { 7038 if (fp_access_check(s)) { 7039 TCGv_i64 lo = read_fp_dreg(s, a->rn); 7040 if (a->imm != 0) { 7041 TCGv_i64 hi = read_fp_dreg(s, a->rm); 7042 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 7043 } 7044 write_fp_dreg(s, a->rd, lo); 7045 } 7046 return true; 7047 } 7048 7049 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 7050 { 7051 TCGv_i64 lo, hi; 7052 int pos = (a->imm & 7) * 8; 7053 int elt = a->imm >> 3; 7054 7055 if (!fp_access_check(s)) { 7056 return true; 7057 } 7058 7059 lo = tcg_temp_new_i64(); 7060 hi = tcg_temp_new_i64(); 7061 7062 read_vec_element(s, lo, a->rn, elt, MO_64); 7063 elt++; 7064 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 7065 elt++; 7066 7067 if (pos != 0) { 7068 TCGv_i64 hh = tcg_temp_new_i64(); 7069 tcg_gen_extract2_i64(lo, lo, hi, pos); 7070 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 7071 tcg_gen_extract2_i64(hi, hi, hh, pos); 7072 } 7073 7074 write_vec_element(s, lo, a->rd, 0, MO_64); 7075 write_vec_element(s, hi, a->rd, 1, MO_64); 7076 clear_vec_high(s, true, a->rd); 7077 return true; 7078 } 7079 7080 /* 7081 * Floating-point data-processing (3 source) 7082 */ 7083 7084 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 7085 { 7086 TCGv_ptr fpst; 7087 7088 /* 7089 * These are fused multiply-add. Note that doing the negations here 7090 * as separate steps is correct: an input NaN should come out with 7091 * its sign bit flipped if it is a negated-input. 7092 */ 7093 switch (a->esz) { 7094 case MO_64: 7095 if (fp_access_check(s)) { 7096 TCGv_i64 tn = read_fp_dreg(s, a->rn); 7097 TCGv_i64 tm = read_fp_dreg(s, a->rm); 7098 TCGv_i64 ta = read_fp_dreg(s, a->ra); 7099 7100 if (neg_a) { 7101 gen_vfp_maybe_ah_negd(s, ta, ta); 7102 } 7103 if (neg_n) { 7104 gen_vfp_maybe_ah_negd(s, tn, tn); 7105 } 7106 fpst = fpstatus_ptr(FPST_A64); 7107 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 7108 write_fp_dreg_merging(s, a->rd, a->ra, ta); 7109 } 7110 break; 7111 7112 case MO_32: 7113 if (fp_access_check(s)) { 7114 TCGv_i32 tn = read_fp_sreg(s, a->rn); 7115 TCGv_i32 tm = read_fp_sreg(s, a->rm); 7116 TCGv_i32 ta = read_fp_sreg(s, a->ra); 7117 7118 if (neg_a) { 7119 gen_vfp_maybe_ah_negs(s, ta, ta); 7120 } 7121 if (neg_n) { 7122 gen_vfp_maybe_ah_negs(s, tn, tn); 7123 } 7124 fpst = fpstatus_ptr(FPST_A64); 7125 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 7126 write_fp_sreg_merging(s, a->rd, a->ra, ta); 7127 } 7128 break; 7129 7130 case MO_16: 7131 if (!dc_isar_feature(aa64_fp16, s)) { 7132 return false; 7133 } 7134 if (fp_access_check(s)) { 7135 TCGv_i32 tn = read_fp_hreg(s, a->rn); 7136 TCGv_i32 tm = read_fp_hreg(s, a->rm); 7137 TCGv_i32 ta = read_fp_hreg(s, a->ra); 7138 7139 if (neg_a) { 7140 gen_vfp_maybe_ah_negh(s, ta, ta); 7141 } 7142 if (neg_n) { 7143 gen_vfp_maybe_ah_negh(s, tn, tn); 7144 } 7145 fpst = fpstatus_ptr(FPST_A64_F16); 7146 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 7147 write_fp_hreg_merging(s, a->rd, a->ra, ta); 7148 } 7149 break; 7150 7151 default: 7152 return false; 7153 } 7154 return true; 7155 } 7156 7157 TRANS(FMADD, do_fmadd, a, false, false) 7158 TRANS(FNMADD, do_fmadd, a, true, true) 7159 TRANS(FMSUB, do_fmadd, a, false, true) 7160 TRANS(FNMSUB, do_fmadd, a, true, false) 7161 7162 /* 7163 * Advanced SIMD Across Lanes 7164 */ 7165 7166 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool 
widen, 7167 MemOp src_sign, NeonGenTwo64OpFn *fn) 7168 { 7169 TCGv_i64 tcg_res, tcg_elt; 7170 MemOp src_mop = a->esz | src_sign; 7171 int elements = (a->q ? 16 : 8) >> a->esz; 7172 7173 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 7174 if (elements < 4) { 7175 return false; 7176 } 7177 if (!fp_access_check(s)) { 7178 return true; 7179 } 7180 7181 tcg_res = tcg_temp_new_i64(); 7182 tcg_elt = tcg_temp_new_i64(); 7183 7184 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 7185 for (int i = 1; i < elements; i++) { 7186 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 7187 fn(tcg_res, tcg_res, tcg_elt); 7188 } 7189 7190 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 7191 write_fp_dreg(s, a->rd, tcg_res); 7192 return true; 7193 } 7194 7195 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 7196 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 7197 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 7198 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 7199 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 7200 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 7201 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 7202 7203 /* 7204 * do_fp_reduction helper 7205 * 7206 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7207 * important for correct NaN propagation that we do these 7208 * operations in exactly the order specified by the pseudocode. 7209 * 7210 * This is a recursive function. 7211 */ 7212 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 7213 int ebase, int ecount, TCGv_ptr fpst, 7214 NeonGenTwoSingleOpFn *fn) 7215 { 7216 if (ecount == 1) { 7217 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 7218 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 7219 return tcg_elem; 7220 } else { 7221 int half = ecount >> 1; 7222 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7223 7224 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 7225 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 7226 tcg_res = tcg_temp_new_i32(); 7227 7228 fn(tcg_res, tcg_lo, tcg_hi, fpst); 7229 return tcg_res; 7230 } 7231 } 7232 7233 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 7234 NeonGenTwoSingleOpFn *fnormal, 7235 NeonGenTwoSingleOpFn *fah) 7236 { 7237 if (fp_access_check(s)) { 7238 MemOp esz = a->esz; 7239 int elts = (a->q ? 16 : 8) >> esz; 7240 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 7241 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, 7242 s->fpcr_ah ? 
fah : fnormal); 7243 write_fp_sreg(s, a->rd, res); 7244 } 7245 return true; 7246 } 7247 7248 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, 7249 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) 7250 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, 7251 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) 7252 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, 7253 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) 7254 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, 7255 gen_helper_vfp_minh, gen_helper_vfp_ah_minh) 7256 7257 TRANS(FMAXNMV_s, do_fp_reduction, a, 7258 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) 7259 TRANS(FMINNMV_s, do_fp_reduction, a, 7260 gen_helper_vfp_minnums, gen_helper_vfp_minnums) 7261 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) 7262 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) 7263 7264 /* 7265 * Floating-point Immediate 7266 */ 7267 7268 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 7269 { 7270 int check = fp_access_check_scalar_hsd(s, a->esz); 7271 uint64_t imm; 7272 7273 if (check <= 0) { 7274 return check == 0; 7275 } 7276 7277 imm = vfp_expand_imm(a->esz, a->imm); 7278 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 7279 return true; 7280 } 7281 7282 /* 7283 * Floating point compare, conditional compare 7284 */ 7285 7286 static void handle_fp_compare(DisasContext *s, int size, 7287 unsigned int rn, unsigned int rm, 7288 bool cmp_with_zero, bool signal_all_nans) 7289 { 7290 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 7291 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); 7292 7293 if (size == MO_64) { 7294 TCGv_i64 tcg_vn, tcg_vm; 7295 7296 tcg_vn = read_fp_dreg(s, rn); 7297 if (cmp_with_zero) { 7298 tcg_vm = tcg_constant_i64(0); 7299 } else { 7300 tcg_vm = read_fp_dreg(s, rm); 7301 } 7302 if (signal_all_nans) { 7303 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7304 } else { 7305 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7306 } 7307 } else { 7308 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 7309 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 7310 7311 read_vec_element_i32(s, tcg_vn, rn, 0, size); 7312 if (cmp_with_zero) { 7313 tcg_gen_movi_i32(tcg_vm, 0); 7314 } else { 7315 read_vec_element_i32(s, tcg_vm, rm, 0, size); 7316 } 7317 7318 switch (size) { 7319 case MO_32: 7320 if (signal_all_nans) { 7321 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7322 } else { 7323 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7324 } 7325 break; 7326 case MO_16: 7327 if (signal_all_nans) { 7328 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7329 } else { 7330 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7331 } 7332 break; 7333 default: 7334 g_assert_not_reached(); 7335 } 7336 } 7337 7338 gen_set_nzcv(tcg_flags); 7339 } 7340 7341 /* FCMP, FCMPE */ 7342 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 7343 { 7344 int check = fp_access_check_scalar_hsd(s, a->esz); 7345 7346 if (check <= 0) { 7347 return check == 0; 7348 } 7349 7350 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 7351 return true; 7352 } 7353 7354 /* FCCMP, FCCMPE */ 7355 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 7356 { 7357 TCGLabel *label_continue = NULL; 7358 int check = fp_access_check_scalar_hsd(s, a->esz); 7359 7360 if (check <= 0) { 7361 return check == 0; 7362 } 7363 7364 if (a->cond < 0x0e) { /* not always */ 7365 TCGLabel *label_match = gen_new_label(); 7366 label_continue = gen_new_label(); 7367 
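/* If the condition holds, branch to the real compare below; otherwise set NZCV directly from the immediate and skip it. */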
arm_gen_test_cc(a->cond, label_match); 7368 /* nomatch: */ 7369 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7370 tcg_gen_br(label_continue); 7371 gen_set_label(label_match); 7372 } 7373 7374 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7375 7376 if (label_continue) { 7377 gen_set_label(label_continue); 7378 } 7379 return true; 7380 } 7381 7382 /* 7383 * Advanced SIMD Modified Immediate 7384 */ 7385 7386 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7387 { 7388 if (!dc_isar_feature(aa64_fp16, s)) { 7389 return false; 7390 } 7391 if (fp_access_check(s)) { 7392 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7393 a->q ? 16 : 8, vec_full_reg_size(s), 7394 vfp_expand_imm(MO_16, a->abcdefgh)); 7395 } 7396 return true; 7397 } 7398 7399 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7400 int64_t c, uint32_t oprsz, uint32_t maxsz) 7401 { 7402 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7403 } 7404 7405 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7406 { 7407 GVecGen2iFn *fn; 7408 7409 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7410 if ((a->cmode & 1) && a->cmode < 12) { 7411 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7412 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 7413 } else { 7414 /* There is one unallocated cmode/op combination in this space */ 7415 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 7416 return false; 7417 } 7418 fn = gen_movi; 7419 } 7420 7421 if (fp_access_check(s)) { 7422 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 7423 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 7424 } 7425 return true; 7426 } 7427 7428 /* 7429 * Advanced SIMD Shift by Immediate 7430 */ 7431 7432 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 7433 { 7434 if (fp_access_check(s)) { 7435 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 7436 } 7437 return true; 7438 } 7439 7440 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 7441 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 7442 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 7443 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 7444 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 7445 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 7446 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 7447 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 7448 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 7449 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 7450 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 7451 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 7452 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 7453 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 7454 7455 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 7456 { 7457 TCGv_i64 tcg_rn, tcg_rd; 7458 int esz = a->esz; 7459 int esize; 7460 7461 if (!fp_access_check(s)) { 7462 return true; 7463 } 7464 7465 /* 7466 * For the LL variants the store is larger than the load, 7467 * so if rd == rn we would overwrite parts of our input. 7468 * So load everything right now and use shifts in the main loop. 
7469 */ 7470 tcg_rd = tcg_temp_new_i64(); 7471 tcg_rn = tcg_temp_new_i64(); 7472 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7473 7474 esize = 8 << esz; 7475 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7476 if (is_u) { 7477 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7478 } else { 7479 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7480 } 7481 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7482 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7483 } 7484 clear_vec_high(s, true, a->rd); 7485 return true; 7486 } 7487 7488 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7489 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7490 7491 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7492 { 7493 assert(shift >= 0 && shift <= 64); 7494 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7495 } 7496 7497 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7498 { 7499 assert(shift >= 0 && shift <= 64); 7500 if (shift == 64) { 7501 tcg_gen_movi_i64(dst, 0); 7502 } else { 7503 tcg_gen_shri_i64(dst, src, shift); 7504 } 7505 } 7506 7507 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7508 { 7509 gen_sshr_d(src, src, shift); 7510 tcg_gen_add_i64(dst, dst, src); 7511 } 7512 7513 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7514 { 7515 gen_ushr_d(src, src, shift); 7516 tcg_gen_add_i64(dst, dst, src); 7517 } 7518 7519 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7520 { 7521 assert(shift >= 0 && shift <= 32); 7522 if (shift) { 7523 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7524 tcg_gen_add_i64(dst, src, rnd); 7525 tcg_gen_sari_i64(dst, dst, shift); 7526 } else { 7527 tcg_gen_mov_i64(dst, src); 7528 } 7529 } 7530 7531 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7532 { 7533 assert(shift >= 0 && shift <= 32); 7534 if (shift) { 7535 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7536 tcg_gen_add_i64(dst, src, rnd); 7537 tcg_gen_shri_i64(dst, dst, shift); 7538 } else { 7539 tcg_gen_mov_i64(dst, src); 7540 } 7541 } 7542 7543 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7544 { 7545 assert(shift >= 0 && shift <= 64); 7546 if (shift == 0) { 7547 tcg_gen_mov_i64(dst, src); 7548 } else if (shift == 64) { 7549 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7550 tcg_gen_movi_i64(dst, 0); 7551 } else { 7552 TCGv_i64 rnd = tcg_temp_new_i64(); 7553 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7554 tcg_gen_sari_i64(dst, src, shift); 7555 tcg_gen_add_i64(dst, dst, rnd); 7556 } 7557 } 7558 7559 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7560 { 7561 assert(shift >= 0 && shift <= 64); 7562 if (shift == 0) { 7563 tcg_gen_mov_i64(dst, src); 7564 } else if (shift == 64) { 7565 /* Rounding will propagate bit 63 into bit 64. */ 7566 tcg_gen_shri_i64(dst, src, 63); 7567 } else { 7568 TCGv_i64 rnd = tcg_temp_new_i64(); 7569 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7570 tcg_gen_shri_i64(dst, src, shift); 7571 tcg_gen_add_i64(dst, dst, rnd); 7572 } 7573 } 7574 7575 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7576 { 7577 gen_srshr_d(src, src, shift); 7578 tcg_gen_add_i64(dst, dst, src); 7579 } 7580 7581 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7582 { 7583 gen_urshr_d(src, src, shift); 7584 tcg_gen_add_i64(dst, dst, src); 7585 } 7586 7587 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7588 { 7589 /* If shift is 64, dst is unchanged. 
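* All bits of the source are shifted out, so there is nothing to insert.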
*/ 7590 if (shift != 64) { 7591 tcg_gen_shri_i64(src, src, shift); 7592 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7593 } 7594 } 7595 7596 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7597 { 7598 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7599 } 7600 7601 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7602 WideShiftImmFn * const fns[3], MemOp sign) 7603 { 7604 TCGv_i64 tcg_rn, tcg_rd; 7605 int esz = a->esz; 7606 int esize; 7607 WideShiftImmFn *fn; 7608 7609 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7610 7611 if (!fp_access_check(s)) { 7612 return true; 7613 } 7614 7615 tcg_rn = tcg_temp_new_i64(); 7616 tcg_rd = tcg_temp_new_i64(); 7617 tcg_gen_movi_i64(tcg_rd, 0); 7618 7619 fn = fns[esz]; 7620 esize = 8 << esz; 7621 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7622 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7623 fn(tcg_rn, tcg_rn, a->imm); 7624 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7625 } 7626 7627 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7628 clear_vec_high(s, a->q, a->rd); 7629 return true; 7630 } 7631 7632 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7633 { 7634 tcg_gen_sari_i64(d, s, i); 7635 tcg_gen_ext16u_i64(d, d); 7636 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7637 } 7638 7639 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7640 { 7641 tcg_gen_sari_i64(d, s, i); 7642 tcg_gen_ext32u_i64(d, d); 7643 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7644 } 7645 7646 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7647 { 7648 gen_sshr_d(d, s, i); 7649 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7650 } 7651 7652 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7653 { 7654 tcg_gen_shri_i64(d, s, i); 7655 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7656 } 7657 7658 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7659 { 7660 tcg_gen_shri_i64(d, s, i); 7661 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7662 } 7663 7664 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7665 { 7666 gen_ushr_d(d, s, i); 7667 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7668 } 7669 7670 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7671 { 7672 tcg_gen_sari_i64(d, s, i); 7673 tcg_gen_ext16u_i64(d, d); 7674 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7675 } 7676 7677 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7678 { 7679 tcg_gen_sari_i64(d, s, i); 7680 tcg_gen_ext32u_i64(d, d); 7681 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7682 } 7683 7684 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7685 { 7686 gen_sshr_d(d, s, i); 7687 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7688 } 7689 7690 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7691 { 7692 gen_srshr_bhs(d, s, i); 7693 tcg_gen_ext16u_i64(d, d); 7694 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7695 } 7696 7697 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7698 { 7699 gen_srshr_bhs(d, s, i); 7700 tcg_gen_ext32u_i64(d, d); 7701 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7702 } 7703 7704 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7705 { 7706 gen_srshr_d(d, s, i); 7707 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7708 } 7709 7710 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7711 { 7712 gen_urshr_bhs(d, s, i); 7713 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7714 } 7715 7716 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7717 
{ 7718 gen_urshr_bhs(d, s, i); 7719 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7720 } 7721 7722 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7723 { 7724 gen_urshr_d(d, s, i); 7725 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7726 } 7727 7728 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7729 { 7730 gen_srshr_bhs(d, s, i); 7731 tcg_gen_ext16u_i64(d, d); 7732 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7733 } 7734 7735 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7736 { 7737 gen_srshr_bhs(d, s, i); 7738 tcg_gen_ext32u_i64(d, d); 7739 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7740 } 7741 7742 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7743 { 7744 gen_srshr_d(d, s, i); 7745 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7746 } 7747 7748 static WideShiftImmFn * const shrn_fns[] = { 7749 tcg_gen_shri_i64, 7750 tcg_gen_shri_i64, 7751 gen_ushr_d, 7752 }; 7753 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7754 7755 static WideShiftImmFn * const rshrn_fns[] = { 7756 gen_urshr_bhs, 7757 gen_urshr_bhs, 7758 gen_urshr_d, 7759 }; 7760 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7761 7762 static WideShiftImmFn * const sqshrn_fns[] = { 7763 gen_sqshrn_b, 7764 gen_sqshrn_h, 7765 gen_sqshrn_s, 7766 }; 7767 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7768 7769 static WideShiftImmFn * const uqshrn_fns[] = { 7770 gen_uqshrn_b, 7771 gen_uqshrn_h, 7772 gen_uqshrn_s, 7773 }; 7774 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7775 7776 static WideShiftImmFn * const sqshrun_fns[] = { 7777 gen_sqshrun_b, 7778 gen_sqshrun_h, 7779 gen_sqshrun_s, 7780 }; 7781 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7782 7783 static WideShiftImmFn * const sqrshrn_fns[] = { 7784 gen_sqrshrn_b, 7785 gen_sqrshrn_h, 7786 gen_sqrshrn_s, 7787 }; 7788 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7789 7790 static WideShiftImmFn * const uqrshrn_fns[] = { 7791 gen_uqrshrn_b, 7792 gen_uqrshrn_h, 7793 gen_uqrshrn_s, 7794 }; 7795 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7796 7797 static WideShiftImmFn * const sqrshrun_fns[] = { 7798 gen_sqrshrun_b, 7799 gen_sqrshrun_h, 7800 gen_sqrshrun_s, 7801 }; 7802 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7803 7804 /* 7805 * Advanced SIMD Scalar Shift by Immediate 7806 */ 7807 7808 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7809 WideShiftImmFn *fn, bool accumulate, 7810 MemOp sign) 7811 { 7812 if (fp_access_check(s)) { 7813 TCGv_i64 rd = tcg_temp_new_i64(); 7814 TCGv_i64 rn = tcg_temp_new_i64(); 7815 7816 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7817 if (accumulate) { 7818 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7819 } 7820 fn(rd, rn, a->imm); 7821 write_fp_dreg(s, a->rd, rd); 7822 } 7823 return true; 7824 } 7825 7826 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7827 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7828 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7829 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7830 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7831 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7832 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7833 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7834 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7835 7836 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7837 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7838 7839 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7840 NeonGenTwoOpEnvFn *fn) 7841 { 7842 TCGv_i32 t = tcg_temp_new_i32(); 7843 tcg_gen_extrl_i64_i32(t, s); 7844 fn(t, tcg_env, t, tcg_constant_i32(i)); 7845 tcg_gen_extu_i32_i64(d, t); 7846 } 7847 7848 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7849 { 7850 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7851 } 7852 7853 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7854 { 7855 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7856 } 7857 7858 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7859 { 7860 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7861 } 7862 7863 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7864 { 7865 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7866 } 7867 7868 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7869 { 7870 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7871 } 7872 7873 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7874 { 7875 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7876 } 7877 7878 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7879 { 7880 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7881 } 7882 7883 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7884 { 7885 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7886 } 7887 7888 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7889 { 7890 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7891 } 7892 7893 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7894 { 7895 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7896 } 7897 7898 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7899 { 7900 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7901 } 7902 7903 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7904 { 7905 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7906 } 7907 7908 static WideShiftImmFn * const f_scalar_sqshli[] = { 7909 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7910 }; 7911 7912 static WideShiftImmFn * const f_scalar_uqshli[] = { 7913 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7914 }; 7915 7916 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7917 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7918 }; 7919 7920 /* Note that the helpers sign-extend their inputs, so don't do it here. 
*/ 7921 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7922 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7923 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7924 7925 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7926 WideShiftImmFn * const fns[3], 7927 MemOp sign, bool zext) 7928 { 7929 MemOp esz = a->esz; 7930 7931 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7932 7933 if (fp_access_check(s)) { 7934 TCGv_i64 rd = tcg_temp_new_i64(); 7935 TCGv_i64 rn = tcg_temp_new_i64(); 7936 7937 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7938 fns[esz](rd, rn, a->imm); 7939 if (zext) { 7940 tcg_gen_ext_i64(rd, rd, esz); 7941 } 7942 write_fp_dreg(s, a->rd, rd); 7943 } 7944 return true; 7945 } 7946 7947 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7948 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7949 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7950 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7951 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7952 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7953 7954 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7955 { 7956 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7957 tcg_rd = cpu_reg(s, a->rd); 7958 7959 if (!a->sf && is_signed) { 7960 tcg_n = tcg_temp_new_i64(); 7961 tcg_m = tcg_temp_new_i64(); 7962 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7963 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7964 } else { 7965 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7966 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7967 } 7968 7969 if (is_signed) { 7970 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7971 } else { 7972 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7973 } 7974 7975 if (!a->sf) { /* zero extend final result */ 7976 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7977 } 7978 return true; 7979 } 7980 7981 TRANS(SDIV, do_div, a, true) 7982 TRANS(UDIV, do_div, a, false) 7983 7984 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7985 * Note that it is the caller's responsibility to ensure that the 7986 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7987 * mandated semantics for out of range shifts. 7988 */ 7989 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7990 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7991 { 7992 switch (shift_type) { 7993 case A64_SHIFT_TYPE_LSL: 7994 tcg_gen_shl_i64(dst, src, shift_amount); 7995 break; 7996 case A64_SHIFT_TYPE_LSR: 7997 tcg_gen_shr_i64(dst, src, shift_amount); 7998 break; 7999 case A64_SHIFT_TYPE_ASR: 8000 if (!sf) { 8001 tcg_gen_ext32s_i64(dst, src); 8002 } 8003 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 8004 break; 8005 case A64_SHIFT_TYPE_ROR: 8006 if (sf) { 8007 tcg_gen_rotr_i64(dst, src, shift_amount); 8008 } else { 8009 TCGv_i32 t0, t1; 8010 t0 = tcg_temp_new_i32(); 8011 t1 = tcg_temp_new_i32(); 8012 tcg_gen_extrl_i64_i32(t0, src); 8013 tcg_gen_extrl_i64_i32(t1, shift_amount); 8014 tcg_gen_rotr_i32(t0, t0, t1); 8015 tcg_gen_extu_i32_i64(dst, t0); 8016 } 8017 break; 8018 default: 8019 assert(FALSE); /* all shift types should be handled */ 8020 break; 8021 } 8022 8023 if (!sf) { /* zero extend final result */ 8024 tcg_gen_ext32u_i64(dst, dst); 8025 } 8026 } 8027 8028 /* Shift a TCGv src by immediate, put result in dst. 
8029 * The shift amount must be in range (this should always be true as the 8030 * relevant instructions will UNDEF on bad shift immediates). 8031 */ 8032 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 8033 enum a64_shift_type shift_type, unsigned int shift_i) 8034 { 8035 assert(shift_i < (sf ? 64 : 32)); 8036 8037 if (shift_i == 0) { 8038 tcg_gen_mov_i64(dst, src); 8039 } else { 8040 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 8041 } 8042 } 8043 8044 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 8045 enum a64_shift_type shift_type) 8046 { 8047 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8048 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8049 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8050 8051 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 8052 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 8053 return true; 8054 } 8055 8056 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 8057 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 8058 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 8059 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 8060 8061 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 8062 { 8063 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 8064 TCGv_i32 tcg_bytes; 8065 8066 switch (a->esz) { 8067 case MO_8: 8068 case MO_16: 8069 case MO_32: 8070 tcg_val = tcg_temp_new_i64(); 8071 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 8072 break; 8073 case MO_64: 8074 tcg_val = cpu_reg(s, a->rm); 8075 break; 8076 default: 8077 g_assert_not_reached(); 8078 } 8079 tcg_acc = cpu_reg(s, a->rn); 8080 tcg_bytes = tcg_constant_i32(1 << a->esz); 8081 tcg_rd = cpu_reg(s, a->rd); 8082 8083 if (crc32c) { 8084 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8085 } else { 8086 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8087 } 8088 return true; 8089 } 8090 8091 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 8092 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 8093 8094 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 8095 { 8096 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 8097 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 8098 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 8099 8100 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 8101 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8102 8103 if (setflag) { 8104 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8105 } else { 8106 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8107 } 8108 return true; 8109 } 8110 8111 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 8112 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 8113 8114 static bool trans_IRG(DisasContext *s, arg_rrr *a) 8115 { 8116 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8117 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 8118 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 8119 8120 if (s->ata[0]) { 8121 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 8122 } else { 8123 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 8124 } 8125 return true; 8126 } 8127 return false; 8128 } 8129 8130 static bool trans_GMI(DisasContext *s, arg_rrr *a) 8131 { 8132 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8133 TCGv_i64 t = tcg_temp_new_i64(); 8134 8135 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 8136 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8137 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 8138 return true; 8139 } 8140 return false; 8141 } 8142 8143 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 8144 { 8145 if (dc_isar_feature(aa64_pauth, s)) { 8146 
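        /*
         * PACGA: compute a generic pointer authentication code for the
         * address in Xn using the modifier in Xm (SP is a permitted
         * modifier, hence cpu_reg_sp); the result is placed in the top
         * 32 bits of Xd with the bottom 32 bits cleared.
         */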
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 8147 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 8148 return true; 8149 } 8150 return false; 8151 } 8152 8153 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 8154 8155 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 8156 { 8157 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 8158 return true; 8159 } 8160 8161 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8162 { 8163 TCGv_i32 t32 = tcg_temp_new_i32(); 8164 8165 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8166 gen_helper_rbit(t32, t32); 8167 tcg_gen_extu_i32_i64(tcg_rd, t32); 8168 } 8169 8170 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 8171 { 8172 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8173 8174 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8175 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8176 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8177 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8178 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8179 } 8180 8181 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8182 { 8183 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 8184 } 8185 8186 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8187 { 8188 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 8189 } 8190 8191 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8192 { 8193 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8194 } 8195 8196 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8197 { 8198 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8199 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8200 } 8201 8202 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 8203 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 8204 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 8205 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 8206 8207 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8208 { 8209 TCGv_i32 t32 = tcg_temp_new_i32(); 8210 8211 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8212 tcg_gen_clzi_i32(t32, t32, 32); 8213 tcg_gen_extu_i32_i64(tcg_rd, t32); 8214 } 8215 8216 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8217 { 8218 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8219 } 8220 8221 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8222 { 8223 TCGv_i32 t32 = tcg_temp_new_i32(); 8224 8225 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8226 tcg_gen_clrsb_i32(t32, t32); 8227 tcg_gen_extu_i32_i64(tcg_rd, t32); 8228 } 8229 8230 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 8231 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) 8232 8233 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) 8234 { 8235 TCGv_i64 tcg_rd, tcg_rn; 8236 8237 if (a->z) { 8238 if (a->rn != 31) { 8239 return false; 8240 } 8241 tcg_rn = tcg_constant_i64(0); 8242 } else { 8243 tcg_rn = cpu_reg_sp(s, a->rn); 8244 } 8245 if (s->pauth_active) { 8246 tcg_rd = cpu_reg(s, a->rd); 8247 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); 8248 } 8249 return true; 8250 } 8251 8252 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) 8253 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) 8254 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) 8255 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb) 8256 8257 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) 8258 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) 8259 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) 8260 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) 8261 8262 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) 8263 { 8264 if (s->pauth_active) { 8265 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8266 fn(tcg_rd, tcg_env, tcg_rd); 8267 } 8268 return true; 8269 } 8270 8271 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) 8272 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) 8273 8274 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, 8275 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) 8276 { 8277 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 8278 8279 if (!a->sf && (a->sa & (1 << 5))) { 8280 return false; 8281 } 8282 8283 tcg_rd = cpu_reg(s, a->rd); 8284 tcg_rn = cpu_reg(s, a->rn); 8285 8286 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8287 if (a->sa) { 8288 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8289 } 8290 8291 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); 8292 if (!a->sf) { 8293 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8294 } 8295 if (setflags) { 8296 gen_logic_CC(a->sf, tcg_rd); 8297 } 8298 return true; 8299 } 8300 8301 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) 8302 { 8303 /* 8304 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for 8305 * register-register MOV and MVN, so it is worth special casing. 
8306 */ 8307 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 8308 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8309 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8310 8311 if (a->n) { 8312 tcg_gen_not_i64(tcg_rd, tcg_rm); 8313 if (!a->sf) { 8314 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8315 } 8316 } else { 8317 if (a->sf) { 8318 tcg_gen_mov_i64(tcg_rd, tcg_rm); 8319 } else { 8320 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 8321 } 8322 } 8323 return true; 8324 } 8325 8326 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 8327 } 8328 8329 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 8330 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 8331 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 8332 8333 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 8334 bool sub_op, bool setflags) 8335 { 8336 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 8337 8338 if (a->sa > 4) { 8339 return false; 8340 } 8341 8342 /* non-flag setting ops may use SP */ 8343 if (!setflags) { 8344 tcg_rd = cpu_reg_sp(s, a->rd); 8345 } else { 8346 tcg_rd = cpu_reg(s, a->rd); 8347 } 8348 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 8349 8350 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8351 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 8352 8353 tcg_result = tcg_temp_new_i64(); 8354 if (!setflags) { 8355 if (sub_op) { 8356 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8357 } else { 8358 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8359 } 8360 } else { 8361 if (sub_op) { 8362 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8363 } else { 8364 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8365 } 8366 } 8367 8368 if (a->sf) { 8369 tcg_gen_mov_i64(tcg_rd, tcg_result); 8370 } else { 8371 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8372 } 8373 return true; 8374 } 8375 8376 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8377 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8378 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8379 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8380 8381 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8382 bool sub_op, bool setflags) 8383 { 8384 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8385 8386 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8387 return false; 8388 } 8389 8390 tcg_rd = cpu_reg(s, a->rd); 8391 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8392 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8393 8394 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8395 8396 tcg_result = tcg_temp_new_i64(); 8397 if (!setflags) { 8398 if (sub_op) { 8399 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8400 } else { 8401 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8402 } 8403 } else { 8404 if (sub_op) { 8405 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8406 } else { 8407 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8408 } 8409 } 8410 8411 if (a->sf) { 8412 tcg_gen_mov_i64(tcg_rd, tcg_result); 8413 } else { 8414 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8415 } 8416 return true; 8417 } 8418 8419 TRANS(ADD_r, do_addsub_reg, a, false, false) 8420 TRANS(SUB_r, do_addsub_reg, a, true, false) 8421 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8422 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8423 8424 static bool do_mulh(DisasContext *s, arg_rrr *a, 8425 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8426 { 8427 TCGv_i64 discard = tcg_temp_new_i64(); 8428 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8429 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8430 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8431 8432 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8433 return true; 8434 } 8435 
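/*
 * For SMULH/UMULH, tcg_gen_muls2_i64/tcg_gen_mulu2_i64 produce the full
 * 128-bit product as a (low, high) pair of i64s.  Only the high half is
 * architecturally visible in Xd; the low half written to the discard
 * temporary above is dead and can be eliminated by the TCG optimizer.
 */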
8436 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8437 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8438 8439 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8440 bool sf, bool is_sub, MemOp mop) 8441 { 8442 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8443 TCGv_i64 tcg_op1, tcg_op2; 8444 8445 if (mop == MO_64) { 8446 tcg_op1 = cpu_reg(s, a->rn); 8447 tcg_op2 = cpu_reg(s, a->rm); 8448 } else { 8449 tcg_op1 = tcg_temp_new_i64(); 8450 tcg_op2 = tcg_temp_new_i64(); 8451 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8452 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8453 } 8454 8455 if (a->ra == 31 && !is_sub) { 8456 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8457 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8458 } else { 8459 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8460 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8461 8462 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8463 if (is_sub) { 8464 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8465 } else { 8466 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8467 } 8468 } 8469 8470 if (!sf) { 8471 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8472 } 8473 return true; 8474 } 8475 8476 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8477 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8478 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8479 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8480 8481 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8482 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8483 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8484 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8485 8486 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8487 bool is_sub, bool setflags) 8488 { 8489 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8490 8491 tcg_rd = cpu_reg(s, a->rd); 8492 tcg_rn = cpu_reg(s, a->rn); 8493 8494 if (is_sub) { 8495 tcg_y = tcg_temp_new_i64(); 8496 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8497 } else { 8498 tcg_y = cpu_reg(s, a->rm); 8499 } 8500 8501 if (setflags) { 8502 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8503 } else { 8504 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8505 } 8506 return true; 8507 } 8508 8509 TRANS(ADC, do_adc_sbc, a, false, false) 8510 TRANS(SBC, do_adc_sbc, a, true, false) 8511 TRANS(ADCS, do_adc_sbc, a, false, true) 8512 TRANS(SBCS, do_adc_sbc, a, true, true) 8513 8514 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8515 { 8516 int mask = a->mask; 8517 TCGv_i64 tcg_rn; 8518 TCGv_i32 nzcv; 8519 8520 if (!dc_isar_feature(aa64_condm_4, s)) { 8521 return false; 8522 } 8523 8524 tcg_rn = read_cpu_reg(s, a->rn, 1); 8525 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8526 8527 nzcv = tcg_temp_new_i32(); 8528 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8529 8530 if (mask & 8) { /* N */ 8531 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8532 } 8533 if (mask & 4) { /* Z */ 8534 tcg_gen_not_i32(cpu_ZF, nzcv); 8535 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8536 } 8537 if (mask & 2) { /* C */ 8538 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8539 } 8540 if (mask & 1) { /* V */ 8541 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8542 } 8543 return true; 8544 } 8545 8546 static bool do_setf(DisasContext *s, int rn, int shift) 8547 { 8548 TCGv_i32 tmp = tcg_temp_new_i32(); 8549 8550 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8551 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8552 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8553 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8554 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8555 return true; 8556 } 8557 8558 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8559 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8560 8561 /* 
CCMP, CCMN */ 8562 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8563 { 8564 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8565 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8566 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8567 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8568 TCGv_i64 tcg_rn, tcg_y; 8569 DisasCompare c; 8570 unsigned nzcv; 8571 bool has_andc; 8572 8573 /* Set T0 = !COND. */ 8574 arm_test_cc(&c, a->cond); 8575 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8576 8577 /* Load the arguments for the new comparison. */ 8578 if (a->imm) { 8579 tcg_y = tcg_constant_i64(a->y); 8580 } else { 8581 tcg_y = cpu_reg(s, a->y); 8582 } 8583 tcg_rn = cpu_reg(s, a->rn); 8584 8585 /* Set the flags for the new comparison. */ 8586 if (a->op) { 8587 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8588 } else { 8589 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8590 } 8591 8592 /* 8593 * If COND was false, force the flags to #nzcv. Compute two masks 8594 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8595 * For tcg hosts that support ANDC, we can make do with just T1. 8596 * In either case, allow the tcg optimizer to delete any unused mask. 8597 */ 8598 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8599 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8600 8601 nzcv = a->nzcv; 8602 has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0); 8603 if (nzcv & 8) { /* N */ 8604 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8605 } else { 8606 if (has_andc) { 8607 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8608 } else { 8609 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8610 } 8611 } 8612 if (nzcv & 4) { /* Z */ 8613 if (has_andc) { 8614 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8615 } else { 8616 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8617 } 8618 } else { 8619 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8620 } 8621 if (nzcv & 2) { /* C */ 8622 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8623 } else { 8624 if (has_andc) { 8625 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8626 } else { 8627 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8628 } 8629 } 8630 if (nzcv & 1) { /* V */ 8631 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8632 } else { 8633 if (has_andc) { 8634 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8635 } else { 8636 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8637 } 8638 } 8639 return true; 8640 } 8641 8642 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8643 { 8644 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8645 TCGv_i64 zero = tcg_constant_i64(0); 8646 DisasCompare64 c; 8647 8648 a64_test_cc(&c, a->cond); 8649 8650 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8651 /* CSET & CSETM. 
*/ 8652 if (a->else_inv) { 8653 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8654 tcg_rd, c.value, zero); 8655 } else { 8656 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8657 tcg_rd, c.value, zero); 8658 } 8659 } else { 8660 TCGv_i64 t_true = cpu_reg(s, a->rn); 8661 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8662 8663 if (a->else_inv && a->else_inc) { 8664 tcg_gen_neg_i64(t_false, t_false); 8665 } else if (a->else_inv) { 8666 tcg_gen_not_i64(t_false, t_false); 8667 } else if (a->else_inc) { 8668 tcg_gen_addi_i64(t_false, t_false, 1); 8669 } 8670 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8671 } 8672 8673 if (!a->sf) { 8674 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8675 } 8676 return true; 8677 } 8678 8679 typedef struct FPScalar1Int { 8680 void (*gen_h)(TCGv_i32, TCGv_i32); 8681 void (*gen_s)(TCGv_i32, TCGv_i32); 8682 void (*gen_d)(TCGv_i64, TCGv_i64); 8683 } FPScalar1Int; 8684 8685 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8686 const FPScalar1Int *f, 8687 bool merging) 8688 { 8689 switch (a->esz) { 8690 case MO_64: 8691 if (fp_access_check(s)) { 8692 TCGv_i64 t = read_fp_dreg(s, a->rn); 8693 f->gen_d(t, t); 8694 if (merging) { 8695 write_fp_dreg_merging(s, a->rd, a->rd, t); 8696 } else { 8697 write_fp_dreg(s, a->rd, t); 8698 } 8699 } 8700 break; 8701 case MO_32: 8702 if (fp_access_check(s)) { 8703 TCGv_i32 t = read_fp_sreg(s, a->rn); 8704 f->gen_s(t, t); 8705 if (merging) { 8706 write_fp_sreg_merging(s, a->rd, a->rd, t); 8707 } else { 8708 write_fp_sreg(s, a->rd, t); 8709 } 8710 } 8711 break; 8712 case MO_16: 8713 if (!dc_isar_feature(aa64_fp16, s)) { 8714 return false; 8715 } 8716 if (fp_access_check(s)) { 8717 TCGv_i32 t = read_fp_hreg(s, a->rn); 8718 f->gen_h(t, t); 8719 if (merging) { 8720 write_fp_hreg_merging(s, a->rd, a->rd, t); 8721 } else { 8722 write_fp_sreg(s, a->rd, t); 8723 } 8724 } 8725 break; 8726 default: 8727 return false; 8728 } 8729 return true; 8730 } 8731 8732 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, 8733 const FPScalar1Int *fnormal, 8734 const FPScalar1Int *fah) 8735 { 8736 return do_fp1_scalar_int(s, a, s->fpcr_ah ? 
fah : fnormal, true); 8737 } 8738 8739 static const FPScalar1Int f_scalar_fmov = { 8740 tcg_gen_mov_i32, 8741 tcg_gen_mov_i32, 8742 tcg_gen_mov_i64, 8743 }; 8744 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) 8745 8746 static const FPScalar1Int f_scalar_fabs = { 8747 gen_vfp_absh, 8748 gen_vfp_abss, 8749 gen_vfp_absd, 8750 }; 8751 static const FPScalar1Int f_scalar_ah_fabs = { 8752 gen_vfp_ah_absh, 8753 gen_vfp_ah_abss, 8754 gen_vfp_ah_absd, 8755 }; 8756 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) 8757 8758 static const FPScalar1Int f_scalar_fneg = { 8759 gen_vfp_negh, 8760 gen_vfp_negs, 8761 gen_vfp_negd, 8762 }; 8763 static const FPScalar1Int f_scalar_ah_fneg = { 8764 gen_vfp_ah_negh, 8765 gen_vfp_ah_negs, 8766 gen_vfp_ah_negd, 8767 }; 8768 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) 8769 8770 typedef struct FPScalar1 { 8771 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8772 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8773 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8774 } FPScalar1; 8775 8776 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, 8777 const FPScalar1 *f, int rmode, 8778 ARMFPStatusFlavour fpsttype) 8779 { 8780 TCGv_i32 tcg_rmode = NULL; 8781 TCGv_ptr fpst; 8782 TCGv_i64 t64; 8783 TCGv_i32 t32; 8784 int check = fp_access_check_scalar_hsd(s, a->esz); 8785 8786 if (check <= 0) { 8787 return check == 0; 8788 } 8789 8790 fpst = fpstatus_ptr(fpsttype); 8791 if (rmode >= 0) { 8792 tcg_rmode = gen_set_rmode(rmode, fpst); 8793 } 8794 8795 switch (a->esz) { 8796 case MO_64: 8797 t64 = read_fp_dreg(s, a->rn); 8798 f->gen_d(t64, t64, fpst); 8799 write_fp_dreg_merging(s, a->rd, a->rd, t64); 8800 break; 8801 case MO_32: 8802 t32 = read_fp_sreg(s, a->rn); 8803 f->gen_s(t32, t32, fpst); 8804 write_fp_sreg_merging(s, a->rd, a->rd, t32); 8805 break; 8806 case MO_16: 8807 t32 = read_fp_hreg(s, a->rn); 8808 f->gen_h(t32, t32, fpst); 8809 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8810 break; 8811 default: 8812 g_assert_not_reached(); 8813 } 8814 8815 if (rmode >= 0) { 8816 gen_restore_rmode(tcg_rmode, fpst); 8817 } 8818 return true; 8819 } 8820 8821 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8822 const FPScalar1 *f, int rmode) 8823 { 8824 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, 8825 a->esz == MO_16 ? 
8826 FPST_A64_F16 : FPST_A64); 8827 } 8828 8829 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, 8830 const FPScalar1 *f, int rmode) 8831 { 8832 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); 8833 } 8834 8835 static const FPScalar1 f_scalar_fsqrt = { 8836 gen_helper_vfp_sqrth, 8837 gen_helper_vfp_sqrts, 8838 gen_helper_vfp_sqrtd, 8839 }; 8840 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8841 8842 static const FPScalar1 f_scalar_frint = { 8843 gen_helper_advsimd_rinth, 8844 gen_helper_rints, 8845 gen_helper_rintd, 8846 }; 8847 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8848 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8849 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8850 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8851 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8852 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8853 8854 static const FPScalar1 f_scalar_frintx = { 8855 gen_helper_advsimd_rinth_exact, 8856 gen_helper_rints_exact, 8857 gen_helper_rintd_exact, 8858 }; 8859 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8860 8861 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) 8862 { 8863 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; 8864 TCGv_i32 t32; 8865 int check; 8866 8867 if (!dc_isar_feature(aa64_bf16, s)) { 8868 return false; 8869 } 8870 8871 check = fp_access_check_scalar_hsd(s, a->esz); 8872 8873 if (check <= 0) { 8874 return check == 0; 8875 } 8876 8877 t32 = read_fp_sreg(s, a->rn); 8878 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); 8879 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8880 return true; 8881 } 8882 8883 static const FPScalar1 f_scalar_frint32 = { 8884 NULL, 8885 gen_helper_frint32_s, 8886 gen_helper_frint32_d, 8887 }; 8888 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8889 &f_scalar_frint32, FPROUNDING_ZERO) 8890 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8891 8892 static const FPScalar1 f_scalar_frint64 = { 8893 NULL, 8894 gen_helper_frint64_s, 8895 gen_helper_frint64_d, 8896 }; 8897 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8898 &f_scalar_frint64, FPROUNDING_ZERO) 8899 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8900 8901 static const FPScalar1 f_scalar_frecpe = { 8902 gen_helper_recpe_f16, 8903 gen_helper_recpe_f32, 8904 gen_helper_recpe_f64, 8905 }; 8906 static const FPScalar1 f_scalar_frecpe_rpres = { 8907 gen_helper_recpe_f16, 8908 gen_helper_recpe_rpres_f32, 8909 gen_helper_recpe_f64, 8910 }; 8911 TRANS(FRECPE_s, do_fp1_scalar_ah, a, 8912 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 8913 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) 8914 8915 static const FPScalar1 f_scalar_frecpx = { 8916 gen_helper_frecpx_f16, 8917 gen_helper_frecpx_f32, 8918 gen_helper_frecpx_f64, 8919 }; 8920 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) 8921 8922 static const FPScalar1 f_scalar_frsqrte = { 8923 gen_helper_rsqrte_f16, 8924 gen_helper_rsqrte_f32, 8925 gen_helper_rsqrte_f64, 8926 }; 8927 static const FPScalar1 f_scalar_frsqrte_rpres = { 8928 gen_helper_rsqrte_f16, 8929 gen_helper_rsqrte_rpres_f32, 8930 gen_helper_rsqrte_f64, 8931 }; 8932 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, 8933 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
8934 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) 8935 8936 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8937 { 8938 if (fp_access_check(s)) { 8939 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8940 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8941 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8942 8943 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 8944 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 8945 } 8946 return true; 8947 } 8948 8949 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8950 { 8951 if (fp_access_check(s)) { 8952 TCGv_i32 tmp = read_fp_sreg(s, a->rn); 8953 TCGv_i32 ahp = get_ahp_flag(); 8954 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8955 8956 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 8957 /* write_fp_hreg_merging is OK here because top half of result is zero */ 8958 write_fp_hreg_merging(s, a->rd, a->rd, tmp); 8959 } 8960 return true; 8961 } 8962 8963 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 8964 { 8965 if (fp_access_check(s)) { 8966 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8967 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8968 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8969 8970 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); 8971 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 8972 } 8973 return true; 8974 } 8975 8976 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 8977 { 8978 if (fp_access_check(s)) { 8979 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8980 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8981 TCGv_i32 ahp = get_ahp_flag(); 8982 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8983 8984 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8985 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ 8986 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); 8987 } 8988 return true; 8989 } 8990 8991 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 8992 { 8993 if (fp_access_check(s)) { 8994 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8995 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8996 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 8997 TCGv_i32 tcg_ahp = get_ahp_flag(); 8998 8999 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9000 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 9001 } 9002 return true; 9003 } 9004 9005 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 9006 { 9007 if (fp_access_check(s)) { 9008 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9009 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9010 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9011 TCGv_i32 tcg_ahp = get_ahp_flag(); 9012 9013 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9014 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 9015 } 9016 return true; 9017 } 9018 9019 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 9020 TCGv_i64 tcg_int, bool is_signed) 9021 { 9022 TCGv_ptr tcg_fpstatus; 9023 TCGv_i32 tcg_shift, tcg_single; 9024 TCGv_i64 tcg_double; 9025 9026 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9027 tcg_shift = tcg_constant_i32(shift); 9028 9029 switch (esz) { 9030 case MO_64: 9031 tcg_double = tcg_temp_new_i64(); 9032 if (is_signed) { 9033 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9034 } else { 9035 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9036 } 9037 write_fp_dreg_merging(s, rd, rd, tcg_double); 9038 break; 9039 9040 case MO_32: 9041 tcg_single = tcg_temp_new_i32(); 9042 if (is_signed) { 9043 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9044 } else { 9045 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9046 } 9047 write_fp_sreg_merging(s, rd, rd, tcg_single); 9048 break; 9049 9050 case MO_16: 9051 tcg_single = tcg_temp_new_i32(); 9052 if (is_signed) { 9053 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9054 } else { 9055 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9056 } 9057 write_fp_hreg_merging(s, rd, rd, tcg_single); 9058 break; 9059 9060 default: 9061 g_assert_not_reached(); 9062 } 9063 return true; 9064 } 9065 9066 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 9067 { 9068 TCGv_i64 tcg_int; 9069 int check = fp_access_check_scalar_hsd(s, a->esz); 9070 9071 if (check <= 0) { 9072 return check == 0; 9073 } 9074 9075 if (a->sf) { 9076 tcg_int = cpu_reg(s, a->rn); 9077 } else { 9078 tcg_int = read_cpu_reg(s, a->rn, true); 9079 if (is_signed) { 9080 tcg_gen_ext32s_i64(tcg_int, tcg_int); 9081 } else { 9082 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9083 } 9084 } 9085 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9086 } 9087 9088 TRANS(SCVTF_g, do_cvtf_g, a, true) 9089 TRANS(UCVTF_g, do_cvtf_g, a, false) 9090 9091 /* 9092 * [US]CVTF (vector), scalar version. 9093 * Which sounds weird, but really just means input from fp register 9094 * instead of input from general register. Input and output element 9095 * size are always equal. 9096 */ 9097 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 9098 { 9099 TCGv_i64 tcg_int; 9100 int check = fp_access_check_scalar_hsd(s, a->esz); 9101 9102 if (check <= 0) { 9103 return check == 0; 9104 } 9105 9106 tcg_int = tcg_temp_new_i64(); 9107 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 9108 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9109 } 9110 9111 TRANS(SCVTF_f, do_cvtf_f, a, true) 9112 TRANS(UCVTF_f, do_cvtf_f, a, false) 9113 9114 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 9115 TCGv_i64 tcg_out, int shift, int rn, 9116 ARMFPRounding rmode) 9117 { 9118 TCGv_ptr tcg_fpstatus; 9119 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 9120 9121 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9122 tcg_shift = tcg_constant_i32(shift); 9123 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9124 9125 switch (esz) { 9126 case MO_64: 9127 read_vec_element(s, tcg_out, rn, 0, MO_64); 9128 switch (out) { 9129 case MO_64 | MO_SIGN: 9130 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9131 break; 9132 case MO_64: 9133 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9134 break; 9135 case MO_32 | MO_SIGN: 9136 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9137 break; 9138 case MO_32: 9139 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9140 break; 9141 default: 9142 g_assert_not_reached(); 9143 } 9144 break; 9145 9146 case MO_32: 9147 tcg_single = read_fp_sreg(s, rn); 9148 switch (out) { 9149 case MO_64 | MO_SIGN: 9150 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9151 break; 9152 case MO_64: 9153 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9154 break; 9155 case MO_32 | MO_SIGN: 9156 gen_helper_vfp_tosls(tcg_single, tcg_single, 9157 tcg_shift, tcg_fpstatus); 9158 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9159 break; 9160 case MO_32: 9161 gen_helper_vfp_touls(tcg_single, tcg_single, 9162 tcg_shift, tcg_fpstatus); 9163 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9164 break; 9165 default: 9166 g_assert_not_reached(); 9167 } 9168 break; 9169 9170 case MO_16: 9171 tcg_single = read_fp_hreg(s, rn); 9172 switch (out) { 9173 case MO_64 | MO_SIGN: 9174 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9175 break; 9176 case MO_64: 9177 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9178 break; 9179 case MO_32 | MO_SIGN: 9180 gen_helper_vfp_toslh(tcg_single, tcg_single, 9181 tcg_shift, tcg_fpstatus); 9182 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9183 break; 9184 case MO_32: 9185 gen_helper_vfp_toulh(tcg_single, tcg_single, 9186 tcg_shift, tcg_fpstatus); 9187 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9188 break; 9189 case MO_16 | MO_SIGN: 9190 gen_helper_vfp_toshh(tcg_single, tcg_single, 9191 tcg_shift, tcg_fpstatus); 9192 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9193 break; 9194 case MO_16: 9195 gen_helper_vfp_touhh(tcg_single, tcg_single, 9196 tcg_shift, tcg_fpstatus); 9197 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9198 break; 9199 default: 9200 g_assert_not_reached(); 9201 } 9202 break; 9203 9204 default: 9205 g_assert_not_reached(); 9206 } 9207 9208 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9209 } 9210 9211 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 9212 ARMFPRounding rmode, bool is_signed) 9213 { 9214 TCGv_i64 tcg_int; 9215 int check = fp_access_check_scalar_hsd(s, a->esz); 9216 9217 if (check <= 0) { 9218 return check == 0; 9219 } 9220 9221 tcg_int = cpu_reg(s, a->rd); 9222 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 9223 a->esz, tcg_int, a->shift, a->rn, rmode); 9224 9225 if (!a->sf) { 9226 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9227 } 9228 return true; 9229 } 9230 9231 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 9232 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 9233 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 9234 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 9235 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 9236 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 9237 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 9238 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 9239 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 9240 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 9241 9242 /* 9243 * FCVT* (vector), scalar version. 9244 * Which sounds weird, but really just means output to fp register 9245 * instead of output to general register. Input and output element 9246 * size are always equal. 9247 */ 9248 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 9249 ARMFPRounding rmode, bool is_signed) 9250 { 9251 TCGv_i64 tcg_int; 9252 int check = fp_access_check_scalar_hsd(s, a->esz); 9253 9254 if (check <= 0) { 9255 return check == 0; 9256 } 9257 9258 tcg_int = tcg_temp_new_i64(); 9259 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 9260 a->esz, tcg_int, a->shift, a->rn, rmode); 9261 9262 if (!s->fpcr_nep) { 9263 clear_vec(s, a->rd); 9264 } 9265 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 9266 return true; 9267 } 9268 9269 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 9270 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 9271 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 9272 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 9273 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 9274 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 9275 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 9276 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 9277 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 9278 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 9279 9280 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 9281 { 9282 if (!dc_isar_feature(aa64_jscvt, s)) { 9283 return false; 9284 } 9285 if (fp_access_check(s)) { 9286 TCGv_i64 t = read_fp_dreg(s, a->rn); 9287 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 9288 9289 gen_helper_fjcvtzs(t, t, fpstatus); 9290 9291 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 9292 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9293 tcg_gen_movi_i32(cpu_CF, 0); 9294 tcg_gen_movi_i32(cpu_NF, 0); 9295 tcg_gen_movi_i32(cpu_VF, 0); 9296 } 9297 return true; 9298 } 9299 9300 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 9301 { 9302 if (!dc_isar_feature(aa64_fp16, s)) { 9303 return false; 9304 } 9305 if (fp_access_check(s)) { 9306 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9307 TCGv_i64 tmp = tcg_temp_new_i64(); 9308 tcg_gen_ext16u_i64(tmp, tcg_rn); 9309 write_fp_dreg(s, a->rd, tmp); 9310 } 9311 return true; 9312 } 9313 9314 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 9315 { 9316 if (fp_access_check(s)) { 9317 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9318 TCGv_i64 tmp = tcg_temp_new_i64(); 9319 tcg_gen_ext32u_i64(tmp, tcg_rn); 9320 write_fp_dreg(s, a->rd, tmp); 9321 } 9322 return true; 9323 } 9324 9325 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 9326 { 9327 if (fp_access_check(s)) { 9328 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9329 write_fp_dreg(s, a->rd, tcg_rn); 9330 } 9331 return true; 9332 } 9333 9334 
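/*
 * FMOV Vd.D[1], Xn: store the general register into the high 64 bits of
 * the SIMD&FP register.  Bits [63:0] of Vd are preserved, while any
 * vector bits above 128 are zeroed by clear_vec_high().
 */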
static bool trans_FMOV_ux(DisasContext *s, arg_rr *a) 9335 { 9336 if (fp_access_check(s)) { 9337 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9338 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 9339 clear_vec_high(s, true, a->rd); 9340 } 9341 return true; 9342 } 9343 9344 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 9345 { 9346 if (!dc_isar_feature(aa64_fp16, s)) { 9347 return false; 9348 } 9349 if (fp_access_check(s)) { 9350 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9351 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 9352 } 9353 return true; 9354 } 9355 9356 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 9357 { 9358 if (fp_access_check(s)) { 9359 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9360 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 9361 } 9362 return true; 9363 } 9364 9365 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 9366 { 9367 if (fp_access_check(s)) { 9368 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9369 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 9370 } 9371 return true; 9372 } 9373 9374 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 9375 { 9376 if (fp_access_check(s)) { 9377 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9378 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 9379 } 9380 return true; 9381 } 9382 9383 typedef struct ENVScalar1 { 9384 NeonGenOneOpEnvFn *gen_bhs[3]; 9385 NeonGenOne64OpEnvFn *gen_d; 9386 } ENVScalar1; 9387 9388 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 9389 { 9390 if (!fp_access_check(s)) { 9391 return true; 9392 } 9393 if (a->esz == MO_64) { 9394 TCGv_i64 t = read_fp_dreg(s, a->rn); 9395 f->gen_d(t, tcg_env, t); 9396 write_fp_dreg(s, a->rd, t); 9397 } else { 9398 TCGv_i32 t = tcg_temp_new_i32(); 9399 9400 read_vec_element_i32(s, t, a->rn, 0, a->esz); 9401 f->gen_bhs[a->esz](t, tcg_env, t); 9402 write_fp_sreg(s, a->rd, t); 9403 } 9404 return true; 9405 } 9406 9407 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 9408 { 9409 if (a->esz == MO_64 && !a->q) { 9410 return false; 9411 } 9412 if (!fp_access_check(s)) { 9413 return true; 9414 } 9415 if (a->esz == MO_64) { 9416 TCGv_i64 t = tcg_temp_new_i64(); 9417 9418 for (int i = 0; i < 2; ++i) { 9419 read_vec_element(s, t, a->rn, i, MO_64); 9420 f->gen_d(t, tcg_env, t); 9421 write_vec_element(s, t, a->rd, i, MO_64); 9422 } 9423 } else { 9424 TCGv_i32 t = tcg_temp_new_i32(); 9425 int n = (a->q ? 
16 : 8) >> a->esz; 9426 9427 for (int i = 0; i < n; ++i) { 9428 read_vec_element_i32(s, t, a->rn, i, a->esz); 9429 f->gen_bhs[a->esz](t, tcg_env, t); 9430 write_vec_element_i32(s, t, a->rd, i, a->esz); 9431 } 9432 } 9433 clear_vec_high(s, a->q, a->rd); 9434 return true; 9435 } 9436 9437 static const ENVScalar1 f_scalar_sqabs = { 9438 { gen_helper_neon_qabs_s8, 9439 gen_helper_neon_qabs_s16, 9440 gen_helper_neon_qabs_s32 }, 9441 gen_helper_neon_qabs_s64, 9442 }; 9443 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9444 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9445 9446 static const ENVScalar1 f_scalar_sqneg = { 9447 { gen_helper_neon_qneg_s8, 9448 gen_helper_neon_qneg_s16, 9449 gen_helper_neon_qneg_s32 }, 9450 gen_helper_neon_qneg_s64, 9451 }; 9452 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9453 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9454 9455 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9456 { 9457 if (fp_access_check(s)) { 9458 TCGv_i64 t = read_fp_dreg(s, a->rn); 9459 f(t, t); 9460 write_fp_dreg(s, a->rd, t); 9461 } 9462 return true; 9463 } 9464 9465 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9466 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9467 9468 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9469 { 9470 if (fp_access_check(s)) { 9471 TCGv_i64 t = read_fp_dreg(s, a->rn); 9472 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9473 write_fp_dreg(s, a->rd, t); 9474 } 9475 return true; 9476 } 9477 9478 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9479 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9480 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9481 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9482 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9483 9484 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9485 ArithOneOp * const fn[3]) 9486 { 9487 if (a->esz == MO_64) { 9488 return false; 9489 } 9490 if (fp_access_check(s)) { 9491 TCGv_i64 t = tcg_temp_new_i64(); 9492 9493 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9494 fn[a->esz](t, t); 9495 clear_vec(s, a->rd); 9496 write_vec_element(s, t, a->rd, 0, a->esz); 9497 } 9498 return true; 9499 } 9500 9501 #define WRAP_ENV(NAME) \ 9502 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9503 { gen_helper_##NAME(d, tcg_env, n); } 9504 9505 WRAP_ENV(neon_unarrow_sat8) 9506 WRAP_ENV(neon_unarrow_sat16) 9507 WRAP_ENV(neon_unarrow_sat32) 9508 9509 static ArithOneOp * const f_scalar_sqxtun[] = { 9510 gen_neon_unarrow_sat8, 9511 gen_neon_unarrow_sat16, 9512 gen_neon_unarrow_sat32, 9513 }; 9514 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9515 9516 WRAP_ENV(neon_narrow_sat_s8) 9517 WRAP_ENV(neon_narrow_sat_s16) 9518 WRAP_ENV(neon_narrow_sat_s32) 9519 9520 static ArithOneOp * const f_scalar_sqxtn[] = { 9521 gen_neon_narrow_sat_s8, 9522 gen_neon_narrow_sat_s16, 9523 gen_neon_narrow_sat_s32, 9524 }; 9525 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9526 9527 WRAP_ENV(neon_narrow_sat_u8) 9528 WRAP_ENV(neon_narrow_sat_u16) 9529 WRAP_ENV(neon_narrow_sat_u32) 9530 9531 static ArithOneOp * const f_scalar_uqxtn[] = { 9532 gen_neon_narrow_sat_u8, 9533 gen_neon_narrow_sat_u16, 9534 gen_neon_narrow_sat_u32, 9535 }; 9536 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9537 9538 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) 9539 { 9540 if (fp_access_check(s)) { 9541 /* 9542 * 64 bit to 32 bit float conversion 9543 * with von Neumann rounding (round to odd) 9544 */ 9545 TCGv_i64 src = read_fp_dreg(s, a->rn); 9546 TCGv_i32 dst = 
tcg_temp_new_i32(); 9547 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); 9548 write_fp_sreg_merging(s, a->rd, a->rd, dst); 9549 } 9550 return true; 9551 } 9552 9553 #undef WRAP_ENV 9554 9555 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9556 { 9557 if (!a->q && a->esz == MO_64) { 9558 return false; 9559 } 9560 if (fp_access_check(s)) { 9561 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9562 } 9563 return true; 9564 } 9565 9566 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) 9567 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) 9568 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) 9569 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) 9570 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) 9571 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) 9572 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) 9573 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) 9574 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) 9575 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) 9576 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) 9577 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) 9578 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) 9579 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) 9580 9581 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9582 { 9583 if (a->esz == MO_64) { 9584 return false; 9585 } 9586 if (fp_access_check(s)) { 9587 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9588 } 9589 return true; 9590 } 9591 9592 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) 9593 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) 9594 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) 9595 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) 9596 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) 9597 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) 9598 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) 9599 9600 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, 9601 ArithOneOp * const fn[3]) 9602 { 9603 if (a->esz == MO_64) { 9604 return false; 9605 } 9606 if (fp_access_check(s)) { 9607 TCGv_i64 t0 = tcg_temp_new_i64(); 9608 TCGv_i64 t1 = tcg_temp_new_i64(); 9609 9610 read_vec_element(s, t0, a->rn, 0, MO_64); 9611 read_vec_element(s, t1, a->rn, 1, MO_64); 9612 fn[a->esz](t0, t0); 9613 fn[a->esz](t1, t1); 9614 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32); 9615 write_vec_element(s, t1, a->rd, a->q ? 
3 : 1, MO_32); 9616 clear_vec_high(s, a->q, a->rd); 9617 } 9618 return true; 9619 } 9620 9621 static ArithOneOp * const f_scalar_xtn[] = { 9622 gen_helper_neon_narrow_u8, 9623 gen_helper_neon_narrow_u16, 9624 tcg_gen_ext32u_i64, 9625 }; 9626 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9627 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9628 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9629 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9630 9631 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9632 { 9633 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9634 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9635 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9636 TCGv_i32 ahp = get_ahp_flag(); 9637 9638 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9639 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9640 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9641 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9642 tcg_gen_extu_i32_i64(d, tcg_lo); 9643 } 9644 9645 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9646 { 9647 TCGv_i32 tmp = tcg_temp_new_i32(); 9648 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9649 9650 gen_helper_vfp_fcvtsd(tmp, n, fpst); 9651 tcg_gen_extu_i32_i64(d, tmp); 9652 } 9653 9654 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9655 { 9656 /* 9657 * 64 bit to 32 bit float conversion 9658 * with von Neumann rounding (round to odd) 9659 */ 9660 TCGv_i32 tmp = tcg_temp_new_i32(); 9661 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); 9662 tcg_gen_extu_i32_i64(d, tmp); 9663 } 9664 9665 static ArithOneOp * const f_vector_fcvtn[] = { 9666 NULL, 9667 gen_fcvtn_hs, 9668 gen_fcvtn_sd, 9669 }; 9670 static ArithOneOp * const f_scalar_fcvtxn[] = { 9671 NULL, 9672 NULL, 9673 gen_fcvtxn_sd, 9674 }; 9675 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9676 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9677 9678 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9679 { 9680 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9681 TCGv_i32 tmp = tcg_temp_new_i32(); 9682 gen_helper_bfcvt_pair(tmp, n, fpst); 9683 tcg_gen_extu_i32_i64(d, tmp); 9684 } 9685 9686 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) 9687 { 9688 TCGv_ptr fpst = fpstatus_ptr(FPST_AH); 9689 TCGv_i32 tmp = tcg_temp_new_i32(); 9690 gen_helper_bfcvt_pair(tmp, n, fpst); 9691 tcg_gen_extu_i32_i64(d, tmp); 9692 } 9693 9694 static ArithOneOp * const f_vector_bfcvtn[2][3] = { 9695 { 9696 NULL, 9697 gen_bfcvtn_hs, 9698 NULL, 9699 }, { 9700 NULL, 9701 gen_bfcvtn_ah_hs, 9702 NULL, 9703 } 9704 }; 9705 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, 9706 f_vector_bfcvtn[s->fpcr_ah]) 9707 9708 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9709 { 9710 static NeonGenWidenFn * const widenfns[3] = { 9711 gen_helper_neon_widen_u8, 9712 gen_helper_neon_widen_u16, 9713 tcg_gen_extu_i32_i64, 9714 }; 9715 NeonGenWidenFn *widenfn; 9716 TCGv_i64 tcg_res[2]; 9717 TCGv_i32 tcg_op; 9718 int part, pass; 9719 9720 if (a->esz == MO_64) { 9721 return false; 9722 } 9723 if (!fp_access_check(s)) { 9724 return true; 9725 } 9726 9727 tcg_op = tcg_temp_new_i32(); 9728 widenfn = widenfns[a->esz]; 9729 part = a->q ? 
static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
{
    static NeonGenWidenFn * const widenfns[3] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    NeonGenWidenFn *widenfn;
    TCGv_i64 tcg_res[2];
    TCGv_i32 tcg_op;
    int part, pass;

    if (a->esz == MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    tcg_op = tcg_temp_new_i32();
    widenfn = widenfns[a->esz];
    part = a->q ? 2 : 0;

    for (pass = 0; pass < 2; pass++) {
        read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
        tcg_res[pass] = tcg_temp_new_i64();
        widenfn(tcg_res[pass], tcg_op);
        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
    }
    return true;
}

static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    return true;
}

TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)

static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
                          const FPScalar1 *f, int rmode)
{
    TCGv_i32 tcg_rmode = NULL;
    TCGv_ptr fpst;
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    if (rmode >= 0) {
        tcg_rmode = gen_set_rmode(rmode, fpst);
    }

    if (a->esz == MO_64) {
        TCGv_i64 t64 = tcg_temp_new_i64();

        for (int pass = 0; pass < 2; ++pass) {
            read_vec_element(s, t64, a->rn, pass, MO_64);
            f->gen_d(t64, t64, fpst);
            write_vec_element(s, t64, a->rd, pass, MO_64);
        }
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
            = (a->esz == MO_16 ? f->gen_h : f->gen_s);

        for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
            read_vec_element_i32(s, t32, a->rn, pass, a->esz);
            gen(t32, t32, fpst);
            write_vec_element_i32(s, t32, a->rd, pass, a->esz);
        }
    }
    clear_vec_high(s, a->q, a->rd);

    if (rmode >= 0) {
        gen_restore_rmode(tcg_rmode, fpst);
    }
    return true;
}

TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)

TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)

TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint32, FPROUNDING_ZERO)
TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint64, FPROUNDING_ZERO)
TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)

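/*
 * Expand a two-operand AdvSIMD vector operation via a gvec helper that
 * takes a float_status pointer, using the supplied FP status flavour.
 * Returns false only for invalid size/Q combinations; a failed FP
 * access check still counts as a successfully decoded insn.
 */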
static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
                                           bool is_q, int rd, int rn, int data,
                                           gen_helper_gvec_2_ptr * const fns[3],
                                           ARMFPStatusFlavour fpsttype)
{
    int check = fp_access_check_vector_hsd(s, is_q, esz);
    TCGv_ptr fpst;

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       data, fns[esz - 1]);
    return true;
}

static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
                             int rd, int rn, int data,
                             gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
                                          esz == MO_16 ? FPST_A64_F16 :
                                          FPST_A64);
}

static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
                                int rd, int rn, int data,
                                gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
                                          fns, select_ah_fpst(s, esz));
}

static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
    gen_helper_gvec_vcvt_sh,
    gen_helper_gvec_vcvt_sf,
    gen_helper_gvec_vcvt_sd,
};
TRANS(SCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
TRANS(SCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)

static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
    gen_helper_gvec_vcvt_uh,
    gen_helper_gvec_vcvt_uf,
    gen_helper_gvec_vcvt_ud,
};
TRANS(UCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
TRANS(UCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)

static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
    gen_helper_gvec_vcvt_rz_hs,
    gen_helper_gvec_vcvt_rz_fs,
    gen_helper_gvec_vcvt_rz_ds,
};
TRANS(FCVTZS_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)

static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
    gen_helper_gvec_vcvt_rz_hu,
    gen_helper_gvec_vcvt_rz_fu,
    gen_helper_gvec_vcvt_rz_du,
};
TRANS(FCVTZU_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)

static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
    gen_helper_gvec_vcvt_rm_sh,
    gen_helper_gvec_vcvt_rm_ss,
    gen_helper_gvec_vcvt_rm_sd,
};

static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
    gen_helper_gvec_vcvt_rm_uh,
    gen_helper_gvec_vcvt_rm_us,
    gen_helper_gvec_vcvt_rm_ud,
};

TRANS(FCVTNS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
TRANS(FCVTNU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
TRANS(FCVTPS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
TRANS(FCVTPU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
TRANS(FCVTMS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
TRANS(FCVTMU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
TRANS(FCVTZS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
TRANS(FCVTZU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
TRANS(FCVTAS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
TRANS(FCVTAU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)

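/*
 * Floating-point compare against zero: one helper per element size
 * (half, single, double).  Each lane of the result is set to all ones
 * if the comparison is true and to all zeros otherwise.
 */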
static gen_helper_gvec_2_ptr * const f_fceq0[] = {
    gen_helper_gvec_fceq0_h,
    gen_helper_gvec_fceq0_s,
    gen_helper_gvec_fceq0_d,
};
TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)

static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
    gen_helper_gvec_fcgt0_h,
    gen_helper_gvec_fcgt0_s,
    gen_helper_gvec_fcgt0_d,
};
TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)

static gen_helper_gvec_2_ptr * const f_fcge0[] = {
    gen_helper_gvec_fcge0_h,
    gen_helper_gvec_fcge0_s,
    gen_helper_gvec_fcge0_d,
};
TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)

static gen_helper_gvec_2_ptr * const f_fclt0[] = {
    gen_helper_gvec_fclt0_h,
    gen_helper_gvec_fclt0_s,
    gen_helper_gvec_fclt0_d,
};
TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)

static gen_helper_gvec_2_ptr * const f_fcle0[] = {
    gen_helper_gvec_fcle0_h,
    gen_helper_gvec_fcle0_s,
    gen_helper_gvec_fcle0_d,
};
TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)

static gen_helper_gvec_2_ptr * const f_frecpe[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s,
    gen_helper_gvec_frecpe_d,
};
static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_rpres_s,
    gen_helper_gvec_frecpe_d,
};
TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)

static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s,
    gen_helper_gvec_frsqrte_d,
};
static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_rpres_s,
    gen_helper_gvec_frsqrte_d,
};
TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)

static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
{
    /* Handle 2-reg-misc ops which are widening (so each size element
     * in the source becomes a 2*size element in the destination).
     * The only instruction like this is FCVTL.
     */
    int pass;
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return true;
    }

    if (a->esz == MO_64) {
        /* 32 -> 64 bit fp conversion */
        TCGv_i64 tcg_res[2];
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        int srcelt = a->q ? 2 : 0;

        fpst = fpstatus_ptr(FPST_A64);

        for (pass = 0; pass < 2; pass++) {
            tcg_res[pass] = tcg_temp_new_i64();
            read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
        }
        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
        }
    } else {
        /* 16 -> 32 bit fp conversion */
        int srcelt = a->q ? 4 : 0;
        TCGv_i32 tcg_res[4];
        TCGv_i32 ahp = get_ahp_flag();

        fpst = fpstatus_ptr(FPST_A64_F16);

        for (pass = 0; pass < 4; pass++) {
            tcg_res[pass] = tcg_temp_new_i32();
            read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
                                           fpst, ahp);
        }
        for (pass = 0; pass < 4; pass++) {
            write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
        }
    }
    clear_vec_high(s, true, a->rd);
    return true;
}

/*
 * Targets for the generated SME FA64 decoder: trans_FAIL marks the insn
 * as not valid in streaming SVE mode, so that its translation can raise
 * the appropriate exception; trans_OK leaves it unrestricted.
 */
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 * - branch target identifiers,
 * - paciasp, pacibsp,
 * - BRK insn
 * - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}

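/*
 * Initialise the DisasContext for a new translation block, caching the
 * fields of the TB flags that the per-insn translation code consults.
 */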
static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
    dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (e.g. by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault. This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above almost
             * everything else. This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start = aarch64_tr_tb_start,
    .insn_start = aarch64_tr_insn_start,
    .translate_insn = aarch64_tr_translate_insn,
    .tb_stop = aarch64_tr_tb_stop,
};