1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 #include "exec/target_page.h" 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "qemu/log.h" 24 #include "arm_ldst.h" 25 #include "semihosting/semihost.h" 26 #include "cpregs.h" 27 28 static TCGv_i64 cpu_X[32]; 29 static TCGv_i64 cpu_pc; 30 31 /* Load/store exclusive handling */ 32 static TCGv_i64 cpu_exclusive_high; 33 34 static const char *regnames[] = { 35 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 36 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 37 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 38 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 39 }; 40 41 enum a64_shift_type { 42 A64_SHIFT_TYPE_LSL = 0, 43 A64_SHIFT_TYPE_LSR = 1, 44 A64_SHIFT_TYPE_ASR = 2, 45 A64_SHIFT_TYPE_ROR = 3 46 }; 47 48 /* 49 * Helpers for extracting complex instruction fields 50 */ 51 52 /* 53 * For load/store with an unsigned 12 bit immediate scaled by the element 54 * size. The input has the immediate field in bits [14:3] and the element 55 * size in [2:0]. 56 */ 57 static int uimm_scaled(DisasContext *s, int x) 58 { 59 unsigned imm = x >> 3; 60 unsigned scale = extract32(x, 0, 3); 61 return imm << scale; 62 } 63 64 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 65 static int scale_by_log2_tag_granule(DisasContext *s, int x) 66 { 67 return x << LOG2_TAG_GRANULE; 68 } 69 70 /* 71 * Include the generated decoders. 72 */ 73 74 #include "decode-sme-fa64.c.inc" 75 #include "decode-a64.c.inc" 76 77 /* initialize TCG globals. */ 78 void a64_translate_init(void) 79 { 80 int i; 81 82 cpu_pc = tcg_global_mem_new_i64(tcg_env, 83 offsetof(CPUARMState, pc), 84 "pc"); 85 for (i = 0; i < 32; i++) { 86 cpu_X[i] = tcg_global_mem_new_i64(tcg_env, 87 offsetof(CPUARMState, xregs[i]), 88 regnames[i]); 89 } 90 91 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, 92 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 93 } 94 95 /* 96 * Return the core mmu_idx to use for A64 load/store insns which 97 * have an "unprivileged load/store" variant. Those insns access 98 * EL0 if executed from an EL which has control over EL0 (usually 99 * EL1) but behave like normal loads and stores if executed from 100 * elsewhere (eg EL3). 101 * 102 * @unpriv : true for the unprivileged encoding; false for the 103 * normal encoding (in which case we will return the same 104 * thing as get_mem_index()). 105 */ 106 static int get_a64_user_mem_index(DisasContext *s, bool unpriv) 107 { 108 /* 109 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 110 * which is the usual mmu_idx for this cpu state. 111 */ 112 ARMMMUIdx useridx = s->mmu_idx; 113 114 if (unpriv && s->unpriv) { 115 /* 116 * We have pre-computed the condition for AccType_UNPRIV.
117 * Therefore we should never get here with a mmu_idx for 118 * which we do not know the corresponding user mmu_idx. 119 */ 120 switch (useridx) { 121 case ARMMMUIdx_E10_1: 122 case ARMMMUIdx_E10_1_PAN: 123 useridx = ARMMMUIdx_E10_0; 124 break; 125 case ARMMMUIdx_E20_2: 126 case ARMMMUIdx_E20_2_PAN: 127 useridx = ARMMMUIdx_E20_0; 128 break; 129 default: 130 g_assert_not_reached(); 131 } 132 } 133 return arm_to_core_mmu_idx(useridx); 134 } 135 136 static void set_btype_raw(int val) 137 { 138 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 139 offsetof(CPUARMState, btype)); 140 } 141 142 static void set_btype(DisasContext *s, int val) 143 { 144 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 145 tcg_debug_assert(val >= 1 && val <= 3); 146 set_btype_raw(val); 147 s->btype = -1; 148 } 149 150 static void reset_btype(DisasContext *s) 151 { 152 if (s->btype != 0) { 153 set_btype_raw(0); 154 s->btype = 0; 155 } 156 } 157 158 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 159 { 160 assert(s->pc_save != -1); 161 if (tb_cflags(s->base.tb) & CF_PCREL) { 162 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 163 } else { 164 tcg_gen_movi_i64(dest, s->pc_curr + diff); 165 } 166 } 167 168 void gen_a64_update_pc(DisasContext *s, target_long diff) 169 { 170 gen_pc_plus_diff(s, cpu_pc, diff); 171 s->pc_save = s->pc_curr + diff; 172 } 173 174 /* 175 * Handle Top Byte Ignore (TBI) bits. 176 * 177 * If address tagging is enabled via the TCR TBI bits: 178 * + for EL2 and EL3 there is only one TBI bit, and if it is set 179 * then the address is zero-extended, clearing bits [63:56] 180 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 181 * and TBI1 controls addresses with bit 55 == 1. 182 * If the appropriate TBI bit is set for the address then 183 * the address is sign-extended from bit 55 into bits [63:56] 184 * 185 * Here We have concatenated TBI{1,0} into tbi. 186 */ 187 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 188 TCGv_i64 src, int tbi) 189 { 190 if (tbi == 0) { 191 /* Load unmodified address */ 192 tcg_gen_mov_i64(dst, src); 193 } else if (!regime_has_2_ranges(s->mmu_idx)) { 194 /* Force tag byte to all zero */ 195 tcg_gen_extract_i64(dst, src, 0, 56); 196 } else { 197 /* Sign-extend from bit 55. */ 198 tcg_gen_sextract_i64(dst, src, 0, 56); 199 200 switch (tbi) { 201 case 1: 202 /* tbi0 but !tbi1: only use the extension if positive */ 203 tcg_gen_and_i64(dst, dst, src); 204 break; 205 case 2: 206 /* !tbi0 but tbi1: only use the extension if negative */ 207 tcg_gen_or_i64(dst, dst, src); 208 break; 209 case 3: 210 /* tbi0 and tbi1: always use the extension */ 211 break; 212 default: 213 g_assert_not_reached(); 214 } 215 } 216 } 217 218 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 219 { 220 /* 221 * If address tagging is enabled for instructions via the TCR TBI bits, 222 * then loading an address into the PC will clear out any tag. 223 */ 224 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 225 s->pc_save = -1; 226 } 227 228 /* 229 * Handle MTE and/or TBI. 230 * 231 * For TBI, ideally, we would do nothing. Proper behaviour on fault is 232 * for the tag to be present in the FAR_ELx register. But for user-only 233 * mode we do not have a TLB with which to implement this, so we must 234 * remove the top byte now. 235 * 236 * Always return a fresh temporary that we can increment independently 237 * of the write-back address. 
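 *
 * For example, under CONFIG_USER_ONLY with TBI enabled for data
 * addresses, a tagged user-space pointer such as 0x5a00_0000_1234_5678
 * (tag 0x5a in bits [63:56], bit 55 clear) is returned here as
 * 0x0000_0000_1234_5678, so the tag byte never reaches the actual
 * memory access.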
238 */ 239 240 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 241 { 242 TCGv_i64 clean = tcg_temp_new_i64(); 243 #ifdef CONFIG_USER_ONLY 244 gen_top_byte_ignore(s, clean, addr, s->tbid); 245 #else 246 tcg_gen_mov_i64(clean, addr); 247 #endif 248 return clean; 249 } 250 251 /* Insert a zero tag into src, with the result at dst. */ 252 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 253 { 254 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 255 } 256 257 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 258 MMUAccessType acc, int log2_size) 259 { 260 gen_helper_probe_access(tcg_env, ptr, 261 tcg_constant_i32(acc), 262 tcg_constant_i32(get_mem_index(s)), 263 tcg_constant_i32(1 << log2_size)); 264 } 265 266 /* 267 * For MTE, check a single logical or atomic access. This probes a single 268 * address, the exact one specified. The size and alignment of the access 269 * is not relevant to MTE, per se, but watchpoints do require the size, 270 * and we want to recognize those before making any other changes to state. 271 */ 272 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 273 bool is_write, bool tag_checked, 274 MemOp memop, bool is_unpriv, 275 int core_idx) 276 { 277 if (tag_checked && s->mte_active[is_unpriv]) { 278 TCGv_i64 ret; 279 int desc = 0; 280 281 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 282 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 283 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 284 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 285 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop)); 286 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 287 288 ret = tcg_temp_new_i64(); 289 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 290 291 return ret; 292 } 293 return clean_data_tbi(s, addr); 294 } 295 296 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 297 bool tag_checked, MemOp memop) 298 { 299 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 300 false, get_mem_index(s)); 301 } 302 303 /* 304 * For MTE, check multiple logical sequential accesses. 305 */ 306 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 307 bool tag_checked, int total_size, MemOp single_mop) 308 { 309 if (tag_checked && s->mte_active[0]) { 310 TCGv_i64 ret; 311 int desc = 0; 312 313 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 314 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 315 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 316 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 317 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop)); 318 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 319 320 ret = tcg_temp_new_i64(); 321 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 322 323 return ret; 324 } 325 return clean_data_tbi(s, addr); 326 } 327 328 /* 329 * Generate the special alignment check that applies to AccType_ATOMIC 330 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 331 * naturally aligned, but it must not cross a 16-byte boundary. 332 * See AArch64.CheckAlignment(). 
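 *
 * For example, an 8-byte access whose effective address has
 * (address % 16) == 9 would span bytes 9..16 of the granule:
 * (9 + 8) > 16, so the code below calls the unaligned-access helper.
 * The same access at (address % 16) == 8 stays within the granule
 * ((8 + 8) <= 16) and is allowed.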
333 */ 334 static void check_lse2_align(DisasContext *s, int rn, int imm, 335 bool is_write, MemOp mop) 336 { 337 TCGv_i32 tmp; 338 TCGv_i64 addr; 339 TCGLabel *over_label; 340 MMUAccessType type; 341 int mmu_idx; 342 343 tmp = tcg_temp_new_i32(); 344 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); 345 tcg_gen_addi_i32(tmp, tmp, imm & 15); 346 tcg_gen_andi_i32(tmp, tmp, 15); 347 tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); 348 349 over_label = gen_new_label(); 350 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); 351 352 addr = tcg_temp_new_i64(); 353 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); 354 355 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; 356 mmu_idx = get_mem_index(s); 357 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type), 358 tcg_constant_i32(mmu_idx)); 359 360 gen_set_label(over_label); 361 362 } 363 364 /* Handle the alignment check for AccType_ATOMIC instructions. */ 365 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) 366 { 367 MemOp size = mop & MO_SIZE; 368 369 if (size == MO_8) { 370 return mop; 371 } 372 373 /* 374 * If size == MO_128, this is a LDXP, and the operation is single-copy 375 * atomic for each doubleword, not the entire quadword; it still must 376 * be quadword aligned. 377 */ 378 if (size == MO_128) { 379 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 380 MO_ATOM_IFALIGN_PAIR); 381 } 382 if (dc_isar_feature(aa64_lse2, s)) { 383 check_lse2_align(s, rn, 0, true, mop); 384 } else { 385 mop |= MO_ALIGN; 386 } 387 return finalize_memop(s, mop); 388 } 389 390 /* Handle the alignment check for AccType_ORDERED instructions. */ 391 static MemOp check_ordered_align(DisasContext *s, int rn, int imm, 392 bool is_write, MemOp mop) 393 { 394 MemOp size = mop & MO_SIZE; 395 396 if (size == MO_8) { 397 return mop; 398 } 399 if (size == MO_128) { 400 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 401 MO_ATOM_IFALIGN_PAIR); 402 } 403 if (!dc_isar_feature(aa64_lse2, s)) { 404 mop |= MO_ALIGN; 405 } else if (!s->naa) { 406 check_lse2_align(s, rn, imm, is_write, mop); 407 } 408 return finalize_memop(s, mop); 409 } 410 411 typedef struct DisasCompare64 { 412 TCGCond cond; 413 TCGv_i64 value; 414 } DisasCompare64; 415 416 static void a64_test_cc(DisasCompare64 *c64, int cc) 417 { 418 DisasCompare c32; 419 420 arm_test_cc(&c32, cc); 421 422 /* 423 * Sign-extend the 32-bit value so that the GE/LT comparisons work 424 * properly. The NE/EQ comparisons are also fine with this choice. 425 */ 426 c64->cond = c32.cond; 427 c64->value = tcg_temp_new_i64(); 428 tcg_gen_ext_i32_i64(c64->value, c32.value); 429 } 430 431 static void gen_rebuild_hflags(DisasContext *s) 432 { 433 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); 434 } 435 436 static void gen_exception_internal_insn(DisasContext *s, int excp) 437 { 438 gen_a64_update_pc(s, 0); 439 gen_exception_internal(excp); 440 s->base.is_jmp = DISAS_NORETURN; 441 } 442 443 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) 444 { 445 gen_a64_update_pc(s, 0); 446 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome)); 447 s->base.is_jmp = DISAS_NORETURN; 448 } 449 450 static void gen_step_complete_exception(DisasContext *s) 451 { 452 /* We just completed a step of an insn. Move from Active-not-pending 453 * to Active-pending, and then also take the swstep exception.
454 * This corresponds to making the (IMPDEF) choice to prioritize 455 * swstep exceptions over asynchronous exceptions taken to an exception 456 * level where debug is disabled. This choice has the advantage that 457 * we do not need to maintain internal state corresponding to the 458 * ISV/EX syndrome bits between completion of the step and generation 459 * of the exception, and our syndrome information is always correct. 460 */ 461 gen_ss_advance(s); 462 gen_swstep_exception(s, 1, s->is_ldex); 463 s->base.is_jmp = DISAS_NORETURN; 464 } 465 466 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 467 { 468 if (s->ss_active) { 469 return false; 470 } 471 return translator_use_goto_tb(&s->base, dest); 472 } 473 474 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 475 { 476 if (use_goto_tb(s, s->pc_curr + diff)) { 477 /* 478 * For pcrel, the pc must always be up-to-date on entry to 479 * the linked TB, so that it can use simple additions for all 480 * further adjustments. For !pcrel, the linked TB is compiled 481 * to know its full virtual address, so we can delay the 482 * update to pc to the unlinked path. A long chain of links 483 * can thus avoid many updates to the PC. 484 */ 485 if (tb_cflags(s->base.tb) & CF_PCREL) { 486 gen_a64_update_pc(s, diff); 487 tcg_gen_goto_tb(n); 488 } else { 489 tcg_gen_goto_tb(n); 490 gen_a64_update_pc(s, diff); 491 } 492 tcg_gen_exit_tb(s->base.tb, n); 493 s->base.is_jmp = DISAS_NORETURN; 494 } else { 495 gen_a64_update_pc(s, diff); 496 if (s->ss_active) { 497 gen_step_complete_exception(s); 498 } else { 499 tcg_gen_lookup_and_goto_ptr(); 500 s->base.is_jmp = DISAS_NORETURN; 501 } 502 } 503 } 504 505 /* 506 * Register access functions 507 * 508 * These functions are used for directly accessing a register in where 509 * changes to the final register value are likely to be made. If you 510 * need to use a register for temporary calculation (e.g. index type 511 * operations) use the read_* form. 512 * 513 * B1.2.1 Register mappings 514 * 515 * In instruction register encoding 31 can refer to ZR (zero register) or 516 * the SP (stack pointer) depending on context. In QEMU's case we map SP 517 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 518 * This is the point of the _sp forms. 519 */ 520 TCGv_i64 cpu_reg(DisasContext *s, int reg) 521 { 522 if (reg == 31) { 523 TCGv_i64 t = tcg_temp_new_i64(); 524 tcg_gen_movi_i64(t, 0); 525 return t; 526 } else { 527 return cpu_X[reg]; 528 } 529 } 530 531 /* register access for when 31 == SP */ 532 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 533 { 534 return cpu_X[reg]; 535 } 536 537 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 538 * representing the register contents. This TCGv is an auto-freed 539 * temporary so it need not be explicitly freed, and may be modified. 540 */ 541 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 542 { 543 TCGv_i64 v = tcg_temp_new_i64(); 544 if (reg != 31) { 545 if (sf) { 546 tcg_gen_mov_i64(v, cpu_X[reg]); 547 } else { 548 tcg_gen_ext32u_i64(v, cpu_X[reg]); 549 } 550 } else { 551 tcg_gen_movi_i64(v, 0); 552 } 553 return v; 554 } 555 556 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 557 { 558 TCGv_i64 v = tcg_temp_new_i64(); 559 if (sf) { 560 tcg_gen_mov_i64(v, cpu_X[reg]); 561 } else { 562 tcg_gen_ext32u_i64(v, cpu_X[reg]); 563 } 564 return v; 565 } 566 567 /* Return the offset into CPUARMState of a slice (from 568 * the least significant end) of FP register Qn (ie 569 * Dn, Sn, Hn or Bn). 
570 * (Note that this is not the same mapping as for A32; see cpu.h) 571 */ 572 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 573 { 574 return vec_reg_offset(s, regno, 0, size); 575 } 576 577 /* Offset of the high half of the 128 bit vector Qn */ 578 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 579 { 580 return vec_reg_offset(s, regno, 1, MO_64); 581 } 582 583 /* Convenience accessors for reading and writing single and double 584 * FP registers. Writing clears the upper parts of the associated 585 * 128 bit vector register, as required by the architecture. 586 * Note that unlike the GP register accessors, the values returned 587 * by the read functions must be manually freed. 588 */ 589 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 590 { 591 TCGv_i64 v = tcg_temp_new_i64(); 592 593 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 594 return v; 595 } 596 597 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 598 { 599 TCGv_i32 v = tcg_temp_new_i32(); 600 601 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 602 return v; 603 } 604 605 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 606 { 607 TCGv_i32 v = tcg_temp_new_i32(); 608 609 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 610 return v; 611 } 612 613 static void clear_vec(DisasContext *s, int rd) 614 { 615 unsigned ofs = fp_reg_offset(s, rd, MO_64); 616 unsigned vsz = vec_full_reg_size(s); 617 618 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0); 619 } 620 621 /* 622 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 623 * If SVE is not enabled, then there are only 128 bits in the vector. 624 */ 625 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 626 { 627 unsigned ofs = fp_reg_offset(s, rd, MO_64); 628 unsigned vsz = vec_full_reg_size(s); 629 630 /* Nop move, with side effect of clearing the tail. */ 631 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 632 } 633 634 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 635 { 636 unsigned ofs = fp_reg_offset(s, reg, MO_64); 637 638 tcg_gen_st_i64(v, tcg_env, ofs); 639 clear_vec_high(s, false, reg); 640 } 641 642 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 643 { 644 TCGv_i64 tmp = tcg_temp_new_i64(); 645 646 tcg_gen_extu_i32_i64(tmp, v); 647 write_fp_dreg(s, reg, tmp); 648 } 649 650 /* 651 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP: 652 * - if FPCR.NEP == 0, clear the high elements of reg 653 * - if FPCR.NEP == 1, set the high elements of reg from mergereg 654 * (i.e. merge the result with those high elements) 655 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB). 656 */ 657 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg, 658 TCGv_i64 v) 659 { 660 if (!s->fpcr_nep) { 661 write_fp_dreg(s, reg, v); 662 return; 663 } 664 665 /* 666 * Move from mergereg to reg; this sets the high elements and 667 * clears the bits above 128 as a side effect. 668 */ 669 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), 670 vec_full_reg_offset(s, mergereg), 671 16, vec_full_reg_size(s)); 672 tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg)); 673 } 674 675 /* 676 * Write a single-prec result, but only clear the higher elements 677 * of the destination register if FPCR.NEP is 0; otherwise preserve them. 
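 * Concretely, with FPCR.NEP == 1 the 16-byte gvec move below copies
 * the low 128 bits of mergereg into reg (clearing any SVE bits above
 * 128 as a side effect), and the scalar store then overwrites element
 * 0 with the new result, so the merged high elements come from
 * mergereg.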
678 */ 679 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg, 680 TCGv_i32 v) 681 { 682 if (!s->fpcr_nep) { 683 write_fp_sreg(s, reg, v); 684 return; 685 } 686 687 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), 688 vec_full_reg_offset(s, mergereg), 689 16, vec_full_reg_size(s)); 690 tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 691 } 692 693 /* 694 * Write a half-prec result, but only clear the higher elements 695 * of the destination register if FPCR.NEP is 0; otherwise preserve them. 696 * The caller must ensure that the top 16 bits of v are zero. 697 */ 698 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg, 699 TCGv_i32 v) 700 { 701 if (!s->fpcr_nep) { 702 write_fp_sreg(s, reg, v); 703 return; 704 } 705 706 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg), 707 vec_full_reg_offset(s, mergereg), 708 16, vec_full_reg_size(s)); 709 tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 710 } 711 712 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 713 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 714 GVecGen2Fn *gvec_fn, int vece) 715 { 716 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 717 is_q ? 16 : 8, vec_full_reg_size(s)); 718 } 719 720 /* Expand a 2-operand + immediate AdvSIMD vector operation using 721 * an expander function. 722 */ 723 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 724 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 725 { 726 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 727 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 728 } 729 730 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 731 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 732 GVecGen3Fn *gvec_fn, int vece) 733 { 734 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 735 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 736 } 737 738 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */ 739 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 740 int rx, GVecGen4Fn *gvec_fn, int vece) 741 { 742 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 743 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 744 is_q ? 16 : 8, vec_full_reg_size(s)); 745 } 746 747 /* Expand a 2-operand operation using an out-of-line helper. */ 748 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 749 int rn, int data, gen_helper_gvec_2 *fn) 750 { 751 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 752 vec_full_reg_offset(s, rn), 753 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 754 } 755 756 /* Expand a 3-operand operation using an out-of-line helper. */ 757 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 758 int rn, int rm, int data, gen_helper_gvec_3 *fn) 759 { 760 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 761 vec_full_reg_offset(s, rn), 762 vec_full_reg_offset(s, rm), 763 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 764 } 765 766 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 767 * an out-of-line helper. 
768 */ 769 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 770 int rm, ARMFPStatusFlavour fpsttype, int data, 771 gen_helper_gvec_3_ptr *fn) 772 { 773 TCGv_ptr fpst = fpstatus_ptr(fpsttype); 774 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 775 vec_full_reg_offset(s, rn), 776 vec_full_reg_offset(s, rm), fpst, 777 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 778 } 779 780 /* Expand a 4-operand operation using an out-of-line helper. */ 781 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 782 int rm, int ra, int data, gen_helper_gvec_4 *fn) 783 { 784 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 785 vec_full_reg_offset(s, rn), 786 vec_full_reg_offset(s, rm), 787 vec_full_reg_offset(s, ra), 788 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 789 } 790 791 /* 792 * Expand a 4-operand operation using an out-of-line helper that takes 793 * a pointer to the CPU env. 794 */ 795 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, 796 int rm, int ra, int data, 797 gen_helper_gvec_4_ptr *fn) 798 { 799 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 800 vec_full_reg_offset(s, rn), 801 vec_full_reg_offset(s, rm), 802 vec_full_reg_offset(s, ra), 803 tcg_env, 804 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 805 } 806 807 /* 808 * Expand a 4-operand + fpstatus pointer + simd data value operation using 809 * an out-of-line helper. 810 */ 811 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 812 int rm, int ra, ARMFPStatusFlavour fpsttype, 813 int data, 814 gen_helper_gvec_4_ptr *fn) 815 { 816 TCGv_ptr fpst = fpstatus_ptr(fpsttype); 817 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 818 vec_full_reg_offset(s, rn), 819 vec_full_reg_offset(s, rm), 820 vec_full_reg_offset(s, ra), fpst, 821 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 822 } 823 824 /* 825 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN. 826 * These functions implement 827 * d = floatN_is_any_nan(s) ? s : floatN_chs(s) 828 * which for float32 is 829 * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31)) 830 * and similarly for the other float sizes. 831 */ 832 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s) 833 { 834 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); 835 836 gen_vfp_negh(chs_s, s); 837 gen_vfp_absh(abs_s, s); 838 tcg_gen_movcond_i32(TCG_COND_GTU, d, 839 abs_s, tcg_constant_i32(0x7c00), 840 s, chs_s); 841 } 842 843 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s) 844 { 845 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32(); 846 847 gen_vfp_negs(chs_s, s); 848 gen_vfp_abss(abs_s, s); 849 tcg_gen_movcond_i32(TCG_COND_GTU, d, 850 abs_s, tcg_constant_i32(0x7f800000UL), 851 s, chs_s); 852 } 853 854 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s) 855 { 856 TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64(); 857 858 gen_vfp_negd(chs_s, s); 859 gen_vfp_absd(abs_s, s); 860 tcg_gen_movcond_i64(TCG_COND_GTU, d, 861 abs_s, tcg_constant_i64(0x7ff0000000000000ULL), 862 s, chs_s); 863 } 864 865 /* 866 * These functions implement 867 * d = floatN_is_any_nan(s) ? s : floatN_abs(s) 868 * which for float32 is 869 * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31)) 870 * and similarly for the other float sizes.
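 *
 * For example, for float32 the negative quiet NaN 0xffc00000 has
 * abs(s) == 0x7fc00000 > 0x7f800000, so it is returned unchanged
 * (sign bit preserved), while -1.0f (0xbf800000) has
 * abs(s) == 0x3f800000 <= 0x7f800000 and the result is +1.0f
 * (0x3f800000).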
871 */ 872 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s) 873 { 874 TCGv_i32 abs_s = tcg_temp_new_i32(); 875 876 gen_vfp_absh(abs_s, s); 877 tcg_gen_movcond_i32(TCG_COND_GTU, d, 878 abs_s, tcg_constant_i32(0x7c00), 879 s, abs_s); 880 } 881 882 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s) 883 { 884 TCGv_i32 abs_s = tcg_temp_new_i32(); 885 886 gen_vfp_abss(abs_s, s); 887 tcg_gen_movcond_i32(TCG_COND_GTU, d, 888 abs_s, tcg_constant_i32(0x7f800000UL), 889 s, abs_s); 890 } 891 892 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s) 893 { 894 TCGv_i64 abs_s = tcg_temp_new_i64(); 895 896 gen_vfp_absd(abs_s, s); 897 tcg_gen_movcond_i64(TCG_COND_GTU, d, 898 abs_s, tcg_constant_i64(0x7ff0000000000000ULL), 899 s, abs_s); 900 } 901 902 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) 903 { 904 if (dc->fpcr_ah) { 905 gen_vfp_ah_negh(d, s); 906 } else { 907 gen_vfp_negh(d, s); 908 } 909 } 910 911 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s) 912 { 913 if (dc->fpcr_ah) { 914 gen_vfp_ah_negs(d, s); 915 } else { 916 gen_vfp_negs(d, s); 917 } 918 } 919 920 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s) 921 { 922 if (dc->fpcr_ah) { 923 gen_vfp_ah_negd(d, s); 924 } else { 925 gen_vfp_negd(d, s); 926 } 927 } 928 929 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 930 * than the 32 bit equivalent. 931 */ 932 static inline void gen_set_NZ64(TCGv_i64 result) 933 { 934 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 935 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 936 } 937 938 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */ 939 static inline void gen_logic_CC(int sf, TCGv_i64 result) 940 { 941 if (sf) { 942 gen_set_NZ64(result); 943 } else { 944 tcg_gen_extrl_i64_i32(cpu_ZF, result); 945 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 946 } 947 tcg_gen_movi_i32(cpu_CF, 0); 948 tcg_gen_movi_i32(cpu_VF, 0); 949 } 950 951 /* dest = T0 + T1; compute C, N, V and Z flags */ 952 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 953 { 954 TCGv_i64 result, flag, tmp; 955 result = tcg_temp_new_i64(); 956 flag = tcg_temp_new_i64(); 957 tmp = tcg_temp_new_i64(); 958 959 tcg_gen_movi_i64(tmp, 0); 960 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 961 962 tcg_gen_extrl_i64_i32(cpu_CF, flag); 963 964 gen_set_NZ64(result); 965 966 tcg_gen_xor_i64(flag, result, t0); 967 tcg_gen_xor_i64(tmp, t0, t1); 968 tcg_gen_andc_i64(flag, flag, tmp); 969 tcg_gen_extrh_i64_i32(cpu_VF, flag); 970 971 tcg_gen_mov_i64(dest, result); 972 } 973 974 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 975 { 976 TCGv_i32 t0_32 = tcg_temp_new_i32(); 977 TCGv_i32 t1_32 = tcg_temp_new_i32(); 978 TCGv_i32 tmp = tcg_temp_new_i32(); 979 980 tcg_gen_movi_i32(tmp, 0); 981 tcg_gen_extrl_i64_i32(t0_32, t0); 982 tcg_gen_extrl_i64_i32(t1_32, t1); 983 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 984 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 985 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 986 tcg_gen_xor_i32(tmp, t0_32, t1_32); 987 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 988 tcg_gen_extu_i32_i64(dest, cpu_NF); 989 } 990 991 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 992 { 993 if (sf) { 994 gen_add64_CC(dest, t0, t1); 995 } else { 996 gen_add32_CC(dest, t0, t1); 997 } 998 } 999 1000 /* dest = T0 - T1; compute C, N, V and Z flags */ 1001 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1002 { 1003 /* 64 bit arithmetic */ 1004 TCGv_i64 result, flag, tmp; 1005 1006 result = 
tcg_temp_new_i64(); 1007 flag = tcg_temp_new_i64(); 1008 tcg_gen_sub_i64(result, t0, t1); 1009 1010 gen_set_NZ64(result); 1011 1012 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 1013 tcg_gen_extrl_i64_i32(cpu_CF, flag); 1014 1015 tcg_gen_xor_i64(flag, result, t0); 1016 tmp = tcg_temp_new_i64(); 1017 tcg_gen_xor_i64(tmp, t0, t1); 1018 tcg_gen_and_i64(flag, flag, tmp); 1019 tcg_gen_extrh_i64_i32(cpu_VF, flag); 1020 tcg_gen_mov_i64(dest, result); 1021 } 1022 1023 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1024 { 1025 /* 32 bit arithmetic */ 1026 TCGv_i32 t0_32 = tcg_temp_new_i32(); 1027 TCGv_i32 t1_32 = tcg_temp_new_i32(); 1028 TCGv_i32 tmp; 1029 1030 tcg_gen_extrl_i64_i32(t0_32, t0); 1031 tcg_gen_extrl_i64_i32(t1_32, t1); 1032 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 1033 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1034 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 1035 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 1036 tmp = tcg_temp_new_i32(); 1037 tcg_gen_xor_i32(tmp, t0_32, t1_32); 1038 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 1039 tcg_gen_extu_i32_i64(dest, cpu_NF); 1040 } 1041 1042 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1043 { 1044 if (sf) { 1045 gen_sub64_CC(dest, t0, t1); 1046 } else { 1047 gen_sub32_CC(dest, t0, t1); 1048 } 1049 } 1050 1051 /* dest = T0 + T1 + CF; do not compute flags. */ 1052 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1053 { 1054 TCGv_i64 flag = tcg_temp_new_i64(); 1055 tcg_gen_extu_i32_i64(flag, cpu_CF); 1056 tcg_gen_add_i64(dest, t0, t1); 1057 tcg_gen_add_i64(dest, dest, flag); 1058 1059 if (!sf) { 1060 tcg_gen_ext32u_i64(dest, dest); 1061 } 1062 } 1063 1064 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 1065 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 1066 { 1067 if (sf) { 1068 TCGv_i64 result = tcg_temp_new_i64(); 1069 TCGv_i64 cf_64 = tcg_temp_new_i64(); 1070 TCGv_i64 vf_64 = tcg_temp_new_i64(); 1071 TCGv_i64 tmp = tcg_temp_new_i64(); 1072 1073 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 1074 tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64); 1075 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 1076 gen_set_NZ64(result); 1077 1078 tcg_gen_xor_i64(vf_64, result, t0); 1079 tcg_gen_xor_i64(tmp, t0, t1); 1080 tcg_gen_andc_i64(vf_64, vf_64, tmp); 1081 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 1082 1083 tcg_gen_mov_i64(dest, result); 1084 } else { 1085 TCGv_i32 t0_32 = tcg_temp_new_i32(); 1086 TCGv_i32 t1_32 = tcg_temp_new_i32(); 1087 TCGv_i32 tmp = tcg_temp_new_i32(); 1088 1089 tcg_gen_extrl_i64_i32(t0_32, t0); 1090 tcg_gen_extrl_i64_i32(t1_32, t1); 1091 tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF); 1092 1093 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 1094 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 1095 tcg_gen_xor_i32(tmp, t0_32, t1_32); 1096 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 1097 tcg_gen_extu_i32_i64(dest, cpu_NF); 1098 } 1099 } 1100 1101 /* 1102 * Load/Store generators 1103 */ 1104 1105 /* 1106 * Store from GPR register to memory. 
1107 */ 1108 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 1109 TCGv_i64 tcg_addr, MemOp memop, int memidx, 1110 bool iss_valid, 1111 unsigned int iss_srt, 1112 bool iss_sf, bool iss_ar) 1113 { 1114 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 1115 1116 if (iss_valid) { 1117 uint32_t syn; 1118 1119 syn = syn_data_abort_with_iss(0, 1120 (memop & MO_SIZE), 1121 false, 1122 iss_srt, 1123 iss_sf, 1124 iss_ar, 1125 0, 0, 0, 0, 0, false); 1126 disas_set_insn_syndrome(s, syn); 1127 } 1128 } 1129 1130 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 1131 TCGv_i64 tcg_addr, MemOp memop, 1132 bool iss_valid, 1133 unsigned int iss_srt, 1134 bool iss_sf, bool iss_ar) 1135 { 1136 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 1137 iss_valid, iss_srt, iss_sf, iss_ar); 1138 } 1139 1140 /* 1141 * Load from memory to GPR register 1142 */ 1143 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1144 MemOp memop, bool extend, int memidx, 1145 bool iss_valid, unsigned int iss_srt, 1146 bool iss_sf, bool iss_ar) 1147 { 1148 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 1149 1150 if (extend && (memop & MO_SIGN)) { 1151 g_assert((memop & MO_SIZE) <= MO_32); 1152 tcg_gen_ext32u_i64(dest, dest); 1153 } 1154 1155 if (iss_valid) { 1156 uint32_t syn; 1157 1158 syn = syn_data_abort_with_iss(0, 1159 (memop & MO_SIZE), 1160 (memop & MO_SIGN) != 0, 1161 iss_srt, 1162 iss_sf, 1163 iss_ar, 1164 0, 0, 0, 0, 0, false); 1165 disas_set_insn_syndrome(s, syn); 1166 } 1167 } 1168 1169 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1170 MemOp memop, bool extend, 1171 bool iss_valid, unsigned int iss_srt, 1172 bool iss_sf, bool iss_ar) 1173 { 1174 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1175 iss_valid, iss_srt, iss_sf, iss_ar); 1176 } 1177 1178 /* 1179 * Store from FP register to memory 1180 */ 1181 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1182 { 1183 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1184 TCGv_i64 tmplo = tcg_temp_new_i64(); 1185 1186 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1187 1188 if ((mop & MO_SIZE) < MO_128) { 1189 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1190 } else { 1191 TCGv_i64 tmphi = tcg_temp_new_i64(); 1192 TCGv_i128 t16 = tcg_temp_new_i128(); 1193 1194 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1195 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1196 1197 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1198 } 1199 } 1200 1201 /* 1202 * Load from memory to FP register 1203 */ 1204 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1205 { 1206 /* This always zero-extends and writes to a full 128 bit wide vector */ 1207 TCGv_i64 tmplo = tcg_temp_new_i64(); 1208 TCGv_i64 tmphi = NULL; 1209 1210 if ((mop & MO_SIZE) < MO_128) { 1211 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1212 } else { 1213 TCGv_i128 t16 = tcg_temp_new_i128(); 1214 1215 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1216 1217 tmphi = tcg_temp_new_i64(); 1218 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1219 } 1220 1221 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1222 1223 if (tmphi) { 1224 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1225 } 1226 clear_vec_high(s, tmphi != NULL, destidx); 1227 } 1228 1229 /* 1230 * Vector load/store helpers. 
1231 * 1232 * The principal difference between this and a FP load is that we don't 1233 * zero extend as we are filling a partial chunk of the vector register. 1234 * These functions don't support 128 bit loads/stores, which would be 1235 * normal load/store operations. 1236 * 1237 * The _i32 versions are useful when operating on 32 bit quantities 1238 * (eg for floating point single or using Neon helper functions). 1239 */ 1240 1241 /* Get value of an element within a vector register */ 1242 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1243 int element, MemOp memop) 1244 { 1245 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1246 switch ((unsigned)memop) { 1247 case MO_8: 1248 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1249 break; 1250 case MO_16: 1251 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1252 break; 1253 case MO_32: 1254 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1255 break; 1256 case MO_8|MO_SIGN: 1257 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1258 break; 1259 case MO_16|MO_SIGN: 1260 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1261 break; 1262 case MO_32|MO_SIGN: 1263 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1264 break; 1265 case MO_64: 1266 case MO_64|MO_SIGN: 1267 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1268 break; 1269 default: 1270 g_assert_not_reached(); 1271 } 1272 } 1273 1274 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1275 int element, MemOp memop) 1276 { 1277 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1278 switch (memop) { 1279 case MO_8: 1280 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1281 break; 1282 case MO_16: 1283 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1284 break; 1285 case MO_8|MO_SIGN: 1286 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1287 break; 1288 case MO_16|MO_SIGN: 1289 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1290 break; 1291 case MO_32: 1292 case MO_32|MO_SIGN: 1293 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1294 break; 1295 default: 1296 g_assert_not_reached(); 1297 } 1298 } 1299 1300 /* Set value of an element within a vector register */ 1301 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1302 int element, MemOp memop) 1303 { 1304 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1305 switch (memop) { 1306 case MO_8: 1307 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1308 break; 1309 case MO_16: 1310 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1311 break; 1312 case MO_32: 1313 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1314 break; 1315 case MO_64: 1316 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1317 break; 1318 default: 1319 g_assert_not_reached(); 1320 } 1321 } 1322 1323 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1324 int destidx, int element, MemOp memop) 1325 { 1326 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1327 switch (memop) { 1328 case MO_8: 1329 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1330 break; 1331 case MO_16: 1332 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1333 break; 1334 case MO_32: 1335 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1336 break; 1337 default: 1338 g_assert_not_reached(); 1339 } 1340 } 1341 1342 /* Store from vector register to memory */ 1343 static void do_vec_st(DisasContext *s, int srcidx, int element, 1344 TCGv_i64 tcg_addr, MemOp mop) 1345 { 1346 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1347 1348 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1349 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1350 } 1351 1352 /* Load from memory to vector register */ 1353 static void do_vec_ld(DisasContext *s, int destidx, int element, 1354 TCGv_i64 tcg_addr, MemOp mop) 1355 { 1356 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1357 1358 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1359 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1360 } 1361 1362 /* Check that FP/Neon access is enabled. If it is, return 1363 * true. If not, emit code to generate an appropriate exception, 1364 * and return false; the caller should not emit any code for 1365 * the instruction. Note that this check must happen after all 1366 * unallocated-encoding checks (otherwise the syndrome information 1367 * for the resulting exception will be incorrect). 1368 */ 1369 static bool fp_access_check_only(DisasContext *s) 1370 { 1371 if (s->fp_excp_el) { 1372 assert(!s->fp_access_checked); 1373 s->fp_access_checked = -1; 1374 1375 gen_exception_insn_el(s, 0, EXCP_UDEF, 1376 syn_fp_access_trap(1, 0xe, false, 0), 1377 s->fp_excp_el); 1378 return false; 1379 } 1380 s->fp_access_checked = 1; 1381 return true; 1382 } 1383 1384 static bool nonstreaming_check(DisasContext *s) 1385 { 1386 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1387 gen_exception_insn(s, 0, EXCP_UDEF, 1388 syn_smetrap(SME_ET_Streaming, false)); 1389 return false; 1390 } 1391 return true; 1392 } 1393 1394 static bool fp_access_check(DisasContext *s) 1395 { 1396 return fp_access_check_only(s) && nonstreaming_check(s); 1397 } 1398 1399 /* 1400 * Return <0 for non-supported element sizes, with MO_16 controlled by 1401 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. 1402 */ 1403 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz) 1404 { 1405 switch (esz) { 1406 case MO_64: 1407 case MO_32: 1408 break; 1409 case MO_16: 1410 if (!dc_isar_feature(aa64_fp16, s)) { 1411 return -1; 1412 } 1413 break; 1414 default: 1415 return -1; 1416 } 1417 return fp_access_check(s); 1418 } 1419 1420 /* Likewise, but vector MO_64 must have two elements. */ 1421 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) 1422 { 1423 switch (esz) { 1424 case MO_64: 1425 if (!is_q) { 1426 return -1; 1427 } 1428 break; 1429 case MO_32: 1430 break; 1431 case MO_16: 1432 if (!dc_isar_feature(aa64_fp16, s)) { 1433 return -1; 1434 } 1435 break; 1436 default: 1437 return -1; 1438 } 1439 return fp_access_check(s); 1440 } 1441 1442 /* 1443 * Check that SVE access is enabled. If it is, return true. 1444 * If not, emit code to generate an appropriate exception and return false. 1445 * This function corresponds to CheckSVEEnabled(). 1446 */ 1447 bool sve_access_check(DisasContext *s) 1448 { 1449 if (dc_isar_feature(aa64_sme, s)) { 1450 bool ret; 1451 1452 if (s->pstate_sm) { 1453 ret = sme_enabled_check(s); 1454 } else if (dc_isar_feature(aa64_sve, s)) { 1455 goto continue_sve; 1456 } else { 1457 ret = sme_sm_enabled_check(s); 1458 } 1459 if (ret) { 1460 ret = nonstreaming_check(s); 1461 } 1462 s->sve_access_checked = (ret ? 1 : -1); 1463 return ret; 1464 } 1465 1466 continue_sve: 1467 if (s->sve_excp_el) { 1468 /* Assert that we only raise one exception per instruction. 
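 * (s->sve_access_checked is 0 until a check has run for this insn;
 * the code below sets it to -1 when the trap is raised, and the
 * success paths set it to 1, which is what the assert relies on.)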
*/ 1469 assert(!s->sve_access_checked); 1470 gen_exception_insn_el(s, 0, EXCP_UDEF, 1471 syn_sve_access_trap(), s->sve_excp_el); 1472 s->sve_access_checked = -1; 1473 return false; 1474 } 1475 s->sve_access_checked = 1; 1476 return fp_access_check(s); 1477 } 1478 1479 /* 1480 * Check that SME access is enabled, raise an exception if not. 1481 * Note that this function corresponds to CheckSMEAccess and is 1482 * only used directly for cpregs. 1483 */ 1484 static bool sme_access_check(DisasContext *s) 1485 { 1486 if (s->sme_excp_el) { 1487 gen_exception_insn_el(s, 0, EXCP_UDEF, 1488 syn_smetrap(SME_ET_AccessTrap, false), 1489 s->sme_excp_el); 1490 return false; 1491 } 1492 return true; 1493 } 1494 1495 /* This function corresponds to CheckSMEEnabled. */ 1496 bool sme_enabled_check(DisasContext *s) 1497 { 1498 /* 1499 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1500 * to be zero when fp_excp_el has priority. This is because we need 1501 * sme_excp_el by itself for cpregs access checks. 1502 */ 1503 if (s->sme_excp_el 1504 && (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) { 1505 bool ret = sme_access_check(s); 1506 s->fp_access_checked = (ret ? 1 : -1); 1507 return ret; 1508 } 1509 return fp_access_check_only(s); 1510 } 1511 1512 /* Common subroutine for CheckSMEAnd*Enabled. */ 1513 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1514 { 1515 if (!sme_enabled_check(s)) { 1516 return false; 1517 } 1518 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1519 gen_exception_insn(s, 0, EXCP_UDEF, 1520 syn_smetrap(SME_ET_NotStreaming, false)); 1521 return false; 1522 } 1523 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1524 gen_exception_insn(s, 0, EXCP_UDEF, 1525 syn_smetrap(SME_ET_InactiveZA, false)); 1526 return false; 1527 } 1528 return true; 1529 } 1530 1531 /* 1532 * Expanders for AdvSIMD translation functions. 
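 *
 * Most of the helpers below share a common validity check: a 64-bit
 * vector (a->q == 0) with 64-bit elements (a->esz == MO_64) is not a
 * valid encoding for these instruction classes, so the trans functions
 * return false for that combination (treating it as unallocated)
 * before generating any code.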
1533 */ 1534 1535 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, 1536 gen_helper_gvec_2 *fn) 1537 { 1538 if (!a->q && a->esz == MO_64) { 1539 return false; 1540 } 1541 if (fp_access_check(s)) { 1542 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); 1543 } 1544 return true; 1545 } 1546 1547 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, 1548 gen_helper_gvec_3 *fn) 1549 { 1550 if (!a->q && a->esz == MO_64) { 1551 return false; 1552 } 1553 if (fp_access_check(s)) { 1554 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); 1555 } 1556 return true; 1557 } 1558 1559 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1560 { 1561 if (!a->q && a->esz == MO_64) { 1562 return false; 1563 } 1564 if (fp_access_check(s)) { 1565 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1566 } 1567 return true; 1568 } 1569 1570 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1571 { 1572 if (a->esz == MO_64) { 1573 return false; 1574 } 1575 if (fp_access_check(s)) { 1576 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1577 } 1578 return true; 1579 } 1580 1581 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1582 { 1583 if (a->esz == MO_8) { 1584 return false; 1585 } 1586 return do_gvec_fn3_no64(s, a, fn); 1587 } 1588 1589 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) 1590 { 1591 if (!a->q && a->esz == MO_64) { 1592 return false; 1593 } 1594 if (fp_access_check(s)) { 1595 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); 1596 } 1597 return true; 1598 } 1599 1600 /* 1601 * This utility function is for doing register extension with an 1602 * optional shift. You will likely want to pass a temporary for the 1603 * destination register. See DecodeRegExtend() in the ARM ARM. 1604 */ 1605 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1606 int option, unsigned int shift) 1607 { 1608 int extsize = extract32(option, 0, 2); 1609 bool is_signed = extract32(option, 2, 1); 1610 1611 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0)); 1612 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1613 } 1614 1615 static inline void gen_check_sp_alignment(DisasContext *s) 1616 { 1617 /* The AArch64 architecture mandates that (if enabled via PSTATE 1618 * or SCTLR bits) there is a check that SP is 16-aligned on every 1619 * SP-relative load or store (with an exception generated if it is not). 1620 * In line with general QEMU practice regarding misaligned accesses, 1621 * we omit these checks for the sake of guest program performance. 1622 * This function is provided as a hook so we can more easily add these 1623 * checks in future (possibly as a "favour catching guest program bugs 1624 * over speed" user selectable option). 1625 */ 1626 } 1627 1628 /* 1629 * The instruction disassembly implemented here matches 1630 * the instruction encoding classifications in chapter C4 1631 * of the ARM Architecture Reference Manual (DDI0487B_a); 1632 * classification names and decode diagrams here should generally 1633 * match up with those in the manual. 
1634 */ 1635 1636 static bool trans_B(DisasContext *s, arg_i *a) 1637 { 1638 reset_btype(s); 1639 gen_goto_tb(s, 0, a->imm); 1640 return true; 1641 } 1642 1643 static bool trans_BL(DisasContext *s, arg_i *a) 1644 { 1645 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1646 reset_btype(s); 1647 gen_goto_tb(s, 0, a->imm); 1648 return true; 1649 } 1650 1651 1652 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1653 { 1654 DisasLabel match; 1655 TCGv_i64 tcg_cmp; 1656 1657 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1658 reset_btype(s); 1659 1660 match = gen_disas_label(s); 1661 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1662 tcg_cmp, 0, match.label); 1663 gen_goto_tb(s, 0, 4); 1664 set_disas_label(s, match); 1665 gen_goto_tb(s, 1, a->imm); 1666 return true; 1667 } 1668 1669 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1670 { 1671 DisasLabel match; 1672 TCGv_i64 tcg_cmp; 1673 1674 tcg_cmp = tcg_temp_new_i64(); 1675 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1676 1677 reset_btype(s); 1678 1679 match = gen_disas_label(s); 1680 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1681 tcg_cmp, 0, match.label); 1682 gen_goto_tb(s, 0, 4); 1683 set_disas_label(s, match); 1684 gen_goto_tb(s, 1, a->imm); 1685 return true; 1686 } 1687 1688 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1689 { 1690 /* BC.cond is only present with FEAT_HBC */ 1691 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1692 return false; 1693 } 1694 reset_btype(s); 1695 if (a->cond < 0x0e) { 1696 /* genuinely conditional branches */ 1697 DisasLabel match = gen_disas_label(s); 1698 arm_gen_test_cc(a->cond, match.label); 1699 gen_goto_tb(s, 0, 4); 1700 set_disas_label(s, match); 1701 gen_goto_tb(s, 1, a->imm); 1702 } else { 1703 /* 0xe and 0xf are both "always" conditions */ 1704 gen_goto_tb(s, 0, a->imm); 1705 } 1706 return true; 1707 } 1708 1709 static void set_btype_for_br(DisasContext *s, int rn) 1710 { 1711 if (dc_isar_feature(aa64_bti, s)) { 1712 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1713 if (rn == 16 || rn == 17) { 1714 set_btype(s, 1); 1715 } else { 1716 TCGv_i64 pc = tcg_temp_new_i64(); 1717 gen_pc_plus_diff(s, pc, 0); 1718 gen_helper_guarded_page_br(tcg_env, pc); 1719 s->btype = -1; 1720 } 1721 } 1722 } 1723 1724 static void set_btype_for_blr(DisasContext *s) 1725 { 1726 if (dc_isar_feature(aa64_bti, s)) { 1727 /* BLR sets BTYPE to 2, regardless of source guarded page. 
*/ 1728 set_btype(s, 2); 1729 } 1730 } 1731 1732 static bool trans_BR(DisasContext *s, arg_r *a) 1733 { 1734 set_btype_for_br(s, a->rn); 1735 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1736 s->base.is_jmp = DISAS_JUMP; 1737 return true; 1738 } 1739 1740 static bool trans_BLR(DisasContext *s, arg_r *a) 1741 { 1742 TCGv_i64 dst = cpu_reg(s, a->rn); 1743 TCGv_i64 lr = cpu_reg(s, 30); 1744 if (dst == lr) { 1745 TCGv_i64 tmp = tcg_temp_new_i64(); 1746 tcg_gen_mov_i64(tmp, dst); 1747 dst = tmp; 1748 } 1749 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1750 gen_a64_set_pc(s, dst); 1751 set_btype_for_blr(s); 1752 s->base.is_jmp = DISAS_JUMP; 1753 return true; 1754 } 1755 1756 static bool trans_RET(DisasContext *s, arg_r *a) 1757 { 1758 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1759 s->base.is_jmp = DISAS_JUMP; 1760 return true; 1761 } 1762 1763 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1764 TCGv_i64 modifier, bool use_key_a) 1765 { 1766 TCGv_i64 truedst; 1767 /* 1768 * Return the branch target for a BRAA/RETA/etc, which is either 1769 * just the destination dst, or that value with the pauth check 1770 * done and the code removed from the high bits. 1771 */ 1772 if (!s->pauth_active) { 1773 return dst; 1774 } 1775 1776 truedst = tcg_temp_new_i64(); 1777 if (use_key_a) { 1778 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1779 } else { 1780 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1781 } 1782 return truedst; 1783 } 1784 1785 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1786 { 1787 TCGv_i64 dst; 1788 1789 if (!dc_isar_feature(aa64_pauth, s)) { 1790 return false; 1791 } 1792 1793 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1794 set_btype_for_br(s, a->rn); 1795 gen_a64_set_pc(s, dst); 1796 s->base.is_jmp = DISAS_JUMP; 1797 return true; 1798 } 1799 1800 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1801 { 1802 TCGv_i64 dst, lr; 1803 1804 if (!dc_isar_feature(aa64_pauth, s)) { 1805 return false; 1806 } 1807 1808 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1809 lr = cpu_reg(s, 30); 1810 if (dst == lr) { 1811 TCGv_i64 tmp = tcg_temp_new_i64(); 1812 tcg_gen_mov_i64(tmp, dst); 1813 dst = tmp; 1814 } 1815 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1816 gen_a64_set_pc(s, dst); 1817 set_btype_for_blr(s); 1818 s->base.is_jmp = DISAS_JUMP; 1819 return true; 1820 } 1821 1822 static bool trans_RETA(DisasContext *s, arg_reta *a) 1823 { 1824 TCGv_i64 dst; 1825 1826 if (!dc_isar_feature(aa64_pauth, s)) { 1827 return false; 1828 } 1829 1830 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1831 gen_a64_set_pc(s, dst); 1832 s->base.is_jmp = DISAS_JUMP; 1833 return true; 1834 } 1835 1836 static bool trans_BRA(DisasContext *s, arg_bra *a) 1837 { 1838 TCGv_i64 dst; 1839 1840 if (!dc_isar_feature(aa64_pauth, s)) { 1841 return false; 1842 } 1843 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1844 gen_a64_set_pc(s, dst); 1845 set_btype_for_br(s, a->rn); 1846 s->base.is_jmp = DISAS_JUMP; 1847 return true; 1848 } 1849 1850 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1851 { 1852 TCGv_i64 dst, lr; 1853 1854 if (!dc_isar_feature(aa64_pauth, s)) { 1855 return false; 1856 } 1857 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1858 lr = cpu_reg(s, 30); 1859 if (dst == lr) { 1860 TCGv_i64 tmp = tcg_temp_new_i64(); 1861 tcg_gen_mov_i64(tmp, dst); 1862 dst = tmp; 1863 } 1864 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1865 
gen_a64_set_pc(s, dst); 1866 set_btype_for_blr(s); 1867 s->base.is_jmp = DISAS_JUMP; 1868 return true; 1869 } 1870 1871 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1872 { 1873 TCGv_i64 dst; 1874 1875 if (s->current_el == 0) { 1876 return false; 1877 } 1878 if (s->trap_eret) { 1879 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1880 return true; 1881 } 1882 dst = tcg_temp_new_i64(); 1883 tcg_gen_ld_i64(dst, tcg_env, 1884 offsetof(CPUARMState, elr_el[s->current_el])); 1885 1886 translator_io_start(&s->base); 1887 1888 gen_helper_exception_return(tcg_env, dst); 1889 /* Must exit loop to check un-masked IRQs */ 1890 s->base.is_jmp = DISAS_EXIT; 1891 return true; 1892 } 1893 1894 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1895 { 1896 TCGv_i64 dst; 1897 1898 if (!dc_isar_feature(aa64_pauth, s)) { 1899 return false; 1900 } 1901 if (s->current_el == 0) { 1902 return false; 1903 } 1904 /* The FGT trap takes precedence over an auth trap. */ 1905 if (s->trap_eret) { 1906 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1907 return true; 1908 } 1909 dst = tcg_temp_new_i64(); 1910 tcg_gen_ld_i64(dst, tcg_env, 1911 offsetof(CPUARMState, elr_el[s->current_el])); 1912 1913 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1914 1915 translator_io_start(&s->base); 1916 1917 gen_helper_exception_return(tcg_env, dst); 1918 /* Must exit loop to check un-masked IRQs */ 1919 s->base.is_jmp = DISAS_EXIT; 1920 return true; 1921 } 1922 1923 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1924 { 1925 return true; 1926 } 1927 1928 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1929 { 1930 /* 1931 * When running in MTTCG we don't generate jumps to the yield and 1932 * WFE helpers as it won't affect the scheduling of other vCPUs. 1933 * If we wanted to more completely model WFE/SEV so we don't busy 1934 * spin unnecessarily we would need to do something more involved. 1935 */ 1936 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1937 s->base.is_jmp = DISAS_YIELD; 1938 } 1939 return true; 1940 } 1941 1942 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1943 { 1944 s->base.is_jmp = DISAS_WFI; 1945 return true; 1946 } 1947 1948 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1949 { 1950 /* 1951 * When running in MTTCG we don't generate jumps to the yield and 1952 * WFE helpers as it won't affect the scheduling of other vCPUs. 1953 * If we wanted to more completely model WFE/SEV so we don't busy 1954 * spin unnecessarily we would need to do something more involved. 1955 */ 1956 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1957 s->base.is_jmp = DISAS_WFE; 1958 } 1959 return true; 1960 } 1961 1962 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 1963 { 1964 if (!dc_isar_feature(aa64_wfxt, s)) { 1965 return false; 1966 } 1967 1968 /* 1969 * Because we need to pass the register value to the helper, 1970 * it's easier to emit the code now, unlike trans_WFI which 1971 * defers it to aarch64_tr_tb_stop(). That means we need to 1972 * check ss_active so that single-stepping a WFIT doesn't halt. 
1973 */ 1974 if (s->ss_active) { 1975 /* Act like a NOP under architectural singlestep */ 1976 return true; 1977 } 1978 1979 gen_a64_update_pc(s, 4); 1980 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1981 /* Go back to the main loop to check for interrupts */ 1982 s->base.is_jmp = DISAS_EXIT; 1983 return true; 1984 } 1985 1986 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1987 { 1988 if (!dc_isar_feature(aa64_wfxt, s)) { 1989 return false; 1990 } 1991 1992 /* 1993 * We rely here on our WFE implementation being a NOP, so we 1994 * don't need to do anything different to handle the WFET timeout 1995 * from what trans_WFE does. 1996 */ 1997 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1998 s->base.is_jmp = DISAS_WFE; 1999 } 2000 return true; 2001 } 2002 2003 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 2004 { 2005 if (s->pauth_active) { 2006 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 2007 } 2008 return true; 2009 } 2010 2011 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 2012 { 2013 if (s->pauth_active) { 2014 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2015 } 2016 return true; 2017 } 2018 2019 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 2020 { 2021 if (s->pauth_active) { 2022 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2023 } 2024 return true; 2025 } 2026 2027 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 2028 { 2029 if (s->pauth_active) { 2030 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2031 } 2032 return true; 2033 } 2034 2035 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 2036 { 2037 if (s->pauth_active) { 2038 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2039 } 2040 return true; 2041 } 2042 2043 static bool trans_ESB(DisasContext *s, arg_ESB *a) 2044 { 2045 /* Without RAS, we must implement this as NOP. */ 2046 if (dc_isar_feature(aa64_ras, s)) { 2047 /* 2048 * QEMU does not have a source of physical SErrors, 2049 * so we are only concerned with virtual SErrors. 2050 * The pseudocode in the ARM for this case is 2051 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 2052 * AArch64.vESBOperation(); 2053 * Most of the condition can be evaluated at translation time. 2054 * Test for EL2 present, and defer test for SEL2 to runtime. 
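     * The EL0/EL1 and "EL2 implemented" parts of the condition are
     * resolved below at translation time; the helper then checks the
     * remaining (SEL2-dependent) conditions before updating the
     * virtual SError state.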
2055 */ 2056 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 2057 gen_helper_vesb(tcg_env); 2058 } 2059 } 2060 return true; 2061 } 2062 2063 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 2064 { 2065 if (s->pauth_active) { 2066 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2067 } 2068 return true; 2069 } 2070 2071 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 2072 { 2073 if (s->pauth_active) { 2074 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2075 } 2076 return true; 2077 } 2078 2079 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 2080 { 2081 if (s->pauth_active) { 2082 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2083 } 2084 return true; 2085 } 2086 2087 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 2088 { 2089 if (s->pauth_active) { 2090 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2091 } 2092 return true; 2093 } 2094 2095 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 2096 { 2097 if (s->pauth_active) { 2098 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2099 } 2100 return true; 2101 } 2102 2103 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 2104 { 2105 if (s->pauth_active) { 2106 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2107 } 2108 return true; 2109 } 2110 2111 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 2112 { 2113 if (s->pauth_active) { 2114 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2115 } 2116 return true; 2117 } 2118 2119 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 2120 { 2121 if (s->pauth_active) { 2122 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2123 } 2124 return true; 2125 } 2126 2127 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 2128 { 2129 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2130 return true; 2131 } 2132 2133 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 2134 { 2135 /* We handle DSB and DMB the same way */ 2136 TCGBar bar; 2137 2138 switch (a->types) { 2139 case 1: /* MBReqTypes_Reads */ 2140 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 2141 break; 2142 case 2: /* MBReqTypes_Writes */ 2143 bar = TCG_BAR_SC | TCG_MO_ST_ST; 2144 break; 2145 default: /* MBReqTypes_All */ 2146 bar = TCG_BAR_SC | TCG_MO_ALL; 2147 break; 2148 } 2149 tcg_gen_mb(bar); 2150 return true; 2151 } 2152 2153 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 2154 { 2155 if (!dc_isar_feature(aa64_xs, s)) { 2156 return false; 2157 } 2158 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 2159 return true; 2160 } 2161 2162 static bool trans_ISB(DisasContext *s, arg_ISB *a) 2163 { 2164 /* 2165 * We need to break the TB after this insn to execute 2166 * self-modifying code correctly and also to take 2167 * any pending interrupts immediately. 2168 */ 2169 reset_btype(s); 2170 gen_goto_tb(s, 0, 4); 2171 return true; 2172 } 2173 2174 static bool trans_SB(DisasContext *s, arg_SB *a) 2175 { 2176 if (!dc_isar_feature(aa64_sb, s)) { 2177 return false; 2178 } 2179 /* 2180 * TODO: There is no speculation barrier opcode for TCG; 2181 * MB and end the TB instead. 
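     * TCG performs no speculative execution, so a full barrier plus
     * ending the TB is a conservative over-approximation of the
     * architectural speculation barrier.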
2182 */ 2183 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2184 gen_goto_tb(s, 0, 4); 2185 return true; 2186 } 2187 2188 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2189 { 2190 if (!dc_isar_feature(aa64_condm_4, s)) { 2191 return false; 2192 } 2193 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2194 return true; 2195 } 2196 2197 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2198 { 2199 TCGv_i32 z; 2200 2201 if (!dc_isar_feature(aa64_condm_5, s)) { 2202 return false; 2203 } 2204 2205 z = tcg_temp_new_i32(); 2206 2207 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2208 2209 /* 2210 * (!C & !Z) << 31 2211 * (!(C | Z)) << 31 2212 * ~((C | Z) << 31) 2213 * ~-(C | Z) 2214 * (C | Z) - 1 2215 */ 2216 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2217 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2218 2219 /* !(Z & C) */ 2220 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2221 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2222 2223 /* (!C & Z) << 31 -> -(Z & ~C) */ 2224 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2225 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2226 2227 /* C | Z */ 2228 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2229 2230 return true; 2231 } 2232 2233 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2234 { 2235 if (!dc_isar_feature(aa64_condm_5, s)) { 2236 return false; 2237 } 2238 2239 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2240 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2241 2242 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2243 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2244 2245 tcg_gen_movi_i32(cpu_NF, 0); 2246 tcg_gen_movi_i32(cpu_VF, 0); 2247 2248 return true; 2249 } 2250 2251 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2252 { 2253 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2254 return false; 2255 } 2256 if (a->imm & 1) { 2257 set_pstate_bits(PSTATE_UAO); 2258 } else { 2259 clear_pstate_bits(PSTATE_UAO); 2260 } 2261 gen_rebuild_hflags(s); 2262 s->base.is_jmp = DISAS_TOO_MANY; 2263 return true; 2264 } 2265 2266 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2267 { 2268 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2269 return false; 2270 } 2271 if (a->imm & 1) { 2272 set_pstate_bits(PSTATE_PAN); 2273 } else { 2274 clear_pstate_bits(PSTATE_PAN); 2275 } 2276 gen_rebuild_hflags(s); 2277 s->base.is_jmp = DISAS_TOO_MANY; 2278 return true; 2279 } 2280 2281 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2282 { 2283 if (s->current_el == 0) { 2284 return false; 2285 } 2286 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2287 s->base.is_jmp = DISAS_TOO_MANY; 2288 return true; 2289 } 2290 2291 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2292 { 2293 if (!dc_isar_feature(aa64_ssbs, s)) { 2294 return false; 2295 } 2296 if (a->imm & 1) { 2297 set_pstate_bits(PSTATE_SSBS); 2298 } else { 2299 clear_pstate_bits(PSTATE_SSBS); 2300 } 2301 /* Don't need to rebuild hflags since SSBS is a nop */ 2302 s->base.is_jmp = DISAS_TOO_MANY; 2303 return true; 2304 } 2305 2306 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2307 { 2308 if (!dc_isar_feature(aa64_dit, s)) { 2309 return false; 2310 } 2311 if (a->imm & 1) { 2312 set_pstate_bits(PSTATE_DIT); 2313 } else { 2314 clear_pstate_bits(PSTATE_DIT); 2315 } 2316 /* There's no need to rebuild hflags because DIT is a nop */ 2317 s->base.is_jmp = DISAS_TOO_MANY; 2318 return true; 2319 } 2320 2321 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2322 { 2323 if (dc_isar_feature(aa64_mte, s)) { 2324 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2325 if (a->imm & 1) { 2326 set_pstate_bits(PSTATE_TCO); 2327 } else { 2328 clear_pstate_bits(PSTATE_TCO); 2329 } 2330 gen_rebuild_hflags(s); 2331 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2332 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2333 return true; 2334 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2335 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2336 return true; 2337 } else { 2338 /* Insn not present */ 2339 return false; 2340 } 2341 } 2342 2343 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2344 { 2345 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2346 s->base.is_jmp = DISAS_TOO_MANY; 2347 return true; 2348 } 2349 2350 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2351 { 2352 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2353 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2354 s->base.is_jmp = DISAS_UPDATE_EXIT; 2355 return true; 2356 } 2357 2358 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2359 { 2360 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2361 return false; 2362 } 2363 2364 if (a->imm == 0) { 2365 clear_pstate_bits(PSTATE_ALLINT); 2366 } else if (s->current_el > 1) { 2367 set_pstate_bits(PSTATE_ALLINT); 2368 } else { 2369 gen_helper_msr_set_allint_el1(tcg_env); 2370 } 2371 2372 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2373 s->base.is_jmp = DISAS_UPDATE_EXIT; 2374 return true; 2375 } 2376 2377 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2378 { 2379 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2380 return false; 2381 } 2382 if (sme_access_check(s)) { 2383 int old = s->pstate_sm | (s->pstate_za << 1); 2384 int new = a->imm * 3; 2385 2386 if ((old ^ new) & a->mask) { 2387 /* At least one bit changes. */ 2388 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2389 tcg_constant_i32(a->mask)); 2390 s->base.is_jmp = DISAS_TOO_MANY; 2391 } 2392 } 2393 return true; 2394 } 2395 2396 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2397 { 2398 TCGv_i32 tmp = tcg_temp_new_i32(); 2399 TCGv_i32 nzcv = tcg_temp_new_i32(); 2400 2401 /* build bit 31, N */ 2402 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2403 /* build bit 30, Z */ 2404 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2405 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2406 /* build bit 29, C */ 2407 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2408 /* build bit 28, V */ 2409 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2410 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2411 /* generate result */ 2412 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2413 } 2414 2415 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2416 { 2417 TCGv_i32 nzcv = tcg_temp_new_i32(); 2418 2419 /* take NZCV from R[t] */ 2420 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2421 2422 /* bit 31, N */ 2423 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2424 /* bit 30, Z */ 2425 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2426 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2427 /* bit 29, C */ 2428 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2429 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2430 /* bit 28, V */ 2431 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2432 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2433 } 2434 2435 static void gen_sysreg_undef(DisasContext *s, bool isread, 2436 uint8_t op0, uint8_t op1, uint8_t op2, 2437 uint8_t crn, uint8_t crm, uint8_t rt) 2438 { 2439 /* 2440 * Generate code to emit an UNDEF with correct syndrome 2441 * information for a failed system register access. 
2442 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2443 * but if FEAT_IDST is implemented then read accesses to registers 2444 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2445 * syndrome. 2446 */ 2447 uint32_t syndrome; 2448 2449 if (isread && dc_isar_feature(aa64_ids, s) && 2450 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2451 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2452 } else { 2453 syndrome = syn_uncategorized(); 2454 } 2455 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2456 } 2457 2458 /* MRS - move from system register 2459 * MSR (register) - move to system register 2460 * SYS 2461 * SYSL 2462 * These are all essentially the same insn in 'read' and 'write' 2463 * versions, with varying op0 fields. 2464 */ 2465 static void handle_sys(DisasContext *s, bool isread, 2466 unsigned int op0, unsigned int op1, unsigned int op2, 2467 unsigned int crn, unsigned int crm, unsigned int rt) 2468 { 2469 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2470 crn, crm, op0, op1, op2); 2471 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2472 bool need_exit_tb = false; 2473 bool nv_trap_to_el2 = false; 2474 bool nv_redirect_reg = false; 2475 bool skip_fp_access_checks = false; 2476 bool nv2_mem_redirect = false; 2477 TCGv_ptr tcg_ri = NULL; 2478 TCGv_i64 tcg_rt; 2479 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2480 2481 if (crn == 11 || crn == 15) { 2482 /* 2483 * Check for TIDCP trap, which must take precedence over 2484 * the UNDEF for "no such register" etc. 2485 */ 2486 switch (s->current_el) { 2487 case 0: 2488 if (dc_isar_feature(aa64_tidcp1, s)) { 2489 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2490 } 2491 break; 2492 case 1: 2493 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2494 break; 2495 } 2496 } 2497 2498 if (!ri) { 2499 /* Unknown register; this might be a guest error or a QEMU 2500 * unimplemented feature. 2501 */ 2502 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2503 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2504 isread ? "read" : "write", op0, op1, crn, crm, op2); 2505 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2506 return; 2507 } 2508 2509 if (s->nv2 && ri->nv2_redirect_offset) { 2510 /* 2511 * Some registers always redirect to memory; some only do so if 2512 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2513 * pairs which share an offset; see the table in R_CSRPQ). 2514 */ 2515 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2516 nv2_mem_redirect = s->nv1; 2517 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2518 nv2_mem_redirect = !s->nv1; 2519 } else { 2520 nv2_mem_redirect = true; 2521 } 2522 } 2523 2524 /* Check access permissions */ 2525 if (!cp_access_ok(s->current_el, ri, isread)) { 2526 /* 2527 * FEAT_NV/NV2 handling does not do the usual FP access checks 2528 * for registers only accessible at EL2 (though it *does* do them 2529 * for registers accessible at EL1). 2530 */ 2531 skip_fp_access_checks = true; 2532 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2533 /* 2534 * This is one of the few EL2 registers which should redirect 2535 * to the equivalent EL1 register. We do that after running 2536 * the EL2 register's accessfn. 2537 */ 2538 nv_redirect_reg = true; 2539 assert(!nv2_mem_redirect); 2540 } else if (nv2_mem_redirect) { 2541 /* 2542 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2543 * UNDEF to EL1. 
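             * Nothing to do here: this branch is empty so that we fall
             * through to the redirect-to-memory handling below instead
             * of UNDEFing.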
2544 */ 2545 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2546 /* 2547 * This register / instruction exists and is an EL2 register, so 2548 * we must trap to EL2 if accessed in nested virtualization EL1 2549 * instead of UNDEFing. We'll do that after the usual access checks. 2550 * (This makes a difference only for a couple of registers like 2551 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2552 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2553 * an accessfn which does nothing when called from EL1, because 2554 * the trap-to-EL3 controls which would apply to that register 2555 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2556 */ 2557 nv_trap_to_el2 = true; 2558 } else { 2559 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2560 return; 2561 } 2562 } 2563 2564 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2565 /* Emit code to perform further access permissions checks at 2566 * runtime; this may result in an exception. 2567 */ 2568 gen_a64_update_pc(s, 0); 2569 tcg_ri = tcg_temp_new_ptr(); 2570 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2571 tcg_constant_i32(key), 2572 tcg_constant_i32(syndrome), 2573 tcg_constant_i32(isread)); 2574 } else if (ri->type & ARM_CP_RAISES_EXC) { 2575 /* 2576 * The readfn or writefn might raise an exception; 2577 * synchronize the CPU state in case it does. 2578 */ 2579 gen_a64_update_pc(s, 0); 2580 } 2581 2582 if (!skip_fp_access_checks) { 2583 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2584 return; 2585 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2586 return; 2587 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2588 return; 2589 } 2590 } 2591 2592 if (nv_trap_to_el2) { 2593 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2594 return; 2595 } 2596 2597 if (nv_redirect_reg) { 2598 /* 2599 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2600 * Conveniently in all cases the encoding of the EL1 register is 2601 * identical to the EL2 register except that opc1 is 0. 2602 * Get the reginfo for the EL1 register to use for the actual access. 2603 * We don't use the EL1 register's access function, and 2604 * fine-grained-traps on EL1 also do not apply here. 2605 */ 2606 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2607 crn, crm, op0, 0, op2); 2608 ri = get_arm_cp_reginfo(s->cp_regs, key); 2609 assert(ri); 2610 assert(cp_access_ok(s->current_el, ri, isread)); 2611 /* 2612 * We might not have done an update_pc earlier, so check we don't 2613 * need it. We could support this in future if necessary. 2614 */ 2615 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2616 } 2617 2618 if (nv2_mem_redirect) { 2619 /* 2620 * This system register is being redirected into an EL2 memory access. 2621 * This means it is not an IO operation, doesn't change hflags, 2622 * and need not end the TB, because it has no side effects. 2623 * 2624 * The access is 64-bit single copy atomic, guaranteed aligned because 2625 * of the definition of VCNR_EL2. Its endianness depends on 2626 * SCTLR_EL2.EE, not on the data endianness of EL1. 2627 * It is done under either the EL2 translation regime or the EL2&0 2628 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2629 * PSTATE.PAN is 0. 2630 */ 2631 TCGv_i64 ptr = tcg_temp_new_i64(); 2632 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2633 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2634 int memidx = arm_to_core_mmu_idx(armmemidx); 2635 uint32_t syn; 2636 2637 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2638 2639 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2640 tcg_gen_addi_i64(ptr, ptr, 2641 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2642 tcg_rt = cpu_reg(s, rt); 2643 2644 syn = syn_data_abort_vncr(0, !isread, 0); 2645 disas_set_insn_syndrome(s, syn); 2646 if (isread) { 2647 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2648 } else { 2649 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2650 } 2651 return; 2652 } 2653 2654 /* Handle special cases first */ 2655 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2656 case 0: 2657 break; 2658 case ARM_CP_NOP: 2659 return; 2660 case ARM_CP_NZCV: 2661 tcg_rt = cpu_reg(s, rt); 2662 if (isread) { 2663 gen_get_nzcv(tcg_rt); 2664 } else { 2665 gen_set_nzcv(tcg_rt); 2666 } 2667 return; 2668 case ARM_CP_CURRENTEL: 2669 { 2670 /* 2671 * Reads as current EL value from pstate, which is 2672 * guaranteed to be constant by the tb flags. 2673 * For nested virt we should report EL2. 2674 */ 2675 int el = s->nv ? 2 : s->current_el; 2676 tcg_rt = cpu_reg(s, rt); 2677 tcg_gen_movi_i64(tcg_rt, el << 2); 2678 return; 2679 } 2680 case ARM_CP_DC_ZVA: 2681 /* Writes clear the aligned block of memory which rt points into. */ 2682 if (s->mte_active[0]) { 2683 int desc = 0; 2684 2685 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2686 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2687 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2688 2689 tcg_rt = tcg_temp_new_i64(); 2690 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2691 tcg_constant_i32(desc), cpu_reg(s, rt)); 2692 } else { 2693 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2694 } 2695 gen_helper_dc_zva(tcg_env, tcg_rt); 2696 return; 2697 case ARM_CP_DC_GVA: 2698 { 2699 TCGv_i64 clean_addr, tag; 2700 2701 /* 2702 * DC_GVA, like DC_ZVA, requires that we supply the original 2703 * pointer for an invalid page. Probe that address first. 2704 */ 2705 tcg_rt = cpu_reg(s, rt); 2706 clean_addr = clean_data_tbi(s, tcg_rt); 2707 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2708 2709 if (s->ata[0]) { 2710 /* Extract the tag from the register to match STZGM. */ 2711 tag = tcg_temp_new_i64(); 2712 tcg_gen_shri_i64(tag, tcg_rt, 56); 2713 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2714 } 2715 } 2716 return; 2717 case ARM_CP_DC_GZVA: 2718 { 2719 TCGv_i64 clean_addr, tag; 2720 2721 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2722 tcg_rt = cpu_reg(s, rt); 2723 clean_addr = clean_data_tbi(s, tcg_rt); 2724 gen_helper_dc_zva(tcg_env, clean_addr); 2725 2726 if (s->ata[0]) { 2727 /* Extract the tag from the register to match STZGM. 
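             * (The allocation tag lives in bits [59:56] of the pointer,
             * so the shift by 56 below leaves it in the low bits for
             * the helper.)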
*/ 2728 tag = tcg_temp_new_i64(); 2729 tcg_gen_shri_i64(tag, tcg_rt, 56); 2730 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2731 } 2732 } 2733 return; 2734 default: 2735 g_assert_not_reached(); 2736 } 2737 2738 if (ri->type & ARM_CP_IO) { 2739 /* I/O operations must end the TB here (whether read or write) */ 2740 need_exit_tb = translator_io_start(&s->base); 2741 } 2742 2743 tcg_rt = cpu_reg(s, rt); 2744 2745 if (isread) { 2746 if (ri->type & ARM_CP_CONST) { 2747 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2748 } else if (ri->readfn) { 2749 if (!tcg_ri) { 2750 tcg_ri = gen_lookup_cp_reg(key); 2751 } 2752 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2753 } else { 2754 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2755 } 2756 } else { 2757 if (ri->type & ARM_CP_CONST) { 2758 /* If not forbidden by access permissions, treat as WI */ 2759 return; 2760 } else if (ri->writefn) { 2761 if (!tcg_ri) { 2762 tcg_ri = gen_lookup_cp_reg(key); 2763 } 2764 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2765 } else { 2766 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2767 } 2768 } 2769 2770 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2771 /* 2772 * A write to any coprocessor register that ends a TB 2773 * must rebuild the hflags for the next TB. 2774 */ 2775 gen_rebuild_hflags(s); 2776 /* 2777 * We default to ending the TB on a coprocessor register write, 2778 * but allow this to be suppressed by the register definition 2779 * (usually only necessary to work around guest bugs). 2780 */ 2781 need_exit_tb = true; 2782 } 2783 if (need_exit_tb) { 2784 s->base.is_jmp = DISAS_UPDATE_EXIT; 2785 } 2786 } 2787 2788 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2789 { 2790 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2791 return true; 2792 } 2793 2794 static bool trans_SVC(DisasContext *s, arg_i *a) 2795 { 2796 /* 2797 * For SVC, HVC and SMC we advance the single-step state 2798 * machine before taking the exception. This is architecturally 2799 * mandated, to ensure that single-stepping a system call 2800 * instruction works properly. 2801 */ 2802 uint32_t syndrome = syn_aa64_svc(a->imm); 2803 if (s->fgt_svc) { 2804 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2805 return true; 2806 } 2807 gen_ss_advance(s); 2808 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2809 return true; 2810 } 2811 2812 static bool trans_HVC(DisasContext *s, arg_i *a) 2813 { 2814 int target_el = s->current_el == 3 ? 3 : 2; 2815 2816 if (s->current_el == 0) { 2817 unallocated_encoding(s); 2818 return true; 2819 } 2820 /* 2821 * The pre HVC helper handles cases when HVC gets trapped 2822 * as an undefined insn by runtime configuration. 
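     * (For example, HVC is UNDEF when disabled by SCR_EL3.HCE == 0 or
     * HCR_EL2.HCD == 1.)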
2823 */ 2824 gen_a64_update_pc(s, 0); 2825 gen_helper_pre_hvc(tcg_env); 2826 /* Architecture requires ss advance before we do the actual work */ 2827 gen_ss_advance(s); 2828 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2829 return true; 2830 } 2831 2832 static bool trans_SMC(DisasContext *s, arg_i *a) 2833 { 2834 if (s->current_el == 0) { 2835 unallocated_encoding(s); 2836 return true; 2837 } 2838 gen_a64_update_pc(s, 0); 2839 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2840 /* Architecture requires ss advance before we do the actual work */ 2841 gen_ss_advance(s); 2842 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2843 return true; 2844 } 2845 2846 static bool trans_BRK(DisasContext *s, arg_i *a) 2847 { 2848 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2849 return true; 2850 } 2851 2852 static bool trans_HLT(DisasContext *s, arg_i *a) 2853 { 2854 /* 2855 * HLT. This has two purposes. 2856 * Architecturally, it is an external halting debug instruction. 2857 * Since QEMU doesn't implement external debug, we treat this as 2858 * it is required for halting debug disabled: it will UNDEF. 2859 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2860 */ 2861 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2862 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2863 } else { 2864 unallocated_encoding(s); 2865 } 2866 return true; 2867 } 2868 2869 /* 2870 * Load/Store exclusive instructions are implemented by remembering 2871 * the value/address loaded, and seeing if these are the same 2872 * when the store is performed. This is not actually the architecturally 2873 * mandated semantics, but it works for typical guest code sequences 2874 * and avoids having to monitor regular stores. 2875 * 2876 * The store exclusive uses the atomic cmpxchg primitives to avoid 2877 * races in multi-threaded linux-user and when MTTCG softmmu is 2878 * enabled. 
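 * In outline: the load-exclusive records the (TBI-cleaned) address and
 * the loaded data in cpu_exclusive_addr, cpu_exclusive_val and, for
 * pairs, cpu_exclusive_high; the store-exclusive then issues a cmpxchg
 * of the new data against the remembered value at that address, so a
 * racing store from another vCPU changes the value, the comparison
 * fails, and the store-exclusive reports failure.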
2879 */ 2880 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2881 int size, bool is_pair) 2882 { 2883 int idx = get_mem_index(s); 2884 TCGv_i64 dirty_addr, clean_addr; 2885 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2886 2887 s->is_ldex = true; 2888 dirty_addr = cpu_reg_sp(s, rn); 2889 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2890 2891 g_assert(size <= 3); 2892 if (is_pair) { 2893 g_assert(size >= 2); 2894 if (size == 2) { 2895 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2896 if (s->be_data == MO_LE) { 2897 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2898 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2899 } else { 2900 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2901 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2902 } 2903 } else { 2904 TCGv_i128 t16 = tcg_temp_new_i128(); 2905 2906 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2907 2908 if (s->be_data == MO_LE) { 2909 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2910 cpu_exclusive_high, t16); 2911 } else { 2912 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2913 cpu_exclusive_val, t16); 2914 } 2915 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2916 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2917 } 2918 } else { 2919 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2920 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2921 } 2922 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2923 } 2924 2925 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2926 int rn, int size, int is_pair) 2927 { 2928 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2929 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2930 * [addr] = {Rt}; 2931 * if (is_pair) { 2932 * [addr + datasize] = {Rt2}; 2933 * } 2934 * {Rd} = 0; 2935 * } else { 2936 * {Rd} = 1; 2937 * } 2938 * env->exclusive_addr = -1; 2939 */ 2940 TCGLabel *fail_label = gen_new_label(); 2941 TCGLabel *done_label = gen_new_label(); 2942 TCGv_i64 tmp, clean_addr; 2943 MemOp memop; 2944 2945 /* 2946 * FIXME: We are out of spec here. We have recorded only the address 2947 * from load_exclusive, not the entire range, and we assume that the 2948 * size of the access on both sides match. The architecture allows the 2949 * store to be smaller than the load, so long as the stored bytes are 2950 * within the range recorded by the load. 2951 */ 2952 2953 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2954 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2955 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2956 2957 /* 2958 * The write, and any associated faults, only happen if the virtual 2959 * and physical addresses pass the exclusive monitor check. These 2960 * faults are exceedingly unlikely, because normally the guest uses 2961 * the exact same address register for the load_exclusive, and we 2962 * would have recognized these faults there. 2963 * 2964 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2965 * unaligned 4-byte write within the range of an aligned 8-byte load. 2966 * With LSE2, the store would need to cross a 16-byte boundary when the 2967 * load did not, which would mean the store is outside the range 2968 * recorded for the monitor, which would have failed a corrected monitor 2969 * check above. 
For now, we assume no size change and retain the 2970 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2971 * 2972 * It is possible to trigger an MTE fault, by performing the load with 2973 * a virtual address with a valid tag and performing the store with the 2974 * same virtual address and a different invalid tag. 2975 */ 2976 memop = size + is_pair; 2977 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2978 memop |= MO_ALIGN; 2979 } 2980 memop = finalize_memop(s, memop); 2981 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2982 2983 tmp = tcg_temp_new_i64(); 2984 if (is_pair) { 2985 if (size == 2) { 2986 if (s->be_data == MO_LE) { 2987 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2988 } else { 2989 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2990 } 2991 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2992 cpu_exclusive_val, tmp, 2993 get_mem_index(s), memop); 2994 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2995 } else { 2996 TCGv_i128 t16 = tcg_temp_new_i128(); 2997 TCGv_i128 c16 = tcg_temp_new_i128(); 2998 TCGv_i64 a, b; 2999 3000 if (s->be_data == MO_LE) { 3001 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 3002 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 3003 cpu_exclusive_high); 3004 } else { 3005 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 3006 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 3007 cpu_exclusive_val); 3008 } 3009 3010 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 3011 get_mem_index(s), memop); 3012 3013 a = tcg_temp_new_i64(); 3014 b = tcg_temp_new_i64(); 3015 if (s->be_data == MO_LE) { 3016 tcg_gen_extr_i128_i64(a, b, t16); 3017 } else { 3018 tcg_gen_extr_i128_i64(b, a, t16); 3019 } 3020 3021 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 3022 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 3023 tcg_gen_or_i64(tmp, a, b); 3024 3025 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 3026 } 3027 } else { 3028 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 3029 cpu_reg(s, rt), get_mem_index(s), memop); 3030 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 3031 } 3032 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 3033 tcg_gen_br(done_label); 3034 3035 gen_set_label(fail_label); 3036 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 3037 gen_set_label(done_label); 3038 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 3039 } 3040 3041 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 3042 int rn, int size) 3043 { 3044 TCGv_i64 tcg_rs = cpu_reg(s, rs); 3045 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3046 int memidx = get_mem_index(s); 3047 TCGv_i64 clean_addr; 3048 MemOp memop; 3049 3050 if (rn == 31) { 3051 gen_check_sp_alignment(s); 3052 } 3053 memop = check_atomic_align(s, rn, size); 3054 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3055 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 3056 memidx, memop); 3057 } 3058 3059 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 3060 int rn, int size) 3061 { 3062 TCGv_i64 s1 = cpu_reg(s, rs); 3063 TCGv_i64 s2 = cpu_reg(s, rs + 1); 3064 TCGv_i64 t1 = cpu_reg(s, rt); 3065 TCGv_i64 t2 = cpu_reg(s, rt + 1); 3066 TCGv_i64 clean_addr; 3067 int memidx = get_mem_index(s); 3068 MemOp memop; 3069 3070 if (rn == 31) { 3071 gen_check_sp_alignment(s); 3072 } 3073 3074 /* This is a single atomic access, despite the "pair". 
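     * We use a single element of double the size (size + 1), with the
     * two registers concatenated in memory order, rather than two
     * separate accesses.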
*/ 3075 memop = check_atomic_align(s, rn, size + 1); 3076 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3077 3078 if (size == 2) { 3079 TCGv_i64 cmp = tcg_temp_new_i64(); 3080 TCGv_i64 val = tcg_temp_new_i64(); 3081 3082 if (s->be_data == MO_LE) { 3083 tcg_gen_concat32_i64(val, t1, t2); 3084 tcg_gen_concat32_i64(cmp, s1, s2); 3085 } else { 3086 tcg_gen_concat32_i64(val, t2, t1); 3087 tcg_gen_concat32_i64(cmp, s2, s1); 3088 } 3089 3090 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 3091 3092 if (s->be_data == MO_LE) { 3093 tcg_gen_extr32_i64(s1, s2, cmp); 3094 } else { 3095 tcg_gen_extr32_i64(s2, s1, cmp); 3096 } 3097 } else { 3098 TCGv_i128 cmp = tcg_temp_new_i128(); 3099 TCGv_i128 val = tcg_temp_new_i128(); 3100 3101 if (s->be_data == MO_LE) { 3102 tcg_gen_concat_i64_i128(val, t1, t2); 3103 tcg_gen_concat_i64_i128(cmp, s1, s2); 3104 } else { 3105 tcg_gen_concat_i64_i128(val, t2, t1); 3106 tcg_gen_concat_i64_i128(cmp, s2, s1); 3107 } 3108 3109 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 3110 3111 if (s->be_data == MO_LE) { 3112 tcg_gen_extr_i128_i64(s1, s2, cmp); 3113 } else { 3114 tcg_gen_extr_i128_i64(s2, s1, cmp); 3115 } 3116 } 3117 } 3118 3119 /* 3120 * Compute the ISS.SF bit for syndrome information if an exception 3121 * is taken on a load or store. This indicates whether the instruction 3122 * is accessing a 32-bit or 64-bit register. This logic is derived 3123 * from the ARMv8 specs for LDR (Shared decode for all encodings). 3124 */ 3125 static bool ldst_iss_sf(int size, bool sign, bool ext) 3126 { 3127 3128 if (sign) { 3129 /* 3130 * Signed loads are 64 bit results if we are not going to 3131 * do a zero-extend from 32 to 64 after the load. 3132 * (For a store, sign and ext are always false.) 3133 */ 3134 return !ext; 3135 } else { 3136 /* Unsigned loads/stores work at the specified size */ 3137 return size == MO_64; 3138 } 3139 } 3140 3141 static bool trans_STXR(DisasContext *s, arg_stxr *a) 3142 { 3143 if (a->rn == 31) { 3144 gen_check_sp_alignment(s); 3145 } 3146 if (a->lasr) { 3147 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3148 } 3149 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 3150 return true; 3151 } 3152 3153 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 3154 { 3155 if (a->rn == 31) { 3156 gen_check_sp_alignment(s); 3157 } 3158 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 3159 if (a->lasr) { 3160 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3161 } 3162 return true; 3163 } 3164 3165 static bool trans_STLR(DisasContext *s, arg_stlr *a) 3166 { 3167 TCGv_i64 clean_addr; 3168 MemOp memop; 3169 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3170 3171 /* 3172 * StoreLORelease is the same as Store-Release for QEMU, but 3173 * needs the feature-test. 3174 */ 3175 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3176 return false; 3177 } 3178 /* Generate ISS for non-exclusive accesses including LASR. 
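     * (The iss_sf and a->lasr values passed to do_gpr_st below populate
     * the syndrome used if the store takes a data abort.)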
*/ 3179 if (a->rn == 31) { 3180 gen_check_sp_alignment(s); 3181 } 3182 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3183 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3184 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3185 true, a->rn != 31, memop); 3186 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3187 iss_sf, a->lasr); 3188 return true; 3189 } 3190 3191 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3192 { 3193 TCGv_i64 clean_addr; 3194 MemOp memop; 3195 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3196 3197 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3198 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3199 return false; 3200 } 3201 /* Generate ISS for non-exclusive accesses including LASR. */ 3202 if (a->rn == 31) { 3203 gen_check_sp_alignment(s); 3204 } 3205 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3206 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3207 false, a->rn != 31, memop); 3208 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3209 a->rt, iss_sf, a->lasr); 3210 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3211 return true; 3212 } 3213 3214 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3215 { 3216 if (a->rn == 31) { 3217 gen_check_sp_alignment(s); 3218 } 3219 if (a->lasr) { 3220 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3221 } 3222 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3223 return true; 3224 } 3225 3226 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3227 { 3228 if (a->rn == 31) { 3229 gen_check_sp_alignment(s); 3230 } 3231 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3232 if (a->lasr) { 3233 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3234 } 3235 return true; 3236 } 3237 3238 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3239 { 3240 if (!dc_isar_feature(aa64_atomics, s)) { 3241 return false; 3242 } 3243 if (((a->rt | a->rs) & 1) != 0) { 3244 return false; 3245 } 3246 3247 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3248 return true; 3249 } 3250 3251 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3252 { 3253 if (!dc_isar_feature(aa64_atomics, s)) { 3254 return false; 3255 } 3256 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3257 return true; 3258 } 3259 3260 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3261 { 3262 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3263 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3264 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3265 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3266 3267 gen_pc_plus_diff(s, clean_addr, a->imm); 3268 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3269 false, true, a->rt, iss_sf, false); 3270 return true; 3271 } 3272 3273 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3274 { 3275 /* Load register (literal), vector version */ 3276 TCGv_i64 clean_addr; 3277 MemOp memop; 3278 3279 if (!fp_access_check(s)) { 3280 return true; 3281 } 3282 memop = finalize_memop_asimd(s, a->sz); 3283 clean_addr = tcg_temp_new_i64(); 3284 gen_pc_plus_diff(s, clean_addr, a->imm); 3285 do_fp_ld(s, a->rt, clean_addr, memop); 3286 return true; 3287 } 3288 3289 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3290 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3291 uint64_t offset, bool is_store, MemOp mop) 3292 { 3293 if (a->rn == 31) { 3294 gen_check_sp_alignment(s); 3295 } 3296 3297 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3298 if (!a->p) { 3299 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3300 } 3301 3302 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3303 (a->w || a->rn != 31), 2 << a->sz, mop); 3304 } 3305 3306 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3307 TCGv_i64 dirty_addr, uint64_t offset) 3308 { 3309 if (a->w) { 3310 if (a->p) { 3311 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3312 } 3313 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3314 } 3315 } 3316 3317 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3318 { 3319 uint64_t offset = a->imm << a->sz; 3320 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3321 MemOp mop = finalize_memop(s, a->sz); 3322 3323 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3324 tcg_rt = cpu_reg(s, a->rt); 3325 tcg_rt2 = cpu_reg(s, a->rt2); 3326 /* 3327 * We built mop above for the single logical access -- rebuild it 3328 * now for the paired operation. 3329 * 3330 * With LSE2, non-sign-extending pairs are treated atomically if 3331 * aligned, and if unaligned one of the pair will be completely 3332 * within a 16-byte block and that element will be atomic. 3333 * Otherwise each element is separately atomic. 3334 * In all cases, issue one operation with the correct atomicity. 3335 */ 3336 mop = a->sz + 1; 3337 if (s->align_mem) { 3338 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3339 } 3340 mop = finalize_memop_pair(s, mop); 3341 if (a->sz == 2) { 3342 TCGv_i64 tmp = tcg_temp_new_i64(); 3343 3344 if (s->be_data == MO_LE) { 3345 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3346 } else { 3347 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3348 } 3349 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3350 } else { 3351 TCGv_i128 tmp = tcg_temp_new_i128(); 3352 3353 if (s->be_data == MO_LE) { 3354 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3355 } else { 3356 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3357 } 3358 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3359 } 3360 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3361 return true; 3362 } 3363 3364 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3365 { 3366 uint64_t offset = a->imm << a->sz; 3367 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3368 MemOp mop = finalize_memop(s, a->sz); 3369 3370 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3371 tcg_rt = cpu_reg(s, a->rt); 3372 tcg_rt2 = cpu_reg(s, a->rt2); 3373 3374 /* 3375 * We built mop above for the single logical access -- rebuild it 3376 * now for the paired operation. 3377 * 3378 * With LSE2, non-sign-extending pairs are treated atomically if 3379 * aligned, and if unaligned one of the pair will be completely 3380 * within a 16-byte block and that element will be atomic. 3381 * Otherwise each element is separately atomic. 3382 * In all cases, issue one operation with the correct atomicity. 3383 * 3384 * This treats sign-extending loads like zero-extending loads, 3385 * since that reuses the most code below. 3386 */ 3387 mop = a->sz + 1; 3388 if (s->align_mem) { 3389 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3390 } 3391 mop = finalize_memop_pair(s, mop); 3392 if (a->sz == 2) { 3393 int o2 = s->be_data == MO_LE ? 
32 : 0; 3394 int o1 = o2 ^ 32; 3395 3396 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3397 if (a->sign) { 3398 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3399 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3400 } else { 3401 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3402 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3403 } 3404 } else { 3405 TCGv_i128 tmp = tcg_temp_new_i128(); 3406 3407 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3408 if (s->be_data == MO_LE) { 3409 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3410 } else { 3411 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3412 } 3413 } 3414 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3415 return true; 3416 } 3417 3418 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3419 { 3420 uint64_t offset = a->imm << a->sz; 3421 TCGv_i64 clean_addr, dirty_addr; 3422 MemOp mop; 3423 3424 if (!fp_access_check(s)) { 3425 return true; 3426 } 3427 3428 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3429 mop = finalize_memop_asimd(s, a->sz); 3430 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3431 do_fp_st(s, a->rt, clean_addr, mop); 3432 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3433 do_fp_st(s, a->rt2, clean_addr, mop); 3434 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3435 return true; 3436 } 3437 3438 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3439 { 3440 uint64_t offset = a->imm << a->sz; 3441 TCGv_i64 clean_addr, dirty_addr; 3442 MemOp mop; 3443 3444 if (!fp_access_check(s)) { 3445 return true; 3446 } 3447 3448 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3449 mop = finalize_memop_asimd(s, a->sz); 3450 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3451 do_fp_ld(s, a->rt, clean_addr, mop); 3452 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3453 do_fp_ld(s, a->rt2, clean_addr, mop); 3454 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3455 return true; 3456 } 3457 3458 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3459 { 3460 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3461 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3462 MemOp mop; 3463 TCGv_i128 tmp; 3464 3465 /* STGP only comes in one size. */ 3466 tcg_debug_assert(a->sz == MO_64); 3467 3468 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3469 return false; 3470 } 3471 3472 if (a->rn == 31) { 3473 gen_check_sp_alignment(s); 3474 } 3475 3476 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3477 if (!a->p) { 3478 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3479 } 3480 3481 clean_addr = clean_data_tbi(s, dirty_addr); 3482 tcg_rt = cpu_reg(s, a->rt); 3483 tcg_rt2 = cpu_reg(s, a->rt2); 3484 3485 /* 3486 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3487 * and one tag operation. We implement it as one single aligned 16-byte 3488 * memory operation for convenience. Note that the alignment ensures 3489 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3490 */ 3491 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3492 3493 tmp = tcg_temp_new_i128(); 3494 if (s->be_data == MO_LE) { 3495 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3496 } else { 3497 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3498 } 3499 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3500 3501 /* Perform the tag store, if tag access enabled. 
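     * The CF_PARALLEL variant must update the tag memory atomically
     * with respect to other vCPUs; otherwise a plain tag write is
     * sufficient.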
*/ 3502 if (s->ata[0]) { 3503 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3504 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3505 } else { 3506 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3507 } 3508 } 3509 3510 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3511 return true; 3512 } 3513 3514 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3515 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3516 uint64_t offset, bool is_store, MemOp mop) 3517 { 3518 int memidx; 3519 3520 if (a->rn == 31) { 3521 gen_check_sp_alignment(s); 3522 } 3523 3524 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3525 if (!a->p) { 3526 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3527 } 3528 memidx = get_a64_user_mem_index(s, a->unpriv); 3529 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3530 a->w || a->rn != 31, 3531 mop, a->unpriv, memidx); 3532 } 3533 3534 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3535 TCGv_i64 dirty_addr, uint64_t offset) 3536 { 3537 if (a->w) { 3538 if (a->p) { 3539 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3540 } 3541 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3542 } 3543 } 3544 3545 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3546 { 3547 bool iss_sf, iss_valid = !a->w; 3548 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3549 int memidx = get_a64_user_mem_index(s, a->unpriv); 3550 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3551 3552 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3553 3554 tcg_rt = cpu_reg(s, a->rt); 3555 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3556 3557 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3558 iss_valid, a->rt, iss_sf, false); 3559 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3560 return true; 3561 } 3562 3563 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3564 { 3565 bool iss_sf, iss_valid = !a->w; 3566 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3567 int memidx = get_a64_user_mem_index(s, a->unpriv); 3568 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3569 3570 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3571 3572 tcg_rt = cpu_reg(s, a->rt); 3573 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3574 3575 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3576 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3577 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3578 return true; 3579 } 3580 3581 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3582 { 3583 TCGv_i64 clean_addr, dirty_addr; 3584 MemOp mop; 3585 3586 if (!fp_access_check(s)) { 3587 return true; 3588 } 3589 mop = finalize_memop_asimd(s, a->sz); 3590 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3591 do_fp_st(s, a->rt, clean_addr, mop); 3592 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3593 return true; 3594 } 3595 3596 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3597 { 3598 TCGv_i64 clean_addr, dirty_addr; 3599 MemOp mop; 3600 3601 if (!fp_access_check(s)) { 3602 return true; 3603 } 3604 mop = finalize_memop_asimd(s, a->sz); 3605 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3606 do_fp_ld(s, a->rt, clean_addr, mop); 3607 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3608 return true; 3609 } 3610 3611 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3612 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3613 bool is_store, MemOp memop) 3614 { 3615 TCGv_i64 tcg_rm; 3616 3617 if (a->rn == 31) { 3618 
gen_check_sp_alignment(s); 3619 } 3620 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3621 3622 tcg_rm = read_cpu_reg(s, a->rm, 1); 3623 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3624 3625 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3626 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3627 } 3628 3629 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3630 { 3631 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3632 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3633 MemOp memop; 3634 3635 if (extract32(a->opt, 1, 1) == 0) { 3636 return false; 3637 } 3638 3639 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3640 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3641 tcg_rt = cpu_reg(s, a->rt); 3642 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3643 a->ext, true, a->rt, iss_sf, false); 3644 return true; 3645 } 3646 3647 static bool trans_STR(DisasContext *s, arg_ldst *a) 3648 { 3649 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3650 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3651 MemOp memop; 3652 3653 if (extract32(a->opt, 1, 1) == 0) { 3654 return false; 3655 } 3656 3657 memop = finalize_memop(s, a->sz); 3658 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3659 tcg_rt = cpu_reg(s, a->rt); 3660 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3661 return true; 3662 } 3663 3664 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3665 { 3666 TCGv_i64 clean_addr, dirty_addr; 3667 MemOp memop; 3668 3669 if (extract32(a->opt, 1, 1) == 0) { 3670 return false; 3671 } 3672 3673 if (!fp_access_check(s)) { 3674 return true; 3675 } 3676 3677 memop = finalize_memop_asimd(s, a->sz); 3678 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3679 do_fp_ld(s, a->rt, clean_addr, memop); 3680 return true; 3681 } 3682 3683 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3684 { 3685 TCGv_i64 clean_addr, dirty_addr; 3686 MemOp memop; 3687 3688 if (extract32(a->opt, 1, 1) == 0) { 3689 return false; 3690 } 3691 3692 if (!fp_access_check(s)) { 3693 return true; 3694 } 3695 3696 memop = finalize_memop_asimd(s, a->sz); 3697 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3698 do_fp_st(s, a->rt, clean_addr, memop); 3699 return true; 3700 } 3701 3702 3703 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3704 int sign, bool invert) 3705 { 3706 MemOp mop = a->sz | sign; 3707 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3708 3709 if (a->rn == 31) { 3710 gen_check_sp_alignment(s); 3711 } 3712 mop = check_atomic_align(s, a->rn, mop); 3713 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3714 a->rn != 31, mop); 3715 tcg_rs = read_cpu_reg(s, a->rs, true); 3716 tcg_rt = cpu_reg(s, a->rt); 3717 if (invert) { 3718 tcg_gen_not_i64(tcg_rs, tcg_rs); 3719 } 3720 /* 3721 * The tcg atomic primitives are all full barriers. Therefore we 3722 * can ignore the Acquire and Release bits of this instruction. 
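     * For the signed min/max ops the value is fetched sign-extended so
     * the comparison is done correctly; the switch below then
     * re-zero-extends the result written back to Rt.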
3723 */ 3724 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3725 3726 if (mop & MO_SIGN) { 3727 switch (a->sz) { 3728 case MO_8: 3729 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3730 break; 3731 case MO_16: 3732 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3733 break; 3734 case MO_32: 3735 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3736 break; 3737 case MO_64: 3738 break; 3739 default: 3740 g_assert_not_reached(); 3741 } 3742 } 3743 return true; 3744 } 3745 3746 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3747 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3748 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3749 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3750 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3751 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3752 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3753 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3754 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3755 3756 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3757 { 3758 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3759 TCGv_i64 clean_addr; 3760 MemOp mop; 3761 3762 if (!dc_isar_feature(aa64_atomics, s) || 3763 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3764 return false; 3765 } 3766 if (a->rn == 31) { 3767 gen_check_sp_alignment(s); 3768 } 3769 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3770 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3771 a->rn != 31, mop); 3772 /* 3773 * LDAPR* are a special case because they are a simple load, not a 3774 * fetch-and-do-something op. 3775 * The architectural consistency requirements here are weaker than 3776 * full load-acquire (we only need "load-acquire processor consistent"), 3777 * but we choose to implement them as full LDAQ. 3778 */ 3779 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3780 true, a->rt, iss_sf, true); 3781 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3782 return true; 3783 } 3784 3785 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3786 { 3787 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3788 MemOp memop; 3789 3790 /* Load with pointer authentication */ 3791 if (!dc_isar_feature(aa64_pauth, s)) { 3792 return false; 3793 } 3794 3795 if (a->rn == 31) { 3796 gen_check_sp_alignment(s); 3797 } 3798 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3799 3800 if (s->pauth_active) { 3801 if (!a->m) { 3802 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3803 tcg_constant_i64(0)); 3804 } else { 3805 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3806 tcg_constant_i64(0)); 3807 } 3808 } 3809 3810 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3811 3812 memop = finalize_memop(s, MO_64); 3813 3814 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
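     * Any pointer-authentication code has already been checked and
     * stripped by the autda/autdb helper above; gen_mte_check1 below
     * handles only the top-byte-ignore / MTE aspects of the address.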
*/ 3815 clean_addr = gen_mte_check1(s, dirty_addr, false, 3816 a->w || a->rn != 31, memop); 3817 3818 tcg_rt = cpu_reg(s, a->rt); 3819 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3820 /* extend */ false, /* iss_valid */ !a->w, 3821 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3822 3823 if (a->w) { 3824 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3825 } 3826 return true; 3827 } 3828 3829 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3830 { 3831 TCGv_i64 clean_addr, dirty_addr; 3832 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3833 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3834 3835 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3836 return false; 3837 } 3838 3839 if (a->rn == 31) { 3840 gen_check_sp_alignment(s); 3841 } 3842 3843 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3844 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3845 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3846 clean_addr = clean_data_tbi(s, dirty_addr); 3847 3848 /* 3849 * Load-AcquirePC semantics; we implement as the slightly more 3850 * restrictive Load-Acquire. 3851 */ 3852 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3853 a->rt, iss_sf, true); 3854 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3855 return true; 3856 } 3857 3858 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3859 { 3860 TCGv_i64 clean_addr, dirty_addr; 3861 MemOp mop = a->sz; 3862 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3863 3864 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3865 return false; 3866 } 3867 3868 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3869 3870 if (a->rn == 31) { 3871 gen_check_sp_alignment(s); 3872 } 3873 3874 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3875 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3876 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3877 clean_addr = clean_data_tbi(s, dirty_addr); 3878 3879 /* Store-Release semantics */ 3880 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3881 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3882 return true; 3883 } 3884 3885 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3886 { 3887 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3888 MemOp endian, align, mop; 3889 3890 int total; /* total bytes */ 3891 int elements; /* elements per vector */ 3892 int r; 3893 int size = a->sz; 3894 3895 if (!a->p && a->rm != 0) { 3896 /* For non-postindexed accesses the Rm field must be 0 */ 3897 return false; 3898 } 3899 if (size == 3 && !a->q && a->selem != 1) { 3900 return false; 3901 } 3902 if (!fp_access_check(s)) { 3903 return true; 3904 } 3905 3906 if (a->rn == 31) { 3907 gen_check_sp_alignment(s); 3908 } 3909 3910 /* For our purposes, bytes are always little-endian. */ 3911 endian = s->be_data; 3912 if (size == 0) { 3913 endian = MO_LE; 3914 } 3915 3916 total = a->rpt * a->selem * (a->q ? 16 : 8); 3917 tcg_rn = cpu_reg_sp(s, a->rn); 3918 3919 /* 3920 * Issue the MTE check vs the logical repeat count, before we 3921 * promote consecutive little-endian elements below. 3922 */ 3923 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3924 finalize_memop_asimd(s, size)); 3925 3926 /* 3927 * Consecutive little-endian elements from a single register 3928 * can be promoted to a larger little-endian operation. 3929 */ 3930 align = MO_ALIGN; 3931 if (a->selem == 1 && endian == MO_LE) { 3932 align = pow2_align(size); 3933 size = 3; 3934 } 3935 if (!s->align_mem) { 3936 align = 0; 3937 } 3938 mop = endian | size | align; 3939 3940 elements = (a->q ? 
16 : 8) >> size; 3941 tcg_ebytes = tcg_constant_i64(1 << size); 3942 for (r = 0; r < a->rpt; r++) { 3943 int e; 3944 for (e = 0; e < elements; e++) { 3945 int xs; 3946 for (xs = 0; xs < a->selem; xs++) { 3947 int tt = (a->rt + r + xs) % 32; 3948 do_vec_ld(s, tt, e, clean_addr, mop); 3949 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3950 } 3951 } 3952 } 3953 3954 /* 3955 * For non-quad operations, setting a slice of the low 64 bits of 3956 * the register clears the high 64 bits (in the ARM ARM pseudocode 3957 * this is implicit in the fact that 'rval' is a 64 bit wide 3958 * variable). For quad operations, we might still need to zero 3959 * the high bits of SVE. 3960 */ 3961 for (r = 0; r < a->rpt * a->selem; r++) { 3962 int tt = (a->rt + r) % 32; 3963 clear_vec_high(s, a->q, tt); 3964 } 3965 3966 if (a->p) { 3967 if (a->rm == 31) { 3968 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3969 } else { 3970 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3971 } 3972 } 3973 return true; 3974 } 3975 3976 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3977 { 3978 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3979 MemOp endian, align, mop; 3980 3981 int total; /* total bytes */ 3982 int elements; /* elements per vector */ 3983 int r; 3984 int size = a->sz; 3985 3986 if (!a->p && a->rm != 0) { 3987 /* For non-postindexed accesses the Rm field must be 0 */ 3988 return false; 3989 } 3990 if (size == 3 && !a->q && a->selem != 1) { 3991 return false; 3992 } 3993 if (!fp_access_check(s)) { 3994 return true; 3995 } 3996 3997 if (a->rn == 31) { 3998 gen_check_sp_alignment(s); 3999 } 4000 4001 /* For our purposes, bytes are always little-endian. */ 4002 endian = s->be_data; 4003 if (size == 0) { 4004 endian = MO_LE; 4005 } 4006 4007 total = a->rpt * a->selem * (a->q ? 16 : 8); 4008 tcg_rn = cpu_reg_sp(s, a->rn); 4009 4010 /* 4011 * Issue the MTE check vs the logical repeat count, before we 4012 * promote consecutive little-endian elements below. 4013 */ 4014 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 4015 finalize_memop_asimd(s, size)); 4016 4017 /* 4018 * Consecutive little-endian elements from a single register 4019 * can be promoted to a larger little-endian operation. 4020 */ 4021 align = MO_ALIGN; 4022 if (a->selem == 1 && endian == MO_LE) { 4023 align = pow2_align(size); 4024 size = 3; 4025 } 4026 if (!s->align_mem) { 4027 align = 0; 4028 } 4029 mop = endian | size | align; 4030 4031 elements = (a->q ? 
16 : 8) >> size; 4032 tcg_ebytes = tcg_constant_i64(1 << size); 4033 for (r = 0; r < a->rpt; r++) { 4034 int e; 4035 for (e = 0; e < elements; e++) { 4036 int xs; 4037 for (xs = 0; xs < a->selem; xs++) { 4038 int tt = (a->rt + r + xs) % 32; 4039 do_vec_st(s, tt, e, clean_addr, mop); 4040 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4041 } 4042 } 4043 } 4044 4045 if (a->p) { 4046 if (a->rm == 31) { 4047 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4048 } else { 4049 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4050 } 4051 } 4052 return true; 4053 } 4054 4055 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 4056 { 4057 int xs, total, rt; 4058 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4059 MemOp mop; 4060 4061 if (!a->p && a->rm != 0) { 4062 return false; 4063 } 4064 if (!fp_access_check(s)) { 4065 return true; 4066 } 4067 4068 if (a->rn == 31) { 4069 gen_check_sp_alignment(s); 4070 } 4071 4072 total = a->selem << a->scale; 4073 tcg_rn = cpu_reg_sp(s, a->rn); 4074 4075 mop = finalize_memop_asimd(s, a->scale); 4076 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 4077 total, mop); 4078 4079 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4080 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4081 do_vec_st(s, rt, a->index, clean_addr, mop); 4082 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4083 } 4084 4085 if (a->p) { 4086 if (a->rm == 31) { 4087 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4088 } else { 4089 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4090 } 4091 } 4092 return true; 4093 } 4094 4095 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 4096 { 4097 int xs, total, rt; 4098 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4099 MemOp mop; 4100 4101 if (!a->p && a->rm != 0) { 4102 return false; 4103 } 4104 if (!fp_access_check(s)) { 4105 return true; 4106 } 4107 4108 if (a->rn == 31) { 4109 gen_check_sp_alignment(s); 4110 } 4111 4112 total = a->selem << a->scale; 4113 tcg_rn = cpu_reg_sp(s, a->rn); 4114 4115 mop = finalize_memop_asimd(s, a->scale); 4116 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4117 total, mop); 4118 4119 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4120 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4121 do_vec_ld(s, rt, a->index, clean_addr, mop); 4122 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4123 } 4124 4125 if (a->p) { 4126 if (a->rm == 31) { 4127 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4128 } else { 4129 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4130 } 4131 } 4132 return true; 4133 } 4134 4135 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 4136 { 4137 int xs, total, rt; 4138 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4139 MemOp mop; 4140 4141 if (!a->p && a->rm != 0) { 4142 return false; 4143 } 4144 if (!fp_access_check(s)) { 4145 return true; 4146 } 4147 4148 if (a->rn == 31) { 4149 gen_check_sp_alignment(s); 4150 } 4151 4152 total = a->selem << a->scale; 4153 tcg_rn = cpu_reg_sp(s, a->rn); 4154 4155 mop = finalize_memop_asimd(s, a->scale); 4156 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4157 total, mop); 4158 4159 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4160 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4161 /* Load and replicate to all elements */ 4162 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 4163 4164 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4165 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 4166 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 4167 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4168 } 4169 4170 if (a->p) { 4171 if (a->rm == 31) { 4172 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4173 } else { 4174 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4175 } 4176 } 4177 return true; 4178 } 4179 4180 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4181 { 4182 TCGv_i64 addr, clean_addr, tcg_rt; 4183 int size = 4 << s->dcz_blocksize; 4184 4185 if (!dc_isar_feature(aa64_mte, s)) { 4186 return false; 4187 } 4188 if (s->current_el == 0) { 4189 return false; 4190 } 4191 4192 if (a->rn == 31) { 4193 gen_check_sp_alignment(s); 4194 } 4195 4196 addr = read_cpu_reg_sp(s, a->rn, true); 4197 tcg_gen_addi_i64(addr, addr, a->imm); 4198 tcg_rt = cpu_reg(s, a->rt); 4199 4200 if (s->ata[0]) { 4201 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4202 } 4203 /* 4204 * The non-tags portion of STZGM is mostly like DC_ZVA, 4205 * except the alignment happens before the access. 4206 */ 4207 clean_addr = clean_data_tbi(s, addr); 4208 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4209 gen_helper_dc_zva(tcg_env, clean_addr); 4210 return true; 4211 } 4212 4213 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4214 { 4215 TCGv_i64 addr, clean_addr, tcg_rt; 4216 4217 if (!dc_isar_feature(aa64_mte, s)) { 4218 return false; 4219 } 4220 if (s->current_el == 0) { 4221 return false; 4222 } 4223 4224 if (a->rn == 31) { 4225 gen_check_sp_alignment(s); 4226 } 4227 4228 addr = read_cpu_reg_sp(s, a->rn, true); 4229 tcg_gen_addi_i64(addr, addr, a->imm); 4230 tcg_rt = cpu_reg(s, a->rt); 4231 4232 if (s->ata[0]) { 4233 gen_helper_stgm(tcg_env, addr, tcg_rt); 4234 } else { 4235 MMUAccessType acc = MMU_DATA_STORE; 4236 int size = 4 << s->gm_blocksize; 4237 4238 clean_addr = clean_data_tbi(s, addr); 4239 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4240 gen_probe_access(s, clean_addr, acc, size); 4241 } 4242 return true; 4243 } 4244 4245 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4246 { 4247 TCGv_i64 addr, clean_addr, tcg_rt; 4248 4249 if (!dc_isar_feature(aa64_mte, s)) { 4250 return false; 4251 } 4252 if (s->current_el == 0) { 4253 return false; 4254 } 4255 4256 if (a->rn == 31) { 4257 gen_check_sp_alignment(s); 4258 } 4259 4260 addr = read_cpu_reg_sp(s, a->rn, true); 4261 tcg_gen_addi_i64(addr, addr, a->imm); 4262 tcg_rt = cpu_reg(s, a->rt); 4263 4264 if (s->ata[0]) { 4265 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4266 } else { 4267 MMUAccessType acc = MMU_DATA_LOAD; 4268 int size = 4 << s->gm_blocksize; 4269 4270 clean_addr = clean_data_tbi(s, addr); 4271 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4272 gen_probe_access(s, clean_addr, acc, size); 4273 /* The result tags are zeros. */ 4274 tcg_gen_movi_i64(tcg_rt, 0); 4275 } 4276 return true; 4277 } 4278 4279 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4280 { 4281 TCGv_i64 addr, clean_addr, tcg_rt; 4282 4283 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4284 return false; 4285 } 4286 4287 if (a->rn == 31) { 4288 gen_check_sp_alignment(s); 4289 } 4290 4291 addr = read_cpu_reg_sp(s, a->rn, true); 4292 if (!a->p) { 4293 /* pre-index or signed offset */ 4294 tcg_gen_addi_i64(addr, addr, a->imm); 4295 } 4296 4297 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4298 tcg_rt = cpu_reg(s, a->rt); 4299 if (s->ata[0]) { 4300 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4301 } else { 4302 /* 4303 * Tag access disabled: we must check for aborts on the load 4304 * load from [rn+offset], and then insert a 0 tag into rt. 
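 * Even with tag access disabled the access must still take any fault
 * that a real tag load from that address would take, hence the probe
 * below; a zero tag is then simply inserted into the existing value
 * of Rt.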
4305 */ 4306 clean_addr = clean_data_tbi(s, addr); 4307 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4308 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4309 } 4310 4311 if (a->w) { 4312 /* pre-index or post-index */ 4313 if (a->p) { 4314 /* post-index */ 4315 tcg_gen_addi_i64(addr, addr, a->imm); 4316 } 4317 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4318 } 4319 return true; 4320 } 4321 4322 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4323 { 4324 TCGv_i64 addr, tcg_rt; 4325 4326 if (a->rn == 31) { 4327 gen_check_sp_alignment(s); 4328 } 4329 4330 addr = read_cpu_reg_sp(s, a->rn, true); 4331 if (!a->p) { 4332 /* pre-index or signed offset */ 4333 tcg_gen_addi_i64(addr, addr, a->imm); 4334 } 4335 tcg_rt = cpu_reg_sp(s, a->rt); 4336 if (!s->ata[0]) { 4337 /* 4338 * For STG and ST2G, we need to check alignment and probe memory. 4339 * TODO: For STZG and STZ2G, we could rely on the stores below, 4340 * at least for system mode; user-only won't enforce alignment. 4341 */ 4342 if (is_pair) { 4343 gen_helper_st2g_stub(tcg_env, addr); 4344 } else { 4345 gen_helper_stg_stub(tcg_env, addr); 4346 } 4347 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4348 if (is_pair) { 4349 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4350 } else { 4351 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4352 } 4353 } else { 4354 if (is_pair) { 4355 gen_helper_st2g(tcg_env, addr, tcg_rt); 4356 } else { 4357 gen_helper_stg(tcg_env, addr, tcg_rt); 4358 } 4359 } 4360 4361 if (is_zero) { 4362 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4363 TCGv_i64 zero64 = tcg_constant_i64(0); 4364 TCGv_i128 zero128 = tcg_temp_new_i128(); 4365 int mem_index = get_mem_index(s); 4366 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4367 4368 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4369 4370 /* This is 1 or 2 atomic 16-byte operations. */ 4371 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4372 if (is_pair) { 4373 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4374 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4375 } 4376 } 4377 4378 if (a->w) { 4379 /* pre-index or post-index */ 4380 if (a->p) { 4381 /* post-index */ 4382 tcg_gen_addi_i64(addr, addr, a->imm); 4383 } 4384 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4385 } 4386 return true; 4387 } 4388 4389 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4390 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4391 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4392 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4393 4394 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4395 4396 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4397 bool is_setg, SetFn fn) 4398 { 4399 int memidx; 4400 uint32_t syndrome, desc = 0; 4401 4402 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4403 return false; 4404 } 4405 4406 /* 4407 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4408 * us to pull this check before the CheckMOPSEnabled() test 4409 * (which we do in the helper function) 4410 */ 4411 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4412 a->rd == 31 || a->rn == 31) { 4413 return false; 4414 } 4415 4416 memidx = get_a64_user_mem_index(s, a->unpriv); 4417 4418 /* 4419 * We pass option_a == true, matching our implementation; 4420 * we pass wrong_option == false: helper function may set that bit. 
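 * (FEAT_MOPS permits an implementation to use one of two algorithms,
 * "option A" or "option B"; the syndrome records which is in use so
 * that a handler taking an exception mid-sequence can interpret the
 * intermediate register state.)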
4421 */ 4422 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4423 is_epilogue, false, true, a->rd, a->rs, a->rn); 4424 4425 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4426 /* We may need to do MTE tag checking, so assemble the descriptor */ 4427 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4428 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4429 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4430 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4431 } 4432 /* The helper function always needs the memidx even with MTE disabled */ 4433 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4434 4435 /* 4436 * The helper needs the register numbers, but since they're in 4437 * the syndrome anyway, we let it extract them from there rather 4438 * than passing in an extra three integer arguments. 4439 */ 4440 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4441 return true; 4442 } 4443 4444 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4445 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4446 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4447 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4448 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4449 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4450 4451 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4452 4453 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4454 { 4455 int rmemidx, wmemidx; 4456 uint32_t syndrome, rdesc = 0, wdesc = 0; 4457 bool wunpriv = extract32(a->options, 0, 1); 4458 bool runpriv = extract32(a->options, 1, 1); 4459 4460 /* 4461 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4462 * us to pull this check before the CheckMOPSEnabled() test 4463 * (which we do in the helper function) 4464 */ 4465 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4466 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4467 return false; 4468 } 4469 4470 rmemidx = get_a64_user_mem_index(s, runpriv); 4471 wmemidx = get_a64_user_mem_index(s, wunpriv); 4472 4473 /* 4474 * We pass option_a == true, matching our implementation; 4475 * we pass wrong_option == false: helper function may set that bit. 4476 */ 4477 syndrome = syn_mop(false, false, a->options, is_epilogue, 4478 false, true, a->rd, a->rs, a->rn); 4479 4480 /* If we need to do MTE tag checking, assemble the descriptors */ 4481 if (s->mte_active[runpriv]) { 4482 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4483 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4484 } 4485 if (s->mte_active[wunpriv]) { 4486 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4487 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4488 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4489 } 4490 /* The helper function needs these parts of the descriptor regardless */ 4491 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4492 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4493 4494 /* 4495 * The helper needs the register numbers, but since they're in 4496 * the syndrome anyway, we let it extract them from there rather 4497 * than passing in an extra three integer arguments. 
4498 */ 4499 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4500 tcg_constant_i32(rdesc)); 4501 return true; 4502 } 4503 4504 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4505 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4506 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4507 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4508 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4509 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4510 4511 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4512 4513 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4514 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4515 { 4516 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4517 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4518 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4519 4520 fn(tcg_rd, tcg_rn, tcg_imm); 4521 if (!a->sf) { 4522 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4523 } 4524 return true; 4525 } 4526 4527 /* 4528 * PC-rel. addressing 4529 */ 4530 4531 static bool trans_ADR(DisasContext *s, arg_ri *a) 4532 { 4533 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4534 return true; 4535 } 4536 4537 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4538 { 4539 int64_t offset = (int64_t)a->imm << 12; 4540 4541 /* The page offset is ok for CF_PCREL. */ 4542 offset -= s->pc_curr & 0xfff; 4543 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4544 return true; 4545 } 4546 4547 /* 4548 * Add/subtract (immediate) 4549 */ 4550 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4551 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4552 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4553 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4554 4555 /* 4556 * Add/subtract (immediate, with tags) 4557 */ 4558 4559 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4560 bool sub_op) 4561 { 4562 TCGv_i64 tcg_rn, tcg_rd; 4563 int imm; 4564 4565 imm = a->uimm6 << LOG2_TAG_GRANULE; 4566 if (sub_op) { 4567 imm = -imm; 4568 } 4569 4570 tcg_rn = cpu_reg_sp(s, a->rn); 4571 tcg_rd = cpu_reg_sp(s, a->rd); 4572 4573 if (s->ata[0]) { 4574 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4575 tcg_constant_i32(imm), 4576 tcg_constant_i32(a->uimm4)); 4577 } else { 4578 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4579 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4580 } 4581 return true; 4582 } 4583 4584 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4585 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4586 4587 /* The input should be a value in the bottom e bits (with higher 4588 * bits zero); returns that value replicated into every element 4589 * of size e in a 64 bit integer. 4590 */ 4591 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4592 { 4593 assert(e != 0); 4594 while (e < 64) { 4595 mask |= mask << e; 4596 e *= 2; 4597 } 4598 return mask; 4599 } 4600 4601 /* 4602 * Logical (immediate) 4603 */ 4604 4605 /* 4606 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4607 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4608 * value (ie should cause a guest UNDEF exception), and true if they are 4609 * valid, in which case the decoded bit pattern is written to result. 
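 *
 * As a worked example (added here for illustration): immn = 0,
 * imms = 0b000011, immr = 0b000001 selects 32-bit elements (len = 5),
 * each containing a run of s + 1 = 4 one bits rotated right by r = 1,
 * i.e. the element 0x80000007, so the decoded wmask is
 * 0x8000000780000007.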
4610 */ 4611 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4612 unsigned int imms, unsigned int immr) 4613 { 4614 uint64_t mask; 4615 unsigned e, levels, s, r; 4616 int len; 4617 4618 assert(immn < 2 && imms < 64 && immr < 64); 4619 4620 /* The bit patterns we create here are 64 bit patterns which 4621 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4622 * 64 bits each. Each element contains the same value: a run 4623 * of between 1 and e-1 non-zero bits, rotated within the 4624 * element by between 0 and e-1 bits. 4625 * 4626 * The element size and run length are encoded into immn (1 bit) 4627 * and imms (6 bits) as follows: 4628 * 64 bit elements: immn = 1, imms = <length of run - 1> 4629 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4630 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4631 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4632 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4633 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4634 * Notice that immn = 0, imms = 11111x is the only combination 4635 * not covered by one of the above options; this is reserved. 4636 * Further, <length of run - 1> all-ones is a reserved pattern. 4637 * 4638 * In all cases the rotation is by immr % e (and immr is 6 bits). 4639 */ 4640 4641 /* First determine the element size */ 4642 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4643 if (len < 1) { 4644 /* This is the immn == 0, imms == 0x11111x case */ 4645 return false; 4646 } 4647 e = 1 << len; 4648 4649 levels = e - 1; 4650 s = imms & levels; 4651 r = immr & levels; 4652 4653 if (s == levels) { 4654 /* <length of run - 1> mustn't be all-ones. */ 4655 return false; 4656 } 4657 4658 /* Create the value of one element: s+1 set bits rotated 4659 * by r within the element (which is e bits wide)... 4660 */ 4661 mask = MAKE_64BIT_MASK(0, s + 1); 4662 if (r) { 4663 mask = (mask >> r) | (mask << (e - r)); 4664 mask &= MAKE_64BIT_MASK(0, e); 4665 } 4666 /* ...then replicate the element over the whole 64 bit value */ 4667 mask = bitfield_replicate(mask, e); 4668 *result = mask; 4669 return true; 4670 } 4671 4672 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4673 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4674 { 4675 TCGv_i64 tcg_rd, tcg_rn; 4676 uint64_t imm; 4677 4678 /* Some immediate field values are reserved. */ 4679 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4680 extract32(a->dbm, 0, 6), 4681 extract32(a->dbm, 6, 6))) { 4682 return false; 4683 } 4684 if (!a->sf) { 4685 imm &= 0xffffffffull; 4686 } 4687 4688 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4689 tcg_rn = cpu_reg(s, a->rn);
4690
4691 fn(tcg_rd, tcg_rn, imm);
4692 if (set_cc) {
4693 gen_logic_CC(a->sf, tcg_rd);
4694 }
4695 if (!a->sf) {
4696 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4697 }
4698 return true;
4699 }
4700
4701 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4702 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4703 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4704 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4705
4706 /*
4707 * Move wide (immediate)
4708 */
4709
4710 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4711 {
4712 int pos = a->hw << 4;
4713 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4714 return true;
4715 }
4716
4717 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4718 {
4719 int pos = a->hw << 4;
4720 uint64_t imm = a->imm;
4721
4722 imm = ~(imm << pos);
4723 if (!a->sf) {
4724 imm = (uint32_t)imm;
4725 }
4726 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4727 return true;
4728 }
4729
4730 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4731 {
4732 int pos = a->hw << 4;
4733 TCGv_i64 tcg_rd, tcg_im;
4734
4735 tcg_rd = cpu_reg(s, a->rd);
4736 tcg_im = tcg_constant_i64(a->imm);
4737 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4738 if (!a->sf) {
4739 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4740 }
4741 return true;
4742 }
4743
4744 /*
4745 * Bitfield
4746 */
4747
4748 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4749 {
4750 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4751 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4752 unsigned int bitsize = a->sf ? 64 : 32;
4753 unsigned int ri = a->immr;
4754 unsigned int si = a->imms;
4755 unsigned int pos, len;
4756
4757 if (si >= ri) {
4758 /* Wd<s-r:0> = Wn<s:r> */
4759 len = (si - ri) + 1;
4760 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4761 if (!a->sf) {
4762 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4763 }
4764 } else {
4765 /* Wd<32+s-r,32-r> = Wn<s:0> */
4766 len = si + 1;
4767 pos = (bitsize - ri) & (bitsize - 1);
4768
4769 if (len < ri) {
4770 /*
4771 * Sign extend the destination field from len to fill the
4772 * balance of the word. Let the deposit below insert all
4773 * of those sign bits.
4774 */
4775 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4776 len = ri;
4777 }
4778
4779 /*
4780 * We start with zero, and we haven't modified any bits outside
4781 * bitsize, therefore no final zero-extension is needed for !sf.
4782 */
4783 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4784 }
4785 return true;
4786 }
4787
4788 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4789 {
4790 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4791 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4792 unsigned int bitsize = a->sf ? 64 : 32;
4793 unsigned int ri = a->immr;
4794 unsigned int si = a->imms;
4795 unsigned int pos, len;
4796
4797 tcg_rd = cpu_reg(s, a->rd);
4798 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4799
4800 if (si >= ri) {
4801 /* Wd<s-r:0> = Wn<s:r> */
4802 len = (si - ri) + 1;
4803 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4804 } else {
4805 /* Wd<32+s-r,32-r> = Wn<s:0> */
4806 len = si + 1;
4807 pos = (bitsize - ri) & (bitsize - 1);
4808 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4809 }
4810 return true;
4811 }
4812
4813 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4814 {
4815 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4816 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4817 unsigned int bitsize = a->sf ?
64 : 32; 4818 unsigned int ri = a->immr; 4819 unsigned int si = a->imms; 4820 unsigned int pos, len; 4821 4822 tcg_rd = cpu_reg(s, a->rd); 4823 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4824 4825 if (si >= ri) { 4826 /* Wd<s-r:0> = Wn<s:r> */ 4827 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4828 len = (si - ri) + 1; 4829 pos = 0; 4830 } else { 4831 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4832 len = si + 1; 4833 pos = (bitsize - ri) & (bitsize - 1); 4834 } 4835 4836 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4837 if (!a->sf) { 4838 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4839 } 4840 return true; 4841 } 4842 4843 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4844 { 4845 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4846 4847 tcg_rd = cpu_reg(s, a->rd); 4848 4849 if (unlikely(a->imm == 0)) { 4850 /* 4851 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4852 * so an extract from bit 0 is a special case. 4853 */ 4854 if (a->sf) { 4855 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4856 } else { 4857 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4858 } 4859 } else { 4860 tcg_rm = cpu_reg(s, a->rm); 4861 tcg_rn = cpu_reg(s, a->rn); 4862 4863 if (a->sf) { 4864 /* Specialization to ROR happens in EXTRACT2. */ 4865 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4866 } else { 4867 TCGv_i32 t0 = tcg_temp_new_i32(); 4868 4869 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4870 if (a->rm == a->rn) { 4871 tcg_gen_rotri_i32(t0, t0, a->imm); 4872 } else { 4873 TCGv_i32 t1 = tcg_temp_new_i32(); 4874 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4875 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4876 } 4877 tcg_gen_extu_i32_i64(tcg_rd, t0); 4878 } 4879 } 4880 return true; 4881 } 4882 4883 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4884 { 4885 if (fp_access_check(s)) { 4886 int len = (a->len + 1) * 16; 4887 4888 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4889 vec_full_reg_offset(s, a->rm), tcg_env, 4890 a->q ? 16 : 8, vec_full_reg_size(s), 4891 (len << 6) | (a->tbx << 5) | a->rn, 4892 gen_helper_simd_tblx); 4893 } 4894 return true; 4895 } 4896 4897 typedef int simd_permute_idx_fn(int i, int part, int elements); 4898 4899 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4900 simd_permute_idx_fn *fn, int part) 4901 { 4902 MemOp esz = a->esz; 4903 int datasize = a->q ? 16 : 8; 4904 int elements = datasize >> esz; 4905 TCGv_i64 tcg_res[2], tcg_ele; 4906 4907 if (esz == MO_64 && !a->q) { 4908 return false; 4909 } 4910 if (!fp_access_check(s)) { 4911 return true; 4912 } 4913 4914 tcg_res[0] = tcg_temp_new_i64(); 4915 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4916 tcg_ele = tcg_temp_new_i64(); 4917 4918 for (int i = 0; i < elements; i++) { 4919 int o, w, idx; 4920 4921 idx = fn(i, part, elements); 4922 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4923 idx & (elements - 1), esz); 4924 4925 w = (i << (esz + 3)) / 64; 4926 o = (i << (esz + 3)) % 64; 4927 if (o == 0) { 4928 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4929 } else { 4930 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4931 } 4932 } 4933 4934 for (int i = a->q; i >= 0; --i) { 4935 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4936 } 4937 clear_vec_high(s, a->q, a->rd); 4938 return true; 4939 } 4940 4941 static int permute_load_uzp(int i, int part, int elements) 4942 { 4943 return 2 * i + part; 4944 } 4945 4946 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4947 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4948 4949 static int permute_load_trn(int i, int part, int elements) 4950 { 4951 return (i & 1) * elements + (i & ~1) + part; 4952 } 4953 4954 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4955 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4956 4957 static int permute_load_zip(int i, int part, int elements) 4958 { 4959 return (i & 1) * elements + ((part * elements + i) >> 1); 4960 } 4961 4962 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4963 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4964 4965 /* 4966 * Cryptographic AES, SHA, SHA512 4967 */ 4968 4969 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4970 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4971 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4972 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4973 4974 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4975 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4976 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4977 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4978 4979 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4980 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4981 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4982 4983 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4984 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4985 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4986 4987 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4988 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4989 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4990 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4991 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4992 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4993 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4994 4995 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4996 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4997 4998 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4999 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 5000 5001 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 5002 { 5003 if (!dc_isar_feature(aa64_sm3, s)) { 5004 return false; 5005 } 5006 if (fp_access_check(s)) { 5007 
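        /*
         * SM3SS1 uses only the top 32-bit element of each source:
         *   Vd[3] = ROL32(ROL32(Vn[3], 12) + Vm[3] + Va[3], 7)
         * (the rotri by 20 below is a left-rotate by 12, the rotri by 25
         * a left-rotate by 7); the remaining elements of Vd are zeroed.
         */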
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 5008 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 5009 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 5010 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5011 5012 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 5013 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 5014 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 5015 5016 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 5017 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 5018 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 5019 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 5020 5021 /* Clear the whole register first, then store bits [127:96]. */ 5022 clear_vec(s, a->rd); 5023 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 5024 } 5025 return true; 5026 } 5027 5028 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 5029 { 5030 if (fp_access_check(s)) { 5031 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 5032 } 5033 return true; 5034 } 5035 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 5036 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 5037 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 5038 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 5039 5040 static bool trans_XAR(DisasContext *s, arg_XAR *a) 5041 { 5042 if (!dc_isar_feature(aa64_sha3, s)) { 5043 return false; 5044 } 5045 if (fp_access_check(s)) { 5046 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 5047 vec_full_reg_offset(s, a->rn), 5048 vec_full_reg_offset(s, a->rm), a->imm, 16, 5049 vec_full_reg_size(s)); 5050 } 5051 return true; 5052 } 5053 5054 /* 5055 * Advanced SIMD copy 5056 */ 5057 5058 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 5059 { 5060 unsigned esz = ctz32(imm); 5061 if (esz <= MO_64) { 5062 *pesz = esz; 5063 *pidx = imm >> (esz + 1); 5064 return true; 5065 } 5066 return false; 5067 } 5068 5069 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 5070 { 5071 MemOp esz; 5072 unsigned idx; 5073 5074 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5075 return false; 5076 } 5077 if (fp_access_check(s)) { 5078 /* 5079 * This instruction just extracts the specified element and 5080 * zero-extends it into the bottom of the destination register. 5081 */ 5082 TCGv_i64 tmp = tcg_temp_new_i64(); 5083 read_vec_element(s, tmp, a->rn, idx, esz); 5084 write_fp_dreg(s, a->rd, tmp); 5085 } 5086 return true; 5087 } 5088 5089 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 5090 { 5091 MemOp esz; 5092 unsigned idx; 5093 5094 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5095 return false; 5096 } 5097 if (esz == MO_64 && !a->q) { 5098 return false; 5099 } 5100 if (fp_access_check(s)) { 5101 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 5102 vec_reg_offset(s, a->rn, idx, esz), 5103 a->q ? 16 : 8, vec_full_reg_size(s)); 5104 } 5105 return true; 5106 } 5107 5108 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 5109 { 5110 MemOp esz; 5111 unsigned idx; 5112 5113 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5114 return false; 5115 } 5116 if (esz == MO_64 && !a->q) { 5117 return false; 5118 } 5119 if (fp_access_check(s)) { 5120 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5121 a->q ? 
16 : 8, vec_full_reg_size(s), 5122 cpu_reg(s, a->rn)); 5123 } 5124 return true; 5125 } 5126 5127 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 5128 { 5129 MemOp esz; 5130 unsigned idx; 5131 5132 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5133 return false; 5134 } 5135 if (is_signed) { 5136 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 5137 return false; 5138 } 5139 } else { 5140 if (esz == MO_64 ? !a->q : a->q) { 5141 return false; 5142 } 5143 } 5144 if (fp_access_check(s)) { 5145 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5146 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 5147 if (is_signed && !a->q) { 5148 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5149 } 5150 } 5151 return true; 5152 } 5153 5154 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 5155 TRANS(UMOV, do_smov_umov, a, 0) 5156 5157 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 5158 { 5159 MemOp esz; 5160 unsigned idx; 5161 5162 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5163 return false; 5164 } 5165 if (fp_access_check(s)) { 5166 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 5167 clear_vec_high(s, true, a->rd); 5168 } 5169 return true; 5170 } 5171 5172 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5173 { 5174 MemOp esz; 5175 unsigned didx, sidx; 5176 5177 if (!decode_esz_idx(a->di, &esz, &didx)) { 5178 return false; 5179 } 5180 sidx = a->si >> esz; 5181 if (fp_access_check(s)) { 5182 TCGv_i64 tmp = tcg_temp_new_i64(); 5183 5184 read_vec_element(s, tmp, a->rn, sidx, esz); 5185 write_vec_element(s, tmp, a->rd, didx, esz); 5186 5187 /* INS is considered a 128-bit write for SVE. */ 5188 clear_vec_high(s, true, a->rd); 5189 } 5190 return true; 5191 } 5192 5193 /* 5194 * Advanced SIMD three same 5195 */ 5196 5197 typedef struct FPScalar { 5198 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5199 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5200 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5201 } FPScalar; 5202 5203 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, 5204 const FPScalar *f, int mergereg, 5205 ARMFPStatusFlavour fpsttype) 5206 { 5207 switch (a->esz) { 5208 case MO_64: 5209 if (fp_access_check(s)) { 5210 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5211 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5212 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); 5213 write_fp_dreg_merging(s, a->rd, mergereg, t0); 5214 } 5215 break; 5216 case MO_32: 5217 if (fp_access_check(s)) { 5218 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5219 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5220 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); 5221 write_fp_sreg_merging(s, a->rd, mergereg, t0); 5222 } 5223 break; 5224 case MO_16: 5225 if (!dc_isar_feature(aa64_fp16, s)) { 5226 return false; 5227 } 5228 if (fp_access_check(s)) { 5229 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5230 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5231 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); 5232 write_fp_hreg_merging(s, a->rd, mergereg, t0); 5233 } 5234 break; 5235 default: 5236 return false; 5237 } 5238 return true; 5239 } 5240 5241 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, 5242 int mergereg) 5243 { 5244 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, 5245 a->esz == MO_16 ? 5246 FPST_A64_F16 : FPST_A64); 5247 } 5248 5249 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, 5250 const FPScalar *fnormal, const FPScalar *fah, 5251 int mergereg) 5252 { 5253 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? 
fah : fnormal, 5254 mergereg, select_ah_fpst(s, a->esz)); 5255 } 5256 5257 /* Some insns need to call different helpers when FPCR.AH == 1 */ 5258 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, 5259 const FPScalar *fnormal, 5260 const FPScalar *fah, 5261 int mergereg) 5262 { 5263 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); 5264 } 5265 5266 static const FPScalar f_scalar_fadd = { 5267 gen_helper_vfp_addh, 5268 gen_helper_vfp_adds, 5269 gen_helper_vfp_addd, 5270 }; 5271 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) 5272 5273 static const FPScalar f_scalar_fsub = { 5274 gen_helper_vfp_subh, 5275 gen_helper_vfp_subs, 5276 gen_helper_vfp_subd, 5277 }; 5278 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) 5279 5280 static const FPScalar f_scalar_fdiv = { 5281 gen_helper_vfp_divh, 5282 gen_helper_vfp_divs, 5283 gen_helper_vfp_divd, 5284 }; 5285 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) 5286 5287 static const FPScalar f_scalar_fmul = { 5288 gen_helper_vfp_mulh, 5289 gen_helper_vfp_muls, 5290 gen_helper_vfp_muld, 5291 }; 5292 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) 5293 5294 static const FPScalar f_scalar_fmax = { 5295 gen_helper_vfp_maxh, 5296 gen_helper_vfp_maxs, 5297 gen_helper_vfp_maxd, 5298 }; 5299 static const FPScalar f_scalar_fmax_ah = { 5300 gen_helper_vfp_ah_maxh, 5301 gen_helper_vfp_ah_maxs, 5302 gen_helper_vfp_ah_maxd, 5303 }; 5304 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) 5305 5306 static const FPScalar f_scalar_fmin = { 5307 gen_helper_vfp_minh, 5308 gen_helper_vfp_mins, 5309 gen_helper_vfp_mind, 5310 }; 5311 static const FPScalar f_scalar_fmin_ah = { 5312 gen_helper_vfp_ah_minh, 5313 gen_helper_vfp_ah_mins, 5314 gen_helper_vfp_ah_mind, 5315 }; 5316 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) 5317 5318 static const FPScalar f_scalar_fmaxnm = { 5319 gen_helper_vfp_maxnumh, 5320 gen_helper_vfp_maxnums, 5321 gen_helper_vfp_maxnumd, 5322 }; 5323 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) 5324 5325 static const FPScalar f_scalar_fminnm = { 5326 gen_helper_vfp_minnumh, 5327 gen_helper_vfp_minnums, 5328 gen_helper_vfp_minnumd, 5329 }; 5330 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) 5331 5332 static const FPScalar f_scalar_fmulx = { 5333 gen_helper_advsimd_mulxh, 5334 gen_helper_vfp_mulxs, 5335 gen_helper_vfp_mulxd, 5336 }; 5337 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) 5338 5339 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5340 { 5341 gen_helper_vfp_mulh(d, n, m, s); 5342 gen_vfp_negh(d, d); 5343 } 5344 5345 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5346 { 5347 gen_helper_vfp_muls(d, n, m, s); 5348 gen_vfp_negs(d, d); 5349 } 5350 5351 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5352 { 5353 gen_helper_vfp_muld(d, n, m, s); 5354 gen_vfp_negd(d, d); 5355 } 5356 5357 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5358 { 5359 gen_helper_vfp_mulh(d, n, m, s); 5360 gen_vfp_ah_negh(d, d); 5361 } 5362 5363 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5364 { 5365 gen_helper_vfp_muls(d, n, m, s); 5366 gen_vfp_ah_negs(d, d); 5367 } 5368 5369 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5370 { 5371 gen_helper_vfp_muld(d, n, m, s); 5372 gen_vfp_ah_negd(d, d); 5373 } 5374 5375 static const FPScalar f_scalar_fnmul = { 5376 gen_fnmul_h, 5377 
gen_fnmul_s, 5378 gen_fnmul_d, 5379 }; 5380 static const FPScalar f_scalar_ah_fnmul = { 5381 gen_fnmul_ah_h, 5382 gen_fnmul_ah_s, 5383 gen_fnmul_ah_d, 5384 }; 5385 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) 5386 5387 static const FPScalar f_scalar_fcmeq = { 5388 gen_helper_advsimd_ceq_f16, 5389 gen_helper_neon_ceq_f32, 5390 gen_helper_neon_ceq_f64, 5391 }; 5392 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) 5393 5394 static const FPScalar f_scalar_fcmge = { 5395 gen_helper_advsimd_cge_f16, 5396 gen_helper_neon_cge_f32, 5397 gen_helper_neon_cge_f64, 5398 }; 5399 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) 5400 5401 static const FPScalar f_scalar_fcmgt = { 5402 gen_helper_advsimd_cgt_f16, 5403 gen_helper_neon_cgt_f32, 5404 gen_helper_neon_cgt_f64, 5405 }; 5406 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) 5407 5408 static const FPScalar f_scalar_facge = { 5409 gen_helper_advsimd_acge_f16, 5410 gen_helper_neon_acge_f32, 5411 gen_helper_neon_acge_f64, 5412 }; 5413 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) 5414 5415 static const FPScalar f_scalar_facgt = { 5416 gen_helper_advsimd_acgt_f16, 5417 gen_helper_neon_acgt_f32, 5418 gen_helper_neon_acgt_f64, 5419 }; 5420 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) 5421 5422 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5423 { 5424 gen_helper_vfp_subh(d, n, m, s); 5425 gen_vfp_absh(d, d); 5426 } 5427 5428 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5429 { 5430 gen_helper_vfp_subs(d, n, m, s); 5431 gen_vfp_abss(d, d); 5432 } 5433 5434 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5435 { 5436 gen_helper_vfp_subd(d, n, m, s); 5437 gen_vfp_absd(d, d); 5438 } 5439 5440 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5441 { 5442 gen_helper_vfp_subh(d, n, m, s); 5443 gen_vfp_ah_absh(d, d); 5444 } 5445 5446 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5447 { 5448 gen_helper_vfp_subs(d, n, m, s); 5449 gen_vfp_ah_abss(d, d); 5450 } 5451 5452 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5453 { 5454 gen_helper_vfp_subd(d, n, m, s); 5455 gen_vfp_ah_absd(d, d); 5456 } 5457 5458 static const FPScalar f_scalar_fabd = { 5459 gen_fabd_h, 5460 gen_fabd_s, 5461 gen_fabd_d, 5462 }; 5463 static const FPScalar f_scalar_ah_fabd = { 5464 gen_fabd_ah_h, 5465 gen_fabd_ah_s, 5466 gen_fabd_ah_d, 5467 }; 5468 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) 5469 5470 static const FPScalar f_scalar_frecps = { 5471 gen_helper_recpsf_f16, 5472 gen_helper_recpsf_f32, 5473 gen_helper_recpsf_f64, 5474 }; 5475 static const FPScalar f_scalar_ah_frecps = { 5476 gen_helper_recpsf_ah_f16, 5477 gen_helper_recpsf_ah_f32, 5478 gen_helper_recpsf_ah_f64, 5479 }; 5480 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, 5481 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) 5482 5483 static const FPScalar f_scalar_frsqrts = { 5484 gen_helper_rsqrtsf_f16, 5485 gen_helper_rsqrtsf_f32, 5486 gen_helper_rsqrtsf_f64, 5487 }; 5488 static const FPScalar f_scalar_ah_frsqrts = { 5489 gen_helper_rsqrtsf_ah_f16, 5490 gen_helper_rsqrtsf_ah_f32, 5491 gen_helper_rsqrtsf_ah_f64, 5492 }; 5493 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, 5494 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) 5495 5496 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5497 const FPScalar *f, bool swap) 5498 { 5499 switch (a->esz) { 5500 case MO_64: 5501 if 
(fp_access_check(s)) { 5502 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5503 TCGv_i64 t1 = tcg_constant_i64(0); 5504 if (swap) { 5505 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5506 } else { 5507 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5508 } 5509 write_fp_dreg(s, a->rd, t0); 5510 } 5511 break; 5512 case MO_32: 5513 if (fp_access_check(s)) { 5514 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5515 TCGv_i32 t1 = tcg_constant_i32(0); 5516 if (swap) { 5517 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5518 } else { 5519 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5520 } 5521 write_fp_sreg(s, a->rd, t0); 5522 } 5523 break; 5524 case MO_16: 5525 if (!dc_isar_feature(aa64_fp16, s)) { 5526 return false; 5527 } 5528 if (fp_access_check(s)) { 5529 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5530 TCGv_i32 t1 = tcg_constant_i32(0); 5531 if (swap) { 5532 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 5533 } else { 5534 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5535 } 5536 write_fp_sreg(s, a->rd, t0); 5537 } 5538 break; 5539 default: 5540 return false; 5541 } 5542 return true; 5543 } 5544 5545 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5546 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5547 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5548 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5549 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5550 5551 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5552 MemOp sgn_n, MemOp sgn_m, 5553 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5554 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5555 { 5556 TCGv_i64 t0, t1, t2, qc; 5557 MemOp esz = a->esz; 5558 5559 if (!fp_access_check(s)) { 5560 return true; 5561 } 5562 5563 t0 = tcg_temp_new_i64(); 5564 t1 = tcg_temp_new_i64(); 5565 t2 = tcg_temp_new_i64(); 5566 qc = tcg_temp_new_i64(); 5567 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5568 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5569 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5570 5571 if (esz == MO_64) { 5572 gen_d(t0, qc, t1, t2); 5573 } else { 5574 gen_bhs(t0, qc, t1, t2, esz); 5575 tcg_gen_ext_i64(t0, t0, esz); 5576 } 5577 5578 write_fp_dreg(s, a->rd, t0); 5579 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5580 return true; 5581 } 5582 5583 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5584 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5585 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5586 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5587 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5588 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5589 5590 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5591 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5592 { 5593 if (fp_access_check(s)) { 5594 TCGv_i64 t0 = tcg_temp_new_i64(); 5595 TCGv_i64 t1 = tcg_temp_new_i64(); 5596 5597 read_vec_element(s, t0, a->rn, 0, MO_64); 5598 read_vec_element(s, t1, a->rm, 0, MO_64); 5599 fn(t0, t0, t1); 5600 write_fp_dreg(s, a->rd, t0); 5601 } 5602 return true; 5603 } 5604 5605 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5606 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5607 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5608 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5609 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5610 TRANS(SUB_s, do_int3_scalar_d, a, 
tcg_gen_sub_i64) 5611 5612 typedef struct ENVScalar2 { 5613 NeonGenTwoOpEnvFn *gen_bhs[3]; 5614 NeonGenTwo64OpEnvFn *gen_d; 5615 } ENVScalar2; 5616 5617 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5618 { 5619 if (!fp_access_check(s)) { 5620 return true; 5621 } 5622 if (a->esz == MO_64) { 5623 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5624 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5625 f->gen_d(t0, tcg_env, t0, t1); 5626 write_fp_dreg(s, a->rd, t0); 5627 } else { 5628 TCGv_i32 t0 = tcg_temp_new_i32(); 5629 TCGv_i32 t1 = tcg_temp_new_i32(); 5630 5631 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5632 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5633 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5634 write_fp_sreg(s, a->rd, t0); 5635 } 5636 return true; 5637 } 5638 5639 static const ENVScalar2 f_scalar_sqshl = { 5640 { gen_helper_neon_qshl_s8, 5641 gen_helper_neon_qshl_s16, 5642 gen_helper_neon_qshl_s32 }, 5643 gen_helper_neon_qshl_s64, 5644 }; 5645 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5646 5647 static const ENVScalar2 f_scalar_uqshl = { 5648 { gen_helper_neon_qshl_u8, 5649 gen_helper_neon_qshl_u16, 5650 gen_helper_neon_qshl_u32 }, 5651 gen_helper_neon_qshl_u64, 5652 }; 5653 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5654 5655 static const ENVScalar2 f_scalar_sqrshl = { 5656 { gen_helper_neon_qrshl_s8, 5657 gen_helper_neon_qrshl_s16, 5658 gen_helper_neon_qrshl_s32 }, 5659 gen_helper_neon_qrshl_s64, 5660 }; 5661 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5662 5663 static const ENVScalar2 f_scalar_uqrshl = { 5664 { gen_helper_neon_qrshl_u8, 5665 gen_helper_neon_qrshl_u16, 5666 gen_helper_neon_qrshl_u32 }, 5667 gen_helper_neon_qrshl_u64, 5668 }; 5669 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5670 5671 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5672 const ENVScalar2 *f) 5673 { 5674 if (a->esz == MO_16 || a->esz == MO_32) { 5675 return do_env_scalar2(s, a, f); 5676 } 5677 return false; 5678 } 5679 5680 static const ENVScalar2 f_scalar_sqdmulh = { 5681 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5682 }; 5683 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5684 5685 static const ENVScalar2 f_scalar_sqrdmulh = { 5686 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5687 }; 5688 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5689 5690 typedef struct ENVScalar3 { 5691 NeonGenThreeOpEnvFn *gen_hs[2]; 5692 } ENVScalar3; 5693 5694 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5695 const ENVScalar3 *f) 5696 { 5697 TCGv_i32 t0, t1, t2; 5698 5699 if (a->esz != MO_16 && a->esz != MO_32) { 5700 return false; 5701 } 5702 if (!fp_access_check(s)) { 5703 return true; 5704 } 5705 5706 t0 = tcg_temp_new_i32(); 5707 t1 = tcg_temp_new_i32(); 5708 t2 = tcg_temp_new_i32(); 5709 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5710 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5711 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5712 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5713 write_fp_sreg(s, a->rd, t0); 5714 return true; 5715 } 5716 5717 static const ENVScalar3 f_scalar_sqrdmlah = { 5718 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5719 }; 5720 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5721 5722 static const ENVScalar3 f_scalar_sqrdmlsh = { 5723 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5724 }; 5725 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5726 5727 
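/*
 * Integer compares on the 64-bit scalar: negsetcond yields all-ones when
 * the condition holds and all-zeros otherwise, which is the AdvSIMD
 * element-compare result format.
 */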
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5728 { 5729 if (fp_access_check(s)) { 5730 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5731 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5732 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5733 write_fp_dreg(s, a->rd, t0); 5734 } 5735 return true; 5736 } 5737 5738 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5739 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5740 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5741 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5742 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5743 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5744 5745 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, 5746 int data, 5747 gen_helper_gvec_3_ptr * const fns[3], 5748 ARMFPStatusFlavour fpsttype) 5749 { 5750 MemOp esz = a->esz; 5751 int check = fp_access_check_vector_hsd(s, a->q, esz); 5752 5753 if (check <= 0) { 5754 return check == 0; 5755 } 5756 5757 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, 5758 data, fns[esz - 1]); 5759 return true; 5760 } 5761 5762 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5763 gen_helper_gvec_3_ptr * const fns[3]) 5764 { 5765 return do_fp3_vector_with_fpsttype(s, a, data, fns, 5766 a->esz == MO_16 ? 5767 FPST_A64_F16 : FPST_A64); 5768 } 5769 5770 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5771 gen_helper_gvec_3_ptr * const fnormal[3], 5772 gen_helper_gvec_3_ptr * const fah[3]) 5773 { 5774 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); 5775 } 5776 5777 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, 5778 gen_helper_gvec_3_ptr * const fnormal[3], 5779 gen_helper_gvec_3_ptr * const fah[3]) 5780 { 5781 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? 
fah : fnormal, 5782 select_ah_fpst(s, a->esz)); 5783 } 5784 5785 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5786 gen_helper_gvec_fadd_h, 5787 gen_helper_gvec_fadd_s, 5788 gen_helper_gvec_fadd_d, 5789 }; 5790 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5791 5792 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5793 gen_helper_gvec_fsub_h, 5794 gen_helper_gvec_fsub_s, 5795 gen_helper_gvec_fsub_d, 5796 }; 5797 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5798 5799 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5800 gen_helper_gvec_fdiv_h, 5801 gen_helper_gvec_fdiv_s, 5802 gen_helper_gvec_fdiv_d, 5803 }; 5804 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5805 5806 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5807 gen_helper_gvec_fmul_h, 5808 gen_helper_gvec_fmul_s, 5809 gen_helper_gvec_fmul_d, 5810 }; 5811 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5812 5813 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5814 gen_helper_gvec_fmax_h, 5815 gen_helper_gvec_fmax_s, 5816 gen_helper_gvec_fmax_d, 5817 }; 5818 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { 5819 gen_helper_gvec_ah_fmax_h, 5820 gen_helper_gvec_ah_fmax_s, 5821 gen_helper_gvec_ah_fmax_d, 5822 }; 5823 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) 5824 5825 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5826 gen_helper_gvec_fmin_h, 5827 gen_helper_gvec_fmin_s, 5828 gen_helper_gvec_fmin_d, 5829 }; 5830 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { 5831 gen_helper_gvec_ah_fmin_h, 5832 gen_helper_gvec_ah_fmin_s, 5833 gen_helper_gvec_ah_fmin_d, 5834 }; 5835 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) 5836 5837 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5838 gen_helper_gvec_fmaxnum_h, 5839 gen_helper_gvec_fmaxnum_s, 5840 gen_helper_gvec_fmaxnum_d, 5841 }; 5842 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5843 5844 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5845 gen_helper_gvec_fminnum_h, 5846 gen_helper_gvec_fminnum_s, 5847 gen_helper_gvec_fminnum_d, 5848 }; 5849 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5850 5851 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5852 gen_helper_gvec_fmulx_h, 5853 gen_helper_gvec_fmulx_s, 5854 gen_helper_gvec_fmulx_d, 5855 }; 5856 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5857 5858 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5859 gen_helper_gvec_vfma_h, 5860 gen_helper_gvec_vfma_s, 5861 gen_helper_gvec_vfma_d, 5862 }; 5863 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5864 5865 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5866 gen_helper_gvec_vfms_h, 5867 gen_helper_gvec_vfms_s, 5868 gen_helper_gvec_vfms_d, 5869 }; 5870 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { 5871 gen_helper_gvec_ah_vfms_h, 5872 gen_helper_gvec_ah_vfms_s, 5873 gen_helper_gvec_ah_vfms_d, 5874 }; 5875 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) 5876 5877 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5878 gen_helper_gvec_fceq_h, 5879 gen_helper_gvec_fceq_s, 5880 gen_helper_gvec_fceq_d, 5881 }; 5882 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5883 5884 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5885 gen_helper_gvec_fcge_h, 5886 gen_helper_gvec_fcge_s, 5887 gen_helper_gvec_fcge_d, 5888 }; 5889 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5890 5891 static gen_helper_gvec_3_ptr * const 
f_vector_fcmgt[3] = { 5892 gen_helper_gvec_fcgt_h, 5893 gen_helper_gvec_fcgt_s, 5894 gen_helper_gvec_fcgt_d, 5895 }; 5896 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5897 5898 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5899 gen_helper_gvec_facge_h, 5900 gen_helper_gvec_facge_s, 5901 gen_helper_gvec_facge_d, 5902 }; 5903 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5904 5905 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5906 gen_helper_gvec_facgt_h, 5907 gen_helper_gvec_facgt_s, 5908 gen_helper_gvec_facgt_d, 5909 }; 5910 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5911 5912 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5913 gen_helper_gvec_fabd_h, 5914 gen_helper_gvec_fabd_s, 5915 gen_helper_gvec_fabd_d, 5916 }; 5917 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { 5918 gen_helper_gvec_ah_fabd_h, 5919 gen_helper_gvec_ah_fabd_s, 5920 gen_helper_gvec_ah_fabd_d, 5921 }; 5922 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) 5923 5924 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5925 gen_helper_gvec_recps_h, 5926 gen_helper_gvec_recps_s, 5927 gen_helper_gvec_recps_d, 5928 }; 5929 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { 5930 gen_helper_gvec_ah_recps_h, 5931 gen_helper_gvec_ah_recps_s, 5932 gen_helper_gvec_ah_recps_d, 5933 }; 5934 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) 5935 5936 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5937 gen_helper_gvec_rsqrts_h, 5938 gen_helper_gvec_rsqrts_s, 5939 gen_helper_gvec_rsqrts_d, 5940 }; 5941 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { 5942 gen_helper_gvec_ah_rsqrts_h, 5943 gen_helper_gvec_ah_rsqrts_s, 5944 gen_helper_gvec_ah_rsqrts_d, 5945 }; 5946 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) 5947 5948 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5949 gen_helper_gvec_faddp_h, 5950 gen_helper_gvec_faddp_s, 5951 gen_helper_gvec_faddp_d, 5952 }; 5953 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5954 5955 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5956 gen_helper_gvec_fmaxp_h, 5957 gen_helper_gvec_fmaxp_s, 5958 gen_helper_gvec_fmaxp_d, 5959 }; 5960 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { 5961 gen_helper_gvec_ah_fmaxp_h, 5962 gen_helper_gvec_ah_fmaxp_s, 5963 gen_helper_gvec_ah_fmaxp_d, 5964 }; 5965 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) 5966 5967 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5968 gen_helper_gvec_fminp_h, 5969 gen_helper_gvec_fminp_s, 5970 gen_helper_gvec_fminp_d, 5971 }; 5972 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { 5973 gen_helper_gvec_ah_fminp_h, 5974 gen_helper_gvec_ah_fminp_s, 5975 gen_helper_gvec_ah_fminp_d, 5976 }; 5977 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) 5978 5979 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5980 gen_helper_gvec_fmaxnump_h, 5981 gen_helper_gvec_fmaxnump_s, 5982 gen_helper_gvec_fmaxnump_d, 5983 }; 5984 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5985 5986 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5987 gen_helper_gvec_fminnump_h, 5988 gen_helper_gvec_fminnump_s, 5989 gen_helper_gvec_fminnump_d, 5990 }; 5991 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5992 5993 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5994 { 5995 if 
(fp_access_check(s)) { 5996 int data = (is_2 << 1) | is_s; 5997 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5998 vec_full_reg_offset(s, a->rn), 5999 vec_full_reg_offset(s, a->rm), tcg_env, 6000 a->q ? 16 : 8, vec_full_reg_size(s), 6001 data, gen_helper_gvec_fmlal_a64); 6002 } 6003 return true; 6004 } 6005 6006 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 6007 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 6008 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 6009 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 6010 6011 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 6012 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 6013 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 6014 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 6015 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 6016 6017 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 6018 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 6019 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 6020 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 6021 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 6022 6023 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 6024 { 6025 if (fp_access_check(s)) { 6026 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 6027 } 6028 return true; 6029 } 6030 6031 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 6032 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 6033 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 6034 6035 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 6036 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 6037 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 6038 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 6039 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 6040 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 6041 6042 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 6043 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 6044 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 6045 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 6046 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 6047 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 6048 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 6049 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 6050 6051 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 6052 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 6053 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 6054 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 6055 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 6056 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 6057 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 6058 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 6059 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 6060 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 6061 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 6062 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 6063 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 6064 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 6065 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 6066 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 6067 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 6068 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 6069 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 6070 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 6071 6072 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 6073 { 6074 if (a->esz == MO_64 && !a->q) { 6075 
return false; 6076 } 6077 if (fp_access_check(s)) { 6078 tcg_gen_gvec_cmp(cond, a->esz, 6079 vec_full_reg_offset(s, a->rd), 6080 vec_full_reg_offset(s, a->rn), 6081 vec_full_reg_offset(s, a->rm), 6082 a->q ? 16 : 8, vec_full_reg_size(s)); 6083 } 6084 return true; 6085 } 6086 6087 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 6088 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 6089 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 6090 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 6091 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 6092 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 6093 6094 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 6095 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 6096 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 6097 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 6098 6099 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 6100 gen_helper_gvec_4 *fn) 6101 { 6102 if (fp_access_check(s)) { 6103 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6104 } 6105 return true; 6106 } 6107 6108 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 6109 gen_helper_gvec_4_ptr *fn) 6110 { 6111 if (fp_access_check(s)) { 6112 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6113 } 6114 return true; 6115 } 6116 6117 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_4b) 6118 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_4b) 6119 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_4b) 6120 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 6121 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 6122 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 6123 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 6124 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 6125 6126 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 6127 { 6128 if (!dc_isar_feature(aa64_bf16, s)) { 6129 return false; 6130 } 6131 if (fp_access_check(s)) { 6132 /* Q bit selects BFMLALB vs BFMLALT. */ 6133 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6134 s->fpcr_ah ? FPST_AH : FPST_A64, a->q, 6135 gen_helper_gvec_bfmlal); 6136 } 6137 return true; 6138 } 6139 6140 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 6141 gen_helper_gvec_fcaddh, 6142 gen_helper_gvec_fcadds, 6143 gen_helper_gvec_fcaddd, 6144 }; 6145 /* 6146 * Encode FPCR.AH into the data so the helper knows whether the 6147 * negations it does should avoid flipping the sign bit on a NaN 6148 */ 6149 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), 6150 f_vector_fcadd) 6151 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), 6152 f_vector_fcadd) 6153 6154 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 6155 { 6156 static gen_helper_gvec_4_ptr * const fn[] = { 6157 [MO_16] = gen_helper_gvec_fcmlah, 6158 [MO_32] = gen_helper_gvec_fcmlas, 6159 [MO_64] = gen_helper_gvec_fcmlad, 6160 }; 6161 int check; 6162 6163 if (!dc_isar_feature(aa64_fcma, s)) { 6164 return false; 6165 } 6166 6167 check = fp_access_check_vector_hsd(s, a->q, a->esz); 6168 if (check <= 0) { 6169 return check == 0; 6170 } 6171 6172 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6173 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64,
                      a->rot | (s->fpcr_ah << 2), fn[a->esz]);
    return true;
}

/*
 * Widening vector x vector/indexed.
 *
 * These read from the top or bottom half of a 128-bit vector.
 * After widening, optionally accumulate with a 128-bit vector.
 * Implement these inline, as the number of elements is limited
 * and the related SVE and SME operations on larger vectors use
 * even/odd elements instead of top/bottom half.
 *
 * If idx >= 0, operand 2 is indexed, otherwise vector.
 * If acc, operand 0 is loaded with rd.
 */

/* For low half, iterating up. */
static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
                            int rd, int rn, int rm, int idx,
                            NeonGenTwo64OpFn *fn, bool acc)
{
    TCGv_i64 tcg_op0 = tcg_temp_new_i64();
    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
    MemOp esz = memop & MO_SIZE;
    int half = 8 >> esz;
    int top_swap, top_half;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    if (idx >= 0) {
        read_vec_element(s, tcg_op2, rm, idx, memop);
    }

    /*
     * For top half inputs, iterate forward; backward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     * Use top_swap to conditionally invert the forward iteration index.
     */
    top_swap = top ? 0 : half - 1;
    top_half = top ? half : 0;

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
        if (idx < 0) {
            read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
        }
        if (acc) {
            read_vec_element(s, tcg_op0, rd, elt, memop + 1);
        }
        fn(tcg_op0, tcg_op1, tcg_op2);
        write_vec_element(s, tcg_op0, rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, rd);
    return true;
}

static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_mul_i64(t, n, m);
    tcg_gen_sub_i64(d, d, t);
}

TRANS(SMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(UMULL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(UMLAL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_muladd_i64, true)
TRANS(SMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)
TRANS(UMLSL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_mulsub_i64, true)

TRANS(SMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(UMULL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      tcg_gen_mul_i64, false)
TRANS(SMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(UMLAL_vi,
      do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_muladd_i64, true)
TRANS(SMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_mulsub_i64, true)
TRANS(UMLSL_vi, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
      gen_mulsub_i64, true)

static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t1, n, m);
    tcg_gen_sub_i64(t2, m, n);
    tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
}

static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2 = tcg_temp_new_i64();

    tcg_gen_sub_i64(t1, n, m);
    tcg_gen_sub_i64(t2, m, n);
    tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
}

static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_sabd_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();
    gen_uabd_i64(t, n, m);
    tcg_gen_add_i64(d, d, t);
}

TRANS(SADDL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_add_i64, false)
TRANS(UADDL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_add_i64, false)
TRANS(SSUBL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_sub_i64, false)
TRANS(USUBL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      tcg_gen_sub_i64, false)
TRANS(SABDL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_sabd_i64, false)
TRANS(UABDL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_uabd_i64, false)
TRANS(SABAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      gen_saba_i64, true)
TRANS(UABAL_v, do_3op_widening,
      a->esz, a->q, a->rd, a->rn, a->rm, -1,
      gen_uaba_i64, true)

static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_mul_i64(d, n, m);
    gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
}

static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_mul_i64(d, n, m);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
}

static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
    gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
}

static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
}

static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
    tcg_gen_neg_i64(t, t);
    gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
}

static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    TCGv_i64 t = tcg_temp_new_i64();

    tcg_gen_mul_i64(t, n, m);
    gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
    tcg_gen_neg_i64(t, t);
    gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
}

TRANS(SQDMULL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_v, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

TRANS(SQDMULL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
TRANS(SQDMLAL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
TRANS(SQDMLSL_vi, do_3op_widening,
      a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
      a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)

static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
                           MemOp sign, bool sub)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int top_swap = top ? 0 : half - 1;
    int top_half = top ? half : 0;

    /* There are no 64x64->128 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, a->rd);
    return true;
}

TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
TRANS(UADDW, do_addsub_wide, a, 0, false)
TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
TRANS(USUBW, do_addsub_wide, a, 0, true)

static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
                                 bool sub, bool round)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int ebits = 8 << esz;
    uint64_t rbit = 1ull << (ebits - 1);
    int top_swap, top_half;

    /* There are no 128x128->64 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    /*
     * For top half inputs, iterate backward; forward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     */
    top_swap = top ? half - 1 : 0;
    top_half = top ?
half : 0; 6491 6492 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6493 int elt = elt_fwd ^ top_swap; 6494 6495 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6496 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6497 if (sub) { 6498 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6499 } else { 6500 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6501 } 6502 if (round) { 6503 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6504 } 6505 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6506 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6507 } 6508 clear_vec_high(s, top, a->rd); 6509 return true; 6510 } 6511 6512 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6513 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6514 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6515 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6516 6517 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6518 { 6519 if (fp_access_check(s)) { 6520 /* The Q field specifies lo/hi half input for these insns. */ 6521 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6522 } 6523 return true; 6524 } 6525 6526 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6527 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6528 6529 /* 6530 * Advanced SIMD scalar/vector x indexed element 6531 */ 6532 6533 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6534 { 6535 switch (a->esz) { 6536 case MO_64: 6537 if (fp_access_check(s)) { 6538 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6539 TCGv_i64 t1 = tcg_temp_new_i64(); 6540 6541 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6542 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6543 write_fp_dreg_merging(s, a->rd, a->rn, t0); 6544 } 6545 break; 6546 case MO_32: 6547 if (fp_access_check(s)) { 6548 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6549 TCGv_i32 t1 = tcg_temp_new_i32(); 6550 6551 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6552 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6553 write_fp_sreg_merging(s, a->rd, a->rn, t0); 6554 } 6555 break; 6556 case MO_16: 6557 if (!dc_isar_feature(aa64_fp16, s)) { 6558 return false; 6559 } 6560 if (fp_access_check(s)) { 6561 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6562 TCGv_i32 t1 = tcg_temp_new_i32(); 6563 6564 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6565 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6566 write_fp_hreg_merging(s, a->rd, a->rn, t0); 6567 } 6568 break; 6569 default: 6570 g_assert_not_reached(); 6571 } 6572 return true; 6573 } 6574 6575 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6576 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6577 6578 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6579 { 6580 switch (a->esz) { 6581 case MO_64: 6582 if (fp_access_check(s)) { 6583 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6584 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6585 TCGv_i64 t2 = tcg_temp_new_i64(); 6586 6587 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6588 if (neg) { 6589 gen_vfp_maybe_ah_negd(s, t1, t1); 6590 } 6591 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6592 write_fp_dreg_merging(s, a->rd, a->rd, t0); 6593 } 6594 break; 6595 case MO_32: 6596 if (fp_access_check(s)) { 6597 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6598 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6599 TCGv_i32 t2 = tcg_temp_new_i32(); 6600 6601 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6602 if (neg) { 6603 gen_vfp_maybe_ah_negs(s, t1, t1); 6604 } 6605 gen_helper_vfp_muladds(t0, t1, t2, t0, 
fpstatus_ptr(FPST_A64)); 6606 write_fp_sreg_merging(s, a->rd, a->rd, t0); 6607 } 6608 break; 6609 case MO_16: 6610 if (!dc_isar_feature(aa64_fp16, s)) { 6611 return false; 6612 } 6613 if (fp_access_check(s)) { 6614 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6615 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6616 TCGv_i32 t2 = tcg_temp_new_i32(); 6617 6618 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6619 if (neg) { 6620 gen_vfp_maybe_ah_negh(s, t1, t1); 6621 } 6622 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6623 fpstatus_ptr(FPST_A64_F16)); 6624 write_fp_hreg_merging(s, a->rd, a->rd, t0); 6625 } 6626 break; 6627 default: 6628 g_assert_not_reached(); 6629 } 6630 return true; 6631 } 6632 6633 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6634 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6635 6636 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6637 const ENVScalar2 *f) 6638 { 6639 if (a->esz < MO_16 || a->esz > MO_32) { 6640 return false; 6641 } 6642 if (fp_access_check(s)) { 6643 TCGv_i32 t0 = tcg_temp_new_i32(); 6644 TCGv_i32 t1 = tcg_temp_new_i32(); 6645 6646 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6647 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6648 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6649 write_fp_sreg(s, a->rd, t0); 6650 } 6651 return true; 6652 } 6653 6654 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6655 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6656 6657 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6658 const ENVScalar3 *f) 6659 { 6660 if (a->esz < MO_16 || a->esz > MO_32) { 6661 return false; 6662 } 6663 if (fp_access_check(s)) { 6664 TCGv_i32 t0 = tcg_temp_new_i32(); 6665 TCGv_i32 t1 = tcg_temp_new_i32(); 6666 TCGv_i32 t2 = tcg_temp_new_i32(); 6667 6668 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6669 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6670 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6671 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6672 write_fp_sreg(s, a->rd, t0); 6673 } 6674 return true; 6675 } 6676 6677 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6678 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6679 6680 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6681 NeonGenTwo64OpFn *fn, bool acc) 6682 { 6683 if (fp_access_check(s)) { 6684 TCGv_i64 t0 = tcg_temp_new_i64(); 6685 TCGv_i64 t1 = tcg_temp_new_i64(); 6686 TCGv_i64 t2 = tcg_temp_new_i64(); 6687 6688 if (acc) { 6689 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6690 } 6691 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6692 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6693 fn(t0, t1, t2); 6694 6695 /* Clear the whole register first, then store scalar. */ 6696 clear_vec(s, a->rd); 6697 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6698 } 6699 return true; 6700 } 6701 6702 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6703 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6704 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6705 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6706 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6707 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6708 6709 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6710 gen_helper_gvec_3_ptr * const fns[3]) 6711 { 6712 MemOp esz = a->esz; 6713 int check = fp_access_check_vector_hsd(s, a->q, esz); 6714 6715 if (check <= 0) { 6716 return check == 0; 6717 } 6718 6719 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6720 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6721 a->idx, fns[esz - 1]); 6722 return true; 6723 } 6724 6725 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6726 gen_helper_gvec_fmul_idx_h, 6727 gen_helper_gvec_fmul_idx_s, 6728 gen_helper_gvec_fmul_idx_d, 6729 }; 6730 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6731 6732 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6733 gen_helper_gvec_fmulx_idx_h, 6734 gen_helper_gvec_fmulx_idx_s, 6735 gen_helper_gvec_fmulx_idx_d, 6736 }; 6737 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6738 6739 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6740 { 6741 static gen_helper_gvec_4_ptr * const fns[3][3] = { 6742 { gen_helper_gvec_fmla_idx_h, 6743 gen_helper_gvec_fmla_idx_s, 6744 gen_helper_gvec_fmla_idx_d }, 6745 { gen_helper_gvec_fmls_idx_h, 6746 gen_helper_gvec_fmls_idx_s, 6747 gen_helper_gvec_fmls_idx_d }, 6748 { gen_helper_gvec_ah_fmls_idx_h, 6749 gen_helper_gvec_ah_fmls_idx_s, 6750 gen_helper_gvec_ah_fmls_idx_d }, 6751 }; 6752 MemOp esz = a->esz; 6753 int check = fp_access_check_vector_hsd(s, a->q, esz); 6754 6755 if (check <= 0) { 6756 return check == 0; 6757 } 6758 6759 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6760 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 6761 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); 6762 return true; 6763 } 6764 6765 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6766 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6767 6768 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6769 { 6770 if (fp_access_check(s)) { 6771 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6772 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6773 vec_full_reg_offset(s, a->rn), 6774 vec_full_reg_offset(s, a->rm), tcg_env, 6775 a->q ? 
16 : 8, vec_full_reg_size(s), 6776 data, gen_helper_gvec_fmlal_idx_a64); 6777 } 6778 return true; 6779 } 6780 6781 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6782 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6783 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6784 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6785 6786 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6787 gen_helper_gvec_3 * const fns[2]) 6788 { 6789 assert(a->esz == MO_16 || a->esz == MO_32); 6790 if (fp_access_check(s)) { 6791 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6792 } 6793 return true; 6794 } 6795 6796 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6797 gen_helper_gvec_mul_idx_h, 6798 gen_helper_gvec_mul_idx_s, 6799 }; 6800 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6801 6802 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6803 { 6804 static gen_helper_gvec_4 * const fns[2][2] = { 6805 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6806 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6807 }; 6808 6809 assert(a->esz == MO_16 || a->esz == MO_32); 6810 if (fp_access_check(s)) { 6811 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6812 a->idx, fns[a->esz - 1][sub]); 6813 } 6814 return true; 6815 } 6816 6817 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6818 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6819 6820 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6821 gen_helper_gvec_4 * const fns[2]) 6822 { 6823 assert(a->esz == MO_16 || a->esz == MO_32); 6824 if (fp_access_check(s)) { 6825 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6826 vec_full_reg_offset(s, a->rn), 6827 vec_full_reg_offset(s, a->rm), 6828 offsetof(CPUARMState, vfp.qc), 6829 a->q ? 
16 : 8, vec_full_reg_size(s), 6830 a->idx, fns[a->esz - 1]); 6831 } 6832 return true; 6833 } 6834 6835 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6836 gen_helper_neon_sqdmulh_idx_h, 6837 gen_helper_neon_sqdmulh_idx_s, 6838 }; 6839 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6840 6841 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6842 gen_helper_neon_sqrdmulh_idx_h, 6843 gen_helper_neon_sqrdmulh_idx_s, 6844 }; 6845 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6846 6847 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6848 gen_helper_neon_sqrdmlah_idx_h, 6849 gen_helper_neon_sqrdmlah_idx_s, 6850 }; 6851 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6852 f_vector_idx_sqrdmlah) 6853 6854 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6855 gen_helper_neon_sqrdmlsh_idx_h, 6856 gen_helper_neon_sqrdmlsh_idx_s, 6857 }; 6858 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6859 f_vector_idx_sqrdmlsh) 6860 6861 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6862 gen_helper_gvec_4 *fn) 6863 { 6864 if (fp_access_check(s)) { 6865 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6866 } 6867 return true; 6868 } 6869 6870 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6871 gen_helper_gvec_4_ptr *fn) 6872 { 6873 if (fp_access_check(s)) { 6874 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6875 } 6876 return true; 6877 } 6878 6879 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_4b) 6880 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_4b) 6881 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6882 gen_helper_gvec_sudot_idx_4b) 6883 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6884 gen_helper_gvec_usdot_idx_4b) 6885 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6886 gen_helper_gvec_bfdot_idx) 6887 6888 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6889 { 6890 if (!dc_isar_feature(aa64_bf16, s)) { 6891 return false; 6892 } 6893 if (fp_access_check(s)) { 6894 /* Q bit selects BFMLALB vs BFMLALT. */ 6895 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6896 s->fpcr_ah ? FPST_AH : FPST_A64, 6897 (a->idx << 1) | a->q, 6898 gen_helper_gvec_bfmlal_idx); 6899 } 6900 return true; 6901 } 6902 6903 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6904 { 6905 gen_helper_gvec_4_ptr *fn; 6906 6907 if (!dc_isar_feature(aa64_fcma, s)) { 6908 return false; 6909 } 6910 switch (a->esz) { 6911 case MO_16: 6912 if (!dc_isar_feature(aa64_fp16, s)) { 6913 return false; 6914 } 6915 fn = gen_helper_gvec_fcmlah_idx; 6916 break; 6917 case MO_32: 6918 fn = gen_helper_gvec_fcmlas_idx; 6919 break; 6920 default: 6921 g_assert_not_reached(); 6922 } 6923 if (fp_access_check(s)) { 6924 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6925 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 6926 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); 6927 } 6928 return true; 6929 } 6930 6931 /* 6932 * Advanced SIMD scalar pairwise 6933 */ 6934 6935 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6936 { 6937 switch (a->esz) { 6938 case MO_64: 6939 if (fp_access_check(s)) { 6940 TCGv_i64 t0 = tcg_temp_new_i64(); 6941 TCGv_i64 t1 = tcg_temp_new_i64(); 6942 6943 read_vec_element(s, t0, a->rn, 0, MO_64); 6944 read_vec_element(s, t1, a->rn, 1, MO_64); 6945 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6946 write_fp_dreg(s, a->rd, t0); 6947 } 6948 break; 6949 case MO_32: 6950 if (fp_access_check(s)) { 6951 TCGv_i32 t0 = tcg_temp_new_i32(); 6952 TCGv_i32 t1 = tcg_temp_new_i32(); 6953 6954 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6955 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6956 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6957 write_fp_sreg(s, a->rd, t0); 6958 } 6959 break; 6960 case MO_16: 6961 if (!dc_isar_feature(aa64_fp16, s)) { 6962 return false; 6963 } 6964 if (fp_access_check(s)) { 6965 TCGv_i32 t0 = tcg_temp_new_i32(); 6966 TCGv_i32 t1 = tcg_temp_new_i32(); 6967 6968 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6969 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6970 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6971 write_fp_sreg(s, a->rd, t0); 6972 } 6973 break; 6974 default: 6975 g_assert_not_reached(); 6976 } 6977 return true; 6978 } 6979 6980 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, 6981 const FPScalar *fnormal, 6982 const FPScalar *fah) 6983 { 6984 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); 6985 } 6986 6987 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6988 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) 6989 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) 6990 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6991 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6992 6993 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6994 { 6995 if (fp_access_check(s)) { 6996 TCGv_i64 t0 = tcg_temp_new_i64(); 6997 TCGv_i64 t1 = tcg_temp_new_i64(); 6998 6999 read_vec_element(s, t0, a->rn, 0, MO_64); 7000 read_vec_element(s, t1, a->rn, 1, MO_64); 7001 tcg_gen_add_i64(t0, t0, t1); 7002 write_fp_dreg(s, a->rd, t0); 7003 } 7004 return true; 7005 } 7006 7007 /* 7008 * Floating-point conditional select 7009 */ 7010 7011 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 7012 { 7013 TCGv_i64 t_true, t_false; 7014 DisasCompare64 c; 7015 int check = fp_access_check_scalar_hsd(s, a->esz); 7016 7017 if (check <= 0) { 7018 return check == 0; 7019 } 7020 7021 /* Zero extend sreg & hreg inputs to 64 bits now. */ 7022 t_true = tcg_temp_new_i64(); 7023 t_false = tcg_temp_new_i64(); 7024 read_vec_element(s, t_true, a->rn, 0, a->esz); 7025 read_vec_element(s, t_false, a->rm, 0, a->esz); 7026 7027 a64_test_cc(&c, a->cond); 7028 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 7029 t_true, t_false); 7030 7031 /* 7032 * Note that sregs & hregs write back zeros to the high bits, 7033 * and we've already done the zero-extension. 
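     * Because of that, a single 64-bit movcond handles the half, single
     * and double precision forms alike.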
7034 */ 7035 write_fp_dreg(s, a->rd, t_true); 7036 return true; 7037 } 7038 7039 /* 7040 * Advanced SIMD Extract 7041 */ 7042 7043 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 7044 { 7045 if (fp_access_check(s)) { 7046 TCGv_i64 lo = read_fp_dreg(s, a->rn); 7047 if (a->imm != 0) { 7048 TCGv_i64 hi = read_fp_dreg(s, a->rm); 7049 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 7050 } 7051 write_fp_dreg(s, a->rd, lo); 7052 } 7053 return true; 7054 } 7055 7056 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 7057 { 7058 TCGv_i64 lo, hi; 7059 int pos = (a->imm & 7) * 8; 7060 int elt = a->imm >> 3; 7061 7062 if (!fp_access_check(s)) { 7063 return true; 7064 } 7065 7066 lo = tcg_temp_new_i64(); 7067 hi = tcg_temp_new_i64(); 7068 7069 read_vec_element(s, lo, a->rn, elt, MO_64); 7070 elt++; 7071 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 7072 elt++; 7073 7074 if (pos != 0) { 7075 TCGv_i64 hh = tcg_temp_new_i64(); 7076 tcg_gen_extract2_i64(lo, lo, hi, pos); 7077 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 7078 tcg_gen_extract2_i64(hi, hi, hh, pos); 7079 } 7080 7081 write_vec_element(s, lo, a->rd, 0, MO_64); 7082 write_vec_element(s, hi, a->rd, 1, MO_64); 7083 clear_vec_high(s, true, a->rd); 7084 return true; 7085 } 7086 7087 /* 7088 * Floating-point data-processing (3 source) 7089 */ 7090 7091 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 7092 { 7093 TCGv_ptr fpst; 7094 7095 /* 7096 * These are fused multiply-add. Note that doing the negations here 7097 * as separate steps is correct: an input NaN should come out with 7098 * its sign bit flipped if it is a negated-input. 7099 */ 7100 switch (a->esz) { 7101 case MO_64: 7102 if (fp_access_check(s)) { 7103 TCGv_i64 tn = read_fp_dreg(s, a->rn); 7104 TCGv_i64 tm = read_fp_dreg(s, a->rm); 7105 TCGv_i64 ta = read_fp_dreg(s, a->ra); 7106 7107 if (neg_a) { 7108 gen_vfp_maybe_ah_negd(s, ta, ta); 7109 } 7110 if (neg_n) { 7111 gen_vfp_maybe_ah_negd(s, tn, tn); 7112 } 7113 fpst = fpstatus_ptr(FPST_A64); 7114 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 7115 write_fp_dreg_merging(s, a->rd, a->ra, ta); 7116 } 7117 break; 7118 7119 case MO_32: 7120 if (fp_access_check(s)) { 7121 TCGv_i32 tn = read_fp_sreg(s, a->rn); 7122 TCGv_i32 tm = read_fp_sreg(s, a->rm); 7123 TCGv_i32 ta = read_fp_sreg(s, a->ra); 7124 7125 if (neg_a) { 7126 gen_vfp_maybe_ah_negs(s, ta, ta); 7127 } 7128 if (neg_n) { 7129 gen_vfp_maybe_ah_negs(s, tn, tn); 7130 } 7131 fpst = fpstatus_ptr(FPST_A64); 7132 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 7133 write_fp_sreg_merging(s, a->rd, a->ra, ta); 7134 } 7135 break; 7136 7137 case MO_16: 7138 if (!dc_isar_feature(aa64_fp16, s)) { 7139 return false; 7140 } 7141 if (fp_access_check(s)) { 7142 TCGv_i32 tn = read_fp_hreg(s, a->rn); 7143 TCGv_i32 tm = read_fp_hreg(s, a->rm); 7144 TCGv_i32 ta = read_fp_hreg(s, a->ra); 7145 7146 if (neg_a) { 7147 gen_vfp_maybe_ah_negh(s, ta, ta); 7148 } 7149 if (neg_n) { 7150 gen_vfp_maybe_ah_negh(s, tn, tn); 7151 } 7152 fpst = fpstatus_ptr(FPST_A64_F16); 7153 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 7154 write_fp_hreg_merging(s, a->rd, a->ra, ta); 7155 } 7156 break; 7157 7158 default: 7159 return false; 7160 } 7161 return true; 7162 } 7163 7164 TRANS(FMADD, do_fmadd, a, false, false) 7165 TRANS(FNMADD, do_fmadd, a, true, true) 7166 TRANS(FMSUB, do_fmadd, a, false, true) 7167 TRANS(FNMSUB, do_fmadd, a, true, false) 7168 7169 /* 7170 * Advanced SIMD Across Lanes 7171 */ 7172 7173 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool 
widen, 7174 MemOp src_sign, NeonGenTwo64OpFn *fn) 7175 { 7176 TCGv_i64 tcg_res, tcg_elt; 7177 MemOp src_mop = a->esz | src_sign; 7178 int elements = (a->q ? 16 : 8) >> a->esz; 7179 7180 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 7181 if (elements < 4) { 7182 return false; 7183 } 7184 if (!fp_access_check(s)) { 7185 return true; 7186 } 7187 7188 tcg_res = tcg_temp_new_i64(); 7189 tcg_elt = tcg_temp_new_i64(); 7190 7191 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 7192 for (int i = 1; i < elements; i++) { 7193 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 7194 fn(tcg_res, tcg_res, tcg_elt); 7195 } 7196 7197 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 7198 write_fp_dreg(s, a->rd, tcg_res); 7199 return true; 7200 } 7201 7202 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 7203 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 7204 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 7205 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 7206 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 7207 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 7208 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 7209 7210 /* 7211 * do_fp_reduction helper 7212 * 7213 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7214 * important for correct NaN propagation that we do these 7215 * operations in exactly the order specified by the pseudocode. 7216 * 7217 * This is a recursive function. 7218 */ 7219 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 7220 int ebase, int ecount, TCGv_ptr fpst, 7221 NeonGenTwoSingleOpFn *fn) 7222 { 7223 if (ecount == 1) { 7224 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 7225 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 7226 return tcg_elem; 7227 } else { 7228 int half = ecount >> 1; 7229 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7230 7231 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 7232 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 7233 tcg_res = tcg_temp_new_i32(); 7234 7235 fn(tcg_res, tcg_lo, tcg_hi, fpst); 7236 return tcg_res; 7237 } 7238 } 7239 7240 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 7241 NeonGenTwoSingleOpFn *fnormal, 7242 NeonGenTwoSingleOpFn *fah) 7243 { 7244 if (fp_access_check(s)) { 7245 MemOp esz = a->esz; 7246 int elts = (a->q ? 16 : 8) >> esz; 7247 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 7248 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, 7249 s->fpcr_ah ? 
fah : fnormal); 7250 write_fp_sreg(s, a->rd, res); 7251 } 7252 return true; 7253 } 7254 7255 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, 7256 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) 7257 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, 7258 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) 7259 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, 7260 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) 7261 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, 7262 gen_helper_vfp_minh, gen_helper_vfp_ah_minh) 7263 7264 TRANS(FMAXNMV_s, do_fp_reduction, a, 7265 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) 7266 TRANS(FMINNMV_s, do_fp_reduction, a, 7267 gen_helper_vfp_minnums, gen_helper_vfp_minnums) 7268 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) 7269 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) 7270 7271 /* 7272 * Floating-point Immediate 7273 */ 7274 7275 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 7276 { 7277 int check = fp_access_check_scalar_hsd(s, a->esz); 7278 uint64_t imm; 7279 7280 if (check <= 0) { 7281 return check == 0; 7282 } 7283 7284 imm = vfp_expand_imm(a->esz, a->imm); 7285 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 7286 return true; 7287 } 7288 7289 /* 7290 * Floating point compare, conditional compare 7291 */ 7292 7293 static void handle_fp_compare(DisasContext *s, int size, 7294 unsigned int rn, unsigned int rm, 7295 bool cmp_with_zero, bool signal_all_nans) 7296 { 7297 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 7298 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); 7299 7300 if (size == MO_64) { 7301 TCGv_i64 tcg_vn, tcg_vm; 7302 7303 tcg_vn = read_fp_dreg(s, rn); 7304 if (cmp_with_zero) { 7305 tcg_vm = tcg_constant_i64(0); 7306 } else { 7307 tcg_vm = read_fp_dreg(s, rm); 7308 } 7309 if (signal_all_nans) { 7310 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7311 } else { 7312 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7313 } 7314 } else { 7315 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 7316 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 7317 7318 read_vec_element_i32(s, tcg_vn, rn, 0, size); 7319 if (cmp_with_zero) { 7320 tcg_gen_movi_i32(tcg_vm, 0); 7321 } else { 7322 read_vec_element_i32(s, tcg_vm, rm, 0, size); 7323 } 7324 7325 switch (size) { 7326 case MO_32: 7327 if (signal_all_nans) { 7328 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7329 } else { 7330 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7331 } 7332 break; 7333 case MO_16: 7334 if (signal_all_nans) { 7335 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7336 } else { 7337 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7338 } 7339 break; 7340 default: 7341 g_assert_not_reached(); 7342 } 7343 } 7344 7345 gen_set_nzcv(tcg_flags); 7346 } 7347 7348 /* FCMP, FCMPE */ 7349 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 7350 { 7351 int check = fp_access_check_scalar_hsd(s, a->esz); 7352 7353 if (check <= 0) { 7354 return check == 0; 7355 } 7356 7357 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 7358 return true; 7359 } 7360 7361 /* FCCMP, FCCMPE */ 7362 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 7363 { 7364 TCGLabel *label_continue = NULL; 7365 int check = fp_access_check_scalar_hsd(s, a->esz); 7366 7367 if (check <= 0) { 7368 return check == 0; 7369 } 7370 7371 if (a->cond < 0x0e) { /* not always */ 7372 TCGLabel *label_match = gen_new_label(); 7373 label_continue = gen_new_label(); 7374 
        arm_gen_test_cc(a->cond, label_match);
        /* nomatch: */
        gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);

    if (label_continue) {
        gen_set_label(label_continue);
    }
    return true;
}

/*
 * Advanced SIMD Modified Immediate
 */

static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
{
    if (!dc_isar_feature(aa64_fp16, s)) {
        return false;
    }
    if (fp_access_check(s)) {
        tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
                             a->q ? 16 : 8, vec_full_reg_size(s),
                             vfp_expand_imm(MO_16, a->abcdefgh));
    }
    return true;
}

static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
                     int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
{
    GVecGen2iFn *fn;

    /* Handle decode of cmode/op here between ORR/BIC/MOVI */
    if ((a->cmode & 1) && a->cmode < 12) {
        /* For op=1, the imm will be inverted, so BIC becomes AND. */
        fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1 && a->q == 0) {
            return false;
        }
        fn = gen_movi;
    }

    if (fp_access_check(s)) {
        uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
        gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
    }
    return true;
}

/*
 * Advanced SIMD Shift by Immediate
 */

static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
{
    if (fp_access_check(s)) {
        gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
    }
    return true;
}

TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)

static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
{
    TCGv_i64 tcg_rn, tcg_rd;
    int esz = a->esz;
    int esize;

    if (!fp_access_check(s)) {
        return true;
    }

    /*
     * For the LL variants the store is larger than the load,
     * so if rd == rn we would overwrite parts of our input.
     * So load everything right now and use shifts in the main loop.
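     * (For example, SSHLL Vd.8H, Vn.8B, #shift with rd == rn would
     * otherwise overwrite not-yet-read input bytes with 16-bit results.)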
7476 */ 7477 tcg_rd = tcg_temp_new_i64(); 7478 tcg_rn = tcg_temp_new_i64(); 7479 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7480 7481 esize = 8 << esz; 7482 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7483 if (is_u) { 7484 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7485 } else { 7486 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7487 } 7488 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7489 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7490 } 7491 clear_vec_high(s, true, a->rd); 7492 return true; 7493 } 7494 7495 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7496 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7497 7498 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7499 { 7500 assert(shift >= 0 && shift <= 64); 7501 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7502 } 7503 7504 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7505 { 7506 assert(shift >= 0 && shift <= 64); 7507 if (shift == 64) { 7508 tcg_gen_movi_i64(dst, 0); 7509 } else { 7510 tcg_gen_shri_i64(dst, src, shift); 7511 } 7512 } 7513 7514 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7515 { 7516 gen_sshr_d(src, src, shift); 7517 tcg_gen_add_i64(dst, dst, src); 7518 } 7519 7520 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7521 { 7522 gen_ushr_d(src, src, shift); 7523 tcg_gen_add_i64(dst, dst, src); 7524 } 7525 7526 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7527 { 7528 assert(shift >= 0 && shift <= 32); 7529 if (shift) { 7530 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7531 tcg_gen_add_i64(dst, src, rnd); 7532 tcg_gen_sari_i64(dst, dst, shift); 7533 } else { 7534 tcg_gen_mov_i64(dst, src); 7535 } 7536 } 7537 7538 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7539 { 7540 assert(shift >= 0 && shift <= 32); 7541 if (shift) { 7542 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7543 tcg_gen_add_i64(dst, src, rnd); 7544 tcg_gen_shri_i64(dst, dst, shift); 7545 } else { 7546 tcg_gen_mov_i64(dst, src); 7547 } 7548 } 7549 7550 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7551 { 7552 assert(shift >= 0 && shift <= 64); 7553 if (shift == 0) { 7554 tcg_gen_mov_i64(dst, src); 7555 } else if (shift == 64) { 7556 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7557 tcg_gen_movi_i64(dst, 0); 7558 } else { 7559 TCGv_i64 rnd = tcg_temp_new_i64(); 7560 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7561 tcg_gen_sari_i64(dst, src, shift); 7562 tcg_gen_add_i64(dst, dst, rnd); 7563 } 7564 } 7565 7566 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7567 { 7568 assert(shift >= 0 && shift <= 64); 7569 if (shift == 0) { 7570 tcg_gen_mov_i64(dst, src); 7571 } else if (shift == 64) { 7572 /* Rounding will propagate bit 63 into bit 64. */ 7573 tcg_gen_shri_i64(dst, src, 63); 7574 } else { 7575 TCGv_i64 rnd = tcg_temp_new_i64(); 7576 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7577 tcg_gen_shri_i64(dst, src, shift); 7578 tcg_gen_add_i64(dst, dst, rnd); 7579 } 7580 } 7581 7582 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7583 { 7584 gen_srshr_d(src, src, shift); 7585 tcg_gen_add_i64(dst, dst, src); 7586 } 7587 7588 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7589 { 7590 gen_urshr_d(src, src, shift); 7591 tcg_gen_add_i64(dst, dst, src); 7592 } 7593 7594 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7595 { 7596 /* If shift is 64, dst is unchanged. 
*/ 7597 if (shift != 64) { 7598 tcg_gen_shri_i64(src, src, shift); 7599 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7600 } 7601 } 7602 7603 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7604 { 7605 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7606 } 7607 7608 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7609 WideShiftImmFn * const fns[3], MemOp sign) 7610 { 7611 TCGv_i64 tcg_rn, tcg_rd; 7612 int esz = a->esz; 7613 int esize; 7614 WideShiftImmFn *fn; 7615 7616 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7617 7618 if (!fp_access_check(s)) { 7619 return true; 7620 } 7621 7622 tcg_rn = tcg_temp_new_i64(); 7623 tcg_rd = tcg_temp_new_i64(); 7624 tcg_gen_movi_i64(tcg_rd, 0); 7625 7626 fn = fns[esz]; 7627 esize = 8 << esz; 7628 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7629 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7630 fn(tcg_rn, tcg_rn, a->imm); 7631 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7632 } 7633 7634 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7635 clear_vec_high(s, a->q, a->rd); 7636 return true; 7637 } 7638 7639 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7640 { 7641 tcg_gen_sari_i64(d, s, i); 7642 tcg_gen_ext16u_i64(d, d); 7643 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7644 } 7645 7646 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7647 { 7648 tcg_gen_sari_i64(d, s, i); 7649 tcg_gen_ext32u_i64(d, d); 7650 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7651 } 7652 7653 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7654 { 7655 gen_sshr_d(d, s, i); 7656 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7657 } 7658 7659 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7660 { 7661 tcg_gen_shri_i64(d, s, i); 7662 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7663 } 7664 7665 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7666 { 7667 tcg_gen_shri_i64(d, s, i); 7668 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7669 } 7670 7671 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7672 { 7673 gen_ushr_d(d, s, i); 7674 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7675 } 7676 7677 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7678 { 7679 tcg_gen_sari_i64(d, s, i); 7680 tcg_gen_ext16u_i64(d, d); 7681 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7682 } 7683 7684 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7685 { 7686 tcg_gen_sari_i64(d, s, i); 7687 tcg_gen_ext32u_i64(d, d); 7688 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7689 } 7690 7691 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7692 { 7693 gen_sshr_d(d, s, i); 7694 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7695 } 7696 7697 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7698 { 7699 gen_srshr_bhs(d, s, i); 7700 tcg_gen_ext16u_i64(d, d); 7701 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7702 } 7703 7704 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7705 { 7706 gen_srshr_bhs(d, s, i); 7707 tcg_gen_ext32u_i64(d, d); 7708 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7709 } 7710 7711 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7712 { 7713 gen_srshr_d(d, s, i); 7714 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7715 } 7716 7717 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7718 { 7719 gen_urshr_bhs(d, s, i); 7720 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7721 } 7722 7723 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7724 
{ 7725 gen_urshr_bhs(d, s, i); 7726 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7727 } 7728 7729 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7730 { 7731 gen_urshr_d(d, s, i); 7732 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7733 } 7734 7735 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7736 { 7737 gen_srshr_bhs(d, s, i); 7738 tcg_gen_ext16u_i64(d, d); 7739 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7740 } 7741 7742 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7743 { 7744 gen_srshr_bhs(d, s, i); 7745 tcg_gen_ext32u_i64(d, d); 7746 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7747 } 7748 7749 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7750 { 7751 gen_srshr_d(d, s, i); 7752 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7753 } 7754 7755 static WideShiftImmFn * const shrn_fns[] = { 7756 tcg_gen_shri_i64, 7757 tcg_gen_shri_i64, 7758 gen_ushr_d, 7759 }; 7760 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7761 7762 static WideShiftImmFn * const rshrn_fns[] = { 7763 gen_urshr_bhs, 7764 gen_urshr_bhs, 7765 gen_urshr_d, 7766 }; 7767 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7768 7769 static WideShiftImmFn * const sqshrn_fns[] = { 7770 gen_sqshrn_b, 7771 gen_sqshrn_h, 7772 gen_sqshrn_s, 7773 }; 7774 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7775 7776 static WideShiftImmFn * const uqshrn_fns[] = { 7777 gen_uqshrn_b, 7778 gen_uqshrn_h, 7779 gen_uqshrn_s, 7780 }; 7781 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7782 7783 static WideShiftImmFn * const sqshrun_fns[] = { 7784 gen_sqshrun_b, 7785 gen_sqshrun_h, 7786 gen_sqshrun_s, 7787 }; 7788 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7789 7790 static WideShiftImmFn * const sqrshrn_fns[] = { 7791 gen_sqrshrn_b, 7792 gen_sqrshrn_h, 7793 gen_sqrshrn_s, 7794 }; 7795 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7796 7797 static WideShiftImmFn * const uqrshrn_fns[] = { 7798 gen_uqrshrn_b, 7799 gen_uqrshrn_h, 7800 gen_uqrshrn_s, 7801 }; 7802 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7803 7804 static WideShiftImmFn * const sqrshrun_fns[] = { 7805 gen_sqrshrun_b, 7806 gen_sqrshrun_h, 7807 gen_sqrshrun_s, 7808 }; 7809 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7810 7811 /* 7812 * Advanced SIMD Scalar Shift by Immediate 7813 */ 7814 7815 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7816 WideShiftImmFn *fn, bool accumulate, 7817 MemOp sign) 7818 { 7819 if (fp_access_check(s)) { 7820 TCGv_i64 rd = tcg_temp_new_i64(); 7821 TCGv_i64 rn = tcg_temp_new_i64(); 7822 7823 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7824 if (accumulate) { 7825 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7826 } 7827 fn(rd, rn, a->imm); 7828 write_fp_dreg(s, a->rd, rd); 7829 } 7830 return true; 7831 } 7832 7833 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7834 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7835 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7836 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7837 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7838 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7839 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7840 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7841 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7842 7843 TRANS(SHL_s, do_scalar_shift_imm, a, 
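    /*
     * Scalar SHL only exists for the 64-bit element size, so a plain
     * 64-bit shift-left-immediate is all that is needed here.
     */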
tcg_gen_shli_i64, false, 0) 7844 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7845 7846 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7847 NeonGenTwoOpEnvFn *fn) 7848 { 7849 TCGv_i32 t = tcg_temp_new_i32(); 7850 tcg_gen_extrl_i64_i32(t, s); 7851 fn(t, tcg_env, t, tcg_constant_i32(i)); 7852 tcg_gen_extu_i32_i64(d, t); 7853 } 7854 7855 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7856 { 7857 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7858 } 7859 7860 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7861 { 7862 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7863 } 7864 7865 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7866 { 7867 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7868 } 7869 7870 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7871 { 7872 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7873 } 7874 7875 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7876 { 7877 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7878 } 7879 7880 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7881 { 7882 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7883 } 7884 7885 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7886 { 7887 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7888 } 7889 7890 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7891 { 7892 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7893 } 7894 7895 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7896 { 7897 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7898 } 7899 7900 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7901 { 7902 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7903 } 7904 7905 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7906 { 7907 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7908 } 7909 7910 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7911 { 7912 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7913 } 7914 7915 static WideShiftImmFn * const f_scalar_sqshli[] = { 7916 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7917 }; 7918 7919 static WideShiftImmFn * const f_scalar_uqshli[] = { 7920 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7921 }; 7922 7923 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7924 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7925 }; 7926 7927 /* Note that the helpers sign-extend their inputs, so don't do it here. 
*/ 7928 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7929 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7930 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7931 7932 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7933 WideShiftImmFn * const fns[3], 7934 MemOp sign, bool zext) 7935 { 7936 MemOp esz = a->esz; 7937 7938 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7939 7940 if (fp_access_check(s)) { 7941 TCGv_i64 rd = tcg_temp_new_i64(); 7942 TCGv_i64 rn = tcg_temp_new_i64(); 7943 7944 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7945 fns[esz](rd, rn, a->imm); 7946 if (zext) { 7947 tcg_gen_ext_i64(rd, rd, esz); 7948 } 7949 write_fp_dreg(s, a->rd, rd); 7950 } 7951 return true; 7952 } 7953 7954 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7955 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7956 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7957 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7958 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7959 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7960 7961 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7962 { 7963 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7964 tcg_rd = cpu_reg(s, a->rd); 7965 7966 if (!a->sf && is_signed) { 7967 tcg_n = tcg_temp_new_i64(); 7968 tcg_m = tcg_temp_new_i64(); 7969 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7970 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7971 } else { 7972 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7973 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7974 } 7975 7976 if (is_signed) { 7977 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7978 } else { 7979 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7980 } 7981 7982 if (!a->sf) { /* zero extend final result */ 7983 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7984 } 7985 return true; 7986 } 7987 7988 TRANS(SDIV, do_div, a, true) 7989 TRANS(UDIV, do_div, a, false) 7990 7991 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7992 * Note that it is the caller's responsibility to ensure that the 7993 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7994 * mandated semantics for out of range shifts. 7995 */ 7996 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7997 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7998 { 7999 switch (shift_type) { 8000 case A64_SHIFT_TYPE_LSL: 8001 tcg_gen_shl_i64(dst, src, shift_amount); 8002 break; 8003 case A64_SHIFT_TYPE_LSR: 8004 tcg_gen_shr_i64(dst, src, shift_amount); 8005 break; 8006 case A64_SHIFT_TYPE_ASR: 8007 if (!sf) { 8008 tcg_gen_ext32s_i64(dst, src); 8009 } 8010 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 8011 break; 8012 case A64_SHIFT_TYPE_ROR: 8013 if (sf) { 8014 tcg_gen_rotr_i64(dst, src, shift_amount); 8015 } else { 8016 TCGv_i32 t0, t1; 8017 t0 = tcg_temp_new_i32(); 8018 t1 = tcg_temp_new_i32(); 8019 tcg_gen_extrl_i64_i32(t0, src); 8020 tcg_gen_extrl_i64_i32(t1, shift_amount); 8021 tcg_gen_rotr_i32(t0, t0, t1); 8022 tcg_gen_extu_i32_i64(dst, t0); 8023 } 8024 break; 8025 default: 8026 assert(FALSE); /* all shift types should be handled */ 8027 break; 8028 } 8029 8030 if (!sf) { /* zero extend final result */ 8031 tcg_gen_ext32u_i64(dst, dst); 8032 } 8033 } 8034 8035 /* Shift a TCGv src by immediate, put result in dst. 
8036 * The shift amount must be in range (this should always be true as the 8037 * relevant instructions will UNDEF on bad shift immediates). 8038 */ 8039 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 8040 enum a64_shift_type shift_type, unsigned int shift_i) 8041 { 8042 assert(shift_i < (sf ? 64 : 32)); 8043 8044 if (shift_i == 0) { 8045 tcg_gen_mov_i64(dst, src); 8046 } else { 8047 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 8048 } 8049 } 8050 8051 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 8052 enum a64_shift_type shift_type) 8053 { 8054 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8055 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8056 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8057 8058 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 8059 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 8060 return true; 8061 } 8062 8063 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 8064 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 8065 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 8066 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 8067 8068 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 8069 { 8070 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 8071 TCGv_i32 tcg_bytes; 8072 8073 switch (a->esz) { 8074 case MO_8: 8075 case MO_16: 8076 case MO_32: 8077 tcg_val = tcg_temp_new_i64(); 8078 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 8079 break; 8080 case MO_64: 8081 tcg_val = cpu_reg(s, a->rm); 8082 break; 8083 default: 8084 g_assert_not_reached(); 8085 } 8086 tcg_acc = cpu_reg(s, a->rn); 8087 tcg_bytes = tcg_constant_i32(1 << a->esz); 8088 tcg_rd = cpu_reg(s, a->rd); 8089 8090 if (crc32c) { 8091 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8092 } else { 8093 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8094 } 8095 return true; 8096 } 8097 8098 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 8099 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 8100 8101 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 8102 { 8103 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 8104 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 8105 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 8106 8107 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 8108 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8109 8110 if (setflag) { 8111 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8112 } else { 8113 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8114 } 8115 return true; 8116 } 8117 8118 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 8119 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 8120 8121 static bool trans_IRG(DisasContext *s, arg_rrr *a) 8122 { 8123 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8124 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 8125 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 8126 8127 if (s->ata[0]) { 8128 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 8129 } else { 8130 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 8131 } 8132 return true; 8133 } 8134 return false; 8135 } 8136 8137 static bool trans_GMI(DisasContext *s, arg_rrr *a) 8138 { 8139 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8140 TCGv_i64 t = tcg_temp_new_i64(); 8141 8142 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 8143 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8144 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 8145 return true; 8146 } 8147 return false; 8148 } 8149 8150 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 8151 { 8152 if (dc_isar_feature(aa64_pauth, s)) { 8153 
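        /*
         * PACGA computes a generic authentication code for Xn, using
         * Xm|SP as the modifier, and writes it to the top 32 bits of Xd
         * with the bottom 32 bits cleared; the helper does all of that.
         */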
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 8154 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 8155 return true; 8156 } 8157 return false; 8158 } 8159 8160 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 8161 8162 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 8163 { 8164 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 8165 return true; 8166 } 8167 8168 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8169 { 8170 TCGv_i32 t32 = tcg_temp_new_i32(); 8171 8172 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8173 gen_helper_rbit(t32, t32); 8174 tcg_gen_extu_i32_i64(tcg_rd, t32); 8175 } 8176 8177 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 8178 { 8179 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8180 8181 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8182 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8183 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8184 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8185 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8186 } 8187 8188 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8189 { 8190 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 8191 } 8192 8193 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8194 { 8195 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 8196 } 8197 8198 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8199 { 8200 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8201 } 8202 8203 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8204 { 8205 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8206 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8207 } 8208 8209 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 8210 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 8211 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 8212 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 8213 8214 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8215 { 8216 TCGv_i32 t32 = tcg_temp_new_i32(); 8217 8218 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8219 tcg_gen_clzi_i32(t32, t32, 32); 8220 tcg_gen_extu_i32_i64(tcg_rd, t32); 8221 } 8222 8223 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8224 { 8225 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8226 } 8227 8228 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8229 { 8230 TCGv_i32 t32 = tcg_temp_new_i32(); 8231 8232 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8233 tcg_gen_clrsb_i32(t32, t32); 8234 tcg_gen_extu_i32_i64(tcg_rd, t32); 8235 } 8236 8237 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 8238 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
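/*
 * 64-bit CLS maps directly onto TCG's "count leading redundant sign
 * bits" op; the 32-bit form needs the narrowing wrapper above.
 */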
tcg_gen_clrsb_i64 : gen_cls32) 8239 8240 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) 8241 { 8242 TCGv_i64 tcg_rd, tcg_rn; 8243 8244 if (a->z) { 8245 if (a->rn != 31) { 8246 return false; 8247 } 8248 tcg_rn = tcg_constant_i64(0); 8249 } else { 8250 tcg_rn = cpu_reg_sp(s, a->rn); 8251 } 8252 if (s->pauth_active) { 8253 tcg_rd = cpu_reg(s, a->rd); 8254 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); 8255 } 8256 return true; 8257 } 8258 8259 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) 8260 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) 8261 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) 8262 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb) 8263 8264 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) 8265 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) 8266 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) 8267 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) 8268 8269 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) 8270 { 8271 if (s->pauth_active) { 8272 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8273 fn(tcg_rd, tcg_env, tcg_rd); 8274 } 8275 return true; 8276 } 8277 8278 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) 8279 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) 8280 8281 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, 8282 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) 8283 { 8284 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 8285 8286 if (!a->sf && (a->sa & (1 << 5))) { 8287 return false; 8288 } 8289 8290 tcg_rd = cpu_reg(s, a->rd); 8291 tcg_rn = cpu_reg(s, a->rn); 8292 8293 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8294 if (a->sa) { 8295 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8296 } 8297 8298 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); 8299 if (!a->sf) { 8300 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8301 } 8302 if (setflags) { 8303 gen_logic_CC(a->sf, tcg_rd); 8304 } 8305 return true; 8306 } 8307 8308 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) 8309 { 8310 /* 8311 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for 8312 * register-register MOV and MVN, so it is worth special casing. 
8313 */ 8314 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 8315 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8316 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8317 8318 if (a->n) { 8319 tcg_gen_not_i64(tcg_rd, tcg_rm); 8320 if (!a->sf) { 8321 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8322 } 8323 } else { 8324 if (a->sf) { 8325 tcg_gen_mov_i64(tcg_rd, tcg_rm); 8326 } else { 8327 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 8328 } 8329 } 8330 return true; 8331 } 8332 8333 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 8334 } 8335 8336 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 8337 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 8338 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 8339 8340 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 8341 bool sub_op, bool setflags) 8342 { 8343 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 8344 8345 if (a->sa > 4) { 8346 return false; 8347 } 8348 8349 /* non-flag setting ops may use SP */ 8350 if (!setflags) { 8351 tcg_rd = cpu_reg_sp(s, a->rd); 8352 } else { 8353 tcg_rd = cpu_reg(s, a->rd); 8354 } 8355 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 8356 8357 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8358 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 8359 8360 tcg_result = tcg_temp_new_i64(); 8361 if (!setflags) { 8362 if (sub_op) { 8363 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8364 } else { 8365 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8366 } 8367 } else { 8368 if (sub_op) { 8369 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8370 } else { 8371 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8372 } 8373 } 8374 8375 if (a->sf) { 8376 tcg_gen_mov_i64(tcg_rd, tcg_result); 8377 } else { 8378 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8379 } 8380 return true; 8381 } 8382 8383 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8384 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8385 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8386 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8387 8388 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8389 bool sub_op, bool setflags) 8390 { 8391 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8392 8393 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8394 return false; 8395 } 8396 8397 tcg_rd = cpu_reg(s, a->rd); 8398 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8399 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8400 8401 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8402 8403 tcg_result = tcg_temp_new_i64(); 8404 if (!setflags) { 8405 if (sub_op) { 8406 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8407 } else { 8408 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8409 } 8410 } else { 8411 if (sub_op) { 8412 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8413 } else { 8414 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8415 } 8416 } 8417 8418 if (a->sf) { 8419 tcg_gen_mov_i64(tcg_rd, tcg_result); 8420 } else { 8421 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8422 } 8423 return true; 8424 } 8425 8426 TRANS(ADD_r, do_addsub_reg, a, false, false) 8427 TRANS(SUB_r, do_addsub_reg, a, true, false) 8428 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8429 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8430 8431 static bool do_mulh(DisasContext *s, arg_rrr *a, 8432 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8433 { 8434 TCGv_i64 discard = tcg_temp_new_i64(); 8435 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8436 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8437 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8438 8439 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8440 return true; 8441 } 8442 
8443 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8444 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8445 8446 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8447 bool sf, bool is_sub, MemOp mop) 8448 { 8449 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8450 TCGv_i64 tcg_op1, tcg_op2; 8451 8452 if (mop == MO_64) { 8453 tcg_op1 = cpu_reg(s, a->rn); 8454 tcg_op2 = cpu_reg(s, a->rm); 8455 } else { 8456 tcg_op1 = tcg_temp_new_i64(); 8457 tcg_op2 = tcg_temp_new_i64(); 8458 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8459 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8460 } 8461 8462 if (a->ra == 31 && !is_sub) { 8463 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8464 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8465 } else { 8466 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8467 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8468 8469 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8470 if (is_sub) { 8471 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8472 } else { 8473 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8474 } 8475 } 8476 8477 if (!sf) { 8478 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8479 } 8480 return true; 8481 } 8482 8483 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8484 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8485 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8486 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8487 8488 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8489 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8490 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8491 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8492 8493 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8494 bool is_sub, bool setflags) 8495 { 8496 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8497 8498 tcg_rd = cpu_reg(s, a->rd); 8499 tcg_rn = cpu_reg(s, a->rn); 8500 8501 if (is_sub) { 8502 tcg_y = tcg_temp_new_i64(); 8503 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8504 } else { 8505 tcg_y = cpu_reg(s, a->rm); 8506 } 8507 8508 if (setflags) { 8509 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8510 } else { 8511 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8512 } 8513 return true; 8514 } 8515 8516 TRANS(ADC, do_adc_sbc, a, false, false) 8517 TRANS(SBC, do_adc_sbc, a, true, false) 8518 TRANS(ADCS, do_adc_sbc, a, false, true) 8519 TRANS(SBCS, do_adc_sbc, a, true, true) 8520 8521 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8522 { 8523 int mask = a->mask; 8524 TCGv_i64 tcg_rn; 8525 TCGv_i32 nzcv; 8526 8527 if (!dc_isar_feature(aa64_condm_4, s)) { 8528 return false; 8529 } 8530 8531 tcg_rn = read_cpu_reg(s, a->rn, 1); 8532 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8533 8534 nzcv = tcg_temp_new_i32(); 8535 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8536 8537 if (mask & 8) { /* N */ 8538 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8539 } 8540 if (mask & 4) { /* Z */ 8541 tcg_gen_not_i32(cpu_ZF, nzcv); 8542 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8543 } 8544 if (mask & 2) { /* C */ 8545 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8546 } 8547 if (mask & 1) { /* V */ 8548 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8549 } 8550 return true; 8551 } 8552 8553 static bool do_setf(DisasContext *s, int rn, int shift) 8554 { 8555 TCGv_i32 tmp = tcg_temp_new_i32(); 8556 8557 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8558 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8559 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8560 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8561 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8562 return true; 8563 } 8564 8565 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8566 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8567 8568 /* 
CCMP, CCMN */ 8569 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8570 { 8571 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8572 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8573 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8574 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8575 TCGv_i64 tcg_rn, tcg_y; 8576 DisasCompare c; 8577 unsigned nzcv; 8578 bool has_andc; 8579 8580 /* Set T0 = !COND. */ 8581 arm_test_cc(&c, a->cond); 8582 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8583 8584 /* Load the arguments for the new comparison. */ 8585 if (a->imm) { 8586 tcg_y = tcg_constant_i64(a->y); 8587 } else { 8588 tcg_y = cpu_reg(s, a->y); 8589 } 8590 tcg_rn = cpu_reg(s, a->rn); 8591 8592 /* Set the flags for the new comparison. */ 8593 if (a->op) { 8594 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8595 } else { 8596 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8597 } 8598 8599 /* 8600 * If COND was false, force the flags to #nzcv. Compute two masks 8601 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8602 * For tcg hosts that support ANDC, we can make do with just T1. 8603 * In either case, allow the tcg optimizer to delete any unused mask. 8604 */ 8605 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8606 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8607 8608 nzcv = a->nzcv; 8609 has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); 8610 if (nzcv & 8) { /* N */ 8611 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8612 } else { 8613 if (has_andc) { 8614 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8615 } else { 8616 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8617 } 8618 } 8619 if (nzcv & 4) { /* Z */ 8620 if (has_andc) { 8621 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8622 } else { 8623 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8624 } 8625 } else { 8626 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8627 } 8628 if (nzcv & 2) { /* C */ 8629 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8630 } else { 8631 if (has_andc) { 8632 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8633 } else { 8634 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8635 } 8636 } 8637 if (nzcv & 1) { /* V */ 8638 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8639 } else { 8640 if (has_andc) { 8641 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8642 } else { 8643 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8644 } 8645 } 8646 return true; 8647 } 8648 8649 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8650 { 8651 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8652 TCGv_i64 zero = tcg_constant_i64(0); 8653 DisasCompare64 c; 8654 8655 a64_test_cc(&c, a->cond); 8656 8657 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8658 /* CSET & CSETM. 
*/ 8659 if (a->else_inv) { 8660 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8661 tcg_rd, c.value, zero); 8662 } else { 8663 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8664 tcg_rd, c.value, zero); 8665 } 8666 } else { 8667 TCGv_i64 t_true = cpu_reg(s, a->rn); 8668 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8669 8670 if (a->else_inv && a->else_inc) { 8671 tcg_gen_neg_i64(t_false, t_false); 8672 } else if (a->else_inv) { 8673 tcg_gen_not_i64(t_false, t_false); 8674 } else if (a->else_inc) { 8675 tcg_gen_addi_i64(t_false, t_false, 1); 8676 } 8677 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8678 } 8679 8680 if (!a->sf) { 8681 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8682 } 8683 return true; 8684 } 8685 8686 typedef struct FPScalar1Int { 8687 void (*gen_h)(TCGv_i32, TCGv_i32); 8688 void (*gen_s)(TCGv_i32, TCGv_i32); 8689 void (*gen_d)(TCGv_i64, TCGv_i64); 8690 } FPScalar1Int; 8691 8692 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8693 const FPScalar1Int *f, 8694 bool merging) 8695 { 8696 switch (a->esz) { 8697 case MO_64: 8698 if (fp_access_check(s)) { 8699 TCGv_i64 t = read_fp_dreg(s, a->rn); 8700 f->gen_d(t, t); 8701 if (merging) { 8702 write_fp_dreg_merging(s, a->rd, a->rd, t); 8703 } else { 8704 write_fp_dreg(s, a->rd, t); 8705 } 8706 } 8707 break; 8708 case MO_32: 8709 if (fp_access_check(s)) { 8710 TCGv_i32 t = read_fp_sreg(s, a->rn); 8711 f->gen_s(t, t); 8712 if (merging) { 8713 write_fp_sreg_merging(s, a->rd, a->rd, t); 8714 } else { 8715 write_fp_sreg(s, a->rd, t); 8716 } 8717 } 8718 break; 8719 case MO_16: 8720 if (!dc_isar_feature(aa64_fp16, s)) { 8721 return false; 8722 } 8723 if (fp_access_check(s)) { 8724 TCGv_i32 t = read_fp_hreg(s, a->rn); 8725 f->gen_h(t, t); 8726 if (merging) { 8727 write_fp_hreg_merging(s, a->rd, a->rd, t); 8728 } else { 8729 write_fp_sreg(s, a->rd, t); 8730 } 8731 } 8732 break; 8733 default: 8734 return false; 8735 } 8736 return true; 8737 } 8738 8739 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, 8740 const FPScalar1Int *fnormal, 8741 const FPScalar1Int *fah) 8742 { 8743 return do_fp1_scalar_int(s, a, s->fpcr_ah ? 
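        /*
         * With FPCR.AH set, FABS and FNEG use the "alternate handling"
         * variants, which differ in how NaN inputs are treated (the sign
         * of a NaN is left unchanged).
         */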
fah : fnormal, true); 8744 } 8745 8746 static const FPScalar1Int f_scalar_fmov = { 8747 tcg_gen_mov_i32, 8748 tcg_gen_mov_i32, 8749 tcg_gen_mov_i64, 8750 }; 8751 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) 8752 8753 static const FPScalar1Int f_scalar_fabs = { 8754 gen_vfp_absh, 8755 gen_vfp_abss, 8756 gen_vfp_absd, 8757 }; 8758 static const FPScalar1Int f_scalar_ah_fabs = { 8759 gen_vfp_ah_absh, 8760 gen_vfp_ah_abss, 8761 gen_vfp_ah_absd, 8762 }; 8763 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) 8764 8765 static const FPScalar1Int f_scalar_fneg = { 8766 gen_vfp_negh, 8767 gen_vfp_negs, 8768 gen_vfp_negd, 8769 }; 8770 static const FPScalar1Int f_scalar_ah_fneg = { 8771 gen_vfp_ah_negh, 8772 gen_vfp_ah_negs, 8773 gen_vfp_ah_negd, 8774 }; 8775 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) 8776 8777 typedef struct FPScalar1 { 8778 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8779 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8780 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8781 } FPScalar1; 8782 8783 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, 8784 const FPScalar1 *f, int rmode, 8785 ARMFPStatusFlavour fpsttype) 8786 { 8787 TCGv_i32 tcg_rmode = NULL; 8788 TCGv_ptr fpst; 8789 TCGv_i64 t64; 8790 TCGv_i32 t32; 8791 int check = fp_access_check_scalar_hsd(s, a->esz); 8792 8793 if (check <= 0) { 8794 return check == 0; 8795 } 8796 8797 fpst = fpstatus_ptr(fpsttype); 8798 if (rmode >= 0) { 8799 tcg_rmode = gen_set_rmode(rmode, fpst); 8800 } 8801 8802 switch (a->esz) { 8803 case MO_64: 8804 t64 = read_fp_dreg(s, a->rn); 8805 f->gen_d(t64, t64, fpst); 8806 write_fp_dreg_merging(s, a->rd, a->rd, t64); 8807 break; 8808 case MO_32: 8809 t32 = read_fp_sreg(s, a->rn); 8810 f->gen_s(t32, t32, fpst); 8811 write_fp_sreg_merging(s, a->rd, a->rd, t32); 8812 break; 8813 case MO_16: 8814 t32 = read_fp_hreg(s, a->rn); 8815 f->gen_h(t32, t32, fpst); 8816 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8817 break; 8818 default: 8819 g_assert_not_reached(); 8820 } 8821 8822 if (rmode >= 0) { 8823 gen_restore_rmode(tcg_rmode, fpst); 8824 } 8825 return true; 8826 } 8827 8828 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8829 const FPScalar1 *f, int rmode) 8830 { 8831 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, 8832 a->esz == MO_16 ? 
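        /*
         * Half-precision operations use the separate FP16 float_status,
         * which honours FZ16 rather than the single/double
         * flush-to-zero controls.
         */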
8833 FPST_A64_F16 : FPST_A64); 8834 } 8835 8836 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, 8837 const FPScalar1 *f, int rmode) 8838 { 8839 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); 8840 } 8841 8842 static const FPScalar1 f_scalar_fsqrt = { 8843 gen_helper_vfp_sqrth, 8844 gen_helper_vfp_sqrts, 8845 gen_helper_vfp_sqrtd, 8846 }; 8847 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8848 8849 static const FPScalar1 f_scalar_frint = { 8850 gen_helper_advsimd_rinth, 8851 gen_helper_rints, 8852 gen_helper_rintd, 8853 }; 8854 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8855 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8856 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8857 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8858 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8859 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8860 8861 static const FPScalar1 f_scalar_frintx = { 8862 gen_helper_advsimd_rinth_exact, 8863 gen_helper_rints_exact, 8864 gen_helper_rintd_exact, 8865 }; 8866 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8867 8868 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) 8869 { 8870 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; 8871 TCGv_i32 t32; 8872 int check; 8873 8874 if (!dc_isar_feature(aa64_bf16, s)) { 8875 return false; 8876 } 8877 8878 check = fp_access_check_scalar_hsd(s, a->esz); 8879 8880 if (check <= 0) { 8881 return check == 0; 8882 } 8883 8884 t32 = read_fp_sreg(s, a->rn); 8885 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); 8886 write_fp_hreg_merging(s, a->rd, a->rd, t32); 8887 return true; 8888 } 8889 8890 static const FPScalar1 f_scalar_frint32 = { 8891 NULL, 8892 gen_helper_frint32_s, 8893 gen_helper_frint32_d, 8894 }; 8895 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8896 &f_scalar_frint32, FPROUNDING_ZERO) 8897 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8898 8899 static const FPScalar1 f_scalar_frint64 = { 8900 NULL, 8901 gen_helper_frint64_s, 8902 gen_helper_frint64_d, 8903 }; 8904 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8905 &f_scalar_frint64, FPROUNDING_ZERO) 8906 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8907 8908 static const FPScalar1 f_scalar_frecpe = { 8909 gen_helper_recpe_f16, 8910 gen_helper_recpe_f32, 8911 gen_helper_recpe_f64, 8912 }; 8913 static const FPScalar1 f_scalar_frecpe_rpres = { 8914 gen_helper_recpe_f16, 8915 gen_helper_recpe_rpres_f32, 8916 gen_helper_recpe_f64, 8917 }; 8918 TRANS(FRECPE_s, do_fp1_scalar_ah, a, 8919 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 8920 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) 8921 8922 static const FPScalar1 f_scalar_frecpx = { 8923 gen_helper_frecpx_f16, 8924 gen_helper_frecpx_f32, 8925 gen_helper_frecpx_f64, 8926 }; 8927 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) 8928 8929 static const FPScalar1 f_scalar_frsqrte = { 8930 gen_helper_rsqrte_f16, 8931 gen_helper_rsqrte_f32, 8932 gen_helper_rsqrte_f64, 8933 }; 8934 static const FPScalar1 f_scalar_frsqrte_rpres = { 8935 gen_helper_rsqrte_f16, 8936 gen_helper_rsqrte_rpres_f32, 8937 gen_helper_rsqrte_f64, 8938 }; 8939 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, 8940 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
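      /*
       * FEAT_RPRES (with FPCR.AH set) increases the precision of the
       * single-precision reciprocal and reciprocal-square-root estimates,
       * hence the dedicated _rpres helpers for the MO_32 case only.
       */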
8941 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) 8942 8943 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8944 { 8945 if (fp_access_check(s)) { 8946 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8947 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8948 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8949 8950 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 8951 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 8952 } 8953 return true; 8954 } 8955 8956 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8957 { 8958 if (fp_access_check(s)) { 8959 TCGv_i32 tmp = read_fp_sreg(s, a->rn); 8960 TCGv_i32 ahp = get_ahp_flag(); 8961 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8962 8963 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 8964 /* write_fp_hreg_merging is OK here because top half of result is zero */ 8965 write_fp_hreg_merging(s, a->rd, a->rd, tmp); 8966 } 8967 return true; 8968 } 8969 8970 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 8971 { 8972 if (fp_access_check(s)) { 8973 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8974 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8975 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8976 8977 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); 8978 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 8979 } 8980 return true; 8981 } 8982 8983 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 8984 { 8985 if (fp_access_check(s)) { 8986 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8987 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8988 TCGv_i32 ahp = get_ahp_flag(); 8989 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8990 8991 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8992 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ 8993 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); 8994 } 8995 return true; 8996 } 8997 8998 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 8999 { 9000 if (fp_access_check(s)) { 9001 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9002 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9003 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9004 TCGv_i32 tcg_ahp = get_ahp_flag(); 9005 9006 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9007 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 9008 } 9009 return true; 9010 } 9011 9012 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 9013 { 9014 if (fp_access_check(s)) { 9015 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9016 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9017 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9018 TCGv_i32 tcg_ahp = get_ahp_flag(); 9019 9020 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9021 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 9022 } 9023 return true; 9024 } 9025 9026 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 9027 TCGv_i64 tcg_int, bool is_signed) 9028 { 9029 TCGv_ptr tcg_fpstatus; 9030 TCGv_i32 tcg_shift, tcg_single; 9031 TCGv_i64 tcg_double; 9032 9033 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9034 tcg_shift = tcg_constant_i32(shift); 9035 9036 switch (esz) { 9037 case MO_64: 9038 tcg_double = tcg_temp_new_i64(); 9039 if (is_signed) { 9040 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9041 } else { 9042 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9043 } 9044 write_fp_dreg_merging(s, rd, rd, tcg_double); 9045 break; 9046 9047 case MO_32: 9048 tcg_single = tcg_temp_new_i32(); 9049 if (is_signed) { 9050 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9051 } else { 9052 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9053 } 9054 write_fp_sreg_merging(s, rd, rd, tcg_single); 9055 break; 9056 9057 case MO_16: 9058 tcg_single = tcg_temp_new_i32(); 9059 if (is_signed) { 9060 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9061 } else { 9062 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9063 } 9064 write_fp_hreg_merging(s, rd, rd, tcg_single); 9065 break; 9066 9067 default: 9068 g_assert_not_reached(); 9069 } 9070 return true; 9071 } 9072 9073 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 9074 { 9075 TCGv_i64 tcg_int; 9076 int check = fp_access_check_scalar_hsd(s, a->esz); 9077 9078 if (check <= 0) { 9079 return check == 0; 9080 } 9081 9082 if (a->sf) { 9083 tcg_int = cpu_reg(s, a->rn); 9084 } else { 9085 tcg_int = read_cpu_reg(s, a->rn, true); 9086 if (is_signed) { 9087 tcg_gen_ext32s_i64(tcg_int, tcg_int); 9088 } else { 9089 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9090 } 9091 } 9092 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9093 } 9094 9095 TRANS(SCVTF_g, do_cvtf_g, a, true) 9096 TRANS(UCVTF_g, do_cvtf_g, a, false) 9097 9098 /* 9099 * [US]CVTF (vector), scalar version. 9100 * Which sounds weird, but really just means input from fp register 9101 * instead of input from general register. Input and output element 9102 * size are always equal. 9103 */ 9104 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 9105 { 9106 TCGv_i64 tcg_int; 9107 int check = fp_access_check_scalar_hsd(s, a->esz); 9108 9109 if (check <= 0) { 9110 return check == 0; 9111 } 9112 9113 tcg_int = tcg_temp_new_i64(); 9114 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 9115 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9116 } 9117 9118 TRANS(SCVTF_f, do_cvtf_f, a, true) 9119 TRANS(UCVTF_f, do_cvtf_f, a, false) 9120 9121 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 9122 TCGv_i64 tcg_out, int shift, int rn, 9123 ARMFPRounding rmode) 9124 { 9125 TCGv_ptr tcg_fpstatus; 9126 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 9127 9128 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9129 tcg_shift = tcg_constant_i32(shift); 9130 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9131 9132 switch (esz) { 9133 case MO_64: 9134 read_vec_element(s, tcg_out, rn, 0, MO_64); 9135 switch (out) { 9136 case MO_64 | MO_SIGN: 9137 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9138 break; 9139 case MO_64: 9140 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9141 break; 9142 case MO_32 | MO_SIGN: 9143 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9144 break; 9145 case MO_32: 9146 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9147 break; 9148 default: 9149 g_assert_not_reached(); 9150 } 9151 break; 9152 9153 case MO_32: 9154 tcg_single = read_fp_sreg(s, rn); 9155 switch (out) { 9156 case MO_64 | MO_SIGN: 9157 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9158 break; 9159 case MO_64: 9160 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9161 break; 9162 case MO_32 | MO_SIGN: 9163 gen_helper_vfp_tosls(tcg_single, tcg_single, 9164 tcg_shift, tcg_fpstatus); 9165 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9166 break; 9167 case MO_32: 9168 gen_helper_vfp_touls(tcg_single, tcg_single, 9169 tcg_shift, tcg_fpstatus); 9170 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9171 break; 9172 default: 9173 g_assert_not_reached(); 9174 } 9175 break; 9176 9177 case MO_16: 9178 tcg_single = read_fp_hreg(s, rn); 9179 switch (out) { 9180 case MO_64 | MO_SIGN: 9181 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9182 break; 9183 case MO_64: 9184 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9185 break; 9186 case MO_32 | MO_SIGN: 9187 gen_helper_vfp_toslh(tcg_single, tcg_single, 9188 tcg_shift, tcg_fpstatus); 9189 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9190 break; 9191 case MO_32: 9192 gen_helper_vfp_toulh(tcg_single, tcg_single, 9193 tcg_shift, tcg_fpstatus); 9194 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9195 break; 9196 case MO_16 | MO_SIGN: 9197 gen_helper_vfp_toshh(tcg_single, tcg_single, 9198 tcg_shift, tcg_fpstatus); 9199 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9200 break; 9201 case MO_16: 9202 gen_helper_vfp_touhh(tcg_single, tcg_single, 9203 tcg_shift, tcg_fpstatus); 9204 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9205 break; 9206 default: 9207 g_assert_not_reached(); 9208 } 9209 break; 9210 9211 default: 9212 g_assert_not_reached(); 9213 } 9214 9215 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9216 } 9217 9218 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 9219 ARMFPRounding rmode, bool is_signed) 9220 { 9221 TCGv_i64 tcg_int; 9222 int check = fp_access_check_scalar_hsd(s, a->esz); 9223 9224 if (check <= 0) { 9225 return check == 0; 9226 } 9227 9228 tcg_int = cpu_reg(s, a->rd); 9229 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
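                   /*
                    * Destination MemOp: width from a->sf, with MO_SIGN
                    * selecting the signed-saturating conversion helpers.
                    */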
MO_SIGN : 0), 9230 a->esz, tcg_int, a->shift, a->rn, rmode); 9231 9232 if (!a->sf) { 9233 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9234 } 9235 return true; 9236 } 9237 9238 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 9239 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 9240 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 9241 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 9242 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 9243 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 9244 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 9245 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 9246 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 9247 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 9248 9249 /* 9250 * FCVT* (vector), scalar version. 9251 * Which sounds weird, but really just means output to fp register 9252 * instead of output to general register. Input and output element 9253 * size are always equal. 9254 */ 9255 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 9256 ARMFPRounding rmode, bool is_signed) 9257 { 9258 TCGv_i64 tcg_int; 9259 int check = fp_access_check_scalar_hsd(s, a->esz); 9260 9261 if (check <= 0) { 9262 return check == 0; 9263 } 9264 9265 tcg_int = tcg_temp_new_i64(); 9266 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 9267 a->esz, tcg_int, a->shift, a->rn, rmode); 9268 9269 if (!s->fpcr_nep) { 9270 clear_vec(s, a->rd); 9271 } 9272 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 9273 return true; 9274 } 9275 9276 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 9277 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 9278 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 9279 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 9280 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 9281 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 9282 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 9283 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 9284 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 9285 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 9286 9287 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 9288 { 9289 if (!dc_isar_feature(aa64_jscvt, s)) { 9290 return false; 9291 } 9292 if (fp_access_check(s)) { 9293 TCGv_i64 t = read_fp_dreg(s, a->rn); 9294 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 9295 9296 gen_helper_fjcvtzs(t, t, fpstatus); 9297 9298 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 9299 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9300 tcg_gen_movi_i32(cpu_CF, 0); 9301 tcg_gen_movi_i32(cpu_NF, 0); 9302 tcg_gen_movi_i32(cpu_VF, 0); 9303 } 9304 return true; 9305 } 9306 9307 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 9308 { 9309 if (!dc_isar_feature(aa64_fp16, s)) { 9310 return false; 9311 } 9312 if (fp_access_check(s)) { 9313 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9314 TCGv_i64 tmp = tcg_temp_new_i64(); 9315 tcg_gen_ext16u_i64(tmp, tcg_rn); 9316 write_fp_dreg(s, a->rd, tmp); 9317 } 9318 return true; 9319 } 9320 9321 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 9322 { 9323 if (fp_access_check(s)) { 9324 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9325 TCGv_i64 tmp = tcg_temp_new_i64(); 9326 tcg_gen_ext32u_i64(tmp, tcg_rn); 9327 write_fp_dreg(s, a->rd, tmp); 9328 } 9329 return true; 9330 } 9331 9332 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 9333 { 9334 if (fp_access_check(s)) { 9335 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9336 write_fp_dreg(s, a->rd, tcg_rn); 9337 } 9338 return true; 9339 } 9340 9341 
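/*
 * The remaining FMOV (general) forms move raw bits between the integer
 * and the SIMD&FP register files without any conversion.  The _ux/_xu
 * pair accesses the upper 64 bits of the 128-bit vector register
 * (FMOV Vd.D[1], Xn and FMOV Xd, Vn.D[1]), which is why they go through
 * fp_reg_hi_offset() rather than an ordinary element access.
 */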
static bool trans_FMOV_ux(DisasContext *s, arg_rr *a) 9342 { 9343 if (fp_access_check(s)) { 9344 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9345 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 9346 clear_vec_high(s, true, a->rd); 9347 } 9348 return true; 9349 } 9350 9351 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 9352 { 9353 if (!dc_isar_feature(aa64_fp16, s)) { 9354 return false; 9355 } 9356 if (fp_access_check(s)) { 9357 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9358 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 9359 } 9360 return true; 9361 } 9362 9363 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 9364 { 9365 if (fp_access_check(s)) { 9366 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9367 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 9368 } 9369 return true; 9370 } 9371 9372 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 9373 { 9374 if (fp_access_check(s)) { 9375 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9376 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 9377 } 9378 return true; 9379 } 9380 9381 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 9382 { 9383 if (fp_access_check(s)) { 9384 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9385 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 9386 } 9387 return true; 9388 } 9389 9390 typedef struct ENVScalar1 { 9391 NeonGenOneOpEnvFn *gen_bhs[3]; 9392 NeonGenOne64OpEnvFn *gen_d; 9393 } ENVScalar1; 9394 9395 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 9396 { 9397 if (!fp_access_check(s)) { 9398 return true; 9399 } 9400 if (a->esz == MO_64) { 9401 TCGv_i64 t = read_fp_dreg(s, a->rn); 9402 f->gen_d(t, tcg_env, t); 9403 write_fp_dreg(s, a->rd, t); 9404 } else { 9405 TCGv_i32 t = tcg_temp_new_i32(); 9406 9407 read_vec_element_i32(s, t, a->rn, 0, a->esz); 9408 f->gen_bhs[a->esz](t, tcg_env, t); 9409 write_fp_sreg(s, a->rd, t); 9410 } 9411 return true; 9412 } 9413 9414 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 9415 { 9416 if (a->esz == MO_64 && !a->q) { 9417 return false; 9418 } 9419 if (!fp_access_check(s)) { 9420 return true; 9421 } 9422 if (a->esz == MO_64) { 9423 TCGv_i64 t = tcg_temp_new_i64(); 9424 9425 for (int i = 0; i < 2; ++i) { 9426 read_vec_element(s, t, a->rn, i, MO_64); 9427 f->gen_d(t, tcg_env, t); 9428 write_vec_element(s, t, a->rd, i, MO_64); 9429 } 9430 } else { 9431 TCGv_i32 t = tcg_temp_new_i32(); 9432 int n = (a->q ? 
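        /* Element count: vector bytes (16 for Q=1, 8 for Q=0) >> esz. */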
16 : 8) >> a->esz; 9433 9434 for (int i = 0; i < n; ++i) { 9435 read_vec_element_i32(s, t, a->rn, i, a->esz); 9436 f->gen_bhs[a->esz](t, tcg_env, t); 9437 write_vec_element_i32(s, t, a->rd, i, a->esz); 9438 } 9439 } 9440 clear_vec_high(s, a->q, a->rd); 9441 return true; 9442 } 9443 9444 static const ENVScalar1 f_scalar_sqabs = { 9445 { gen_helper_neon_qabs_s8, 9446 gen_helper_neon_qabs_s16, 9447 gen_helper_neon_qabs_s32 }, 9448 gen_helper_neon_qabs_s64, 9449 }; 9450 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9451 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9452 9453 static const ENVScalar1 f_scalar_sqneg = { 9454 { gen_helper_neon_qneg_s8, 9455 gen_helper_neon_qneg_s16, 9456 gen_helper_neon_qneg_s32 }, 9457 gen_helper_neon_qneg_s64, 9458 }; 9459 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9460 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9461 9462 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9463 { 9464 if (fp_access_check(s)) { 9465 TCGv_i64 t = read_fp_dreg(s, a->rn); 9466 f(t, t); 9467 write_fp_dreg(s, a->rd, t); 9468 } 9469 return true; 9470 } 9471 9472 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9473 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9474 9475 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9476 { 9477 if (fp_access_check(s)) { 9478 TCGv_i64 t = read_fp_dreg(s, a->rn); 9479 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9480 write_fp_dreg(s, a->rd, t); 9481 } 9482 return true; 9483 } 9484 9485 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9486 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9487 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9488 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9489 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9490 9491 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9492 ArithOneOp * const fn[3]) 9493 { 9494 if (a->esz == MO_64) { 9495 return false; 9496 } 9497 if (fp_access_check(s)) { 9498 TCGv_i64 t = tcg_temp_new_i64(); 9499 9500 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9501 fn[a->esz](t, t); 9502 clear_vec(s, a->rd); 9503 write_vec_element(s, t, a->rd, 0, a->esz); 9504 } 9505 return true; 9506 } 9507 9508 #define WRAP_ENV(NAME) \ 9509 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9510 { gen_helper_##NAME(d, tcg_env, n); } 9511 9512 WRAP_ENV(neon_unarrow_sat8) 9513 WRAP_ENV(neon_unarrow_sat16) 9514 WRAP_ENV(neon_unarrow_sat32) 9515 9516 static ArithOneOp * const f_scalar_sqxtun[] = { 9517 gen_neon_unarrow_sat8, 9518 gen_neon_unarrow_sat16, 9519 gen_neon_unarrow_sat32, 9520 }; 9521 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9522 9523 WRAP_ENV(neon_narrow_sat_s8) 9524 WRAP_ENV(neon_narrow_sat_s16) 9525 WRAP_ENV(neon_narrow_sat_s32) 9526 9527 static ArithOneOp * const f_scalar_sqxtn[] = { 9528 gen_neon_narrow_sat_s8, 9529 gen_neon_narrow_sat_s16, 9530 gen_neon_narrow_sat_s32, 9531 }; 9532 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9533 9534 WRAP_ENV(neon_narrow_sat_u8) 9535 WRAP_ENV(neon_narrow_sat_u16) 9536 WRAP_ENV(neon_narrow_sat_u32) 9537 9538 static ArithOneOp * const f_scalar_uqxtn[] = { 9539 gen_neon_narrow_sat_u8, 9540 gen_neon_narrow_sat_u16, 9541 gen_neon_narrow_sat_u32, 9542 }; 9543 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9544 9545 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) 9546 { 9547 if (fp_access_check(s)) { 9548 /* 9549 * 64 bit to 32 bit float conversion 9550 * with von Neumann rounding (round to odd) 9551 */ 9552 TCGv_i64 src = read_fp_dreg(s, a->rn); 9553 TCGv_i32 dst = 
tcg_temp_new_i32(); 9554 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); 9555 write_fp_sreg_merging(s, a->rd, a->rd, dst); 9556 } 9557 return true; 9558 } 9559 9560 #undef WRAP_ENV 9561 9562 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9563 { 9564 if (!a->q && a->esz == MO_64) { 9565 return false; 9566 } 9567 if (fp_access_check(s)) { 9568 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9569 } 9570 return true; 9571 } 9572 9573 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) 9574 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) 9575 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) 9576 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) 9577 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) 9578 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) 9579 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) 9580 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) 9581 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) 9582 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) 9583 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) 9584 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) 9585 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) 9586 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) 9587 9588 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9589 { 9590 if (a->esz == MO_64) { 9591 return false; 9592 } 9593 if (fp_access_check(s)) { 9594 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9595 } 9596 return true; 9597 } 9598 9599 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) 9600 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) 9601 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) 9602 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) 9603 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) 9604 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) 9605 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) 9606 9607 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, 9608 ArithOneOp * const fn[3]) 9609 { 9610 if (a->esz == MO_64) { 9611 return false; 9612 } 9613 if (fp_access_check(s)) { 9614 TCGv_i64 t0 = tcg_temp_new_i64(); 9615 TCGv_i64 t1 = tcg_temp_new_i64(); 9616 9617 read_vec_element(s, t0, a->rn, 0, MO_64); 9618 read_vec_element(s, t1, a->rn, 1, MO_64); 9619 fn[a->esz](t0, t0); 9620 fn[a->esz](t1, t1); 9621 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32); 9622 write_vec_element(s, t1, a->rd, a->q ? 
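        /*
         * Q=1 is the second-part form: the two narrowed 32-bit chunks go
         * into the upper half of Vd (elements 2 and 3), preserving the
         * lower half; Q=0 fills elements 0 and 1 instead.
         */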
3 : 1, MO_32); 9623 clear_vec_high(s, a->q, a->rd); 9624 } 9625 return true; 9626 } 9627 9628 static ArithOneOp * const f_scalar_xtn[] = { 9629 gen_helper_neon_narrow_u8, 9630 gen_helper_neon_narrow_u16, 9631 tcg_gen_ext32u_i64, 9632 }; 9633 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9634 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9635 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9636 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9637 9638 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9639 { 9640 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9641 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9642 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9643 TCGv_i32 ahp = get_ahp_flag(); 9644 9645 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9646 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9647 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9648 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9649 tcg_gen_extu_i32_i64(d, tcg_lo); 9650 } 9651 9652 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9653 { 9654 TCGv_i32 tmp = tcg_temp_new_i32(); 9655 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9656 9657 gen_helper_vfp_fcvtsd(tmp, n, fpst); 9658 tcg_gen_extu_i32_i64(d, tmp); 9659 } 9660 9661 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9662 { 9663 /* 9664 * 64 bit to 32 bit float conversion 9665 * with von Neumann rounding (round to odd) 9666 */ 9667 TCGv_i32 tmp = tcg_temp_new_i32(); 9668 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); 9669 tcg_gen_extu_i32_i64(d, tmp); 9670 } 9671 9672 static ArithOneOp * const f_vector_fcvtn[] = { 9673 NULL, 9674 gen_fcvtn_hs, 9675 gen_fcvtn_sd, 9676 }; 9677 static ArithOneOp * const f_scalar_fcvtxn[] = { 9678 NULL, 9679 NULL, 9680 gen_fcvtxn_sd, 9681 }; 9682 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9683 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9684 9685 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9686 { 9687 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9688 TCGv_i32 tmp = tcg_temp_new_i32(); 9689 gen_helper_bfcvt_pair(tmp, n, fpst); 9690 tcg_gen_extu_i32_i64(d, tmp); 9691 } 9692 9693 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) 9694 { 9695 TCGv_ptr fpst = fpstatus_ptr(FPST_AH); 9696 TCGv_i32 tmp = tcg_temp_new_i32(); 9697 gen_helper_bfcvt_pair(tmp, n, fpst); 9698 tcg_gen_extu_i32_i64(d, tmp); 9699 } 9700 9701 static ArithOneOp * const f_vector_bfcvtn[2][3] = { 9702 { 9703 NULL, 9704 gen_bfcvtn_hs, 9705 NULL, 9706 }, { 9707 NULL, 9708 gen_bfcvtn_ah_hs, 9709 NULL, 9710 } 9711 }; 9712 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, 9713 f_vector_bfcvtn[s->fpcr_ah]) 9714 9715 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9716 { 9717 static NeonGenWidenFn * const widenfns[3] = { 9718 gen_helper_neon_widen_u8, 9719 gen_helper_neon_widen_u16, 9720 tcg_gen_extu_i32_i64, 9721 }; 9722 NeonGenWidenFn *widenfn; 9723 TCGv_i64 tcg_res[2]; 9724 TCGv_i32 tcg_op; 9725 int part, pass; 9726 9727 if (a->esz == MO_64) { 9728 return false; 9729 } 9730 if (!fp_access_check(s)) { 9731 return true; 9732 } 9733 9734 tcg_op = tcg_temp_new_i32(); 9735 widenfn = widenfns[a->esz]; 9736 part = a->q ? 
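    /* SHLL2 (Q=1) takes its input from the upper half of Vn. */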
static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
{
    static NeonGenWidenFn * const widenfns[3] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    NeonGenWidenFn *widenfn;
    TCGv_i64 tcg_res[2];
    TCGv_i32 tcg_op;
    int part, pass;

    if (a->esz == MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    tcg_op = tcg_temp_new_i32();
    widenfn = widenfns[a->esz];
    part = a->q ? 2 : 0;

    for (pass = 0; pass < 2; pass++) {
        read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
        tcg_res[pass] = tcg_temp_new_i64();
        widenfn(tcg_res[pass], tcg_op);
        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
    }
    return true;
}

static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    return true;
}

TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)

static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
                          const FPScalar1 *f, int rmode)
{
    TCGv_i32 tcg_rmode = NULL;
    TCGv_ptr fpst;
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    if (rmode >= 0) {
        tcg_rmode = gen_set_rmode(rmode, fpst);
    }

    if (a->esz == MO_64) {
        TCGv_i64 t64 = tcg_temp_new_i64();

        for (int pass = 0; pass < 2; ++pass) {
            read_vec_element(s, t64, a->rn, pass, MO_64);
            f->gen_d(t64, t64, fpst);
            write_vec_element(s, t64, a->rd, pass, MO_64);
        }
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
            = (a->esz == MO_16 ? f->gen_h : f->gen_s);

        for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
            read_vec_element_i32(s, t32, a->rn, pass, a->esz);
            gen(t32, t32, fpst);
            write_vec_element_i32(s, t32, a->rd, pass, a->esz);
        }
    }
    clear_vec_high(s, a->q, a->rd);

    if (rmode >= 0) {
        gen_restore_rmode(tcg_rmode, fpst);
    }
    return true;
}

TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)

TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)

TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint32, FPROUNDING_ZERO)
TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint64, FPROUNDING_ZERO)
TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)

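/*
 * Expand a two-register gvec op that takes an FP status pointer.
 * fns[] holds the half/single/double helpers and is indexed by esz - 1;
 * fpsttype selects which float_status the helpers operate on.
 */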
static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
                                           bool is_q, int rd, int rn, int data,
                                           gen_helper_gvec_2_ptr * const fns[3],
                                           ARMFPStatusFlavour fpsttype)
{
    int check = fp_access_check_vector_hsd(s, is_q, esz);
    TCGv_ptr fpst;

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       data, fns[esz - 1]);
    return true;
}

static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
                             int rd, int rn, int data,
                             gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
                                          esz == MO_16 ? FPST_A64_F16 :
                                          FPST_A64);
}

static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
                                int rd, int rn, int data,
                                gen_helper_gvec_2_ptr * const fns[3])
{
    return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
                                          fns, select_ah_fpst(s, esz));
}

static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
    gen_helper_gvec_vcvt_sh,
    gen_helper_gvec_vcvt_sf,
    gen_helper_gvec_vcvt_sd,
};
TRANS(SCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
TRANS(SCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)

static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
    gen_helper_gvec_vcvt_uh,
    gen_helper_gvec_vcvt_uf,
    gen_helper_gvec_vcvt_ud,
};
TRANS(UCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
TRANS(UCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)

static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
    gen_helper_gvec_vcvt_rz_hs,
    gen_helper_gvec_vcvt_rz_fs,
    gen_helper_gvec_vcvt_rz_ds,
};
TRANS(FCVTZS_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)

static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
    gen_helper_gvec_vcvt_rz_hu,
    gen_helper_gvec_vcvt_rz_fu,
    gen_helper_gvec_vcvt_rz_du,
};
TRANS(FCVTZU_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)

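/*
 * Float-to-integer conversions with an explicit rounding mode: the
 * float_round_* value is passed to the helper in the gvec 'data' argument.
 */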
static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
    gen_helper_gvec_vcvt_rm_sh,
    gen_helper_gvec_vcvt_rm_ss,
    gen_helper_gvec_vcvt_rm_sd,
};

static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
    gen_helper_gvec_vcvt_rm_uh,
    gen_helper_gvec_vcvt_rm_us,
    gen_helper_gvec_vcvt_rm_ud,
};

TRANS(FCVTNS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
TRANS(FCVTNU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
TRANS(FCVTPS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
TRANS(FCVTPU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
TRANS(FCVTMS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
TRANS(FCVTMU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
TRANS(FCVTZS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
TRANS(FCVTZU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
TRANS(FCVTAS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
TRANS(FCVTAU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)

static gen_helper_gvec_2_ptr * const f_fceq0[] = {
    gen_helper_gvec_fceq0_h,
    gen_helper_gvec_fceq0_s,
    gen_helper_gvec_fceq0_d,
};
TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)

static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
    gen_helper_gvec_fcgt0_h,
    gen_helper_gvec_fcgt0_s,
    gen_helper_gvec_fcgt0_d,
};
TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)

static gen_helper_gvec_2_ptr * const f_fcge0[] = {
    gen_helper_gvec_fcge0_h,
    gen_helper_gvec_fcge0_s,
    gen_helper_gvec_fcge0_d,
};
TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)

static gen_helper_gvec_2_ptr * const f_fclt0[] = {
    gen_helper_gvec_fclt0_h,
    gen_helper_gvec_fclt0_s,
    gen_helper_gvec_fclt0_d,
};
TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)

static gen_helper_gvec_2_ptr * const f_fcle0[] = {
    gen_helper_gvec_fcle0_h,
    gen_helper_gvec_fcle0_s,
    gen_helper_gvec_fcle0_d,
};
TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)

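/*
 * FRECPE and FRSQRTE: when FPCR.AH is set and FEAT_RPRES is implemented,
 * the single-precision estimates use the increased-precision helpers.
 */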
static gen_helper_gvec_2_ptr * const f_frecpe[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s,
    gen_helper_gvec_frecpe_d,
};
static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_rpres_s,
    gen_helper_gvec_frecpe_d,
};
TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)

static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s,
    gen_helper_gvec_frsqrte_d,
};
static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_rpres_s,
    gen_helper_gvec_frsqrte_d,
};
TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
      s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)

static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
{
    /*
     * Handle 2-reg-misc ops which are widening (so each size element
     * in the source becomes a 2*size element in the destination).
     * The only instruction like this is FCVTL.
     */
    int pass;
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return true;
    }

    if (a->esz == MO_64) {
        /* 32 -> 64 bit fp conversion */
        TCGv_i64 tcg_res[2];
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        int srcelt = a->q ? 2 : 0;

        fpst = fpstatus_ptr(FPST_A64);

        for (pass = 0; pass < 2; pass++) {
            tcg_res[pass] = tcg_temp_new_i64();
            read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
        }
        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
        }
    } else {
        /* 16 -> 32 bit fp conversion */
        int srcelt = a->q ? 4 : 0;
        TCGv_i32 tcg_res[4];
        TCGv_i32 ahp = get_ahp_flag();

        fpst = fpstatus_ptr(FPST_A64_F16);

        for (pass = 0; pass < 4; pass++) {
            tcg_res[pass] = tcg_temp_new_i32();
            read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
                                           fpst, ahp);
        }
        for (pass = 0; pass < 4; pass++) {
            write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
        }
    }
    clear_vec_high(s, true, a->rd);
    return true;
}

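/*
 * Decode stubs used by the SME FA64 decoder (see disas_sme_fa64() below):
 * trans_OK accepts the instruction, while trans_FAIL marks it as not
 * allowed in streaming SVE mode.
 */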
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn
 *  - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->zt0_excp_el = EX_TBFLAG_A64(tb_flags, ZT0EXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->max_svl = arm_cpu->sme_max_vq * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
    dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /*
     * Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

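/*
 * Record the instruction start state for this insn.  With CF_PCREL the
 * translated code is position independent, so only the page offset of
 * the PC is recorded here.
 */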
static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /*
         * Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = 0;
    s->sve_access_checked = 0;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

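/*
 * Emit the code that ends the TB, according to how translation finished
 * (base.is_jmp).  When single-stepping, a step-complete exception is
 * raised instead of chaining to the next TB.
 */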
static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /*
         * Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};