xref: /qemu/target/arm/tcg/translate-a64.c (revision ccf86c392c5b8949bafd363e44d3abb112578044)
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

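/*
 * Worked example (illustrative values, not taken from the decode tables):
 * for a 64-bit LDR the element size field is 3, so an input of
 * x = (2 << 3) | 3 yields imm = 2 and scale = 3, and the function
 * returns 2 << 3 = 16, i.e. the byte offset of two 8-byte elements.
 */
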
/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

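/*
 * Sketch of the arithmetic, assuming LOG2_TAG_GRANULE is 4 (one
 * allocation tag per 16-byte granule): an immediate of 3 scales to
 * a byte offset of 3 << 4 = 48, i.e. three tag granules.
 */
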
/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

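/*
 * Illustrative case: an LDTR executed at EL1 in the EL1&0 regime
 * arrives here with useridx == ARMMMUIdx_E10_1 (or its _PAN variant)
 * and is downgraded to ARMMMUIdx_E10_0, so the access is performed
 * with EL0's translation regime and permissions.
 */
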
static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

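/*
 * Note on the bookkeeping above: s->btype caches what the translator
 * knows about env->btype at this point in the TB.  0 means "known to
 * be zero" (so reset_btype can skip the store), while -1 is used as
 * "not known at translate time", forcing the next reset_btype to emit
 * a real store.
 */
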
static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

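/*
 * Worked example for the CF_PCREL path: if cpu_pc was last written
 * with pc_save == 0x1000, the current insn is at pc_curr == 0x1008
 * and diff == 4, then a single add of (0x1008 - 0x1000) + 4 = 0xc
 * produces the target value without materialising an absolute PC.
 */
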
void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

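/*
 * Worked example of the two-range trick above: after the sextract,
 * bits [63:56] of dst are copies of bit 55.  For tbi == 1 (TBI0
 * only), the AND leaves the top byte all-zero when bit 55 is 0 (tag
 * ignored) and restores src's original top byte when bit 55 is 1
 * (no TBI for that range).  The OR for tbi == 2 is the mirror image
 * for the upper address range.
 */
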
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

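/*
 * MAKE_64BIT_MASK(56, 4) covers bits [59:56], the logical tag field,
 * so this is dst = src & ~0x0f00000000000000ull: the tag is forced
 * to zero while bits [63:60] and [55:0] pass through unchanged.
 */
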
static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

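/*
 * Sketch of the descriptor built above: MTEDESC packs into one i32
 * the mmu index (MIDX), the TBI and TCMA bits, whether the access is
 * a write, the required alignment and the access size minus one.
 * For example, a tag-checked 8-byte store at the current mem index
 * carries WRITE = 1 and SIZEM1 = 7, which the helper consumes when
 * performing the check.
 */
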
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

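/*
 * The code generated above computes ((addr + imm) mod 16) + size and
 * branches over the helper when the result is <= 16, i.e. when the
 * access fits within one 16-byte granule.  Worked example: a 4-byte
 * access at an address with (addr & 15) == 14 gives 14 + 4 = 18 > 16,
 * so it crosses the boundary and the unaligned-access helper raises
 * the alignment fault.
 */
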
/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * As with the GP register accessors, the values returned by the
 * read functions are auto-freed temporaries.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

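/*
 * This relies on the gvec contract that bytes from oprsz up to maxsz
 * are always zeroed: with oprsz = 8 (!is_q) the move rewrites D[rd]
 * onto itself and clears bytes [8, vsz), while oprsz = 16 clears only
 * whatever SVE tail exists beyond the 128-bit Q register.
 */
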
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

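/*
 * Worked example: tcg_gen_extr_i64_i32 splits the 64-bit result into
 * cpu_ZF = bits [31:0] and cpu_NF = bits [63:32]; OR-ing the halves
 * into cpu_ZF makes it zero iff the full result is zero, and bit 31
 * of cpu_NF is the result's bit 63, which is exactly the N flag in
 * QEMU's flag representation.
 */
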
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

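/*
 * The V-flag computation above uses the usual two's-complement
 * identity: overflow = (result ^ t0) & ~(t0 ^ t1), i.e. the operands
 * had the same sign but the result's sign differs.  Bit 63 of that
 * value lands in bit 31 of cpu_VF via tcg_gen_extrh_i64_i32, matching
 * the 32-bit flag convention used elsewhere in this file.
 */
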
static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

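/*
 * A sketch of what the syndrome above records: syn_data_abort_with_iss
 * packs the access size, whether the load sign-extends, the target
 * register number (iss_srt), whether that register is used as 64-bit
 * (iss_sf) and whether the insn has acquire/release semantics (iss_ar),
 * so that a data abort taken on this access can report a valid ISS in
 * ESR_ELx without re-decoding the instruction.
 */
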
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

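/*
 * Worked example: option == 0b010 selects extsize == MO_32 with
 * is_signed false, so with shift == 2 this computes
 * tcg_out = (uint64_t)(uint32_t)tcg_in << 2, the UXTW #2 operand
 * form; option == 0b111 (SXTX) sign-extends from bit 63, i.e. leaves
 * the value unchanged before the shift.
 */
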
1455 static inline void gen_check_sp_alignment(DisasContext *s)
1456 {
1457     /* The AArch64 architecture mandates that (if enabled via PSTATE
1458      * or SCTLR bits) there is a check that SP is 16-aligned on every
1459      * SP-relative load or store (with an exception generated if it is not).
1460      * In line with general QEMU practice regarding misaligned accesses,
1461      * we omit these checks for the sake of guest program performance.
1462      * This function is provided as a hook so we can more easily add these
1463      * checks in future (possibly as a "favour catching guest program bugs
1464      * over speed" user selectable option).
1465      */
1466 }
1467 
1468 /*
1469  * The instruction disassembly implemented here matches
1470  * the instruction encoding classifications in chapter C4
1471  * of the ARM Architecture Reference Manual (DDI0487B_a);
1472  * classification names and decode diagrams here should generally
1473  * match up with those in the manual.
1474  */
1475 
1476 static bool trans_B(DisasContext *s, arg_i *a)
1477 {
1478     reset_btype(s);
1479     gen_goto_tb(s, 0, a->imm);
1480     return true;
1481 }
1482 
1483 static bool trans_BL(DisasContext *s, arg_i *a)
1484 {
1485     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1486     reset_btype(s);
1487     gen_goto_tb(s, 0, a->imm);
1488     return true;
1489 }
1490 
1491 
1492 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1493 {
1494     DisasLabel match;
1495     TCGv_i64 tcg_cmp;
1496 
1497     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1498     reset_btype(s);
1499 
1500     match = gen_disas_label(s);
1501     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1502                         tcg_cmp, 0, match.label);
1503     gen_goto_tb(s, 0, 4);
1504     set_disas_label(s, match);
1505     gen_goto_tb(s, 1, a->imm);
1506     return true;
1507 }
1508 
1509 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1510 {
1511     DisasLabel match;
1512     TCGv_i64 tcg_cmp;
1513 
1514     tcg_cmp = tcg_temp_new_i64();
1515     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1516 
1517     reset_btype(s);
1518 
1519     match = gen_disas_label(s);
1520     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1521                         tcg_cmp, 0, match.label);
1522     gen_goto_tb(s, 0, 4);
1523     set_disas_label(s, match);
1524     gen_goto_tb(s, 1, a->imm);
1525     return true;
1526 }
1527 
1528 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1529 {
1530     /* BC.cond is only present with FEAT_HBC */
1531     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1532         return false;
1533     }
1534     reset_btype(s);
1535     if (a->cond < 0x0e) {
1536         /* genuinely conditional branches */
1537         DisasLabel match = gen_disas_label(s);
1538         arm_gen_test_cc(a->cond, match.label);
1539         gen_goto_tb(s, 0, 4);
1540         set_disas_label(s, match);
1541         gen_goto_tb(s, 1, a->imm);
1542     } else {
1543         /* 0xe and 0xf are both "always" conditions */
1544         gen_goto_tb(s, 0, a->imm);
1545     }
1546     return true;
1547 }
1548 
1549 static void set_btype_for_br(DisasContext *s, int rn)
1550 {
1551     if (dc_isar_feature(aa64_bti, s)) {
1552         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1553         if (rn == 16 || rn == 17) {
1554             set_btype(s, 1);
1555         } else {
1556             TCGv_i64 pc = tcg_temp_new_i64();
1557             gen_pc_plus_diff(s, pc, 0);
1558             gen_helper_guarded_page_br(tcg_env, pc);
1559             s->btype = -1;
1560         }
1561     }
1562 }
1563 
1564 static void set_btype_for_blr(DisasContext *s)
1565 {
1566     if (dc_isar_feature(aa64_bti, s)) {
1567         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1568         set_btype(s, 2);
1569     }
1570 }
1571 
1572 static bool trans_BR(DisasContext *s, arg_r *a)
1573 {
1574     set_btype_for_br(s, a->rn);
1575     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1576     s->base.is_jmp = DISAS_JUMP;
1577     return true;
1578 }
1579 
1580 static bool trans_BLR(DisasContext *s, arg_r *a)
1581 {
1582     TCGv_i64 dst = cpu_reg(s, a->rn);
1583     TCGv_i64 lr = cpu_reg(s, 30);
1584     if (dst == lr) {
1585         TCGv_i64 tmp = tcg_temp_new_i64();
1586         tcg_gen_mov_i64(tmp, dst);
1587         dst = tmp;
1588     }
1589     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1590     gen_a64_set_pc(s, dst);
1591     set_btype_for_blr(s);
1592     s->base.is_jmp = DISAS_JUMP;
1593     return true;
1594 }
1595 
1596 static bool trans_RET(DisasContext *s, arg_r *a)
1597 {
1598     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1599     s->base.is_jmp = DISAS_JUMP;
1600     return true;
1601 }
1602 
1603 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1604                                    TCGv_i64 modifier, bool use_key_a)
1605 {
1606     TCGv_i64 truedst;
1607     /*
1608      * Return the branch target for a BRAA/RETA/etc, which is either
1609      * just the destination dst, or that value with the pauth check
1610      * done and the code removed from the high bits.
1611      */
1612     if (!s->pauth_active) {
1613         return dst;
1614     }
1615 
1616     truedst = tcg_temp_new_i64();
1617     if (use_key_a) {
1618         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1619     } else {
1620         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1621     }
1622     return truedst;
1623 }
1624 
1625 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1626 {
1627     TCGv_i64 dst;
1628 
1629     if (!dc_isar_feature(aa64_pauth, s)) {
1630         return false;
1631     }
1632 
1633     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1634     set_btype_for_br(s, a->rn);
1635     gen_a64_set_pc(s, dst);
1636     s->base.is_jmp = DISAS_JUMP;
1637     return true;
1638 }
1639 
1640 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1641 {
1642     TCGv_i64 dst, lr;
1643 
1644     if (!dc_isar_feature(aa64_pauth, s)) {
1645         return false;
1646     }
1647 
1648     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1649     lr = cpu_reg(s, 30);
1650     if (dst == lr) {
1651         TCGv_i64 tmp = tcg_temp_new_i64();
1652         tcg_gen_mov_i64(tmp, dst);
1653         dst = tmp;
1654     }
1655     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1656     gen_a64_set_pc(s, dst);
1657     set_btype_for_blr(s);
1658     s->base.is_jmp = DISAS_JUMP;
1659     return true;
1660 }
1661 
1662 static bool trans_RETA(DisasContext *s, arg_reta *a)
1663 {
1664     TCGv_i64 dst;
1665 
1666     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1667     gen_a64_set_pc(s, dst);
1668     s->base.is_jmp = DISAS_JUMP;
1669     return true;
1670 }
1671 
1672 static bool trans_BRA(DisasContext *s, arg_bra *a)
1673 {
1674     TCGv_i64 dst;
1675 
1676     if (!dc_isar_feature(aa64_pauth, s)) {
1677         return false;
1678     }
1679     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1680     set_btype_for_br(s, a->rn);
1681     gen_a64_set_pc(s, dst);
1682     s->base.is_jmp = DISAS_JUMP;
1683     return true;
1684 }
1685 
1686 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1687 {
1688     TCGv_i64 dst, lr;
1689 
1690     if (!dc_isar_feature(aa64_pauth, s)) {
1691         return false;
1692     }
1693     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1694     lr = cpu_reg(s, 30);
1695     if (dst == lr) {
1696         TCGv_i64 tmp = tcg_temp_new_i64();
1697         tcg_gen_mov_i64(tmp, dst);
1698         dst = tmp;
1699     }
1700     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1701     gen_a64_set_pc(s, dst);
1702     set_btype_for_blr(s);
1703     s->base.is_jmp = DISAS_JUMP;
1704     return true;
1705 }
1706 
1707 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1708 {
1709     TCGv_i64 dst;
1710 
1711     if (s->current_el == 0) {
1712         return false;
1713     }
1714     if (s->trap_eret) {
1715         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1716         return true;
1717     }
1718     dst = tcg_temp_new_i64();
1719     tcg_gen_ld_i64(dst, tcg_env,
1720                    offsetof(CPUARMState, elr_el[s->current_el]));
1721 
1722     translator_io_start(&s->base);
1723 
1724     gen_helper_exception_return(tcg_env, dst);
1725     /* Must exit loop to check unmasked IRQs */
1726     s->base.is_jmp = DISAS_EXIT;
1727     return true;
1728 }
1729 
1730 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1731 {
1732     TCGv_i64 dst;
1733 
1734     if (!dc_isar_feature(aa64_pauth, s)) {
1735         return false;
1736     }
1737     if (s->current_el == 0) {
1738         return false;
1739     }
1740     /* The FGT trap takes precedence over an auth trap. */
1741     if (s->trap_eret) {
1742         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1743         return true;
1744     }
1745     dst = tcg_temp_new_i64();
1746     tcg_gen_ld_i64(dst, tcg_env,
1747                    offsetof(CPUARMState, elr_el[s->current_el]));
1748 
1749     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1750 
1751     translator_io_start(&s->base);
1752 
1753     gen_helper_exception_return(tcg_env, dst);
1754     /* Must exit loop to check unmasked IRQs */
1755     s->base.is_jmp = DISAS_EXIT;
1756     return true;
1757 }
1758 
1759 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1760 {
1761     return true;
1762 }
1763 
1764 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1765 {
1766     /*
1767      * When running in MTTCG we don't generate jumps to the yield and
1768      * WFE helpers as it won't affect the scheduling of other vCPUs.
1769      * If we wanted to more completely model WFE/SEV so we don't busy
1770      * spin unnecessarily we would need to do something more involved.
1771      */
1772     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1773         s->base.is_jmp = DISAS_YIELD;
1774     }
1775     return true;
1776 }
1777 
1778 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1779 {
1780     s->base.is_jmp = DISAS_WFI;
1781     return true;
1782 }
1783 
1784 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1785 {
1786     /*
1787      * When running in MTTCG we don't generate jumps to the yield and
1788      * WFE helpers as it won't affect the scheduling of other vCPUs.
1789      * If we wanted to more completely model WFE/SEV so we don't busy
1790      * spin unnecessarily we would need to do something more involved.
1791      */
1792     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1793         s->base.is_jmp = DISAS_WFE;
1794     }
1795     return true;
1796 }
1797 
1798 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1799 {
1800     if (!dc_isar_feature(aa64_wfxt, s)) {
1801         return false;
1802     }
1803 
1804     /*
1805      * Because we need to pass the register value to the helper,
1806      * it's easier to emit the code now, unlike trans_WFI which
1807      * defers it to aarch64_tr_tb_stop(). That means we need to
1808      * check ss_active so that single-stepping a WFIT doesn't halt.
1809      */
1810     if (s->ss_active) {
1811         /* Act like a NOP under architectural singlestep */
1812         return true;
1813     }
1814 
1815     gen_a64_update_pc(s, 4);
1816     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1817     /* Go back to the main loop to check for interrupts */
1818     s->base.is_jmp = DISAS_EXIT;
1819     return true;
1820 }
1821 
1822 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1823 {
1824     if (!dc_isar_feature(aa64_wfxt, s)) {
1825         return false;
1826     }
1827 
1828     /*
1829      * We rely here on our WFE implementation being a NOP, so we
1830      * don't need to do anything different to handle the WFET timeout
1831      * from what trans_WFE does.
1832      */
1833     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1834         s->base.is_jmp = DISAS_WFE;
1835     }
1836     return true;
1837 }
1838 
1839 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1840 {
1841     if (s->pauth_active) {
1842         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1843     }
1844     return true;
1845 }
1846 
1847 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1848 {
1849     if (s->pauth_active) {
1850         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1851     }
1852     return true;
1853 }
1854 
1855 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1856 {
1857     if (s->pauth_active) {
1858         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1859     }
1860     return true;
1861 }
1862 
1863 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1864 {
1865     if (s->pauth_active) {
1866         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1867     }
1868     return true;
1869 }
1870 
1871 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1872 {
1873     if (s->pauth_active) {
1874         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1875     }
1876     return true;
1877 }
1878 
1879 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1880 {
1881     /* Without RAS, we must implement this as NOP. */
1882     if (dc_isar_feature(aa64_ras, s)) {
1883         /*
1884          * QEMU does not have a source of physical SErrors,
1885          * so we are only concerned with virtual SErrors.
1886          * The pseudocode in the ARM for this case is
1887          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1888          *      AArch64.vESBOperation();
1889          * Most of the condition can be evaluated at translation time.
1890          * Test for EL2 present, and defer test for SEL2 to runtime.
1891          */
1892         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1893             gen_helper_vesb(tcg_env);
1894         }
1895     }
1896     return true;
1897 }
1898 
1899 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1900 {
1901     if (s->pauth_active) {
1902         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1903     }
1904     return true;
1905 }
1906 
1907 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1908 {
1909     if (s->pauth_active) {
1910         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1911     }
1912     return true;
1913 }
1914 
1915 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1916 {
1917     if (s->pauth_active) {
1918         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1919     }
1920     return true;
1921 }
1922 
1923 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1924 {
1925     if (s->pauth_active) {
1926         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1927     }
1928     return true;
1929 }
1930 
1931 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1932 {
1933     if (s->pauth_active) {
1934         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1935     }
1936     return true;
1937 }
1938 
1939 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1940 {
1941     if (s->pauth_active) {
1942         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1943     }
1944     return true;
1945 }
1946 
1947 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1948 {
1949     if (s->pauth_active) {
1950         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1951     }
1952     return true;
1953 }
1954 
1955 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1956 {
1957     if (s->pauth_active) {
1958         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1959     }
1960     return true;
1961 }
1962 
1963 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1964 {
1965     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1966     return true;
1967 }
1968 
1969 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1970 {
1971     /* We handle DSB and DMB the same way */
1972     TCGBar bar;
1973 
1974     switch (a->types) {
1975     case 1: /* MBReqTypes_Reads */
1976         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1977         break;
1978     case 2: /* MBReqTypes_Writes */
1979         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1980         break;
1981     default: /* MBReqTypes_All */
1982         bar = TCG_BAR_SC | TCG_MO_ALL;
1983         break;
1984     }
1985     tcg_gen_mb(bar);
1986     return true;
1987 }
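
/*
 * Summary of the mapping above from the DMB/DSB "types" field to TCG
 * memory barriers (all combined with TCG_BAR_SC):
 *
 *   MBReqTypes_Reads  (1)  -> TCG_MO_LD_LD | TCG_MO_LD_ST   ("dmb ld")
 *   MBReqTypes_Writes (2)  -> TCG_MO_ST_ST                  ("dmb st")
 *   MBReqTypes_All (else)  -> TCG_MO_ALL                    (full barrier)
 */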
1988 
1989 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
1990 {
1991     if (!dc_isar_feature(aa64_xs, s)) {
1992         return false;
1993     }
1994     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
1995     return true;
1996 }
1997 
1998 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1999 {
2000     /*
2001      * We need to break the TB after this insn to execute
2002      * self-modifying code correctly and also to take
2003      * any pending interrupts immediately.
2004      */
2005     reset_btype(s);
2006     gen_goto_tb(s, 0, 4);
2007     return true;
2008 }
2009 
2010 static bool trans_SB(DisasContext *s, arg_SB *a)
2011 {
2012     if (!dc_isar_feature(aa64_sb, s)) {
2013         return false;
2014     }
2015     /*
2016      * TODO: There is no speculation barrier opcode for TCG;
2017      * MB and end the TB instead.
2018      */
2019     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2020     gen_goto_tb(s, 0, 4);
2021     return true;
2022 }
2023 
2024 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2025 {
2026     if (!dc_isar_feature(aa64_condm_4, s)) {
2027         return false;
2028     }
2029     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2030     return true;
2031 }
2032 
2033 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2034 {
2035     TCGv_i32 z;
2036 
2037     if (!dc_isar_feature(aa64_condm_5, s)) {
2038         return false;
2039     }
2040 
2041     z = tcg_temp_new_i32();
2042 
2043     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2044 
2045     /*
2046      * (!C & !Z) << 31
2047      * (!(C | Z)) << 31
2048      * ~((C | Z) << 31)
2049      * ~-(C | Z)
2050      * (C | Z) - 1
2051      */
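    /*
     * Worked check of the identity, with C and Z each 0 or 1:
     *   C=0, Z=0:  (C | Z) - 1 = 0xffffffff, bit 31 set  -> N = 1
     *   otherwise: (C | Z) - 1 = 0,          bit 31 clear -> N = 0
     * which matches (!C & !Z).
     */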
2052     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2053     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2054 
2055     /* !(Z & C) */
2056     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2057     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2058 
2059     /* (!C & Z) << 31 -> -(Z & ~C) */
2060     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2061     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2062 
2063     /* C | Z */
2064     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2065 
2066     return true;
2067 }
2068 
2069 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2070 {
2071     if (!dc_isar_feature(aa64_condm_5, s)) {
2072         return false;
2073     }
2074 
2075     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2076     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2077 
2078     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2079     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2080 
2081     tcg_gen_movi_i32(cpu_NF, 0);
2082     tcg_gen_movi_i32(cpu_VF, 0);
2083 
2084     return true;
2085 }
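
/*
 * Net effect of AXFLAG above on the architectural flags:
 *   N' = 0,  Z' = Z | V,  C' = C & !V,  V' = 0
 * (remembering that in QEMU's representation "Z set" means cpu_ZF == 0).
 */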
2086 
2087 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2088 {
2089     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2090         return false;
2091     }
2092     if (a->imm & 1) {
2093         set_pstate_bits(PSTATE_UAO);
2094     } else {
2095         clear_pstate_bits(PSTATE_UAO);
2096     }
2097     gen_rebuild_hflags(s);
2098     s->base.is_jmp = DISAS_TOO_MANY;
2099     return true;
2100 }
2101 
2102 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2103 {
2104     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2105         return false;
2106     }
2107     if (a->imm & 1) {
2108         set_pstate_bits(PSTATE_PAN);
2109     } else {
2110         clear_pstate_bits(PSTATE_PAN);
2111     }
2112     gen_rebuild_hflags(s);
2113     s->base.is_jmp = DISAS_TOO_MANY;
2114     return true;
2115 }
2116 
2117 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2118 {
2119     if (s->current_el == 0) {
2120         return false;
2121     }
2122     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2123     s->base.is_jmp = DISAS_TOO_MANY;
2124     return true;
2125 }
2126 
2127 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2128 {
2129     if (!dc_isar_feature(aa64_ssbs, s)) {
2130         return false;
2131     }
2132     if (a->imm & 1) {
2133         set_pstate_bits(PSTATE_SSBS);
2134     } else {
2135         clear_pstate_bits(PSTATE_SSBS);
2136     }
2137     /* Don't need to rebuild hflags since SSBS is a nop */
2138     s->base.is_jmp = DISAS_TOO_MANY;
2139     return true;
2140 }
2141 
2142 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2143 {
2144     if (!dc_isar_feature(aa64_dit, s)) {
2145         return false;
2146     }
2147     if (a->imm & 1) {
2148         set_pstate_bits(PSTATE_DIT);
2149     } else {
2150         clear_pstate_bits(PSTATE_DIT);
2151     }
2152     /* There's no need to rebuild hflags because DIT is a nop */
2153     s->base.is_jmp = DISAS_TOO_MANY;
2154     return true;
2155 }
2156 
2157 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2158 {
2159     if (dc_isar_feature(aa64_mte, s)) {
2160         /* Full MTE is enabled -- set the TCO bit as directed. */
2161         if (a->imm & 1) {
2162             set_pstate_bits(PSTATE_TCO);
2163         } else {
2164             clear_pstate_bits(PSTATE_TCO);
2165         }
2166         gen_rebuild_hflags(s);
2167         /* Many factors, including TCO, go into MTE_ACTIVE. */
2168         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2169         return true;
2170     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2171         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2172         return true;
2173     } else {
2174         /* Insn not present */
2175         return false;
2176     }
2177 }
2178 
2179 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2180 {
2181     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2182     s->base.is_jmp = DISAS_TOO_MANY;
2183     return true;
2184 }
2185 
2186 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2187 {
2188     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2189     /* Exit the cpu loop to re-evaluate pending IRQs. */
2190     s->base.is_jmp = DISAS_UPDATE_EXIT;
2191     return true;
2192 }
2193 
2194 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2195 {
2196     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2197         return false;
2198     }
2199 
2200     if (a->imm == 0) {
2201         clear_pstate_bits(PSTATE_ALLINT);
2202     } else if (s->current_el > 1) {
2203         set_pstate_bits(PSTATE_ALLINT);
2204     } else {
2205         gen_helper_msr_set_allint_el1(tcg_env);
2206     }
2207 
2208     /* Exit the cpu loop to re-evaluate pending IRQs. */
2209     s->base.is_jmp = DISAS_UPDATE_EXIT;
2210     return true;
2211 }
2212 
2213 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2214 {
2215     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2216         return false;
2217     }
2218     if (sme_access_check(s)) {
2219         int old = s->pstate_sm | (s->pstate_za << 1);
2220         int new = a->imm * 3;
2221 
2222         if ((old ^ new) & a->mask) {
2223             /* At least one bit changes. */
2224             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2225                                 tcg_constant_i32(a->mask));
2226             s->base.is_jmp = DISAS_TOO_MANY;
2227         }
2228     }
2229     return true;
2230 }
2231 
2232 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2233 {
2234     TCGv_i32 tmp = tcg_temp_new_i32();
2235     TCGv_i32 nzcv = tcg_temp_new_i32();
2236 
2237     /* build bit 31, N */
2238     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2239     /* build bit 30, Z */
2240     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2241     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2242     /* build bit 29, C */
2243     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2244     /* build bit 28, V */
2245     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2246     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2247     /* generate result */
2248     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2249 }
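
/*
 * Illustrative plain-C equivalent of the packing gen_get_nzcv() emits
 * above, assuming QEMU's usual flag representation (NF and VF
 * significant in bit 31, ZF == 0 means Z set, CF holding 0 or 1).
 * A sketch for reference only; it is not called anywhere.
 */
static inline uint64_t ref_get_nzcv(uint32_t nf, uint32_t zf,
                                    uint32_t cf, uint32_t vf)
{
    uint32_t nzcv = (nf & (1u << 31))            /* N: bit 31 */
                  | ((zf == 0 ? 1u : 0u) << 30)  /* Z: bit 30 */
                  | ((cf & 1u) << 29)            /* C: bit 29 */
                  | ((vf >> 31) << 28);          /* V: bit 28 */
    return nzcv;                                 /* zero-extended, as extu_i32_i64 */
}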
2250 
2251 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2252 {
2253     TCGv_i32 nzcv = tcg_temp_new_i32();
2254 
2255     /* take NZCV from R[t] */
2256     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2257 
2258     /* bit 31, N */
2259     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2260     /* bit 30, Z */
2261     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2262     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2263     /* bit 29, C */
2264     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2265     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2266     /* bit 28, V */
2267     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2268     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2269 }
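
/*
 * The matching plain-C sketch for gen_set_nzcv() above: unpack a
 * 32-bit NZCV value into QEMU's flag variables.  Again for reference
 * only; note that any nonzero ZF value means "Z clear".
 */
static inline void ref_set_nzcv(uint32_t nzcv, uint32_t *nf, uint32_t *zf,
                                uint32_t *cf, uint32_t *vf)
{
    *nf = nzcv & (1u << 31);            /* N significant in bit 31 */
    *zf = (nzcv & (1u << 30)) == 0;     /* ZF == 0 iff Z flag set */
    *cf = (nzcv >> 29) & 1u;            /* C as 0 or 1 */
    *vf = (nzcv & (1u << 28)) << 3;     /* V significant in bit 31 */
}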
2270 
2271 static void gen_sysreg_undef(DisasContext *s, bool isread,
2272                              uint8_t op0, uint8_t op1, uint8_t op2,
2273                              uint8_t crn, uint8_t crm, uint8_t rt)
2274 {
2275     /*
2276      * Generate code to emit an UNDEF with correct syndrome
2277      * information for a failed system register access.
2278      * This is EC_UNCATEGORIZED (i.e. a standard UNDEF) in most cases,
2279      * but if FEAT_IDST is implemented then read accesses to registers
2280      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2281      * syndrome.
2282      */
2283     uint32_t syndrome;
2284 
2285     if (isread && dc_isar_feature(aa64_ids, s) &&
2286         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2287         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2288     } else {
2289         syndrome = syn_uncategorized();
2290     }
2291     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2292 }
2293 
2294 /* MRS - move from system register
2295  * MSR (register) - move to system register
2296  * SYS
2297  * SYSL
2298  * These are all essentially the same insn in 'read' and 'write'
2299  * versions, with varying op0 fields.
2300  */
2301 static void handle_sys(DisasContext *s, bool isread,
2302                        unsigned int op0, unsigned int op1, unsigned int op2,
2303                        unsigned int crn, unsigned int crm, unsigned int rt)
2304 {
2305     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2306                                       crn, crm, op0, op1, op2);
2307     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2308     bool need_exit_tb = false;
2309     bool nv_trap_to_el2 = false;
2310     bool nv_redirect_reg = false;
2311     bool skip_fp_access_checks = false;
2312     bool nv2_mem_redirect = false;
2313     TCGv_ptr tcg_ri = NULL;
2314     TCGv_i64 tcg_rt;
2315     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2316 
2317     if (crn == 11 || crn == 15) {
2318         /*
2319          * Check for TIDCP trap, which must take precedence over
2320          * the UNDEF for "no such register" etc.
2321          */
2322         switch (s->current_el) {
2323         case 0:
2324             if (dc_isar_feature(aa64_tidcp1, s)) {
2325                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2326             }
2327             break;
2328         case 1:
2329             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2330             break;
2331         }
2332     }
2333 
2334     if (!ri) {
2335         /* Unknown register; this might be a guest error or a feature
2336          * that QEMU does not implement.
2337          */
2338         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2339                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2340                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2341         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2342         return;
2343     }
2344 
2345     if (s->nv2 && ri->nv2_redirect_offset) {
2346         /*
2347          * Some registers always redirect to memory; some only do so if
2348          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2349          * pairs which share an offset; see the table in R_CSRPQ).
2350          */
2351         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2352             nv2_mem_redirect = s->nv1;
2353         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2354             nv2_mem_redirect = !s->nv1;
2355         } else {
2356             nv2_mem_redirect = true;
2357         }
2358     }
2359 
2360     /* Check access permissions */
2361     if (!cp_access_ok(s->current_el, ri, isread)) {
2362         /*
2363          * FEAT_NV/NV2 handling does not do the usual FP access checks
2364          * for registers only accessible at EL2 (though it *does* do them
2365          * for registers accessible at EL1).
2366          */
2367         skip_fp_access_checks = true;
2368         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2369             /*
2370              * This is one of the few EL2 registers which should redirect
2371              * to the equivalent EL1 register. We do that after running
2372              * the EL2 register's accessfn.
2373              */
2374             nv_redirect_reg = true;
2375             assert(!nv2_mem_redirect);
2376         } else if (nv2_mem_redirect) {
2377             /*
2378              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2379              * UNDEF to EL1.
2380              */
2381         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2382             /*
2383              * This register / instruction exists and is an EL2 register, so
2384              * we must trap to EL2 if accessed in nested virtualization EL1
2385              * instead of UNDEFing. We'll do that after the usual access checks.
2386              * (This makes a difference only for a couple of registers like
2387              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2388              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2389              * an accessfn which does nothing when called from EL1, because
2390              * the trap-to-EL3 controls which would apply to that register
2391              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2392              */
2393             nv_trap_to_el2 = true;
2394         } else {
2395             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2396             return;
2397         }
2398     }
2399 
2400     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2401         /* Emit code to perform further access permissions checks at
2402          * runtime; this may result in an exception.
2403          */
2404         gen_a64_update_pc(s, 0);
2405         tcg_ri = tcg_temp_new_ptr();
2406         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2407                                        tcg_constant_i32(key),
2408                                        tcg_constant_i32(syndrome),
2409                                        tcg_constant_i32(isread));
2410     } else if (ri->type & ARM_CP_RAISES_EXC) {
2411         /*
2412          * The readfn or writefn might raise an exception;
2413          * synchronize the CPU state in case it does.
2414          */
2415         gen_a64_update_pc(s, 0);
2416     }
2417 
2418     if (!skip_fp_access_checks) {
2419         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2420             return;
2421         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2422             return;
2423         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2424             return;
2425         }
2426     }
2427 
2428     if (nv_trap_to_el2) {
2429         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2430         return;
2431     }
2432 
2433     if (nv_redirect_reg) {
2434         /*
2435          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2436          * Conveniently in all cases the encoding of the EL1 register is
2437          * identical to the EL2 register except that opc1 is 0.
2438          * Get the reginfo for the EL1 register to use for the actual access.
2439          * We don't use the EL1 register's access function, and
2440          * fine-grained-traps on EL1 also do not apply here.
2441          */
2442         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2443                                  crn, crm, op0, 0, op2);
2444         ri = get_arm_cp_reginfo(s->cp_regs, key);
2445         assert(ri);
2446         assert(cp_access_ok(s->current_el, ri, isread));
2447         /*
2448          * We might not have done an update_pc earlier, so check we don't
2449          * need it. We could support this in future if necessary.
2450          */
2451         assert(!(ri->type & ARM_CP_RAISES_EXC));
2452     }
2453 
2454     if (nv2_mem_redirect) {
2455         /*
2456          * This system register is being redirected into an EL2 memory access.
2457          * This means it is not an IO operation, doesn't change hflags,
2458          * and need not end the TB, because it has no side effects.
2459          *
2460          * The access is 64-bit single copy atomic, guaranteed aligned because
2461          * of the definition of VCNR_EL2. Its endianness depends on
2462          * SCTLR_EL2.EE, not on the data endianness of EL1.
2463          * It is done under either the EL2 translation regime or the EL2&0
2464          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2465          * PSTATE.PAN is 0.
2466          */
2467         TCGv_i64 ptr = tcg_temp_new_i64();
2468         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2469         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2470         int memidx = arm_to_core_mmu_idx(armmemidx);
2471         uint32_t syn;
2472 
2473         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2474 
2475         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2476         tcg_gen_addi_i64(ptr, ptr,
2477                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2478         tcg_rt = cpu_reg(s, rt);
2479 
2480         syn = syn_data_abort_vncr(0, !isread, 0);
2481         disas_set_insn_syndrome(s, syn);
2482         if (isread) {
2483             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2484         } else {
2485             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2486         }
2487         return;
2488     }
2489 
2490     /* Handle special cases first */
2491     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2492     case 0:
2493         break;
2494     case ARM_CP_NOP:
2495         return;
2496     case ARM_CP_NZCV:
2497         tcg_rt = cpu_reg(s, rt);
2498         if (isread) {
2499             gen_get_nzcv(tcg_rt);
2500         } else {
2501             gen_set_nzcv(tcg_rt);
2502         }
2503         return;
2504     case ARM_CP_CURRENTEL:
2505     {
2506         /*
2507          * Reads as current EL value from pstate, which is
2508          * guaranteed to be constant by the tb flags.
2509          * For nested virt we should report EL2.
2510          */
2511         int el = s->nv ? 2 : s->current_el;
2512         tcg_rt = cpu_reg(s, rt);
2513         tcg_gen_movi_i64(tcg_rt, el << 2);
2514         return;
2515     }
2516     case ARM_CP_DC_ZVA:
2517         /* Writes clear the aligned block of memory which rt points into. */
2518         if (s->mte_active[0]) {
2519             int desc = 0;
2520 
2521             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2522             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2523             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2524 
2525             tcg_rt = tcg_temp_new_i64();
2526             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2527                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2528         } else {
2529             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2530         }
2531         gen_helper_dc_zva(tcg_env, tcg_rt);
2532         return;
2533     case ARM_CP_DC_GVA:
2534         {
2535             TCGv_i64 clean_addr, tag;
2536 
2537             /*
2538              * DC_GVA, like DC_ZVA, requires that we supply the original
2539              * pointer for an invalid page.  Probe that address first.
2540              */
2541             tcg_rt = cpu_reg(s, rt);
2542             clean_addr = clean_data_tbi(s, tcg_rt);
2543             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2544 
2545             if (s->ata[0]) {
2546                 /* Extract the tag from the register to match STZGM.  */
2547                 tag = tcg_temp_new_i64();
2548                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2549                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2550             }
2551         }
2552         return;
2553     case ARM_CP_DC_GZVA:
2554         {
2555             TCGv_i64 clean_addr, tag;
2556 
2557             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2558             tcg_rt = cpu_reg(s, rt);
2559             clean_addr = clean_data_tbi(s, tcg_rt);
2560             gen_helper_dc_zva(tcg_env, clean_addr);
2561 
2562             if (s->ata[0]) {
2563                 /* Extract the tag from the register to match STZGM.  */
2564                 tag = tcg_temp_new_i64();
2565                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2566                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2567             }
2568         }
2569         return;
2570     default:
2571         g_assert_not_reached();
2572     }
2573 
2574     if (ri->type & ARM_CP_IO) {
2575         /* I/O operations must end the TB here (whether read or write) */
2576         need_exit_tb = translator_io_start(&s->base);
2577     }
2578 
2579     tcg_rt = cpu_reg(s, rt);
2580 
2581     if (isread) {
2582         if (ri->type & ARM_CP_CONST) {
2583             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2584         } else if (ri->readfn) {
2585             if (!tcg_ri) {
2586                 tcg_ri = gen_lookup_cp_reg(key);
2587             }
2588             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2589         } else {
2590             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2591         }
2592     } else {
2593         if (ri->type & ARM_CP_CONST) {
2594             /* If not forbidden by access permissions, treat as WI */
2595             return;
2596         } else if (ri->writefn) {
2597             if (!tcg_ri) {
2598                 tcg_ri = gen_lookup_cp_reg(key);
2599             }
2600             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2601         } else {
2602             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2603         }
2604     }
2605 
2606     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2607         /*
2608          * A write to any coprocessor register that ends a TB
2609          * must rebuild the hflags for the next TB.
2610          */
2611         gen_rebuild_hflags(s);
2612         /*
2613          * We default to ending the TB on a coprocessor register write,
2614          * but allow this to be suppressed by the register definition
2615          * (usually only necessary to work around guest bugs).
2616          */
2617         need_exit_tb = true;
2618     }
2619     if (need_exit_tb) {
2620         s->base.is_jmp = DISAS_UPDATE_EXIT;
2621     }
2622 }
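
/*
 * For reference, the order of checks and actions in handle_sys()
 * above, slightly simplified:
 *
 *   1. TIDCP trap for crn 11/15 accesses from EL0/EL1
 *   2. unknown register: log and UNDEF
 *   3. decide whether FEAT_NV2 redirects the access to memory
 *   4. static permission check, routing to NV trap/redirect as needed
 *   5. run-time accessfn and fine-grained-trap checks
 *   6. FP/SVE/SME access checks (unless skipped for NV)
 *   7. NV trap to EL2, NV2 register redirect, or NV2 memory access
 *   8. special-cased registers (NOP, NZCV, CurrentEL, DC ZVA/GVA/GZVA)
 *   9. the actual read or write, ending the TB when required
 */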
2623 
2624 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2625 {
2626     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2627     return true;
2628 }
2629 
2630 static bool trans_SVC(DisasContext *s, arg_i *a)
2631 {
2632     /*
2633      * For SVC, HVC and SMC we advance the single-step state
2634      * machine before taking the exception. This is architecturally
2635      * mandated, to ensure that single-stepping a system call
2636      * instruction works properly.
2637      */
2638     uint32_t syndrome = syn_aa64_svc(a->imm);
2639     if (s->fgt_svc) {
2640         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2641         return true;
2642     }
2643     gen_ss_advance(s);
2644     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2645     return true;
2646 }
2647 
2648 static bool trans_HVC(DisasContext *s, arg_i *a)
2649 {
2650     int target_el = s->current_el == 3 ? 3 : 2;
2651 
2652     if (s->current_el == 0) {
2653         unallocated_encoding(s);
2654         return true;
2655     }
2656     /*
2657      * The pre HVC helper handles cases when HVC gets trapped
2658      * as an undefined insn by runtime configuration.
2659      */
2660     gen_a64_update_pc(s, 0);
2661     gen_helper_pre_hvc(tcg_env);
2662     /* Architecture requires ss advance before we do the actual work */
2663     gen_ss_advance(s);
2664     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2665     return true;
2666 }
2667 
2668 static bool trans_SMC(DisasContext *s, arg_i *a)
2669 {
2670     if (s->current_el == 0) {
2671         unallocated_encoding(s);
2672         return true;
2673     }
2674     gen_a64_update_pc(s, 0);
2675     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2676     /* Architecture requires ss advance before we do the actual work */
2677     gen_ss_advance(s);
2678     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2679     return true;
2680 }
2681 
2682 static bool trans_BRK(DisasContext *s, arg_i *a)
2683 {
2684     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2685     return true;
2686 }
2687 
2688 static bool trans_HLT(DisasContext *s, arg_i *a)
2689 {
2690     /*
2691      * HLT. This has two purposes.
2692      * Architecturally, it is an external halting debug instruction.
2693      * Since QEMU doesn't implement external debug, we treat this as
2694      * required when halting debug is disabled: it will UNDEF.
2695      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2696      */
2697     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2698         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2699     } else {
2700         unallocated_encoding(s);
2701     }
2702     return true;
2703 }
2704 
2705 /*
2706  * Load/Store exclusive instructions are implemented by remembering
2707  * the value/address loaded, and seeing if these are the same
2708  * when the store is performed. This is not actually the architecturally
2709  * mandated semantics, but it works for typical guest code sequences
2710  * and avoids having to monitor regular stores.
2711  *
2712  * The store exclusive uses the atomic cmpxchg primitives to avoid
2713  * races in multi-threaded linux-user and when MTTCG softmmu is
2714  * enabled.
2715  */
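
/*
 * In outline, the model is (a C-like sketch, not the code emitted
 * below):
 *
 *   LDXR Rt, [Rn]:     exclusive_addr = addr;
 *                      Rt = exclusive_val = *addr;
 *   STXR Rd, Rt, [Rn]: ok = addr == exclusive_addr
 *                           && cmpxchg(addr, exclusive_val, Rt)
 *                              == exclusive_val;
 *                      Rd = ok ? 0 : 1;
 *                      exclusive_addr = -1;
 */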
2716 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2717                                int size, bool is_pair)
2718 {
2719     int idx = get_mem_index(s);
2720     TCGv_i64 dirty_addr, clean_addr;
2721     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2722 
2723     s->is_ldex = true;
2724     dirty_addr = cpu_reg_sp(s, rn);
2725     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2726 
2727     g_assert(size <= 3);
2728     if (is_pair) {
2729         g_assert(size >= 2);
2730         if (size == 2) {
2731             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2732             if (s->be_data == MO_LE) {
2733                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2734                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2735             } else {
2736                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2737                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2738             }
2739         } else {
2740             TCGv_i128 t16 = tcg_temp_new_i128();
2741 
2742             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2743 
2744             if (s->be_data == MO_LE) {
2745                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2746                                       cpu_exclusive_high, t16);
2747             } else {
2748                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2749                                       cpu_exclusive_val, t16);
2750             }
2751             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2752             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2753         }
2754     } else {
2755         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2756         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2757     }
2758     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2759 }
2760 
2761 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2762                                 int rn, int size, int is_pair)
2763 {
2764     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2765      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2766      *     [addr] = {Rt};
2767      *     if (is_pair) {
2768      *         [addr + datasize] = {Rt2};
2769      *     }
2770      *     {Rd} = 0;
2771      * } else {
2772      *     {Rd} = 1;
2773      * }
2774      * env->exclusive_addr = -1;
2775      */
2776     TCGLabel *fail_label = gen_new_label();
2777     TCGLabel *done_label = gen_new_label();
2778     TCGv_i64 tmp, clean_addr;
2779     MemOp memop;
2780 
2781     /*
2782      * FIXME: We are out of spec here.  We have recorded only the address
2783      * from load_exclusive, not the entire range, and we assume that the
2784      * size of the access on both sides matches.  The architecture allows the
2785      * store to be smaller than the load, so long as the stored bytes are
2786      * within the range recorded by the load.
2787      */
2788 
2789     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2790     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2791     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2792 
2793     /*
2794      * The write, and any associated faults, only happen if the virtual
2795      * and physical addresses pass the exclusive monitor check.  These
2796      * faults are exceedingly unlikely, because normally the guest uses
2797      * the exact same address register for the load_exclusive, and we
2798      * would have recognized these faults there.
2799      *
2800      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2801      * unaligned 4-byte write within the range of an aligned 8-byte load.
2802      * With LSE2, the store would need to cross a 16-byte boundary when the
2803      * load did not, which would mean the store is outside the range
2804      * recorded for the monitor, which would have failed a corrected monitor
2805      * check above.  For now, we assume no size change and retain the
2806      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2807      *
2808      * It is possible to trigger an MTE fault, by performing the load with
2809      * a virtual address with a valid tag and performing the store with the
2810      * same virtual address and a different invalid tag.
2811      */
2812     memop = size + is_pair;
2813     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2814         memop |= MO_ALIGN;
2815     }
2816     memop = finalize_memop(s, memop);
2817     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2818 
2819     tmp = tcg_temp_new_i64();
2820     if (is_pair) {
2821         if (size == 2) {
2822             if (s->be_data == MO_LE) {
2823                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2824             } else {
2825                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2826             }
2827             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2828                                        cpu_exclusive_val, tmp,
2829                                        get_mem_index(s), memop);
2830             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2831         } else {
2832             TCGv_i128 t16 = tcg_temp_new_i128();
2833             TCGv_i128 c16 = tcg_temp_new_i128();
2834             TCGv_i64 a, b;
2835 
2836             if (s->be_data == MO_LE) {
2837                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2838                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2839                                         cpu_exclusive_high);
2840             } else {
2841                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2842                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2843                                         cpu_exclusive_val);
2844             }
2845 
2846             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2847                                         get_mem_index(s), memop);
2848 
2849             a = tcg_temp_new_i64();
2850             b = tcg_temp_new_i64();
2851             if (s->be_data == MO_LE) {
2852                 tcg_gen_extr_i128_i64(a, b, t16);
2853             } else {
2854                 tcg_gen_extr_i128_i64(b, a, t16);
2855             }
2856 
2857             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2858             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2859             tcg_gen_or_i64(tmp, a, b);
2860 
2861             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2862         }
2863     } else {
2864         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2865                                    cpu_reg(s, rt), get_mem_index(s), memop);
2866         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2867     }
2868     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2869     tcg_gen_br(done_label);
2870 
2871     gen_set_label(fail_label);
2872     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2873     gen_set_label(done_label);
2874     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2875 }
2876 
2877 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2878                                  int rn, int size)
2879 {
2880     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2881     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2882     int memidx = get_mem_index(s);
2883     TCGv_i64 clean_addr;
2884     MemOp memop;
2885 
2886     if (rn == 31) {
2887         gen_check_sp_alignment(s);
2888     }
2889     memop = check_atomic_align(s, rn, size);
2890     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2891     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2892                                memidx, memop);
2893 }
2894 
2895 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2896                                       int rn, int size)
2897 {
2898     TCGv_i64 s1 = cpu_reg(s, rs);
2899     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2900     TCGv_i64 t1 = cpu_reg(s, rt);
2901     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2902     TCGv_i64 clean_addr;
2903     int memidx = get_mem_index(s);
2904     MemOp memop;
2905 
2906     if (rn == 31) {
2907         gen_check_sp_alignment(s);
2908     }
2909 
2910     /* This is a single atomic access, despite the "pair". */
2911     memop = check_atomic_align(s, rn, size + 1);
2912     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2913 
2914     if (size == 2) {
2915         TCGv_i64 cmp = tcg_temp_new_i64();
2916         TCGv_i64 val = tcg_temp_new_i64();
2917 
2918         if (s->be_data == MO_LE) {
2919             tcg_gen_concat32_i64(val, t1, t2);
2920             tcg_gen_concat32_i64(cmp, s1, s2);
2921         } else {
2922             tcg_gen_concat32_i64(val, t2, t1);
2923             tcg_gen_concat32_i64(cmp, s2, s1);
2924         }
2925 
2926         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2927 
2928         if (s->be_data == MO_LE) {
2929             tcg_gen_extr32_i64(s1, s2, cmp);
2930         } else {
2931             tcg_gen_extr32_i64(s2, s1, cmp);
2932         }
2933     } else {
2934         TCGv_i128 cmp = tcg_temp_new_i128();
2935         TCGv_i128 val = tcg_temp_new_i128();
2936 
2937         if (s->be_data == MO_LE) {
2938             tcg_gen_concat_i64_i128(val, t1, t2);
2939             tcg_gen_concat_i64_i128(cmp, s1, s2);
2940         } else {
2941             tcg_gen_concat_i64_i128(val, t2, t1);
2942             tcg_gen_concat_i64_i128(cmp, s2, s1);
2943         }
2944 
2945         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2946 
2947         if (s->be_data == MO_LE) {
2948             tcg_gen_extr_i128_i64(s1, s2, cmp);
2949         } else {
2950             tcg_gen_extr_i128_i64(s2, s1, cmp);
2951         }
2952     }
2953 }
2954 
2955 /*
2956  * Compute the ISS.SF bit for syndrome information if an exception
2957  * is taken on a load or store. This indicates whether the instruction
2958  * is accessing a 32-bit or 64-bit register. This logic is derived
2959  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2960  */
2961 static bool ldst_iss_sf(int size, bool sign, bool ext)
2962 {
2964     if (sign) {
2965         /*
2966          * Signed loads are 64 bit results if we are not going to
2967          * do a zero-extend from 32 to 64 after the load.
2968          * (For a store, sign and ext are always false.)
2969          */
2970         return !ext;
2971     } else {
2972         /* Unsigned loads/stores work at the specified size */
2973         return size == MO_64;
2974     }
2975 }
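
/*
 * Worked examples of the rule above (a sketch; mnemonics shown for
 * orientation only):
 *
 *   LDR   Xt, [..]  size=MO_64 sign=0        -> SF = 1
 *   LDR   Wt, [..]  size=MO_32 sign=0        -> SF = 0
 *   LDRSW Xt, [..]  size=MO_32 sign=1 ext=0  -> SF = 1
 *   LDRSH Wt, [..]  size=MO_16 sign=1 ext=1  -> SF = 0
 */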
2976 
2977 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2978 {
2979     if (a->rn == 31) {
2980         gen_check_sp_alignment(s);
2981     }
2982     if (a->lasr) {
2983         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2984     }
2985     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2986     return true;
2987 }
2988 
2989 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2990 {
2991     if (a->rn == 31) {
2992         gen_check_sp_alignment(s);
2993     }
2994     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2995     if (a->lasr) {
2996         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2997     }
2998     return true;
2999 }
3000 
3001 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3002 {
3003     TCGv_i64 clean_addr;
3004     MemOp memop;
3005     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3006 
3007     /*
3008      * StoreLORelease is the same as Store-Release for QEMU, but
3009      * needs the feature test.
3010      */
3011     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3012         return false;
3013     }
3014     /* Generate ISS for non-exclusive accesses including LASR.  */
3015     if (a->rn == 31) {
3016         gen_check_sp_alignment(s);
3017     }
3018     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3019     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3020     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3021                                 true, a->rn != 31, memop);
3022     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3023               iss_sf, a->lasr);
3024     return true;
3025 }
3026 
3027 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3028 {
3029     TCGv_i64 clean_addr;
3030     MemOp memop;
3031     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3032 
3033     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3034     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3035         return false;
3036     }
3037     /* Generate ISS for non-exclusive accesses including LASR.  */
3038     if (a->rn == 31) {
3039         gen_check_sp_alignment(s);
3040     }
3041     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3042     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3043                                 false, a->rn != 31, memop);
3044     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3045               a->rt, iss_sf, a->lasr);
3046     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3047     return true;
3048 }
3049 
3050 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3051 {
3052     if (a->rn == 31) {
3053         gen_check_sp_alignment(s);
3054     }
3055     if (a->lasr) {
3056         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3057     }
3058     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3059     return true;
3060 }
3061 
3062 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3063 {
3064     if (a->rn == 31) {
3065         gen_check_sp_alignment(s);
3066     }
3067     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3068     if (a->lasr) {
3069         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3070     }
3071     return true;
3072 }
3073 
3074 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3075 {
3076     if (!dc_isar_feature(aa64_atomics, s)) {
3077         return false;
3078     }
3079     if (((a->rt | a->rs) & 1) != 0) {
3080         return false;
3081     }
3082 
3083     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3084     return true;
3085 }
3086 
3087 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3088 {
3089     if (!dc_isar_feature(aa64_atomics, s)) {
3090         return false;
3091     }
3092     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3093     return true;
3094 }
3095 
3096 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3097 {
3098     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3099     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3100     TCGv_i64 clean_addr = tcg_temp_new_i64();
3101     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3102 
3103     gen_pc_plus_diff(s, clean_addr, a->imm);
3104     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3105               false, true, a->rt, iss_sf, false);
3106     return true;
3107 }
3108 
3109 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3110 {
3111     /* Load register (literal), vector version */
3112     TCGv_i64 clean_addr;
3113     MemOp memop;
3114 
3115     if (!fp_access_check(s)) {
3116         return true;
3117     }
3118     memop = finalize_memop_asimd(s, a->sz);
3119     clean_addr = tcg_temp_new_i64();
3120     gen_pc_plus_diff(s, clean_addr, a->imm);
3121     do_fp_ld(s, a->rt, clean_addr, memop);
3122     return true;
3123 }
3124 
3125 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3126                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3127                                  uint64_t offset, bool is_store, MemOp mop)
3128 {
3129     if (a->rn == 31) {
3130         gen_check_sp_alignment(s);
3131     }
3132 
3133     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3134     if (!a->p) {
3135         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3136     }
3137 
3138     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3139                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3140 }
3141 
3142 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3143                                   TCGv_i64 dirty_addr, uint64_t offset)
3144 {
3145     if (a->w) {
3146         if (a->p) {
3147             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3148         }
3149         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3150     }
3151 }
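
/*
 * Addressing modes handled by the pre/post helpers above, where a->p
 * is the post-index flag and a->w requests writeback (a summary):
 *
 *   p=0 w=0  signed offset:  addr = Rn + imm, no writeback
 *   p=0 w=1  pre-index:      addr = Rn + imm, Rn = addr
 *   p=1 w=1  post-index:     addr = Rn,       Rn = Rn + imm
 */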
3152 
3153 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3154 {
3155     uint64_t offset = a->imm << a->sz;
3156     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3157     MemOp mop = finalize_memop(s, a->sz);
3158 
3159     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3160     tcg_rt = cpu_reg(s, a->rt);
3161     tcg_rt2 = cpu_reg(s, a->rt2);
3162     /*
3163      * We built mop above for the single logical access -- rebuild it
3164      * now for the paired operation.
3165      *
3166      * With LSE2, non-sign-extending pairs are treated atomically if
3167      * aligned, and if unaligned one of the pair will be completely
3168      * within a 16-byte block and that element will be atomic.
3169      * Otherwise each element is separately atomic.
3170      * In all cases, issue one operation with the correct atomicity.
3171      */
3172     mop = a->sz + 1;
3173     if (s->align_mem) {
3174         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3175     }
3176     mop = finalize_memop_pair(s, mop);
3177     if (a->sz == 2) {
3178         TCGv_i64 tmp = tcg_temp_new_i64();
3179 
3180         if (s->be_data == MO_LE) {
3181             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3182         } else {
3183             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3184         }
3185         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3186     } else {
3187         TCGv_i128 tmp = tcg_temp_new_i128();
3188 
3189         if (s->be_data == MO_LE) {
3190             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3191         } else {
3192             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3193         }
3194         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3195     }
3196     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3197     return true;
3198 }
3199 
3200 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3201 {
3202     uint64_t offset = a->imm << a->sz;
3203     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3204     MemOp mop = finalize_memop(s, a->sz);
3205 
3206     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3207     tcg_rt = cpu_reg(s, a->rt);
3208     tcg_rt2 = cpu_reg(s, a->rt2);
3209 
3210     /*
3211      * We built mop above for the single logical access -- rebuild it
3212      * now for the paired operation.
3213      *
3214      * With LSE2, non-sign-extending pairs are treated atomically if
3215      * aligned, and if unaligned one of the pair will be completely
3216      * within a 16-byte block and that element will be atomic.
3217      * Otherwise each element is separately atomic.
3218      * In all cases, issue one operation with the correct atomicity.
3219      *
3220      * This treats sign-extending loads like zero-extending loads,
3221      * since that reuses the most code below.
3222      */
3223     mop = a->sz + 1;
3224     if (s->align_mem) {
3225         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3226     }
3227     mop = finalize_memop_pair(s, mop);
3228     if (a->sz == 2) {
3229         int o2 = s->be_data == MO_LE ? 32 : 0;
3230         int o1 = o2 ^ 32;
3231 
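        /*
         * For little-endian data o2 = 32 and o1 = 0, so Rt receives
         * bits [31:0] and Rt2 bits [63:32] of the combined 64-bit
         * load; big-endian swaps the two fields.
         */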
3232         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3233         if (a->sign) {
3234             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3235             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3236         } else {
3237             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3238             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3239         }
3240     } else {
3241         TCGv_i128 tmp = tcg_temp_new_i128();
3242 
3243         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3244         if (s->be_data == MO_LE) {
3245             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3246         } else {
3247             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3248         }
3249     }
3250     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3251     return true;
3252 }
3253 
3254 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3255 {
3256     uint64_t offset = a->imm << a->sz;
3257     TCGv_i64 clean_addr, dirty_addr;
3258     MemOp mop;
3259 
3260     if (!fp_access_check(s)) {
3261         return true;
3262     }
3263 
3264     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3265     mop = finalize_memop_asimd(s, a->sz);
3266     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3267     do_fp_st(s, a->rt, clean_addr, mop);
3268     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3269     do_fp_st(s, a->rt2, clean_addr, mop);
3270     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3271     return true;
3272 }
3273 
3274 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3275 {
3276     uint64_t offset = a->imm << a->sz;
3277     TCGv_i64 clean_addr, dirty_addr;
3278     MemOp mop;
3279 
3280     if (!fp_access_check(s)) {
3281         return true;
3282     }
3283 
3284     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3285     mop = finalize_memop_asimd(s, a->sz);
3286     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3287     do_fp_ld(s, a->rt, clean_addr, mop);
3288     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3289     do_fp_ld(s, a->rt2, clean_addr, mop);
3290     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3291     return true;
3292 }
3293 
3294 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3295 {
3296     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3297     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3298     MemOp mop;
3299     TCGv_i128 tmp;
3300 
3301     /* STGP only comes in one size. */
3302     tcg_debug_assert(a->sz == MO_64);
3303 
3304     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3305         return false;
3306     }
3307 
3308     if (a->rn == 31) {
3309         gen_check_sp_alignment(s);
3310     }
3311 
3312     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3313     if (!a->p) {
3314         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3315     }
3316 
3317     clean_addr = clean_data_tbi(s, dirty_addr);
3318     tcg_rt = cpu_reg(s, a->rt);
3319     tcg_rt2 = cpu_reg(s, a->rt2);
3320 
3321     /*
3322      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3323      * and one tag operation.  We implement it as one single aligned 16-byte
3324      * memory operation for convenience.  Note that the alignment ensures
3325      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3326      */
3327     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3328 
3329     tmp = tcg_temp_new_i128();
3330     if (s->be_data == MO_LE) {
3331         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3332     } else {
3333         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3334     }
3335     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3336 
3337     /* Perform the tag store, if tag access enabled. */
3338     if (s->ata[0]) {
3339         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3340             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3341         } else {
3342             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3343         }
3344     }
3345 
3346     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3347     return true;
3348 }
3349 
3350 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3351                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3352                                  uint64_t offset, bool is_store, MemOp mop)
3353 {
3354     int memidx;
3355 
3356     if (a->rn == 31) {
3357         gen_check_sp_alignment(s);
3358     }
3359 
3360     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3361     if (!a->p) {
3362         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3363     }
3364     memidx = get_a64_user_mem_index(s, a->unpriv);
3365     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3366                                         a->w || a->rn != 31,
3367                                         mop, a->unpriv, memidx);
3368 }
3369 
3370 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3371                                   TCGv_i64 dirty_addr, uint64_t offset)
3372 {
3373     if (a->w) {
3374         if (a->p) {
3375             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3376         }
3377         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3378     }
3379 }
3380 
3381 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3382 {
3383     bool iss_sf, iss_valid = !a->w;
3384     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3385     int memidx = get_a64_user_mem_index(s, a->unpriv);
3386     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3387 
3388     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3389 
3390     tcg_rt = cpu_reg(s, a->rt);
3391     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3392 
3393     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3394                      iss_valid, a->rt, iss_sf, false);
3395     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3396     return true;
3397 }
3398 
3399 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3400 {
3401     bool iss_sf, iss_valid = !a->w;
3402     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3403     int memidx = get_a64_user_mem_index(s, a->unpriv);
3404     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3405 
3406     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3407 
3408     tcg_rt = cpu_reg(s, a->rt);
3409     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3410 
3411     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3412                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3413     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3414     return true;
3415 }
3416 
3417 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3418 {
3419     TCGv_i64 clean_addr, dirty_addr;
3420     MemOp mop;
3421 
3422     if (!fp_access_check(s)) {
3423         return true;
3424     }
3425     mop = finalize_memop_asimd(s, a->sz);
3426     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3427     do_fp_st(s, a->rt, clean_addr, mop);
3428     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3429     return true;
3430 }
3431 
3432 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3433 {
3434     TCGv_i64 clean_addr, dirty_addr;
3435     MemOp mop;
3436 
3437     if (!fp_access_check(s)) {
3438         return true;
3439     }
3440     mop = finalize_memop_asimd(s, a->sz);
3441     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3442     do_fp_ld(s, a->rt, clean_addr, mop);
3443     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3444     return true;
3445 }
3446 
3447 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3448                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3449                              bool is_store, MemOp memop)
3450 {
3451     TCGv_i64 tcg_rm;
3452 
3453     if (a->rn == 31) {
3454         gen_check_sp_alignment(s);
3455     }
3456     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3457 
3458     tcg_rm = read_cpu_reg(s, a->rm, 1);
3459     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3460 
3461     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3462     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3463 }
3464 
3465 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3466 {
3467     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3468     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3469     MemOp memop;
3470 
3471     if (extract32(a->opt, 1, 1) == 0) {
3472         return false;
3473     }
3474 
3475     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3476     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3477     tcg_rt = cpu_reg(s, a->rt);
3478     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3479               a->ext, true, a->rt, iss_sf, false);
3480     return true;
3481 }
3482 
3483 static bool trans_STR(DisasContext *s, arg_ldst *a)
3484 {
3485     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3486     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3487     MemOp memop;
3488 
3489     if (extract32(a->opt, 1, 1) == 0) {
3490         return false;
3491     }
3492 
3493     memop = finalize_memop(s, a->sz);
3494     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3495     tcg_rt = cpu_reg(s, a->rt);
3496     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3497     return true;
3498 }
3499 
3500 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3501 {
3502     TCGv_i64 clean_addr, dirty_addr;
3503     MemOp memop;
3504 
3505     if (extract32(a->opt, 1, 1) == 0) {
3506         return false;
3507     }
3508 
3509     if (!fp_access_check(s)) {
3510         return true;
3511     }
3512 
3513     memop = finalize_memop_asimd(s, a->sz);
3514     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3515     do_fp_ld(s, a->rt, clean_addr, memop);
3516     return true;
3517 }
3518 
3519 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3520 {
3521     TCGv_i64 clean_addr, dirty_addr;
3522     MemOp memop;
3523 
3524     if (extract32(a->opt, 1, 1) == 0) {
3525         return false;
3526     }
3527 
3528     if (!fp_access_check(s)) {
3529         return true;
3530     }
3531 
3532     memop = finalize_memop_asimd(s, a->sz);
3533     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3534     do_fp_st(s, a->rt, clean_addr, memop);
3535     return true;
3536 }
3537 
3539 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3540                          int sign, bool invert)
3541 {
3542     MemOp mop = a->sz | sign;
3543     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3544 
3545     if (a->rn == 31) {
3546         gen_check_sp_alignment(s);
3547     }
3548     mop = check_atomic_align(s, a->rn, mop);
3549     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3550                                 a->rn != 31, mop);
3551     tcg_rs = read_cpu_reg(s, a->rs, true);
3552     tcg_rt = cpu_reg(s, a->rt);
3553     if (invert) {
3554         tcg_gen_not_i64(tcg_rs, tcg_rs);
3555     }
3556     /*
3557      * The tcg atomic primitives are all full barriers.  Therefore we
3558      * can ignore the Acquire and Release bits of this instruction.
3559      */
3560     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3561 
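    /*
     * The MO_SIGN ops (LDSMAX/LDSMIN below) sign-extend the loaded
     * value for the comparison, but the value written back to Xt
     * must be the zero-extended memory value, so undo the extension
     * here.
     */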
3562     if (mop & MO_SIGN) {
3563         switch (a->sz) {
3564         case MO_8:
3565             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3566             break;
3567         case MO_16:
3568             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3569             break;
3570         case MO_32:
3571             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3572             break;
3573         case MO_64:
3574             break;
3575         default:
3576             g_assert_not_reached();
3577         }
3578     }
3579     return true;
3580 }
3581 
3582 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3583 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3584 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3585 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3586 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3587 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3588 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3589 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3590 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3591 
3592 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3593 {
3594     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3595     TCGv_i64 clean_addr;
3596     MemOp mop;
3597 
3598     if (!dc_isar_feature(aa64_atomics, s) ||
3599         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3600         return false;
3601     }
3602     if (a->rn == 31) {
3603         gen_check_sp_alignment(s);
3604     }
3605     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3606     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3607                                 a->rn != 31, mop);
3608     /*
3609      * LDAPR* are a special case because they are a simple load, not a
3610      * fetch-and-do-something op.
3611      * The architectural consistency requirements here are weaker than
3612      * full load-acquire (we only need "load-acquire processor consistent"),
3613      * but we choose to implement them as full LDAQ.
3614      */
3615     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3616               true, a->rt, iss_sf, true);
3617     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3618     return true;
3619 }
3620 
3621 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3622 {
3623     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3624     MemOp memop;
3625 
3626     /* Load with pointer authentication */
3627     if (!dc_isar_feature(aa64_pauth, s)) {
3628         return false;
3629     }
3630 
3631     if (a->rn == 31) {
3632         gen_check_sp_alignment(s);
3633     }
3634     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3635 
3636     if (s->pauth_active) {
3637         if (!a->m) {
3638             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3639                                       tcg_constant_i64(0));
3640         } else {
3641             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3642                                       tcg_constant_i64(0));
3643         }
3644     }
3645 
3646     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3647 
3648     memop = finalize_memop(s, MO_64);
3649 
3650     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3651     clean_addr = gen_mte_check1(s, dirty_addr, false,
3652                                 a->w || a->rn != 31, memop);
3653 
3654     tcg_rt = cpu_reg(s, a->rt);
3655     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3656               /* extend */ false, /* iss_valid */ !a->w,
3657               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3658 
3659     if (a->w) {
3660         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3661     }
3662     return true;
3663 }
3664 
3665 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3666 {
3667     TCGv_i64 clean_addr, dirty_addr;
3668     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3669     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3670 
3671     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3672         return false;
3673     }
3674 
3675     if (a->rn == 31) {
3676         gen_check_sp_alignment(s);
3677     }
3678 
3679     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3680     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3681     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3682     clean_addr = clean_data_tbi(s, dirty_addr);
3683 
3684     /*
3685      * Load-AcquirePC semantics; we implement as the slightly more
3686      * restrictive Load-Acquire.
3687      */
3688     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3689               a->rt, iss_sf, true);
3690     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3691     return true;
3692 }
3693 
3694 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3695 {
3696     TCGv_i64 clean_addr, dirty_addr;
3697     MemOp mop = a->sz;
3698     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3699 
3700     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3701         return false;
3702     }
3703 
3704     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3705 
3706     if (a->rn == 31) {
3707         gen_check_sp_alignment(s);
3708     }
3709 
3710     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3711     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3712     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3713     clean_addr = clean_data_tbi(s, dirty_addr);
3714 
3715     /* Store-Release semantics */
3716     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3717     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3718     return true;
3719 }
3720 
3721 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3722 {
3723     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3724     MemOp endian, align, mop;
3725 
3726     int total;    /* total bytes */
3727     int elements; /* elements per vector */
3728     int r;
3729     int size = a->sz;
3730 
3731     if (!a->p && a->rm != 0) {
3732         /* For non-postindexed accesses the Rm field must be 0 */
3733         return false;
3734     }
3735     if (size == 3 && !a->q && a->selem != 1) {
3736         return false;
3737     }
3738     if (!fp_access_check(s)) {
3739         return true;
3740     }
3741 
3742     if (a->rn == 31) {
3743         gen_check_sp_alignment(s);
3744     }
3745 
3746     /* For our purposes, bytes are always little-endian.  */
3747     endian = s->be_data;
3748     if (size == 0) {
3749         endian = MO_LE;
3750     }
3751 
3752     total = a->rpt * a->selem * (a->q ? 16 : 8);
3753     tcg_rn = cpu_reg_sp(s, a->rn);
3754 
3755     /*
3756      * Issue the MTE check vs the logical repeat count, before we
3757      * promote consecutive little-endian elements below.
3758      */
3759     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3760                                 finalize_memop_asimd(s, size));
3761 
3762     /*
3763      * Consecutive little-endian elements from a single register
3764      * can be promoted to a larger little-endian operation.
3765      */
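    /*
     * For example, LD1 {v0.4s} (size 2, selem 1) is issued below as
     * two MO_64 loads rather than four MO_32 loads.
     */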
3766     align = MO_ALIGN;
3767     if (a->selem == 1 && endian == MO_LE) {
3768         align = pow2_align(size);
3769         size = 3;
3770     }
3771     if (!s->align_mem) {
3772         align = 0;
3773     }
3774     mop = endian | size | align;
3775 
3776     elements = (a->q ? 16 : 8) >> size;
3777     tcg_ebytes = tcg_constant_i64(1 << size);
3778     for (r = 0; r < a->rpt; r++) {
3779         int e;
3780         for (e = 0; e < elements; e++) {
3781             int xs;
3782             for (xs = 0; xs < a->selem; xs++) {
3783                 int tt = (a->rt + r + xs) % 32;
3784                 do_vec_ld(s, tt, e, clean_addr, mop);
3785                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3786             }
3787         }
3788     }
3789 
3790     /*
3791      * For non-quad operations, setting a slice of the low 64 bits of
3792      * the register clears the high 64 bits (in the ARM ARM pseudocode
3793      * this is implicit in the fact that 'rval' is a 64 bit wide
3794      * variable).  For quad operations, we might still need to zero
3795      * the high bits of SVE.
3796      */
3797     for (r = 0; r < a->rpt * a->selem; r++) {
3798         int tt = (a->rt + r) % 32;
3799         clear_vec_high(s, a->q, tt);
3800     }
3801 
3802     if (a->p) {
3803         if (a->rm == 31) {
3804             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3805         } else {
3806             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3807         }
3808     }
3809     return true;
3810 }
3811 
3812 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3813 {
3814     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3815     MemOp endian, align, mop;
3816 
3817     int total;    /* total bytes */
3818     int elements; /* elements per vector */
3819     int r;
3820     int size = a->sz;
3821 
3822     if (!a->p && a->rm != 0) {
3823         /* For non-postindexed accesses the Rm field must be 0 */
3824         return false;
3825     }
3826     if (size == 3 && !a->q && a->selem != 1) {
3827         return false;
3828     }
3829     if (!fp_access_check(s)) {
3830         return true;
3831     }
3832 
3833     if (a->rn == 31) {
3834         gen_check_sp_alignment(s);
3835     }
3836 
3837     /* For our purposes, bytes are always little-endian.  */
3838     endian = s->be_data;
3839     if (size == 0) {
3840         endian = MO_LE;
3841     }
3842 
3843     total = a->rpt * a->selem * (a->q ? 16 : 8);
3844     tcg_rn = cpu_reg_sp(s, a->rn);
3845 
3846     /*
3847      * Issue the MTE check vs the logical repeat count, before we
3848      * promote consecutive little-endian elements below.
3849      */
3850     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3851                                 finalize_memop_asimd(s, size));
3852 
3853     /*
3854      * Consecutive little-endian elements from a single register
3855      * can be promoted to a larger little-endian operation.
3856      */
3857     align = MO_ALIGN;
3858     if (a->selem == 1 && endian == MO_LE) {
3859         align = pow2_align(size);
3860         size = 3;
3861     }
3862     if (!s->align_mem) {
3863         align = 0;
3864     }
3865     mop = endian | size | align;
3866 
3867     elements = (a->q ? 16 : 8) >> size;
3868     tcg_ebytes = tcg_constant_i64(1 << size);
3869     for (r = 0; r < a->rpt; r++) {
3870         int e;
3871         for (e = 0; e < elements; e++) {
3872             int xs;
3873             for (xs = 0; xs < a->selem; xs++) {
3874                 int tt = (a->rt + r + xs) % 32;
3875                 do_vec_st(s, tt, e, clean_addr, mop);
3876                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3877             }
3878         }
3879     }
3880 
3881     if (a->p) {
3882         if (a->rm == 31) {
3883             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3884         } else {
3885             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3886         }
3887     }
3888     return true;
3889 }
3890 
3891 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3892 {
3893     int xs, total, rt;
3894     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3895     MemOp mop;
3896 
3897     if (!a->p && a->rm != 0) {
3898         return false;
3899     }
3900     if (!fp_access_check(s)) {
3901         return true;
3902     }
3903 
3904     if (a->rn == 31) {
3905         gen_check_sp_alignment(s);
3906     }
3907 
3908     total = a->selem << a->scale;
3909     tcg_rn = cpu_reg_sp(s, a->rn);
3910 
3911     mop = finalize_memop_asimd(s, a->scale);
3912     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3913                                 total, mop);
3914 
3915     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3916     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3917         do_vec_st(s, rt, a->index, clean_addr, mop);
3918         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3919     }
3920 
3921     if (a->p) {
3922         if (a->rm == 31) {
3923             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3924         } else {
3925             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3926         }
3927     }
3928     return true;
3929 }
3930 
3931 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3932 {
3933     int xs, total, rt;
3934     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3935     MemOp mop;
3936 
3937     if (!a->p && a->rm != 0) {
3938         return false;
3939     }
3940     if (!fp_access_check(s)) {
3941         return true;
3942     }
3943 
3944     if (a->rn == 31) {
3945         gen_check_sp_alignment(s);
3946     }
3947 
3948     total = a->selem << a->scale;
3949     tcg_rn = cpu_reg_sp(s, a->rn);
3950 
3951     mop = finalize_memop_asimd(s, a->scale);
3952     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3953                                 total, mop);
3954 
3955     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3956     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3957         do_vec_ld(s, rt, a->index, clean_addr, mop);
3958         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3959     }
3960 
3961     if (a->p) {
3962         if (a->rm == 31) {
3963             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3964         } else {
3965             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3966         }
3967     }
3968     return true;
3969 }
3970 
3971 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3972 {
3973     int xs, total, rt;
3974     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3975     MemOp mop;
3976 
3977     if (!a->p && a->rm != 0) {
3978         return false;
3979     }
3980     if (!fp_access_check(s)) {
3981         return true;
3982     }
3983 
3984     if (a->rn == 31) {
3985         gen_check_sp_alignment(s);
3986     }
3987 
3988     total = a->selem << a->scale;
3989     tcg_rn = cpu_reg_sp(s, a->rn);
3990 
3991     mop = finalize_memop_asimd(s, a->scale);
3992     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3993                                 total, mop);
3994 
3995     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3996     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3997         /* Load and replicate to all elements */
3998         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3999 
4000         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4001         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4002                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4003         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4004     }
4005 
4006     if (a->p) {
4007         if (a->rm == 31) {
4008             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4009         } else {
4010             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4011         }
4012     }
4013     return true;
4014 }
4015 
4016 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4017 {
4018     TCGv_i64 addr, clean_addr, tcg_rt;
4019     int size = 4 << s->dcz_blocksize;
4020 
4021     if (!dc_isar_feature(aa64_mte, s)) {
4022         return false;
4023     }
4024     if (s->current_el == 0) {
4025         return false;
4026     }
4027 
4028     if (a->rn == 31) {
4029         gen_check_sp_alignment(s);
4030     }
4031 
4032     addr = read_cpu_reg_sp(s, a->rn, true);
4033     tcg_gen_addi_i64(addr, addr, a->imm);
4034     tcg_rt = cpu_reg(s, a->rt);
4035 
4036     if (s->ata[0]) {
4037         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4038     }
4039     /*
4040      * The non-tags portion of STZGM is mostly like DC_ZVA,
4041      * except the alignment happens before the access.
4042      */
4043     clean_addr = clean_data_tbi(s, addr);
4044     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4045     gen_helper_dc_zva(tcg_env, clean_addr);
4046     return true;
4047 }
4048 
4049 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4050 {
4051     TCGv_i64 addr, clean_addr, tcg_rt;
4052 
4053     if (!dc_isar_feature(aa64_mte, s)) {
4054         return false;
4055     }
4056     if (s->current_el == 0) {
4057         return false;
4058     }
4059 
4060     if (a->rn == 31) {
4061         gen_check_sp_alignment(s);
4062     }
4063 
4064     addr = read_cpu_reg_sp(s, a->rn, true);
4065     tcg_gen_addi_i64(addr, addr, a->imm);
4066     tcg_rt = cpu_reg(s, a->rt);
4067 
4068     if (s->ata[0]) {
4069         gen_helper_stgm(tcg_env, addr, tcg_rt);
4070     } else {
4071         MMUAccessType acc = MMU_DATA_STORE;
4072         int size = 4 << s->gm_blocksize;
4073 
4074         clean_addr = clean_data_tbi(s, addr);
4075         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4076         gen_probe_access(s, clean_addr, acc, size);
4077     }
4078     return true;
4079 }
4080 
4081 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4082 {
4083     TCGv_i64 addr, clean_addr, tcg_rt;
4084 
4085     if (!dc_isar_feature(aa64_mte, s)) {
4086         return false;
4087     }
4088     if (s->current_el == 0) {
4089         return false;
4090     }
4091 
4092     if (a->rn == 31) {
4093         gen_check_sp_alignment(s);
4094     }
4095 
4096     addr = read_cpu_reg_sp(s, a->rn, true);
4097     tcg_gen_addi_i64(addr, addr, a->imm);
4098     tcg_rt = cpu_reg(s, a->rt);
4099 
4100     if (s->ata[0]) {
4101         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4102     } else {
4103         MMUAccessType acc = MMU_DATA_LOAD;
4104         int size = 4 << s->gm_blocksize;
4105 
4106         clean_addr = clean_data_tbi(s, addr);
4107         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4108         gen_probe_access(s, clean_addr, acc, size);
4109         /* The result tags are zeros.  */
4110         tcg_gen_movi_i64(tcg_rt, 0);
4111     }
4112     return true;
4113 }
4114 
4115 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4116 {
4117     TCGv_i64 addr, clean_addr, tcg_rt;
4118 
4119     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4120         return false;
4121     }
4122 
4123     if (a->rn == 31) {
4124         gen_check_sp_alignment(s);
4125     }
4126 
4127     addr = read_cpu_reg_sp(s, a->rn, true);
4128     if (!a->p) {
4129         /* pre-index or signed offset */
4130         tcg_gen_addi_i64(addr, addr, a->imm);
4131     }
4132 
4133     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4134     tcg_rt = cpu_reg(s, a->rt);
4135     if (s->ata[0]) {
4136         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4137     } else {
4138         /*
4139          * Tag access disabled: we must check for aborts on the load
4140          * from [rn+offset], and then insert a 0 tag into rt.
4141          */
4142         clean_addr = clean_data_tbi(s, addr);
4143         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4144         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4145     }
4146 
4147     if (a->w) {
4148         /* pre-index or post-index */
4149         if (a->p) {
4150             /* post-index */
4151             tcg_gen_addi_i64(addr, addr, a->imm);
4152         }
4153         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4154     }
4155     return true;
4156 }
4157 
4158 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4159 {
4160     TCGv_i64 addr, tcg_rt;
4161 
4162     if (a->rn == 31) {
4163         gen_check_sp_alignment(s);
4164     }
4165 
4166     addr = read_cpu_reg_sp(s, a->rn, true);
4167     if (!a->p) {
4168         /* pre-index or signed offset */
4169         tcg_gen_addi_i64(addr, addr, a->imm);
4170     }
4171     tcg_rt = cpu_reg_sp(s, a->rt);
4172     if (!s->ata[0]) {
4173         /*
4174          * For STG and ST2G, we need to check alignment and probe memory.
4175          * TODO: For STZG and STZ2G, we could rely on the stores below,
4176          * at least for system mode; user-only won't enforce alignment.
4177          */
4178         if (is_pair) {
4179             gen_helper_st2g_stub(tcg_env, addr);
4180         } else {
4181             gen_helper_stg_stub(tcg_env, addr);
4182         }
4183     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4184         if (is_pair) {
4185             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4186         } else {
4187             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4188         }
4189     } else {
4190         if (is_pair) {
4191             gen_helper_st2g(tcg_env, addr, tcg_rt);
4192         } else {
4193             gen_helper_stg(tcg_env, addr, tcg_rt);
4194         }
4195     }
4196 
4197     if (is_zero) {
4198         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4199         TCGv_i64 zero64 = tcg_constant_i64(0);
4200         TCGv_i128 zero128 = tcg_temp_new_i128();
4201         int mem_index = get_mem_index(s);
4202         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4203 
4204         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4205 
4206         /* This is 1 or 2 atomic 16-byte operations. */
4207         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4208         if (is_pair) {
4209             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4210             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4211         }
4212     }
4213 
4214     if (a->w) {
4215         /* pre-index or post-index */
4216         if (a->p) {
4217             /* post-index */
4218             tcg_gen_addi_i64(addr, addr, a->imm);
4219         }
4220         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4221     }
4222     return true;
4223 }
4224 
4225 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4226 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4227 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4228 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4229 
4230 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4231 
4232 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4233                    bool is_setg, SetFn fn)
4234 {
4235     int memidx;
4236     uint32_t syndrome, desc = 0;
4237 
4238     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4239         return false;
4240     }
4241 
4242     /*
4243      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4244      * us to pull this check before the CheckMOPSEnabled() test
4245      * (which we do in the helper function)
4246      */
4247     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4248         a->rd == 31 || a->rn == 31) {
4249         return false;
4250     }
4251 
4252     memidx = get_a64_user_mem_index(s, a->unpriv);
4253 
4254     /*
4255      * We pass option_a == true, matching our implementation;
4256      * we pass wrong_option == false: helper function may set that bit.
4257      */
4258     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4259                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4260 
4261     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4262         /* We may need to do MTE tag checking, so assemble the descriptor */
4263         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4264         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4265         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4266         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4267     }
4268     /* The helper function always needs the memidx even with MTE disabled */
4269     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4270 
4271     /*
4272      * The helper needs the register numbers, but since they're in
4273      * the syndrome anyway, we let it extract them from there rather
4274      * than passing in an extra three integer arguments.
4275      */
4276     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4277     return true;
4278 }
4279 
4280 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4281 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4282 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4283 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4284 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4285 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4286 
4287 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4288 
4289 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4290 {
4291     int rmemidx, wmemidx;
4292     uint32_t syndrome, rdesc = 0, wdesc = 0;
4293     bool wunpriv = extract32(a->options, 0, 1);
4294     bool runpriv = extract32(a->options, 1, 1);
4295 
4296     /*
4297      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4298      * us to pull this check before the CheckMOPSEnabled() test
4299      * (which we do in the helper function)
4300      */
4301     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4302         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4303         return false;
4304     }
4305 
4306     rmemidx = get_a64_user_mem_index(s, runpriv);
4307     wmemidx = get_a64_user_mem_index(s, wunpriv);
4308 
4309     /*
4310      * We pass option_a == true, matching our implementation;
4311      * we pass wrong_option == false: helper function may set that bit.
4312      */
4313     syndrome = syn_mop(false, false, a->options, is_epilogue,
4314                        false, true, a->rd, a->rs, a->rn);
4315 
4316     /* If we need to do MTE tag checking, assemble the descriptors */
4317     if (s->mte_active[runpriv]) {
4318         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4319         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4320     }
4321     if (s->mte_active[wunpriv]) {
4322         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4323         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4324         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4325     }
4326     /* The helper function needs these parts of the descriptor regardless */
4327     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4328     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4329 
4330     /*
4331      * The helper needs the register numbers, but since they're in
4332      * the syndrome anyway, we let it extract them from there rather
4333      * than passing in an extra three integer arguments.
4334      */
4335     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4336        tcg_constant_i32(rdesc));
4337     return true;
4338 }
4339 
4340 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4341 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4342 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4343 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4344 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4345 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4346 
4347 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4348 
4349 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4350                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4351 {
4352     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4353     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4354     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4355 
4356     fn(tcg_rd, tcg_rn, tcg_imm);
4357     if (!a->sf) {
4358         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4359     }
4360     return true;
4361 }
4362 
4363 /*
4364  * PC-rel. addressing
4365  */
4366 
4367 static bool trans_ADR(DisasContext *s, arg_ri *a)
4368 {
4369     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4370     return true;
4371 }
4372 
4373 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4374 {
4375     int64_t offset = (int64_t)a->imm << 12;
4376 
4377     /* The page offset is ok for CF_PCREL. */
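    /*
     * ADRP computes (PC & ~0xfff) + (imm << 12); subtracting the
     * low 12 bits of pc_curr here makes the PC-relative addition
     * below yield exactly that page-aligned result.
     */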
4378     offset -= s->pc_curr & 0xfff;
4379     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4380     return true;
4381 }
4382 
4383 /*
4384  * Add/subtract (immediate)
4385  */
4386 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4387 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4388 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4389 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4390 
4391 /*
4392  * Add/subtract (immediate, with tags)
4393  */
4394 
4395 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4396                                       bool sub_op)
4397 {
4398     TCGv_i64 tcg_rn, tcg_rd;
4399     int imm;
4400 
4401     imm = a->uimm6 << LOG2_TAG_GRANULE;
4402     if (sub_op) {
4403         imm = -imm;
4404     }
4405 
4406     tcg_rn = cpu_reg_sp(s, a->rn);
4407     tcg_rd = cpu_reg_sp(s, a->rd);
4408 
4409     if (s->ata[0]) {
4410         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4411                            tcg_constant_i32(imm),
4412                            tcg_constant_i32(a->uimm4));
4413     } else {
4414         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4415         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4416     }
4417     return true;
4418 }
4419 
4420 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4421 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4422 
4423 /* The input should be a value in the bottom e bits (with higher
4424  * bits zero); returns that value replicated into every element
4425  * of size e in a 64 bit integer.
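 * For example, bitfield_replicate(0b0101, 4) returns
 * 0x5555555555555555.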
4426  */
4427 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4428 {
4429     assert(e != 0);
4430     while (e < 64) {
4431         mask |= mask << e;
4432         e *= 2;
4433     }
4434     return mask;
4435 }
4436 
4437 /*
4438  * Logical (immediate)
4439  */
4440 
4441 /*
4442  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4443  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4444  * value (ie should cause a guest UNDEF exception), and true if they are
4445  * valid, in which case the decoded bit pattern is written to result.
4446  */
4447 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4448                             unsigned int imms, unsigned int immr)
4449 {
4450     uint64_t mask;
4451     unsigned e, levels, s, r;
4452     int len;
4453 
4454     assert(immn < 2 && imms < 64 && immr < 64);
4455 
4456     /* The bit patterns we create here are 64 bit patterns which
4457      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4458      * 64 bits each. Each element contains the same value: a run
4459      * of between 1 and e-1 non-zero bits, rotated within the
4460      * element by between 0 and e-1 bits.
4461      *
4462      * The element size and run length are encoded into immn (1 bit)
4463      * and imms (6 bits) as follows:
4464      * 64 bit elements: immn = 1, imms = <length of run - 1>
4465      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4466      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4467      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4468      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4469      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4470      * Notice that immn = 0, imms = 11111x is the only combination
4471      * not covered by one of the above options; this is reserved.
4472      * Further, <length of run - 1> all-ones is a reserved pattern.
4473      *
4474      * In all cases the rotation is by immr % e (and immr is 6 bits).
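     *
     * Worked example (values chosen for illustration): immn = 0,
     * imms = 0b000011, immr = 0b000001 decodes as e = 32, s = 3,
     * r = 1: four set bits rotated right by one within each 32-bit
     * element, giving 0x80000007 and thus wmask 0x8000000780000007.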
4475      */
4476 
4477     /* First determine the element size */
4478     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4479     if (len < 1) {
4480         /* This is the immn == 0, imms == 0b11111x case */
4481         return false;
4482     }
4483     e = 1 << len;
4484 
4485     levels = e - 1;
4486     s = imms & levels;
4487     r = immr & levels;
4488 
4489     if (s == levels) {
4490         /* <length of run - 1> mustn't be all-ones. */
4491         return false;
4492     }
4493 
4494     /* Create the value of one element: s+1 set bits rotated
4495      * by r within the element (which is e bits wide)...
4496      */
4497     mask = MAKE_64BIT_MASK(0, s + 1);
4498     if (r) {
4499         mask = (mask >> r) | (mask << (e - r));
4500         mask &= MAKE_64BIT_MASK(0, e);
4501     }
4502     /* ...then replicate the element over the whole 64 bit value */
4503     mask = bitfield_replicate(mask, e);
4504     *result = mask;
4505     return true;
4506 }
4507 
4508 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4509                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4510 {
4511     TCGv_i64 tcg_rd, tcg_rn;
4512     uint64_t imm;
4513 
4514     /* Some immediate field values are reserved. */
4515     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4516                                 extract32(a->dbm, 0, 6),
4517                                 extract32(a->dbm, 6, 6))) {
4518         return false;
4519     }
4520     if (!a->sf) {
4521         imm &= 0xffffffffull;
4522     }
4523 
4524     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4525     tcg_rn = cpu_reg(s, a->rn);
4526 
4527     fn(tcg_rd, tcg_rn, imm);
4528     if (set_cc) {
4529         gen_logic_CC(a->sf, tcg_rd);
4530     }
4531     if (!a->sf) {
4532         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4533     }
4534     return true;
4535 }
4536 
4537 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4538 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4539 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4540 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4541 
4542 /*
4543  * Move wide (immediate)
4544  */
4545 
4546 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4547 {
4548     int pos = a->hw << 4;
4549     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4550     return true;
4551 }
4552 
4553 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4554 {
4555     int pos = a->hw << 4;
4556     uint64_t imm = a->imm;
4557 
4558     imm = ~(imm << pos);
4559     if (!a->sf) {
4560         imm = (uint32_t)imm;
4561     }
4562     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4563     return true;
4564 }
4565 
4566 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4567 {
4568     int pos = a->hw << 4;
4569     TCGv_i64 tcg_rd, tcg_im;
4570 
4571     tcg_rd = cpu_reg(s, a->rd);
4572     tcg_im = tcg_constant_i64(a->imm);
4573     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4574     if (!a->sf) {
4575         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4576     }
4577     return true;
4578 }
4579 
4580 /*
4581  * Bitfield
4582  */
4583 
4584 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4585 {
4586     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4587     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4588     unsigned int bitsize = a->sf ? 64 : 32;
4589     unsigned int ri = a->immr;
4590     unsigned int si = a->imms;
4591     unsigned int pos, len;
4592 
4593     if (si >= ri) {
4594         /* Wd<s-r:0> = Wn<s:r> */
4595         len = (si - ri) + 1;
4596         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4597         if (!a->sf) {
4598             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4599         }
4600     } else {
4601         /* Wd<32+s-r,32-r> = Wn<s:0> */
4602         len = si + 1;
4603         pos = (bitsize - ri) & (bitsize - 1);
4604 
4605         if (len < ri) {
4606             /*
4607              * Sign extend the destination field from len to fill the
4608              * balance of the word.  Let the deposit below insert all
4609              * of those sign bits.
4610              */
4611             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4612             len = ri;
4613         }
4614 
4615         /*
4616          * We start with zero, and we haven't modified any bits outside
4617          * bitsize, therefore no final zero-extension is needed for !sf.
4618          */
4619         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4620     }
4621     return true;
4622 }
4623 
4624 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4625 {
4626     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4627     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4628     unsigned int bitsize = a->sf ? 64 : 32;
4629     unsigned int ri = a->immr;
4630     unsigned int si = a->imms;
4631     unsigned int pos, len;
4632 
4636     if (si >= ri) {
4637         /* Wd<s-r:0> = Wn<s:r> */
4638         len = (si - ri) + 1;
4639         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4640     } else {
4641         /* Wd<32+s-r,32-r> = Wn<s:0> */
4642         len = si + 1;
4643         pos = (bitsize - ri) & (bitsize - 1);
4644         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4645     }
4646     return true;
4647 }
4648 
4649 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4650 {
4651     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4652     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4653     unsigned int bitsize = a->sf ? 64 : 32;
4654     unsigned int ri = a->immr;
4655     unsigned int si = a->imms;
4656     unsigned int pos, len;
4657 
4661     if (si >= ri) {
4662         /* Wd<s-r:0> = Wn<s:r> */
4663         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4664         len = (si - ri) + 1;
4665         pos = 0;
4666     } else {
4667         /* Wd<32+s-r,32-r> = Wn<s:0> */
4668         len = si + 1;
4669         pos = (bitsize - ri) & (bitsize - 1);
4670     }
4671 
4672     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4673     if (!a->sf) {
4674         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4675     }
4676     return true;
4677 }
4678 
4679 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4680 {
4681     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4682 
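    /*
     * EXTR extracts a register-width field starting at bit a->imm of
     * the concatenation Rn:Rm, e.g. EXTR Xd, Xn, Xm, #8 yields
     * (Xn:Xm) >> 8 truncated to 64 bits; Rn == Rm degenerates to ROR.
     */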
4683     tcg_rd = cpu_reg(s, a->rd);
4684 
4685     if (unlikely(a->imm == 0)) {
4686         /*
4687          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4688          * so an extract from bit 0 is a special case.
4689          */
4690         if (a->sf) {
4691             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4692         } else {
4693             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4694         }
4695     } else {
4696         tcg_rm = cpu_reg(s, a->rm);
4697         tcg_rn = cpu_reg(s, a->rn);
4698 
4699         if (a->sf) {
4700             /* Specialization to ROR happens in EXTRACT2.  */
4701             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4702         } else {
4703             TCGv_i32 t0 = tcg_temp_new_i32();
4704 
4705             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4706             if (a->rm == a->rn) {
4707                 tcg_gen_rotri_i32(t0, t0, a->imm);
4708             } else {
4709                 TCGv_i32 t1 = tcg_temp_new_i32();
4710                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4711                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4712             }
4713             tcg_gen_extu_i32_i64(tcg_rd, t0);
4714         }
4715     }
4716     return true;
4717 }
4718 
4719 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4720 {
4721     if (fp_access_check(s)) {
4722         int len = (a->len + 1) * 16;
4723 
4724         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4725                            vec_full_reg_offset(s, a->rm), tcg_env,
4726                            a->q ? 16 : 8, vec_full_reg_size(s),
4727                            (len << 6) | (a->tbx << 5) | a->rn,
4728                            gen_helper_simd_tblx);
4729     }
4730     return true;
4731 }
4732 
4733 typedef int simd_permute_idx_fn(int i, int part, int elements);
4734 
4735 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4736                             simd_permute_idx_fn *fn, int part)
4737 {
4738     MemOp esz = a->esz;
4739     int datasize = a->q ? 16 : 8;
4740     int elements = datasize >> esz;
4741     TCGv_i64 tcg_res[2], tcg_ele;
4742 
4743     if (esz == MO_64 && !a->q) {
4744         return false;
4745     }
4746     if (!fp_access_check(s)) {
4747         return true;
4748     }
4749 
4750     tcg_res[0] = tcg_temp_new_i64();
4751     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4752     tcg_ele = tcg_temp_new_i64();
4753 
4754     for (int i = 0; i < elements; i++) {
4755         int o, w, idx;
4756 
4757         idx = fn(i, part, elements);
4758         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4759                          idx & (elements - 1), esz);
4760 
4761         w = (i << (esz + 3)) / 64;
4762         o = (i << (esz + 3)) % 64;
4763         if (o == 0) {
4764             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4765         } else {
4766             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4767         }
4768     }
4769 
4770     for (int i = a->q; i >= 0; --i) {
4771         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4772     }
4773     clear_vec_high(s, a->q, a->rd);
4774     return true;
4775 }
4776 
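/*
 * For example, with 8 elements UZP1 (part 0) reads indices
 * 0, 2, 4, ..., 14 and UZP2 (part 1) reads 1, 3, ..., 15: the even
 * or odd elements of the Rn:Rm concatenation (idx & elements above
 * selects Rm).
 */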
4777 static int permute_load_uzp(int i, int part, int elements)
4778 {
4779     return 2 * i + part;
4780 }
4781 
4782 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4783 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4784 
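/*
 * For example, with 8 elements TRN1 (part 0) reads indices
 * 0, 8, 2, 10, 4, 12, 6, 14: even-numbered elements of Rn and Rm
 * interleaved pairwise.
 */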
4785 static int permute_load_trn(int i, int part, int elements)
4786 {
4787     return (i & 1) * elements + (i & ~1) + part;
4788 }
4789 
4790 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4791 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4792 
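/*
 * For example, with 8 elements ZIP1 (part 0) reads indices
 * 0, 8, 1, 9, 2, 10, 3, 11: the low halves of Rn and Rm interleaved.
 */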
4793 static int permute_load_zip(int i, int part, int elements)
4794 {
4795     return (i & 1) * elements + ((part * elements + i) >> 1);
4796 }
4797 
4798 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4799 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4800 
4801 /*
4802  * Cryptographic AES, SHA, SHA512
4803  */
4804 
4805 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4806 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4807 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4808 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4809 
4810 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4811 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4812 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4813 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4814 
4815 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4816 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4817 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4818 
4819 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4820 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4821 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4822 
4823 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4824 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4825 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4826 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4827 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4828 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4829 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4830 
4831 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4832 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4833 
4834 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4835 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4836 
4837 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4838 {
4839     if (!dc_isar_feature(aa64_sm3, s)) {
4840         return false;
4841     }
4842     if (fp_access_check(s)) {
4843         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4844         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4845         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4846         TCGv_i32 tcg_res = tcg_temp_new_i32();
4847 
4848         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
4849         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
4850         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
4851 
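        /*
         * rotri by 20 and 25 implement rotli by 12 and 7, i.e. the
         * SM3 SS1 term ROL32(ROL32(A, 12) + E + ROL32(Tj, j), 7).
         */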
4852         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
4853         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
4854         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
4855         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
4856 
4857         /* Clear the whole register first, then store bits [127:96]. */
4858         clear_vec(s, a->rd);
4859         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
4860     }
4861     return true;
4862 }
4863 
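/*
 * The SM3TT insns always operate on the full 128 bits; imm is the
 * 2-bit lane selector, passed through to the helper.
 */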
4864 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
4865 {
4866     if (fp_access_check(s)) {
4867         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
4868     }
4869     return true;
4870 }
4871 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
4872 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
4873 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
4874 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
4875 
4876 static bool trans_XAR(DisasContext *s, arg_XAR *a)
4877 {
4878     if (!dc_isar_feature(aa64_sha3, s)) {
4879         return false;
4880     }
4881     if (fp_access_check(s)) {
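        /* XAR: (Vn ^ Vm) rotated right by imm, per 64-bit lane. */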
4882         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
4883                      vec_full_reg_offset(s, a->rn),
4884                      vec_full_reg_offset(s, a->rm), a->imm, 16,
4885                      vec_full_reg_size(s));
4886     }
4887     return true;
4888 }
4889 
4890 /*
4891  * Advanced SIMD copy
4892  */
4893 
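/*
 * The immediate encodes both size and index: the lowest set bit is
 * the log2 element size and the bits above it are the index; e.g.
 * xxx10 is a 16-bit element with index xxx.  Sizes above MO_64 are
 * rejected, as is imm == 0 (ctz32 returns 32).
 */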
4894 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
4895 {
4896     unsigned esz = ctz32(imm);
4897     if (esz <= MO_64) {
4898         *pesz = esz;
4899         *pidx = imm >> (esz + 1);
4900         return true;
4901     }
4902     return false;
4903 }
4904 
4905 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
4906 {
4907     MemOp esz;
4908     unsigned idx;
4909 
4910     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4911         return false;
4912     }
4913     if (fp_access_check(s)) {
4914         /*
4915          * This instruction just extracts the specified element and
4916          * zero-extends it into the bottom of the destination register.
4917          */
4918         TCGv_i64 tmp = tcg_temp_new_i64();
4919         read_vec_element(s, tmp, a->rn, idx, esz);
4920         write_fp_dreg(s, a->rd, tmp);
4921     }
4922     return true;
4923 }
4924 
4925 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
4926 {
4927     MemOp esz;
4928     unsigned idx;
4929 
4930     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4931         return false;
4932     }
4933     if (esz == MO_64 && !a->q) {
4934         return false;
4935     }
4936     if (fp_access_check(s)) {
4937         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
4938                              vec_reg_offset(s, a->rn, idx, esz),
4939                              a->q ? 16 : 8, vec_full_reg_size(s));
4940     }
4941     return true;
4942 }
4943 
4944 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
4945 {
4946     MemOp esz;
4947     unsigned idx;
4948 
4949     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4950         return false;
4951     }
4952     if (esz == MO_64 && !a->q) {
4953         return false;
4954     }
4955     if (fp_access_check(s)) {
4956         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4957                              a->q ? 16 : 8, vec_full_reg_size(s),
4958                              cpu_reg(s, a->rn));
4959     }
4960     return true;
4961 }
4962 
4963 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
4964 {
4965     MemOp esz;
4966     unsigned idx;
4967 
4968     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4969         return false;
4970     }
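    /*
     * SMOV Wd takes B/H and SMOV Xd takes B/H/S; UMOV Wd takes B/H/S
     * and UMOV Xd takes only D.  The other combinations are
     * unallocated encodings.
     */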
4971     if (is_signed) {
4972         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
4973             return false;
4974         }
4975     } else {
4976         if (esz == MO_64 ? !a->q : a->q) {
4977             return false;
4978         }
4979     }
4980     if (fp_access_check(s)) {
4981         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4982         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
4983         if (is_signed && !a->q) {
4984             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4985         }
4986     }
4987     return true;
4988 }
4989 
4990 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
4991 TRANS(UMOV, do_smov_umov, a, 0)
4992 
4993 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
4994 {
4995     MemOp esz;
4996     unsigned idx;
4997 
4998     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4999         return false;
5000     }
5001     if (fp_access_check(s)) {
5002         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5003         clear_vec_high(s, true, a->rd);
5004     }
5005     return true;
5006 }
5007 
5008 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5009 {
5010     MemOp esz;
5011     unsigned didx, sidx;
5012 
5013     if (!decode_esz_idx(a->di, &esz, &didx)) {
5014         return false;
5015     }
5016     sidx = a->si >> esz;
5017     if (fp_access_check(s)) {
5018         TCGv_i64 tmp = tcg_temp_new_i64();
5019 
5020         read_vec_element(s, tmp, a->rn, sidx, esz);
5021         write_vec_element(s, tmp, a->rd, didx, esz);
5022 
5023         /* INS is considered a 128-bit write for SVE. */
5024         clear_vec_high(s, true, a->rd);
5025     }
5026     return true;
5027 }
5028 
5029 /*
5030  * Advanced SIMD three same
5031  */
5032 
5033 typedef struct FPScalar {
5034     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5035     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5036     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5037 } FPScalar;
5038 
5039 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
5040 {
5041     switch (a->esz) {
5042     case MO_64:
5043         if (fp_access_check(s)) {
5044             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5045             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5046             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5047             write_fp_dreg(s, a->rd, t0);
5048         }
5049         break;
5050     case MO_32:
5051         if (fp_access_check(s)) {
5052             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5053             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5054             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5055             write_fp_sreg(s, a->rd, t0);
5056         }
5057         break;
5058     case MO_16:
5059         if (!dc_isar_feature(aa64_fp16, s)) {
5060             return false;
5061         }
5062         if (fp_access_check(s)) {
5063             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5064             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5065             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5066             write_fp_sreg(s, a->rd, t0);
5067         }
5068         break;
5069     default:
5070         return false;
5071     }
5072     return true;
5073 }
5074 
5075 static const FPScalar f_scalar_fadd = {
5076     gen_helper_vfp_addh,
5077     gen_helper_vfp_adds,
5078     gen_helper_vfp_addd,
5079 };
5080 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
5081 
5082 static const FPScalar f_scalar_fsub = {
5083     gen_helper_vfp_subh,
5084     gen_helper_vfp_subs,
5085     gen_helper_vfp_subd,
5086 };
5087 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
5088 
5089 static const FPScalar f_scalar_fdiv = {
5090     gen_helper_vfp_divh,
5091     gen_helper_vfp_divs,
5092     gen_helper_vfp_divd,
5093 };
5094 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
5095 
5096 static const FPScalar f_scalar_fmul = {
5097     gen_helper_vfp_mulh,
5098     gen_helper_vfp_muls,
5099     gen_helper_vfp_muld,
5100 };
5101 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
5102 
5103 static const FPScalar f_scalar_fmax = {
5104     gen_helper_vfp_maxh,
5105     gen_helper_vfp_maxs,
5106     gen_helper_vfp_maxd,
5107 };
5108 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
5109 
5110 static const FPScalar f_scalar_fmin = {
5111     gen_helper_vfp_minh,
5112     gen_helper_vfp_mins,
5113     gen_helper_vfp_mind,
5114 };
5115 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
5116 
5117 static const FPScalar f_scalar_fmaxnm = {
5118     gen_helper_vfp_maxnumh,
5119     gen_helper_vfp_maxnums,
5120     gen_helper_vfp_maxnumd,
5121 };
5122 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
5123 
5124 static const FPScalar f_scalar_fminnm = {
5125     gen_helper_vfp_minnumh,
5126     gen_helper_vfp_minnums,
5127     gen_helper_vfp_minnumd,
5128 };
5129 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
5130 
5131 static const FPScalar f_scalar_fmulx = {
5132     gen_helper_advsimd_mulxh,
5133     gen_helper_vfp_mulxs,
5134     gen_helper_vfp_mulxd,
5135 };
5136 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
5137 
5138 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5139 {
5140     gen_helper_vfp_mulh(d, n, m, s);
5141     gen_vfp_negh(d, d);
5142 }
5143 
5144 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5145 {
5146     gen_helper_vfp_muls(d, n, m, s);
5147     gen_vfp_negs(d, d);
5148 }
5149 
5150 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5151 {
5152     gen_helper_vfp_muld(d, n, m, s);
5153     gen_vfp_negd(d, d);
5154 }
5155 
5156 static const FPScalar f_scalar_fnmul = {
5157     gen_fnmul_h,
5158     gen_fnmul_s,
5159     gen_fnmul_d,
5160 };
5161 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
5162 
5163 static const FPScalar f_scalar_fcmeq = {
5164     gen_helper_advsimd_ceq_f16,
5165     gen_helper_neon_ceq_f32,
5166     gen_helper_neon_ceq_f64,
5167 };
5168 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
5169 
5170 static const FPScalar f_scalar_fcmge = {
5171     gen_helper_advsimd_cge_f16,
5172     gen_helper_neon_cge_f32,
5173     gen_helper_neon_cge_f64,
5174 };
5175 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
5176 
5177 static const FPScalar f_scalar_fcmgt = {
5178     gen_helper_advsimd_cgt_f16,
5179     gen_helper_neon_cgt_f32,
5180     gen_helper_neon_cgt_f64,
5181 };
5182 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
5183 
5184 static const FPScalar f_scalar_facge = {
5185     gen_helper_advsimd_acge_f16,
5186     gen_helper_neon_acge_f32,
5187     gen_helper_neon_acge_f64,
5188 };
5189 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
5190 
5191 static const FPScalar f_scalar_facgt = {
5192     gen_helper_advsimd_acgt_f16,
5193     gen_helper_neon_acgt_f32,
5194     gen_helper_neon_acgt_f64,
5195 };
5196 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
5197 
5198 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5199 {
5200     gen_helper_vfp_subh(d, n, m, s);
5201     gen_vfp_absh(d, d);
5202 }
5203 
5204 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5205 {
5206     gen_helper_vfp_subs(d, n, m, s);
5207     gen_vfp_abss(d, d);
5208 }
5209 
5210 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5211 {
5212     gen_helper_vfp_subd(d, n, m, s);
5213     gen_vfp_absd(d, d);
5214 }
5215 
5216 static const FPScalar f_scalar_fabd = {
5217     gen_fabd_h,
5218     gen_fabd_s,
5219     gen_fabd_d,
5220 };
5221 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
5222 
5223 static const FPScalar f_scalar_frecps = {
5224     gen_helper_recpsf_f16,
5225     gen_helper_recpsf_f32,
5226     gen_helper_recpsf_f64,
5227 };
5228 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
5229 
5230 static const FPScalar f_scalar_frsqrts = {
5231     gen_helper_rsqrtsf_f16,
5232     gen_helper_rsqrtsf_f32,
5233     gen_helper_rsqrtsf_f64,
5234 };
5235 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
5236 
5237 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5238                        const FPScalar *f, bool swap)
5239 {
5240     switch (a->esz) {
5241     case MO_64:
5242         if (fp_access_check(s)) {
5243             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5244             TCGv_i64 t1 = tcg_constant_i64(0);
5245             if (swap) {
5246                 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5247             } else {
5248                 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5249             }
5250             write_fp_dreg(s, a->rd, t0);
5251         }
5252         break;
5253     case MO_32:
5254         if (fp_access_check(s)) {
5255             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5256             TCGv_i32 t1 = tcg_constant_i32(0);
5257             if (swap) {
5258                 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5259             } else {
5260                 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5261             }
5262             write_fp_sreg(s, a->rd, t0);
5263         }
5264         break;
5265     case MO_16:
5266         if (!dc_isar_feature(aa64_fp16, s)) {
5267             return false;
5268         }
5269         if (fp_access_check(s)) {
5270             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5271             TCGv_i32 t1 = tcg_constant_i32(0);
5272             if (swap) {
5273                 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5274             } else {
5275                 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5276             }
5277             write_fp_sreg(s, a->rd, t0);
5278         }
5279         break;
5280     default:
5281         return false;
5282     }
5283     return true;
5284 }
5285 
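/* FCMLT0/FCMLE0 reuse the GT/GE comparisons with operands swapped. */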
5286 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5287 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5288 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5289 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5290 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
5291 
5292 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5293                 MemOp sgn_n, MemOp sgn_m,
5294                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5295                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5296 {
5297     TCGv_i64 t0, t1, t2, qc;
5298     MemOp esz = a->esz;
5299 
5300     if (!fp_access_check(s)) {
5301         return true;
5302     }
5303 
5304     t0 = tcg_temp_new_i64();
5305     t1 = tcg_temp_new_i64();
5306     t2 = tcg_temp_new_i64();
5307     qc = tcg_temp_new_i64();
5308     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5309     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
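    /* The gen_* helpers accumulate any saturation into this QC value. */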
5310     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5311 
5312     if (esz == MO_64) {
5313         gen_d(t0, qc, t1, t2);
5314     } else {
5315         gen_bhs(t0, qc, t1, t2, esz);
5316         tcg_gen_ext_i64(t0, t0, esz);
5317     }
5318 
5319     write_fp_dreg(s, a->rd, t0);
5320     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5321     return true;
5322 }
5323 
5324 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5325 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5326 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5327 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5328 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5329 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5330 
5331 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5332                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5333 {
5334     if (fp_access_check(s)) {
5335         TCGv_i64 t0 = tcg_temp_new_i64();
5336         TCGv_i64 t1 = tcg_temp_new_i64();
5337 
5338         read_vec_element(s, t0, a->rn, 0, MO_64);
5339         read_vec_element(s, t1, a->rm, 0, MO_64);
5340         fn(t0, t0, t1);
5341         write_fp_dreg(s, a->rd, t0);
5342     }
5343     return true;
5344 }
5345 
5346 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5347 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5348 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5349 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5350 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5351 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5352 
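/*
 * Saturating scalar ops: these helpers take tcg_env so they can
 * update the cumulative saturation flag (QC) directly.
 */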
5353 typedef struct ENVScalar2 {
5354     NeonGenTwoOpEnvFn *gen_bhs[3];
5355     NeonGenTwo64OpEnvFn *gen_d;
5356 } ENVScalar2;
5357 
5358 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5359 {
5360     if (!fp_access_check(s)) {
5361         return true;
5362     }
5363     if (a->esz == MO_64) {
5364         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5365         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5366         f->gen_d(t0, tcg_env, t0, t1);
5367         write_fp_dreg(s, a->rd, t0);
5368     } else {
5369         TCGv_i32 t0 = tcg_temp_new_i32();
5370         TCGv_i32 t1 = tcg_temp_new_i32();
5371 
5372         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5373         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5374         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5375         write_fp_sreg(s, a->rd, t0);
5376     }
5377     return true;
5378 }
5379 
5380 static const ENVScalar2 f_scalar_sqshl = {
5381     { gen_helper_neon_qshl_s8,
5382       gen_helper_neon_qshl_s16,
5383       gen_helper_neon_qshl_s32 },
5384     gen_helper_neon_qshl_s64,
5385 };
5386 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5387 
5388 static const ENVScalar2 f_scalar_uqshl = {
5389     { gen_helper_neon_qshl_u8,
5390       gen_helper_neon_qshl_u16,
5391       gen_helper_neon_qshl_u32 },
5392     gen_helper_neon_qshl_u64,
5393 };
5394 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5395 
5396 static const ENVScalar2 f_scalar_sqrshl = {
5397     { gen_helper_neon_qrshl_s8,
5398       gen_helper_neon_qrshl_s16,
5399       gen_helper_neon_qrshl_s32 },
5400     gen_helper_neon_qrshl_s64,
5401 };
5402 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5403 
5404 static const ENVScalar2 f_scalar_uqrshl = {
5405     { gen_helper_neon_qrshl_u8,
5406       gen_helper_neon_qrshl_u16,
5407       gen_helper_neon_qrshl_u32 },
5408     gen_helper_neon_qrshl_u64,
5409 };
5410 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5411 
5412 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5413                               const ENVScalar2 *f)
5414 {
5415     if (a->esz == MO_16 || a->esz == MO_32) {
5416         return do_env_scalar2(s, a, f);
5417     }
5418     return false;
5419 }
5420 
5421 static const ENVScalar2 f_scalar_sqdmulh = {
5422     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5423 };
5424 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5425 
5426 static const ENVScalar2 f_scalar_sqrdmulh = {
5427     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5428 };
5429 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5430 
5431 typedef struct ENVScalar3 {
5432     NeonGenThreeOpEnvFn *gen_hs[2];
5433 } ENVScalar3;
5434 
5435 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5436                               const ENVScalar3 *f)
5437 {
5438     TCGv_i32 t0, t1, t2;
5439 
5440     if (a->esz != MO_16 && a->esz != MO_32) {
5441         return false;
5442     }
5443     if (!fp_access_check(s)) {
5444         return true;
5445     }
5446 
5447     t0 = tcg_temp_new_i32();
5448     t1 = tcg_temp_new_i32();
5449     t2 = tcg_temp_new_i32();
5450     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5451     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5452     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5453     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5454     write_fp_sreg(s, a->rd, t0);
5455     return true;
5456 }
5457 
5458 static const ENVScalar3 f_scalar_sqrdmlah = {
5459     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5460 };
5461 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5462 
5463 static const ENVScalar3 f_scalar_sqrdmlsh = {
5464     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5465 };
5466 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5467 
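/* Integer compares produce an all-ones/all-zeroes mask, hence negsetcond. */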
5468 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5469 {
5470     if (fp_access_check(s)) {
5471         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5472         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5473         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5474         write_fp_dreg(s, a->rd, t0);
5475     }
5476     return true;
5477 }
5478 
5479 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5480 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5481 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5482 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5483 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5484 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5485 
5486 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5487                           gen_helper_gvec_3_ptr * const fns[3])
5488 {
5489     MemOp esz = a->esz;
5490     int check = fp_access_check_vector_hsd(s, a->q, esz);
5491 
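    /*
     * check < 0 is an unallocated encoding; check == 0 means the fp
     * access check raised an exception (the insn is complete); only
     * check > 0 proceeds to translation.
     */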
5492     if (check <= 0) {
5493         return check == 0;
5494     }
5495 
5496     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
5497                       esz == MO_16, data, fns[esz - 1]);
5498     return true;
5499 }
5500 
5501 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5502     gen_helper_gvec_fadd_h,
5503     gen_helper_gvec_fadd_s,
5504     gen_helper_gvec_fadd_d,
5505 };
5506 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5507 
5508 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5509     gen_helper_gvec_fsub_h,
5510     gen_helper_gvec_fsub_s,
5511     gen_helper_gvec_fsub_d,
5512 };
5513 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5514 
5515 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5516     gen_helper_gvec_fdiv_h,
5517     gen_helper_gvec_fdiv_s,
5518     gen_helper_gvec_fdiv_d,
5519 };
5520 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5521 
5522 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5523     gen_helper_gvec_fmul_h,
5524     gen_helper_gvec_fmul_s,
5525     gen_helper_gvec_fmul_d,
5526 };
5527 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5528 
5529 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5530     gen_helper_gvec_fmax_h,
5531     gen_helper_gvec_fmax_s,
5532     gen_helper_gvec_fmax_d,
5533 };
5534 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
5535 
5536 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5537     gen_helper_gvec_fmin_h,
5538     gen_helper_gvec_fmin_s,
5539     gen_helper_gvec_fmin_d,
5540 };
5541 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
5542 
5543 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5544     gen_helper_gvec_fmaxnum_h,
5545     gen_helper_gvec_fmaxnum_s,
5546     gen_helper_gvec_fmaxnum_d,
5547 };
5548 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5549 
5550 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5551     gen_helper_gvec_fminnum_h,
5552     gen_helper_gvec_fminnum_s,
5553     gen_helper_gvec_fminnum_d,
5554 };
5555 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5556 
5557 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5558     gen_helper_gvec_fmulx_h,
5559     gen_helper_gvec_fmulx_s,
5560     gen_helper_gvec_fmulx_d,
5561 };
5562 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5563 
5564 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5565     gen_helper_gvec_vfma_h,
5566     gen_helper_gvec_vfma_s,
5567     gen_helper_gvec_vfma_d,
5568 };
5569 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5570 
5571 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5572     gen_helper_gvec_vfms_h,
5573     gen_helper_gvec_vfms_s,
5574     gen_helper_gvec_vfms_d,
5575 };
5576 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
5577 
5578 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5579     gen_helper_gvec_fceq_h,
5580     gen_helper_gvec_fceq_s,
5581     gen_helper_gvec_fceq_d,
5582 };
5583 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5584 
5585 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5586     gen_helper_gvec_fcge_h,
5587     gen_helper_gvec_fcge_s,
5588     gen_helper_gvec_fcge_d,
5589 };
5590 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5591 
5592 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5593     gen_helper_gvec_fcgt_h,
5594     gen_helper_gvec_fcgt_s,
5595     gen_helper_gvec_fcgt_d,
5596 };
5597 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5598 
5599 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5600     gen_helper_gvec_facge_h,
5601     gen_helper_gvec_facge_s,
5602     gen_helper_gvec_facge_d,
5603 };
5604 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5605 
5606 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5607     gen_helper_gvec_facgt_h,
5608     gen_helper_gvec_facgt_s,
5609     gen_helper_gvec_facgt_d,
5610 };
5611 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5612 
5613 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5614     gen_helper_gvec_fabd_h,
5615     gen_helper_gvec_fabd_s,
5616     gen_helper_gvec_fabd_d,
5617 };
5618 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
5619 
5620 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5621     gen_helper_gvec_recps_h,
5622     gen_helper_gvec_recps_s,
5623     gen_helper_gvec_recps_d,
5624 };
5625 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
5626 
5627 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5628     gen_helper_gvec_rsqrts_h,
5629     gen_helper_gvec_rsqrts_s,
5630     gen_helper_gvec_rsqrts_d,
5631 };
5632 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
5633 
5634 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5635     gen_helper_gvec_faddp_h,
5636     gen_helper_gvec_faddp_s,
5637     gen_helper_gvec_faddp_d,
5638 };
5639 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5640 
5641 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5642     gen_helper_gvec_fmaxp_h,
5643     gen_helper_gvec_fmaxp_s,
5644     gen_helper_gvec_fmaxp_d,
5645 };
5646 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
5647 
5648 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5649     gen_helper_gvec_fminp_h,
5650     gen_helper_gvec_fminp_s,
5651     gen_helper_gvec_fminp_d,
5652 };
5653 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
5654 
5655 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5656     gen_helper_gvec_fmaxnump_h,
5657     gen_helper_gvec_fmaxnump_s,
5658     gen_helper_gvec_fmaxnump_d,
5659 };
5660 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5661 
5662 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5663     gen_helper_gvec_fminnump_h,
5664     gen_helper_gvec_fminnump_s,
5665     gen_helper_gvec_fminnump_d,
5666 };
5667 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5668 
5669 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5670 {
5671     if (fp_access_check(s)) {
5672         int data = (is_2 << 1) | is_s;
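        /* data bit 0 selects FMLSL; bit 1 selects the '2' (top-half) forms. */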
5673         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5674                            vec_full_reg_offset(s, a->rn),
5675                            vec_full_reg_offset(s, a->rm), tcg_env,
5676                            a->q ? 16 : 8, vec_full_reg_size(s),
5677                            data, gen_helper_gvec_fmlal_a64);
5678     }
5679     return true;
5680 }
5681 
5682 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5683 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5684 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5685 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5686 
5687 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
5688 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
5689 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
5690 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
5691 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
5692 
5693 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
5694 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
5695 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
5696 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
5697 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
5698 
5699 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
5700 {
5701     if (fp_access_check(s)) {
5702         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
5703     }
5704     return true;
5705 }
5706 
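/*
 * BSL, BIT and BIF are one bit-select with the operands permuted:
 * the first source below provides the selection mask.
 */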
5707 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
5708 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
5709 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
5710 
5711 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
5712 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
5713 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
5714 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
5715 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
5716 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
5717 
5718 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
5719 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
5720 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
5721 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
5722 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
5723 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
5724 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
5725 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
5726 
5727 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
5728 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
5729 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
5730 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
5731 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
5732 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
5733 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
5734 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
5735 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
5736 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
5737 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
5738 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
5739 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
5740 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
5741 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
5742 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
5743 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
5744 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
5745 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
5746 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
5747 
5748 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
5749 {
5750     if (a->esz == MO_64 && !a->q) {
5751         return false;
5752     }
5753     if (fp_access_check(s)) {
5754         tcg_gen_gvec_cmp(cond, a->esz,
5755                          vec_full_reg_offset(s, a->rd),
5756                          vec_full_reg_offset(s, a->rn),
5757                          vec_full_reg_offset(s, a->rm),
5758                          a->q ? 16 : 8, vec_full_reg_size(s));
5759     }
5760     return true;
5761 }
5762 
5763 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
5764 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
5765 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
5766 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
5767 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
5768 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
5769 
5770 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
5771 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
5772 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
5773 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
5774 
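/*
 * Dot products and the matrix multiplies accumulate into Vd, so Vd
 * is passed as both the destination and the fourth source operand.
 * The _env variant is for helpers that take tcg_env (the BF16 ops).
 */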
5775 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
5776                           gen_helper_gvec_4 *fn)
5777 {
5778     if (fp_access_check(s)) {
5779         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5780     }
5781     return true;
5782 }
5783 
5784 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
5785                               gen_helper_gvec_4_ptr *fn)
5786 {
5787     if (fp_access_check(s)) {
5788         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5789     }
5790     return true;
5791 }
5792 
5793 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
5794 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
5795 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
5796 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
5797 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
5798 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
5799 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
5800 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
5801 
5802 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
5803 {
5804     if (!dc_isar_feature(aa64_bf16, s)) {
5805         return false;
5806     }
5807     if (fp_access_check(s)) {
5808         /* Q bit selects BFMLALB vs BFMLALT. */
5809         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
5810                           gen_helper_gvec_bfmlal);
5811     }
5812     return true;
5813 }
5814 
5815 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
5816     gen_helper_gvec_fcaddh,
5817     gen_helper_gvec_fcadds,
5818     gen_helper_gvec_fcaddd,
5819 };
5820 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
5821 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
5822 
5823 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
5824 {
5825     static gen_helper_gvec_4_ptr * const fn[] = {
5826         [MO_16] = gen_helper_gvec_fcmlah,
5827         [MO_32] = gen_helper_gvec_fcmlas,
5828         [MO_64] = gen_helper_gvec_fcmlad,
5829     };
5830     int check;
5831 
5832     if (!dc_isar_feature(aa64_fcma, s)) {
5833         return false;
5834     }
5835 
5836     check = fp_access_check_vector_hsd(s, a->q, a->esz);
5837     if (check <= 0) {
5838         return check == 0;
5839     }
5840 
5841     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
5842                       a->esz == MO_16, a->rot, fn[a->esz]);
5843     return true;
5844 }
5845 
5846 /*
5847  * Widening vector x vector/indexed.
5848  *
5849  * These read from the top or bottom half of a 128-bit vector.
5850  * After widening, optionally accumulate with a 128-bit vector.
5851  * Implement these inline, as the number of elements is limited
5852  * and the related SVE and SME operations on larger vectors use
5853  * even/odd elements instead of top/bottom half.
5854  *
5855  * If idx >= 0, operand 2 is indexed, otherwise vector.
5856  * If acc, operand 0 is loaded with rd.
5857  */
5858 
5859 /* For the low half, iterate downward; upward for the top half. */
5860 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
5861                             int rd, int rn, int rm, int idx,
5862                             NeonGenTwo64OpFn *fn, bool acc)
5863 {
5864     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
5865     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5866     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5867     MemOp esz = memop & MO_SIZE;
5868     int half = 8 >> esz;
5869     int top_swap, top_half;
5870 
5871     /* There are no 64x64->128 bit operations. */
5872     if (esz >= MO_64) {
5873         return false;
5874     }
5875     if (!fp_access_check(s)) {
5876         return true;
5877     }
5878 
5879     if (idx >= 0) {
5880         read_vec_element(s, tcg_op2, rm, idx, memop);
5881     }
5882 
5883     /*
5884      * For top half inputs, iterate forward; backward for bottom half.
5885      * This means the store to the destination will not occur until
5886  * overlapping inputs are consumed.
5887      * Use top_swap to conditionally invert the forward iteration index.
5888      */
5889     top_swap = top ? 0 : half - 1;
5890     top_half = top ? half : 0;
5891 
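    /*
     * E.g. for esz == MO_16 (half == 4): the bottom-half pass reads
     * source elements 3,2,1,0 into destination elements 3..0, while
     * the top-half pass reads 4,5,6,7 into destination 0..3.
     */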
5892     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5893         int elt = elt_fwd ^ top_swap;
5894 
5895         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
5896         if (idx < 0) {
5897             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
5898         }
5899         if (acc) {
5900             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
5901         }
5902         fn(tcg_op0, tcg_op1, tcg_op2);
5903         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
5904     }
5905     clear_vec_high(s, 1, rd);
5906     return true;
5907 }
5908 
5909 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5910 {
5911     TCGv_i64 t = tcg_temp_new_i64();
5912     tcg_gen_mul_i64(t, n, m);
5913     tcg_gen_add_i64(d, d, t);
5914 }
5915 
5916 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5917 {
5918     TCGv_i64 t = tcg_temp_new_i64();
5919     tcg_gen_mul_i64(t, n, m);
5920     tcg_gen_sub_i64(d, d, t);
5921 }
5922 
5923 TRANS(SMULL_v, do_3op_widening,
5924       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5925       tcg_gen_mul_i64, false)
5926 TRANS(UMULL_v, do_3op_widening,
5927       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5928       tcg_gen_mul_i64, false)
5929 TRANS(SMLAL_v, do_3op_widening,
5930       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5931       gen_muladd_i64, true)
5932 TRANS(UMLAL_v, do_3op_widening,
5933       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5934       gen_muladd_i64, true)
5935 TRANS(SMLSL_v, do_3op_widening,
5936       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5937       gen_mulsub_i64, true)
5938 TRANS(UMLSL_v, do_3op_widening,
5939       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5940       gen_mulsub_i64, true)
5941 
5942 TRANS(SMULL_vi, do_3op_widening,
5943       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5944       tcg_gen_mul_i64, false)
5945 TRANS(UMULL_vi, do_3op_widening,
5946       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5947       tcg_gen_mul_i64, false)
5948 TRANS(SMLAL_vi, do_3op_widening,
5949       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5950       gen_muladd_i64, true)
5951 TRANS(UMLAL_vi, do_3op_widening,
5952       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5953       gen_muladd_i64, true)
5954 TRANS(SMLSL_vi, do_3op_widening,
5955       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5956       gen_mulsub_i64, true)
5957 TRANS(UMLSL_vi, do_3op_widening,
5958       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5959       gen_mulsub_i64, true)
5960 
5961 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5962 {
5963     TCGv_i64 t1 = tcg_temp_new_i64();
5964     TCGv_i64 t2 = tcg_temp_new_i64();
5965 
5966     tcg_gen_sub_i64(t1, n, m);
5967     tcg_gen_sub_i64(t2, m, n);
5968     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
5969 }
5970 
5971 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5972 {
5973     TCGv_i64 t1 = tcg_temp_new_i64();
5974     TCGv_i64 t2 = tcg_temp_new_i64();
5975 
5976     tcg_gen_sub_i64(t1, n, m);
5977     tcg_gen_sub_i64(t2, m, n);
5978     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
5979 }
5980 
5981 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5982 {
5983     TCGv_i64 t = tcg_temp_new_i64();
5984     gen_sabd_i64(t, n, m);
5985     tcg_gen_add_i64(d, d, t);
5986 }
5987 
5988 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5989 {
5990     TCGv_i64 t = tcg_temp_new_i64();
5991     gen_uabd_i64(t, n, m);
5992     tcg_gen_add_i64(d, d, t);
5993 }
5994 
5995 TRANS(SADDL_v, do_3op_widening,
5996       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5997       tcg_gen_add_i64, false)
5998 TRANS(UADDL_v, do_3op_widening,
5999       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6000       tcg_gen_add_i64, false)
6001 TRANS(SSUBL_v, do_3op_widening,
6002       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6003       tcg_gen_sub_i64, false)
6004 TRANS(USUBL_v, do_3op_widening,
6005       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6006       tcg_gen_sub_i64, false)
6007 TRANS(SABDL_v, do_3op_widening,
6008       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6009       gen_sabd_i64, false)
6010 TRANS(UABDL_v, do_3op_widening,
6011       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6012       gen_uabd_i64, false)
6013 TRANS(SABAL_v, do_3op_widening,
6014       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6015       gen_saba_i64, true)
6016 TRANS(UABAL_v, do_3op_widening,
6017       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6018       gen_uaba_i64, true)
6019 
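/*
 * The doubling in SQDMULL is the saturating self-add of the widened
 * product; the addl_saturate helpers also accumulate into QC.
 */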
6020 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6021 {
6022     tcg_gen_mul_i64(d, n, m);
6023     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6024 }
6025 
6026 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6027 {
6028     tcg_gen_mul_i64(d, n, m);
6029     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6030 }
6031 
6032 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6033 {
6034     TCGv_i64 t = tcg_temp_new_i64();
6035 
6036     tcg_gen_mul_i64(t, n, m);
6037     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6038     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6039 }
6040 
6041 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6042 {
6043     TCGv_i64 t = tcg_temp_new_i64();
6044 
6045     tcg_gen_mul_i64(t, n, m);
6046     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6047     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6048 }
6049 
6050 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6051 {
6052     TCGv_i64 t = tcg_temp_new_i64();
6053 
6054     tcg_gen_mul_i64(t, n, m);
6055     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6056     tcg_gen_neg_i64(t, t);
6057     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6058 }
6059 
6060 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6061 {
6062     TCGv_i64 t = tcg_temp_new_i64();
6063 
6064     tcg_gen_mul_i64(t, n, m);
6065     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6066     tcg_gen_neg_i64(t, t);
6067     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6068 }
6069 
6070 TRANS(SQDMULL_v, do_3op_widening,
6071       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6072       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6073 TRANS(SQDMLAL_v, do_3op_widening,
6074       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6075       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6076 TRANS(SQDMLSL_v, do_3op_widening,
6077       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6078       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6079 
6080 TRANS(SQDMULL_vi, do_3op_widening,
6081       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6082       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6083 TRANS(SQDMLAL_vi, do_3op_widening,
6084       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6085       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6086 TRANS(SQDMLSL_vi, do_3op_widening,
6087       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6088       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6089 
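/*
 * Wide add/sub: Vn is consumed at double width; only Vm is widened,
 * from the half of the register selected by Q.
 */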
6090 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6091                            MemOp sign, bool sub)
6092 {
6093     TCGv_i64 tcg_op0, tcg_op1;
6094     MemOp esz = a->esz;
6095     int half = 8 >> esz;
6096     bool top = a->q;
6097     int top_swap = top ? 0 : half - 1;
6098     int top_half = top ? half : 0;
6099 
6100     /* There are no 64x64->128 bit operations. */
6101     if (esz >= MO_64) {
6102         return false;
6103     }
6104     if (!fp_access_check(s)) {
6105         return true;
6106     }
6107     tcg_op0 = tcg_temp_new_i64();
6108     tcg_op1 = tcg_temp_new_i64();
6109 
6110     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6111         int elt = elt_fwd ^ top_swap;
6112 
6113         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6114         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6115         if (sub) {
6116             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6117         } else {
6118             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6119         }
6120         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6121     }
6122     clear_vec_high(s, 1, a->rd);
6123     return true;
6124 }
6125 
6126 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6127 TRANS(UADDW, do_addsub_wide, a, 0, false)
6128 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6129 TRANS(USUBW, do_addsub_wide, a, 0, true)
6130 
6131 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6132                                  bool sub, bool round)
6133 {
6134     TCGv_i64 tcg_op0, tcg_op1;
6135     MemOp esz = a->esz;
6136     int half = 8 >> esz;
6137     bool top = a->q;
6138     int ebits = 8 << esz;
6139     uint64_t rbit = 1ull << (ebits - 1);
6140     int top_swap, top_half;
6141 
6142     /* There are no 128x128->64 bit operations. */
6143     if (esz >= MO_64) {
6144         return false;
6145     }
6146     if (!fp_access_check(s)) {
6147         return true;
6148     }
6149     tcg_op0 = tcg_temp_new_i64();
6150     tcg_op1 = tcg_temp_new_i64();
6151 
6152     /*
6153      * For top half inputs, iterate backward; forward for bottom half.
6154      * This means the store to the destination will not occur until
6155  * overlapping inputs are consumed.
6156      */
6157     top_swap = top ? half - 1 : 0;
6158     top_half = top ? half : 0;
6159 
6160     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6161         int elt = elt_fwd ^ top_swap;
6162 
6163         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6164         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6165         if (sub) {
6166             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6167         } else {
6168             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6169         }
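        /* Round to nearest by adding half the weight of the dropped bits. */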
6170         if (round) {
6171             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6172         }
6173         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6174         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6175     }
6176     clear_vec_high(s, top, a->rd);
6177     return true;
6178 }
6179 
6180 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6181 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6182 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6183 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6184 
6185 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6186 {
6187     if (fp_access_check(s)) {
6188         /* The Q field specifies lo/hi half input for these insns.  */
6189         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6190     }
6191     return true;
6192 }
6193 
6194 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6195 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6196 
6197 /*
6198  * Advanced SIMD scalar/vector x indexed element
6199  */
6200 
6201 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6202 {
6203     switch (a->esz) {
6204     case MO_64:
6205         if (fp_access_check(s)) {
6206             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6207             TCGv_i64 t1 = tcg_temp_new_i64();
6208 
6209             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6210             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6211             write_fp_dreg(s, a->rd, t0);
6212         }
6213         break;
6214     case MO_32:
6215         if (fp_access_check(s)) {
6216             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6217             TCGv_i32 t1 = tcg_temp_new_i32();
6218 
6219             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6220             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6221             write_fp_sreg(s, a->rd, t0);
6222         }
6223         break;
6224     case MO_16:
6225         if (!dc_isar_feature(aa64_fp16, s)) {
6226             return false;
6227         }
6228         if (fp_access_check(s)) {
6229             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6230             TCGv_i32 t1 = tcg_temp_new_i32();
6231 
6232             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6233             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6234             write_fp_sreg(s, a->rd, t0);
6235         }
6236         break;
6237     default:
6238         g_assert_not_reached();
6239     }
6240     return true;
6241 }
6242 
6243 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6244 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6245 
6246 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6247 {
6248     switch (a->esz) {
6249     case MO_64:
6250         if (fp_access_check(s)) {
6251             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6252             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6253             TCGv_i64 t2 = tcg_temp_new_i64();
6254 
6255             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6256             if (neg) {
6257                 gen_vfp_negd(t1, t1);
6258             }
6259             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6260             write_fp_dreg(s, a->rd, t0);
6261         }
6262         break;
6263     case MO_32:
6264         if (fp_access_check(s)) {
6265             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6266             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6267             TCGv_i32 t2 = tcg_temp_new_i32();
6268 
6269             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6270             if (neg) {
6271                 gen_vfp_negs(t1, t1);
6272             }
6273             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6274             write_fp_sreg(s, a->rd, t0);
6275         }
6276         break;
6277     case MO_16:
6278         if (!dc_isar_feature(aa64_fp16, s)) {
6279             return false;
6280         }
6281         if (fp_access_check(s)) {
6282             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6283             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6284             TCGv_i32 t2 = tcg_temp_new_i32();
6285 
6286             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6287             if (neg) {
6288                 gen_vfp_negh(t1, t1);
6289             }
6290             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6291                                        fpstatus_ptr(FPST_A64_F16));
6292             write_fp_sreg(s, a->rd, t0);
6293         }
6294         break;
6295     default:
6296         g_assert_not_reached();
6297     }
6298     return true;
6299 }
6300 
6301 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6302 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6303 
6304 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6305                                   const ENVScalar2 *f)
6306 {
6307     if (a->esz < MO_16 || a->esz > MO_32) {
6308         return false;
6309     }
6310     if (fp_access_check(s)) {
6311         TCGv_i32 t0 = tcg_temp_new_i32();
6312         TCGv_i32 t1 = tcg_temp_new_i32();
6313 
6314         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6315         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6316         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6317         write_fp_sreg(s, a->rd, t0);
6318     }
6319     return true;
6320 }
6321 
6322 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6323 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6324 
6325 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6326                                   const ENVScalar3 *f)
6327 {
6328     if (a->esz < MO_16 || a->esz > MO_32) {
6329         return false;
6330     }
6331     if (fp_access_check(s)) {
6332         TCGv_i32 t0 = tcg_temp_new_i32();
6333         TCGv_i32 t1 = tcg_temp_new_i32();
6334         TCGv_i32 t2 = tcg_temp_new_i32();
6335 
6336         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6337         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6338         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6339         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6340         write_fp_sreg(s, a->rd, t0);
6341     }
6342     return true;
6343 }
6344 
6345 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6346 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6347 
6348 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6349                                           NeonGenTwo64OpFn *fn, bool acc)
6350 {
6351     if (fp_access_check(s)) {
6352         TCGv_i64 t0 = tcg_temp_new_i64();
6353         TCGv_i64 t1 = tcg_temp_new_i64();
6354         TCGv_i64 t2 = tcg_temp_new_i64();
6355 
6356         if (acc) {
6357             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6358         }
6359         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6360         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6361         fn(t0, t1, t2);
6362 
6363         /* Clear the whole register first, then store scalar. */
6364         clear_vec(s, a->rd);
6365         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6366     }
6367     return true;
6368 }
6369 
6370 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6371       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6372 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6373       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6374 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6375       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6376 
6377 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6378                               gen_helper_gvec_3_ptr * const fns[3])
6379 {
6380     MemOp esz = a->esz;
6381     int check = fp_access_check_vector_hsd(s, a->q, esz);
6382 
6383     if (check <= 0) {
6384         return check == 0;
6385     }
6386 
6387     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6388                       esz == MO_16, a->idx, fns[esz - 1]);
6389     return true;
6390 }
6391 
6392 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6393     gen_helper_gvec_fmul_idx_h,
6394     gen_helper_gvec_fmul_idx_s,
6395     gen_helper_gvec_fmul_idx_d,
6396 };
6397 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6398 
6399 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6400     gen_helper_gvec_fmulx_idx_h,
6401     gen_helper_gvec_fmulx_idx_s,
6402     gen_helper_gvec_fmulx_idx_d,
6403 };
6404 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6405 
6406 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6407 {
6408     static gen_helper_gvec_4_ptr * const fns[3] = {
6409         gen_helper_gvec_fmla_idx_h,
6410         gen_helper_gvec_fmla_idx_s,
6411         gen_helper_gvec_fmla_idx_d,
6412     };
6413     MemOp esz = a->esz;
6414     int check = fp_access_check_vector_hsd(s, a->q, esz);
6415 
6416     if (check <= 0) {
6417         return check == 0;
6418     }
6419 
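    /* Bit 0 of the helper's data carries the negate flag for FMLS. */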
6420     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6421                       esz == MO_16, (a->idx << 1) | neg,
6422                       fns[esz - 1]);
6423     return true;
6424 }
6425 
6426 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6427 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6428 
6429 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6430 {
6431     if (fp_access_check(s)) {
6432         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6433         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6434                            vec_full_reg_offset(s, a->rn),
6435                            vec_full_reg_offset(s, a->rm), tcg_env,
6436                            a->q ? 16 : 8, vec_full_reg_size(s),
6437                            data, gen_helper_gvec_fmlal_idx_a64);
6438     }
6439     return true;
6440 }
6441 
6442 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6443 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6444 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6445 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6446 
6447 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6448                                gen_helper_gvec_3 * const fns[2])
6449 {
6450     assert(a->esz == MO_16 || a->esz == MO_32);
6451     if (fp_access_check(s)) {
6452         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6453     }
6454     return true;
6455 }
6456 
6457 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6458     gen_helper_gvec_mul_idx_h,
6459     gen_helper_gvec_mul_idx_s,
6460 };
6461 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6462 
6463 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6464 {
6465     static gen_helper_gvec_4 * const fns[2][2] = {
6466         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6467         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6468     };
6469 
6470     assert(a->esz == MO_16 || a->esz == MO_32);
6471     if (fp_access_check(s)) {
6472         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6473                          a->idx, fns[a->esz - 1][sub]);
6474     }
6475     return true;
6476 }
6477 
6478 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6479 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6480 
6481 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6482                                   gen_helper_gvec_4 * const fns[2])
6483 {
6484     assert(a->esz == MO_16 || a->esz == MO_32);
6485     if (fp_access_check(s)) {
6486         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6487                            vec_full_reg_offset(s, a->rn),
6488                            vec_full_reg_offset(s, a->rm),
6489                            offsetof(CPUARMState, vfp.qc),
6490                            a->q ? 16 : 8, vec_full_reg_size(s),
6491                            a->idx, fns[a->esz - 1]);
6492     }
6493     return true;
6494 }
6495 
6496 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6497     gen_helper_neon_sqdmulh_idx_h,
6498     gen_helper_neon_sqdmulh_idx_s,
6499 };
6500 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6501 
6502 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6503     gen_helper_neon_sqrdmulh_idx_h,
6504     gen_helper_neon_sqrdmulh_idx_s,
6505 };
6506 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6507 
6508 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6509     gen_helper_neon_sqrdmlah_idx_h,
6510     gen_helper_neon_sqrdmlah_idx_s,
6511 };
6512 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6513            f_vector_idx_sqrdmlah)
6514 
6515 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6516     gen_helper_neon_sqrdmlsh_idx_h,
6517     gen_helper_neon_sqrdmlsh_idx_s,
6518 };
6519 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6520            f_vector_idx_sqrdmlsh)
6521 
6522 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6523                               gen_helper_gvec_4 *fn)
6524 {
6525     if (fp_access_check(s)) {
6526         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6527     }
6528     return true;
6529 }
6530 
6531 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6532                                   gen_helper_gvec_4_ptr *fn)
6533 {
6534     if (fp_access_check(s)) {
6535         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6536     }
6537     return true;
6538 }
6539 
6540 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6541 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6542 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6543            gen_helper_gvec_sudot_idx_b)
6544 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6545            gen_helper_gvec_usdot_idx_b)
6546 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6547            gen_helper_gvec_bfdot_idx)
6548 
6549 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6550 {
6551     if (!dc_isar_feature(aa64_bf16, s)) {
6552         return false;
6553     }
6554     if (fp_access_check(s)) {
6555         /* Q bit selects BFMLALB vs BFMLALT. */
6556         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
6557                           (a->idx << 1) | a->q,
6558                           gen_helper_gvec_bfmlal_idx);
6559     }
6560     return true;
6561 }
6562 
6563 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6564 {
6565     gen_helper_gvec_4_ptr *fn;
6566 
6567     if (!dc_isar_feature(aa64_fcma, s)) {
6568         return false;
6569     }
6570     switch (a->esz) {
6571     case MO_16:
6572         if (!dc_isar_feature(aa64_fp16, s)) {
6573             return false;
6574         }
6575         fn = gen_helper_gvec_fcmlah_idx;
6576         break;
6577     case MO_32:
6578         fn = gen_helper_gvec_fcmlas_idx;
6579         break;
6580     default:
6581         g_assert_not_reached();
6582     }
6583     if (fp_access_check(s)) {
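        /* The helper data word packs the rotation in bits [1:0] and
         * the element index above them. */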
6584         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6585                           a->esz == MO_16, (a->idx << 2) | a->rot, fn);
6586     }
6587     return true;
6588 }
6589 
6590 /*
6591  * Advanced SIMD Scalar Pairwise
6592  */
6593 
6594 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6595 {
6596     switch (a->esz) {
6597     case MO_64:
6598         if (fp_access_check(s)) {
6599             TCGv_i64 t0 = tcg_temp_new_i64();
6600             TCGv_i64 t1 = tcg_temp_new_i64();
6601 
6602             read_vec_element(s, t0, a->rn, 0, MO_64);
6603             read_vec_element(s, t1, a->rn, 1, MO_64);
6604             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6605             write_fp_dreg(s, a->rd, t0);
6606         }
6607         break;
6608     case MO_32:
6609         if (fp_access_check(s)) {
6610             TCGv_i32 t0 = tcg_temp_new_i32();
6611             TCGv_i32 t1 = tcg_temp_new_i32();
6612 
6613             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6614             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6615             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6616             write_fp_sreg(s, a->rd, t0);
6617         }
6618         break;
6619     case MO_16:
6620         if (!dc_isar_feature(aa64_fp16, s)) {
6621             return false;
6622         }
6623         if (fp_access_check(s)) {
6624             TCGv_i32 t0 = tcg_temp_new_i32();
6625             TCGv_i32 t1 = tcg_temp_new_i32();
6626 
6627             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6628             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6629             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6630             write_fp_sreg(s, a->rd, t0);
6631         }
6632         break;
6633     default:
6634         g_assert_not_reached();
6635     }
6636     return true;
6637 }
6638 
6639 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6640 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
6641 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
6642 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6643 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6644 
6645 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6646 {
6647     if (fp_access_check(s)) {
6648         TCGv_i64 t0 = tcg_temp_new_i64();
6649         TCGv_i64 t1 = tcg_temp_new_i64();
6650 
6651         read_vec_element(s, t0, a->rn, 0, MO_64);
6652         read_vec_element(s, t1, a->rn, 1, MO_64);
6653         tcg_gen_add_i64(t0, t0, t1);
6654         write_fp_dreg(s, a->rd, t0);
6655     }
6656     return true;
6657 }
6658 
6659 /*
6660  * Floating-point conditional select
6661  */
6662 
6663 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
6664 {
6665     TCGv_i64 t_true, t_false;
6666     DisasCompare64 c;
6667     int check = fp_access_check_scalar_hsd(s, a->esz);
6668 
6669     if (check <= 0) {
6670         return check == 0;
6671     }
6672 
6673     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6674     t_true = tcg_temp_new_i64();
6675     t_false = tcg_temp_new_i64();
6676     read_vec_element(s, t_true, a->rn, 0, a->esz);
6677     read_vec_element(s, t_false, a->rm, 0, a->esz);
6678 
6679     a64_test_cc(&c, a->cond);
6680     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6681                         t_true, t_false);
6682 
6683     /*
6684      * Note that sregs & hregs write back zeros to the high bits,
6685      * and we've already done the zero-extension.
6686      */
6687     write_fp_dreg(s, a->rd, t_true);
6688     return true;
6689 }
6690 
6691 /*
6692  * Advanced SIMD Extract
6693  */
6694 
6695 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
6696 {
6697     if (fp_access_check(s)) {
6698         TCGv_i64 lo = read_fp_dreg(s, a->rn);
6699         if (a->imm != 0) {
6700             TCGv_i64 hi = read_fp_dreg(s, a->rm);
6701             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
6702         }
6703         write_fp_dreg(s, a->rd, lo);
6704     }
6705     return true;
6706 }
6707 
6708 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
6709 {
6710     TCGv_i64 lo, hi;
6711     int pos = (a->imm & 7) * 8;
6712     int elt = a->imm >> 3;
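    /*
     * Split the byte offset: 'elt' indexes 64-bit elements of the
     * rn:rm pair and 'pos' is the bit offset within an element.
     * E.g. imm == 11 gives elt == 1, pos == 24: the result begins
     * three bytes into the second doubleword of rn.
     */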
6713 
6714     if (!fp_access_check(s)) {
6715         return true;
6716     }
6717 
6718     lo = tcg_temp_new_i64();
6719     hi = tcg_temp_new_i64();
6720 
6721     read_vec_element(s, lo, a->rn, elt, MO_64);
6722     elt++;
6723     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
6724     elt++;
6725 
6726     if (pos != 0) {
6727         TCGv_i64 hh = tcg_temp_new_i64();
6728         tcg_gen_extract2_i64(lo, lo, hi, pos);
6729         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
6730         tcg_gen_extract2_i64(hi, hi, hh, pos);
6731     }
6732 
6733     write_vec_element(s, lo, a->rd, 0, MO_64);
6734     write_vec_element(s, hi, a->rd, 1, MO_64);
6735     clear_vec_high(s, true, a->rd);
6736     return true;
6737 }
6738 
6739 /*
6740  * Floating-point data-processing (3 source)
6741  */
6742 
6743 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
6744 {
6745     TCGv_ptr fpst;
6746 
6747     /*
6748      * These are fused multiply-add.  Note that doing the negations here
6749      * as separate steps is correct: an input NaN should come out with
6750      * its sign bit flipped if it is a negated-input.
6751      */
6752     switch (a->esz) {
6753     case MO_64:
6754         if (fp_access_check(s)) {
6755             TCGv_i64 tn = read_fp_dreg(s, a->rn);
6756             TCGv_i64 tm = read_fp_dreg(s, a->rm);
6757             TCGv_i64 ta = read_fp_dreg(s, a->ra);
6758 
6759             if (neg_a) {
6760                 gen_vfp_negd(ta, ta);
6761             }
6762             if (neg_n) {
6763                 gen_vfp_negd(tn, tn);
6764             }
6765             fpst = fpstatus_ptr(FPST_A64);
6766             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
6767             write_fp_dreg(s, a->rd, ta);
6768         }
6769         break;
6770 
6771     case MO_32:
6772         if (fp_access_check(s)) {
6773             TCGv_i32 tn = read_fp_sreg(s, a->rn);
6774             TCGv_i32 tm = read_fp_sreg(s, a->rm);
6775             TCGv_i32 ta = read_fp_sreg(s, a->ra);
6776 
6777             if (neg_a) {
6778                 gen_vfp_negs(ta, ta);
6779             }
6780             if (neg_n) {
6781                 gen_vfp_negs(tn, tn);
6782             }
6783             fpst = fpstatus_ptr(FPST_A64);
6784             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
6785             write_fp_sreg(s, a->rd, ta);
6786         }
6787         break;
6788 
6789     case MO_16:
6790         if (!dc_isar_feature(aa64_fp16, s)) {
6791             return false;
6792         }
6793         if (fp_access_check(s)) {
6794             TCGv_i32 tn = read_fp_hreg(s, a->rn);
6795             TCGv_i32 tm = read_fp_hreg(s, a->rm);
6796             TCGv_i32 ta = read_fp_hreg(s, a->ra);
6797 
6798             if (neg_a) {
6799                 gen_vfp_negh(ta, ta);
6800             }
6801             if (neg_n) {
6802                 gen_vfp_negh(tn, tn);
6803             }
6804             fpst = fpstatus_ptr(FPST_A64_F16);
6805             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
6806             write_fp_sreg(s, a->rd, ta);
6807         }
6808         break;
6809 
6810     default:
6811         return false;
6812     }
6813     return true;
6814 }
6815 
6816 TRANS(FMADD, do_fmadd, a, false, false)
6817 TRANS(FNMADD, do_fmadd, a, true, true)
6818 TRANS(FMSUB, do_fmadd, a, false, true)
6819 TRANS(FNMSUB, do_fmadd, a, true, false)
6820 
6821 /*
6822  * Advanced SIMD Across Lanes
6823  */
6824 
6825 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
6826                              MemOp src_sign, NeonGenTwo64OpFn *fn)
6827 {
6828     TCGv_i64 tcg_res, tcg_elt;
6829     MemOp src_mop = a->esz | src_sign;
6830     int elements = (a->q ? 16 : 8) >> a->esz;
6831 
6832     /* Reject MO_64, and MO_32 without Q: we require at least 4 elements. */
6833     if (elements < 4) {
6834         return false;
6835     }
6836     if (!fp_access_check(s)) {
6837         return true;
6838     }
6839 
6840     tcg_res = tcg_temp_new_i64();
6841     tcg_elt = tcg_temp_new_i64();
6842 
6843     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
6844     for (int i = 1; i < elements; i++) {
6845         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
6846         fn(tcg_res, tcg_res, tcg_elt);
6847     }
6848 
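    /*
     * ADDV and the min/max reductions produce an esz-sized result,
     * while SADDLV/UADDLV widen by one step, hence 'esz + widen'.
     */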
6849     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
6850     write_fp_dreg(s, a->rd, tcg_res);
6851     return true;
6852 }
6853 
6854 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
6855 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
6856 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
6857 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
6858 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
6859 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
6860 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
6861 
6862 /*
6863  * do_reduction_op: helper for do_fp_reduction.
6864  *
6865  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6866  * important for correct NaN propagation that we do these
6867  * operations in exactly the order specified by the pseudocode:
6868  * pairwise over halves, so that e.g. four elements reduce as
6869  * fn(fn(e0, e1), fn(e2, e3)).  This is a recursive function.
6870  */
6871 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
6872                                 int ebase, int ecount, TCGv_ptr fpst,
6873                                 NeonGenTwoSingleOpFn *fn)
6874 {
6875     if (ecount == 1) {
6876         TCGv_i32 tcg_elem = tcg_temp_new_i32();
6877         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
6878         return tcg_elem;
6879     } else {
6880         int half = ecount >> 1;
6881         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6882 
6883         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
6884         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
6885         tcg_res = tcg_temp_new_i32();
6886 
6887         fn(tcg_res, tcg_lo, tcg_hi, fpst);
6888         return tcg_res;
6889     }
6890 }
6891 
6892 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
6893                               NeonGenTwoSingleOpFn *fn)
6894 {
6895     if (fp_access_check(s)) {
6896         MemOp esz = a->esz;
6897         int elts = (a->q ? 16 : 8) >> esz;
6898         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
6899         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
6900         write_fp_sreg(s, a->rd, res);
6901     }
6902     return true;
6903 }
6904 
6905 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh)
6906 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh)
6907 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh)
6908 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh)
6909 
6910 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
6911 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
6912 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
6913 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
6914 
6915 /*
6916  * Floating-point Immediate
6917  */
6918 
6919 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
6920 {
6921     int check = fp_access_check_scalar_hsd(s, a->esz);
6922     uint64_t imm;
6923 
6924     if (check <= 0) {
6925         return check == 0;
6926     }
6927 
6928     imm = vfp_expand_imm(a->esz, a->imm);
6929     write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
6930     return true;
6931 }
6932 
6933 /*
6934  * Floating point compare, conditional compare
6935  */
6936 
6937 static void handle_fp_compare(DisasContext *s, int size,
6938                               unsigned int rn, unsigned int rm,
6939                               bool cmp_with_zero, bool signal_all_nans)
6940 {
6941     TCGv_i64 tcg_flags = tcg_temp_new_i64();
6942     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
6943 
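    /*
     * The A64 compare helpers return the NZCV flags packed into bits
     * [31:28] of their result, the same layout gen_set_nzcv() expects
     * (compare the a->nzcv << 28 constant used for FCCMP below).
     */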
6944     if (size == MO_64) {
6945         TCGv_i64 tcg_vn, tcg_vm;
6946 
6947         tcg_vn = read_fp_dreg(s, rn);
6948         if (cmp_with_zero) {
6949             tcg_vm = tcg_constant_i64(0);
6950         } else {
6951             tcg_vm = read_fp_dreg(s, rm);
6952         }
6953         if (signal_all_nans) {
6954             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6955         } else {
6956             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6957         }
6958     } else {
6959         TCGv_i32 tcg_vn = tcg_temp_new_i32();
6960         TCGv_i32 tcg_vm = tcg_temp_new_i32();
6961 
6962         read_vec_element_i32(s, tcg_vn, rn, 0, size);
6963         if (cmp_with_zero) {
6964             tcg_gen_movi_i32(tcg_vm, 0);
6965         } else {
6966             read_vec_element_i32(s, tcg_vm, rm, 0, size);
6967         }
6968 
6969         switch (size) {
6970         case MO_32:
6971             if (signal_all_nans) {
6972                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6973             } else {
6974                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6975             }
6976             break;
6977         case MO_16:
6978             if (signal_all_nans) {
6979                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6980             } else {
6981                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6982             }
6983             break;
6984         default:
6985             g_assert_not_reached();
6986         }
6987     }
6988 
6989     gen_set_nzcv(tcg_flags);
6990 }
6991 
6992 /* FCMP, FCMPE */
6993 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
6994 {
6995     int check = fp_access_check_scalar_hsd(s, a->esz);
6996 
6997     if (check <= 0) {
6998         return check == 0;
6999     }
7000 
7001     handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7002     return true;
7003 }
7004 
7005 /* FCCMP, FCCMPE */
7006 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7007 {
7008     TCGLabel *label_continue = NULL;
7009     int check = fp_access_check_scalar_hsd(s, a->esz);
7010 
7011     if (check <= 0) {
7012         return check == 0;
7013     }
7014 
7015     if (a->cond < 0x0e) { /* not always */
7016         TCGLabel *label_match = gen_new_label();
7017         label_continue = gen_new_label();
7018         arm_gen_test_cc(a->cond, label_match);
7019         /* nomatch: */
7020         gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7021         tcg_gen_br(label_continue);
7022         gen_set_label(label_match);
7023     }
7024 
7025     handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7026 
7027     if (label_continue) {
7028         gen_set_label(label_continue);
7029     }
7030     return true;
7031 }
7032 
7033 /*
7034  * Advanced SIMD Modified Immediate
7035  */
7036 
7037 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7038 {
7039     if (!dc_isar_feature(aa64_fp16, s)) {
7040         return false;
7041     }
7042     if (fp_access_check(s)) {
7043         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7044                              a->q ? 16 : 8, vec_full_reg_size(s),
7045                              vfp_expand_imm(MO_16, a->abcdefgh));
7046     }
7047     return true;
7048 }
7049 
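/*
 * GVecGen2iFn-compatible wrapper: MOVI ignores the source vector, so
 * vece and aofs are unused and the immediate is always replicated at
 * 64-bit granularity.
 */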
7050 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7051                      int64_t c, uint32_t oprsz, uint32_t maxsz)
7052 {
7053     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7054 }
7055 
7056 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7057 {
7058     GVecGen2iFn *fn;
7059 
7060     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7061     if ((a->cmode & 1) && a->cmode < 12) {
7062         /* For op=1, the imm will be inverted, so BIC becomes AND. */
7063         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7064     } else {
7065         /* There is one unallocated cmode/op combination in this space */
7066         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7067             return false;
7068         }
7069         fn = gen_movi;
7070     }
7071 
7072     if (fp_access_check(s)) {
7073         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7074         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7075     }
7076     return true;
7077 }
7078 
7079 /*
7080  * Advanced SIMD Shift by Immediate
7081  */
7082 
7083 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7084 {
7085     if (fp_access_check(s)) {
7086         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7087     }
7088     return true;
7089 }
7090 
7091 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7092 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7093 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7094 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7095 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7096 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7097 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7098 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7099 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7100 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7101 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7102 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7103 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7104 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7105 
7106 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7107 {
7108     TCGv_i64 tcg_rn, tcg_rd;
7109     int esz = a->esz;
7110     int esize;
7111 
7112     if (!fp_access_check(s)) {
7113         return true;
7114     }
7115 
7116     /*
7117      * For the LL variants the store is wider than the load, so if
7118      * rd == rn we would overwrite parts of our input before reading
7119      * them.  So load everything up front and extract in the main loop.
7120      */
7121     tcg_rd = tcg_temp_new_i64();
7122     tcg_rn = tcg_temp_new_i64();
7123     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7124 
7125     esize = 8 << esz;
7126     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7127         if (is_u) {
7128             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7129         } else {
7130             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7131         }
7132         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7133         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7134     }
7135     clear_vec_high(s, true, a->rd);
7136     return true;
7137 }
7138 
7139 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7140 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7141 
7142 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7143 {
7144     assert(shift >= 0 && shift <= 64);
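    /* A shift by 64 fills the result with copies of the sign bit,
     * which an arithmetic shift by 63 already does, so clamp. */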
7145     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7146 }
7147 
7148 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7149 {
7150     assert(shift >= 0 && shift <= 64);
7151     if (shift == 64) {
7152         tcg_gen_movi_i64(dst, 0);
7153     } else {
7154         tcg_gen_shri_i64(dst, src, shift);
7155     }
7156 }
7157 
7158 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7159 {
7160     gen_sshr_d(src, src, shift);
7161     tcg_gen_add_i64(dst, dst, src);
7162 }
7163 
7164 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7165 {
7166     gen_ushr_d(src, src, shift);
7167     tcg_gen_add_i64(dst, dst, src);
7168 }
7169 
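/*
 * Rounding right shifts compute (src + (1 << (shift - 1))) >> shift,
 * i.e. round-half-up at the dropped bit position.  For example, with
 * shift == 2, src == 6 (1.5 after shifting) yields (6 + 2) >> 2 == 2,
 * where a plain shift would truncate to 1.
 */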
7170 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7171 {
7172     assert(shift >= 0 && shift <= 32);
7173     if (shift) {
7174         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7175         tcg_gen_add_i64(dst, src, rnd);
7176         tcg_gen_sari_i64(dst, dst, shift);
7177     } else {
7178         tcg_gen_mov_i64(dst, src);
7179     }
7180 }
7181 
7182 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7183 {
7184     assert(shift >= 0 && shift <= 32);
7185     if (shift) {
7186         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7187         tcg_gen_add_i64(dst, src, rnd);
7188         tcg_gen_shri_i64(dst, dst, shift);
7189     } else {
7190         tcg_gen_mov_i64(dst, src);
7191     }
7192 }
7193 
7194 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7195 {
7196     assert(shift >= 0 && shift <= 64);
7197     if (shift == 0) {
7198         tcg_gen_mov_i64(dst, src);
7199     } else if (shift == 64) {
7200         /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7201         tcg_gen_movi_i64(dst, 0);
7202     } else {
7203         TCGv_i64 rnd = tcg_temp_new_i64();
7204         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7205         tcg_gen_sari_i64(dst, src, shift);
7206         tcg_gen_add_i64(dst, dst, rnd);
7207     }
7208 }
7209 
7210 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7211 {
7212     assert(shift >= 0 && shift <= 64);
7213     if (shift == 0) {
7214         tcg_gen_mov_i64(dst, src);
7215     } else if (shift == 64) {
7216         /* Rounding will propagate bit 63 into bit 64. */
7217         tcg_gen_shri_i64(dst, src, 63);
7218     } else {
7219         TCGv_i64 rnd = tcg_temp_new_i64();
7220         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7221         tcg_gen_shri_i64(dst, src, shift);
7222         tcg_gen_add_i64(dst, dst, rnd);
7223     }
7224 }
7225 
7226 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7227 {
7228     gen_srshr_d(src, src, shift);
7229     tcg_gen_add_i64(dst, dst, src);
7230 }
7231 
7232 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7233 {
7234     gen_urshr_d(src, src, shift);
7235     tcg_gen_add_i64(dst, dst, src);
7236 }
7237 
7238 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7239 {
7240     /* If shift is 64, dst is unchanged. */
7241     if (shift != 64) {
7242         tcg_gen_shri_i64(src, src, shift);
7243         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7244     }
7245 }
7246 
7247 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7248 {
7249     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7250 }
7251 
7252 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7253                                     WideShiftImmFn * const fns[3], MemOp sign)
7254 {
7255     TCGv_i64 tcg_rn, tcg_rd;
7256     int esz = a->esz;
7257     int esize;
7258     WideShiftImmFn *fn;
7259 
7260     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7261 
7262     if (!fp_access_check(s)) {
7263         return true;
7264     }
7265 
7266     tcg_rn = tcg_temp_new_i64();
7267     tcg_rd = tcg_temp_new_i64();
7268     tcg_gen_movi_i64(tcg_rd, 0);
7269 
7270     fn = fns[esz];
7271     esize = 8 << esz;
7272     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7273         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7274         fn(tcg_rn, tcg_rn, a->imm);
7275         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7276     }
7277 
7278     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7279     clear_vec_high(s, a->q, a->rd);
7280     return true;
7281 }
7282 
7283 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7284 {
7285     tcg_gen_sari_i64(d, s, i);
7286     tcg_gen_ext16u_i64(d, d);
7287     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7288 }
7289 
7290 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7291 {
7292     tcg_gen_sari_i64(d, s, i);
7293     tcg_gen_ext32u_i64(d, d);
7294     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7295 }
7296 
7297 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7298 {
7299     gen_sshr_d(d, s, i);
7300     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7301 }
7302 
7303 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7304 {
7305     tcg_gen_shri_i64(d, s, i);
7306     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7307 }
7308 
7309 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7310 {
7311     tcg_gen_shri_i64(d, s, i);
7312     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7313 }
7314 
7315 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7316 {
7317     gen_ushr_d(d, s, i);
7318     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7319 }
7320 
7321 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7322 {
7323     tcg_gen_sari_i64(d, s, i);
7324     tcg_gen_ext16u_i64(d, d);
7325     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7326 }
7327 
7328 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7329 {
7330     tcg_gen_sari_i64(d, s, i);
7331     tcg_gen_ext32u_i64(d, d);
7332     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7333 }
7334 
7335 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7336 {
7337     gen_sshr_d(d, s, i);
7338     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7339 }
7340 
7341 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7342 {
7343     gen_srshr_bhs(d, s, i);
7344     tcg_gen_ext16u_i64(d, d);
7345     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7346 }
7347 
7348 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7349 {
7350     gen_srshr_bhs(d, s, i);
7351     tcg_gen_ext32u_i64(d, d);
7352     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7353 }
7354 
7355 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7356 {
7357     gen_srshr_d(d, s, i);
7358     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7359 }
7360 
7361 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7362 {
7363     gen_urshr_bhs(d, s, i);
7364     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7365 }
7366 
7367 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7368 {
7369     gen_urshr_bhs(d, s, i);
7370     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7371 }
7372 
7373 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7374 {
7375     gen_urshr_d(d, s, i);
7376     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7377 }
7378 
7379 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7380 {
7381     gen_srshr_bhs(d, s, i);
7382     tcg_gen_ext16u_i64(d, d);
7383     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7384 }
7385 
7386 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7387 {
7388     gen_srshr_bhs(d, s, i);
7389     tcg_gen_ext32u_i64(d, d);
7390     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7391 }
7392 
7393 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7394 {
7395     gen_srshr_d(d, s, i);
7396     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7397 }
7398 
7399 static WideShiftImmFn * const shrn_fns[] = {
7400     tcg_gen_shri_i64,
7401     tcg_gen_shri_i64,
7402     gen_ushr_d,
7403 };
7404 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7405 
7406 static WideShiftImmFn * const rshrn_fns[] = {
7407     gen_urshr_bhs,
7408     gen_urshr_bhs,
7409     gen_urshr_d,
7410 };
7411 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7412 
7413 static WideShiftImmFn * const sqshrn_fns[] = {
7414     gen_sqshrn_b,
7415     gen_sqshrn_h,
7416     gen_sqshrn_s,
7417 };
7418 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7419 
7420 static WideShiftImmFn * const uqshrn_fns[] = {
7421     gen_uqshrn_b,
7422     gen_uqshrn_h,
7423     gen_uqshrn_s,
7424 };
7425 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7426 
7427 static WideShiftImmFn * const sqshrun_fns[] = {
7428     gen_sqshrun_b,
7429     gen_sqshrun_h,
7430     gen_sqshrun_s,
7431 };
7432 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7433 
7434 static WideShiftImmFn * const sqrshrn_fns[] = {
7435     gen_sqrshrn_b,
7436     gen_sqrshrn_h,
7437     gen_sqrshrn_s,
7438 };
7439 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7440 
7441 static WideShiftImmFn * const uqrshrn_fns[] = {
7442     gen_uqrshrn_b,
7443     gen_uqrshrn_h,
7444     gen_uqrshrn_s,
7445 };
7446 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7447 
7448 static WideShiftImmFn * const sqrshrun_fns[] = {
7449     gen_sqrshrun_b,
7450     gen_sqrshrun_h,
7451     gen_sqrshrun_s,
7452 };
7453 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7454 
7455 /*
7456  * Advanced SIMD Scalar Shift by Immediate
7457  */
7458 
7459 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7460                                 WideShiftImmFn *fn, bool accumulate,
7461                                 MemOp sign)
7462 {
7463     if (fp_access_check(s)) {
7464         TCGv_i64 rd = tcg_temp_new_i64();
7465         TCGv_i64 rn = tcg_temp_new_i64();
7466 
7467         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7468         if (accumulate) {
7469             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7470         }
7471         fn(rd, rn, a->imm);
7472         write_fp_dreg(s, a->rd, rd);
7473     }
7474     return true;
7475 }
7476 
7477 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7478 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7479 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7480 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7481 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7482 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7483 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7484 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7485 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7486 
7487 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7488 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7489 
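/*
 * The Neon saturating-shift helpers used below operate on 32-bit
 * values (tracking saturation in env), so truncate the 64-bit scalar
 * around the call and zero-extend the result back.
 */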
7490 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7491                               NeonGenTwoOpEnvFn *fn)
7492 {
7493     TCGv_i32 t = tcg_temp_new_i32();
7494     tcg_gen_extrl_i64_i32(t, s);
7495     fn(t, tcg_env, t, tcg_constant_i32(i));
7496     tcg_gen_extu_i32_i64(d, t);
7497 }
7498 
7499 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7500 {
7501     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7502 }
7503 
7504 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7505 {
7506     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7507 }
7508 
7509 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7510 {
7511     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7512 }
7513 
7514 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7515 {
7516     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7517 }
7518 
7519 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7520 {
7521     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7522 }
7523 
7524 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7525 {
7526     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7527 }
7528 
7529 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7530 {
7531     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7532 }
7533 
7534 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7535 {
7536     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7537 }
7538 
7539 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7540 {
7541     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7542 }
7543 
7544 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7545 {
7546     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7547 }
7548 
7549 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7550 {
7551     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7552 }
7553 
7554 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7555 {
7556     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7557 }
7558 
7559 static WideShiftImmFn * const f_scalar_sqshli[] = {
7560     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7561 };
7562 
7563 static WideShiftImmFn * const f_scalar_uqshli[] = {
7564     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7565 };
7566 
7567 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7568     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7569 };
7570 
7571 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7572 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7573 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7574 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7575 
7576 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7577                                        WideShiftImmFn * const fns[3],
7578                                        MemOp sign, bool zext)
7579 {
7580     MemOp esz = a->esz;
7581 
7582     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7583 
7584     if (fp_access_check(s)) {
7585         TCGv_i64 rd = tcg_temp_new_i64();
7586         TCGv_i64 rn = tcg_temp_new_i64();
7587 
7588         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7589         fns[esz](rd, rn, a->imm);
7590         if (zext) {
7591             tcg_gen_ext_i64(rd, rd, esz);
7592         }
7593         write_fp_dreg(s, a->rd, rd);
7594     }
7595     return true;
7596 }
7597 
7598 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7599 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7600 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7601 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7602 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7603 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7604 
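/*
 * AArch64 integer division never traps: per the architecture, the
 * division helpers yield 0 for division by zero, and INT64_MIN for
 * the INT64_MIN / -1 signed overflow case.
 */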
7605 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7606 {
7607     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7608     tcg_rd = cpu_reg(s, a->rd);
7609 
7610     if (!a->sf && is_signed) {
7611         tcg_n = tcg_temp_new_i64();
7612         tcg_m = tcg_temp_new_i64();
7613         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7614         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7615     } else {
7616         tcg_n = read_cpu_reg(s, a->rn, a->sf);
7617         tcg_m = read_cpu_reg(s, a->rm, a->sf);
7618     }
7619 
7620     if (is_signed) {
7621         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7622     } else {
7623         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7624     }
7625 
7626     if (!a->sf) { /* zero extend final result */
7627         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7628     }
7629     return true;
7630 }
7631 
7632 TRANS(SDIV, do_div, a, true)
7633 TRANS(UDIV, do_div, a, false)
7634 
7635 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7636  * Note that it is the caller's responsibility to ensure that the
7637  * shift amount is in range (i.e. 0..31 or 0..63) and provide the ARM
7638  * mandated semantics for out of range shifts.
7639  */
7640 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7641                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7642 {
7643     switch (shift_type) {
7644     case A64_SHIFT_TYPE_LSL:
7645         tcg_gen_shl_i64(dst, src, shift_amount);
7646         break;
7647     case A64_SHIFT_TYPE_LSR:
7648         tcg_gen_shr_i64(dst, src, shift_amount);
7649         break;
7650     case A64_SHIFT_TYPE_ASR:
7651         if (!sf) {
7652             tcg_gen_ext32s_i64(dst, src);
7653         }
7654         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
7655         break;
7656     case A64_SHIFT_TYPE_ROR:
7657         if (sf) {
7658             tcg_gen_rotr_i64(dst, src, shift_amount);
7659         } else {
7660             TCGv_i32 t0, t1;
7661             t0 = tcg_temp_new_i32();
7662             t1 = tcg_temp_new_i32();
7663             tcg_gen_extrl_i64_i32(t0, src);
7664             tcg_gen_extrl_i64_i32(t1, shift_amount);
7665             tcg_gen_rotr_i32(t0, t0, t1);
7666             tcg_gen_extu_i32_i64(dst, t0);
7667         }
7668         break;
7669     default:
7670         /* all shift types are handled above */
7671         g_assert_not_reached();
7672     }
7673 
7674     if (!sf) { /* zero extend final result */
7675         tcg_gen_ext32u_i64(dst, dst);
7676     }
7677 }
7678 
7679 /* Shift a TCGv src by immediate, put result in dst.
7680  * The shift amount must be in range (this should always be true as the
7681  * relevant instructions will UNDEF on bad shift immediates).
7682  */
7683 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
7684                           enum a64_shift_type shift_type, unsigned int shift_i)
7685 {
7686     assert(shift_i < (sf ? 64 : 32));
7687 
7688     if (shift_i == 0) {
7689         tcg_gen_mov_i64(dst, src);
7690     } else {
7691         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
7692     }
7693 }
7694 
7695 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
7696                          enum a64_shift_type shift_type)
7697 {
7698     TCGv_i64 tcg_shift = tcg_temp_new_i64();
7699     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
7700     TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
7701 
7702     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
7703     shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
7704     return true;
7705 }
7706 
7707 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
7708 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
7709 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
7710 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
7711 
7712 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
7713 {
7714     TCGv_i64 tcg_acc, tcg_val, tcg_rd;
7715     TCGv_i32 tcg_bytes;
7716 
7717     switch (a->esz) {
7718     case MO_8:
7719     case MO_16:
7720     case MO_32:
7721         tcg_val = tcg_temp_new_i64();
7722         tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
7723         break;
7724     case MO_64:
7725         tcg_val = cpu_reg(s, a->rm);
7726         break;
7727     default:
7728         g_assert_not_reached();
7729     }
7730     tcg_acc = cpu_reg(s, a->rn);
7731     tcg_bytes = tcg_constant_i32(1 << a->esz);
7732     tcg_rd = cpu_reg(s, a->rd);
7733 
7734     if (crc32c) {
7735         gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
7736     } else {
7737         gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
7738     }
7739     return true;
7740 }
7741 
7742 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
7743 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
7744 
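/*
 * SUBP/SUBPS subtract 56-bit pointers: both operands are
 * sign-extended from bit 55 so that the tag bits in [63:56] do not
 * influence the result.
 */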
7745 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
7746 {
7747     TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
7748     TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
7749     TCGv_i64 tcg_d = cpu_reg(s, a->rd);
7750 
7751     tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
7752     tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
7753 
7754     if (setflag) {
7755         gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
7756     } else {
7757         tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
7758     }
7759     return true;
7760 }
7761 
7762 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
7763 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
7764 
7765 static bool trans_IRG(DisasContext *s, arg_rrr *a)
7766 {
7767     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
7768         TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
7769         TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
7770 
7771         if (s->ata[0]) {
7772             gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
7773         } else {
7774             gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
7775         }
7776         return true;
7777     }
7778     return false;
7779 }
7780 
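/*
 * GMI: extract the allocation tag from bits [59:56] of Xn and set the
 * corresponding bit in the exclusion mask, i.e. Xd = Xm | (1 << tag).
 */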
7781 static bool trans_GMI(DisasContext *s, arg_rrr *a)
7782 {
7783     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
7784         TCGv_i64 t = tcg_temp_new_i64();
7785 
7786         tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
7787         tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
7788         tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
7789         return true;
7790     }
7791     return false;
7792 }
7793 
7794 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
7795 {
7796     if (dc_isar_feature(aa64_pauth, s)) {
7797         gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
7798                          cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
7799         return true;
7800     }
7801     return false;
7802 }
7803 
7804 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
7805 
7806 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
7807 {
7808     fn(cpu_reg(s, rd), cpu_reg(s, rn));
7809     return true;
7810 }
7811 
7812 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7813 {
7814     TCGv_i32 t32 = tcg_temp_new_i32();
7815 
7816     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7817     gen_helper_rbit(t32, t32);
7818     tcg_gen_extu_i32_i64(tcg_rd, t32);
7819 }
7820 
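/*
 * Byte-swap within each 16-bit lane via shift and mask:
 * ((rn & mask) << 8) | ((rn >> 8) & mask), so for the 32-bit case
 * bytes b3.b2.b1.b0 become b2.b3.b0.b1.
 */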
7821 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
7822 {
7823     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7824 
7825     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
7826     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
7827     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
7828     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
7829     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
7830 }
7831 
7832 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7833 {
7834     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
7835 }
7836 
7837 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7838 {
7839     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
7840 }
7841 
7842 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7843 {
7844     tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
7845 }
7846 
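/*
 * REV32 on a 64-bit value: bswap64 reverses all eight bytes, then
 * rotating by 32 restores the order of the two words, leaving each
 * 32-bit half individually byte-reversed.
 */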
7847 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7848 {
7849     tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
7850     tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
7851 }
7852 
7853 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
7854 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
7855 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
7856 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
7857 
7858 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7859 {
7860     TCGv_i32 t32 = tcg_temp_new_i32();
7861 
7862     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7863     tcg_gen_clzi_i32(t32, t32, 32);
7864     tcg_gen_extu_i32_i64(tcg_rd, t32);
7865 }
7866 
7867 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7868 {
7869     tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7870 }
7871 
7872 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7873 {
7874     TCGv_i32 t32 = tcg_temp_new_i32();
7875 
7876     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7877     tcg_gen_clrsb_i32(t32, t32);
7878     tcg_gen_extu_i32_i64(tcg_rd, t32);
7879 }
7880 
7881 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
7882 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
7883 
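/*
 * PAC/AUT with a register or zero modifier.  The Z forms (e.g.
 * PACIZA) are encoded with rn == 31 and use a zero modifier; any
 * other rn in a Z form is unallocated.  When pointer authentication
 * is not active the insn is a NOP here, leaving rd unchanged, hence
 * the unconditional 'return true'.
 */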
7884 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
7885 {
7886     TCGv_i64 tcg_rd, tcg_rn;
7887 
7888     if (a->z) {
7889         if (a->rn != 31) {
7890             return false;
7891         }
7892         tcg_rn = tcg_constant_i64(0);
7893     } else {
7894         tcg_rn = cpu_reg_sp(s, a->rn);
7895     }
7896     if (s->pauth_active) {
7897         tcg_rd = cpu_reg(s, a->rd);
7898         fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
7899     }
7900     return true;
7901 }
7902 
7903 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
7904 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
7905 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
7906 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
7907 
7908 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
7909 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
7910 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
7911 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
7912 
7913 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
7914 {
7915     if (s->pauth_active) {
7916         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7917         fn(tcg_rd, tcg_env, tcg_rd);
7918     }
7919     return true;
7920 }
7921 
7922 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
7923 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
7924 
7925 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
7926                          ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
7927 {
7928     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
7929 
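    /* In the 32-bit case, a shift amount of 32 or more is a reserved
     * encoding. */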
7930     if (!a->sf && (a->sa & (1 << 5))) {
7931         return false;
7932     }
7933 
7934     tcg_rd = cpu_reg(s, a->rd);
7935     tcg_rn = cpu_reg(s, a->rn);
7936 
7937     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
7938     if (a->sa) {
7939         shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
7940     }
7941 
7942     (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
7943     if (!a->sf) {
7944         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7945     }
7946     if (setflags) {
7947         gen_logic_CC(a->sf, tcg_rd);
7948     }
7949     return true;
7950 }
7951 
7952 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
7953 {
7954     /*
7955      * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
7956      * register-register MOV and MVN, so it is worth special casing.
7957      */
7958     if (a->sa == 0 && a->st == 0 && a->rn == 31) {
7959         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
7960         TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
7961 
7962         if (a->n) {
7963             tcg_gen_not_i64(tcg_rd, tcg_rm);
7964             if (!a->sf) {
7965                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7966             }
7967         } else {
7968             if (a->sf) {
7969                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
7970             } else {
7971                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
7972             }
7973         }
7974         return true;
7975     }
7976 
7977     return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
7978 }
7979 
7980 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
7981 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
7982 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
7983 
7984 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
7985                           bool sub_op, bool setflags)
7986 {
7987     TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
7988 
7989     if (a->sa > 4) {
7990         return false;
7991     }
7992 
7993     /* non-flag setting ops may use SP */
7994     if (!setflags) {
7995         tcg_rd = cpu_reg_sp(s, a->rd);
7996     } else {
7997         tcg_rd = cpu_reg(s, a->rd);
7998     }
7999     tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8000 
8001     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8002     ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8003 
8004     tcg_result = tcg_temp_new_i64();
8005     if (!setflags) {
8006         if (sub_op) {
8007             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8008         } else {
8009             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8010         }
8011     } else {
8012         if (sub_op) {
8013             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8014         } else {
8015             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8016         }
8017     }
8018 
8019     if (a->sf) {
8020         tcg_gen_mov_i64(tcg_rd, tcg_result);
8021     } else {
8022         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8023     }
8024     return true;
8025 }
8026 
8027 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8028 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8029 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8030 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8031 
8032 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8033                           bool sub_op, bool setflags)
8034 {
8035     TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8036 
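    /* A shift type of ROR (3), or a shift amount >= 32 in the 32-bit
     * case, is a reserved encoding. */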
8037     if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8038         return false;
8039     }
8040 
8041     tcg_rd = cpu_reg(s, a->rd);
8042     tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8043     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8044 
8045     shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8046 
8047     tcg_result = tcg_temp_new_i64();
8048     if (!setflags) {
8049         if (sub_op) {
8050             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8051         } else {
8052             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8053         }
8054     } else {
8055         if (sub_op) {
8056             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8057         } else {
8058             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8059         }
8060     }
8061 
8062     if (a->sf) {
8063         tcg_gen_mov_i64(tcg_rd, tcg_result);
8064     } else {
8065         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8066     }
8067     return true;
8068 }
8069 
8070 TRANS(ADD_r, do_addsub_reg, a, false, false)
8071 TRANS(SUB_r, do_addsub_reg, a, true, false)
8072 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8073 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8074 
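/*
 * SMULH/UMULH: muls2/mulu2 compute the full 128-bit product in two
 * halves; only the high 64 bits are written back, and the low half
 * is discarded.
 */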
8075 static bool do_mulh(DisasContext *s, arg_rrr *a,
8076                     void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8077 {
8078     TCGv_i64 discard = tcg_temp_new_i64();
8079     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8080     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8081     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8082 
8083     fn(discard, tcg_rd, tcg_rn, tcg_rm);
8084     return true;
8085 }
8086 
8087 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8088 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8089 
8090 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8091                       bool sf, bool is_sub, MemOp mop)
8092 {
8093     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8094     TCGv_i64 tcg_op1, tcg_op2;
8095 
8096     if (mop == MO_64) {
8097         tcg_op1 = cpu_reg(s, a->rn);
8098         tcg_op2 = cpu_reg(s, a->rm);
8099     } else {
8100         tcg_op1 = tcg_temp_new_i64();
8101         tcg_op2 = tcg_temp_new_i64();
8102         tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8103         tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8104     }
8105 
8106     if (a->ra == 31 && !is_sub) {
8107         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8108         tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8109     } else {
8110         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8111         TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8112 
8113         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8114         if (is_sub) {
8115             tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8116         } else {
8117             tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8118         }
8119     }
8120 
8121     if (!sf) {
8122         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8123     }
8124     return true;
8125 }
8126 
8127 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8128 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8129 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8130 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8131 
8132 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8133 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8134 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8135 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8136 
static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
                       bool is_sub, bool setflags)
{
    TCGv_i64 tcg_y, tcg_rn, tcg_rd;

    tcg_rd = cpu_reg(s, a->rd);
    tcg_rn = cpu_reg(s, a->rn);

    if (is_sub) {
        tcg_y = tcg_temp_new_i64();
        tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
    } else {
        tcg_y = cpu_reg(s, a->rm);
    }

    if (setflags) {
        gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
    } else {
        gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
    }
    return true;
}

TRANS(ADC, do_adc_sbc, a, false, false)
TRANS(SBC, do_adc_sbc, a, true, false)
TRANS(ADCS, do_adc_sbc, a, false, true)
TRANS(SBCS, do_adc_sbc, a, true, true)

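/*
 * RMIF (FEAT_FlagM): rotate Xn right by imm, then copy the selected
 * low four bits of the result into NZCV: bit 3 -> N, bit 2 -> Z,
 * bit 1 -> C, bit 0 -> V.  Recall QEMU's flag encoding: N and V live
 * in bit 31 of cpu_NF/cpu_VF, C in bit 0 of cpu_CF, and Z is set
 * when cpu_ZF == 0 -- hence the shifts and the not/and for Z below.
 */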
static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
{
    int mask = a->mask;
    TCGv_i64 tcg_rn;
    TCGv_i32 nzcv;

    if (!dc_isar_feature(aa64_condm_4, s)) {
        return false;
    }

    tcg_rn = read_cpu_reg(s, a->rn, 1);
    tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);

    nzcv = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);

    if (mask & 8) { /* N */
        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
    }
    if (mask & 4) { /* Z */
        tcg_gen_not_i32(cpu_ZF, nzcv);
        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
    }
    if (mask & 2) { /* C */
        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
    }
    if (mask & 1) { /* V */
        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
    }
    return true;
}

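/*
 * SETF8/SETF16 (FEAT_FlagM): with shift == 32 - width this computes
 * N = Xn[width-1], Z = (Xn[width-1:0] == 0), V = Xn[width] ^ Xn[width-1],
 * leaving C unchanged.  The shifted value serves directly as cpu_NF
 * and cpu_ZF; xoring the two adjacent shifts puts the V bit in bit 31.
 */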
static bool do_setf(DisasContext *s, int rn, int shift)
{
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
    tcg_gen_shli_i32(cpu_NF, tmp, shift);
    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
    return true;
}

TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)

/* CCMP, CCMN */
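/*
 * Conditional compare: if the condition holds, NZCV is set from the
 * comparison of Rn with the second operand; otherwise NZCV is forced
 * to the #nzcv immediate.  The comparison is generated unconditionally
 * and the mask logic below selects between its flags and #nzcv.
 */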
static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
{
    TCGv_i32 tcg_t0 = tcg_temp_new_i32();
    TCGv_i32 tcg_t1 = tcg_temp_new_i32();
    TCGv_i32 tcg_t2 = tcg_temp_new_i32();
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn, tcg_y;
    DisasCompare c;
    unsigned nzcv;
    bool has_andc;

    /* Set T0 = !COND.  */
    arm_test_cc(&c, a->cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);

    /* Load the arguments for the new comparison.  */
    if (a->imm) {
        tcg_y = tcg_constant_i64(a->y);
    } else {
        tcg_y = cpu_reg(s, a->y);
    }
    tcg_rn = cpu_reg(s, a->rn);

    /* Set the flags for the new comparison.  */
    if (a->op) {
        gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
    }

    /*
     * If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
    tcg_gen_neg_i32(tcg_t1, tcg_t0);
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);

    nzcv = a->nzcv;
    has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (has_andc) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    if (nzcv & 4) { /* Z */
        if (has_andc) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (has_andc) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (has_andc) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
    return true;
}

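/*
 * CSEL, CSINC, CSINV, CSNEG: else_inc/else_inv select the operation
 * applied to the "else" operand (+1, NOT, or negate when both are set).
 * With Rn == Rm == XZR and exactly one of the two set, this is the
 * CSET or CSETM alias, which reduces to a setcond on the inverted
 * condition.
 */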
static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 zero = tcg_constant_i64(0);
    DisasCompare64 c;

    a64_test_cc(&c, a->cond);

    if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
        /* CSET & CSETM.  */
        if (a->else_inv) {
            tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
                                   tcg_rd, c.value, zero);
        } else {
            tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
                                tcg_rd, c.value, zero);
        }
    } else {
        TCGv_i64 t_true = cpu_reg(s, a->rn);
        TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);

        if (a->else_inv && a->else_inc) {
            tcg_gen_neg_i64(t_false, t_false);
        } else if (a->else_inv) {
            tcg_gen_not_i64(t_false, t_false);
        } else if (a->else_inc) {
            tcg_gen_addi_i64(t_false, t_false, 1);
        }
        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
    }

    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    return true;
}

typedef struct FPScalar1Int {
    void (*gen_h)(TCGv_i32, TCGv_i32);
    void (*gen_s)(TCGv_i32, TCGv_i32);
    void (*gen_d)(TCGv_i64, TCGv_i64);
} FPScalar1Int;

static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
                              const FPScalar1Int *f)
{
    switch (a->esz) {
    case MO_64:
        if (fp_access_check(s)) {
            TCGv_i64 t = read_fp_dreg(s, a->rn);
            f->gen_d(t, t);
            write_fp_dreg(s, a->rd, t);
        }
        break;
    case MO_32:
        if (fp_access_check(s)) {
            TCGv_i32 t = read_fp_sreg(s, a->rn);
            f->gen_s(t, t);
            write_fp_sreg(s, a->rd, t);
        }
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return false;
        }
        if (fp_access_check(s)) {
            TCGv_i32 t = read_fp_hreg(s, a->rn);
            f->gen_h(t, t);
            write_fp_sreg(s, a->rd, t);
        }
        break;
    default:
        return false;
    }
    return true;
}

static const FPScalar1Int f_scalar_fmov = {
    tcg_gen_mov_i32,
    tcg_gen_mov_i32,
    tcg_gen_mov_i64,
};
TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov)

static const FPScalar1Int f_scalar_fabs = {
    gen_vfp_absh,
    gen_vfp_abss,
    gen_vfp_absd,
};
TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs)

static const FPScalar1Int f_scalar_fneg = {
    gen_vfp_negh,
    gen_vfp_negs,
    gen_vfp_negd,
};
TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg)

typedef struct FPScalar1 {
    void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
    void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
    void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
} FPScalar1;

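/*
 * A passed-in rmode >= 0 temporarily overrides the FPCR rounding mode
 * for the duration of the operation (the FRINT{N,P,M,Z,A} and
 * FRINT{32,64}Z variants); rmode < 0 means "use the current mode".
 */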
static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
                          const FPScalar1 *f, int rmode)
{
    TCGv_i32 tcg_rmode = NULL;
    TCGv_ptr fpst;
    TCGv_i64 t64;
    TCGv_i32 t32;
    int check = fp_access_check_scalar_hsd(s, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    if (rmode >= 0) {
        tcg_rmode = gen_set_rmode(rmode, fpst);
    }

    switch (a->esz) {
    case MO_64:
        t64 = read_fp_dreg(s, a->rn);
        f->gen_d(t64, t64, fpst);
        write_fp_dreg(s, a->rd, t64);
        break;
    case MO_32:
        t32 = read_fp_sreg(s, a->rn);
        f->gen_s(t32, t32, fpst);
        write_fp_sreg(s, a->rd, t32);
        break;
    case MO_16:
        t32 = read_fp_hreg(s, a->rn);
        f->gen_h(t32, t32, fpst);
        write_fp_sreg(s, a->rd, t32);
        break;
    default:
        g_assert_not_reached();
    }

    if (rmode >= 0) {
        gen_restore_rmode(tcg_rmode, fpst);
    }
    return true;
}

static const FPScalar1 f_scalar_fsqrt = {
    gen_helper_vfp_sqrth,
    gen_helper_vfp_sqrts,
    gen_helper_vfp_sqrtd,
};
TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)

static const FPScalar1 f_scalar_frint = {
    gen_helper_advsimd_rinth,
    gen_helper_rints,
    gen_helper_rintd,
};
TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)

static const FPScalar1 f_scalar_frintx = {
    gen_helper_advsimd_rinth_exact,
    gen_helper_rints_exact,
    gen_helper_rintd_exact,
};
TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)

static const FPScalar1 f_scalar_bfcvt = {
    .gen_s = gen_helper_bfcvt,
};
TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1)

static const FPScalar1 f_scalar_frint32 = {
    NULL,
    gen_helper_frint32_s,
    gen_helper_frint32_d,
};
TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
           &f_scalar_frint32, FPROUNDING_ZERO)
TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)

static const FPScalar1 f_scalar_frint64 = {
    NULL,
    gen_helper_frint64_s,
    gen_helper_frint64_d,
};
TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
           &f_scalar_frint64, FPROUNDING_ZERO)
TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)

static const FPScalar1 f_scalar_frecpe = {
    gen_helper_recpe_f16,
    gen_helper_recpe_f32,
    gen_helper_recpe_f64,
};
TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1)

static const FPScalar1 f_scalar_frecpx = {
    gen_helper_frecpx_f16,
    gen_helper_frecpx_f32,
    gen_helper_frecpx_f64,
};
TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1)

static const FPScalar1 f_scalar_frsqrte = {
    gen_helper_rsqrte_f16,
    gen_helper_rsqrte_f32,
    gen_helper_rsqrte_f64,
};
TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1)

static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();
        TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

        gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
        write_fp_dreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i32 tmp = read_fp_sreg(s, a->rn);
        TCGv_i32 ahp = get_ahp_flag();
        TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

        gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
        /* write_fp_sreg is OK here because top half of result is zero */
        write_fp_sreg(s, a->rd, tmp);
    }
    return true;
}

static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

        gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
        write_fp_sreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        TCGv_i32 ahp = get_ahp_flag();
        TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

        gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
        /* write_fp_sreg is OK here because top half of tcg_rd is zero */
        write_fp_sreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
        TCGv_i32 tcg_ahp = get_ahp_flag();

        gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
        write_fp_sreg(s, a->rd, tcg_rd);
    }
    return true;
}

static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();
        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
        TCGv_i32 tcg_ahp = get_ahp_flag();

        gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
        write_fp_dreg(s, a->rd, tcg_rd);
    }
    return true;
}

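/*
 * Common helper for integer-to-FP conversions: tcg_int holds the
 * 64-bit input (already extended as needed) and shift gives the
 * number of fractional bits for the fixed-point forms, zero for
 * the plain [US]CVTF.
 */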
static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
                           TCGv_i64 tcg_int, bool is_signed)
{
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_single;
    TCGv_i64 tcg_double;

    tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    tcg_shift = tcg_constant_i32(shift);

    switch (esz) {
    case MO_64:
        tcg_double = tcg_temp_new_i64();
        if (is_signed) {
            gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
        } else {
            gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
        }
        write_fp_dreg(s, rd, tcg_double);
        break;

    case MO_32:
        tcg_single = tcg_temp_new_i32();
        if (is_signed) {
            gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
        } else {
            gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
        }
        write_fp_sreg(s, rd, tcg_single);
        break;

    case MO_16:
        tcg_single = tcg_temp_new_i32();
        if (is_signed) {
            gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
        } else {
            gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
        }
        write_fp_sreg(s, rd, tcg_single);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
{
    TCGv_i64 tcg_int;
    int check = fp_access_check_scalar_hsd(s, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    if (a->sf) {
        tcg_int = cpu_reg(s, a->rn);
    } else {
        tcg_int = read_cpu_reg(s, a->rn, true);
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_int, tcg_int);
        } else {
            tcg_gen_ext32u_i64(tcg_int, tcg_int);
        }
    }
    return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
}

TRANS(SCVTF_g, do_cvtf_g, a, true)
TRANS(UCVTF_g, do_cvtf_g, a, false)

/*
 * [US]CVTF (vector), scalar version.
 * This sounds odd, but it just means that the input comes from an fp
 * register instead of a general register.  Input and output element
 * sizes are always equal.
 */
static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
{
    TCGv_i64 tcg_int;
    int check = fp_access_check_scalar_hsd(s, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    tcg_int = tcg_temp_new_i64();
    read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
    return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
}

TRANS(SCVTF_f, do_cvtf_f, a, true)
TRANS(UCVTF_f, do_cvtf_f, a, false)

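/*
 * Common helper for FP-to-integer conversions: esz is the source FP
 * element size and out encodes the destination integer width and
 * signedness as a MemOp.  Results narrower than 64 bits come back
 * from i32 helpers and are zero-extended into tcg_out.
 */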
static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
                           TCGv_i64 tcg_out, int shift, int rn,
                           ARMFPRounding rmode)
{
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_rmode, tcg_single;

    tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    tcg_shift = tcg_constant_i32(shift);
    tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);

    switch (esz) {
    case MO_64:
        read_vec_element(s, tcg_out, rn, 0, MO_64);
        switch (out) {
        case MO_64 | MO_SIGN:
            gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
            break;
        case MO_64:
            gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
            break;
        case MO_32 | MO_SIGN:
            gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
            break;
        case MO_32:
            gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
            break;
        default:
            g_assert_not_reached();
        }
        break;

    case MO_32:
        tcg_single = read_fp_sreg(s, rn);
        switch (out) {
        case MO_64 | MO_SIGN:
            gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
            break;
        case MO_64:
            gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
            break;
        case MO_32 | MO_SIGN:
            gen_helper_vfp_tosls(tcg_single, tcg_single,
                                 tcg_shift, tcg_fpstatus);
            tcg_gen_extu_i32_i64(tcg_out, tcg_single);
            break;
        case MO_32:
            gen_helper_vfp_touls(tcg_single, tcg_single,
                                 tcg_shift, tcg_fpstatus);
            tcg_gen_extu_i32_i64(tcg_out, tcg_single);
            break;
        default:
            g_assert_not_reached();
        }
        break;

    case MO_16:
        tcg_single = read_fp_hreg(s, rn);
        switch (out) {
        case MO_64 | MO_SIGN:
            gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
            break;
        case MO_64:
            gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
            break;
        case MO_32 | MO_SIGN:
            gen_helper_vfp_toslh(tcg_single, tcg_single,
                                 tcg_shift, tcg_fpstatus);
            tcg_gen_extu_i32_i64(tcg_out, tcg_single);
            break;
        case MO_32:
            gen_helper_vfp_toulh(tcg_single, tcg_single,
                                 tcg_shift, tcg_fpstatus);
            tcg_gen_extu_i32_i64(tcg_out, tcg_single);
            break;
        case MO_16 | MO_SIGN:
            gen_helper_vfp_toshh(tcg_single, tcg_single,
                                 tcg_shift, tcg_fpstatus);
            tcg_gen_extu_i32_i64(tcg_out, tcg_single);
            break;
        case MO_16:
            gen_helper_vfp_touhh(tcg_single, tcg_single,
                                 tcg_shift, tcg_fpstatus);
            tcg_gen_extu_i32_i64(tcg_out, tcg_single);
            break;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}

static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
                      ARMFPRounding rmode, bool is_signed)
{
    TCGv_i64 tcg_int;
    int check = fp_access_check_scalar_hsd(s, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    tcg_int = cpu_reg(s, a->rd);
    do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
                   a->esz, tcg_int, a->shift, a->rn, rmode);

    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_int, tcg_int);
    }
    return true;
}

TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)

/*
 * FCVT* (vector), scalar version.
 * This sounds odd, but it just means that the output goes to an fp
 * register instead of a general register.  Input and output element
 * sizes are always equal.
 */
static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
                      ARMFPRounding rmode, bool is_signed)
{
    TCGv_i64 tcg_int;
    int check = fp_access_check_scalar_hsd(s, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    tcg_int = tcg_temp_new_i64();
    do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
                   a->esz, tcg_int, a->shift, a->rn, rmode);

    clear_vec(s, a->rd);
    write_vec_element(s, tcg_int, a->rd, 0, a->esz);
    return true;
}

TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)

static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
{
    if (!dc_isar_feature(aa64_jscvt, s)) {
        return false;
    }
    if (fp_access_check(s)) {
        TCGv_i64 t = read_fp_dreg(s, a->rn);
        TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);

        gen_helper_fjcvtzs(t, t, fpstatus);

        tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
        tcg_gen_extrh_i64_i32(cpu_ZF, t);
        tcg_gen_movi_i32(cpu_CF, 0);
        tcg_gen_movi_i32(cpu_NF, 0);
        tcg_gen_movi_i32(cpu_VF, 0);
    }
    return true;
}

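/*
 * FMOV between general and FP registers is a raw bit transfer with no
 * conversion.  Writes via write_fp_dreg zero the remainder of the
 * vector register; the _ux/_xu forms instead access the upper 64 bits
 * (Vd.D[1]) of the 128-bit register.
 */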
static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
{
    if (!dc_isar_feature(aa64_fp16, s)) {
        return false;
    }
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_ext16u_i64(tmp, tcg_rn);
        write_fp_dreg(s, a->rd, tmp);
    }
    return true;
}

static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_ext32u_i64(tmp, tcg_rn);
        write_fp_dreg(s, a->rd, tmp);
    }
    return true;
}

static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
        write_fp_dreg(s, a->rd, tcg_rn);
    }
    return true;
}

static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
        tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
        clear_vec_high(s, true, a->rd);
    }
    return true;
}

static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
{
    if (!dc_isar_feature(aa64_fp16, s)) {
        return false;
    }
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
        tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
    }
    return true;
}

static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
        tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
    }
    return true;
}

static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
        tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
    }
    return true;
}

static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
{
    if (fp_access_check(s)) {
        TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
        tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
    }
    return true;
}

typedef struct ENVScalar1 {
    NeonGenOneOpEnvFn *gen_bhs[3];
    NeonGenOne64OpEnvFn *gen_d;
} ENVScalar1;

static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
{
    if (!fp_access_check(s)) {
        return true;
    }
    if (a->esz == MO_64) {
        TCGv_i64 t = read_fp_dreg(s, a->rn);
        f->gen_d(t, tcg_env, t);
        write_fp_dreg(s, a->rd, t);
    } else {
        TCGv_i32 t = tcg_temp_new_i32();

        read_vec_element_i32(s, t, a->rn, 0, a->esz);
        f->gen_bhs[a->esz](t, tcg_env, t);
        write_fp_sreg(s, a->rd, t);
    }
    return true;
}

static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
{
    if (a->esz == MO_64 && !a->q) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    if (a->esz == MO_64) {
        TCGv_i64 t = tcg_temp_new_i64();

        for (int i = 0; i < 2; ++i) {
            read_vec_element(s, t, a->rn, i, MO_64);
            f->gen_d(t, tcg_env, t);
            write_vec_element(s, t, a->rd, i, MO_64);
        }
    } else {
        TCGv_i32 t = tcg_temp_new_i32();
        int n = (a->q ? 16 : 8) >> a->esz;

        for (int i = 0; i < n; ++i) {
            read_vec_element_i32(s, t, a->rn, i, a->esz);
            f->gen_bhs[a->esz](t, tcg_env, t);
            write_vec_element_i32(s, t, a->rd, i, a->esz);
        }
    }
    clear_vec_high(s, a->q, a->rd);
    return true;
}

static const ENVScalar1 f_scalar_sqabs = {
    { gen_helper_neon_qabs_s8,
      gen_helper_neon_qabs_s16,
      gen_helper_neon_qabs_s32 },
    gen_helper_neon_qabs_s64,
};
TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)

static const ENVScalar1 f_scalar_sqneg = {
    { gen_helper_neon_qneg_s8,
      gen_helper_neon_qneg_s16,
      gen_helper_neon_qneg_s32 },
    gen_helper_neon_qneg_s64,
};
TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)

static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
{
    if (fp_access_check(s)) {
        TCGv_i64 t = read_fp_dreg(s, a->rn);
        f(t, t);
        write_fp_dreg(s, a->rd, t);
    }
    return true;
}

TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)

static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
{
    if (fp_access_check(s)) {
        TCGv_i64 t = read_fp_dreg(s, a->rn);
        tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
        write_fp_dreg(s, a->rd, t);
    }
    return true;
}

TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)

static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
                                   ArithOneOp * const fn[3])
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();

        read_vec_element(s, t, a->rn, 0, a->esz + 1);
        fn[a->esz](t, t);
        clear_vec(s, a->rd);
        write_vec_element(s, t, a->rd, 0, a->esz);
    }
    return true;
}

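/*
 * Adapt narrowing helpers that take an env argument to the two-operand
 * ArithOneOp signature by binding tcg_env.
 */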
#define WRAP_ENV(NAME) \
    static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
    { gen_helper_##NAME(d, tcg_env, n); }

WRAP_ENV(neon_unarrow_sat8)
WRAP_ENV(neon_unarrow_sat16)
WRAP_ENV(neon_unarrow_sat32)

static ArithOneOp * const f_scalar_sqxtun[] = {
    gen_neon_unarrow_sat8,
    gen_neon_unarrow_sat16,
    gen_neon_unarrow_sat32,
};
TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)

WRAP_ENV(neon_narrow_sat_s8)
WRAP_ENV(neon_narrow_sat_s16)
WRAP_ENV(neon_narrow_sat_s32)

static ArithOneOp * const f_scalar_sqxtn[] = {
    gen_neon_narrow_sat_s8,
    gen_neon_narrow_sat_s16,
    gen_neon_narrow_sat_s32,
};
TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)

WRAP_ENV(neon_narrow_sat_u8)
WRAP_ENV(neon_narrow_sat_u16)
WRAP_ENV(neon_narrow_sat_u32)

static ArithOneOp * const f_scalar_uqxtn[] = {
    gen_neon_narrow_sat_u8,
    gen_neon_narrow_sat_u16,
    gen_neon_narrow_sat_u32,
};
TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)

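/*
 * Round-to-odd sets the low bit of the result whenever the conversion
 * is inexact; this avoids double-rounding errors if the value is
 * rounded again afterwards (e.g. on further narrowing to f16).
 */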
static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
{
    /*
     * 64 bit to 32 bit float conversion
     * with von Neumann rounding (round to odd)
     */
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
    tcg_gen_extu_i32_i64(d, tmp);
}

static ArithOneOp * const f_scalar_fcvtxn[] = {
    NULL,
    NULL,
    gen_fcvtxn_sd,
};
TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn)

#undef WRAP_ENV

static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    }
    return true;
}

TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)

static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    }
    return true;
}

TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)

static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
                                   ArithOneOp * const fn[3])
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();

        read_vec_element(s, t0, a->rn, 0, MO_64);
        read_vec_element(s, t1, a->rn, 1, MO_64);
        fn[a->esz](t0, t0);
        fn[a->esz](t1, t1);
        write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
        write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
        clear_vec_high(s, a->q, a->rd);
    }
    return true;
}

static ArithOneOp * const f_scalar_xtn[] = {
    gen_helper_neon_narrow_u8,
    gen_helper_neon_narrow_u16,
    tcg_gen_ext32u_i64,
};
TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)

static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
{
    TCGv_i32 tcg_lo = tcg_temp_new_i32();
    TCGv_i32 tcg_hi = tcg_temp_new_i32();
    TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
    TCGv_i32 ahp = get_ahp_flag();

    tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
    gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
    gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
    tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
    tcg_gen_extu_i32_i64(d, tcg_lo);
}

static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_ptr fpst = fpstatus_ptr(FPST_A64);

    gen_helper_vfp_fcvtsd(tmp, n, fpst);
    tcg_gen_extu_i32_i64(d, tmp);
}

static ArithOneOp * const f_vector_fcvtn[] = {
    NULL,
    gen_fcvtn_hs,
    gen_fcvtn_sd,
};
TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)

static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
{
    TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_bfcvt_pair(tmp, n, fpst);
    tcg_gen_extu_i32_i64(d, tmp);
}

static ArithOneOp * const f_vector_bfcvtn[] = {
    NULL,
    gen_bfcvtn_hs,
    NULL,
};
TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn)

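/*
 * SHLL/SHLL2: widen each element from the low (or, for Q=1, high) half
 * of the source, then shift left by the original element width
 * (8 << esz, i.e. 8, 16 or 32 bits).
 */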
static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
{
    static NeonGenWidenFn * const widenfns[3] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    NeonGenWidenFn *widenfn;
    TCGv_i64 tcg_res[2];
    TCGv_i32 tcg_op;
    int part, pass;

    if (a->esz == MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }

    tcg_op = tcg_temp_new_i32();
    widenfn = widenfns[a->esz];
    part = a->q ? 2 : 0;

    for (pass = 0; pass < 2; pass++) {
        read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
        tcg_res[pass] = tcg_temp_new_i64();
        widenfn(tcg_res[pass], tcg_op);
        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
    }
    return true;
}

static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
{
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
    return true;
}

TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)

static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
                          const FPScalar1 *f, int rmode)
{
    TCGv_i32 tcg_rmode = NULL;
    TCGv_ptr fpst;
    int check = fp_access_check_vector_hsd(s, a->q, a->esz);

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    if (rmode >= 0) {
        tcg_rmode = gen_set_rmode(rmode, fpst);
    }

    if (a->esz == MO_64) {
        TCGv_i64 t64 = tcg_temp_new_i64();

        for (int pass = 0; pass < 2; ++pass) {
            read_vec_element(s, t64, a->rn, pass, MO_64);
            f->gen_d(t64, t64, fpst);
            write_vec_element(s, t64, a->rd, pass, MO_64);
        }
    } else {
        TCGv_i32 t32 = tcg_temp_new_i32();
        void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
            = (a->esz == MO_16 ? f->gen_h : f->gen_s);

        for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
            read_vec_element_i32(s, t32, a->rn, pass, a->esz);
            gen(t32, t32, fpst);
            write_vec_element_i32(s, t32, a->rd, pass, a->esz);
        }
    }
    clear_vec_high(s, a->q, a->rd);

    if (rmode >= 0) {
        gen_restore_rmode(tcg_rmode, fpst);
    }
    return true;
}

TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)

TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)

TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint32, FPROUNDING_ZERO)
TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
           &f_scalar_frint64, FPROUNDING_ZERO)
TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)

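/*
 * Vector two-register ops via gvec helpers: the fns tables are indexed
 * by float size (0 = half, 1 = single, 2 = double), hence fns[esz - 1].
 * data carries either a shift amount or a rounding mode, depending on
 * the caller.
 */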
static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
                             int rd, int rn, int data,
                             gen_helper_gvec_2_ptr * const fns[3])
{
    int check = fp_access_check_vector_hsd(s, is_q, esz);
    TCGv_ptr fpst;

    if (check <= 0) {
        return check == 0;
    }

    fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       data, fns[esz - 1]);
    return true;
}

static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
    gen_helper_gvec_vcvt_sh,
    gen_helper_gvec_vcvt_sf,
    gen_helper_gvec_vcvt_sd,
};
TRANS(SCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
TRANS(SCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)

static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
    gen_helper_gvec_vcvt_uh,
    gen_helper_gvec_vcvt_uf,
    gen_helper_gvec_vcvt_ud,
};
TRANS(UCVTF_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
TRANS(UCVTF_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)

static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
    gen_helper_gvec_vcvt_rz_hs,
    gen_helper_gvec_vcvt_rz_fs,
    gen_helper_gvec_vcvt_rz_ds,
};
TRANS(FCVTZS_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)

static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
    gen_helper_gvec_vcvt_rz_hu,
    gen_helper_gvec_vcvt_rz_fu,
    gen_helper_gvec_vcvt_rz_du,
};
TRANS(FCVTZU_vf, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)

static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
    gen_helper_gvec_vcvt_rm_sh,
    gen_helper_gvec_vcvt_rm_ss,
    gen_helper_gvec_vcvt_rm_sd,
};

static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
    gen_helper_gvec_vcvt_rm_uh,
    gen_helper_gvec_vcvt_rm_us,
    gen_helper_gvec_vcvt_rm_ud,
};

TRANS(FCVTNS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
TRANS(FCVTNU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
TRANS(FCVTPS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
TRANS(FCVTPU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
TRANS(FCVTMS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
TRANS(FCVTMU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
TRANS(FCVTZS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
TRANS(FCVTZU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
TRANS(FCVTAS_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
TRANS(FCVTAU_vi, do_gvec_op2_fpst,
      a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)

static gen_helper_gvec_2_ptr * const f_fceq0[] = {
    gen_helper_gvec_fceq0_h,
    gen_helper_gvec_fceq0_s,
    gen_helper_gvec_fceq0_d,
};
TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)

static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
    gen_helper_gvec_fcgt0_h,
    gen_helper_gvec_fcgt0_s,
    gen_helper_gvec_fcgt0_d,
};
TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)

static gen_helper_gvec_2_ptr * const f_fcge0[] = {
    gen_helper_gvec_fcge0_h,
    gen_helper_gvec_fcge0_s,
    gen_helper_gvec_fcge0_d,
};
TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)

static gen_helper_gvec_2_ptr * const f_fclt0[] = {
    gen_helper_gvec_fclt0_h,
    gen_helper_gvec_fclt0_s,
    gen_helper_gvec_fclt0_d,
};
TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)

static gen_helper_gvec_2_ptr * const f_fcle0[] = {
    gen_helper_gvec_fcle0_h,
    gen_helper_gvec_fcle0_s,
    gen_helper_gvec_fcle0_d,
};
TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)

static gen_helper_gvec_2_ptr * const f_frecpe[] = {
    gen_helper_gvec_frecpe_h,
    gen_helper_gvec_frecpe_s,
    gen_helper_gvec_frecpe_d,
};
TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)

static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
    gen_helper_gvec_frsqrte_h,
    gen_helper_gvec_frsqrte_s,
    gen_helper_gvec_frsqrte_d,
};
TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)

static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
{
    /*
     * Handle 2-reg-misc ops which are widening (so each size element
     * in the source becomes a 2*size element in the destination).
     * The only instruction like this is FCVTL.
     */
    int pass;
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return true;
    }

    if (a->esz == MO_64) {
        /* 32 -> 64 bit fp conversion */
        TCGv_i64 tcg_res[2];
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        int srcelt = a->q ? 2 : 0;

        fpst = fpstatus_ptr(FPST_A64);

        for (pass = 0; pass < 2; pass++) {
            tcg_res[pass] = tcg_temp_new_i64();
            read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
        }
        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
        }
    } else {
        /* 16 -> 32 bit fp conversion */
        int srcelt = a->q ? 4 : 0;
        TCGv_i32 tcg_res[4];
        TCGv_i32 ahp = get_ahp_flag();

        fpst = fpstatus_ptr(FPST_A64_F16);

        for (pass = 0; pass < 4; pass++) {
            tcg_res[pass] = tcg_temp_new_i32();
            read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
                                           fpst, ahp);
        }
        for (pass = 0; pass < 4; pass++) {
            write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
        }
    }
    clear_vec_high(s, true, a->rd);
    return true;
}

static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, known to be non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *   - branch target identifiers,
 *   - paciasp, pacibsp,
 *   - BRK insn,
 *   - HLT insn.
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype.  */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype.  */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception.  */
            return true;
        }
    }
    return false;
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page.  */
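    /*
     * TARGET_PAGE_MASK is negative, so pc_first | TARGET_PAGE_MASK sets
     * every bit above the page offset; negating that yields the number
     * of bytes remaining on the page (e.g. 16 bytes -> 4 insns).
     */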
9703     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9704 
9705     /* If architectural single step active, limit to 1.  */
9706     if (dc->ss_active) {
9707         bound = 1;
9708     }
9709     dc->base.max_insns = MIN(dc->base.max_insns, bound);
9710 }
9711 
9712 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
9713 {
9714 }
9715 
9716 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9717 {
9718     DisasContext *dc = container_of(dcbase, DisasContext, base);
9719     target_ulong pc_arg = dc->base.pc_next;
9720 
9721     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9722         pc_arg &= ~TARGET_PAGE_MASK;
9723     }
9724     tcg_gen_insn_start(pc_arg, 0, 0);
9725     dc->insn_start_updated = false;
9726 }
9727 
9728 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9729 {
9730     DisasContext *s = container_of(dcbase, DisasContext, base);
9731     CPUARMState *env = cpu_env(cpu);
9732     uint64_t pc = s->base.pc_next;
9733     uint32_t insn;
9734 
9735     /* Singlestep exceptions have the highest priority. */
9736     if (s->ss_active && !s->pstate_ss) {
9737         /* Singlestep state is Active-pending.
9738          * If we're in this state at the start of a TB then either
9739          *  a) we just took an exception to an EL which is being debugged
9740          *     and this is the first insn in the exception handler
9741          *  b) debug exceptions were masked and we just unmasked them
9742          *     without changing EL (eg by clearing PSTATE.D)
9743          * In either case we're going to take a swstep exception in the
9744          * "did not step an insn" case, and so the syndrome ISV and EX
9745          * bits should be zero.
9746          */
9747         assert(s->base.num_insns == 1);
9748         gen_swstep_exception(s, 0, 0);
9749         s->base.is_jmp = DISAS_NORETURN;
9750         s->base.pc_next = pc + 4;
9751         return;
9752     }
9753 
9754     if (pc & 3) {
9755         /*
9756          * PC alignment fault.  This has priority over the instruction abort
9757          * that we would receive from a translation fault via arm_ldl_code.
9758          * This should only be possible after an indirect branch, at the
9759          * start of the TB.
9760          */
9761         assert(s->base.num_insns == 1);
9762         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9763         s->base.is_jmp = DISAS_NORETURN;
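        /*
         * Advance pc_next to the next aligned slot so the translator's
         * per-insn bookkeeping still sees forward progress for this
         * single-insn TB.
         */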
9764         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9765         return;
9766     }
9767 
9768     s->pc_curr = pc;
9769     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
9770     s->insn = insn;
9771     s->base.pc_next = pc + 4;
9772 
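    /*
     * Per-insn flags consumed by fp_access_check() and sve_access_check():
     * clearing them here ensures at most one FP/SVE access check is
     * emitted per instruction.
     */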
9773     s->fp_access_checked = false;
9774     s->sve_access_checked = false;
9775 
9776     if (s->pstate_il) {
9777         /*
9778          * Illegal execution state. This has priority over BTI
9779          * exceptions, but comes after instruction abort exceptions.
9780          */
9781         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
9782         return;
9783     }
9784 
9785     if (dc_isar_feature(aa64_bti, s)) {
9786         if (s->base.num_insns == 1) {
9787             /* First insn can have btype set to non-zero.  */
9788             tcg_debug_assert(s->btype >= 0);
9789 
9790             /*
9791              * Note that the Branch Target Exception has fairly high
9792              * priority -- below debugging exceptions but above almost
9793              * everything else.  This allows us to handle this now
9794              * instead of waiting until the insn is otherwise decoded.
9795              *
9796              * We can check all but the guarded page check here;
9797              * defer the latter to a helper.
9798              */
9799             if (s->btype != 0
9800                 && !btype_destination_ok(insn, s->bt, s->btype)) {
9801                 gen_helper_guarded_page_check(tcg_env);
9802             }
9803         } else {
9804             /* Not the first insn: btype must be 0.  */
9805             tcg_debug_assert(s->btype == 0);
9806         }
9807     }
9808 
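    /*
     * When streaming SVE mode should trap non-FA64 instructions, run the
     * generated decode-sme-fa64 matcher first: it sets s->is_nonstreaming
     * for encodings that are illegal while streaming, and the later access
     * checks use that flag to raise the SME trap.
     */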
9809     s->is_nonstreaming = false;
9810     if (s->sme_trap_nonstreaming) {
9811         disas_sme_fa64(s, insn);
9812     }
9813 
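    /*
     * Try each generated decoder in turn; each returns true once it has
     * claimed and translated the encoding, so falling through all of them
     * means the insn is unallocated.
     */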
9814     if (!disas_a64(s, insn) &&
9815         !disas_sme(s, insn) &&
9816         !disas_sve(s, insn)) {
9817         unallocated_encoding(s);
9818     }
9819 
9820     /*
9821      * After execution of most insns, btype is reset to 0.
9822      * Note that we set btype == -1 when the insn sets btype.
9823      */
9824     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
9825         reset_btype(s);
9826     }
9827 }
9828 
9829 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
9830 {
9831     DisasContext *dc = container_of(dcbase, DisasContext, base);
9832 
9833     if (unlikely(dc->ss_active)) {
9834         /* Note that this means single stepping WFI doesn't halt the CPU.
9835          * For conditional branch insns this is harmless unreachable code as
9836          * gen_goto_tb() has already handled emitting the debug exception
9837          * (and thus a tb-jump is not possible when singlestepping).
9838          */
9839         switch (dc->base.is_jmp) {
9840         default:
9841             gen_a64_update_pc(dc, 4);
9842             /* fall through */
9843         case DISAS_EXIT:
9844         case DISAS_JUMP:
9845             gen_step_complete_exception(dc);
9846             break;
9847         case DISAS_NORETURN:
9848             break;
9849         }
9850     } else {
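        /*
         * Normal TB end: pick the cheapest exit that is still correct for
         * the recorded disposition -- direct chaining for NEXT/TOO_MANY,
         * a TB lookup for JUMP, an explicit exit for the UPDATE/EXIT
         * cases, and helper calls for the WFx/YIELD hints.
         */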
9851         switch (dc->base.is_jmp) {
9852         case DISAS_NEXT:
9853         case DISAS_TOO_MANY:
9854             gen_goto_tb(dc, 1, 4);
9855             break;
9856         default:
9857         case DISAS_UPDATE_EXIT:
9858             gen_a64_update_pc(dc, 4);
9859             /* fall through */
9860         case DISAS_EXIT:
9861             tcg_gen_exit_tb(NULL, 0);
9862             break;
9863         case DISAS_UPDATE_NOCHAIN:
9864             gen_a64_update_pc(dc, 4);
9865             /* fall through */
9866         case DISAS_JUMP:
9867             tcg_gen_lookup_and_goto_ptr();
9868             break;
9869         case DISAS_NORETURN:
9870         case DISAS_SWI:
9871             break;
9872         case DISAS_WFE:
9873             gen_a64_update_pc(dc, 4);
9874             gen_helper_wfe(tcg_env);
9875             break;
9876         case DISAS_YIELD:
9877             gen_a64_update_pc(dc, 4);
9878             gen_helper_yield(tcg_env);
9879             break;
9880         case DISAS_WFI:
9881             /*
9882              * This is a special case because we don't want to just halt
9883              * the CPU if trying to debug across a WFI.
9884              */
9885             gen_a64_update_pc(dc, 4);
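            /*
             * The constant 4 is the insn length; the helper uses it to
             * build the WFx syndrome if the WFI is configured to trap
             * to a higher EL.
             */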
9886             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
9887             /*
9888              * The helper doesn't necessarily throw an exception, but we
9889              * must go back to the main loop to check for interrupts anyway.
9890              */
9891             tcg_gen_exit_tb(NULL, 0);
9892             break;
9893         }
9894     }
9895 }
9896 
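/*
 * Hook table consumed by the generic translator_loop(): it calls
 * init_disas_context and tb_start once per TB, then insn_start and
 * translate_insn for each guest insn, and finally tb_stop to emit
 * the TB epilogue.
 */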
9897 const TranslatorOps aarch64_translator_ops = {
9898     .init_disas_context = aarch64_tr_init_disas_context,
9899     .tb_start           = aarch64_tr_tb_start,
9900     .insn_start         = aarch64_tr_insn_start,
9901     .translate_insn     = aarch64_tr_translate_insn,
9902     .tb_stop            = aarch64_tr_tb_stop,
9903 };
9904