/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
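 *
 * For example, x == 0x13 (imm field 2, size field 3, i.e. a 64-bit
 * element) decodes to a byte offset of 2 << 3 = 16.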
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
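/* (The MTE tag granule is 16 bytes, so e.g. an offset of 2 becomes 32.) */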
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
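/* A table entry matches an insn when (insn & mask) == pattern. */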

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

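/*
 * Compute pc_curr + diff into dest.  With CF_PCREL the cpu_pc global
 * only holds the runtime value corresponding to the last PC update
 * (s->pc_save), so the result is formed relative to that; otherwise
 * the PC is a translation-time constant and can be loaded directly.
 */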
static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
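 *
 * For example, tbi == 2 means only TBI1 is set: an address with
 * bit 55 set has bits [63:56] replaced by the sign-extension of
 * bit 55 (all-ones), while an address with bit 55 clear passes
 * through unmodified.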
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
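/* That is, clear the allocation-tag nibble, bits [59:56], of the address. */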
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
316  * For MTE, check multiple logical sequential accesses.
317  */
318 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
319                         bool tag_checked, int total_size, MemOp single_mop)
320 {
321     if (tag_checked && s->mte_active[0]) {
322         TCGv_i64 ret;
323         int desc = 0;
324 
325         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
326         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
327         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
328         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
329         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
330         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
331 
332         ret = tcg_temp_new_i64();
333         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
334 
335         return ret;
336     }
337     return clean_data_tbi(s, addr);
338 }
339 
340 /*
341  * Generate the special alignment check that applies to AccType_ATOMIC
342  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
343  * naturally aligned, but it must not cross a 16-byte boundary.
344  * See AArch64.CheckAlignment().
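 *
 * Concretely, we raise the alignment fault when
 * ((rn + imm) % 16) + access_size > 16.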
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If
 * you need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
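 *
 * For example, in "ADD x0, sp, #4" register 31 names SP, while in
 * "ORR x0, xzr, x1" it names ZR.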
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the read functions
 * here always return a fresh auto-freed temporary rather than a
 * reference to the register itself.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
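
/*
 * Recall QEMU's flag representation: cpu_ZF holds a value that is
 * zero iff Z is set, cpu_NF and cpu_VF hold N and V in bit 31, and
 * cpu_CF holds C in bit 0.  So above, cpu_NF gets the high half of
 * the result (its bit 31 is the result's sign bit) and cpu_ZF gets
 * the OR of both halves, which is zero iff the whole result is zero.
 */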

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}
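
/*
 * The V computation above is the standard rule: signed overflow iff
 * the operands have the same sign and the result's sign differs,
 * i.e. bit 63 of (result ^ t0) & ~(t0 ^ t1).  For example,
 * INT64_MAX + 1 sets V, while UINT64_MAX + 1 sets C but not V.
 */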

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
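 *
 * For example, option 2 (UXTW) with shift 2 zero-extends the low
 * 32 bits of tcg_in and multiplies by 4, as in "ADD x0, x1, w2,
 * UXTW #2".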
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}


static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
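    /*
     * If the branch target is also the link register (Rn == 30),
     * copy it first so that writing the return address into LR
     * does not clobber the target.
     */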
1584     if (dst == lr) {
1585         TCGv_i64 tmp = tcg_temp_new_i64();
1586         tcg_gen_mov_i64(tmp, dst);
1587         dst = tmp;
1588     }
1589     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1590     gen_a64_set_pc(s, dst);
1591     set_btype_for_blr(s);
1592     s->base.is_jmp = DISAS_JUMP;
1593     return true;
1594 }
1595 
1596 static bool trans_RET(DisasContext *s, arg_r *a)
1597 {
1598     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1599     s->base.is_jmp = DISAS_JUMP;
1600     return true;
1601 }
1602 
1603 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1604                                    TCGv_i64 modifier, bool use_key_a)
1605 {
1606     TCGv_i64 truedst;
1607     /*
1608      * Return the branch target for a BRAA/RETA/etc, which is either
1609      * just the destination dst, or that value with the pauth check
1610      * done and the code removed from the high bits.
1611      */
1612     if (!s->pauth_active) {
1613         return dst;
1614     }
1615 
1616     truedst = tcg_temp_new_i64();
1617     if (use_key_a) {
1618         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1619     } else {
1620         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1621     }
1622     return truedst;
1623 }
1624 
1625 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1626 {
1627     TCGv_i64 dst;
1628 
1629     if (!dc_isar_feature(aa64_pauth, s)) {
1630         return false;
1631     }
1632 
1633     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1634     set_btype_for_br(s, a->rn);
1635     gen_a64_set_pc(s, dst);
1636     s->base.is_jmp = DISAS_JUMP;
1637     return true;
1638 }
1639 
1640 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1641 {
1642     TCGv_i64 dst, lr;
1643 
1644     if (!dc_isar_feature(aa64_pauth, s)) {
1645         return false;
1646     }
1647 
1648     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1649     lr = cpu_reg(s, 30);
1650     if (dst == lr) {
1651         TCGv_i64 tmp = tcg_temp_new_i64();
1652         tcg_gen_mov_i64(tmp, dst);
1653         dst = tmp;
1654     }
1655     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1656     gen_a64_set_pc(s, dst);
1657     set_btype_for_blr(s);
1658     s->base.is_jmp = DISAS_JUMP;
1659     return true;
1660 }
1661 
1662 static bool trans_RETA(DisasContext *s, arg_reta *a)
1663 {
1664     TCGv_i64 dst;
1665 
1666     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1667     gen_a64_set_pc(s, dst);
1668     s->base.is_jmp = DISAS_JUMP;
1669     return true;
1670 }
1671 
1672 static bool trans_BRA(DisasContext *s, arg_bra *a)
1673 {
1674     TCGv_i64 dst;
1675 
1676     if (!dc_isar_feature(aa64_pauth, s)) {
1677         return false;
1678     }
1679     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1680     gen_a64_set_pc(s, dst);
1681     set_btype_for_br(s, a->rn);
1682     s->base.is_jmp = DISAS_JUMP;
1683     return true;
1684 }
1685 
1686 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1687 {
1688     TCGv_i64 dst, lr;
1689 
1690     if (!dc_isar_feature(aa64_pauth, s)) {
1691         return false;
1692     }
1693     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1694     lr = cpu_reg(s, 30);
1695     if (dst == lr) {
1696         TCGv_i64 tmp = tcg_temp_new_i64();
1697         tcg_gen_mov_i64(tmp, dst);
1698         dst = tmp;
1699     }
1700     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1701     gen_a64_set_pc(s, dst);
1702     set_btype_for_blr(s);
1703     s->base.is_jmp = DISAS_JUMP;
1704     return true;
1705 }
1706 
1707 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1708 {
1709     TCGv_i64 dst;
1710 
1711     if (s->current_el == 0) {
1712         return false;
1713     }
1714     if (s->trap_eret) {
1715         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1716         return true;
1717     }
1718     dst = tcg_temp_new_i64();
1719     tcg_gen_ld_i64(dst, tcg_env,
1720                    offsetof(CPUARMState, elr_el[s->current_el]));
1721 
1722     translator_io_start(&s->base);
1723 
1724     gen_helper_exception_return(tcg_env, dst);
1725     /* Must exit loop to check un-masked IRQs */
1726     s->base.is_jmp = DISAS_EXIT;
1727     return true;
1728 }
1729 
1730 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1731 {
1732     TCGv_i64 dst;
1733 
1734     if (!dc_isar_feature(aa64_pauth, s)) {
1735         return false;
1736     }
1737     if (s->current_el == 0) {
1738         return false;
1739     }
1740     /* The FGT trap takes precedence over an auth trap. */
1741     if (s->trap_eret) {
1742         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1743         return true;
1744     }
1745     dst = tcg_temp_new_i64();
1746     tcg_gen_ld_i64(dst, tcg_env,
1747                    offsetof(CPUARMState, elr_el[s->current_el]));
1748 
1749     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1750 
1751     translator_io_start(&s->base);
1752 
1753     gen_helper_exception_return(tcg_env, dst);
1754     /* Must exit loop to check un-masked IRQs */
1755     s->base.is_jmp = DISAS_EXIT;
1756     return true;
1757 }
1758 
1759 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1760 {
1761     return true;
1762 }
1763 
1764 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1765 {
1766     /*
1767      * When running in MTTCG we don't generate jumps to the yield and
1768      * WFE helpers as it won't affect the scheduling of other vCPUs.
1769      * Modelling WFE/SEV more completely, so that we don't busy-spin
1770      * unnecessarily, would require something more involved.
1771      */
1772     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1773         s->base.is_jmp = DISAS_YIELD;
1774     }
1775     return true;
1776 }
1777 
1778 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1779 {
1780     s->base.is_jmp = DISAS_WFI;
1781     return true;
1782 }
1783 
1784 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1785 {
1786     /*
1787      * When running in MTTCG we don't generate jumps to the yield and
1788      * WFE helpers as it won't affect the scheduling of other vCPUs.
1789      * Modelling WFE/SEV more completely, so that we don't busy-spin
1790      * unnecessarily, would require something more involved.
1791      */
1792     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1793         s->base.is_jmp = DISAS_WFE;
1794     }
1795     return true;
1796 }
1797 
1798 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1799 {
1800     if (!dc_isar_feature(aa64_wfxt, s)) {
1801         return false;
1802     }
1803 
1804     /*
1805      * Because we need to pass the register value to the helper,
1806      * it's easier to emit the code now, unlike trans_WFI which
1807      * defers it to aarch64_tr_tb_stop(). That means we need to
1808      * check ss_active so that single-stepping a WFIT doesn't halt.
1809      */
1810     if (s->ss_active) {
1811         /* Act like a NOP under architectural singlestep */
1812         return true;
1813     }
1814 
1815     gen_a64_update_pc(s, 4);
1816     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1817     /* Go back to the main loop to check for interrupts */
1818     s->base.is_jmp = DISAS_EXIT;
1819     return true;
1820 }
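
/*
 * The register passed to the wfit helper holds the WFIT timeout:
 * broadly, a deadline compared against the virtual counter, after
 * which the wait completes even without a wakeup event.  The actual
 * timeout handling lives in the helper.
 */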
1821 
1822 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1823 {
1824     if (!dc_isar_feature(aa64_wfxt, s)) {
1825         return false;
1826     }
1827 
1828     /*
1829      * We rely here on our WFE implementation being a NOP, so we
1830      * don't need to do anything different to handle the WFET timeout
1831      * from what trans_WFE does.
1832      */
1833     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1834         s->base.is_jmp = DISAS_WFE;
1835     }
1836     return true;
1837 }
1838 
1839 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1840 {
1841     if (s->pauth_active) {
1842         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1843     }
1844     return true;
1845 }
1846 
1847 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1848 {
1849     if (s->pauth_active) {
1850         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1851     }
1852     return true;
1853 }
1854 
1855 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1856 {
1857     if (s->pauth_active) {
1858         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1859     }
1860     return true;
1861 }
1862 
1863 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1864 {
1865     if (s->pauth_active) {
1866         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1867     }
1868     return true;
1869 }
1870 
1871 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1872 {
1873     if (s->pauth_active) {
1874         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1875     }
1876     return true;
1877 }
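
/*
 * The PACxx1716/AUTxx1716 insns above and the PACxxZ/PACxxSP family
 * below sit in the HINT space, so when pauth is not active they must
 * (and do) execute as NOPs rather than UNDEF: the trans functions
 * simply skip the helper call.
 */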
1878 
1879 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1880 {
1881     /* Without RAS, we must implement this as NOP. */
1882     if (dc_isar_feature(aa64_ras, s)) {
1883         /*
1884          * QEMU does not have a source of physical SErrors,
1885          * so we are only concerned with virtual SErrors.
1886          * The pseudocode in the Arm ARM for this case is
1887          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1888          *      AArch64.vESBOperation();
1889          * Most of the condition can be evaluated at translation time.
1890          * Test for EL2 present, and defer test for SEL2 to runtime.
1891          */
1892         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1893             gen_helper_vesb(tcg_env);
1894         }
1895     }
1896     return true;
1897 }
1898 
1899 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1900 {
1901     if (s->pauth_active) {
1902         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1903     }
1904     return true;
1905 }
1906 
1907 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1908 {
1909     if (s->pauth_active) {
1910         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1911     }
1912     return true;
1913 }
1914 
1915 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1916 {
1917     if (s->pauth_active) {
1918         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1919     }
1920     return true;
1921 }
1922 
1923 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1924 {
1925     if (s->pauth_active) {
1926         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1927     }
1928     return true;
1929 }
1930 
1931 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1932 {
1933     if (s->pauth_active) {
1934         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1935     }
1936     return true;
1937 }
1938 
1939 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1940 {
1941     if (s->pauth_active) {
1942         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1943     }
1944     return true;
1945 }
1946 
1947 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1948 {
1949     if (s->pauth_active) {
1950         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1951     }
1952     return true;
1953 }
1954 
1955 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1956 {
1957     if (s->pauth_active) {
1958         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1959     }
1960     return true;
1961 }
1962 
1963 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1964 {
1965     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1966     return true;
1967 }
1968 
1969 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1970 {
1971     /* We handle DSB and DMB the same way */
1972     TCGBar bar;
1973 
1974     switch (a->types) {
1975     case 1: /* MBReqTypes_Reads */
1976         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1977         break;
1978     case 2: /* MBReqTypes_Writes */
1979         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1980         break;
1981     default: /* MBReqTypes_All */
1982         bar = TCG_BAR_SC | TCG_MO_ALL;
1983         break;
1984     }
1985     tcg_gen_mb(bar);
1986     return true;
1987 }
1988 
1989 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1990 {
1991     /*
1992      * We need to break the TB after this insn to execute
1993      * self-modifying code correctly and also to take
1994      * any pending interrupts immediately.
1995      */
1996     reset_btype(s);
1997     gen_goto_tb(s, 0, 4);
1998     return true;
1999 }
2000 
2001 static bool trans_SB(DisasContext *s, arg_SB *a)
2002 {
2003     if (!dc_isar_feature(aa64_sb, s)) {
2004         return false;
2005     }
2006     /*
2007      * TODO: There is no speculation barrier opcode for TCG;
2008      * MB and end the TB instead.
2009      */
2010     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2011     gen_goto_tb(s, 0, 4);
2012     return true;
2013 }
2014 
2015 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2016 {
2017     if (!dc_isar_feature(aa64_condm_4, s)) {
2018         return false;
2019     }
2020     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2021     return true;
2022 }
2023 
2024 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2025 {
2026     TCGv_i32 z;
2027 
2028     if (!dc_isar_feature(aa64_condm_5, s)) {
2029         return false;
2030     }
2031 
2032     z = tcg_temp_new_i32();
2033 
2034     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2035 
2036     /*
2037      * (!C & !Z) << 31
2038      * (!(C | Z)) << 31
2039      * ~((C | Z) << 31)
2040      * ~-(C | Z)
2041      * (C | Z) - 1
2042      */
2043     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2044     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2045 
2046     /* !(Z & C) */
2047     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2048     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2049 
2050     /* (!C & Z) << 31 -> -(Z & ~C) */
2051     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2052     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2053 
2054     /* C | Z */
2055     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2056 
2057     return true;
2058 }
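
/*
 * For reference, the (C, Z) -> NZCV mapping implemented above, as a
 * truth table (derived from the flag algebra; it matches the XAFLAG
 * description in the Arm ARM):
 *
 *   C Z | N Z C V
 *   0 0 | 1 0 0 0
 *   0 1 | 0 0 1 1
 *   1 0 | 0 0 1 0
 *   1 1 | 0 1 1 0
 */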
2059 
2060 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2061 {
2062     if (!dc_isar_feature(aa64_condm_5, s)) {
2063         return false;
2064     }
2065 
2066     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2067     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2068 
2069     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2070     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2071 
2072     tcg_gen_movi_i32(cpu_NF, 0);
2073     tcg_gen_movi_i32(cpu_VF, 0);
2074 
2075     return true;
2076 }
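
/*
 * Likewise the net effect of AXFLAG above: C = C AND NOT V,
 * Z = Z OR V, with N and V cleared, matching the architectural
 * AXFLAG conversion.
 */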
2077 
2078 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2079 {
2080     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2081         return false;
2082     }
2083     if (a->imm & 1) {
2084         set_pstate_bits(PSTATE_UAO);
2085     } else {
2086         clear_pstate_bits(PSTATE_UAO);
2087     }
2088     gen_rebuild_hflags(s);
2089     s->base.is_jmp = DISAS_TOO_MANY;
2090     return true;
2091 }
2092 
2093 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2094 {
2095     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2096         return false;
2097     }
2098     if (a->imm & 1) {
2099         set_pstate_bits(PSTATE_PAN);
2100     } else {
2101         clear_pstate_bits(PSTATE_PAN);
2102     }
2103     gen_rebuild_hflags(s);
2104     s->base.is_jmp = DISAS_TOO_MANY;
2105     return true;
2106 }
2107 
2108 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2109 {
2110     if (s->current_el == 0) {
2111         return false;
2112     }
2113     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2114     s->base.is_jmp = DISAS_TOO_MANY;
2115     return true;
2116 }
2117 
2118 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2119 {
2120     if (!dc_isar_feature(aa64_ssbs, s)) {
2121         return false;
2122     }
2123     if (a->imm & 1) {
2124         set_pstate_bits(PSTATE_SSBS);
2125     } else {
2126         clear_pstate_bits(PSTATE_SSBS);
2127     }
2128     /* Don't need to rebuild hflags since SSBS is a nop */
2129     s->base.is_jmp = DISAS_TOO_MANY;
2130     return true;
2131 }
2132 
2133 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2134 {
2135     if (!dc_isar_feature(aa64_dit, s)) {
2136         return false;
2137     }
2138     if (a->imm & 1) {
2139         set_pstate_bits(PSTATE_DIT);
2140     } else {
2141         clear_pstate_bits(PSTATE_DIT);
2142     }
2143     /* There's no need to rebuild hflags because DIT is a nop */
2144     s->base.is_jmp = DISAS_TOO_MANY;
2145     return true;
2146 }
2147 
2148 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2149 {
2150     if (dc_isar_feature(aa64_mte, s)) {
2151         /* Full MTE is enabled -- set the TCO bit as directed. */
2152         if (a->imm & 1) {
2153             set_pstate_bits(PSTATE_TCO);
2154         } else {
2155             clear_pstate_bits(PSTATE_TCO);
2156         }
2157         gen_rebuild_hflags(s);
2158         /* Many factors, including TCO, go into MTE_ACTIVE. */
2159         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2160         return true;
2161     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2162         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2163         return true;
2164     } else {
2165         /* Insn not present */
2166         return false;
2167     }
2168 }
2169 
2170 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2171 {
2172     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2173     s->base.is_jmp = DISAS_TOO_MANY;
2174     return true;
2175 }
2176 
2177 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2178 {
2179     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2180     /* Exit the cpu loop to re-evaluate pending IRQs. */
2181     s->base.is_jmp = DISAS_UPDATE_EXIT;
2182     return true;
2183 }
2184 
2185 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2186 {
2187     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2188         return false;
2189     }
2190 
2191     if (a->imm == 0) {
2192         clear_pstate_bits(PSTATE_ALLINT);
2193     } else if (s->current_el > 1) {
2194         set_pstate_bits(PSTATE_ALLINT);
2195     } else {
2196         gen_helper_msr_set_allint_el1(tcg_env);
2197     }
2198 
2199     /* Exit the cpu loop to re-evaluate pending IRQs. */
2200     s->base.is_jmp = DISAS_UPDATE_EXIT;
2201     return true;
2202 }
2203 
2204 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2205 {
2206     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2207         return false;
2208     }
2209     if (sme_access_check(s)) {
2210         int old = s->pstate_sm | (s->pstate_za << 1);
2211         int new = a->imm * 3;
2212 
2213         if ((old ^ new) & a->mask) {
2214             /* At least one bit changes. */
2215             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2216                                 tcg_constant_i32(a->mask));
2217             s->base.is_jmp = DISAS_TOO_MANY;
2218         }
2219     }
2220     return true;
2221 }
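
/*
 * Note that new = a->imm * 3 broadcasts the single immediate bit into
 * both the SM (bit 0) and ZA (bit 1) positions; a->mask then selects
 * which of them the insn actually writes.  E.g. SMSTART ZA has
 * imm = 1, mask = 2: new is 0b11 but only the ZA bit changes.
 */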
2222 
2223 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2224 {
2225     TCGv_i32 tmp = tcg_temp_new_i32();
2226     TCGv_i32 nzcv = tcg_temp_new_i32();
2227 
2228     /* build bit 31, N */
2229     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2230     /* build bit 30, Z */
2231     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2232     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2233     /* build bit 29, C */
2234     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2235     /* build bit 28, V */
2236     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2237     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2238     /* generate result */
2239     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2240 }
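
/*
 * Register layout assumed by gen_get_nzcv above and gen_set_nzcv
 * below, matching the architectural NZCV system register:
 *
 *   bit: 31  30  29  28  27..0
 *         N   Z   C   V  RES0 (read as zero, ignored on write)
 */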
2241 
2242 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2243 {
2244     TCGv_i32 nzcv = tcg_temp_new_i32();
2245 
2246     /* take NZCV from R[t] */
2247     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2248 
2249     /* bit 31, N */
2250     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2251     /* bit 30, Z */
2252     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2253     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2254     /* bit 29, C */
2255     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2256     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2257     /* bit 28, V */
2258     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2259     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2260 }
2261 
2262 static void gen_sysreg_undef(DisasContext *s, bool isread,
2263                              uint8_t op0, uint8_t op1, uint8_t op2,
2264                              uint8_t crn, uint8_t crm, uint8_t rt)
2265 {
2266     /*
2267      * Generate code to emit an UNDEF with correct syndrome
2268      * information for a failed system register access.
2269      * This is EC_UNCATEGORIZED (i.e. a standard UNDEF) in most cases,
2270      * but if FEAT_IDST is implemented then read accesses to registers
2271      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2272      * syndrome.
2273      */
2274     uint32_t syndrome;
2275 
2276     if (isread && dc_isar_feature(aa64_ids, s) &&
2277         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2278         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2279     } else {
2280         syndrome = syn_uncategorized();
2281     }
2282     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2283 }
2284 
2285 /* MRS - move from system register
2286  * MSR (register) - move to system register
2287  * SYS
2288  * SYSL
2289  * These are all essentially the same insn in 'read' and 'write'
2290  * versions, with varying op0 fields.
2291  */
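
/*
 * Rough order of the checks below: TIDCP trapping for the
 * IMPLEMENTATION DEFINED encoding space; UNDEF for unknown registers;
 * deciding whether FEAT_NV2 redirects this access to memory; static
 * access-permission checks (which may instead become an NV trap to EL2
 * or an EL2-to-EL1 register redirect); runtime accessfn/FGT checks;
 * FP/SVE/SME access checks; then the special-cased and the ordinary
 * register accesses, ending the TB where required.
 */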
2292 static void handle_sys(DisasContext *s, bool isread,
2293                        unsigned int op0, unsigned int op1, unsigned int op2,
2294                        unsigned int crn, unsigned int crm, unsigned int rt)
2295 {
2296     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2297                                       crn, crm, op0, op1, op2);
2298     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2299     bool need_exit_tb = false;
2300     bool nv_trap_to_el2 = false;
2301     bool nv_redirect_reg = false;
2302     bool skip_fp_access_checks = false;
2303     bool nv2_mem_redirect = false;
2304     TCGv_ptr tcg_ri = NULL;
2305     TCGv_i64 tcg_rt;
2306     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2307 
2308     if (crn == 11 || crn == 15) {
2309         /*
2310          * Check for TIDCP trap, which must take precedence over
2311          * the UNDEF for "no such register" etc.
2312          */
2313         switch (s->current_el) {
2314         case 0:
2315             if (dc_isar_feature(aa64_tidcp1, s)) {
2316                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2317             }
2318             break;
2319         case 1:
2320             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2321             break;
2322         }
2323     }
2324 
2325     if (!ri) {
2326         /* Unknown register; this might be a guest error or a
2327          * feature that QEMU doesn't implement.
2328          */
2329         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2330                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2331                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2332         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2333         return;
2334     }
2335 
2336     if (s->nv2 && ri->nv2_redirect_offset) {
2337         /*
2338          * Some registers always redirect to memory; some only do so if
2339          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2340          * pairs which share an offset; see the table in R_CSRPQ).
2341          */
2342         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2343             nv2_mem_redirect = s->nv1;
2344         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2345             nv2_mem_redirect = !s->nv1;
2346         } else {
2347             nv2_mem_redirect = true;
2348         }
2349     }
2350 
2351     /* Check access permissions */
2352     if (!cp_access_ok(s->current_el, ri, isread)) {
2353         /*
2354          * FEAT_NV/NV2 handling does not do the usual FP access checks
2355          * for registers only accessible at EL2 (though it *does* do them
2356          * for registers accessible at EL1).
2357          */
2358         skip_fp_access_checks = true;
2359         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2360             /*
2361              * This is one of the few EL2 registers which should redirect
2362              * to the equivalent EL1 register. We do that after running
2363              * the EL2 register's accessfn.
2364              */
2365             nv_redirect_reg = true;
2366             assert(!nv2_mem_redirect);
2367         } else if (nv2_mem_redirect) {
2368             /*
2369              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2370              * UNDEF to EL1.
2371              */
2372         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2373             /*
2374              * This register / instruction exists and is an EL2 register, so
2375              * we must trap to EL2 if accessed in nested virtualization EL1
2376              * instead of UNDEFing. We'll do that after the usual access checks.
2377              * (This makes a difference only for a couple of registers like
2378              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2379              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2380              * an accessfn which does nothing when called from EL1, because
2381              * the trap-to-EL3 controls which would apply to that register
2382              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2383              */
2384             nv_trap_to_el2 = true;
2385         } else {
2386             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2387             return;
2388         }
2389     }
2390 
2391     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2392         /* Emit code to perform further access permissions checks at
2393          * runtime; this may result in an exception.
2394          */
2395         gen_a64_update_pc(s, 0);
2396         tcg_ri = tcg_temp_new_ptr();
2397         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2398                                        tcg_constant_i32(key),
2399                                        tcg_constant_i32(syndrome),
2400                                        tcg_constant_i32(isread));
2401     } else if (ri->type & ARM_CP_RAISES_EXC) {
2402         /*
2403          * The readfn or writefn might raise an exception;
2404          * synchronize the CPU state in case it does.
2405          */
2406         gen_a64_update_pc(s, 0);
2407     }
2408 
2409     if (!skip_fp_access_checks) {
2410         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2411             return;
2412         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2413             return;
2414         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2415             return;
2416         }
2417     }
2418 
2419     if (nv_trap_to_el2) {
2420         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2421         return;
2422     }
2423 
2424     if (nv_redirect_reg) {
2425         /*
2426          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2427          * Conveniently in all cases the encoding of the EL1 register is
2428          * identical to the EL2 register except that opc1 is 0.
2429          * Get the reginfo for the EL1 register to use for the actual access.
2430          * We don't use the EL1 register's access function, and
2431          * fine-grained-traps on EL1 also do not apply here.
2432          */
2433         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2434                                  crn, crm, op0, 0, op2);
2435         ri = get_arm_cp_reginfo(s->cp_regs, key);
2436         assert(ri);
2437         assert(cp_access_ok(s->current_el, ri, isread));
2438         /*
2439          * We might not have done an update_pc earlier, so check we don't
2440          * need it. We could support this in future if necessary.
2441          */
2442         assert(!(ri->type & ARM_CP_RAISES_EXC));
2443     }
2444 
2445     if (nv2_mem_redirect) {
2446         /*
2447          * This system register is being redirected into an EL2 memory access.
2448          * This means it is not an IO operation, doesn't change hflags,
2449          * and need not end the TB, because it has no side effects.
2450          *
2451          * The access is 64-bit single copy atomic, guaranteed aligned because
2452          * of the definition of VCNR_EL2. Its endianness depends on
2453          * SCTLR_EL2.EE, not on the data endianness of EL1.
2454          * It is done under either the EL2 translation regime or the EL2&0
2455          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2456          * PSTATE.PAN is 0.
2457          */
2458         TCGv_i64 ptr = tcg_temp_new_i64();
2459         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2460         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2461         int memidx = arm_to_core_mmu_idx(armmemidx);
2462         uint32_t syn;
2463 
2464         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2465 
2466         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2467         tcg_gen_addi_i64(ptr, ptr,
2468                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2469         tcg_rt = cpu_reg(s, rt);
2470 
2471         syn = syn_data_abort_vncr(0, !isread, 0);
2472         disas_set_insn_syndrome(s, syn);
2473         if (isread) {
2474             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2475         } else {
2476             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2477         }
2478         return;
2479     }
2480 
2481     /* Handle special cases first */
2482     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2483     case 0:
2484         break;
2485     case ARM_CP_NOP:
2486         return;
2487     case ARM_CP_NZCV:
2488         tcg_rt = cpu_reg(s, rt);
2489         if (isread) {
2490             gen_get_nzcv(tcg_rt);
2491         } else {
2492             gen_set_nzcv(tcg_rt);
2493         }
2494         return;
2495     case ARM_CP_CURRENTEL:
2496     {
2497         /*
2498          * Reads as current EL value from pstate, which is
2499          * guaranteed to be constant by the tb flags.
2500          * For nested virt we should report EL2.
2501          */
2502         int el = s->nv ? 2 : s->current_el;
2503         tcg_rt = cpu_reg(s, rt);
2504         tcg_gen_movi_i64(tcg_rt, el << 2);
2505         return;
2506     }
2507     case ARM_CP_DC_ZVA:
2508         /* Writes clear the aligned block of memory which rt points into. */
2509         if (s->mte_active[0]) {
2510             int desc = 0;
2511 
2512             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2513             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2514             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2515 
2516             tcg_rt = tcg_temp_new_i64();
2517             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2518                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2519         } else {
2520             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2521         }
2522         gen_helper_dc_zva(tcg_env, tcg_rt);
2523         return;
2524     case ARM_CP_DC_GVA:
2525         {
2526             TCGv_i64 clean_addr, tag;
2527 
2528             /*
2529              * DC_GVA, like DC_ZVA, requires that we supply the original
2530              * pointer for an invalid page.  Probe that address first.
2531              */
2532             tcg_rt = cpu_reg(s, rt);
2533             clean_addr = clean_data_tbi(s, tcg_rt);
2534             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2535 
2536             if (s->ata[0]) {
2537                 /* Extract the tag from the register to match STZGM.  */
2538                 tag = tcg_temp_new_i64();
2539                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2540                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2541             }
2542         }
2543         return;
2544     case ARM_CP_DC_GZVA:
2545         {
2546             TCGv_i64 clean_addr, tag;
2547 
2548             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2549             tcg_rt = cpu_reg(s, rt);
2550             clean_addr = clean_data_tbi(s, tcg_rt);
2551             gen_helper_dc_zva(tcg_env, clean_addr);
2552 
2553             if (s->ata[0]) {
2554                 /* Extract the tag from the register to match STZGM.  */
2555                 tag = tcg_temp_new_i64();
2556                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2557                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2558             }
2559         }
2560         return;
2561     default:
2562         g_assert_not_reached();
2563     }
2564 
2565     if (ri->type & ARM_CP_IO) {
2566         /* I/O operations must end the TB here (whether read or write) */
2567         need_exit_tb = translator_io_start(&s->base);
2568     }
2569 
2570     tcg_rt = cpu_reg(s, rt);
2571 
2572     if (isread) {
2573         if (ri->type & ARM_CP_CONST) {
2574             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2575         } else if (ri->readfn) {
2576             if (!tcg_ri) {
2577                 tcg_ri = gen_lookup_cp_reg(key);
2578             }
2579             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2580         } else {
2581             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2582         }
2583     } else {
2584         if (ri->type & ARM_CP_CONST) {
2585             /* If not forbidden by access permissions, treat as WI */
2586             return;
2587         } else if (ri->writefn) {
2588             if (!tcg_ri) {
2589                 tcg_ri = gen_lookup_cp_reg(key);
2590             }
2591             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2592         } else {
2593             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2594         }
2595     }
2596 
2597     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2598         /*
2599          * A write to any coprocessor register that ends a TB
2600          * must rebuild the hflags for the next TB.
2601          */
2602         gen_rebuild_hflags(s);
2603         /*
2604          * We default to ending the TB on a coprocessor register write,
2605          * but allow this to be suppressed by the register definition
2606          * (usually only necessary to work around guest bugs).
2607          */
2608         need_exit_tb = true;
2609     }
2610     if (need_exit_tb) {
2611         s->base.is_jmp = DISAS_UPDATE_EXIT;
2612     }
2613 }
2614 
2615 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2616 {
2617     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2618     return true;
2619 }
2620 
2621 static bool trans_SVC(DisasContext *s, arg_i *a)
2622 {
2623     /*
2624      * For SVC, HVC and SMC we advance the single-step state
2625      * machine before taking the exception. This is architecturally
2626      * mandated, to ensure that single-stepping a system call
2627      * instruction works properly.
2628      */
2629     uint32_t syndrome = syn_aa64_svc(a->imm);
2630     if (s->fgt_svc) {
2631         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2632         return true;
2633     }
2634     gen_ss_advance(s);
2635     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2636     return true;
2637 }
2638 
2639 static bool trans_HVC(DisasContext *s, arg_i *a)
2640 {
2641     int target_el = s->current_el == 3 ? 3 : 2;
2642 
2643     if (s->current_el == 0) {
2644         unallocated_encoding(s);
2645         return true;
2646     }
2647     /*
2648      * The pre HVC helper handles cases when HVC gets trapped
2649      * as an undefined insn by runtime configuration.
2650      */
2651     gen_a64_update_pc(s, 0);
2652     gen_helper_pre_hvc(tcg_env);
2653     /* Architecture requires ss advance before we do the actual work */
2654     gen_ss_advance(s);
2655     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2656     return true;
2657 }
2658 
2659 static bool trans_SMC(DisasContext *s, arg_i *a)
2660 {
2661     if (s->current_el == 0) {
2662         unallocated_encoding(s);
2663         return true;
2664     }
2665     gen_a64_update_pc(s, 0);
2666     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2667     /* Architecture requires ss advance before we do the actual work */
2668     gen_ss_advance(s);
2669     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2670     return true;
2671 }
2672 
2673 static bool trans_BRK(DisasContext *s, arg_i *a)
2674 {
2675     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2676     return true;
2677 }
2678 
2679 static bool trans_HLT(DisasContext *s, arg_i *a)
2680 {
2681     /*
2682      * HLT. This has two purposes.
2683      * Architecturally, it is an external halting debug instruction.
2684      * Since QEMU doesn't implement external debug, we treat this as
2685      * the architecture requires when halting debug is disabled: it UNDEFs.
2686      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2687      */
2688     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2689         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2690     } else {
2691         unallocated_encoding(s);
2692     }
2693     return true;
2694 }
2695 
2696 /*
2697  * Load/Store exclusive instructions are implemented by remembering
2698  * the value/address loaded, and seeing if these are the same
2699  * when the store is performed. This is not actually the architecturally
2700  * mandated semantics, but it works for typical guest code sequences
2701  * and avoids having to monitor regular stores.
2702  *
2703  * The store exclusive uses the atomic cmpxchg primitives to avoid
2704  * races in multi-threaded linux-user and when MTTCG softmmu is
2705  * enabled.
2706  */
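
/*
 * E.g. a typical guest retry loop that this scheme supports:
 *
 *   retry:
 *     ldxr  x0, [x2]        ; load; address/value recorded by QEMU
 *     add   x0, x0, #1
 *     stxr  w1, x0, [x2]    ; cmpxchg against the recorded value
 *     cbnz  w1, retry       ; w1 == 1 means the store-exclusive failed
 */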
2707 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2708                                int size, bool is_pair)
2709 {
2710     int idx = get_mem_index(s);
2711     TCGv_i64 dirty_addr, clean_addr;
2712     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2713 
2714     s->is_ldex = true;
2715     dirty_addr = cpu_reg_sp(s, rn);
2716     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2717 
2718     g_assert(size <= 3);
2719     if (is_pair) {
2720         g_assert(size >= 2);
2721         if (size == 2) {
2722             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2723             if (s->be_data == MO_LE) {
2724                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2725                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2726             } else {
2727                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2728                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2729             }
2730         } else {
2731             TCGv_i128 t16 = tcg_temp_new_i128();
2732 
2733             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2734 
2735             if (s->be_data == MO_LE) {
2736                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2737                                       cpu_exclusive_high, t16);
2738             } else {
2739                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2740                                       cpu_exclusive_val, t16);
2741             }
2742             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2743             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2744         }
2745     } else {
2746         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2747         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2748     }
2749     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2750 }
2751 
2752 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2753                                 int rn, int size, int is_pair)
2754 {
2755     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2756      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2757      *     [addr] = {Rt};
2758      *     if (is_pair) {
2759      *         [addr + datasize] = {Rt2};
2760      *     }
2761      *     {Rd} = 0;
2762      * } else {
2763      *     {Rd} = 1;
2764      * }
2765      * env->exclusive_addr = -1;
2766      */
2767     TCGLabel *fail_label = gen_new_label();
2768     TCGLabel *done_label = gen_new_label();
2769     TCGv_i64 tmp, clean_addr;
2770     MemOp memop;
2771 
2772     /*
2773      * FIXME: We are out of spec here.  We have recorded only the address
2774      * from load_exclusive, not the entire range, and we assume that the
2775      * size of the access on both sides match.  The architecture allows the
2776      * store to be smaller than the load, so long as the stored bytes are
2777      * within the range recorded by the load.
2778      */
2779 
2780     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2781     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2782     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2783 
2784     /*
2785      * The write, and any associated faults, only happen if the virtual
2786      * and physical addresses pass the exclusive monitor check.  These
2787      * faults are exceedingly unlikely, because normally the guest uses
2788      * the exact same address register for the load_exclusive, and we
2789      * would have recognized these faults there.
2790      *
2791      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2792      * unaligned 4-byte write within the range of an aligned 8-byte load.
2793      * With LSE2, the store would need to cross a 16-byte boundary when the
2794      * load did not, which would mean the store is outside the range
2795      * recorded for the monitor, and so would have failed a corrected
2796      * monitor check above.  For now, we assume no size change and retain
2797      * the MO_ALIGN to let tcg know what we checked in the load_exclusive.
2798      *
2799      * It is possible to trigger an MTE fault, by performing the load with
2800      * a virtual address with a valid tag and performing the store with the
2801      * same virtual address and a different invalid tag.
2802      */
2803     memop = size + is_pair;
2804     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2805         memop |= MO_ALIGN;
2806     }
2807     memop = finalize_memop(s, memop);
2808     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2809 
2810     tmp = tcg_temp_new_i64();
2811     if (is_pair) {
2812         if (size == 2) {
2813             if (s->be_data == MO_LE) {
2814                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2815             } else {
2816                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2817             }
2818             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2819                                        cpu_exclusive_val, tmp,
2820                                        get_mem_index(s), memop);
2821             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2822         } else {
2823             TCGv_i128 t16 = tcg_temp_new_i128();
2824             TCGv_i128 c16 = tcg_temp_new_i128();
2825             TCGv_i64 a, b;
2826 
2827             if (s->be_data == MO_LE) {
2828                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2829                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2830                                         cpu_exclusive_high);
2831             } else {
2832                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2833                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2834                                         cpu_exclusive_val);
2835             }
2836 
2837             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2838                                         get_mem_index(s), memop);
2839 
2840             a = tcg_temp_new_i64();
2841             b = tcg_temp_new_i64();
2842             if (s->be_data == MO_LE) {
2843                 tcg_gen_extr_i128_i64(a, b, t16);
2844             } else {
2845                 tcg_gen_extr_i128_i64(b, a, t16);
2846             }
2847 
2848             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2849             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2850             tcg_gen_or_i64(tmp, a, b);
2851 
2852             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2853         }
2854     } else {
2855         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2856                                    cpu_reg(s, rt), get_mem_index(s), memop);
2857         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2858     }
2859     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2860     tcg_gen_br(done_label);
2861 
2862     gen_set_label(fail_label);
2863     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2864     gen_set_label(done_label);
2865     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2866 }
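
/*
 * Note one consequence of the cmpxchg scheme: an "ABA" sequence, where
 * another CPU changes the location and then restores the original
 * value, will pass the comparison here even though real hardware would
 * have cleared the monitor and failed the store-exclusive.  As noted
 * above, this is acceptable for typical guest code sequences.
 */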
2867 
2868 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2869                                  int rn, int size)
2870 {
2871     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2872     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2873     int memidx = get_mem_index(s);
2874     TCGv_i64 clean_addr;
2875     MemOp memop;
2876 
2877     if (rn == 31) {
2878         gen_check_sp_alignment(s);
2879     }
2880     memop = check_atomic_align(s, rn, size);
2881     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2882     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2883                                memidx, memop);
2884 }
2885 
2886 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2887                                       int rn, int size)
2888 {
2889     TCGv_i64 s1 = cpu_reg(s, rs);
2890     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2891     TCGv_i64 t1 = cpu_reg(s, rt);
2892     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2893     TCGv_i64 clean_addr;
2894     int memidx = get_mem_index(s);
2895     MemOp memop;
2896 
2897     if (rn == 31) {
2898         gen_check_sp_alignment(s);
2899     }
2900 
2901     /* This is a single atomic access, despite the "pair". */
2902     memop = check_atomic_align(s, rn, size + 1);
2903     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2904 
2905     if (size == 2) {
2906         TCGv_i64 cmp = tcg_temp_new_i64();
2907         TCGv_i64 val = tcg_temp_new_i64();
2908 
2909         if (s->be_data == MO_LE) {
2910             tcg_gen_concat32_i64(val, t1, t2);
2911             tcg_gen_concat32_i64(cmp, s1, s2);
2912         } else {
2913             tcg_gen_concat32_i64(val, t2, t1);
2914             tcg_gen_concat32_i64(cmp, s2, s1);
2915         }
2916 
2917         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2918 
2919         if (s->be_data == MO_LE) {
2920             tcg_gen_extr32_i64(s1, s2, cmp);
2921         } else {
2922             tcg_gen_extr32_i64(s2, s1, cmp);
2923         }
2924     } else {
2925         TCGv_i128 cmp = tcg_temp_new_i128();
2926         TCGv_i128 val = tcg_temp_new_i128();
2927 
2928         if (s->be_data == MO_LE) {
2929             tcg_gen_concat_i64_i128(val, t1, t2);
2930             tcg_gen_concat_i64_i128(cmp, s1, s2);
2931         } else {
2932             tcg_gen_concat_i64_i128(val, t2, t1);
2933             tcg_gen_concat_i64_i128(cmp, s2, s1);
2934         }
2935 
2936         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2937 
2938         if (s->be_data == MO_LE) {
2939             tcg_gen_extr_i128_i64(s1, s2, cmp);
2940         } else {
2941             tcg_gen_extr_i128_i64(s2, s1, cmp);
2942         }
2943     }
2944 }
2945 
2946 /*
2947  * Compute the ISS.SF bit for syndrome information if an exception
2948  * is taken on a load or store. This indicates whether the instruction
2949  * is accessing a 32-bit or 64-bit register. This logic is derived
2950  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2951  */
2952 static bool ldst_iss_sf(int size, bool sign, bool ext)
2953 {
2955     if (sign) {
2956         /*
2957          * Signed loads are 64 bit results if we are not going to
2958          * do a zero-extend from 32 to 64 after the load.
2959          * (For a store, sign and ext are always false.)
2960          */
2961         return !ext;
2962     } else {
2963         /* Unsigned loads/stores work at the specified size */
2964         return size == MO_64;
2965     }
2966 }
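
/*
 * Examples of the resulting ISS.SF values:
 *
 *   LDR Wt, [...]     size = MO_32, sign = 0            -> SF = 0
 *   LDR Xt, [...]     size = MO_64, sign = 0            -> SF = 1
 *   LDRSB Wt, [...]   sign = 1, ext = 1 (32-bit dest)   -> SF = 0
 *   LDRSB Xt, [...]   sign = 1, ext = 0 (64-bit dest)   -> SF = 1
 */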
2967 
2968 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2969 {
2970     if (a->rn == 31) {
2971         gen_check_sp_alignment(s);
2972     }
2973     if (a->lasr) {
2974         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2975     }
2976     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2977     return true;
2978 }
2979 
2980 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2981 {
2982     if (a->rn == 31) {
2983         gen_check_sp_alignment(s);
2984     }
2985     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2986     if (a->lasr) {
2987         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2988     }
2989     return true;
2990 }
2991 
2992 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2993 {
2994     TCGv_i64 clean_addr;
2995     MemOp memop;
2996     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2997 
2998     /*
2999      * StoreLORelease is the same as Store-Release for QEMU, but
3000      * needs the feature-test.
3001      */
3002     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3003         return false;
3004     }
3005     /* Generate ISS for non-exclusive accesses including LASR.  */
3006     if (a->rn == 31) {
3007         gen_check_sp_alignment(s);
3008     }
3009     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3010     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3011     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3012                                 true, a->rn != 31, memop);
3013     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3014               iss_sf, a->lasr);
3015     return true;
3016 }
3017 
3018 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3019 {
3020     TCGv_i64 clean_addr;
3021     MemOp memop;
3022     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3023 
3024     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3025     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3026         return false;
3027     }
3028     /* Generate ISS for non-exclusive accesses including LASR.  */
3029     if (a->rn == 31) {
3030         gen_check_sp_alignment(s);
3031     }
3032     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3033     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3034                                 false, a->rn != 31, memop);
3035     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3036               a->rt, iss_sf, a->lasr);
3037     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3038     return true;
3039 }
3040 
3041 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3042 {
3043     if (a->rn == 31) {
3044         gen_check_sp_alignment(s);
3045     }
3046     if (a->lasr) {
3047         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3048     }
3049     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3050     return true;
3051 }
3052 
3053 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3054 {
3055     if (a->rn == 31) {
3056         gen_check_sp_alignment(s);
3057     }
3058     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3059     if (a->lasr) {
3060         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3061     }
3062     return true;
3063 }
3064 
3065 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3066 {
3067     if (!dc_isar_feature(aa64_atomics, s)) {
3068         return false;
3069     }
3070     if (((a->rt | a->rs) & 1) != 0) {
3071         return false;
3072     }
3073 
3074     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3075     return true;
3076 }
3077 
3078 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3079 {
3080     if (!dc_isar_feature(aa64_atomics, s)) {
3081         return false;
3082     }
3083     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3084     return true;
3085 }
3086 
3087 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3088 {
3089     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3090     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3091     TCGv_i64 clean_addr = tcg_temp_new_i64();
3092     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3093 
3094     gen_pc_plus_diff(s, clean_addr, a->imm);
3095     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3096               false, true, a->rt, iss_sf, false);
3097     return true;
3098 }
3099 
3100 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3101 {
3102     /* Load register (literal), vector version */
3103     TCGv_i64 clean_addr;
3104     MemOp memop;
3105 
3106     if (!fp_access_check(s)) {
3107         return true;
3108     }
3109     memop = finalize_memop_asimd(s, a->sz);
3110     clean_addr = tcg_temp_new_i64();
3111     gen_pc_plus_diff(s, clean_addr, a->imm);
3112     do_fp_ld(s, a->rt, clean_addr, memop);
3113     return true;
3114 }
3115 
3116 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3117                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3118                                  uint64_t offset, bool is_store, MemOp mop)
3119 {
3120     if (a->rn == 31) {
3121         gen_check_sp_alignment(s);
3122     }
3123 
3124     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3125     if (!a->p) {
3126         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3127     }
3128 
3129     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3130                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3131 }
3132 
3133 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3134                                   TCGv_i64 dirty_addr, uint64_t offset)
3135 {
3136     if (a->w) {
3137         if (a->p) {
3138             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3139         }
3140         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3141     }
3142 }
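
/*
 * Between them the pre/post hooks above implement the three pair
 * addressing forms: signed offset (!p, !w: offset added up front, no
 * writeback), pre-index (!p, w: offset added up front, written back)
 * and post-index (p, w: address used as-is, offset added only at
 * writeback).  E.g. "LDP x0, x1, [sp], #16" is the post-index form.
 */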
3143 
3144 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3145 {
3146     uint64_t offset = a->imm << a->sz;
3147     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3148     MemOp mop = finalize_memop(s, a->sz);
3149 
3150     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3151     tcg_rt = cpu_reg(s, a->rt);
3152     tcg_rt2 = cpu_reg(s, a->rt2);
3153     /*
3154      * We built mop above for the single logical access -- rebuild it
3155      * now for the paired operation.
3156      *
3157      * With LSE2, non-sign-extending pairs are treated atomically if
3158      * aligned, and if unaligned one of the pair will be completely
3159      * within a 16-byte block and that element will be atomic.
3160      * Otherwise each element is separately atomic.
3161      * In all cases, issue one operation with the correct atomicity.
3162      */
3163     mop = a->sz + 1;
3164     if (s->align_mem) {
3165         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3166     }
3167     mop = finalize_memop_pair(s, mop);
3168     if (a->sz == 2) {
3169         TCGv_i64 tmp = tcg_temp_new_i64();
3170 
3171         if (s->be_data == MO_LE) {
3172             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3173         } else {
3174             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3175         }
3176         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3177     } else {
3178         TCGv_i128 tmp = tcg_temp_new_i128();
3179 
3180         if (s->be_data == MO_LE) {
3181             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3182         } else {
3183             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3184         }
3185         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3186     }
3187     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3188     return true;
3189 }
3190 
3191 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3192 {
3193     uint64_t offset = a->imm << a->sz;
3194     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3195     MemOp mop = finalize_memop(s, a->sz);
3196 
3197     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3198     tcg_rt = cpu_reg(s, a->rt);
3199     tcg_rt2 = cpu_reg(s, a->rt2);
3200 
3201     /*
3202      * We built mop above for the single logical access -- rebuild it
3203      * now for the paired operation.
3204      *
3205      * With LSE2, non-sign-extending pairs are treated atomically if
3206      * aligned, and if unaligned one of the pair will be completely
3207      * within a 16-byte block and that element will be atomic.
3208      * Otherwise each element is separately atomic.
3209      * In all cases, issue one operation with the correct atomicity.
3210      *
3211      * This treats sign-extending loads like zero-extending loads,
3212      * since that reuses the most code below.
3213      */
3214     mop = a->sz + 1;
3215     if (s->align_mem) {
3216         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3217     }
3218     mop = finalize_memop_pair(s, mop);
3219     if (a->sz == 2) {
3220         int o2 = s->be_data == MO_LE ? 32 : 0;
3221         int o1 = o2 ^ 32;
3222 
3223         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3224         if (a->sign) {
3225             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3226             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3227         } else {
3228             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3229             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3230         }
3231     } else {
3232         TCGv_i128 tmp = tcg_temp_new_i128();
3233 
3234         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3235         if (s->be_data == MO_LE) {
3236             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3237         } else {
3238             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3239         }
3240     }
3241     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3242     return true;
3243 }
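
/*
 * For illustration (operands chosen arbitrarily): little-endian
 * LDP W0, W1, [X2] has a->sz == 2, so a single 64-bit load fetches
 * both words; o2 == 32 and o1 == 0, giving W0 bits [31:0] and W1
 * bits [63:32] of the loaded value.
 */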
3244 
3245 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3246 {
3247     uint64_t offset = a->imm << a->sz;
3248     TCGv_i64 clean_addr, dirty_addr;
3249     MemOp mop;
3250 
3251     if (!fp_access_check(s)) {
3252         return true;
3253     }
3254 
3255     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3256     mop = finalize_memop_asimd(s, a->sz);
3257     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3258     do_fp_st(s, a->rt, clean_addr, mop);
3259     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3260     do_fp_st(s, a->rt2, clean_addr, mop);
3261     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3262     return true;
3263 }
3264 
3265 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3266 {
3267     uint64_t offset = a->imm << a->sz;
3268     TCGv_i64 clean_addr, dirty_addr;
3269     MemOp mop;
3270 
3271     if (!fp_access_check(s)) {
3272         return true;
3273     }
3274 
3275     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3276     mop = finalize_memop_asimd(s, a->sz);
3277     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3278     do_fp_ld(s, a->rt, clean_addr, mop);
3279     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3280     do_fp_ld(s, a->rt2, clean_addr, mop);
3281     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3282     return true;
3283 }
3284 
3285 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3286 {
3287     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3288     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3289     MemOp mop;
3290     TCGv_i128 tmp;
3291 
3292     /* STGP only comes in one size. */
3293     tcg_debug_assert(a->sz == MO_64);
3294 
3295     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3296         return false;
3297     }
3298 
3299     if (a->rn == 31) {
3300         gen_check_sp_alignment(s);
3301     }
3302 
3303     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3304     if (!a->p) {
3305         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3306     }
3307 
3308     clean_addr = clean_data_tbi(s, dirty_addr);
3309     tcg_rt = cpu_reg(s, a->rt);
3310     tcg_rt2 = cpu_reg(s, a->rt2);
3311 
3312     /*
3313      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3314      * and one tag operation.  We implement it as one single aligned 16-byte
3315      * memory operation for convenience.  Note that the alignment ensures
3316      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3317      */
3318     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3319 
3320     tmp = tcg_temp_new_i128();
3321     if (s->be_data == MO_LE) {
3322         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3323     } else {
3324         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3325     }
3326     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3327 
3328     /* Perform the tag store, if tag access enabled. */
3329     if (s->ata[0]) {
3330         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3331             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3332         } else {
3333             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3334         }
3335     }
3336 
3337     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3338     return true;
3339 }
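
/*
 * Note that LOG2_TAG_GRANULE is 4, so the STGP immediate above is
 * scaled by the 16-byte tag granule rather than by the 8-byte
 * register size that an ordinary STP would use.
 */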
3340 
3341 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3342                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3343                                  uint64_t offset, bool is_store, MemOp mop)
3344 {
3345     int memidx;
3346 
3347     if (a->rn == 31) {
3348         gen_check_sp_alignment(s);
3349     }
3350 
3351     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3352     if (!a->p) {
3353         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3354     }
3355     memidx = get_a64_user_mem_index(s, a->unpriv);
3356     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3357                                         a->w || a->rn != 31,
3358                                         mop, a->unpriv, memidx);
3359 }
3360 
3361 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3362                                   TCGv_i64 dirty_addr, uint64_t offset)
3363 {
3364     if (a->w) {
3365         if (a->p) {
3366             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3367         }
3368         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3369     }
3370 }
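
/*
 * Addressing-mode summary for the two helpers above, as implied by
 * the a->p and a->w tests:
 *   signed offset: w == 0, p == 0: EA = Rn + imm, no writeback
 *   pre-index:     w == 1, p == 0: EA = Rn + imm, Rn = Rn + imm
 *   post-index:    w == 1, p == 1: EA = Rn,       Rn = Rn + imm
 */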
3371 
3372 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3373 {
3374     bool iss_sf, iss_valid = !a->w;
3375     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3376     int memidx = get_a64_user_mem_index(s, a->unpriv);
3377     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3378 
3379     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3380 
3381     tcg_rt = cpu_reg(s, a->rt);
3382     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3383 
3384     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3385                      iss_valid, a->rt, iss_sf, false);
3386     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3387     return true;
3388 }
3389 
3390 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3391 {
3392     bool iss_sf, iss_valid = !a->w;
3393     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3394     int memidx = get_a64_user_mem_index(s, a->unpriv);
3395     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3396 
3397     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3398 
3399     tcg_rt = cpu_reg(s, a->rt);
3400     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3401 
3402     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3403                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3404     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3405     return true;
3406 }
3407 
3408 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3409 {
3410     TCGv_i64 clean_addr, dirty_addr;
3411     MemOp mop;
3412 
3413     if (!fp_access_check(s)) {
3414         return true;
3415     }
3416     mop = finalize_memop_asimd(s, a->sz);
3417     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3418     do_fp_st(s, a->rt, clean_addr, mop);
3419     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3420     return true;
3421 }
3422 
3423 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3424 {
3425     TCGv_i64 clean_addr, dirty_addr;
3426     MemOp mop;
3427 
3428     if (!fp_access_check(s)) {
3429         return true;
3430     }
3431     mop = finalize_memop_asimd(s, a->sz);
3432     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3433     do_fp_ld(s, a->rt, clean_addr, mop);
3434     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3435     return true;
3436 }
3437 
3438 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3439                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3440                              bool is_store, MemOp memop)
3441 {
3442     TCGv_i64 tcg_rm;
3443 
3444     if (a->rn == 31) {
3445         gen_check_sp_alignment(s);
3446     }
3447     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3448 
3449     tcg_rm = read_cpu_reg(s, a->rm, 1);
3450     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3451 
3452     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3453     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3454 }
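
/*
 * For illustration (operands chosen arbitrarily): for
 * LDR X0, [X1, W2, SXTW #3], a->opt selects the SXTW extension and,
 * since a->s is set, ext_and_shift_reg() scales W2 by the access
 * size (1 << 3), mirroring the architectural ExtendReg() pseudocode.
 */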
3455 
3456 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3457 {
3458     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3459     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3460     MemOp memop;
3461 
3462     if (extract32(a->opt, 1, 1) == 0) {
3463         return false;
3464     }
3465 
3466     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3467     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3468     tcg_rt = cpu_reg(s, a->rt);
3469     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3470               a->ext, true, a->rt, iss_sf, false);
3471     return true;
3472 }
3473 
3474 static bool trans_STR(DisasContext *s, arg_ldst *a)
3475 {
3476     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3477     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3478     MemOp memop;
3479 
3480     if (extract32(a->opt, 1, 1) == 0) {
3481         return false;
3482     }
3483 
3484     memop = finalize_memop(s, a->sz);
3485     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3486     tcg_rt = cpu_reg(s, a->rt);
3487     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3488     return true;
3489 }
3490 
3491 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3492 {
3493     TCGv_i64 clean_addr, dirty_addr;
3494     MemOp memop;
3495 
3496     if (extract32(a->opt, 1, 1) == 0) {
3497         return false;
3498     }
3499 
3500     if (!fp_access_check(s)) {
3501         return true;
3502     }
3503 
3504     memop = finalize_memop_asimd(s, a->sz);
3505     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3506     do_fp_ld(s, a->rt, clean_addr, memop);
3507     return true;
3508 }
3509 
3510 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3511 {
3512     TCGv_i64 clean_addr, dirty_addr;
3513     MemOp memop;
3514 
3515     if (extract32(a->opt, 1, 1) == 0) {
3516         return false;
3517     }
3518 
3519     if (!fp_access_check(s)) {
3520         return true;
3521     }
3522 
3523     memop = finalize_memop_asimd(s, a->sz);
3524     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3525     do_fp_st(s, a->rt, clean_addr, memop);
3526     return true;
3527 }
3528 
3530 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3531                          int sign, bool invert)
3532 {
3533     MemOp mop = a->sz | sign;
3534     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3535 
3536     if (a->rn == 31) {
3537         gen_check_sp_alignment(s);
3538     }
3539     mop = check_atomic_align(s, a->rn, mop);
3540     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3541                                 a->rn != 31, mop);
3542     tcg_rs = read_cpu_reg(s, a->rs, true);
3543     tcg_rt = cpu_reg(s, a->rt);
3544     if (invert) {
3545         tcg_gen_not_i64(tcg_rs, tcg_rs);
3546     }
3547     /*
3548      * The tcg atomic primitives are all full barriers.  Therefore we
3549      * can ignore the Acquire and Release bits of this instruction.
3550      */
3551     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3552 
3553     if (mop & MO_SIGN) {
3554         switch (a->sz) {
3555         case MO_8:
3556             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3557             break;
3558         case MO_16:
3559             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3560             break;
3561         case MO_32:
3562             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3563             break;
3564         case MO_64:
3565             break;
3566         default:
3567             g_assert_not_reached();
3568         }
3569     }
3570     return true;
3571 }
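
/*
 * For illustration (operands chosen arbitrarily): LDADD W0, W1, [X2]
 * reaches do_atomic_ld() with a->sz == MO_32, and W1 receives the old
 * memory value.  Only LDSMAX/LDSMIN below pass MO_SIGN, and purely so
 * the comparison sees sign-extended operands; the switch above then
 * restores the architecturally zero-extended register result.
 */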
3572 
3573 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3574 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3575 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3576 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3577 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3578 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3579 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3580 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3581 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3582 
3583 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3584 {
3585     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3586     TCGv_i64 clean_addr;
3587     MemOp mop;
3588 
3589     if (!dc_isar_feature(aa64_atomics, s) ||
3590         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3591         return false;
3592     }
3593     if (a->rn == 31) {
3594         gen_check_sp_alignment(s);
3595     }
3596     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3597     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3598                                 a->rn != 31, mop);
3599     /*
3600      * LDAPR* are a special case because they are a simple load, not a
3601      * fetch-and-do-something op.
3602      * The architectural consistency requirements here are weaker than
3603      * full load-acquire (we only need "load-acquire processor consistent"),
3604      * but we choose to implement them as full LDAQ.
3605      */
3606     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3607               true, a->rt, iss_sf, true);
3608     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3609     return true;
3610 }
3611 
3612 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3613 {
3614     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3615     MemOp memop;
3616 
3617     /* Load with pointer authentication */
3618     if (!dc_isar_feature(aa64_pauth, s)) {
3619         return false;
3620     }
3621 
3622     if (a->rn == 31) {
3623         gen_check_sp_alignment(s);
3624     }
3625     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3626 
3627     if (s->pauth_active) {
3628         if (!a->m) {
3629             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3630                                       tcg_constant_i64(0));
3631         } else {
3632             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3633                                       tcg_constant_i64(0));
3634         }
3635     }
3636 
3637     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3638 
3639     memop = finalize_memop(s, MO_64);
3640 
3641     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3642     clean_addr = gen_mte_check1(s, dirty_addr, false,
3643                                 a->w || a->rn != 31, memop);
3644 
3645     tcg_rt = cpu_reg(s, a->rt);
3646     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3647               /* extend */ false, /* iss_valid */ !a->w,
3648               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3649 
3650     if (a->w) {
3651         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3652     }
3653     return true;
3654 }
3655 
3656 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3657 {
3658     TCGv_i64 clean_addr, dirty_addr;
3659     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3660     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3661 
3662     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3663         return false;
3664     }
3665 
3666     if (a->rn == 31) {
3667         gen_check_sp_alignment(s);
3668     }
3669 
3670     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3671     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3672     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3673     clean_addr = clean_data_tbi(s, dirty_addr);
3674 
3675     /*
3676      * Load-AcquirePC semantics; we implement as the slightly more
3677      * restrictive Load-Acquire.
3678      */
3679     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3680               a->rt, iss_sf, true);
3681     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3682     return true;
3683 }
3684 
3685 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3686 {
3687     TCGv_i64 clean_addr, dirty_addr;
3688     MemOp mop = a->sz;
3689     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3690 
3691     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3692         return false;
3693     }
3694 
3695     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3696 
3697     if (a->rn == 31) {
3698         gen_check_sp_alignment(s);
3699     }
3700 
3701     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3702     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3703     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3704     clean_addr = clean_data_tbi(s, dirty_addr);
3705 
3706     /* Store-Release semantics */
3707     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3708     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3709     return true;
3710 }
3711 
3712 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3713 {
3714     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3715     MemOp endian, align, mop;
3716 
3717     int total;    /* total bytes */
3718     int elements; /* elements per vector */
3719     int r;
3720     int size = a->sz;
3721 
3722     if (!a->p && a->rm != 0) {
3723         /* For non-postindexed accesses the Rm field must be 0 */
3724         return false;
3725     }
3726     if (size == 3 && !a->q && a->selem != 1) {
3727         return false;
3728     }
3729     if (!fp_access_check(s)) {
3730         return true;
3731     }
3732 
3733     if (a->rn == 31) {
3734         gen_check_sp_alignment(s);
3735     }
3736 
3737     /* For our purposes, bytes are always little-endian.  */
3738     endian = s->be_data;
3739     if (size == 0) {
3740         endian = MO_LE;
3741     }
3742 
3743     total = a->rpt * a->selem * (a->q ? 16 : 8);
3744     tcg_rn = cpu_reg_sp(s, a->rn);
3745 
3746     /*
3747      * Issue the MTE check vs the logical repeat count, before we
3748      * promote consecutive little-endian elements below.
3749      */
3750     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3751                                 finalize_memop_asimd(s, size));
3752 
3753     /*
3754      * Consecutive little-endian elements from a single register
3755      * can be promoted to a larger little-endian operation.
3756      */
3757     align = MO_ALIGN;
3758     if (a->selem == 1 && endian == MO_LE) {
3759         align = pow2_align(size);
3760         size = 3;
3761     }
3762     if (!s->align_mem) {
3763         align = 0;
3764     }
3765     mop = endian | size | align;
3766 
3767     elements = (a->q ? 16 : 8) >> size;
3768     tcg_ebytes = tcg_constant_i64(1 << size);
3769     for (r = 0; r < a->rpt; r++) {
3770         int e;
3771         for (e = 0; e < elements; e++) {
3772             int xs;
3773             for (xs = 0; xs < a->selem; xs++) {
3774                 int tt = (a->rt + r + xs) % 32;
3775                 do_vec_ld(s, tt, e, clean_addr, mop);
3776                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3777             }
3778         }
3779     }
3780 
3781     /*
3782      * For non-quad operations, setting a slice of the low 64 bits of
3783      * the register clears the high 64 bits (in the ARM ARM pseudocode
3784      * this is implicit in the fact that 'rval' is a 64 bit wide
3785      * variable).  For quad operations, we might still need to zero
3786      * the high bits of SVE.
3787      */
3788     for (r = 0; r < a->rpt * a->selem; r++) {
3789         int tt = (a->rt + r) % 32;
3790         clear_vec_high(s, a->q, tt);
3791     }
3792 
3793     if (a->p) {
3794         if (a->rm == 31) {
3795             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3796         } else {
3797             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3798         }
3799     }
3800     return true;
3801 }
3802 
3803 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3804 {
3805     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3806     MemOp endian, align, mop;
3807 
3808     int total;    /* total bytes */
3809     int elements; /* elements per vector */
3810     int r;
3811     int size = a->sz;
3812 
3813     if (!a->p && a->rm != 0) {
3814         /* For non-postindexed accesses the Rm field must be 0 */
3815         return false;
3816     }
3817     if (size == 3 && !a->q && a->selem != 1) {
3818         return false;
3819     }
3820     if (!fp_access_check(s)) {
3821         return true;
3822     }
3823 
3824     if (a->rn == 31) {
3825         gen_check_sp_alignment(s);
3826     }
3827 
3828     /* For our purposes, bytes are always little-endian.  */
3829     endian = s->be_data;
3830     if (size == 0) {
3831         endian = MO_LE;
3832     }
3833 
3834     total = a->rpt * a->selem * (a->q ? 16 : 8);
3835     tcg_rn = cpu_reg_sp(s, a->rn);
3836 
3837     /*
3838      * Issue the MTE check vs the logical repeat count, before we
3839      * promote consecutive little-endian elements below.
3840      */
3841     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3842                                 finalize_memop_asimd(s, size));
3843 
3844     /*
3845      * Consecutive little-endian elements from a single register
3846      * can be promoted to a larger little-endian operation.
3847      */
3848     align = MO_ALIGN;
3849     if (a->selem == 1 && endian == MO_LE) {
3850         align = pow2_align(size);
3851         size = 3;
3852     }
3853     if (!s->align_mem) {
3854         align = 0;
3855     }
3856     mop = endian | size | align;
3857 
3858     elements = (a->q ? 16 : 8) >> size;
3859     tcg_ebytes = tcg_constant_i64(1 << size);
3860     for (r = 0; r < a->rpt; r++) {
3861         int e;
3862         for (e = 0; e < elements; e++) {
3863             int xs;
3864             for (xs = 0; xs < a->selem; xs++) {
3865                 int tt = (a->rt + r + xs) % 32;
3866                 do_vec_st(s, tt, e, clean_addr, mop);
3867                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3868             }
3869         }
3870     }
3871 
3872     if (a->p) {
3873         if (a->rm == 31) {
3874             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3875         } else {
3876             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3877         }
3878     }
3879     return true;
3880 }
3881 
3882 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3883 {
3884     int xs, total, rt;
3885     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3886     MemOp mop;
3887 
3888     if (!a->p && a->rm != 0) {
3889         return false;
3890     }
3891     if (!fp_access_check(s)) {
3892         return true;
3893     }
3894 
3895     if (a->rn == 31) {
3896         gen_check_sp_alignment(s);
3897     }
3898 
3899     total = a->selem << a->scale;
3900     tcg_rn = cpu_reg_sp(s, a->rn);
3901 
3902     mop = finalize_memop_asimd(s, a->scale);
3903     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3904                                 total, mop);
3905 
3906     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3907     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3908         do_vec_st(s, rt, a->index, clean_addr, mop);
3909         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3910     }
3911 
3912     if (a->p) {
3913         if (a->rm == 31) {
3914             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3915         } else {
3916             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3917         }
3918     }
3919     return true;
3920 }
3921 
3922 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3923 {
3924     int xs, total, rt;
3925     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3926     MemOp mop;
3927 
3928     if (!a->p && a->rm != 0) {
3929         return false;
3930     }
3931     if (!fp_access_check(s)) {
3932         return true;
3933     }
3934 
3935     if (a->rn == 31) {
3936         gen_check_sp_alignment(s);
3937     }
3938 
3939     total = a->selem << a->scale;
3940     tcg_rn = cpu_reg_sp(s, a->rn);
3941 
3942     mop = finalize_memop_asimd(s, a->scale);
3943     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3944                                 total, mop);
3945 
3946     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3947     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3948         do_vec_ld(s, rt, a->index, clean_addr, mop);
3949         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3950     }
3951 
3952     if (a->p) {
3953         if (a->rm == 31) {
3954             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3955         } else {
3956             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3957         }
3958     }
3959     return true;
3960 }
3961 
3962 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3963 {
3964     int xs, total, rt;
3965     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3966     MemOp mop;
3967 
3968     if (!a->p && a->rm != 0) {
3969         return false;
3970     }
3971     if (!fp_access_check(s)) {
3972         return true;
3973     }
3974 
3975     if (a->rn == 31) {
3976         gen_check_sp_alignment(s);
3977     }
3978 
3979     total = a->selem << a->scale;
3980     tcg_rn = cpu_reg_sp(s, a->rn);
3981 
3982     mop = finalize_memop_asimd(s, a->scale);
3983     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3984                                 total, mop);
3985 
3986     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3987     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3988         /* Load and replicate to all elements */
3989         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3990 
3991         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3992         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3993                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3994         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3995     }
3996 
3997     if (a->p) {
3998         if (a->rm == 31) {
3999             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4000         } else {
4001             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4002         }
4003     }
4004     return true;
4005 }
4006 
4007 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4008 {
4009     TCGv_i64 addr, clean_addr, tcg_rt;
4010     int size = 4 << s->dcz_blocksize;
4011 
4012     if (!dc_isar_feature(aa64_mte, s)) {
4013         return false;
4014     }
4015     if (s->current_el == 0) {
4016         return false;
4017     }
4018 
4019     if (a->rn == 31) {
4020         gen_check_sp_alignment(s);
4021     }
4022 
4023     addr = read_cpu_reg_sp(s, a->rn, true);
4024     tcg_gen_addi_i64(addr, addr, a->imm);
4025     tcg_rt = cpu_reg(s, a->rt);
4026 
4027     if (s->ata[0]) {
4028         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4029     }
4030     /*
4031      * The non-tags portion of STZGM is mostly like DC_ZVA,
4032      * except the alignment happens before the access.
4033      */
4034     clean_addr = clean_data_tbi(s, addr);
4035     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4036     gen_helper_dc_zva(tcg_env, clean_addr);
4037     return true;
4038 }
4039 
4040 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4041 {
4042     TCGv_i64 addr, clean_addr, tcg_rt;
4043 
4044     if (!dc_isar_feature(aa64_mte, s)) {
4045         return false;
4046     }
4047     if (s->current_el == 0) {
4048         return false;
4049     }
4050 
4051     if (a->rn == 31) {
4052         gen_check_sp_alignment(s);
4053     }
4054 
4055     addr = read_cpu_reg_sp(s, a->rn, true);
4056     tcg_gen_addi_i64(addr, addr, a->imm);
4057     tcg_rt = cpu_reg(s, a->rt);
4058 
4059     if (s->ata[0]) {
4060         gen_helper_stgm(tcg_env, addr, tcg_rt);
4061     } else {
4062         MMUAccessType acc = MMU_DATA_STORE;
4063         int size = 4 << s->gm_blocksize;
4064 
4065         clean_addr = clean_data_tbi(s, addr);
4066         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4067         gen_probe_access(s, clean_addr, acc, size);
4068     }
4069     return true;
4070 }
4071 
4072 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4073 {
4074     TCGv_i64 addr, clean_addr, tcg_rt;
4075 
4076     if (!dc_isar_feature(aa64_mte, s)) {
4077         return false;
4078     }
4079     if (s->current_el == 0) {
4080         return false;
4081     }
4082 
4083     if (a->rn == 31) {
4084         gen_check_sp_alignment(s);
4085     }
4086 
4087     addr = read_cpu_reg_sp(s, a->rn, true);
4088     tcg_gen_addi_i64(addr, addr, a->imm);
4089     tcg_rt = cpu_reg(s, a->rt);
4090 
4091     if (s->ata[0]) {
4092         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4093     } else {
4094         MMUAccessType acc = MMU_DATA_LOAD;
4095         int size = 4 << s->gm_blocksize;
4096 
4097         clean_addr = clean_data_tbi(s, addr);
4098         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4099         gen_probe_access(s, clean_addr, acc, size);
4100         /* The result tags are zeros.  */
4101         tcg_gen_movi_i64(tcg_rt, 0);
4102     }
4103     return true;
4104 }
4105 
4106 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4107 {
4108     TCGv_i64 addr, clean_addr, tcg_rt;
4109 
4110     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4111         return false;
4112     }
4113 
4114     if (a->rn == 31) {
4115         gen_check_sp_alignment(s);
4116     }
4117 
4118     addr = read_cpu_reg_sp(s, a->rn, true);
4119     if (!a->p) {
4120         /* pre-index or signed offset */
4121         tcg_gen_addi_i64(addr, addr, a->imm);
4122     }
4123 
4124     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4125     tcg_rt = cpu_reg(s, a->rt);
4126     if (s->ata[0]) {
4127         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4128     } else {
4129         /*
4130          * Tag access disabled: we must check for aborts on the
4131          * load from [rn+offset], and then insert a 0 tag into rt.
4132          */
4133         clean_addr = clean_data_tbi(s, addr);
4134         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4135         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4136     }
4137 
4138     if (a->w) {
4139         /* pre-index or post-index */
4140         if (a->p) {
4141             /* post-index */
4142             tcg_gen_addi_i64(addr, addr, a->imm);
4143         }
4144         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4145     }
4146     return true;
4147 }
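
/*
 * For illustration (operands chosen arbitrarily): LDG X0, [X1, #-16]
 * fetches the allocation tag of the granule containing X1 - 16 into
 * bits [59:56] of X0; with tag access disabled we only probe for
 * faults and insert a zero tag instead.
 */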
4148 
4149 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4150 {
4151     TCGv_i64 addr, tcg_rt;
4152 
4153     if (a->rn == 31) {
4154         gen_check_sp_alignment(s);
4155     }
4156 
4157     addr = read_cpu_reg_sp(s, a->rn, true);
4158     if (!a->p) {
4159         /* pre-index or signed offset */
4160         tcg_gen_addi_i64(addr, addr, a->imm);
4161     }
4162     tcg_rt = cpu_reg_sp(s, a->rt);
4163     if (!s->ata[0]) {
4164         /*
4165          * For STG and ST2G, we need to check alignment and probe memory.
4166          * TODO: For STZG and STZ2G, we could rely on the stores below,
4167          * at least for system mode; user-only won't enforce alignment.
4168          */
4169         if (is_pair) {
4170             gen_helper_st2g_stub(tcg_env, addr);
4171         } else {
4172             gen_helper_stg_stub(tcg_env, addr);
4173         }
4174     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4175         if (is_pair) {
4176             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4177         } else {
4178             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4179         }
4180     } else {
4181         if (is_pair) {
4182             gen_helper_st2g(tcg_env, addr, tcg_rt);
4183         } else {
4184             gen_helper_stg(tcg_env, addr, tcg_rt);
4185         }
4186     }
4187 
4188     if (is_zero) {
4189         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4190         TCGv_i64 zero64 = tcg_constant_i64(0);
4191         TCGv_i128 zero128 = tcg_temp_new_i128();
4192         int mem_index = get_mem_index(s);
4193         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4194 
4195         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4196 
4197         /* This is 1 or 2 atomic 16-byte operations. */
4198         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4199         if (is_pair) {
4200             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4201             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4202         }
4203     }
4204 
4205     if (a->w) {
4206         /* pre-index or post-index */
4207         if (a->p) {
4208             /* post-index */
4209             tcg_gen_addi_i64(addr, addr, a->imm);
4210         }
4211         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4212     }
4213     return true;
4214 }
4215 
4216 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4217 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4218 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4219 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
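
/*
 * For illustration (operands chosen arbitrarily): STZG X0, [SP], #16
 * stores the allocation tag from X0<59:56> for one granule, zeroes
 * those 16 bytes via the is_zero path above, and then post-indexes
 * SP by 16.
 */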
4220 
4221 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4222 
4223 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4224                    bool is_setg, SetFn fn)
4225 {
4226     int memidx;
4227     uint32_t syndrome, desc = 0;
4228 
4229     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4230         return false;
4231     }
4232 
4233     /*
4234      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4235      * us to pull this check before the CheckMOPSEnabled() test
4236      * (which we do in the helper function)
4237      */
4238     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4239         a->rd == 31 || a->rn == 31) {
4240         return false;
4241     }
4242 
4243     memidx = get_a64_user_mem_index(s, a->unpriv);
4244 
4245     /*
4246      * We pass option_a == true, matching our implementation;
4247      * we pass wrong_option == false: helper function may set that bit.
4248      */
4249     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4250                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4251 
4252     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4253         /* We may need to do MTE tag checking, so assemble the descriptor */
4254         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4255         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4256         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4257         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4258     }
4259     /* The helper function always needs the memidx even with MTE disabled */
4260     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4261 
4262     /*
4263      * The helper needs the register numbers, but since they're in
4264      * the syndrome anyway, we let it extract them from there rather
4265      * than passing in an extra three integer arguments.
4266      */
4267     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4268     return true;
4269 }
4270 
4271 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4272 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4273 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4274 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4275 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4276 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
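
/*
 * Usage note: a complete memset expands to the three-instruction
 * sequence SETP/SETM/SETE (SETGP/SETGM/SETGE when also writing tags),
 * with Xd the destination, Xn the remaining size and Xs the fill
 * byte; the helpers recover those register numbers from the syndrome.
 */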
4277 
4278 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4279 
4280 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4281 {
4282     int rmemidx, wmemidx;
4283     uint32_t syndrome, rdesc = 0, wdesc = 0;
4284     bool wunpriv = extract32(a->options, 0, 1);
4285     bool runpriv = extract32(a->options, 1, 1);
4286 
4287     /*
4288      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4289      * us to pull this check before the CheckMOPSEnabled() test
4290      * (which we do in the helper function)
4291      */
4292     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4293         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4294         return false;
4295     }
4296 
4297     rmemidx = get_a64_user_mem_index(s, runpriv);
4298     wmemidx = get_a64_user_mem_index(s, wunpriv);
4299 
4300     /*
4301      * We pass option_a == true, matching our implementation;
4302      * we pass wrong_option == false: helper function may set that bit.
4303      */
4304     syndrome = syn_mop(false, false, a->options, is_epilogue,
4305                        false, true, a->rd, a->rs, a->rn);
4306 
4307     /* If we need to do MTE tag checking, assemble the descriptors */
4308     if (s->mte_active[runpriv]) {
4309         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4310         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4311     }
4312     if (s->mte_active[wunpriv]) {
4313         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4314         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4315         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4316     }
4317     /* The helper function needs these parts of the descriptor regardless */
4318     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4319     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4320 
4321     /*
4322      * The helper needs the register numbers, but since they're in
4323      * the syndrome anyway, we let it extract them from there rather
4324      * than passing in an extra three integer arguments.
4325      */
4326     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4327        tcg_constant_i32(rdesc));
4328     return true;
4329 }
4330 
4331 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4332 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4333 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4334 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4335 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4336 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
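
/*
 * Usage note: CPYP/CPYM/CPYE together implement a memmove, and
 * CPYFP/CPYFM/CPYFE the forward-only memcpy; a->options carries the
 * read/write unprivileged bits unpacked into runpriv/wunpriv above.
 */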
4337 
4338 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4339 
4340 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4341                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4342 {
4343     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4344     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4345     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4346 
4347     fn(tcg_rd, tcg_rn, tcg_imm);
4348     if (!a->sf) {
4349         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4350     }
4351     return true;
4352 }
4353 
4354 /*
4355  * PC-rel. addressing
4356  */
4357 
4358 static bool trans_ADR(DisasContext *s, arg_ri *a)
4359 {
4360     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4361     return true;
4362 }
4363 
4364 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4365 {
4366     int64_t offset = (int64_t)a->imm << 12;
4367 
4368     /* The page offset is ok for CF_PCREL. */
4369     offset -= s->pc_curr & 0xfff;
4370     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4371     return true;
4372 }
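
/*
 * Worked example: with pc_curr == 0x400120 and imm == 1, offset
 * becomes 0x1000 - 0x120, so the result is 0x401000, the base of
 * the next 4KiB page, as ADRP requires.
 */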
4373 
4374 /*
4375  * Add/subtract (immediate)
4376  */
4377 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4378 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4379 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4380 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4381 
4382 /*
4383  * Add/subtract (immediate, with tags)
4384  */
4385 
4386 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4387                                       bool sub_op)
4388 {
4389     TCGv_i64 tcg_rn, tcg_rd;
4390     int imm;
4391 
4392     imm = a->uimm6 << LOG2_TAG_GRANULE;
4393     if (sub_op) {
4394         imm = -imm;
4395     }
4396 
4397     tcg_rn = cpu_reg_sp(s, a->rn);
4398     tcg_rd = cpu_reg_sp(s, a->rd);
4399 
4400     if (s->ata[0]) {
4401         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4402                            tcg_constant_i32(imm),
4403                            tcg_constant_i32(a->uimm4));
4404     } else {
4405         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4406         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4407     }
4408     return true;
4409 }
4410 
4411 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4412 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4413 
4414 /* The input should be a value in the bottom e bits (with higher
4415  * bits zero); returns that value replicated into every element
4416  * of size e in a 64 bit integer.
4417  */
4418 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4419 {
4420     assert(e != 0);
4421     while (e < 64) {
4422         mask |= mask << e;
4423         e *= 2;
4424     }
4425     return mask;
4426 }
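
/*
 * Example: bitfield_replicate(0x3, 8) == 0x0303030303030303ull.
 */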
4427 
4428 /*
4429  * Logical (immediate)
4430  */
4431 
4432 /*
4433  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4434  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4435  * value (ie should cause a guest UNDEF exception), and true if they are
4436  * valid, in which case the decoded bit pattern is written to result.
4437  */
4438 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4439                             unsigned int imms, unsigned int immr)
4440 {
4441     uint64_t mask;
4442     unsigned e, levels, s, r;
4443     int len;
4444 
4445     assert(immn < 2 && imms < 64 && immr < 64);
4446 
4447     /* The bit patterns we create here are 64 bit patterns which
4448      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4449      * 64 bits each. Each element contains the same value: a run
4450      * of between 1 and e-1 non-zero bits, rotated within the
4451      * element by between 0 and e-1 bits.
4452      *
4453      * The element size and run length are encoded into immn (1 bit)
4454      * and imms (6 bits) as follows:
4455      * 64 bit elements: immn = 1, imms = <length of run - 1>
4456      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4457      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4458      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4459      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4460      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4461      * Notice that immn = 0, imms = 11111x is the only combination
4462      * not covered by one of the above options; this is reserved.
4463      * Further, <length of run - 1> all-ones is a reserved pattern.
4464      *
4465      * In all cases the rotation is by immr % e (and immr is 6 bits).
4466      */
4467 
4468     /* First determine the element size */
4469     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4470     if (len < 1) {
4471         /* This is the immn == 0, imms == 11111x case */
4472         return false;
4473     }
4474     e = 1 << len;
4475 
4476     levels = e - 1;
4477     s = imms & levels;
4478     r = immr & levels;
4479 
4480     if (s == levels) {
4481         /* <length of run - 1> mustn't be all-ones. */
4482         return false;
4483     }
4484 
4485     /* Create the value of one element: s+1 set bits rotated
4486      * by r within the element (which is e bits wide)...
4487      */
4488     mask = MAKE_64BIT_MASK(0, s + 1);
4489     if (r) {
4490         mask = (mask >> r) | (mask << (e - r));
4491         mask &= MAKE_64BIT_MASK(0, e);
4492     }
4493     /* ...then replicate the element over the whole 64 bit value */
4494     mask = bitfield_replicate(mask, e);
4495     *result = mask;
4496     return true;
4497 }
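
/*
 * Worked example: immn == 0, imms == 0b001111, immr == 0 selects
 * 32-bit elements (e == 32) with a run of 16 ones (s == 15, r == 0),
 * so *result becomes 0x0000ffff0000ffffull.
 */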
4498 
4499 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4500                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4501 {
4502     TCGv_i64 tcg_rd, tcg_rn;
4503     uint64_t imm;
4504 
4505     /* Some immediate field values are reserved. */
4506     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4507                                 extract32(a->dbm, 0, 6),
4508                                 extract32(a->dbm, 6, 6))) {
4509         return false;
4510     }
4511     if (!a->sf) {
4512         imm &= 0xffffffffull;
4513     }
4514 
4515     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4516     tcg_rn = cpu_reg(s, a->rn);
4517 
4518     fn(tcg_rd, tcg_rn, imm);
4519     if (set_cc) {
4520         gen_logic_CC(a->sf, tcg_rd);
4521     }
4522     if (!a->sf) {
4523         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4524     }
4525     return true;
4526 }
4527 
4528 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4529 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4530 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4531 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4532 
4533 /*
4534  * Move wide (immediate)
4535  */
4536 
4537 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4538 {
4539     int pos = a->hw << 4;
4540     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4541     return true;
4542 }
4543 
4544 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4545 {
4546     int pos = a->hw << 4;
4547     uint64_t imm = a->imm;
4548 
4549     imm = ~(imm << pos);
4550     if (!a->sf) {
4551         imm = (uint32_t)imm;
4552     }
4553     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4554     return true;
4555 }
4556 
4557 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4558 {
4559     int pos = a->hw << 4;
4560     TCGv_i64 tcg_rd, tcg_im;
4561 
4562     tcg_rd = cpu_reg(s, a->rd);
4563     tcg_im = tcg_constant_i64(a->imm);
4564     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4565     if (!a->sf) {
4566         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4567     }
4568     return true;
4569 }
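
/*
 * For illustration (operands chosen arbitrarily): MOVZ X0, #0xdead,
 * LSL #16 followed by MOVK X0, #0xbeef yields X0 == 0xdeadbeef;
 * the MOVK deposit overwrites only the selected 16-bit field.
 */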
4570 
4571 /*
4572  * Bitfield
4573  */
4574 
4575 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4576 {
4577     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4578     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4579     unsigned int bitsize = a->sf ? 64 : 32;
4580     unsigned int ri = a->immr;
4581     unsigned int si = a->imms;
4582     unsigned int pos, len;
4583 
4584     if (si >= ri) {
4585         /* Wd<s-r:0> = Wn<s:r> */
4586         len = (si - ri) + 1;
4587         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4588         if (!a->sf) {
4589             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4590         }
4591     } else {
4592         /* Wd<32+s-r,32-r> = Wn<s:0> */
4593         len = si + 1;
4594         pos = (bitsize - ri) & (bitsize - 1);
4595 
4596         if (len < ri) {
4597             /*
4598              * Sign extend the destination field from len to fill the
4599              * balance of the word.  Let the deposit below insert all
4600              * of those sign bits.
4601              */
4602             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4603             len = ri;
4604         }
4605 
4606         /*
4607          * We start with zero, and we haven't modified any bits outside
4608          * bitsize, therefore no final zero-extension is needed for !sf.
4609          */
4610         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4611     }
4612     return true;
4613 }
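
/*
 * Alias example: SBFX X0, X1, #8, #8 is SBFM X0, X1, #8, #15; since
 * si (15) >= ri (8), the first branch above sign-extracts 8 bits
 * starting at bit 8.
 */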
4614 
4615 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4616 {
4617     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4618     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4619     unsigned int bitsize = a->sf ? 64 : 32;
4620     unsigned int ri = a->immr;
4621     unsigned int si = a->imms;
4622     unsigned int pos, len;
4623 
4624     tcg_rd = cpu_reg(s, a->rd);
4625     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4626 
4627     if (si >= ri) {
4628         /* Wd<s-r:0> = Wn<s:r> */
4629         len = (si - ri) + 1;
4630         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4631     } else {
4632         /* Wd<32+s-r,32-r> = Wn<s:0> */
4633         len = si + 1;
4634         pos = (bitsize - ri) & (bitsize - 1);
4635         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4636     }
4637     return true;
4638 }
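
/*
 * Alias example: LSL W0, W1, #4 is UBFM W0, W1, #28, #27; since
 * si (27) < ri (28), len == 28 and pos == 4, and the zeroing
 * deposit above performs the left shift.
 */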
4639 
4640 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4641 {
4642     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4643     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4644     unsigned int bitsize = a->sf ? 64 : 32;
4645     unsigned int ri = a->immr;
4646     unsigned int si = a->imms;
4647     unsigned int pos, len;
4648 
4649     tcg_rd = cpu_reg(s, a->rd);
4650     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4651 
4652     if (si >= ri) {
4653         /* Wd<s-r:0> = Wn<s:r> */
4654         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4655         len = (si - ri) + 1;
4656         pos = 0;
4657     } else {
4658         /* Wd<32+s-r,32-r> = Wn<s:0> */
4659         len = si + 1;
4660         pos = (bitsize - ri) & (bitsize - 1);
4661     }
4662 
4663     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4664     if (!a->sf) {
4665         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4666     }
4667     return true;
4668 }
4669 
4670 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4671 {
4672     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4673 
4674     tcg_rd = cpu_reg(s, a->rd);
4675 
4676     if (unlikely(a->imm == 0)) {
4677         /*
4678          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4679          * so an extract from bit 0 is a special case.
4680          */
4681         if (a->sf) {
4682             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4683         } else {
4684             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4685         }
4686     } else {
4687         tcg_rm = cpu_reg(s, a->rm);
4688         tcg_rn = cpu_reg(s, a->rn);
4689 
4690         if (a->sf) {
4691             /* Specialization to ROR happens in EXTRACT2.  */
4692             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4693         } else {
4694             TCGv_i32 t0 = tcg_temp_new_i32();
4695 
4696             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4697             if (a->rm == a->rn) {
4698                 tcg_gen_rotri_i32(t0, t0, a->imm);
4699             } else {
4700                 TCGv_i32 t1 = tcg_temp_new_i32();
4701                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4702                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4703             }
4704             tcg_gen_extu_i32_i64(tcg_rd, t0);
4705         }
4706     }
4707     return true;
4708 }
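
/*
 * For illustration (operands chosen arbitrarily): EXTR W0, W3, W3, #8
 * takes the rm == rn path above and becomes a 32-bit rotate right by
 * 8, which is how the ROR (immediate) alias is encoded.
 */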
4709 
4710 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4711 {
4712     if (fp_access_check(s)) {
4713         int len = (a->len + 1) * 16;
4714 
4715         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4716                            vec_full_reg_offset(s, a->rm), tcg_env,
4717                            a->q ? 16 : 8, vec_full_reg_size(s),
4718                            (len << 6) | (a->tbx << 5) | a->rn,
4719                            gen_helper_simd_tblx);
4720     }
4721     return true;
4722 }
4723 
4724 typedef int simd_permute_idx_fn(int i, int part, int elements);
4725 
4726 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4727                             simd_permute_idx_fn *fn, int part)
4728 {
4729     MemOp esz = a->esz;
4730     int datasize = a->q ? 16 : 8;
4731     int elements = datasize >> esz;
4732     TCGv_i64 tcg_res[2], tcg_ele;
4733 
4734     if (esz == MO_64 && !a->q) {
4735         return false;
4736     }
4737     if (!fp_access_check(s)) {
4738         return true;
4739     }
4740 
4741     tcg_res[0] = tcg_temp_new_i64();
4742     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4743     tcg_ele = tcg_temp_new_i64();
4744 
4745     for (int i = 0; i < elements; i++) {
4746         int o, w, idx;
4747 
4748         idx = fn(i, part, elements);
4749         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4750                          idx & (elements - 1), esz);
4751 
4752         w = (i << (esz + 3)) / 64;
4753         o = (i << (esz + 3)) % 64;
4754         if (o == 0) {
4755             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4756         } else {
4757             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4758         }
4759     }
4760 
4761     for (int i = a->q; i >= 0; --i) {
4762         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4763     }
4764     clear_vec_high(s, a->q, a->rd);
4765     return true;
4766 }
4767 
4768 static int permute_load_uzp(int i, int part, int elements)
4769 {
4770     return 2 * i + part;
4771 }
4772 
4773 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4774 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4775 
4776 static int permute_load_trn(int i, int part, int elements)
4777 {
4778     return (i & 1) * elements + (i & ~1) + part;
4779 }
4780 
4781 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4782 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4783 
4784 static int permute_load_zip(int i, int part, int elements)
4785 {
4786     return (i & 1) * elements + ((part * elements + i) >> 1);
4787 }
4788 
4789 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4790 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
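
/*
 * Index example: for ZIP1 with 8 elements per vector (part == 0),
 * permute_load_zip() yields 0, 8, 1, 9, 2, 10, 3, 11; indices with
 * the "elements" bit set select Vm in do_simd_permute() above.
 */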
4791 
4792 /*
4793  * Cryptographic AES, SHA, SHA512
4794  */
4795 
4796 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4797 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4798 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4799 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4800 
4801 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4802 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4803 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4804 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4805 
4806 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4807 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4808 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4809 
4810 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4811 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4812 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4813 
4814 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4815 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4816 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4817 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4818 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4819 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4820 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4821 
4822 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4823 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4824 
4825 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4826 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4827 
4828 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4829 {
4830     if (!dc_isar_feature(aa64_sm3, s)) {
4831         return false;
4832     }
4833     if (fp_access_check(s)) {
4834         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4835         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4836         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4837         TCGv_i32 tcg_res = tcg_temp_new_i32();
4838 
4839         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
4840         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
4841         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
4842 
4843         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
4844         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
4845         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
4846         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
4847 
4848         /* Clear the whole register first, then store bits [127:96]. */
4849         clear_vec(s, a->rd);
4850         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
4851     }
4852     return true;
4853 }
4854 
4855 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
4856 {
4857     if (fp_access_check(s)) {
4858         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
4859     }
4860     return true;
4861 }
4862 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
4863 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
4864 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
4865 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
4866 
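/* XAR: exclusive-OR the sources, then rotate right by imm, per 64-bit lane. */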
4867 static bool trans_XAR(DisasContext *s, arg_XAR *a)
4868 {
4869     if (!dc_isar_feature(aa64_sha3, s)) {
4870         return false;
4871     }
4872     if (fp_access_check(s)) {
4873         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
4874                      vec_full_reg_offset(s, a->rn),
4875                      vec_full_reg_offset(s, a->rm), a->imm, 16,
4876                      vec_full_reg_size(s));
4877     }
4878     return true;
4879 }
4880 
4881 /*
4882  * Advanced SIMD copy
4883  */
4884 
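/*
 * The element size is encoded as the lowest set bit of imm, with the
 * index in the bits above it.  E.g. imm == 0b01010 gives esz == MO_16
 * and idx == 2.
 */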
4885 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
4886 {
4887     unsigned esz = ctz32(imm);
4888     if (esz <= MO_64) {
4889         *pesz = esz;
4890         *pidx = imm >> (esz + 1);
4891         return true;
4892     }
4893     return false;
4894 }
4895 
4896 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
4897 {
4898     MemOp esz;
4899     unsigned idx;
4900 
4901     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4902         return false;
4903     }
4904     if (fp_access_check(s)) {
4905         /*
4906          * This instruction just extracts the specified element and
4907          * zero-extends it into the bottom of the destination register.
4908          */
4909         TCGv_i64 tmp = tcg_temp_new_i64();
4910         read_vec_element(s, tmp, a->rn, idx, esz);
4911         write_fp_dreg(s, a->rd, tmp);
4912     }
4913     return true;
4914 }
4915 
4916 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
4917 {
4918     MemOp esz;
4919     unsigned idx;
4920 
4921     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4922         return false;
4923     }
4924     if (esz == MO_64 && !a->q) {
4925         return false;
4926     }
4927     if (fp_access_check(s)) {
4928         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
4929                              vec_reg_offset(s, a->rn, idx, esz),
4930                              a->q ? 16 : 8, vec_full_reg_size(s));
4931     }
4932     return true;
4933 }
4934 
4935 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
4936 {
4937     MemOp esz;
4938     unsigned idx;
4939 
4940     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4941         return false;
4942     }
4943     if (esz == MO_64 && !a->q) {
4944         return false;
4945     }
4946     if (fp_access_check(s)) {
4947         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4948                              a->q ? 16 : 8, vec_full_reg_size(s),
4949                              cpu_reg(s, a->rn));
4950     }
4951     return true;
4952 }
4953 
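/*
 * SMOV is valid for byte and half sources with either form, and for
 * word sources only with the 64-bit (Q == 1) form; UMOV requires
 * Q == 1 for doubleword elements and Q == 0 for all smaller sizes.
 */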
4954 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
4955 {
4956     MemOp esz;
4957     unsigned idx;
4958 
4959     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4960         return false;
4961     }
4962     if (is_signed) {
4963         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
4964             return false;
4965         }
4966     } else {
4967         if (esz == MO_64 ? !a->q : a->q) {
4968             return false;
4969         }
4970     }
4971     if (fp_access_check(s)) {
4972         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4973         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
4974         if (is_signed && !a->q) {
4975             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4976         }
4977     }
4978     return true;
4979 }
4980 
4981 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
4982 TRANS(UMOV, do_smov_umov, a, 0)
4983 
4984 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
4985 {
4986     MemOp esz;
4987     unsigned idx;
4988 
4989     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4990         return false;
4991     }
4992     if (fp_access_check(s)) {
4993         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
4994         clear_vec_high(s, true, a->rd);
4995     }
4996     return true;
4997 }
4998 
4999 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5000 {
5001     MemOp esz;
5002     unsigned didx, sidx;
5003 
5004     if (!decode_esz_idx(a->di, &esz, &didx)) {
5005         return false;
5006     }
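    /* The source index is encoded in the bits of si above the element size. */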
5007     sidx = a->si >> esz;
5008     if (fp_access_check(s)) {
5009         TCGv_i64 tmp = tcg_temp_new_i64();
5010 
5011         read_vec_element(s, tmp, a->rn, sidx, esz);
5012         write_vec_element(s, tmp, a->rd, didx, esz);
5013 
5014         /* INS is considered a 128-bit write for SVE. */
5015         clear_vec_high(s, true, a->rd);
5016     }
5017     return true;
5018 }
5019 
5020 /*
5021  * Advanced SIMD three same
5022  */
5023 
5024 typedef struct FPScalar {
5025     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5026     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5027     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5028 } FPScalar;
5029 
5030 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
5031 {
5032     switch (a->esz) {
5033     case MO_64:
5034         if (fp_access_check(s)) {
5035             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5036             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5037             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5038             write_fp_dreg(s, a->rd, t0);
5039         }
5040         break;
5041     case MO_32:
5042         if (fp_access_check(s)) {
5043             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5044             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5045             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5046             write_fp_sreg(s, a->rd, t0);
5047         }
5048         break;
5049     case MO_16:
5050         if (!dc_isar_feature(aa64_fp16, s)) {
5051             return false;
5052         }
5053         if (fp_access_check(s)) {
5054             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5055             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5056             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
5057             write_fp_sreg(s, a->rd, t0);
5058         }
5059         break;
5060     default:
5061         return false;
5062     }
5063     return true;
5064 }
5065 
5066 static const FPScalar f_scalar_fadd = {
5067     gen_helper_vfp_addh,
5068     gen_helper_vfp_adds,
5069     gen_helper_vfp_addd,
5070 };
5071 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
5072 
5073 static const FPScalar f_scalar_fsub = {
5074     gen_helper_vfp_subh,
5075     gen_helper_vfp_subs,
5076     gen_helper_vfp_subd,
5077 };
5078 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
5079 
5080 static const FPScalar f_scalar_fdiv = {
5081     gen_helper_vfp_divh,
5082     gen_helper_vfp_divs,
5083     gen_helper_vfp_divd,
5084 };
5085 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
5086 
5087 static const FPScalar f_scalar_fmul = {
5088     gen_helper_vfp_mulh,
5089     gen_helper_vfp_muls,
5090     gen_helper_vfp_muld,
5091 };
5092 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
5093 
5094 static const FPScalar f_scalar_fmax = {
5095     gen_helper_advsimd_maxh,
5096     gen_helper_vfp_maxs,
5097     gen_helper_vfp_maxd,
5098 };
5099 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
5100 
5101 static const FPScalar f_scalar_fmin = {
5102     gen_helper_advsimd_minh,
5103     gen_helper_vfp_mins,
5104     gen_helper_vfp_mind,
5105 };
5106 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
5107 
5108 static const FPScalar f_scalar_fmaxnm = {
5109     gen_helper_advsimd_maxnumh,
5110     gen_helper_vfp_maxnums,
5111     gen_helper_vfp_maxnumd,
5112 };
5113 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
5114 
5115 static const FPScalar f_scalar_fminnm = {
5116     gen_helper_advsimd_minnumh,
5117     gen_helper_vfp_minnums,
5118     gen_helper_vfp_minnumd,
5119 };
5120 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
5121 
5122 static const FPScalar f_scalar_fmulx = {
5123     gen_helper_advsimd_mulxh,
5124     gen_helper_vfp_mulxs,
5125     gen_helper_vfp_mulxd,
5126 };
5127 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
5128 
5129 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5130 {
5131     gen_helper_vfp_mulh(d, n, m, s);
5132     gen_vfp_negh(d, d);
5133 }
5134 
5135 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5136 {
5137     gen_helper_vfp_muls(d, n, m, s);
5138     gen_vfp_negs(d, d);
5139 }
5140 
5141 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5142 {
5143     gen_helper_vfp_muld(d, n, m, s);
5144     gen_vfp_negd(d, d);
5145 }
5146 
5147 static const FPScalar f_scalar_fnmul = {
5148     gen_fnmul_h,
5149     gen_fnmul_s,
5150     gen_fnmul_d,
5151 };
5152 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
5153 
5154 static const FPScalar f_scalar_fcmeq = {
5155     gen_helper_advsimd_ceq_f16,
5156     gen_helper_neon_ceq_f32,
5157     gen_helper_neon_ceq_f64,
5158 };
5159 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
5160 
5161 static const FPScalar f_scalar_fcmge = {
5162     gen_helper_advsimd_cge_f16,
5163     gen_helper_neon_cge_f32,
5164     gen_helper_neon_cge_f64,
5165 };
5166 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
5167 
5168 static const FPScalar f_scalar_fcmgt = {
5169     gen_helper_advsimd_cgt_f16,
5170     gen_helper_neon_cgt_f32,
5171     gen_helper_neon_cgt_f64,
5172 };
5173 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
5174 
5175 static const FPScalar f_scalar_facge = {
5176     gen_helper_advsimd_acge_f16,
5177     gen_helper_neon_acge_f32,
5178     gen_helper_neon_acge_f64,
5179 };
5180 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
5181 
5182 static const FPScalar f_scalar_facgt = {
5183     gen_helper_advsimd_acgt_f16,
5184     gen_helper_neon_acgt_f32,
5185     gen_helper_neon_acgt_f64,
5186 };
5187 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
5188 
5189 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5190 {
5191     gen_helper_vfp_subh(d, n, m, s);
5192     gen_vfp_absh(d, d);
5193 }
5194 
5195 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5196 {
5197     gen_helper_vfp_subs(d, n, m, s);
5198     gen_vfp_abss(d, d);
5199 }
5200 
5201 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5202 {
5203     gen_helper_vfp_subd(d, n, m, s);
5204     gen_vfp_absd(d, d);
5205 }
5206 
5207 static const FPScalar f_scalar_fabd = {
5208     gen_fabd_h,
5209     gen_fabd_s,
5210     gen_fabd_d,
5211 };
5212 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
5213 
5214 static const FPScalar f_scalar_frecps = {
5215     gen_helper_recpsf_f16,
5216     gen_helper_recpsf_f32,
5217     gen_helper_recpsf_f64,
5218 };
5219 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
5220 
5221 static const FPScalar f_scalar_frsqrts = {
5222     gen_helper_rsqrtsf_f16,
5223     gen_helper_rsqrtsf_f32,
5224     gen_helper_rsqrtsf_f64,
5225 };
5226 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
5227 
5228 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5229                        const FPScalar *f, bool swap)
5230 {
5231     switch (a->esz) {
5232     case MO_64:
5233         if (fp_access_check(s)) {
5234             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5235             TCGv_i64 t1 = tcg_constant_i64(0);
5236             if (swap) {
5237                 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_FPCR));
5238             } else {
5239                 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5240             }
5241             write_fp_dreg(s, a->rd, t0);
5242         }
5243         break;
5244     case MO_32:
5245         if (fp_access_check(s)) {
5246             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5247             TCGv_i32 t1 = tcg_constant_i32(0);
5248             if (swap) {
5249                 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_FPCR));
5250             } else {
5251                 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5252             }
5253             write_fp_sreg(s, a->rd, t0);
5254         }
5255         break;
5256     case MO_16:
5257         if (!dc_isar_feature(aa64_fp16, s)) {
5258             return false;
5259         }
5260         if (fp_access_check(s)) {
5261             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5262             TCGv_i32 t1 = tcg_constant_i32(0);
5263             if (swap) {
5264                 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_FPCR_F16));
5265             } else {
5266                 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
5267             }
5268             write_fp_sreg(s, a->rd, t0);
5269         }
5270         break;
5271     default:
5272         return false;
5273     }
5274     return true;
5275 }
5276 
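/*
 * FCMLT0 and FCMLE0 have no dedicated helpers: x < 0 is 0 > x and
 * x <= 0 is 0 >= x, so reuse the GT/GE helpers with operands swapped.
 */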
5277 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5278 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5279 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5280 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5281 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
5282 
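/*
 * Scalar saturating add/sub with QC accumulation: the sticky QC flag
 * is loaded from env, updated by the generator, and stored back.
 */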
5283 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5284                 MemOp sgn_n, MemOp sgn_m,
5285                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5286                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5287 {
5288     TCGv_i64 t0, t1, t2, qc;
5289     MemOp esz = a->esz;
5290 
5291     if (!fp_access_check(s)) {
5292         return true;
5293     }
5294 
5295     t0 = tcg_temp_new_i64();
5296     t1 = tcg_temp_new_i64();
5297     t2 = tcg_temp_new_i64();
5298     qc = tcg_temp_new_i64();
5299     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5300     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5301     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5302 
5303     if (esz == MO_64) {
5304         gen_d(t0, qc, t1, t2);
5305     } else {
5306         gen_bhs(t0, qc, t1, t2, esz);
5307         tcg_gen_ext_i64(t0, t0, esz);
5308     }
5309 
5310     write_fp_dreg(s, a->rd, t0);
5311     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5312     return true;
5313 }
5314 
5315 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5316 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5317 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5318 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5319 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5320 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5321 
5322 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5323                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5324 {
5325     if (fp_access_check(s)) {
5326         TCGv_i64 t0 = tcg_temp_new_i64();
5327         TCGv_i64 t1 = tcg_temp_new_i64();
5328 
5329         read_vec_element(s, t0, a->rn, 0, MO_64);
5330         read_vec_element(s, t1, a->rm, 0, MO_64);
5331         fn(t0, t0, t1);
5332         write_fp_dreg(s, a->rd, t0);
5333     }
5334     return true;
5335 }
5336 
5337 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5338 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5339 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5340 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5341 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5342 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5343 
5344 typedef struct ENVScalar2 {
5345     NeonGenTwoOpEnvFn *gen_bhs[3];
5346     NeonGenTwo64OpEnvFn *gen_d;
5347 } ENVScalar2;
5348 
5349 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5350 {
5351     if (!fp_access_check(s)) {
5352         return true;
5353     }
5354     if (a->esz == MO_64) {
5355         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5356         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5357         f->gen_d(t0, tcg_env, t0, t1);
5358         write_fp_dreg(s, a->rd, t0);
5359     } else {
5360         TCGv_i32 t0 = tcg_temp_new_i32();
5361         TCGv_i32 t1 = tcg_temp_new_i32();
5362 
5363         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5364         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5365         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5366         write_fp_sreg(s, a->rd, t0);
5367     }
5368     return true;
5369 }
5370 
5371 static const ENVScalar2 f_scalar_sqshl = {
5372     { gen_helper_neon_qshl_s8,
5373       gen_helper_neon_qshl_s16,
5374       gen_helper_neon_qshl_s32 },
5375     gen_helper_neon_qshl_s64,
5376 };
5377 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5378 
5379 static const ENVScalar2 f_scalar_uqshl = {
5380     { gen_helper_neon_qshl_u8,
5381       gen_helper_neon_qshl_u16,
5382       gen_helper_neon_qshl_u32 },
5383     gen_helper_neon_qshl_u64,
5384 };
5385 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5386 
5387 static const ENVScalar2 f_scalar_sqrshl = {
5388     { gen_helper_neon_qrshl_s8,
5389       gen_helper_neon_qrshl_s16,
5390       gen_helper_neon_qrshl_s32 },
5391     gen_helper_neon_qrshl_s64,
5392 };
5393 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5394 
5395 static const ENVScalar2 f_scalar_uqrshl = {
5396     { gen_helper_neon_qrshl_u8,
5397       gen_helper_neon_qrshl_u16,
5398       gen_helper_neon_qrshl_u32 },
5399     gen_helper_neon_qrshl_u64,
5400 };
5401 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5402 
5403 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5404                               const ENVScalar2 *f)
5405 {
5406     if (a->esz == MO_16 || a->esz == MO_32) {
5407         return do_env_scalar2(s, a, f);
5408     }
5409     return false;
5410 }
5411 
5412 static const ENVScalar2 f_scalar_sqdmulh = {
5413     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5414 };
5415 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5416 
5417 static const ENVScalar2 f_scalar_sqrdmulh = {
5418     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5419 };
5420 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5421 
5422 typedef struct ENVScalar3 {
5423     NeonGenThreeOpEnvFn *gen_hs[2];
5424 } ENVScalar3;
5425 
5426 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5427                               const ENVScalar3 *f)
5428 {
5429     TCGv_i32 t0, t1, t2;
5430 
5431     if (a->esz != MO_16 && a->esz != MO_32) {
5432         return false;
5433     }
5434     if (!fp_access_check(s)) {
5435         return true;
5436     }
5437 
5438     t0 = tcg_temp_new_i32();
5439     t1 = tcg_temp_new_i32();
5440     t2 = tcg_temp_new_i32();
5441     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5442     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5443     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5444     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5445     write_fp_sreg(s, a->rd, t0);
5446     return true;
5447 }
5448 
5449 static const ENVScalar3 f_scalar_sqrdmlah = {
5450     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5451 };
5452 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5453 
5454 static const ENVScalar3 f_scalar_sqrdmlsh = {
5455     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5456 };
5457 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5458 
5459 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5460 {
5461     if (fp_access_check(s)) {
5462         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5463         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5464         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5465         write_fp_dreg(s, a->rd, t0);
5466     }
5467     return true;
5468 }
5469 
5470 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5471 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5472 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5473 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5474 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5475 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5476 
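/*
 * fp_access_check_vector_hsd returns a negative value for an invalid
 * size/Q combination and 0 when the FP access check fails, so
 * "check == 0" reports unallocated encodings as false.
 */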
5477 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5478                           gen_helper_gvec_3_ptr * const fns[3])
5479 {
5480     MemOp esz = a->esz;
5481     int check = fp_access_check_vector_hsd(s, a->q, esz);
5482 
5483     if (check <= 0) {
5484         return check == 0;
5485     }
5486 
5487     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
5488                       esz == MO_16, data, fns[esz - 1]);
5489     return true;
5490 }
5491 
5492 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5493     gen_helper_gvec_fadd_h,
5494     gen_helper_gvec_fadd_s,
5495     gen_helper_gvec_fadd_d,
5496 };
5497 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5498 
5499 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5500     gen_helper_gvec_fsub_h,
5501     gen_helper_gvec_fsub_s,
5502     gen_helper_gvec_fsub_d,
5503 };
5504 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5505 
5506 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5507     gen_helper_gvec_fdiv_h,
5508     gen_helper_gvec_fdiv_s,
5509     gen_helper_gvec_fdiv_d,
5510 };
5511 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5512 
5513 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5514     gen_helper_gvec_fmul_h,
5515     gen_helper_gvec_fmul_s,
5516     gen_helper_gvec_fmul_d,
5517 };
5518 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5519 
5520 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5521     gen_helper_gvec_fmax_h,
5522     gen_helper_gvec_fmax_s,
5523     gen_helper_gvec_fmax_d,
5524 };
5525 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
5526 
5527 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5528     gen_helper_gvec_fmin_h,
5529     gen_helper_gvec_fmin_s,
5530     gen_helper_gvec_fmin_d,
5531 };
5532 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
5533 
5534 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5535     gen_helper_gvec_fmaxnum_h,
5536     gen_helper_gvec_fmaxnum_s,
5537     gen_helper_gvec_fmaxnum_d,
5538 };
5539 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5540 
5541 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5542     gen_helper_gvec_fminnum_h,
5543     gen_helper_gvec_fminnum_s,
5544     gen_helper_gvec_fminnum_d,
5545 };
5546 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5547 
5548 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5549     gen_helper_gvec_fmulx_h,
5550     gen_helper_gvec_fmulx_s,
5551     gen_helper_gvec_fmulx_d,
5552 };
5553 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5554 
5555 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5556     gen_helper_gvec_vfma_h,
5557     gen_helper_gvec_vfma_s,
5558     gen_helper_gvec_vfma_d,
5559 };
5560 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5561 
5562 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5563     gen_helper_gvec_vfms_h,
5564     gen_helper_gvec_vfms_s,
5565     gen_helper_gvec_vfms_d,
5566 };
5567 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
5568 
5569 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5570     gen_helper_gvec_fceq_h,
5571     gen_helper_gvec_fceq_s,
5572     gen_helper_gvec_fceq_d,
5573 };
5574 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5575 
5576 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5577     gen_helper_gvec_fcge_h,
5578     gen_helper_gvec_fcge_s,
5579     gen_helper_gvec_fcge_d,
5580 };
5581 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5582 
5583 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5584     gen_helper_gvec_fcgt_h,
5585     gen_helper_gvec_fcgt_s,
5586     gen_helper_gvec_fcgt_d,
5587 };
5588 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5589 
5590 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5591     gen_helper_gvec_facge_h,
5592     gen_helper_gvec_facge_s,
5593     gen_helper_gvec_facge_d,
5594 };
5595 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5596 
5597 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5598     gen_helper_gvec_facgt_h,
5599     gen_helper_gvec_facgt_s,
5600     gen_helper_gvec_facgt_d,
5601 };
5602 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5603 
5604 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5605     gen_helper_gvec_fabd_h,
5606     gen_helper_gvec_fabd_s,
5607     gen_helper_gvec_fabd_d,
5608 };
5609 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
5610 
5611 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5612     gen_helper_gvec_recps_h,
5613     gen_helper_gvec_recps_s,
5614     gen_helper_gvec_recps_d,
5615 };
5616 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
5617 
5618 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5619     gen_helper_gvec_rsqrts_h,
5620     gen_helper_gvec_rsqrts_s,
5621     gen_helper_gvec_rsqrts_d,
5622 };
5623 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
5624 
5625 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5626     gen_helper_gvec_faddp_h,
5627     gen_helper_gvec_faddp_s,
5628     gen_helper_gvec_faddp_d,
5629 };
5630 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5631 
5632 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5633     gen_helper_gvec_fmaxp_h,
5634     gen_helper_gvec_fmaxp_s,
5635     gen_helper_gvec_fmaxp_d,
5636 };
5637 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
5638 
5639 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5640     gen_helper_gvec_fminp_h,
5641     gen_helper_gvec_fminp_s,
5642     gen_helper_gvec_fminp_d,
5643 };
5644 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
5645 
5646 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5647     gen_helper_gvec_fmaxnump_h,
5648     gen_helper_gvec_fmaxnump_s,
5649     gen_helper_gvec_fmaxnump_d,
5650 };
5651 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5652 
5653 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5654     gen_helper_gvec_fminnump_h,
5655     gen_helper_gvec_fminnump_s,
5656     gen_helper_gvec_fminnump_d,
5657 };
5658 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5659 
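/*
 * FMLAL/FMLSL (vector): the helper's data argument packs is_2
 * (top/bottom half selection) into bit 1 and is_s (subtract) into
 * bit 0.
 */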
5660 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5661 {
5662     if (fp_access_check(s)) {
5663         int data = (is_2 << 1) | is_s;
5664         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5665                            vec_full_reg_offset(s, a->rn),
5666                            vec_full_reg_offset(s, a->rm), tcg_env,
5667                            a->q ? 16 : 8, vec_full_reg_size(s),
5668                            data, gen_helper_gvec_fmlal_a64);
5669     }
5670     return true;
5671 }
5672 
5673 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5674 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5675 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5676 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5677 
5678 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
5679 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
5680 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
5681 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
5682 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
5683 
5684 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
5685 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
5686 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
5687 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
5688 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
5689 
5690 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
5691 {
5692     if (fp_access_check(s)) {
5693         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
5694     }
5695     return true;
5696 }
5697 
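/*
 * In do_bitsel, a is the selector: d = (b & a) | (c & ~a).  Thus
 * BSL selects on rd, while BIT and BIF select on rm, inserting rn
 * bits where rm is set or clear respectively.
 */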
5698 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
5699 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
5700 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
5701 
5702 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
5703 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
5704 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
5705 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
5706 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
5707 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
5708 
5709 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
5710 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
5711 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
5712 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
5713 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
5714 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
5715 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
5716 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
5717 
5718 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
5719 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
5720 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
5721 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
5722 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
5723 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
5724 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
5725 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
5726 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
5727 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
5728 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
5729 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
5730 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
5731 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
5732 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
5733 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
5734 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
5735 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
5736 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
5737 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
5738 
5739 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
5740 {
5741     if (a->esz == MO_64 && !a->q) {
5742         return false;
5743     }
5744     if (fp_access_check(s)) {
5745         tcg_gen_gvec_cmp(cond, a->esz,
5746                          vec_full_reg_offset(s, a->rd),
5747                          vec_full_reg_offset(s, a->rn),
5748                          vec_full_reg_offset(s, a->rm),
5749                          a->q ? 16 : 8, vec_full_reg_size(s));
5750     }
5751     return true;
5752 }
5753 
5754 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
5755 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
5756 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
5757 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
5758 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
5759 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
5760 
5761 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
5762 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
5763 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
5764 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
5765 
5766 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
5767                           gen_helper_gvec_4 *fn)
5768 {
5769     if (fp_access_check(s)) {
5770         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5771     }
5772     return true;
5773 }
5774 
5775 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
5776                               gen_helper_gvec_4_ptr *fn)
5777 {
5778     if (fp_access_check(s)) {
5779         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5780     }
5781     return true;
5782 }
5783 
5784 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
5785 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
5786 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
5787 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
5788 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
5789 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
5790 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
5791 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
5792 
5793 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
5794 {
5795     if (!dc_isar_feature(aa64_bf16, s)) {
5796         return false;
5797     }
5798     if (fp_access_check(s)) {
5799         /* Q bit selects BFMLALB vs BFMLALT. */
5800         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
5801                           gen_helper_gvec_bfmlal);
5802     }
5803     return true;
5804 }
5805 
5806 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
5807     gen_helper_gvec_fcaddh,
5808     gen_helper_gvec_fcadds,
5809     gen_helper_gvec_fcaddd,
5810 };
5811 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
5812 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
5813 
5814 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
5815 {
5816     static gen_helper_gvec_4_ptr * const fn[] = {
5817         [MO_16] = gen_helper_gvec_fcmlah,
5818         [MO_32] = gen_helper_gvec_fcmlas,
5819         [MO_64] = gen_helper_gvec_fcmlad,
5820     };
5821     int check;
5822 
5823     if (!dc_isar_feature(aa64_fcma, s)) {
5824         return false;
5825     }
5826 
5827     check = fp_access_check_vector_hsd(s, a->q, a->esz);
5828     if (check <= 0) {
5829         return check == 0;
5830     }
5831 
5832     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
5833                       a->esz == MO_16, a->rot, fn[a->esz]);
5834     return true;
5835 }
5836 
5837 /*
5838  * Widening vector x vector/indexed.
5839  *
5840  * These read from the top or bottom half of a 128-bit vector.
5841  * After widening, optionally accumulate with a 128-bit vector.
5842  * Implement these inline, as the number of elements is limited
5843  * and the related SVE and SME operations on larger vectors use
5844  * even/odd elements instead of top/bottom half.
5845  *
5846  * If idx >= 0, operand 2 is indexed, otherwise vector.
5847  * If acc, operand 0 is loaded with rd.
5848  */
5849 
5850 /* For low half, iterating up. */
5851 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
5852                             int rd, int rn, int rm, int idx,
5853                             NeonGenTwo64OpFn *fn, bool acc)
5854 {
5855     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
5856     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5857     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5858     MemOp esz = memop & MO_SIZE;
5859     int half = 8 >> esz;
5860     int top_swap, top_half;
5861 
5862     /* There are no 64x64->128 bit operations. */
5863     if (esz >= MO_64) {
5864         return false;
5865     }
5866     if (!fp_access_check(s)) {
5867         return true;
5868     }
5869 
5870     if (idx >= 0) {
5871         read_vec_element(s, tcg_op2, rm, idx, memop);
5872     }
5873 
5874     /*
5875      * For top half inputs, iterate forward; backward for bottom half.
5876      * This means the store to the destination will not occur until
5877      * overlapping inputs are consumed.
5878      * Use top_swap to conditionally invert the forward iteration index.
5879      */
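    /* E.g. half == 4, top == 0: elt = elt_fwd ^ 3 visits 3, 2, 1, 0. */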
5880     top_swap = top ? 0 : half - 1;
5881     top_half = top ? half : 0;
5882 
5883     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5884         int elt = elt_fwd ^ top_swap;
5885 
5886         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
5887         if (idx < 0) {
5888             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
5889         }
5890         if (acc) {
5891             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
5892         }
5893         fn(tcg_op0, tcg_op1, tcg_op2);
5894         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
5895     }
5896     clear_vec_high(s, 1, rd);
5897     return true;
5898 }
5899 
5900 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5901 {
5902     TCGv_i64 t = tcg_temp_new_i64();
5903     tcg_gen_mul_i64(t, n, m);
5904     tcg_gen_add_i64(d, d, t);
5905 }
5906 
5907 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5908 {
5909     TCGv_i64 t = tcg_temp_new_i64();
5910     tcg_gen_mul_i64(t, n, m);
5911     tcg_gen_sub_i64(d, d, t);
5912 }
5913 
5914 TRANS(SMULL_v, do_3op_widening,
5915       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5916       tcg_gen_mul_i64, false)
5917 TRANS(UMULL_v, do_3op_widening,
5918       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5919       tcg_gen_mul_i64, false)
5920 TRANS(SMLAL_v, do_3op_widening,
5921       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5922       gen_muladd_i64, true)
5923 TRANS(UMLAL_v, do_3op_widening,
5924       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5925       gen_muladd_i64, true)
5926 TRANS(SMLSL_v, do_3op_widening,
5927       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5928       gen_mulsub_i64, true)
5929 TRANS(UMLSL_v, do_3op_widening,
5930       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5931       gen_mulsub_i64, true)
5932 
5933 TRANS(SMULL_vi, do_3op_widening,
5934       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5935       tcg_gen_mul_i64, false)
5936 TRANS(UMULL_vi, do_3op_widening,
5937       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5938       tcg_gen_mul_i64, false)
5939 TRANS(SMLAL_vi, do_3op_widening,
5940       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5941       gen_muladd_i64, true)
5942 TRANS(UMLAL_vi, do_3op_widening,
5943       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5944       gen_muladd_i64, true)
5945 TRANS(SMLSL_vi, do_3op_widening,
5946       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5947       gen_mulsub_i64, true)
5948 TRANS(UMLSL_vi, do_3op_widening,
5949       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5950       gen_mulsub_i64, true)
5951 
5952 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5953 {
5954     TCGv_i64 t1 = tcg_temp_new_i64();
5955     TCGv_i64 t2 = tcg_temp_new_i64();
5956 
5957     tcg_gen_sub_i64(t1, n, m);
5958     tcg_gen_sub_i64(t2, m, n);
5959     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
5960 }
5961 
5962 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5963 {
5964     TCGv_i64 t1 = tcg_temp_new_i64();
5965     TCGv_i64 t2 = tcg_temp_new_i64();
5966 
5967     tcg_gen_sub_i64(t1, n, m);
5968     tcg_gen_sub_i64(t2, m, n);
5969     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
5970 }
5971 
5972 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5973 {
5974     TCGv_i64 t = tcg_temp_new_i64();
5975     gen_sabd_i64(t, n, m);
5976     tcg_gen_add_i64(d, d, t);
5977 }
5978 
5979 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5980 {
5981     TCGv_i64 t = tcg_temp_new_i64();
5982     gen_uabd_i64(t, n, m);
5983     tcg_gen_add_i64(d, d, t);
5984 }
5985 
5986 TRANS(SADDL_v, do_3op_widening,
5987       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5988       tcg_gen_add_i64, false)
5989 TRANS(UADDL_v, do_3op_widening,
5990       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5991       tcg_gen_add_i64, false)
5992 TRANS(SSUBL_v, do_3op_widening,
5993       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5994       tcg_gen_sub_i64, false)
5995 TRANS(USUBL_v, do_3op_widening,
5996       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5997       tcg_gen_sub_i64, false)
5998 TRANS(SABDL_v, do_3op_widening,
5999       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6000       gen_sabd_i64, false)
6001 TRANS(UABDL_v, do_3op_widening,
6002       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6003       gen_uabd_i64, false)
6004 TRANS(SABAL_v, do_3op_widening,
6005       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6006       gen_saba_i64, true)
6007 TRANS(UABAL_v, do_3op_widening,
6008       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6009       gen_uaba_i64, true)
6010 
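/*
 * SQDMULL doubles the widened product, with saturation; the doubling
 * is performed by saturating the product added to itself.
 */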
6011 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6012 {
6013     tcg_gen_mul_i64(d, n, m);
6014     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6015 }
6016 
6017 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6018 {
6019     tcg_gen_mul_i64(d, n, m);
6020     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6021 }
6022 
6023 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6024 {
6025     TCGv_i64 t = tcg_temp_new_i64();
6026 
6027     tcg_gen_mul_i64(t, n, m);
6028     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6029     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6030 }
6031 
6032 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6033 {
6034     TCGv_i64 t = tcg_temp_new_i64();
6035 
6036     tcg_gen_mul_i64(t, n, m);
6037     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6038     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6039 }
6040 
6041 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6042 {
6043     TCGv_i64 t = tcg_temp_new_i64();
6044 
6045     tcg_gen_mul_i64(t, n, m);
6046     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6047     tcg_gen_neg_i64(t, t);
6048     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6049 }
6050 
6051 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6052 {
6053     TCGv_i64 t = tcg_temp_new_i64();
6054 
6055     tcg_gen_mul_i64(t, n, m);
6056     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6057     tcg_gen_neg_i64(t, t);
6058     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6059 }
6060 
6061 TRANS(SQDMULL_v, do_3op_widening,
6062       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6063       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6064 TRANS(SQDMLAL_v, do_3op_widening,
6065       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6066       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6067 TRANS(SQDMLSL_v, do_3op_widening,
6068       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6069       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6070 
6071 TRANS(SQDMULL_vi, do_3op_widening,
6072       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6073       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6074 TRANS(SQDMLAL_vi, do_3op_widening,
6075       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6076       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6077 TRANS(SQDMLSL_vi, do_3op_widening,
6078       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6079       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6080 
6081 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6082                            MemOp sign, bool sub)
6083 {
6084     TCGv_i64 tcg_op0, tcg_op1;
6085     MemOp esz = a->esz;
6086     int half = 8 >> esz;
6087     bool top = a->q;
6088     int top_swap = top ? 0 : half - 1;
6089     int top_half = top ? half : 0;
6090 
6091     /* There are no 64x64->128 bit operations. */
6092     if (esz >= MO_64) {
6093         return false;
6094     }
6095     if (!fp_access_check(s)) {
6096         return true;
6097     }
6098     tcg_op0 = tcg_temp_new_i64();
6099     tcg_op1 = tcg_temp_new_i64();
6100 
6101     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6102         int elt = elt_fwd ^ top_swap;
6103 
6104         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6105         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6106         if (sub) {
6107             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6108         } else {
6109             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6110         }
6111         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6112     }
6113     clear_vec_high(s, 1, a->rd);
6114     return true;
6115 }
6116 
6117 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6118 TRANS(UADDW, do_addsub_wide, a, 0, false)
6119 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6120 TRANS(USUBW, do_addsub_wide, a, 0, true)
6121 
6122 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6123                                  bool sub, bool round)
6124 {
6125     TCGv_i64 tcg_op0, tcg_op1;
6126     MemOp esz = a->esz;
6127     int half = 8 >> esz;
6128     bool top = a->q;
6129     int ebits = 8 << esz;
6130     uint64_t rbit = 1ull << (ebits - 1);
6131     int top_swap, top_half;
6132 
6133     /* There are no 128x128->64 bit operations. */
6134     if (esz >= MO_64) {
6135         return false;
6136     }
6137     if (!fp_access_check(s)) {
6138         return true;
6139     }
6140     tcg_op0 = tcg_temp_new_i64();
6141     tcg_op1 = tcg_temp_new_i64();
6142 
6143     /*
6144      * For top half inputs, iterate backward; forward for bottom half.
6145      * This means the store to the destination will not occur until
6146      * overlapping inputs are consumed.
6147      */
6148     top_swap = top ? half - 1 : 0;
6149     top_half = top ? half : 0;
6150 
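    /*
     * For RADDHN/RSUBHN, round to nearest by adding 1 << (ebits - 1)
     * before taking the high half of each element.
     */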
6151     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6152         int elt = elt_fwd ^ top_swap;
6153 
6154         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6155         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6156         if (sub) {
6157             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6158         } else {
6159             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6160         }
6161         if (round) {
6162             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6163         }
6164         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6165         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6166     }
6167     clear_vec_high(s, top, a->rd);
6168     return true;
6169 }
6170 
6171 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6172 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6173 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6174 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6175 
6176 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6177 {
6178     if (fp_access_check(s)) {
6179         /* The Q field specifies lo/hi half input for these insns.  */
6180         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6181     }
6182     return true;
6183 }
6184 
6185 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6186 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6187 
6188 /*
6189  * Advanced SIMD scalar/vector x indexed element
6190  */
6191 
6192 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6193 {
6194     switch (a->esz) {
6195     case MO_64:
6196         if (fp_access_check(s)) {
6197             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6198             TCGv_i64 t1 = tcg_temp_new_i64();
6199 
6200             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6201             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6202             write_fp_dreg(s, a->rd, t0);
6203         }
6204         break;
6205     case MO_32:
6206         if (fp_access_check(s)) {
6207             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6208             TCGv_i32 t1 = tcg_temp_new_i32();
6209 
6210             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6211             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6212             write_fp_sreg(s, a->rd, t0);
6213         }
6214         break;
6215     case MO_16:
6216         if (!dc_isar_feature(aa64_fp16, s)) {
6217             return false;
6218         }
6219         if (fp_access_check(s)) {
6220             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6221             TCGv_i32 t1 = tcg_temp_new_i32();
6222 
6223             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6224             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6225             write_fp_sreg(s, a->rd, t0);
6226         }
6227         break;
6228     default:
6229         g_assert_not_reached();
6230     }
6231     return true;
6232 }
6233 
6234 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6235 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6236 
6237 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6238 {
6239     switch (a->esz) {
6240     case MO_64:
6241         if (fp_access_check(s)) {
6242             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6243             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6244             TCGv_i64 t2 = tcg_temp_new_i64();
6245 
6246             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6247             if (neg) {
6248                 gen_vfp_negd(t1, t1);
6249             }
6250             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6251             write_fp_dreg(s, a->rd, t0);
6252         }
6253         break;
6254     case MO_32:
6255         if (fp_access_check(s)) {
6256             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6257             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6258             TCGv_i32 t2 = tcg_temp_new_i32();
6259 
6260             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6261             if (neg) {
6262                 gen_vfp_negs(t1, t1);
6263             }
6264             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6265             write_fp_sreg(s, a->rd, t0);
6266         }
6267         break;
6268     case MO_16:
6269         if (!dc_isar_feature(aa64_fp16, s)) {
6270             return false;
6271         }
6272         if (fp_access_check(s)) {
6273             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6274             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6275             TCGv_i32 t2 = tcg_temp_new_i32();
6276 
6277             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6278             if (neg) {
6279                 gen_vfp_negh(t1, t1);
6280             }
6281             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6282                                        fpstatus_ptr(FPST_FPCR_F16));
6283             write_fp_sreg(s, a->rd, t0);
6284         }
6285         break;
6286     default:
6287         g_assert_not_reached();
6288     }
6289     return true;
6290 }
6291 
6292 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6293 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6294 
6295 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6296                                   const ENVScalar2 *f)
6297 {
6298     if (a->esz < MO_16 || a->esz > MO_32) {
6299         return false;
6300     }
6301     if (fp_access_check(s)) {
6302         TCGv_i32 t0 = tcg_temp_new_i32();
6303         TCGv_i32 t1 = tcg_temp_new_i32();
6304 
6305         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6306         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6307         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6308         write_fp_sreg(s, a->rd, t0);
6309     }
6310     return true;
6311 }
6312 
6313 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6314 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6315 
6316 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6317                                   const ENVScalar3 *f)
6318 {
6319     if (a->esz < MO_16 || a->esz > MO_32) {
6320         return false;
6321     }
6322     if (fp_access_check(s)) {
6323         TCGv_i32 t0 = tcg_temp_new_i32();
6324         TCGv_i32 t1 = tcg_temp_new_i32();
6325         TCGv_i32 t2 = tcg_temp_new_i32();
6326 
6327         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6328         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6329         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6330         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6331         write_fp_sreg(s, a->rd, t0);
6332     }
6333     return true;
6334 }
6335 
6336 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6337 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6338 
6339 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6340                                           NeonGenTwo64OpFn *fn, bool acc)
6341 {
6342     if (fp_access_check(s)) {
6343         TCGv_i64 t0 = tcg_temp_new_i64();
6344         TCGv_i64 t1 = tcg_temp_new_i64();
6345         TCGv_i64 t2 = tcg_temp_new_i64();
6346 
6347         if (acc) {
6348             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6349         }
6350         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6351         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6352         fn(t0, t1, t2);
6353 
6354         /* Clear the whole register first, then store scalar. */
6355         clear_vec(s, a->rd);
6356         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6357     }
6358     return true;
6359 }
6360 
6361 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6362       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6363 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6364       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6365 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6366       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6367 
6368 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6369                               gen_helper_gvec_3_ptr * const fns[3])
6370 {
6371     MemOp esz = a->esz;
6372     int check = fp_access_check_vector_hsd(s, a->q, esz);
6373 
6374     if (check <= 0) {
6375         return check == 0;
6376     }
6377 
6378     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6379                       esz == MO_16, a->idx, fns[esz - 1]);
6380     return true;
6381 }
6382 
6383 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6384     gen_helper_gvec_fmul_idx_h,
6385     gen_helper_gvec_fmul_idx_s,
6386     gen_helper_gvec_fmul_idx_d,
6387 };
6388 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6389 
6390 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6391     gen_helper_gvec_fmulx_idx_h,
6392     gen_helper_gvec_fmulx_idx_s,
6393     gen_helper_gvec_fmulx_idx_d,
6394 };
6395 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6396 
6397 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6398 {
6399     static gen_helper_gvec_4_ptr * const fns[3] = {
6400         gen_helper_gvec_fmla_idx_h,
6401         gen_helper_gvec_fmla_idx_s,
6402         gen_helper_gvec_fmla_idx_d,
6403     };
6404     MemOp esz = a->esz;
6405     int check = fp_access_check_vector_hsd(s, a->q, esz);
6406 
6407     if (check <= 0) {
6408         return check == 0;
6409     }
6410 
6411     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6412                       esz == MO_16, (a->idx << 1) | neg,
6413                       fns[esz - 1]);
6414     return true;
6415 }
6416 
6417 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6418 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6419 
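/*
 * Indexed FMLAL/FMLSL: data packs the element index above the
 * is_2 and is_s bits.
 */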
6420 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6421 {
6422     if (fp_access_check(s)) {
6423         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6424         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6425                            vec_full_reg_offset(s, a->rn),
6426                            vec_full_reg_offset(s, a->rm), tcg_env,
6427                            a->q ? 16 : 8, vec_full_reg_size(s),
6428                            data, gen_helper_gvec_fmlal_idx_a64);
6429     }
6430     return true;
6431 }
6432 
6433 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6434 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6435 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6436 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6437 
6438 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6439                                gen_helper_gvec_3 * const fns[2])
6440 {
6441     assert(a->esz == MO_16 || a->esz == MO_32);
6442     if (fp_access_check(s)) {
6443         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6444     }
6445     return true;
6446 }
6447 
6448 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6449     gen_helper_gvec_mul_idx_h,
6450     gen_helper_gvec_mul_idx_s,
6451 };
6452 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6453 
6454 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6455 {
6456     static gen_helper_gvec_4 * const fns[2][2] = {
6457         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6458         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6459     };
6460 
6461     assert(a->esz == MO_16 || a->esz == MO_32);
6462     if (fp_access_check(s)) {
6463         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6464                          a->idx, fns[a->esz - 1][sub]);
6465     }
6466     return true;
6467 }
6468 
6469 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6470 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6471 
6472 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6473                                   gen_helper_gvec_4 * const fns[2])
6474 {
6475     assert(a->esz == MO_16 || a->esz == MO_32);
6476     if (fp_access_check(s)) {
6477         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6478                            vec_full_reg_offset(s, a->rn),
6479                            vec_full_reg_offset(s, a->rm),
6480                            offsetof(CPUARMState, vfp.qc),
6481                            a->q ? 16 : 8, vec_full_reg_size(s),
6482                            a->idx, fns[a->esz - 1]);
6483     }
6484     return true;
6485 }
6486 
6487 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6488     gen_helper_neon_sqdmulh_idx_h,
6489     gen_helper_neon_sqdmulh_idx_s,
6490 };
6491 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6492 
6493 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6494     gen_helper_neon_sqrdmulh_idx_h,
6495     gen_helper_neon_sqrdmulh_idx_s,
6496 };
6497 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6498 
6499 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6500     gen_helper_neon_sqrdmlah_idx_h,
6501     gen_helper_neon_sqrdmlah_idx_s,
6502 };
6503 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6504            f_vector_idx_sqrdmlah)
6505 
6506 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6507     gen_helper_neon_sqrdmlsh_idx_h,
6508     gen_helper_neon_sqrdmlsh_idx_s,
6509 };
6510 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6511            f_vector_idx_sqrdmlsh)
6512 
6513 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6514                               gen_helper_gvec_4 *fn)
6515 {
6516     if (fp_access_check(s)) {
6517         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6518     }
6519     return true;
6520 }
6521 
6522 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6523                                   gen_helper_gvec_4_ptr *fn)
6524 {
6525     if (fp_access_check(s)) {
6526         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6527     }
6528     return true;
6529 }
6530 
6531 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6532 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6533 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6534            gen_helper_gvec_sudot_idx_b)
6535 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6536            gen_helper_gvec_usdot_idx_b)
6537 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6538            gen_helper_gvec_bfdot_idx)
6539 
6540 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6541 {
6542     if (!dc_isar_feature(aa64_bf16, s)) {
6543         return false;
6544     }
6545     if (fp_access_check(s)) {
6546         /* Q bit selects BFMLALB vs BFMLALT. */
6547         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
6548                           (a->idx << 1) | a->q,
6549                           gen_helper_gvec_bfmlal_idx);
6550     }
6551     return true;
6552 }
6553 
6554 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6555 {
6556     gen_helper_gvec_4_ptr *fn;
6557 
6558     if (!dc_isar_feature(aa64_fcma, s)) {
6559         return false;
6560     }
6561     switch (a->esz) {
6562     case MO_16:
6563         if (!dc_isar_feature(aa64_fp16, s)) {
6564             return false;
6565         }
6566         fn = gen_helper_gvec_fcmlah_idx;
6567         break;
6568     case MO_32:
6569         fn = gen_helper_gvec_fcmlas_idx;
6570         break;
6571     default:
6572         g_assert_not_reached();
6573     }
6574     if (fp_access_check(s)) {
6575         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6576                           a->esz == MO_16, (a->idx << 2) | a->rot, fn);
6577     }
6578     return true;
6579 }
6580 
6581 /*
6582  * Advanced SIMD scalar pairwise
6583  */
6584 
6585 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6586 {
6587     switch (a->esz) {
6588     case MO_64:
6589         if (fp_access_check(s)) {
6590             TCGv_i64 t0 = tcg_temp_new_i64();
6591             TCGv_i64 t1 = tcg_temp_new_i64();
6592 
6593             read_vec_element(s, t0, a->rn, 0, MO_64);
6594             read_vec_element(s, t1, a->rn, 1, MO_64);
6595             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6596             write_fp_dreg(s, a->rd, t0);
6597         }
6598         break;
6599     case MO_32:
6600         if (fp_access_check(s)) {
6601             TCGv_i32 t0 = tcg_temp_new_i32();
6602             TCGv_i32 t1 = tcg_temp_new_i32();
6603 
6604             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6605             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6606             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6607             write_fp_sreg(s, a->rd, t0);
6608         }
6609         break;
6610     case MO_16:
6611         if (!dc_isar_feature(aa64_fp16, s)) {
6612             return false;
6613         }
6614         if (fp_access_check(s)) {
6615             TCGv_i32 t0 = tcg_temp_new_i32();
6616             TCGv_i32 t1 = tcg_temp_new_i32();
6617 
6618             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6619             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6620             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6621             write_fp_sreg(s, a->rd, t0);
6622         }
6623         break;
6624     default:
6625         g_assert_not_reached();
6626     }
6627     return true;
6628 }
6629 
6630 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6631 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
6632 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
6633 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6634 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6635 
6636 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6637 {
6638     if (fp_access_check(s)) {
6639         TCGv_i64 t0 = tcg_temp_new_i64();
6640         TCGv_i64 t1 = tcg_temp_new_i64();
6641 
6642         read_vec_element(s, t0, a->rn, 0, MO_64);
6643         read_vec_element(s, t1, a->rn, 1, MO_64);
6644         tcg_gen_add_i64(t0, t0, t1);
6645         write_fp_dreg(s, a->rd, t0);
6646     }
6647     return true;
6648 }
6649 
6650 /*
6651  * Floating-point conditional select
6652  */
6653 
6654 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
6655 {
6656     TCGv_i64 t_true, t_false;
6657     DisasCompare64 c;
6658     int check = fp_access_check_scalar_hsd(s, a->esz);
6659 
6660     if (check <= 0) {
6661         return check == 0;
6662     }
6663 
6664     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6665     t_true = tcg_temp_new_i64();
6666     t_false = tcg_temp_new_i64();
6667     read_vec_element(s, t_true, a->rn, 0, a->esz);
6668     read_vec_element(s, t_false, a->rm, 0, a->esz);
6669 
6670     a64_test_cc(&c, a->cond);
6671     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6672                         t_true, t_false);
6673 
6674     /*
6675      * Note that sregs & hregs write back zeros to the high bits,
6676      * and we've already done the zero-extension.
6677      */
6678     write_fp_dreg(s, a->rd, t_true);
6679     return true;
6680 }
6681 
6682 /*
6683  * Advanced SIMD Extract
6684  */
6685 
6686 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
6687 {
6688     if (fp_access_check(s)) {
6689         TCGv_i64 lo = read_fp_dreg(s, a->rn);
6690         if (a->imm != 0) {
6691             TCGv_i64 hi = read_fp_dreg(s, a->rm);
6692             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
6693         }
6694         write_fp_dreg(s, a->rd, lo);
6695     }
6696     return true;
6697 }
6698 
6699 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
6700 {
6701     TCGv_i64 lo, hi;
6702     int pos = (a->imm & 7) * 8;
6703     int elt = a->imm >> 3;
6704 
6705     if (!fp_access_check(s)) {
6706         return true;
6707     }
6708 
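    /*
     * Conceptually the result is bytes imm..imm+15 of the 32-byte
     * concatenation Vm:Vn (Vn in the low half).  E.g. for imm == 5,
     * lo = bytes 5..12 of Vn and hi = bytes 13..15 of Vn followed by
     * bytes 0..4 of Vm.
     */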
6709     lo = tcg_temp_new_i64();
6710     hi = tcg_temp_new_i64();
6711 
6712     read_vec_element(s, lo, a->rn, elt, MO_64);
6713     elt++;
6714     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
6715     elt++;
6716 
6717     if (pos != 0) {
6718         TCGv_i64 hh = tcg_temp_new_i64();
6719         tcg_gen_extract2_i64(lo, lo, hi, pos);
6720         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
6721         tcg_gen_extract2_i64(hi, hi, hh, pos);
6722     }
6723 
6724     write_vec_element(s, lo, a->rd, 0, MO_64);
6725     write_vec_element(s, hi, a->rd, 1, MO_64);
6726     clear_vec_high(s, true, a->rd);
6727     return true;
6728 }
6729 
6730 /*
6731  * Floating-point data-processing (3 source)
6732  */
6733 
6734 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
6735 {
6736     TCGv_ptr fpst;
6737 
6738     /*
6739      * These are fused multiply-add.  Note that doing the negations here
6740      * as separate steps is correct: an input NaN should come out with
6741  * its sign bit flipped if it is a negated input.
6742      */
6743     switch (a->esz) {
6744     case MO_64:
6745         if (fp_access_check(s)) {
6746             TCGv_i64 tn = read_fp_dreg(s, a->rn);
6747             TCGv_i64 tm = read_fp_dreg(s, a->rm);
6748             TCGv_i64 ta = read_fp_dreg(s, a->ra);
6749 
6750             if (neg_a) {
6751                 gen_vfp_negd(ta, ta);
6752             }
6753             if (neg_n) {
6754                 gen_vfp_negd(tn, tn);
6755             }
6756             fpst = fpstatus_ptr(FPST_FPCR);
6757             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
6758             write_fp_dreg(s, a->rd, ta);
6759         }
6760         break;
6761 
6762     case MO_32:
6763         if (fp_access_check(s)) {
6764             TCGv_i32 tn = read_fp_sreg(s, a->rn);
6765             TCGv_i32 tm = read_fp_sreg(s, a->rm);
6766             TCGv_i32 ta = read_fp_sreg(s, a->ra);
6767 
6768             if (neg_a) {
6769                 gen_vfp_negs(ta, ta);
6770             }
6771             if (neg_n) {
6772                 gen_vfp_negs(tn, tn);
6773             }
6774             fpst = fpstatus_ptr(FPST_FPCR);
6775             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
6776             write_fp_sreg(s, a->rd, ta);
6777         }
6778         break;
6779 
6780     case MO_16:
6781         if (!dc_isar_feature(aa64_fp16, s)) {
6782             return false;
6783         }
6784         if (fp_access_check(s)) {
6785             TCGv_i32 tn = read_fp_hreg(s, a->rn);
6786             TCGv_i32 tm = read_fp_hreg(s, a->rm);
6787             TCGv_i32 ta = read_fp_hreg(s, a->ra);
6788 
6789             if (neg_a) {
6790                 gen_vfp_negh(ta, ta);
6791             }
6792             if (neg_n) {
6793                 gen_vfp_negh(tn, tn);
6794             }
6795             fpst = fpstatus_ptr(FPST_FPCR_F16);
6796             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
6797             write_fp_sreg(s, a->rd, ta);
6798         }
6799         break;
6800 
6801     default:
6802         return false;
6803     }
6804     return true;
6805 }
6806 
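/*
 * FMADD:  rd =  ra + rn * rm    FMSUB:  rd =  ra - rn * rm
 * FNMADD: rd = -ra - rn * rm    FNMSUB: rd = -ra + rn * rm
 */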
6807 TRANS(FMADD, do_fmadd, a, false, false)
6808 TRANS(FNMADD, do_fmadd, a, true, true)
6809 TRANS(FMSUB, do_fmadd, a, false, true)
6810 TRANS(FNMSUB, do_fmadd, a, true, false)
6811 
6812 /*
6813  * Advanced SIMD Across Lanes
6814  */
6815 
6816 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
6817                              MemOp src_sign, NeonGenTwo64OpFn *fn)
6818 {
6819     TCGv_i64 tcg_res, tcg_elt;
6820     MemOp src_mop = a->esz | src_sign;
6821     int elements = (a->q ? 16 : 8) >> a->esz;
6822 
6823     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
6824     if (elements < 4) {
6825         return false;
6826     }
6827     if (!fp_access_check(s)) {
6828         return true;
6829     }
6830 
6831     tcg_res = tcg_temp_new_i64();
6832     tcg_elt = tcg_temp_new_i64();
6833 
6834     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
6835     for (int i = 1; i < elements; i++) {
6836         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
6837         fn(tcg_res, tcg_res, tcg_elt);
6838     }
6839 
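    /*
     * The result element is twice the source width for the widening
     * SADDLV/UADDLV forms and the source width otherwise; extend the
     * accumulated value to that width before writing Vd.
     */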
6840     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
6841     write_fp_dreg(s, a->rd, tcg_res);
6842     return true;
6843 }
6844 
6845 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
6846 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
6847 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
6848 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
6849 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
6850 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
6851 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
6852 
6853 /*
6854  * do_reduction_op helper
6855  *
6856  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6857  * important for correct NaN propagation that we do these
6858  * operations in exactly the order specified by the pseudocode.
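 * E.g. for a four-element vector the result is fn(fn(e0, e1), fn(e2, e3)).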
6859  *
6860  * This is a recursive function.
6861  */
6862 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
6863                                 int ebase, int ecount, TCGv_ptr fpst,
6864                                 NeonGenTwoSingleOpFn *fn)
6865 {
6866     if (ecount == 1) {
6867         TCGv_i32 tcg_elem = tcg_temp_new_i32();
6868         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
6869         return tcg_elem;
6870     } else {
6871         int half = ecount >> 1;
6872         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6873 
6874         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
6875         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
6876         tcg_res = tcg_temp_new_i32();
6877 
6878         fn(tcg_res, tcg_lo, tcg_hi, fpst);
6879         return tcg_res;
6880     }
6881 }
6882 
6883 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
6884                               NeonGenTwoSingleOpFn *fn)
6885 {
6886     if (fp_access_check(s)) {
6887         MemOp esz = a->esz;
6888         int elts = (a->q ? 16 : 8) >> esz;
6889         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
6890         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
6891         write_fp_sreg(s, a->rd, res);
6892     }
6893     return true;
6894 }
6895 
6896 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
6897 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
6898 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
6899 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
6900 
6901 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
6902 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
6903 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
6904 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
6905 
6906 /*
6907  * Floating-point Immediate
6908  */
6909 
6910 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
6911 {
6912     int check = fp_access_check_scalar_hsd(s, a->esz);
6913     uint64_t imm;
6914 
6915     if (check <= 0) {
6916         return check == 0;
6917     }
6918 
6919     imm = vfp_expand_imm(a->esz, a->imm);
6920     write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
6921     return true;
6922 }
6923 
6924 /*
6925  * Floating point compare, conditional compare
6926  */
6927 
6928 static void handle_fp_compare(DisasContext *s, int size,
6929                               unsigned int rn, unsigned int rm,
6930                               bool cmp_with_zero, bool signal_all_nans)
6931 {
6932     TCGv_i64 tcg_flags = tcg_temp_new_i64();
6933     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
6934 
6935     if (size == MO_64) {
6936         TCGv_i64 tcg_vn, tcg_vm;
6937 
6938         tcg_vn = read_fp_dreg(s, rn);
6939         if (cmp_with_zero) {
6940             tcg_vm = tcg_constant_i64(0);
6941         } else {
6942             tcg_vm = read_fp_dreg(s, rm);
6943         }
6944         if (signal_all_nans) {
6945             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6946         } else {
6947             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6948         }
6949     } else {
6950         TCGv_i32 tcg_vn = tcg_temp_new_i32();
6951         TCGv_i32 tcg_vm = tcg_temp_new_i32();
6952 
6953         read_vec_element_i32(s, tcg_vn, rn, 0, size);
6954         if (cmp_with_zero) {
6955             tcg_gen_movi_i32(tcg_vm, 0);
6956         } else {
6957             read_vec_element_i32(s, tcg_vm, rm, 0, size);
6958         }
6959 
6960         switch (size) {
6961         case MO_32:
6962             if (signal_all_nans) {
6963                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6964             } else {
6965                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6966             }
6967             break;
6968         case MO_16:
6969             if (signal_all_nans) {
6970                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6971             } else {
6972                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6973             }
6974             break;
6975         default:
6976             g_assert_not_reached();
6977         }
6978     }
6979 
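    /* The cmp helpers return PSTATE.NZCV in bits [31:28] of tcg_flags. */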
6980     gen_set_nzcv(tcg_flags);
6981 }
6982 
6983 /* FCMP, FCMPE */
6984 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
6985 {
6986     int check = fp_access_check_scalar_hsd(s, a->esz);
6987 
6988     if (check <= 0) {
6989         return check == 0;
6990     }
6991 
6992     handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
6993     return true;
6994 }
6995 
6996 /* FCCMP, FCCMPE */
6997 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
6998 {
6999     TCGLabel *label_continue = NULL;
7000     int check = fp_access_check_scalar_hsd(s, a->esz);
7001 
7002     if (check <= 0) {
7003         return check == 0;
7004     }
7005 
7006     if (a->cond < 0x0e) { /* not always */
7007         TCGLabel *label_match = gen_new_label();
7008         label_continue = gen_new_label();
7009         arm_gen_test_cc(a->cond, label_match);
7010         /* nomatch: */
7011         gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7012         tcg_gen_br(label_continue);
7013         gen_set_label(label_match);
7014     }
7015 
7016     handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7017 
7018     if (label_continue) {
7019         gen_set_label(label_continue);
7020     }
7021     return true;
7022 }
7023 
7024 /*
7025  * Advanced SIMD Modified Immediate
7026  */
7027 
7028 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7029 {
7030     if (!dc_isar_feature(aa64_fp16, s)) {
7031         return false;
7032     }
7033     if (fp_access_check(s)) {
7034         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7035                              a->q ? 16 : 8, vec_full_reg_size(s),
7036                              vfp_expand_imm(MO_16, a->abcdefgh));
7037     }
7038     return true;
7039 }
7040 
7041 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7042                      int64_t c, uint32_t oprsz, uint32_t maxsz)
7043 {
7044     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7045 }
7046 
7047 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7048 {
7049     GVecGen2iFn *fn;
7050 
7051     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7052     if ((a->cmode & 1) && a->cmode < 12) {
7053         /* For op=1, the imm will be inverted, so BIC becomes AND. */
7054         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7055     } else {
7056         /* There is one unallocated cmode/op combination in this space */
7057         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7058             return false;
7059         }
7060         fn = gen_movi;
7061     }
7062 
7063     if (fp_access_check(s)) {
7064         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7065         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7066     }
7067     return true;
7068 }
7069 
7070 /*
7071  * Advanced SIMD Shift by Immediate
7072  */
7073 
7074 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7075 {
7076     if (fp_access_check(s)) {
7077         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7078     }
7079     return true;
7080 }
7081 
7082 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7083 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7084 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7085 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7086 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7087 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7088 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7089 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7090 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7091 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7092 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7093 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7094 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7095 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7096 
7097 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7098 {
7099     TCGv_i64 tcg_rn, tcg_rd;
7100     int esz = a->esz;
7101     int esize;
7102 
7103     if (!fp_access_check(s)) {
7104         return true;
7105     }
7106 
7107     /*
7108      * For the LL variants the store is larger than the load,
7109      * so if rd == rn we would overwrite parts of our input.
7110      * So load everything right now and use shifts in the main loop.
7111      */
7112     tcg_rd = tcg_temp_new_i64();
7113     tcg_rn = tcg_temp_new_i64();
7114     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7115 
7116     esize = 8 << esz;
7117     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7118         if (is_u) {
7119             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7120         } else {
7121             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7122         }
7123         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7124         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7125     }
7126     clear_vec_high(s, true, a->rd);
7127     return true;
7128 }
7129 
7130 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7131 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7132 
7133 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7134 {
7135     assert(shift >= 0 && shift <= 64);
7136     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7137 }
7138 
7139 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7140 {
7141     assert(shift >= 0 && shift <= 64);
7142     if (shift == 64) {
7143         tcg_gen_movi_i64(dst, 0);
7144     } else {
7145         tcg_gen_shri_i64(dst, src, shift);
7146     }
7147 }
7148 
7149 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7150 {
7151     gen_sshr_d(src, src, shift);
7152     tcg_gen_add_i64(dst, dst, src);
7153 }
7154 
7155 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7156 {
7157     gen_ushr_d(src, src, shift);
7158     tcg_gen_add_i64(dst, dst, src);
7159 }
7160 
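/*
 * Rounding shifts: the _bhs forms operate on 8/16/32-bit elements held
 * in a 64-bit temp, so adding the rounding constant before the shift
 * cannot overflow; the _d forms must special-case shift == 64.
 */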
7161 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7162 {
7163     assert(shift >= 0 && shift <= 32);
7164     if (shift) {
7165         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7166         tcg_gen_add_i64(dst, src, rnd);
7167         tcg_gen_sari_i64(dst, dst, shift);
7168     } else {
7169         tcg_gen_mov_i64(dst, src);
7170     }
7171 }
7172 
7173 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7174 {
7175     assert(shift >= 0 && shift <= 32);
7176     if (shift) {
7177         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7178         tcg_gen_add_i64(dst, src, rnd);
7179         tcg_gen_shri_i64(dst, dst, shift);
7180     } else {
7181         tcg_gen_mov_i64(dst, src);
7182     }
7183 }
7184 
7185 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7186 {
7187     assert(shift >= 0 && shift <= 64);
7188     if (shift == 0) {
7189         tcg_gen_mov_i64(dst, src);
7190     } else if (shift == 64) {
7191         /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7192         tcg_gen_movi_i64(dst, 0);
7193     } else {
7194         TCGv_i64 rnd = tcg_temp_new_i64();
7195         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7196         tcg_gen_sari_i64(dst, src, shift);
7197         tcg_gen_add_i64(dst, dst, rnd);
7198     }
7199 }
7200 
7201 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7202 {
7203     assert(shift >= 0 && shift <= 64);
7204     if (shift == 0) {
7205         tcg_gen_mov_i64(dst, src);
7206     } else if (shift == 64) {
7207         /* Rounding will propagate bit 63 into bit 64. */
7208         tcg_gen_shri_i64(dst, src, 63);
7209     } else {
7210         TCGv_i64 rnd = tcg_temp_new_i64();
7211         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7212         tcg_gen_shri_i64(dst, src, shift);
7213         tcg_gen_add_i64(dst, dst, rnd);
7214     }
7215 }
7216 
7217 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7218 {
7219     gen_srshr_d(src, src, shift);
7220     tcg_gen_add_i64(dst, dst, src);
7221 }
7222 
7223 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7224 {
7225     gen_urshr_d(src, src, shift);
7226     tcg_gen_add_i64(dst, dst, src);
7227 }
7228 
7229 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7230 {
7231     /* If shift is 64, dst is unchanged. */
7232     if (shift != 64) {
7233         tcg_gen_shri_i64(src, src, shift);
7234         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7235     }
7236 }
7237 
7238 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7239 {
7240     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7241 }
7242 
7243 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7244                                     WideShiftImmFn * const fns[3], MemOp sign)
7245 {
7246     TCGv_i64 tcg_rn, tcg_rd;
7247     int esz = a->esz;
7248     int esize;
7249     WideShiftImmFn *fn;
7250 
7251     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7252 
7253     if (!fp_access_check(s)) {
7254         return true;
7255     }
7256 
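    /*
     * Narrow each element into tcg_rd, then write the packed 64-bit
     * result to the low (Q=0) or high (Q=1, the "2" forms) half of Vd.
     */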
7257     tcg_rn = tcg_temp_new_i64();
7258     tcg_rd = tcg_temp_new_i64();
7259     tcg_gen_movi_i64(tcg_rd, 0);
7260 
7261     fn = fns[esz];
7262     esize = 8 << esz;
7263     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7264         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7265         fn(tcg_rn, tcg_rn, a->imm);
7266         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7267     }
7268 
7269     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7270     clear_vec_high(s, a->q, a->rd);
7271     return true;
7272 }
7273 
7274 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7275 {
7276     tcg_gen_sari_i64(d, s, i);
7277     tcg_gen_ext16u_i64(d, d);
7278     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7279 }
7280 
7281 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7282 {
7283     tcg_gen_sari_i64(d, s, i);
7284     tcg_gen_ext32u_i64(d, d);
7285     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7286 }
7287 
7288 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7289 {
7290     gen_sshr_d(d, s, i);
7291     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7292 }
7293 
7294 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7295 {
7296     tcg_gen_shri_i64(d, s, i);
7297     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7298 }
7299 
7300 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7301 {
7302     tcg_gen_shri_i64(d, s, i);
7303     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7304 }
7305 
7306 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7307 {
7308     gen_ushr_d(d, s, i);
7309     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7310 }
7311 
7312 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7313 {
7314     tcg_gen_sari_i64(d, s, i);
7315     tcg_gen_ext16u_i64(d, d);
7316     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7317 }
7318 
7319 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7320 {
7321     tcg_gen_sari_i64(d, s, i);
7322     tcg_gen_ext32u_i64(d, d);
7323     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7324 }
7325 
7326 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7327 {
7328     gen_sshr_d(d, s, i);
7329     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7330 }
7331 
7332 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7333 {
7334     gen_srshr_bhs(d, s, i);
7335     tcg_gen_ext16u_i64(d, d);
7336     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7337 }
7338 
7339 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7340 {
7341     gen_srshr_bhs(d, s, i);
7342     tcg_gen_ext32u_i64(d, d);
7343     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7344 }
7345 
7346 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7347 {
7348     gen_srshr_d(d, s, i);
7349     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7350 }
7351 
7352 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7353 {
7354     gen_urshr_bhs(d, s, i);
7355     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7356 }
7357 
7358 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7359 {
7360     gen_urshr_bhs(d, s, i);
7361     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7362 }
7363 
7364 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7365 {
7366     gen_urshr_d(d, s, i);
7367     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7368 }
7369 
7370 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7371 {
7372     gen_srshr_bhs(d, s, i);
7373     tcg_gen_ext16u_i64(d, d);
7374     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7375 }
7376 
7377 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7378 {
7379     gen_srshr_bhs(d, s, i);
7380     tcg_gen_ext32u_i64(d, d);
7381     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7382 }
7383 
7384 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7385 {
7386     gen_srshr_d(d, s, i);
7387     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7388 }
7389 
7390 static WideShiftImmFn * const shrn_fns[] = {
7391     tcg_gen_shri_i64,
7392     tcg_gen_shri_i64,
7393     gen_ushr_d,
7394 };
7395 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7396 
7397 static WideShiftImmFn * const rshrn_fns[] = {
7398     gen_urshr_bhs,
7399     gen_urshr_bhs,
7400     gen_urshr_d,
7401 };
7402 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7403 
7404 static WideShiftImmFn * const sqshrn_fns[] = {
7405     gen_sqshrn_b,
7406     gen_sqshrn_h,
7407     gen_sqshrn_s,
7408 };
7409 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7410 
7411 static WideShiftImmFn * const uqshrn_fns[] = {
7412     gen_uqshrn_b,
7413     gen_uqshrn_h,
7414     gen_uqshrn_s,
7415 };
7416 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7417 
7418 static WideShiftImmFn * const sqshrun_fns[] = {
7419     gen_sqshrun_b,
7420     gen_sqshrun_h,
7421     gen_sqshrun_s,
7422 };
7423 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7424 
7425 static WideShiftImmFn * const sqrshrn_fns[] = {
7426     gen_sqrshrn_b,
7427     gen_sqrshrn_h,
7428     gen_sqrshrn_s,
7429 };
7430 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7431 
7432 static WideShiftImmFn * const uqrshrn_fns[] = {
7433     gen_uqrshrn_b,
7434     gen_uqrshrn_h,
7435     gen_uqrshrn_s,
7436 };
7437 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7438 
7439 static WideShiftImmFn * const sqrshrun_fns[] = {
7440     gen_sqrshrun_b,
7441     gen_sqrshrun_h,
7442     gen_sqrshrun_s,
7443 };
7444 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7445 
7446 /*
7447  * Advanced SIMD Scalar Shift by Immediate
7448  */
7449 
7450 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7451                                 WideShiftImmFn *fn, bool accumulate,
7452                                 MemOp sign)
7453 {
7454     if (fp_access_check(s)) {
7455         TCGv_i64 rd = tcg_temp_new_i64();
7456         TCGv_i64 rn = tcg_temp_new_i64();
7457 
7458         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7459         if (accumulate) {
7460             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7461         }
7462         fn(rd, rn, a->imm);
7463         write_fp_dreg(s, a->rd, rd);
7464     }
7465     return true;
7466 }
7467 
7468 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7469 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7470 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7471 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7472 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7473 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7474 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7475 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7476 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7477 
7478 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7479 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7480 
7481 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
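/*
 * The 8/16/32-bit saturating-shift helpers take 32-bit operands:
 * truncate the 64-bit temp, call the helper, and widen the result.
 */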
7482                               NeonGenTwoOpEnvFn *fn)
7483 {
7484     TCGv_i32 t = tcg_temp_new_i32();
7485     tcg_gen_extrl_i64_i32(t, s);
7486     fn(t, tcg_env, t, tcg_constant_i32(i));
7487     tcg_gen_extu_i32_i64(d, t);
7488 }
7489 
7490 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7491 {
7492     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7493 }
7494 
7495 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7496 {
7497     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7498 }
7499 
7500 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7501 {
7502     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7503 }
7504 
7505 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7506 {
7507     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7508 }
7509 
7510 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7511 {
7512     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7513 }
7514 
7515 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7516 {
7517     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7518 }
7519 
7520 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7521 {
7522     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7523 }
7524 
7525 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7526 {
7527     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7528 }
7529 
7530 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7531 {
7532     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7533 }
7534 
7535 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7536 {
7537     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7538 }
7539 
7540 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7541 {
7542     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7543 }
7544 
7545 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7546 {
7547     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7548 }
7549 
7550 static WideShiftImmFn * const f_scalar_sqshli[] = {
7551     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7552 };
7553 
7554 static WideShiftImmFn * const f_scalar_uqshli[] = {
7555     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7556 };
7557 
7558 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7559     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7560 };
7561 
7562 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7563 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7564 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7565 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7566 
7567 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7568                                        WideShiftImmFn * const fns[3],
7569                                        MemOp sign, bool zext)
7570 {
7571     MemOp esz = a->esz;
7572 
7573     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7574 
7575     if (fp_access_check(s)) {
7576         TCGv_i64 rd = tcg_temp_new_i64();
7577         TCGv_i64 rn = tcg_temp_new_i64();
7578 
7579         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7580         fns[esz](rd, rn, a->imm);
7581         if (zext) {
7582             tcg_gen_ext_i64(rd, rd, esz);
7583         }
7584         write_fp_dreg(s, a->rd, rd);
7585     }
7586     return true;
7587 }
7588 
7589 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7590 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7591 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7592 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7593 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7594 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7595 
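/*
 * AArch64 division: the helpers return 0 for division by zero and
 * wrap INT64_MIN / -1 back to INT64_MIN rather than trapping.
 */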
7596 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7597 {
7598     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7599     tcg_rd = cpu_reg(s, a->rd);
7600 
7601     if (!a->sf && is_signed) {
7602         tcg_n = tcg_temp_new_i64();
7603         tcg_m = tcg_temp_new_i64();
7604         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7605         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7606     } else {
7607         tcg_n = read_cpu_reg(s, a->rn, a->sf);
7608         tcg_m = read_cpu_reg(s, a->rm, a->sf);
7609     }
7610 
7611     if (is_signed) {
7612         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7613     } else {
7614         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7615     }
7616 
7617     if (!a->sf) { /* zero extend final result */
7618         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7619     }
7620     return true;
7621 }
7622 
7623 TRANS(SDIV, do_div, a, true)
7624 TRANS(UDIV, do_div, a, false)
7625 
7626 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7627  * Note that it is the caller's responsibility to ensure that the
7628  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
7629  * mandated semantics for out of range shifts.
7630  */
7631 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7632                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7633 {
7634     switch (shift_type) {
7635     case A64_SHIFT_TYPE_LSL:
7636         tcg_gen_shl_i64(dst, src, shift_amount);
7637         break;
7638     case A64_SHIFT_TYPE_LSR:
7639         tcg_gen_shr_i64(dst, src, shift_amount);
7640         break;
7641     case A64_SHIFT_TYPE_ASR:
7642         if (!sf) {
7643             tcg_gen_ext32s_i64(dst, src);
7644         }
7645         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
7646         break;
7647     case A64_SHIFT_TYPE_ROR:
7648         if (sf) {
7649             tcg_gen_rotr_i64(dst, src, shift_amount);
7650         } else {
7651             TCGv_i32 t0, t1;
7652             t0 = tcg_temp_new_i32();
7653             t1 = tcg_temp_new_i32();
7654             tcg_gen_extrl_i64_i32(t0, src);
7655             tcg_gen_extrl_i64_i32(t1, shift_amount);
7656             tcg_gen_rotr_i32(t0, t0, t1);
7657             tcg_gen_extu_i32_i64(dst, t0);
7658         }
7659         break;
7660     default:
7661         g_assert_not_reached(); /* all shift types should be handled */
7662         break;
7663     }
7664 
7665     if (!sf) { /* zero extend final result */
7666         tcg_gen_ext32u_i64(dst, dst);
7667     }
7668 }
7669 
7670 /* Shift a TCGv src by immediate, put result in dst.
7671  * The shift amount must be in range (this should always be true as the
7672  * relevant instructions will UNDEF on bad shift immediates).
7673  */
7674 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
7675                           enum a64_shift_type shift_type, unsigned int shift_i)
7676 {
7677     assert(shift_i < (sf ? 64 : 32));
7678 
7679     if (shift_i == 0) {
7680         tcg_gen_mov_i64(dst, src);
7681     } else {
7682         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
7683     }
7684 }
7685 
7686 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
7687                          enum a64_shift_type shift_type)
7688 {
7689     TCGv_i64 tcg_shift = tcg_temp_new_i64();
7690     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
7691     TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
7692 
7693     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
7694     shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
7695     return true;
7696 }
7697 
7698 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
7699 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
7700 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
7701 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
7702 
7703 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
7704 {
7705     TCGv_i64 tcg_acc, tcg_val, tcg_rd;
7706     TCGv_i32 tcg_bytes;
7707 
7708     switch (a->esz) {
7709     case MO_8:
7710     case MO_16:
7711     case MO_32:
7712         tcg_val = tcg_temp_new_i64();
7713         tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
7714         break;
7715     case MO_64:
7716         tcg_val = cpu_reg(s, a->rm);
7717         break;
7718     default:
7719         g_assert_not_reached();
7720     }
7721     tcg_acc = cpu_reg(s, a->rn);
7722     tcg_bytes = tcg_constant_i32(1 << a->esz);
7723     tcg_rd = cpu_reg(s, a->rd);
7724 
7725     if (crc32c) {
7726         gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
7727     } else {
7728         gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
7729     }
7730     return true;
7731 }
7732 
7733 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
7734 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
7735 
7736 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
7737 {
7738     TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
7739     TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
7740     TCGv_i64 tcg_d = cpu_reg(s, a->rd);
7741 
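    /* SUBP operates on the 56-bit address part, sign-extended. */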
7742     tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
7743     tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
7744 
7745     if (setflag) {
7746         gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
7747     } else {
7748         tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
7749     }
7750     return true;
7751 }
7752 
7753 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
7754 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
7755 
7756 static bool trans_IRG(DisasContext *s, arg_rrr *a)
7757 {
7758     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
7759         TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
7760         TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
7761 
7762         if (s->ata[0]) {
7763             gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
7764         } else {
7765             gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
7766         }
7767         return true;
7768     }
7769     return false;
7770 }
7771 
7772 static bool trans_GMI(DisasContext *s, arg_rrr *a)
7773 {
7774     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
7775         TCGv_i64 t = tcg_temp_new_i64();
7776 
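        /* Xd = Xm | (1 << tag), where tag is bits [59:56] of Xn. */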
7777         tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
7778         tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
7779         tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
7780         return true;
7781     }
7782     return false;
7783 }
7784 
7785 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
7786 {
7787     if (dc_isar_feature(aa64_pauth, s)) {
7788         gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
7789                          cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
7790         return true;
7791     }
7792     return false;
7793 }
7794 
7795 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
7796 
7797 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
7798 {
7799     fn(cpu_reg(s, rd), cpu_reg(s, rn));
7800     return true;
7801 }
7802 
7803 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7804 {
7805     TCGv_i32 t32 = tcg_temp_new_i32();
7806 
7807     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7808     gen_helper_rbit(t32, t32);
7809     tcg_gen_extu_i32_i64(tcg_rd, t32);
7810 }
7811 
7812 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
7813 {
7814     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7815 
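    /*
     * rd = ((rn & mask) << 8) | ((rn >> 8) & mask): swap the bytes
     * within each halfword.
     */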
7816     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
7817     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
7818     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
7819     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
7820     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
7821 }
7822 
7823 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7824 {
7825     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
7826 }
7827 
7828 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7829 {
7830     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
7831 }
7832 
7833 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7834 {
7835     tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
7836 }
7837 
7838 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7839 {
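    /*
     * Byte-swap each 32-bit word: bswap the whole register, then
     * swap the two halves back into place.
     */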
7840     tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
7841     tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
7842 }
7843 
7844 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
7845 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
7846 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
7847 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
7848 
7849 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7850 {
7851     TCGv_i32 t32 = tcg_temp_new_i32();
7852 
7853     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7854     tcg_gen_clzi_i32(t32, t32, 32);
7855     tcg_gen_extu_i32_i64(tcg_rd, t32);
7856 }
7857 
7858 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7859 {
7860     tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7861 }
7862 
7863 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7864 {
7865     TCGv_i32 t32 = tcg_temp_new_i32();
7866 
7867     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7868     tcg_gen_clrsb_i32(t32, t32);
7869     tcg_gen_extu_i32_i64(tcg_rd, t32);
7870 }
7871 
7872 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
7873 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
7874 
7875 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
7876 {
7877     TCGv_i64 tcg_rd, tcg_rn;
7878 
7879     if (a->z) {
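        /* The PACIZA etc forms use a zero modifier and require Rn == 31. */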
7880         if (a->rn != 31) {
7881             return false;
7882         }
7883         tcg_rn = tcg_constant_i64(0);
7884     } else {
7885         tcg_rn = cpu_reg_sp(s, a->rn);
7886     }
7887     if (s->pauth_active) {
7888         tcg_rd = cpu_reg(s, a->rd);
7889         fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
7890     }
7891     return true;
7892 }
7893 
7894 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
7895 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
7896 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
7897 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
7898 
7899 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
7900 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
7901 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
7902 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
7903 
7904 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
7905 {
7906     if (s->pauth_active) {
7907         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7908         fn(tcg_rd, tcg_env, tcg_rd);
7909     }
7910     return true;
7911 }
7912 
7913 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
7914 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
7915 
7916 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
7917                          ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
7918 {
7919     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
7920 
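    /* For a 32-bit operation a shift amount of 32 or more is invalid. */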
7921     if (!a->sf && (a->sa & (1 << 5))) {
7922         return false;
7923     }
7924 
7925     tcg_rd = cpu_reg(s, a->rd);
7926     tcg_rn = cpu_reg(s, a->rn);
7927 
7928     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
7929     if (a->sa) {
7930         shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
7931     }
7932 
7933     (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
7934     if (!a->sf) {
7935         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7936     }
7937     if (setflags) {
7938         gen_logic_CC(a->sf, tcg_rd);
7939     }
7940     return true;
7941 }
7942 
7943 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
7944 {
7945     /*
7946      * Unshifted ORR and ORN with WZR/XZR are the standard encodings for
7947      * register-register MOV and MVN, so they are worth special casing.
7948      */
7949     if (a->sa == 0 && a->st == 0 && a->rn == 31) {
7950         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
7951         TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
7952 
7953         if (a->n) {
7954             tcg_gen_not_i64(tcg_rd, tcg_rm);
7955             if (!a->sf) {
7956                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7957             }
7958         } else {
7959             if (a->sf) {
7960                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
7961             } else {
7962                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
7963             }
7964         }
7965         return true;
7966     }
7967 
7968     return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
7969 }
7970 
7971 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
7972 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
7973 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
7974 
7975 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
7976                           bool sub_op, bool setflags)
7977 {
7978     TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
7979 
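    /* The extended-register form permits a left shift of at most 4. */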
7980     if (a->sa > 4) {
7981         return false;
7982     }
7983 
7984     /* non-flag setting ops may use SP */
7985     if (!setflags) {
7986         tcg_rd = cpu_reg_sp(s, a->rd);
7987     } else {
7988         tcg_rd = cpu_reg(s, a->rd);
7989     }
7990     tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
7991 
7992     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
7993     ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
7994 
7995     tcg_result = tcg_temp_new_i64();
7996     if (!setflags) {
7997         if (sub_op) {
7998             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
7999         } else {
8000             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8001         }
8002     } else {
8003         if (sub_op) {
8004             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8005         } else {
8006             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8007         }
8008     }
8009 
8010     if (a->sf) {
8011         tcg_gen_mov_i64(tcg_rd, tcg_result);
8012     } else {
8013         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8014     }
8015     return true;
8016 }
8017 
8018 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8019 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8020 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8021 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8022 
8023 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8024                           bool sub_op, bool setflags)
8025 {
8026     TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8027 
8028     if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8029         return false;
8030     }
8031 
8032     tcg_rd = cpu_reg(s, a->rd);
8033     tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8034     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8035 
8036     shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8037 
8038     tcg_result = tcg_temp_new_i64();
8039     if (!setflags) {
8040         if (sub_op) {
8041             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8042         } else {
8043             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8044         }
8045     } else {
8046         if (sub_op) {
8047             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8048         } else {
8049             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8050         }
8051     }
8052 
8053     if (a->sf) {
8054         tcg_gen_mov_i64(tcg_rd, tcg_result);
8055     } else {
8056         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8057     }
8058     return true;
8059 }
8060 
8061 TRANS(ADD_r, do_addsub_reg, a, false, false)
8062 TRANS(SUB_r, do_addsub_reg, a, true, false)
8063 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8064 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8065 
8066 static bool do_mulh(DisasContext *s, arg_rrr *a,
8067                     void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8068 {
8069     TCGv_i64 discard = tcg_temp_new_i64();
8070     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8071     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8072     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8073 
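    /* muls2/mulu2 produce the full 128-bit product; keep only the high half. */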
8074     fn(discard, tcg_rd, tcg_rn, tcg_rm);
8075     return true;
8076 }
8077 
8078 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8079 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8080 
8081 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8082                       bool sf, bool is_sub, MemOp mop)
8083 {
8084     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8085     TCGv_i64 tcg_op1, tcg_op2;
8086 
8087     if (mop == MO_64) {
8088         tcg_op1 = cpu_reg(s, a->rn);
8089         tcg_op2 = cpu_reg(s, a->rm);
8090     } else {
8091         tcg_op1 = tcg_temp_new_i64();
8092         tcg_op2 = tcg_temp_new_i64();
8093         tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8094         tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8095     }
8096 
8097     if (a->ra == 31 && !is_sub) {
8098         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8099         tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8100     } else {
8101         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8102         TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8103 
8104         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8105         if (is_sub) {
8106             tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8107         } else {
8108             tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8109         }
8110     }
8111 
8112     if (!sf) {
8113         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8114     }
8115     return true;
8116 }
8117 
8118 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8119 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8120 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8121 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8122 
8123 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8124 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8125 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8126 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8127 
8128 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8129                        bool is_sub, bool setflags)
8130 {
8131     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8132 
8133     tcg_rd = cpu_reg(s, a->rd);
8134     tcg_rn = cpu_reg(s, a->rn);
8135 
8136     if (is_sub) {
8137         tcg_y = tcg_temp_new_i64();
8138         tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8139     } else {
8140         tcg_y = cpu_reg(s, a->rm);
8141     }
8142 
8143     if (setflags) {
8144         gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8145     } else {
8146         gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8147     }
8148     return true;
8149 }
8150 
8151 TRANS(ADC, do_adc_sbc, a, false, false)
8152 TRANS(SBC, do_adc_sbc, a, true, false)
8153 TRANS(ADCS, do_adc_sbc, a, false, true)
8154 TRANS(SBCS, do_adc_sbc, a, true, true)
8155 
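/*
 * RMIF (FEAT_FlagM): rotate Xn right by an immediate and insert the
 * low four bits of the result into NZCV under control of the mask;
 * e.g. RMIF x0, #60, #0b0010 copies bit 61 of x0 into C.
 */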
8156 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8157 {
8158     int mask = a->mask;
8159     TCGv_i64 tcg_rn;
8160     TCGv_i32 nzcv;
8161 
8162     if (!dc_isar_feature(aa64_condm_4, s)) {
8163         return false;
8164     }
8165 
8166     tcg_rn = read_cpu_reg(s, a->rn, 1);
8167     tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8168 
8169     nzcv = tcg_temp_new_i32();
8170     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8171 
8172     if (mask & 8) { /* N */
8173         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8174     }
8175     if (mask & 4) { /* Z */
8176         tcg_gen_not_i32(cpu_ZF, nzcv);
8177         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8178     }
8179     if (mask & 2) { /* C */
8180         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8181     }
8182     if (mask & 1) { /* V */
8183         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8184     }
8185     return true;
8186 }
8187 
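/*
 * SETF8/SETF16 (FEAT_FlagM): set NZV from a byte or halfword result.
 * N is the sign bit of the narrow value, Z tests the narrow value
 * for zero, and V is the bit just above the sign position XORed
 * with the sign bit itself; C is unchanged.
 */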
8188 static bool do_setf(DisasContext *s, int rn, int shift)
8189 {
8190     TCGv_i32 tmp = tcg_temp_new_i32();
8191 
8192     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8193     tcg_gen_shli_i32(cpu_NF, tmp, shift);
8194     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8195     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8196     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8197     return true;
8198 }
8199 
8200 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8201 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
8202 
8203 /* CCMP, CCMN: flags from the comparison if COND holds, else set to #nzcv */
8204 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8205 {
8206     TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8207     TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8208     TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8209     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8210     TCGv_i64 tcg_rn, tcg_y;
8211     DisasCompare c;
8212     unsigned nzcv;
8213 
8214     /* Set T0 = !COND.  */
8215     arm_test_cc(&c, a->cond);
8216     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8217 
8218     /* Load the arguments for the new comparison.  */
8219     if (a->imm) {
8220         tcg_y = tcg_constant_i64(a->y);
8221     } else {
8222         tcg_y = cpu_reg(s, a->y);
8223     }
8224     tcg_rn = cpu_reg(s, a->rn);
8225 
8226     /* Set the flags for the new comparison.  */
8227     if (a->op) {
8228         gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8229     } else {
8230         gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8231     }
8232 
8233     /*
8234      * If COND was false, force the flags to #nzcv.  Compute two masks
8235      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8236      * For tcg hosts that support ANDC, we can make do with just T1.
8237      * In either case, allow the tcg optimizer to delete any unused mask.
8238      */
8239     tcg_gen_neg_i32(tcg_t1, tcg_t0);
8240     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8241 
8242     nzcv = a->nzcv;
8243     if (nzcv & 8) { /* N */
8244         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8245     } else {
8246         if (TCG_TARGET_HAS_andc_i32) {
8247             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8248         } else {
8249             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8250         }
8251     }
8252     if (nzcv & 4) { /* Z */
8253         if (TCG_TARGET_HAS_andc_i32) {
8254             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8255         } else {
8256             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8257         }
8258     } else {
8259         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8260     }
8261     if (nzcv & 2) { /* C */
8262         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8263     } else {
8264         if (TCG_TARGET_HAS_andc_i32) {
8265             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8266         } else {
8267             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8268         }
8269     }
8270     if (nzcv & 1) { /* V */
8271         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8272     } else {
8273         if (TCG_TARGET_HAS_andc_i32) {
8274             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8275         } else {
8276             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8277         }
8278     }
8279     return true;
8280 }
8281 
8282 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8283 {
8284     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8285     TCGv_i64 zero = tcg_constant_i64(0);
8286     DisasCompare64 c;
8287 
8288     a64_test_cc(&c, a->cond);
8289 
8290     if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8291         /* CSET & CSETM.  */
8292         if (a->else_inv) {
8293             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8294                                    tcg_rd, c.value, zero);
8295         } else {
8296             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8297                                 tcg_rd, c.value, zero);
8298         }
8299     } else {
8300         TCGv_i64 t_true = cpu_reg(s, a->rn);
8301         TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8302 
8303         if (a->else_inv && a->else_inc) {
8304             tcg_gen_neg_i64(t_false, t_false);
8305         } else if (a->else_inv) {
8306             tcg_gen_not_i64(t_false, t_false);
8307         } else if (a->else_inc) {
8308             tcg_gen_addi_i64(t_false, t_false, 1);
8309         }
8310         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8311     }
8312 
8313     if (!a->sf) {
8314         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8315     }
8316     return true;
8317 }
8318 
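/*
 * One-operand fp ops that at most manipulate the sign bit (FMOV,
 * FABS, FNEG), and so need no float_status argument.
 */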
8319 typedef struct FPScalar1Int {
8320     void (*gen_h)(TCGv_i32, TCGv_i32);
8321     void (*gen_s)(TCGv_i32, TCGv_i32);
8322     void (*gen_d)(TCGv_i64, TCGv_i64);
8323 } FPScalar1Int;
8324 
8325 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8326                               const FPScalar1Int *f)
8327 {
8328     switch (a->esz) {
8329     case MO_64:
8330         if (fp_access_check(s)) {
8331             TCGv_i64 t = read_fp_dreg(s, a->rn);
8332             f->gen_d(t, t);
8333             write_fp_dreg(s, a->rd, t);
8334         }
8335         break;
8336     case MO_32:
8337         if (fp_access_check(s)) {
8338             TCGv_i32 t = read_fp_sreg(s, a->rn);
8339             f->gen_s(t, t);
8340             write_fp_sreg(s, a->rd, t);
8341         }
8342         break;
8343     case MO_16:
8344         if (!dc_isar_feature(aa64_fp16, s)) {
8345             return false;
8346         }
8347         if (fp_access_check(s)) {
8348             TCGv_i32 t = read_fp_hreg(s, a->rn);
8349             f->gen_h(t, t);
8350             write_fp_sreg(s, a->rd, t);
8351         }
8352         break;
8353     default:
8354         return false;
8355     }
8356     return true;
8357 }
8358 
8359 static const FPScalar1Int f_scalar_fmov = {
8360     tcg_gen_mov_i32,
8361     tcg_gen_mov_i32,
8362     tcg_gen_mov_i64,
8363 };
8364 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov)
8365 
8366 static const FPScalar1Int f_scalar_fabs = {
8367     gen_vfp_absh,
8368     gen_vfp_abss,
8369     gen_vfp_absd,
8370 };
8371 TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs)
8372 
8373 static const FPScalar1Int f_scalar_fneg = {
8374     gen_vfp_negh,
8375     gen_vfp_negs,
8376     gen_vfp_negd,
8377 };
8378 TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg)
8379 
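/*
 * One-operand fp ops that take a float_status pointer.  For the
 * do_fp1_* helpers, an rmode of -1 means "use the current FPCR
 * rounding mode"; otherwise the given ARMFPRounding value is set
 * around the operation and then restored.
 */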
8380 typedef struct FPScalar1 {
8381     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8382     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8383     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8384 } FPScalar1;
8385 
8386 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8387                           const FPScalar1 *f, int rmode)
8388 {
8389     TCGv_i32 tcg_rmode = NULL;
8390     TCGv_ptr fpst;
8391     TCGv_i64 t64;
8392     TCGv_i32 t32;
8393     int check = fp_access_check_scalar_hsd(s, a->esz);
8394 
8395     if (check <= 0) {
8396         return check == 0;
8397     }
8398 
8399     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8400     if (rmode >= 0) {
8401         tcg_rmode = gen_set_rmode(rmode, fpst);
8402     }
8403 
8404     switch (a->esz) {
8405     case MO_64:
8406         t64 = read_fp_dreg(s, a->rn);
8407         f->gen_d(t64, t64, fpst);
8408         write_fp_dreg(s, a->rd, t64);
8409         break;
8410     case MO_32:
8411         t32 = read_fp_sreg(s, a->rn);
8412         f->gen_s(t32, t32, fpst);
8413         write_fp_sreg(s, a->rd, t32);
8414         break;
8415     case MO_16:
8416         t32 = read_fp_hreg(s, a->rn);
8417         f->gen_h(t32, t32, fpst);
8418         write_fp_sreg(s, a->rd, t32);
8419         break;
8420     default:
8421         g_assert_not_reached();
8422     }
8423 
8424     if (rmode >= 0) {
8425         gen_restore_rmode(tcg_rmode, fpst);
8426     }
8427     return true;
8428 }
8429 
8430 static const FPScalar1 f_scalar_fsqrt = {
8431     gen_helper_vfp_sqrth,
8432     gen_helper_vfp_sqrts,
8433     gen_helper_vfp_sqrtd,
8434 };
8435 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8436 
8437 static const FPScalar1 f_scalar_frint = {
8438     gen_helper_advsimd_rinth,
8439     gen_helper_rints,
8440     gen_helper_rintd,
8441 };
8442 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8443 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8444 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8445 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8446 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8447 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8448 
8449 static const FPScalar1 f_scalar_frintx = {
8450     gen_helper_advsimd_rinth_exact,
8451     gen_helper_rints_exact,
8452     gen_helper_rintd_exact,
8453 };
8454 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8455 
8456 static const FPScalar1 f_scalar_bfcvt = {
8457     .gen_s = gen_helper_bfcvt,
8458 };
8459 TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1)
8460 
8461 static const FPScalar1 f_scalar_frint32 = {
8462     NULL,
8463     gen_helper_frint32_s,
8464     gen_helper_frint32_d,
8465 };
8466 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8467            &f_scalar_frint32, FPROUNDING_ZERO)
8468 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8469 
8470 static const FPScalar1 f_scalar_frint64 = {
8471     NULL,
8472     gen_helper_frint64_s,
8473     gen_helper_frint64_d,
8474 };
8475 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8476            &f_scalar_frint64, FPROUNDING_ZERO)
8477 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8478 
8479 static const FPScalar1 f_scalar_frecpe = {
8480     gen_helper_recpe_f16,
8481     gen_helper_recpe_f32,
8482     gen_helper_recpe_f64,
8483 };
8484 TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1)
8485 
8486 static const FPScalar1 f_scalar_frecpx = {
8487     gen_helper_frecpx_f16,
8488     gen_helper_frecpx_f32,
8489     gen_helper_frecpx_f64,
8490 };
8491 TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1)
8492 
8493 static const FPScalar1 f_scalar_frsqrte = {
8494     gen_helper_rsqrte_f16,
8495     gen_helper_rsqrte_f32,
8496     gen_helper_rsqrte_f64,
8497 };
8498 TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1)
8499 
8500 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8501 {
8502     if (fp_access_check(s)) {
8503         TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8504         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8505 
8506         gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
8507         write_fp_dreg(s, a->rd, tcg_rd);
8508     }
8509     return true;
8510 }
8511 
8512 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8513 {
8514     if (fp_access_check(s)) {
8515         TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8516         TCGv_i32 ahp = get_ahp_flag();
8517         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8518 
8519         gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8520         /* write_fp_sreg is OK here because top half of result is zero */
8521         write_fp_sreg(s, a->rd, tmp);
8522     }
8523     return true;
8524 }
8525 
8526 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8527 {
8528     if (fp_access_check(s)) {
8529         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8530         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8531 
8532         gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
8533         write_fp_sreg(s, a->rd, tcg_rd);
8534     }
8535     return true;
8536 }
8537 
8538 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8539 {
8540     if (fp_access_check(s)) {
8541         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8542         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8543         TCGv_i32 ahp = get_ahp_flag();
8544         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8545 
8546         gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8547         /* write_fp_sreg is OK here because top half of tcg_rd is zero */
8548         write_fp_sreg(s, a->rd, tcg_rd);
8549     }
8550     return true;
8551 }
8552 
8553 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8554 {
8555     if (fp_access_check(s)) {
8556         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8557         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8558         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
8559         TCGv_i32 tcg_ahp = get_ahp_flag();
8560 
8561         gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8562         write_fp_sreg(s, a->rd, tcg_rd);
8563     }
8564     return true;
8565 }
8566 
8567 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
8568 {
8569     if (fp_access_check(s)) {
8570         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8571         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8572         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
8573         TCGv_i32 tcg_ahp = get_ahp_flag();
8574 
8575         gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8576         write_fp_dreg(s, a->rd, tcg_rd);
8577     }
8578     return true;
8579 }
8580 
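/*
 * Core of the integer-to-float conversions: convert the 64-bit
 * integer in tcg_int to a float of size esz, applying a fixed-point
 * scale of "shift" fractional bits (zero for the integer forms).
 */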
8581 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
8582                            TCGv_i64 tcg_int, bool is_signed)
8583 {
8584     TCGv_ptr tcg_fpstatus;
8585     TCGv_i32 tcg_shift, tcg_single;
8586     TCGv_i64 tcg_double;
8587 
8588     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8589     tcg_shift = tcg_constant_i32(shift);
8590 
8591     switch (esz) {
8592     case MO_64:
8593         tcg_double = tcg_temp_new_i64();
8594         if (is_signed) {
8595             gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
8596         } else {
8597             gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
8598         }
8599         write_fp_dreg(s, rd, tcg_double);
8600         break;
8601 
8602     case MO_32:
8603         tcg_single = tcg_temp_new_i32();
8604         if (is_signed) {
8605             gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8606         } else {
8607             gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8608         }
8609         write_fp_sreg(s, rd, tcg_single);
8610         break;
8611 
8612     case MO_16:
8613         tcg_single = tcg_temp_new_i32();
8614         if (is_signed) {
8615             gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8616         } else {
8617             gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8618         }
8619         write_fp_sreg(s, rd, tcg_single);
8620         break;
8621 
8622     default:
8623         g_assert_not_reached();
8624     }
8625     return true;
8626 }
8627 
8628 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
8629 {
8630     TCGv_i64 tcg_int;
8631     int check = fp_access_check_scalar_hsd(s, a->esz);
8632 
8633     if (check <= 0) {
8634         return check == 0;
8635     }
8636 
8637     if (a->sf) {
8638         tcg_int = cpu_reg(s, a->rn);
8639     } else {
8640         tcg_int = read_cpu_reg(s, a->rn, true);
8641         if (is_signed) {
8642             tcg_gen_ext32s_i64(tcg_int, tcg_int);
8643         } else {
8644             tcg_gen_ext32u_i64(tcg_int, tcg_int);
8645         }
8646     }
8647     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
8648 }
8649 
8650 TRANS(SCVTF_g, do_cvtf_g, a, true)
8651 TRANS(UCVTF_g, do_cvtf_g, a, false)
8652 
8653 /*
8654  * [US]CVTF (vector), scalar version.
8655  * Which sounds weird, but really just means input from fp register
8656  * instead of input from general register.  Input and output element
8657  * sizes are always equal.
8658  */
8659 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
8660 {
8661     TCGv_i64 tcg_int;
8662     int check = fp_access_check_scalar_hsd(s, a->esz);
8663 
8664     if (check <= 0) {
8665         return check == 0;
8666     }
8667 
8668     tcg_int = tcg_temp_new_i64();
8669     read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
8670     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
8671 }
8672 
8673 TRANS(SCVTF_f, do_cvtf_f, a, true)
8674 TRANS(UCVTF_f, do_cvtf_f, a, false)
8675 
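/*
 * Core of the float-to-integer conversions.  "esz" is the size of
 * the fp source element; "out" encodes the integer destination size,
 * plus MO_SIGN for signed results.  Results narrower than 64 bits
 * are zero-extended into tcg_out.
 */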
8676 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
8677                            TCGv_i64 tcg_out, int shift, int rn,
8678                            ARMFPRounding rmode)
8679 {
8680     TCGv_ptr tcg_fpstatus;
8681     TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
8682 
8683     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8684     tcg_shift = tcg_constant_i32(shift);
8685     tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
8686 
8687     switch (esz) {
8688     case MO_64:
8689         read_vec_element(s, tcg_out, rn, 0, MO_64);
8690         switch (out) {
8691         case MO_64 | MO_SIGN:
8692             gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8693             break;
8694         case MO_64:
8695             gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8696             break;
8697         case MO_32 | MO_SIGN:
8698             gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8699             break;
8700         case MO_32:
8701             gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8702             break;
8703         default:
8704             g_assert_not_reached();
8705         }
8706         break;
8707 
8708     case MO_32:
8709         tcg_single = read_fp_sreg(s, rn);
8710         switch (out) {
8711         case MO_64 | MO_SIGN:
8712             gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8713             break;
8714         case MO_64:
8715             gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8716             break;
8717         case MO_32 | MO_SIGN:
8718             gen_helper_vfp_tosls(tcg_single, tcg_single,
8719                                  tcg_shift, tcg_fpstatus);
8720             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8721             break;
8722         case MO_32:
8723             gen_helper_vfp_touls(tcg_single, tcg_single,
8724                                  tcg_shift, tcg_fpstatus);
8725             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8726             break;
8727         default:
8728             g_assert_not_reached();
8729         }
8730         break;
8731 
8732     case MO_16:
8733         tcg_single = read_fp_hreg(s, rn);
8734         switch (out) {
8735         case MO_64 | MO_SIGN:
8736             gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8737             break;
8738         case MO_64:
8739             gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8740             break;
8741         case MO_32 | MO_SIGN:
8742             gen_helper_vfp_toslh(tcg_single, tcg_single,
8743                                  tcg_shift, tcg_fpstatus);
8744             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8745             break;
8746         case MO_32:
8747             gen_helper_vfp_toulh(tcg_single, tcg_single,
8748                                  tcg_shift, tcg_fpstatus);
8749             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8750             break;
8751         case MO_16 | MO_SIGN:
8752             gen_helper_vfp_toshh(tcg_single, tcg_single,
8753                                  tcg_shift, tcg_fpstatus);
8754             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8755             break;
8756         case MO_16:
8757             gen_helper_vfp_touhh(tcg_single, tcg_single,
8758                                  tcg_shift, tcg_fpstatus);
8759             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8760             break;
8761         default:
8762             g_assert_not_reached();
8763         }
8764         break;
8765 
8766     default:
8767         g_assert_not_reached();
8768     }
8769 
8770     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8771 }
8772 
8773 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
8774                       ARMFPRounding rmode, bool is_signed)
8775 {
8776     TCGv_i64 tcg_int;
8777     int check = fp_access_check_scalar_hsd(s, a->esz);
8778 
8779     if (check <= 0) {
8780         return check == 0;
8781     }
8782 
8783     tcg_int = cpu_reg(s, a->rd);
8784     do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
8785                    a->esz, tcg_int, a->shift, a->rn, rmode);
8786 
8787     if (!a->sf) {
8788         tcg_gen_ext32u_i64(tcg_int, tcg_int);
8789     }
8790     return true;
8791 }
8792 
8793 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
8794 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
8795 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
8796 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
8797 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
8798 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
8799 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
8800 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
8801 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
8802 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
8803 
8804 /*
8805  * FCVT* (vector), scalar version.
8806  * Which sounds weird, but really just means output to fp register
8807  * instead of output to general register.  Input and output element
8808  * sizes are always equal.
8809  */
8810 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
8811                       ARMFPRounding rmode, bool is_signed)
8812 {
8813     TCGv_i64 tcg_int;
8814     int check = fp_access_check_scalar_hsd(s, a->esz);
8815 
8816     if (check <= 0) {
8817         return check == 0;
8818     }
8819 
8820     tcg_int = tcg_temp_new_i64();
8821     do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
8822                    a->esz, tcg_int, a->shift, a->rn, rmode);
8823 
8824     clear_vec(s, a->rd);
8825     write_vec_element(s, tcg_int, a->rd, 0, a->esz);
8826     return true;
8827 }
8828 
8829 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
8830 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
8831 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
8832 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
8833 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
8834 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
8835 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
8836 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
8837 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
8838 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
8839 
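/*
 * FJCVTZS (FEAT_JSCVT): the helper packs the 32-bit result into the
 * low half of its return value and the new cpu_ZF value into the
 * high half; N, C and V are all zeroed.
 */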
8840 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
8841 {
8842     if (!dc_isar_feature(aa64_jscvt, s)) {
8843         return false;
8844     }
8845     if (fp_access_check(s)) {
8846         TCGv_i64 t = read_fp_dreg(s, a->rn);
8847         TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
8848 
8849         gen_helper_fjcvtzs(t, t, fpstatus);
8850 
8851         tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
8852         tcg_gen_extrh_i64_i32(cpu_ZF, t);
8853         tcg_gen_movi_i32(cpu_CF, 0);
8854         tcg_gen_movi_i32(cpu_NF, 0);
8855         tcg_gen_movi_i32(cpu_VF, 0);
8856     }
8857     return true;
8858 }
8859 
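/*
 * FMOV between general and fp registers is a raw bit transfer, with
 * no conversion.  Narrow sources are zero-extended into the fp
 * register; the _xu/_ux forms access the upper 64 bits of the
 * 128-bit vector register.
 */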
8860 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
8861 {
8862     if (!dc_isar_feature(aa64_fp16, s)) {
8863         return false;
8864     }
8865     if (fp_access_check(s)) {
8866         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8867         TCGv_i64 tmp = tcg_temp_new_i64();
8868         tcg_gen_ext16u_i64(tmp, tcg_rn);
8869         write_fp_dreg(s, a->rd, tmp);
8870     }
8871     return true;
8872 }
8873 
8874 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
8875 {
8876     if (fp_access_check(s)) {
8877         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8878         TCGv_i64 tmp = tcg_temp_new_i64();
8879         tcg_gen_ext32u_i64(tmp, tcg_rn);
8880         write_fp_dreg(s, a->rd, tmp);
8881     }
8882     return true;
8883 }
8884 
8885 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
8886 {
8887     if (fp_access_check(s)) {
8888         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8889         write_fp_dreg(s, a->rd, tcg_rn);
8890     }
8891     return true;
8892 }
8893 
8894 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
8895 {
8896     if (fp_access_check(s)) {
8897         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8898         tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
8899         clear_vec_high(s, true, a->rd);
8900     }
8901     return true;
8902 }
8903 
8904 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
8905 {
8906     if (!dc_isar_feature(aa64_fp16, s)) {
8907         return false;
8908     }
8909     if (fp_access_check(s)) {
8910         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8911         tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
8912     }
8913     return true;
8914 }
8915 
8916 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
8917 {
8918     if (fp_access_check(s)) {
8919         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8920         tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
8921     }
8922     return true;
8923 }
8924 
8925 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
8926 {
8927     if (fp_access_check(s)) {
8928         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8929         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
8930     }
8931     return true;
8932 }
8933 
8934 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
8935 {
8936     if (fp_access_check(s)) {
8937         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8938         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
8939     }
8940     return true;
8941 }
8942 
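/*
 * Saturating one-operand ops (SQABS, SQNEG): the helpers need
 * tcg_env so they can set QC on saturation.  gen_bhs[] is indexed
 * by element size, with a separate generator for 64-bit elements.
 */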
8943 typedef struct ENVScalar1 {
8944     NeonGenOneOpEnvFn *gen_bhs[3];
8945     NeonGenOne64OpEnvFn *gen_d;
8946 } ENVScalar1;
8947 
8948 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
8949 {
8950     if (!fp_access_check(s)) {
8951         return true;
8952     }
8953     if (a->esz == MO_64) {
8954         TCGv_i64 t = read_fp_dreg(s, a->rn);
8955         f->gen_d(t, tcg_env, t);
8956         write_fp_dreg(s, a->rd, t);
8957     } else {
8958         TCGv_i32 t = tcg_temp_new_i32();
8959 
8960         read_vec_element_i32(s, t, a->rn, 0, a->esz);
8961         f->gen_bhs[a->esz](t, tcg_env, t);
8962         write_fp_sreg(s, a->rd, t);
8963     }
8964     return true;
8965 }
8966 
8967 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
8968 {
8969     if (a->esz == MO_64 && !a->q) {
8970         return false;
8971     }
8972     if (!fp_access_check(s)) {
8973         return true;
8974     }
8975     if (a->esz == MO_64) {
8976         TCGv_i64 t = tcg_temp_new_i64();
8977 
8978         for (int i = 0; i < 2; ++i) {
8979             read_vec_element(s, t, a->rn, i, MO_64);
8980             f->gen_d(t, tcg_env, t);
8981             write_vec_element(s, t, a->rd, i, MO_64);
8982         }
8983     } else {
8984         TCGv_i32 t = tcg_temp_new_i32();
8985         int n = (a->q ? 16 : 8) >> a->esz;
8986 
8987         for (int i = 0; i < n; ++i) {
8988             read_vec_element_i32(s, t, a->rn, i, a->esz);
8989             f->gen_bhs[a->esz](t, tcg_env, t);
8990             write_vec_element_i32(s, t, a->rd, i, a->esz);
8991         }
8992     }
8993     clear_vec_high(s, a->q, a->rd);
8994     return true;
8995 }
8996 
8997 static const ENVScalar1 f_scalar_sqabs = {
8998     { gen_helper_neon_qabs_s8,
8999       gen_helper_neon_qabs_s16,
9000       gen_helper_neon_qabs_s32 },
9001     gen_helper_neon_qabs_s64,
9002 };
9003 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9004 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9005 
9006 static const ENVScalar1 f_scalar_sqneg = {
9007     { gen_helper_neon_qneg_s8,
9008       gen_helper_neon_qneg_s16,
9009       gen_helper_neon_qneg_s32 },
9010     gen_helper_neon_qneg_s64,
9011 };
9012 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9013 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9014 
9015 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9016 {
9017     if (fp_access_check(s)) {
9018         TCGv_i64 t = read_fp_dreg(s, a->rn);
9019         f(t, t);
9020         write_fp_dreg(s, a->rd, t);
9021     }
9022     return true;
9023 }
9024 
9025 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9026 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9027 
9028 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9029 {
9030     if (fp_access_check(s)) {
9031         TCGv_i64 t = read_fp_dreg(s, a->rn);
9032         tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9033         write_fp_dreg(s, a->rd, t);
9034     }
9035     return true;
9036 }
9037 
9038 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9039 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9040 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9041 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9042 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9043 
9044 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9045                                    ArithOneOp * const fn[3])
9046 {
9047     if (a->esz == MO_64) {
9048         return false;
9049     }
9050     if (fp_access_check(s)) {
9051         TCGv_i64 t = tcg_temp_new_i64();
9052 
9053         read_vec_element(s, t, a->rn, 0, a->esz + 1);
9054         fn[a->esz](t, t);
9055         clear_vec(s, a->rd);
9056         write_vec_element(s, t, a->rd, 0, a->esz);
9057     }
9058     return true;
9059 }
9060 
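/* Adapt helpers that take a tcg_env argument to the ArithOneOp shape. */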
9061 #define WRAP_ENV(NAME) \
9062     static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9063     { gen_helper_##NAME(d, tcg_env, n); }
9064 
9065 WRAP_ENV(neon_unarrow_sat8)
9066 WRAP_ENV(neon_unarrow_sat16)
9067 WRAP_ENV(neon_unarrow_sat32)
9068 
9069 static ArithOneOp * const f_scalar_sqxtun[] = {
9070     gen_neon_unarrow_sat8,
9071     gen_neon_unarrow_sat16,
9072     gen_neon_unarrow_sat32,
9073 };
9074 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9075 
9076 WRAP_ENV(neon_narrow_sat_s8)
9077 WRAP_ENV(neon_narrow_sat_s16)
9078 WRAP_ENV(neon_narrow_sat_s32)
9079 
9080 static ArithOneOp * const f_scalar_sqxtn[] = {
9081     gen_neon_narrow_sat_s8,
9082     gen_neon_narrow_sat_s16,
9083     gen_neon_narrow_sat_s32,
9084 };
9085 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9086 
9087 WRAP_ENV(neon_narrow_sat_u8)
9088 WRAP_ENV(neon_narrow_sat_u16)
9089 WRAP_ENV(neon_narrow_sat_u32)
9090 
9091 static ArithOneOp * const f_scalar_uqxtn[] = {
9092     gen_neon_narrow_sat_u8,
9093     gen_neon_narrow_sat_u16,
9094     gen_neon_narrow_sat_u32,
9095 };
9096 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9097 
9098 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9099 {
9100     /*
9101      * 64 bit to 32 bit float conversion
9102      * with von Neumann rounding (round to odd)
9103      */
9104     TCGv_i32 tmp = tcg_temp_new_i32();
9105     gen_helper_fcvtx_f64_to_f32(tmp, n, tcg_env);
9106     tcg_gen_extu_i32_i64(d, tmp);
9107 }
9108 
9109 static ArithOneOp * const f_scalar_fcvtxn[] = {
9110     NULL,
9111     NULL,
9112     gen_fcvtxn_sd,
9113 };
9114 TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn)
9115 
9116 #undef WRAP_ENV
9117 
9118 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9119 {
9120     if (!a->q && a->esz == MO_64) {
9121         return false;
9122     }
9123     if (fp_access_check(s)) {
9124         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9125     }
9126     return true;
9127 }
9128 
9129 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9130 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9131 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9132 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9133 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9134 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9135 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9136 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9137 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9138 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9139 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9140 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9141 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9142 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9143 
9144 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9145 {
9146     if (a->esz == MO_64) {
9147         return false;
9148     }
9149     if (fp_access_check(s)) {
9150         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9151     }
9152     return true;
9153 }
9154 
9155 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9156 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9157 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9158 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9159 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9160 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9161 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9162 
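/*
 * Narrowing two-reg-misc ops (XTN, SQXTN, FCVTN, ...): each 64-bit
 * half of the source is narrowed element-wise to 32 bits, written
 * to the low half of the destination, or to the high half for the
 * "2" forms (q=1).
 */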
9163 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9164                                    ArithOneOp * const fn[3])
9165 {
9166     if (a->esz == MO_64) {
9167         return false;
9168     }
9169     if (fp_access_check(s)) {
9170         TCGv_i64 t0 = tcg_temp_new_i64();
9171         TCGv_i64 t1 = tcg_temp_new_i64();
9172 
9173         read_vec_element(s, t0, a->rn, 0, MO_64);
9174         read_vec_element(s, t1, a->rn, 1, MO_64);
9175         fn[a->esz](t0, t0);
9176         fn[a->esz](t1, t1);
9177         write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9178         write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9179         clear_vec_high(s, a->q, a->rd);
9180     }
9181     return true;
9182 }
9183 
9184 static ArithOneOp * const f_scalar_xtn[] = {
9185     gen_helper_neon_narrow_u8,
9186     gen_helper_neon_narrow_u16,
9187     tcg_gen_ext32u_i64,
9188 };
9189 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9190 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9191 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9192 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9193 
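/*
 * FCVTN helpers: narrow two single-precision values packed into a
 * 64-bit input to two half-precision values (hs), or one double to
 * one single (sd).
 */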
9194 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9195 {
9196     TCGv_i32 tcg_lo = tcg_temp_new_i32();
9197     TCGv_i32 tcg_hi = tcg_temp_new_i32();
9198     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9199     TCGv_i32 ahp = get_ahp_flag();
9200 
9201     tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9202     gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9203     gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9204     tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9205     tcg_gen_extu_i32_i64(d, tcg_lo);
9206 }
9207 
9208 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9209 {
9210     TCGv_i32 tmp = tcg_temp_new_i32();
9211     gen_helper_vfp_fcvtsd(tmp, n, tcg_env);
9212     tcg_gen_extu_i32_i64(d, tmp);
9213 }
9214 
9215 static ArithOneOp * const f_vector_fcvtn[] = {
9216     NULL,
9217     gen_fcvtn_hs,
9218     gen_fcvtn_sd,
9219 };
9220 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9221 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9222 
9223 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9224 {
9225     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9226     TCGv_i32 tmp = tcg_temp_new_i32();
9227     gen_helper_bfcvt_pair(tmp, n, fpst);
9228     tcg_gen_extu_i32_i64(d, tmp);
9229 }
9230 
9231 static ArithOneOp * const f_vector_bfcvtn[] = {
9232     NULL,
9233     gen_bfcvtn_hs,
9234     NULL,
9235 };
9236 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn)
9237 
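/* SHLL/SHLL2: widen each element, then shift left by the element width. */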
9238 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9239 {
9240     static NeonGenWidenFn * const widenfns[3] = {
9241         gen_helper_neon_widen_u8,
9242         gen_helper_neon_widen_u16,
9243         tcg_gen_extu_i32_i64,
9244     };
9245     NeonGenWidenFn *widenfn;
9246     TCGv_i64 tcg_res[2];
9247     TCGv_i32 tcg_op;
9248     int part, pass;
9249 
9250     if (a->esz == MO_64) {
9251         return false;
9252     }
9253     if (!fp_access_check(s)) {
9254         return true;
9255     }
9256 
9257     tcg_op = tcg_temp_new_i32();
9258     widenfn = widenfns[a->esz];
9259     part = a->q ? 2 : 0;
9260 
9261     for (pass = 0; pass < 2; pass++) {
9262         read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9263         tcg_res[pass] = tcg_temp_new_i64();
9264         widenfn(tcg_res[pass], tcg_op);
9265         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9266     }
9267 
9268     for (pass = 0; pass < 2; pass++) {
9269         write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9270     }
9271     return true;
9272 }
9273 
9274 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9275 {
9276     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9277 
9278     if (check <= 0) {
9279         return check == 0;
9280     }
9281 
9282     gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9283     return true;
9284 }
9285 
9286 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9287 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9288 
9289 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9290                           const FPScalar1 *f, int rmode)
9291 {
9292     TCGv_i32 tcg_rmode = NULL;
9293     TCGv_ptr fpst;
9294     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9295 
9296     if (check <= 0) {
9297         return check == 0;
9298     }
9299 
9300     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9301     if (rmode >= 0) {
9302         tcg_rmode = gen_set_rmode(rmode, fpst);
9303     }
9304 
9305     if (a->esz == MO_64) {
9306         TCGv_i64 t64 = tcg_temp_new_i64();
9307 
9308         for (int pass = 0; pass < 2; ++pass) {
9309             read_vec_element(s, t64, a->rn, pass, MO_64);
9310             f->gen_d(t64, t64, fpst);
9311             write_vec_element(s, t64, a->rd, pass, MO_64);
9312         }
9313     } else {
9314         TCGv_i32 t32 = tcg_temp_new_i32();
9315         void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9316             = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9317 
9318         for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9319             read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9320             gen(t32, t32, fpst);
9321             write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9322         }
9323     }
9324     clear_vec_high(s, a->q, a->rd);
9325 
9326     if (rmode >= 0) {
9327         gen_restore_rmode(tcg_rmode, fpst);
9328     }
9329     return true;
9330 }
9331 
9332 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9333 
9334 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9335 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9336 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9337 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9338 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9339 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9340 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9341 
9342 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9343            &f_scalar_frint32, FPROUNDING_ZERO)
9344 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9345 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9346            &f_scalar_frint64, FPROUNDING_ZERO)
9347 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9348 
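/*
 * Generic vector two-operand op through a gvec helper that takes a
 * float_status pointer.  fns[] is indexed by element size (h, s, d);
 * "data" is passed through to the helper (a fixed-point shift, a
 * rounding mode, or zero, depending on the operation).
 */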
9349 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9350                              int rd, int rn, int data,
9351                              gen_helper_gvec_2_ptr * const fns[3])
9352 {
9353     int check = fp_access_check_vector_hsd(s, is_q, esz);
9354     TCGv_ptr fpst;
9355 
9356     if (check <= 0) {
9357         return check == 0;
9358     }
9359 
9360     fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9361     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9362                        vec_full_reg_offset(s, rn), fpst,
9363                        is_q ? 16 : 8, vec_full_reg_size(s),
9364                        data, fns[esz - 1]);
9365     return true;
9366 }
9367 
9368 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9369     gen_helper_gvec_vcvt_sh,
9370     gen_helper_gvec_vcvt_sf,
9371     gen_helper_gvec_vcvt_sd,
9372 };
9373 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9374       a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9375 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9376       a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9377 
9378 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9379     gen_helper_gvec_vcvt_uh,
9380     gen_helper_gvec_vcvt_uf,
9381     gen_helper_gvec_vcvt_ud,
9382 };
9383 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9384       a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9385 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9386       a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9387 
9388 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9389     gen_helper_gvec_vcvt_rz_hs,
9390     gen_helper_gvec_vcvt_rz_fs,
9391     gen_helper_gvec_vcvt_rz_ds,
9392 };
9393 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9394       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9395 
9396 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9397     gen_helper_gvec_vcvt_rz_hu,
9398     gen_helper_gvec_vcvt_rz_fu,
9399     gen_helper_gvec_vcvt_rz_du,
9400 };
9401 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9402       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9403 
9404 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9405     gen_helper_gvec_vcvt_rm_sh,
9406     gen_helper_gvec_vcvt_rm_ss,
9407     gen_helper_gvec_vcvt_rm_sd,
9408 };
9409 
9410 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9411     gen_helper_gvec_vcvt_rm_uh,
9412     gen_helper_gvec_vcvt_rm_us,
9413     gen_helper_gvec_vcvt_rm_ud,
9414 };
9415 
9416 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9417       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9418 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9419       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9420 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9421       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9422 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9423       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9424 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9425       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9426 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9427       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9428 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9429       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9430 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9431       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9432 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9433       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9434 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9435       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9436 
9437 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9438     gen_helper_gvec_fceq0_h,
9439     gen_helper_gvec_fceq0_s,
9440     gen_helper_gvec_fceq0_d,
9441 };
9442 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9443 
9444 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9445     gen_helper_gvec_fcgt0_h,
9446     gen_helper_gvec_fcgt0_s,
9447     gen_helper_gvec_fcgt0_d,
9448 };
9449 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9450 
9451 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9452     gen_helper_gvec_fcge0_h,
9453     gen_helper_gvec_fcge0_s,
9454     gen_helper_gvec_fcge0_d,
9455 };
9456 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9457 
9458 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9459     gen_helper_gvec_fclt0_h,
9460     gen_helper_gvec_fclt0_s,
9461     gen_helper_gvec_fclt0_d,
9462 };
9463 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9464 
9465 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9466     gen_helper_gvec_fcle0_h,
9467     gen_helper_gvec_fcle0_s,
9468     gen_helper_gvec_fcle0_d,
9469 };
9470 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9471 
9472 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9473     gen_helper_gvec_frecpe_h,
9474     gen_helper_gvec_frecpe_s,
9475     gen_helper_gvec_frecpe_d,
9476 };
9477 TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
9478 
9479 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9480     gen_helper_gvec_frsqrte_h,
9481     gen_helper_gvec_frsqrte_s,
9482     gen_helper_gvec_frsqrte_d,
9483 };
9484 TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
9485 
9486 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9487 {
9488     /* Handle 2-reg-misc ops which are widening (so each size element
9489      * in the source becomes a 2*size element in the destination).
9490      * The only instruction like this is FCVTL.
9491      */
9492     int pass;
9493 
9494     if (!fp_access_check(s)) {
9495         return true;
9496     }
9497 
9498     if (a->esz == MO_64) {
9499         /* 32 -> 64 bit fp conversion */
9500         TCGv_i64 tcg_res[2];
9501         TCGv_i32 tcg_op = tcg_temp_new_i32();
9502         int srcelt = a->q ? 2 : 0;
9503 
9504         for (pass = 0; pass < 2; pass++) {
9505             tcg_res[pass] = tcg_temp_new_i64();
9506             read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
9507             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
9508         }
9509         for (pass = 0; pass < 2; pass++) {
9510             write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9511         }
9512     } else {
9513         /* 16 -> 32 bit fp conversion */
9514         int srcelt = a->q ? 4 : 0;
9515         TCGv_i32 tcg_res[4];
9516         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9517         TCGv_i32 ahp = get_ahp_flag();
9518 
9519         for (pass = 0; pass < 4; pass++) {
9520             tcg_res[pass] = tcg_temp_new_i32();
9521             read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
9522             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9523                                            fpst, ahp);
9524         }
9525         for (pass = 0; pass < 4; pass++) {
9526             write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
9527         }
9528     }
9529     clear_vec_high(s, true, a->rd);
9530     return true;
9531 }
9532 
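/*
 * Decode stubs for the SME FA64 pattern group: OK accepts the insn
 * as streaming-compatible, while FAIL marks it non-streaming so that
 * the SME trap check can fire if it executes in streaming mode.
 */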
9533 static bool trans_OK(DisasContext *s, arg_OK *a)
9534 {
9535     return true;
9536 }
9537 
9538 static bool trans_FAIL(DisasContext *s, arg_OK *a)
9539 {
9540     s->is_nonstreaming = true;
9541     return true;
9542 }
9543 
9544 /**
9545  * btype_destination_ok:
9546  * @insn: The instruction at the branch destination
9547  * @bt: SCTLR_ELx.BT
9548  * @btype: PSTATE.BTYPE, and is non-zero
9549  *
9550  * On a guarded page, there are a limited number of insns
9551  * that may be present at the branch target:
9552  *   - branch target identifiers,
9553  *   - paciasp, pacibsp,
9554  *   - BRK insn
9555  *   - HLT insn
9556  * Anything else causes a Branch Target Exception.
9557  *
9558  * Return true if the branch is compatible, false to raise BTITRAP.
9559  */
9560 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
9561 {
9562     if ((insn & 0xfffff01fu) == 0xd503201fu) {
9563         /* HINT space */
9564         switch (extract32(insn, 5, 7)) {
9565         case 0b011001: /* PACIASP */
9566         case 0b011011: /* PACIBSP */
9567             /*
9568              * If SCTLR_ELx.BT, then PACI*SP are not compatible
9569              * with btype == 3.  Otherwise all btype are ok.
9570              */
9571             return !bt || btype != 3;
9572         case 0b100000: /* BTI */
9573             /* Not compatible with any btype.  */
9574             return false;
9575         case 0b100010: /* BTI c */
9576             /* Not compatible with btype == 3 */
9577             return btype != 3;
9578         case 0b100100: /* BTI j */
9579             /* Not compatible with btype == 2 */
9580             return btype != 2;
9581         case 0b100110: /* BTI jc */
9582             /* Compatible with any btype.  */
9583             return true;
9584         }
9585     } else {
9586         switch (insn & 0xffe0001fu) {
9587         case 0xd4200000u: /* BRK */
9588         case 0xd4400000u: /* HLT */
9589             /* Give priority to the breakpoint exception.  */
9590             return true;
9591         }
9592     }
9593     return false;
9594 }
9595 
9596 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
9597                                           CPUState *cpu)
9598 {
9599     DisasContext *dc = container_of(dcbase, DisasContext, base);
9600     CPUARMState *env = cpu_env(cpu);
9601     ARMCPU *arm_cpu = env_archcpu(env);
9602     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9603     int bound, core_mmu_idx;
9604 
9605     dc->isar = &arm_cpu->isar;
9606     dc->condjmp = 0;
9607     dc->pc_save = dc->base.pc_first;
9608     dc->aarch64 = true;
9609     dc->thumb = false;
9610     dc->sctlr_b = 0;
9611     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9612     dc->condexec_mask = 0;
9613     dc->condexec_cond = 0;
9614     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9615     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
9616     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
9617     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
9618     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
9619     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9620 #if !defined(CONFIG_USER_ONLY)
9621     dc->user = (dc->current_el == 0);
9622 #endif
9623     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9624     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9625     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9626     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9627     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9628     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
9629     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
9630     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
9631     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
9632     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
9633     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
9634     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
9635     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
9636     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
9637     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
9638     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
9639     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
9640     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
9641     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
9642     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
9643     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
9644     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
9645     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
9646     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
9647     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
9648     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
9649     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
9650     dc->vec_len = 0;
9651     dc->vec_stride = 0;
9652     dc->cp_regs = arm_cpu->cp_regs;
9653     dc->features = env->features;
9654     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
9655     dc->gm_blocksize = arm_cpu->gm_blocksize;
9656 
9657 #ifdef CONFIG_USER_ONLY
9658     /* In sve_probe_page, we assume TBI is enabled. */
9659     tcg_debug_assert(dc->tbid & 1);
9660 #endif
9661 
9662     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
9663 
9664     /* Single step state. The code-generation logic here is:
9665      *  SS_ACTIVE == 0:
9666      *   generate code with no special handling for single-stepping (except
9667      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9668      *   this happens anyway because those changes are all system register or
9669      *   PSTATE writes).
9670      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9671      *   emit code for one insn
9672      *   emit code to clear PSTATE.SS
9673      *   emit code to generate software step exception for completed step
9674      *   end TB (as usual for having generated an exception)
9675      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9676      *   emit code to generate a software step exception
9677      *   end the TB
9678      */
9679     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9680     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9681     dc->is_ldex = false;
9682 
9683     /* Bound the number of insns to execute to those left on the page.  */
9684     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9685 
9686     /* If architectural single step active, limit to 1.  */
9687     if (dc->ss_active) {
9688         bound = 1;
9689     }
9690     dc->base.max_insns = MIN(dc->base.max_insns, bound);
9691 }
9692 
9693 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
9694 {
9695 }
9696 
9697 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9698 {
9699     DisasContext *dc = container_of(dcbase, DisasContext, base);
9700     target_ulong pc_arg = dc->base.pc_next;
9701 
9702     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9703         pc_arg &= ~TARGET_PAGE_MASK;
9704     }
9705     tcg_gen_insn_start(pc_arg, 0, 0);
9706     dc->insn_start_updated = false;
9707 }
9708 
9709 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9710 {
9711     DisasContext *s = container_of(dcbase, DisasContext, base);
9712     CPUARMState *env = cpu_env(cpu);
9713     uint64_t pc = s->base.pc_next;
9714     uint32_t insn;
9715 
9716     /* Singlestep exceptions have the highest priority. */
9717     if (s->ss_active && !s->pstate_ss) {
9718         /* Singlestep state is Active-pending.
9719          * If we're in this state at the start of a TB then either
9720          *  a) we just took an exception to an EL which is being debugged
9721          *     and this is the first insn in the exception handler
9722          *  b) debug exceptions were masked and we just unmasked them
9723          *     without changing EL (eg by clearing PSTATE.D)
9724          * In either case we're going to take a swstep exception in the
9725          * "did not step an insn" case, and so the syndrome ISV and EX
9726          * bits should be zero.
9727          */
9728         assert(s->base.num_insns == 1);
9729         gen_swstep_exception(s, 0, 0);
9730         s->base.is_jmp = DISAS_NORETURN;
9731         s->base.pc_next = pc + 4;
9732         return;
9733     }
9734 
9735     if (pc & 3) {
9736         /*
9737          * PC alignment fault.  This has priority over the instruction abort
9738          * that we would receive from a translation fault via arm_ldl_code.
9739          * This should only be possible after an indirect branch, at the
9740          * start of the TB.
9741          */
9742         assert(s->base.num_insns == 1);
9743         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9744         s->base.is_jmp = DISAS_NORETURN;
9745         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9746         return;
9747     }
9748 
9749     s->pc_curr = pc;
9750     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
9751     s->insn = insn;
9752     s->base.pc_next = pc + 4;
9753 
    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above almost
             * everything else.  This allows us to handle it now,
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can perform all but the guarded-page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }

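    /*
     * When in streaming SVE mode, classify the insn up front:
     * disas_sme_fa64() sets s->is_nonstreaming for insns that are
     * illegal in streaming mode, and the access-check code turns
     * that into the appropriate SME trap.
     */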
    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

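    /*
     * Each generated decoder returns true iff it decoded (and
     * translated) the insn, so fall through the base A64, SME and
     * SVE decoders before declaring the encoding unallocated.
     */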
    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single-stepping a WFI insn doesn't halt
         * the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
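        /*
         * Translation stopped with more code ahead: try to chain
         * directly to the TB for the next insn (goto_tb slot 1,
         * destination pc_curr + 4).
         */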
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
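            /*
             * The constant 4 is the insn length: if the WFI traps to
             * a higher EL, the helper winds the PC back to the WFI
             * insn itself before raising the trap.
             */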
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};