xref: /qemu/target/arm/tcg/translate-a64.c (revision 641f1c53862aec64810c0b93b5b1de49d55fda92)
1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "exec/exec-all.h"
21 #include "exec/target_page.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28 
29 static TCGv_i64 cpu_X[32];
30 static TCGv_i64 cpu_pc;
31 
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34 
35 static const char *regnames[] = {
36     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41 
42 enum a64_shift_type {
43     A64_SHIFT_TYPE_LSL = 0,
44     A64_SHIFT_TYPE_LSR = 1,
45     A64_SHIFT_TYPE_ASR = 2,
46     A64_SHIFT_TYPE_ROR = 3
47 };
48 
49 /*
50  * Helpers for extracting complex instruction fields
51  */
52 
53 /*
54  * For load/store with an unsigned 12 bit immediate scaled by the element
55  * size. The input has the immediate field in bits [14:3] and the element
56  * size in [2:0].
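 * For example, x == 0x2b encodes imm == 5 and size == 3, giving 5 << 3 == 40.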
57  */
58 static int uimm_scaled(DisasContext *s, int x)
59 {
60     unsigned imm = x >> 3;
61     unsigned scale = extract32(x, 0, 3);
62     return imm << scale;
63 }
64 
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68     return x << LOG2_TAG_GRANULE;
69 }
70 
71 /*
72  * Include the generated decoders.
73  */
74 
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77 
78 /* initialize TCG globals.  */
79 void a64_translate_init(void)
80 {
81     int i;
82 
83     cpu_pc = tcg_global_mem_new_i64(tcg_env,
84                                     offsetof(CPUARMState, pc),
85                                     "pc");
86     for (i = 0; i < 32; i++) {
87         cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
88                                           offsetof(CPUARMState, xregs[i]),
89                                           regnames[i]);
90     }
91 
92     cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
93         offsetof(CPUARMState, exclusive_high), "exclusive_high");
94 }
95 
96 /*
97  * Return the core mmu_idx to use for A64 load/store insns which
98  * have an "unprivileged load/store" variant. Those insns access
99  * EL0 if executed from an EL which has control over EL0 (usually
100  * EL1) but behave like normal loads and stores if executed from
101  * elsewhere (eg EL3).
102  *
103  * @unpriv : true for the unprivileged encoding; false for the
104  *           normal encoding (in which case we will return the same
105  *           thing as get_mem_index()).
106  */
107 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
108 {
109     /*
110      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
111      * which is the usual mmu_idx for this cpu state.
112      */
113     ARMMMUIdx useridx = s->mmu_idx;
114 
115     if (unpriv && s->unpriv) {
116         /*
117          * We have pre-computed the condition for AccType_UNPRIV.
118          * Therefore we should never get here with a mmu_idx for
119          * which we do not know the corresponding user mmu_idx.
120          */
121         switch (useridx) {
122         case ARMMMUIdx_E10_1:
123         case ARMMMUIdx_E10_1_PAN:
124             useridx = ARMMMUIdx_E10_0;
125             break;
126         case ARMMMUIdx_E20_2:
127         case ARMMMUIdx_E20_2_PAN:
128             useridx = ARMMMUIdx_E20_0;
129             break;
130         default:
131             g_assert_not_reached();
132         }
133     }
134     return arm_to_core_mmu_idx(useridx);
135 }
136 
137 static void set_btype_raw(int val)
138 {
139     tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
140                    offsetof(CPUARMState, btype));
141 }
142 
143 static void set_btype(DisasContext *s, int val)
144 {
145     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
146     tcg_debug_assert(val >= 1 && val <= 3);
147     set_btype_raw(val);
148     s->btype = -1;
149 }
150 
151 static void reset_btype(DisasContext *s)
152 {
153     if (s->btype != 0) {
154         set_btype_raw(0);
155         s->btype = 0;
156     }
157 }
158 
159 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
160 {
161     assert(s->pc_save != -1);
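    /*
     * s->pc_save is the value most recently written to cpu_pc, so with
     * CF_PCREL the addition below yields s->pc_curr + diff without
     * needing the absolute PC at translate time.
     */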
162     if (tb_cflags(s->base.tb) & CF_PCREL) {
163         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
164     } else {
165         tcg_gen_movi_i64(dest, s->pc_curr + diff);
166     }
167 }
168 
169 void gen_a64_update_pc(DisasContext *s, target_long diff)
170 {
171     gen_pc_plus_diff(s, cpu_pc, diff);
172     s->pc_save = s->pc_curr + diff;
173 }
174 
175 /*
176  * Handle Top Byte Ignore (TBI) bits.
177  *
178  * If address tagging is enabled via the TCR TBI bits:
179  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
180  *    then the address is zero-extended, clearing bits [63:56]
181  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
182  *    and TBI1 controls addresses with bit 55 == 1.
183  *    If the appropriate TBI bit is set for the address then
184  *    the address is sign-extended from bit 55 into bits [63:56]
185  *
186  * Here we have concatenated TBI{1,0} into tbi.
187  */
188 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
189                                 TCGv_i64 src, int tbi)
190 {
191     if (tbi == 0) {
192         /* Load unmodified address */
193         tcg_gen_mov_i64(dst, src);
194     } else if (!regime_has_2_ranges(s->mmu_idx)) {
195         /* Force tag byte to all zero */
196         tcg_gen_extract_i64(dst, src, 0, 56);
197     } else {
198         /* Sign-extend from bit 55.  */
199         tcg_gen_sextract_i64(dst, src, 0, 56);
200 
201         switch (tbi) {
202         case 1:
203             /* tbi0 but !tbi1: only use the extension if positive */
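            /*
             * AND with src: if bit 55 is 0 the sextract left zeros in
             * bits [63:56], so the tag byte is cleared; if bit 55 is 1
             * the AND leaves src unchanged.
             */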
204             tcg_gen_and_i64(dst, dst, src);
205             break;
206         case 2:
207             /* !tbi0 but tbi1: only use the extension if negative */
208             tcg_gen_or_i64(dst, dst, src);
209             break;
210         case 3:
211             /* tbi0 and tbi1: always use the extension */
212             break;
213         default:
214             g_assert_not_reached();
215         }
216     }
217 }
218 
219 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
220 {
221     /*
222      * If address tagging is enabled for instructions via the TCR TBI bits,
223      * then loading an address into the PC will clear out any tag.
224      */
225     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
226     s->pc_save = -1;
227 }
228 
229 /*
230  * Handle MTE and/or TBI.
231  *
232  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
233  * for the tag to be present in the FAR_ELx register.  But for user-only
234  * mode we do not have a TLB with which to implement this, so we must
235  * remove the top byte now.
236  *
237  * Always return a fresh temporary that we can increment independently
238  * of the write-back address.
239  */
240 
241 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
242 {
243     TCGv_i64 clean = tcg_temp_new_i64();
244 #ifdef CONFIG_USER_ONLY
245     gen_top_byte_ignore(s, clean, addr, s->tbid);
246 #else
247     tcg_gen_mov_i64(clean, addr);
248 #endif
249     return clean;
250 }
251 
252 /* Insert a zero tag into src, with the result at dst. */
253 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
254 {
255     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
256 }
257 
258 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
259                              MMUAccessType acc, int log2_size)
260 {
261     gen_helper_probe_access(tcg_env, ptr,
262                             tcg_constant_i32(acc),
263                             tcg_constant_i32(get_mem_index(s)),
264                             tcg_constant_i32(1 << log2_size));
265 }
266 
267 /*
268  * For MTE, check a single logical or atomic access.  This probes a single
269  * address, the exact one specified.  The size and alignment of the access
270  * is not relevant to MTE, per se, but watchpoints do require the size,
271  * and we want to recognize those before making any other changes to state.
272  */
273 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
274                                       bool is_write, bool tag_checked,
275                                       MemOp memop, bool is_unpriv,
276                                       int core_idx)
277 {
278     if (tag_checked && s->mte_active[is_unpriv]) {
279         TCGv_i64 ret;
280         int desc = 0;
281 
282         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
283         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
284         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
285         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
286         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
287         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
288 
289         ret = tcg_temp_new_i64();
290         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
291 
292         return ret;
293     }
294     return clean_data_tbi(s, addr);
295 }
296 
297 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
298                         bool tag_checked, MemOp memop)
299 {
300     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
301                                  false, get_mem_index(s));
302 }
303 
304 /*
305  * For MTE, check multiple logical sequential accesses.
306  */
307 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
308                         bool tag_checked, int total_size, MemOp single_mop)
309 {
310     if (tag_checked && s->mte_active[0]) {
311         TCGv_i64 ret;
312         int desc = 0;
313 
314         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
315         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
316         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
317         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
318         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
319         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
320 
321         ret = tcg_temp_new_i64();
322         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
323 
324         return ret;
325     }
326     return clean_data_tbi(s, addr);
327 }
328 
329 /*
330  * Generate the special alignment check that applies to AccType_ATOMIC
331  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
332  * naturally aligned, but it must not cross a 16-byte boundary.
333  * See AArch64.CheckAlignment().
334  */
335 static void check_lse2_align(DisasContext *s, int rn, int imm,
336                              bool is_write, MemOp mop)
337 {
338     TCGv_i32 tmp;
339     TCGv_i64 addr;
340     TCGLabel *over_label;
341     MMUAccessType type;
342     int mmu_idx;
343 
344     tmp = tcg_temp_new_i32();
345     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
346     tcg_gen_addi_i32(tmp, tmp, imm & 15);
347     tcg_gen_andi_i32(tmp, tmp, 15);
348     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
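    /*
     * tmp is now ((Rn + imm) & 15) + access size; the access stays
     * within a single 16-byte granule iff tmp <= 16, which the branch
     * below checks.
     */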
349 
350     over_label = gen_new_label();
351     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
352 
353     addr = tcg_temp_new_i64();
354     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
355 
356     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
357     mmu_idx = get_mem_index(s);
358     gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
359                                 tcg_constant_i32(mmu_idx));
360 
361     gen_set_label(over_label);
362 
363 }
364 
365 /* Handle the alignment check for AccType_ATOMIC instructions. */
366 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
367 {
368     MemOp size = mop & MO_SIZE;
369 
370     if (size == MO_8) {
371         return mop;
372     }
373 
374     /*
375      * If size == MO_128, this is a LDXP, and the operation is single-copy
376      * atomic for each doubleword, not the entire quadword; it still must
377      * be quadword aligned.
378      */
379     if (size == MO_128) {
380         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
381                                    MO_ATOM_IFALIGN_PAIR);
382     }
383     if (dc_isar_feature(aa64_lse2, s)) {
384         check_lse2_align(s, rn, 0, true, mop);
385     } else {
386         mop |= MO_ALIGN;
387     }
388     return finalize_memop(s, mop);
389 }
390 
391 /* Handle the alignment check for AccType_ORDERED instructions. */
392 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
393                                  bool is_write, MemOp mop)
394 {
395     MemOp size = mop & MO_SIZE;
396 
397     if (size == MO_8) {
398         return mop;
399     }
400     if (size == MO_128) {
401         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
402                                    MO_ATOM_IFALIGN_PAIR);
403     }
404     if (!dc_isar_feature(aa64_lse2, s)) {
405         mop |= MO_ALIGN;
406     } else if (!s->naa) {
407         check_lse2_align(s, rn, imm, is_write, mop);
408     }
409     return finalize_memop(s, mop);
410 }
411 
412 typedef struct DisasCompare64 {
413     TCGCond cond;
414     TCGv_i64 value;
415 } DisasCompare64;
416 
417 static void a64_test_cc(DisasCompare64 *c64, int cc)
418 {
419     DisasCompare c32;
420 
421     arm_test_cc(&c32, cc);
422 
423     /*
424      * Sign-extend the 32-bit value so that the GE/LT comparisons work
425      * properly.  The NE/EQ comparisons are also fine with this choice.
426      */
427     c64->cond = c32.cond;
428     c64->value = tcg_temp_new_i64();
429     tcg_gen_ext_i32_i64(c64->value, c32.value);
430 }
431 
432 static void gen_rebuild_hflags(DisasContext *s)
433 {
434     gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
435 }
436 
437 static void gen_exception_internal(int excp)
438 {
439     assert(excp_is_internal(excp));
440     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
441 }
442 
443 static void gen_exception_internal_insn(DisasContext *s, int excp)
444 {
445     gen_a64_update_pc(s, 0);
446     gen_exception_internal(excp);
447     s->base.is_jmp = DISAS_NORETURN;
448 }
449 
450 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
451 {
452     gen_a64_update_pc(s, 0);
453     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
454     s->base.is_jmp = DISAS_NORETURN;
455 }
456 
457 static void gen_step_complete_exception(DisasContext *s)
458 {
459     /* We just completed a step of an insn. Move from Active-not-pending
460      * to Active-pending, and then also take the swstep exception.
461      * This corresponds to making the (IMPDEF) choice to prioritize
462      * swstep exceptions over asynchronous exceptions taken to an exception
463      * level where debug is disabled. This choice has the advantage that
464      * we do not need to maintain internal state corresponding to the
465      * ISV/EX syndrome bits between completion of the step and generation
466      * of the exception, and our syndrome information is always correct.
467      */
468     gen_ss_advance(s);
469     gen_swstep_exception(s, 1, s->is_ldex);
470     s->base.is_jmp = DISAS_NORETURN;
471 }
472 
473 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
474 {
475     if (s->ss_active) {
476         return false;
477     }
478     return translator_use_goto_tb(&s->base, dest);
479 }
480 
481 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
482 {
483     if (use_goto_tb(s, s->pc_curr + diff)) {
484         /*
485          * For pcrel, the pc must always be up-to-date on entry to
486          * the linked TB, so that it can use simple additions for all
487          * further adjustments.  For !pcrel, the linked TB is compiled
488          * to know its full virtual address, so we can delay the
489          * update to pc to the unlinked path.  A long chain of links
490          * can thus avoid many updates to the PC.
491          */
492         if (tb_cflags(s->base.tb) & CF_PCREL) {
493             gen_a64_update_pc(s, diff);
494             tcg_gen_goto_tb(n);
495         } else {
496             tcg_gen_goto_tb(n);
497             gen_a64_update_pc(s, diff);
498         }
499         tcg_gen_exit_tb(s->base.tb, n);
500         s->base.is_jmp = DISAS_NORETURN;
501     } else {
502         gen_a64_update_pc(s, diff);
503         if (s->ss_active) {
504             gen_step_complete_exception(s);
505         } else {
506             tcg_gen_lookup_and_goto_ptr();
507             s->base.is_jmp = DISAS_NORETURN;
508         }
509     }
510 }
511 
512 /*
513  * Register access functions
514  *
515  * These functions are used for directly accessing a register where
516  * changes to the final register value are likely to be made. If you
517  * need to use a register for temporary calculation (e.g. index type
518  * operations) use the read_* form.
519  *
520  * B1.2.1 Register mappings
521  *
522  * In instruction register encoding 31 can refer to ZR (zero register) or
523  * the SP (stack pointer) depending on context. In QEMU's case we map SP
524  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
525  * This is the point of the _sp forms.
526  */
527 TCGv_i64 cpu_reg(DisasContext *s, int reg)
528 {
529     if (reg == 31) {
530         TCGv_i64 t = tcg_temp_new_i64();
531         tcg_gen_movi_i64(t, 0);
532         return t;
533     } else {
534         return cpu_X[reg];
535     }
536 }
537 
538 /* register access for when 31 == SP */
539 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
540 {
541     return cpu_X[reg];
542 }
543 
544 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
545  * representing the register contents. This TCGv is an auto-freed
546  * temporary so it need not be explicitly freed, and may be modified.
547  */
548 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
549 {
550     TCGv_i64 v = tcg_temp_new_i64();
551     if (reg != 31) {
552         if (sf) {
553             tcg_gen_mov_i64(v, cpu_X[reg]);
554         } else {
555             tcg_gen_ext32u_i64(v, cpu_X[reg]);
556         }
557     } else {
558         tcg_gen_movi_i64(v, 0);
559     }
560     return v;
561 }
562 
563 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
564 {
565     TCGv_i64 v = tcg_temp_new_i64();
566     if (sf) {
567         tcg_gen_mov_i64(v, cpu_X[reg]);
568     } else {
569         tcg_gen_ext32u_i64(v, cpu_X[reg]);
570     }
571     return v;
572 }
573 
574 /* Return the offset into CPUARMState of a slice (from
575  * the least significant end) of FP register Qn (ie
576  * Dn, Sn, Hn or Bn).
577  * (Note that this is not the same mapping as for A32; see cpu.h)
578  */
579 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
580 {
581     return vec_reg_offset(s, regno, 0, size);
582 }
583 
584 /* Offset of the high half of the 128 bit vector Qn */
585 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
586 {
587     return vec_reg_offset(s, regno, 1, MO_64);
588 }
589 
590 /* Convenience accessors for reading and writing single and double
591  * FP registers. Writing clears the upper parts of the associated
592  * 128 bit vector register, as required by the architecture.
593  * Note that, as with the GP register accessors, the values returned
594  * by the read functions are auto-freed temporaries.
595  */
596 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
597 {
598     TCGv_i64 v = tcg_temp_new_i64();
599 
600     tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
601     return v;
602 }
603 
604 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
605 {
606     TCGv_i32 v = tcg_temp_new_i32();
607 
608     tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
609     return v;
610 }
611 
612 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
613 {
614     TCGv_i32 v = tcg_temp_new_i32();
615 
616     tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
617     return v;
618 }
619 
620 static void clear_vec(DisasContext *s, int rd)
621 {
622     unsigned ofs = fp_reg_offset(s, rd, MO_64);
623     unsigned vsz = vec_full_reg_size(s);
624 
625     tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
626 }
627 
628 /*
629  * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
630  * If SVE is not enabled, then there are only 128 bits in the vector.
631  */
632 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
633 {
634     unsigned ofs = fp_reg_offset(s, rd, MO_64);
635     unsigned vsz = vec_full_reg_size(s);
636 
637     /* Nop move, with side effect of clearing the tail. */
638     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
639 }
640 
641 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
642 {
643     unsigned ofs = fp_reg_offset(s, reg, MO_64);
644 
645     tcg_gen_st_i64(v, tcg_env, ofs);
646     clear_vec_high(s, false, reg);
647 }
648 
649 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
650 {
651     TCGv_i64 tmp = tcg_temp_new_i64();
652 
653     tcg_gen_extu_i32_i64(tmp, v);
654     write_fp_dreg(s, reg, tmp);
655 }
656 
657 /*
658  * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
659  * - if FPCR.NEP == 0, clear the high elements of reg
660  * - if FPCR.NEP == 1, set the high elements of reg from mergereg
661  *   (i.e. merge the result with those high elements)
662  * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
663  */
664 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
665                                   TCGv_i64 v)
666 {
667     if (!s->fpcr_nep) {
668         write_fp_dreg(s, reg, v);
669         return;
670     }
671 
672     /*
673      * Move from mergereg to reg; this sets the high elements and
674      * clears the bits above 128 as a side effect.
675      */
676     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
677                      vec_full_reg_offset(s, mergereg),
678                      16, vec_full_reg_size(s));
679     tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
680 }
681 
682 /*
683  * Write a single-prec result, but only clear the higher elements
684  * of the destination register if FPCR.NEP is 0; otherwise preserve them.
685  */
686 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
687                                   TCGv_i32 v)
688 {
689     if (!s->fpcr_nep) {
690         write_fp_sreg(s, reg, v);
691         return;
692     }
693 
694     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
695                      vec_full_reg_offset(s, mergereg),
696                      16, vec_full_reg_size(s));
697     tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
698 }
699 
700 /*
701  * Write a half-prec result, but only clear the higher elements
702  * of the destination register if FPCR.NEP is 0; otherwise preserve them.
703  * The caller must ensure that the top 16 bits of v are zero.
704  */
705 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
706                                   TCGv_i32 v)
707 {
708     if (!s->fpcr_nep) {
709         write_fp_sreg(s, reg, v);
710         return;
711     }
712 
713     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
714                      vec_full_reg_offset(s, mergereg),
715                      16, vec_full_reg_size(s));
716     tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
717 }
718 
719 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
720 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
721                          GVecGen2Fn *gvec_fn, int vece)
722 {
723     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
724             is_q ? 16 : 8, vec_full_reg_size(s));
725 }
726 
727 /* Expand a 2-operand + immediate AdvSIMD vector operation using
728  * an expander function.
729  */
730 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
731                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
732 {
733     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
734             imm, is_q ? 16 : 8, vec_full_reg_size(s));
735 }
736 
737 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
738 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
739                          GVecGen3Fn *gvec_fn, int vece)
740 {
741     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
742             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
743 }
744 
745 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
746 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
747                          int rx, GVecGen4Fn *gvec_fn, int vece)
748 {
749     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
750             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
751             is_q ? 16 : 8, vec_full_reg_size(s));
752 }
753 
754 /* Expand a 2-operand operation using an out-of-line helper.  */
755 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
756                              int rn, int data, gen_helper_gvec_2 *fn)
757 {
758     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
759                        vec_full_reg_offset(s, rn),
760                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
761 }
762 
763 /* Expand a 3-operand operation using an out-of-line helper.  */
764 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
765                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
766 {
767     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
768                        vec_full_reg_offset(s, rn),
769                        vec_full_reg_offset(s, rm),
770                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
771 }
772 
773 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
774  * an out-of-line helper.
775  */
776 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
777                               int rm, ARMFPStatusFlavour fpsttype, int data,
778                               gen_helper_gvec_3_ptr *fn)
779 {
780     TCGv_ptr fpst = fpstatus_ptr(fpsttype);
781     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
782                        vec_full_reg_offset(s, rn),
783                        vec_full_reg_offset(s, rm), fpst,
784                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
785 }
786 
787 /* Expand a 4-operand operation using an out-of-line helper.  */
788 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
789                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
790 {
791     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
792                        vec_full_reg_offset(s, rn),
793                        vec_full_reg_offset(s, rm),
794                        vec_full_reg_offset(s, ra),
795                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
796 }
797 
798 /*
799  * Expand a 4-operand operation using an out-of-line helper that takes
800  * a pointer to the CPU env.
801  */
802 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
803                              int rm, int ra, int data,
804                              gen_helper_gvec_4_ptr *fn)
805 {
806     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
807                        vec_full_reg_offset(s, rn),
808                        vec_full_reg_offset(s, rm),
809                        vec_full_reg_offset(s, ra),
810                        tcg_env,
811                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
812 }
813 
814 /*
815  * Expand a 4-operand + fpstatus pointer + simd data value operation using
816  * an out-of-line helper.
817  */
818 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
819                               int rm, int ra, ARMFPStatusFlavour fpsttype,
820                               int data,
821                               gen_helper_gvec_4_ptr *fn)
822 {
823     TCGv_ptr fpst = fpstatus_ptr(fpsttype);
824     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
825                        vec_full_reg_offset(s, rn),
826                        vec_full_reg_offset(s, rm),
827                        vec_full_reg_offset(s, ra), fpst,
828                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
829 }
830 
831 /*
832  * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
833  * These functions implement
834  *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
835  * which for float32 is
836  *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
837  * and similarly for the other float sizes.
838  */
839 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
840 {
841     TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
842 
843     gen_vfp_negh(chs_s, s);
844     gen_vfp_absh(abs_s, s);
845     tcg_gen_movcond_i32(TCG_COND_GTU, d,
846                         abs_s, tcg_constant_i32(0x7c00),
847                         s, chs_s);
848 }
849 
850 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
851 {
852     TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
853 
854     gen_vfp_negs(chs_s, s);
855     gen_vfp_abss(abs_s, s);
856     tcg_gen_movcond_i32(TCG_COND_GTU, d,
857                         abs_s, tcg_constant_i32(0x7f800000UL),
858                         s, chs_s);
859 }
860 
861 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
862 {
863     TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
864 
865     gen_vfp_negd(chs_s, s);
866     gen_vfp_absd(abs_s, s);
867     tcg_gen_movcond_i64(TCG_COND_GTU, d,
868                         abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
869                         s, chs_s);
870 }
871 
872 /*
873  * These functions implement
874  *  d = floatN_is_any_nan(s) ? s : floatN_abs(s)
875  * which for float32 is
876  *  d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
877  * and similarly for the other float sizes.
878  */
879 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
880 {
881     TCGv_i32 abs_s = tcg_temp_new_i32();
882 
883     gen_vfp_absh(abs_s, s);
884     tcg_gen_movcond_i32(TCG_COND_GTU, d,
885                         abs_s, tcg_constant_i32(0x7c00),
886                         s, abs_s);
887 }
888 
889 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
890 {
891     TCGv_i32 abs_s = tcg_temp_new_i32();
892 
893     gen_vfp_abss(abs_s, s);
894     tcg_gen_movcond_i32(TCG_COND_GTU, d,
895                         abs_s, tcg_constant_i32(0x7f800000UL),
896                         s, abs_s);
897 }
898 
899 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
900 {
901     TCGv_i64 abs_s = tcg_temp_new_i64();
902 
903     gen_vfp_absd(abs_s, s);
904     tcg_gen_movcond_i64(TCG_COND_GTU, d,
905                         abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
906                         s, abs_s);
907 }
908 
909 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
910 {
911     if (dc->fpcr_ah) {
912         gen_vfp_ah_negh(d, s);
913     } else {
914         gen_vfp_negh(d, s);
915     }
916 }
917 
918 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
919 {
920     if (dc->fpcr_ah) {
921         gen_vfp_ah_negs(d, s);
922     } else {
923         gen_vfp_negs(d, s);
924     }
925 }
926 
927 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
928 {
929     if (dc->fpcr_ah) {
930         gen_vfp_ah_negd(d, s);
931     } else {
932         gen_vfp_negd(d, s);
933     }
934 }
935 
936 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
937  * than the 32 bit equivalent.
938  */
939 static inline void gen_set_NZ64(TCGv_i64 result)
940 {
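    /*
     * NF takes the high 32 bits of the result (its sign bit is bit 63);
     * ZF is the OR of both halves, which is zero exactly when the whole
     * 64-bit result is zero.
     */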
941     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
942     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
943 }
944 
945 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
946 static inline void gen_logic_CC(int sf, TCGv_i64 result)
947 {
948     if (sf) {
949         gen_set_NZ64(result);
950     } else {
951         tcg_gen_extrl_i64_i32(cpu_ZF, result);
952         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
953     }
954     tcg_gen_movi_i32(cpu_CF, 0);
955     tcg_gen_movi_i32(cpu_VF, 0);
956 }
957 
958 /* dest = T0 + T1; compute C, N, V and Z flags */
959 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
960 {
961     TCGv_i64 result, flag, tmp;
962     result = tcg_temp_new_i64();
963     flag = tcg_temp_new_i64();
964     tmp = tcg_temp_new_i64();
965 
966     tcg_gen_movi_i64(tmp, 0);
967     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
968 
969     tcg_gen_extrl_i64_i32(cpu_CF, flag);
970 
971     gen_set_NZ64(result);
972 
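    /*
     * V = (result ^ t0) & ~(t0 ^ t1): signed overflow iff the operands
     * have the same sign and the result's sign differs; bit 63 of this
     * becomes the sign bit of cpu_VF.
     */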
973     tcg_gen_xor_i64(flag, result, t0);
974     tcg_gen_xor_i64(tmp, t0, t1);
975     tcg_gen_andc_i64(flag, flag, tmp);
976     tcg_gen_extrh_i64_i32(cpu_VF, flag);
977 
978     tcg_gen_mov_i64(dest, result);
979 }
980 
981 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
982 {
983     TCGv_i32 t0_32 = tcg_temp_new_i32();
984     TCGv_i32 t1_32 = tcg_temp_new_i32();
985     TCGv_i32 tmp = tcg_temp_new_i32();
986 
987     tcg_gen_movi_i32(tmp, 0);
988     tcg_gen_extrl_i64_i32(t0_32, t0);
989     tcg_gen_extrl_i64_i32(t1_32, t1);
990     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
991     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
992     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
993     tcg_gen_xor_i32(tmp, t0_32, t1_32);
994     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
995     tcg_gen_extu_i32_i64(dest, cpu_NF);
996 }
997 
998 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
999 {
1000     if (sf) {
1001         gen_add64_CC(dest, t0, t1);
1002     } else {
1003         gen_add32_CC(dest, t0, t1);
1004     }
1005 }
1006 
1007 /* dest = T0 - T1; compute C, N, V and Z flags */
1008 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1009 {
1010     /* 64 bit arithmetic */
1011     TCGv_i64 result, flag, tmp;
1012 
1013     result = tcg_temp_new_i64();
1014     flag = tcg_temp_new_i64();
1015     tcg_gen_sub_i64(result, t0, t1);
1016 
1017     gen_set_NZ64(result);
1018 
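    /*
     * For subtraction C is set when there is no borrow, i.e. when
     * t0 >= t1 unsigned; V = (result ^ t0) & (t0 ^ t1), i.e. overflow
     * when the operands differ in sign and the result's sign differs
     * from t0.
     */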
1019     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
1020     tcg_gen_extrl_i64_i32(cpu_CF, flag);
1021 
1022     tcg_gen_xor_i64(flag, result, t0);
1023     tmp = tcg_temp_new_i64();
1024     tcg_gen_xor_i64(tmp, t0, t1);
1025     tcg_gen_and_i64(flag, flag, tmp);
1026     tcg_gen_extrh_i64_i32(cpu_VF, flag);
1027     tcg_gen_mov_i64(dest, result);
1028 }
1029 
1030 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1031 {
1032     /* 32 bit arithmetic */
1033     TCGv_i32 t0_32 = tcg_temp_new_i32();
1034     TCGv_i32 t1_32 = tcg_temp_new_i32();
1035     TCGv_i32 tmp;
1036 
1037     tcg_gen_extrl_i64_i32(t0_32, t0);
1038     tcg_gen_extrl_i64_i32(t1_32, t1);
1039     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
1040     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1041     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
1042     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1043     tmp = tcg_temp_new_i32();
1044     tcg_gen_xor_i32(tmp, t0_32, t1_32);
1045     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
1046     tcg_gen_extu_i32_i64(dest, cpu_NF);
1047 }
1048 
1049 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1050 {
1051     if (sf) {
1052         gen_sub64_CC(dest, t0, t1);
1053     } else {
1054         gen_sub32_CC(dest, t0, t1);
1055     }
1056 }
1057 
1058 /* dest = T0 + T1 + CF; do not compute flags. */
1059 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1060 {
1061     TCGv_i64 flag = tcg_temp_new_i64();
1062     tcg_gen_extu_i32_i64(flag, cpu_CF);
1063     tcg_gen_add_i64(dest, t0, t1);
1064     tcg_gen_add_i64(dest, dest, flag);
1065 
1066     if (!sf) {
1067         tcg_gen_ext32u_i64(dest, dest);
1068     }
1069 }
1070 
1071 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
1072 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1073 {
1074     if (sf) {
1075         TCGv_i64 result = tcg_temp_new_i64();
1076         TCGv_i64 cf_64 = tcg_temp_new_i64();
1077         TCGv_i64 vf_64 = tcg_temp_new_i64();
1078         TCGv_i64 tmp = tcg_temp_new_i64();
1079 
1080         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
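        /*
         * addcio computes t0 + t1 + carry-in, leaving the sum in result
         * and the carry-out in cf_64 for extraction into CF below.
         */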
1081         tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
1082         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
1083         gen_set_NZ64(result);
1084 
1085         tcg_gen_xor_i64(vf_64, result, t0);
1086         tcg_gen_xor_i64(tmp, t0, t1);
1087         tcg_gen_andc_i64(vf_64, vf_64, tmp);
1088         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
1089 
1090         tcg_gen_mov_i64(dest, result);
1091     } else {
1092         TCGv_i32 t0_32 = tcg_temp_new_i32();
1093         TCGv_i32 t1_32 = tcg_temp_new_i32();
1094         TCGv_i32 tmp = tcg_temp_new_i32();
1095 
1096         tcg_gen_extrl_i64_i32(t0_32, t0);
1097         tcg_gen_extrl_i64_i32(t1_32, t1);
1098         tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);
1099 
1100         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1101         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1102         tcg_gen_xor_i32(tmp, t0_32, t1_32);
1103         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1104         tcg_gen_extu_i32_i64(dest, cpu_NF);
1105     }
1106 }
1107 
1108 /*
1109  * Load/Store generators
1110  */
1111 
1112 /*
1113  * Store from GPR register to memory.
1114  */
1115 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
1116                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
1117                              bool iss_valid,
1118                              unsigned int iss_srt,
1119                              bool iss_sf, bool iss_ar)
1120 {
1121     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
1122 
1123     if (iss_valid) {
1124         uint32_t syn;
1125 
1126         syn = syn_data_abort_with_iss(0,
1127                                       (memop & MO_SIZE),
1128                                       false,
1129                                       iss_srt,
1130                                       iss_sf,
1131                                       iss_ar,
1132                                       0, 0, 0, 0, 0, false);
1133         disas_set_insn_syndrome(s, syn);
1134     }
1135 }
1136 
1137 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
1138                       TCGv_i64 tcg_addr, MemOp memop,
1139                       bool iss_valid,
1140                       unsigned int iss_srt,
1141                       bool iss_sf, bool iss_ar)
1142 {
1143     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
1144                      iss_valid, iss_srt, iss_sf, iss_ar);
1145 }
1146 
1147 /*
1148  * Load from memory to GPR register
1149  */
1150 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1151                              MemOp memop, bool extend, int memidx,
1152                              bool iss_valid, unsigned int iss_srt,
1153                              bool iss_sf, bool iss_ar)
1154 {
1155     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
1156 
1157     if (extend && (memop & MO_SIGN)) {
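        /*
         * Sign-extending load to a W register: the load sign-extended
         * to 64 bits, so clear bits [63:32] to give the architecturally
         * zero-extended X-register view.
         */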
1158         g_assert((memop & MO_SIZE) <= MO_32);
1159         tcg_gen_ext32u_i64(dest, dest);
1160     }
1161 
1162     if (iss_valid) {
1163         uint32_t syn;
1164 
1165         syn = syn_data_abort_with_iss(0,
1166                                       (memop & MO_SIZE),
1167                                       (memop & MO_SIGN) != 0,
1168                                       iss_srt,
1169                                       iss_sf,
1170                                       iss_ar,
1171                                       0, 0, 0, 0, 0, false);
1172         disas_set_insn_syndrome(s, syn);
1173     }
1174 }
1175 
1176 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1177                       MemOp memop, bool extend,
1178                       bool iss_valid, unsigned int iss_srt,
1179                       bool iss_sf, bool iss_ar)
1180 {
1181     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1182                      iss_valid, iss_srt, iss_sf, iss_ar);
1183 }
1184 
1185 /*
1186  * Store from FP register to memory
1187  */
1188 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1189 {
1190     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1191     TCGv_i64 tmplo = tcg_temp_new_i64();
1192 
1193     tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1194 
1195     if ((mop & MO_SIZE) < MO_128) {
1196         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1197     } else {
1198         TCGv_i64 tmphi = tcg_temp_new_i64();
1199         TCGv_i128 t16 = tcg_temp_new_i128();
1200 
1201         tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1202         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1203 
1204         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1205     }
1206 }
1207 
1208 /*
1209  * Load from memory to FP register
1210  */
1211 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1212 {
1213     /* This always zero-extends and writes to a full 128 bit wide vector */
1214     TCGv_i64 tmplo = tcg_temp_new_i64();
1215     TCGv_i64 tmphi = NULL;
1216 
1217     if ((mop & MO_SIZE) < MO_128) {
1218         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1219     } else {
1220         TCGv_i128 t16 = tcg_temp_new_i128();
1221 
1222         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1223 
1224         tmphi = tcg_temp_new_i64();
1225         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1226     }
1227 
1228     tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1229 
1230     if (tmphi) {
1231         tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1232     }
1233     clear_vec_high(s, tmphi != NULL, destidx);
1234 }
1235 
1236 /*
1237  * Vector load/store helpers.
1238  *
1239  * The principal difference between this and a FP load is that we don't
1240  * zero extend as we are filling a partial chunk of the vector register.
1241  * These functions don't support 128 bit loads/stores, which would be
1242  * normal load/store operations.
1243  *
1244  * The _i32 versions are useful when operating on 32 bit quantities
1245  * (eg for floating point single or using Neon helper functions).
1246  */
1247 
1248 /* Get value of an element within a vector register */
1249 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1250                              int element, MemOp memop)
1251 {
1252     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1253     switch ((unsigned)memop) {
1254     case MO_8:
1255         tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1256         break;
1257     case MO_16:
1258         tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1259         break;
1260     case MO_32:
1261         tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1262         break;
1263     case MO_8|MO_SIGN:
1264         tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1265         break;
1266     case MO_16|MO_SIGN:
1267         tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1268         break;
1269     case MO_32|MO_SIGN:
1270         tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1271         break;
1272     case MO_64:
1273     case MO_64|MO_SIGN:
1274         tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1275         break;
1276     default:
1277         g_assert_not_reached();
1278     }
1279 }
1280 
1281 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1282                                  int element, MemOp memop)
1283 {
1284     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1285     switch (memop) {
1286     case MO_8:
1287         tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1288         break;
1289     case MO_16:
1290         tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1291         break;
1292     case MO_8|MO_SIGN:
1293         tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1294         break;
1295     case MO_16|MO_SIGN:
1296         tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1297         break;
1298     case MO_32:
1299     case MO_32|MO_SIGN:
1300         tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1301         break;
1302     default:
1303         g_assert_not_reached();
1304     }
1305 }
1306 
1307 /* Set value of an element within a vector register */
1308 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1309                               int element, MemOp memop)
1310 {
1311     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1312     switch (memop) {
1313     case MO_8:
1314         tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1315         break;
1316     case MO_16:
1317         tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1318         break;
1319     case MO_32:
1320         tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1321         break;
1322     case MO_64:
1323         tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1324         break;
1325     default:
1326         g_assert_not_reached();
1327     }
1328 }
1329 
1330 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1331                                   int destidx, int element, MemOp memop)
1332 {
1333     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1334     switch (memop) {
1335     case MO_8:
1336         tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1337         break;
1338     case MO_16:
1339         tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1340         break;
1341     case MO_32:
1342         tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1343         break;
1344     default:
1345         g_assert_not_reached();
1346     }
1347 }
1348 
1349 /* Store from vector register to memory */
1350 static void do_vec_st(DisasContext *s, int srcidx, int element,
1351                       TCGv_i64 tcg_addr, MemOp mop)
1352 {
1353     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1354 
1355     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1356     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1357 }
1358 
1359 /* Load from memory to vector register */
1360 static void do_vec_ld(DisasContext *s, int destidx, int element,
1361                       TCGv_i64 tcg_addr, MemOp mop)
1362 {
1363     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1364 
1365     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1366     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1367 }
1368 
1369 /* Check that FP/Neon access is enabled. If it is, return
1370  * true. If not, emit code to generate an appropriate exception,
1371  * and return false; the caller should not emit any code for
1372  * the instruction. Note that this check must happen after all
1373  * unallocated-encoding checks (otherwise the syndrome information
1374  * for the resulting exception will be incorrect).
1375  */
1376 static bool fp_access_check_only(DisasContext *s)
1377 {
1378     if (s->fp_excp_el) {
1379         assert(!s->fp_access_checked);
1380         s->fp_access_checked = -1;
1381 
1382         gen_exception_insn_el(s, 0, EXCP_UDEF,
1383                               syn_fp_access_trap(1, 0xe, false, 0),
1384                               s->fp_excp_el);
1385         return false;
1386     }
1387     s->fp_access_checked = 1;
1388     return true;
1389 }
1390 
1391 static bool fp_access_check(DisasContext *s)
1392 {
1393     if (!fp_access_check_only(s)) {
1394         return false;
1395     }
1396     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1397         gen_exception_insn(s, 0, EXCP_UDEF,
1398                            syn_smetrap(SME_ET_Streaming, false));
1399         return false;
1400     }
1401     return true;
1402 }
1403 
1404 /*
1405  * Return <0 for non-supported element sizes, with MO_16 controlled by
1406  * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
1407  */
1408 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
1409 {
1410     switch (esz) {
1411     case MO_64:
1412     case MO_32:
1413         break;
1414     case MO_16:
1415         if (!dc_isar_feature(aa64_fp16, s)) {
1416             return -1;
1417         }
1418         break;
1419     default:
1420         return -1;
1421     }
1422     return fp_access_check(s);
1423 }
1424 
1425 /* Likewise, but vector MO_64 must have two elements. */
1426 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
1427 {
1428     switch (esz) {
1429     case MO_64:
1430         if (!is_q) {
1431             return -1;
1432         }
1433         break;
1434     case MO_32:
1435         break;
1436     case MO_16:
1437         if (!dc_isar_feature(aa64_fp16, s)) {
1438             return -1;
1439         }
1440         break;
1441     default:
1442         return -1;
1443     }
1444     return fp_access_check(s);
1445 }
1446 
1447 /*
1448  * Check that SVE access is enabled.  If it is, return true.
1449  * If not, emit code to generate an appropriate exception and return false.
1450  * This function corresponds to CheckSVEEnabled().
1451  */
1452 bool sve_access_check(DisasContext *s)
1453 {
1454     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1455         bool ret;
1456 
1457         assert(dc_isar_feature(aa64_sme, s));
1458         ret = sme_sm_enabled_check(s);
1459         s->sve_access_checked = (ret ? 1 : -1);
1460         return ret;
1461     }
1462     if (s->sve_excp_el) {
1463         /* Assert that we only raise one exception per instruction. */
1464         assert(!s->sve_access_checked);
1465         gen_exception_insn_el(s, 0, EXCP_UDEF,
1466                               syn_sve_access_trap(), s->sve_excp_el);
1467         s->sve_access_checked = -1;
1468         return false;
1469     }
1470     s->sve_access_checked = 1;
1471     return fp_access_check(s);
1472 }
1473 
1474 /*
1475  * Check that SME access is enabled, raise an exception if not.
1476  * Note that this function corresponds to CheckSMEAccess and is
1477  * only used directly for cpregs.
1478  */
1479 static bool sme_access_check(DisasContext *s)
1480 {
1481     if (s->sme_excp_el) {
1482         gen_exception_insn_el(s, 0, EXCP_UDEF,
1483                               syn_smetrap(SME_ET_AccessTrap, false),
1484                               s->sme_excp_el);
1485         return false;
1486     }
1487     return true;
1488 }
1489 
1490 /* This function corresponds to CheckSMEEnabled. */
1491 bool sme_enabled_check(DisasContext *s)
1492 {
1493     /*
1494      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1495      * to be zero when fp_excp_el has priority.  This is because we need
1496      * sme_excp_el by itself for cpregs access checks.
1497      */
1498     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1499         bool ret = sme_access_check(s);
1500         s->fp_access_checked = (ret ? 1 : -1);
1501         return ret;
1502     }
1503     return fp_access_check_only(s);
1504 }
1505 
1506 /* Common subroutine for CheckSMEAnd*Enabled. */
1507 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1508 {
1509     if (!sme_enabled_check(s)) {
1510         return false;
1511     }
1512     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1513         gen_exception_insn(s, 0, EXCP_UDEF,
1514                            syn_smetrap(SME_ET_NotStreaming, false));
1515         return false;
1516     }
1517     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1518         gen_exception_insn(s, 0, EXCP_UDEF,
1519                            syn_smetrap(SME_ET_InactiveZA, false));
1520         return false;
1521     }
1522     return true;
1523 }
1524 
1525 /*
1526  * Expanders for AdvSIMD translation functions.
1527  */
1528 
1529 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1530                             gen_helper_gvec_2 *fn)
1531 {
1532     if (!a->q && a->esz == MO_64) {
1533         return false;
1534     }
1535     if (fp_access_check(s)) {
1536         gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1537     }
1538     return true;
1539 }
1540 
1541 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1542                             gen_helper_gvec_3 *fn)
1543 {
1544     if (!a->q && a->esz == MO_64) {
1545         return false;
1546     }
1547     if (fp_access_check(s)) {
1548         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1549     }
1550     return true;
1551 }
1552 
1553 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1554 {
1555     if (!a->q && a->esz == MO_64) {
1556         return false;
1557     }
1558     if (fp_access_check(s)) {
1559         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1560     }
1561     return true;
1562 }
1563 
1564 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1565 {
1566     if (a->esz == MO_64) {
1567         return false;
1568     }
1569     if (fp_access_check(s)) {
1570         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1571     }
1572     return true;
1573 }
1574 
1575 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1576 {
1577     if (a->esz == MO_8) {
1578         return false;
1579     }
1580     return do_gvec_fn3_no64(s, a, fn);
1581 }
1582 
1583 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1584 {
1585     if (!a->q && a->esz == MO_64) {
1586         return false;
1587     }
1588     if (fp_access_check(s)) {
1589         gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1590     }
1591     return true;
1592 }
1593 
1594 /*
1595  * This utility function is for doing register extension with an
1596  * optional shift. You will likely want to pass a temporary for the
1597  * destination register. See DecodeRegExtend() in the ARM ARM.
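 * For example, option == 2 (UXTW) with shift == 2 zero-extends the low
 * 32 bits of tcg_in and then shifts the result left by 2.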
1598  */
1599 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1600                               int option, unsigned int shift)
1601 {
1602     int extsize = extract32(option, 0, 2);
1603     bool is_signed = extract32(option, 2, 1);
1604 
1605     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1606     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1607 }
1608 
1609 static inline void gen_check_sp_alignment(DisasContext *s)
1610 {
1611     /* The AArch64 architecture mandates that (if enabled via PSTATE
1612      * or SCTLR bits) there is a check that SP is 16-aligned on every
1613      * SP-relative load or store (with an exception generated if it is not).
1614      * In line with general QEMU practice regarding misaligned accesses,
1615      * we omit these checks for the sake of guest program performance.
1616      * This function is provided as a hook so we can more easily add these
1617      * checks in future (possibly as a "favour catching guest program bugs
1618      * over speed" user selectable option).
1619      */
1620 }
1621 
1622 /*
1623  * The instruction disassembly implemented here matches
1624  * the instruction encoding classifications in chapter C4
1625  * of the ARM Architecture Reference Manual (DDI0487B_a);
1626  * classification names and decode diagrams here should generally
1627  * match up with those in the manual.
1628  */
1629 
1630 static bool trans_B(DisasContext *s, arg_i *a)
1631 {
1632     reset_btype(s);
1633     gen_goto_tb(s, 0, a->imm);
1634     return true;
1635 }
1636 
1637 static bool trans_BL(DisasContext *s, arg_i *a)
1638 {
1639     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1640     reset_btype(s);
1641     gen_goto_tb(s, 0, a->imm);
1642     return true;
1643 }
1644 
1645 
1646 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1647 {
1648     DisasLabel match;
1649     TCGv_i64 tcg_cmp;
1650 
1651     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1652     reset_btype(s);
1653 
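    /*
     * Two TB exits: exit 0 falls through to the next insn when the
     * branch is not taken, exit 1 goes to the branch target when it is.
     */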
1654     match = gen_disas_label(s);
1655     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1656                         tcg_cmp, 0, match.label);
1657     gen_goto_tb(s, 0, 4);
1658     set_disas_label(s, match);
1659     gen_goto_tb(s, 1, a->imm);
1660     return true;
1661 }
1662 
1663 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1664 {
1665     DisasLabel match;
1666     TCGv_i64 tcg_cmp;
1667 
1668     tcg_cmp = tcg_temp_new_i64();
1669     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1670 
1671     reset_btype(s);
1672 
1673     match = gen_disas_label(s);
1674     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1675                         tcg_cmp, 0, match.label);
1676     gen_goto_tb(s, 0, 4);
1677     set_disas_label(s, match);
1678     gen_goto_tb(s, 1, a->imm);
1679     return true;
1680 }
1681 
1682 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1683 {
1684     /* BC.cond is only present with FEAT_HBC */
1685     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1686         return false;
1687     }
1688     reset_btype(s);
1689     if (a->cond < 0x0e) {
1690         /* genuinely conditional branches */
1691         DisasLabel match = gen_disas_label(s);
1692         arm_gen_test_cc(a->cond, match.label);
1693         gen_goto_tb(s, 0, 4);
1694         set_disas_label(s, match);
1695         gen_goto_tb(s, 1, a->imm);
1696     } else {
1697         /* 0xe and 0xf are both "always" conditions */
1698         gen_goto_tb(s, 0, a->imm);
1699     }
1700     return true;
1701 }
1702 
1703 static void set_btype_for_br(DisasContext *s, int rn)
1704 {
1705     if (dc_isar_feature(aa64_bti, s)) {
1706         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1707         if (rn == 16 || rn == 17) {
1708             set_btype(s, 1);
1709         } else {
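            /*
             * BTYPE depends on whether the page containing this branch
             * is guarded, which the helper resolves at runtime; mark
             * btype as unknown at translate time.
             */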
1710             TCGv_i64 pc = tcg_temp_new_i64();
1711             gen_pc_plus_diff(s, pc, 0);
1712             gen_helper_guarded_page_br(tcg_env, pc);
1713             s->btype = -1;
1714         }
1715     }
1716 }
1717 
1718 static void set_btype_for_blr(DisasContext *s)
1719 {
1720     if (dc_isar_feature(aa64_bti, s)) {
1721         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1722         set_btype(s, 2);
1723     }
1724 }
1725 
1726 static bool trans_BR(DisasContext *s, arg_r *a)
1727 {
1728     set_btype_for_br(s, a->rn);
1729     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1730     s->base.is_jmp = DISAS_JUMP;
1731     return true;
1732 }
1733 
1734 static bool trans_BLR(DisasContext *s, arg_r *a)
1735 {
1736     TCGv_i64 dst = cpu_reg(s, a->rn);
1737     TCGv_i64 lr = cpu_reg(s, 30);
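    /*
     * If the destination is X30, writing the link register below would
     * clobber it, so take a copy of the branch target first.
     */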
1738     if (dst == lr) {
1739         TCGv_i64 tmp = tcg_temp_new_i64();
1740         tcg_gen_mov_i64(tmp, dst);
1741         dst = tmp;
1742     }
1743     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1744     gen_a64_set_pc(s, dst);
1745     set_btype_for_blr(s);
1746     s->base.is_jmp = DISAS_JUMP;
1747     return true;
1748 }
1749 
1750 static bool trans_RET(DisasContext *s, arg_r *a)
1751 {
1752     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1753     s->base.is_jmp = DISAS_JUMP;
1754     return true;
1755 }
1756 
1757 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1758                                    TCGv_i64 modifier, bool use_key_a)
1759 {
1760     TCGv_i64 truedst;
1761     /*
1762      * Return the branch target for a BRAA/RETA/etc, which is either
1763      * just the destination dst, or that value with the pauth check
1764      * done and the code removed from the high bits.
1765      */
1766     if (!s->pauth_active) {
1767         return dst;
1768     }
1769 
1770     truedst = tcg_temp_new_i64();
1771     if (use_key_a) {
1772         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1773     } else {
1774         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1775     }
1776     return truedst;
1777 }
1778 
1779 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1780 {
1781     TCGv_i64 dst;
1782 
1783     if (!dc_isar_feature(aa64_pauth, s)) {
1784         return false;
1785     }
1786 
1787     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1788     set_btype_for_br(s, a->rn);
1789     gen_a64_set_pc(s, dst);
1790     s->base.is_jmp = DISAS_JUMP;
1791     return true;
1792 }
1793 
1794 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1795 {
1796     TCGv_i64 dst, lr;
1797 
1798     if (!dc_isar_feature(aa64_pauth, s)) {
1799         return false;
1800     }
1801 
1802     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1803     lr = cpu_reg(s, 30);
1804     if (dst == lr) {
1805         TCGv_i64 tmp = tcg_temp_new_i64();
1806         tcg_gen_mov_i64(tmp, dst);
1807         dst = tmp;
1808     }
1809     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1810     gen_a64_set_pc(s, dst);
1811     set_btype_for_blr(s);
1812     s->base.is_jmp = DISAS_JUMP;
1813     return true;
1814 }
1815 
1816 static bool trans_RETA(DisasContext *s, arg_reta *a)
1817 {
1818     TCGv_i64 dst;
1819 
1820     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1821     gen_a64_set_pc(s, dst);
1822     s->base.is_jmp = DISAS_JUMP;
1823     return true;
1824 }
1825 
1826 static bool trans_BRA(DisasContext *s, arg_bra *a)
1827 {
1828     TCGv_i64 dst;
1829 
1830     if (!dc_isar_feature(aa64_pauth, s)) {
1831         return false;
1832     }
1833     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1834     gen_a64_set_pc(s, dst);
1835     set_btype_for_br(s, a->rn);
1836     s->base.is_jmp = DISAS_JUMP;
1837     return true;
1838 }
1839 
1840 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1841 {
1842     TCGv_i64 dst, lr;
1843 
1844     if (!dc_isar_feature(aa64_pauth, s)) {
1845         return false;
1846     }
1847     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1848     lr = cpu_reg(s, 30);
1849     if (dst == lr) {
1850         TCGv_i64 tmp = tcg_temp_new_i64();
1851         tcg_gen_mov_i64(tmp, dst);
1852         dst = tmp;
1853     }
1854     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1855     gen_a64_set_pc(s, dst);
1856     set_btype_for_blr(s);
1857     s->base.is_jmp = DISAS_JUMP;
1858     return true;
1859 }
1860 
1861 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1862 {
1863     TCGv_i64 dst;
1864 
1865     if (s->current_el == 0) {
1866         return false;
1867     }
1868     if (s->trap_eret) {
1869         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1870         return true;
1871     }
1872     dst = tcg_temp_new_i64();
1873     tcg_gen_ld_i64(dst, tcg_env,
1874                    offsetof(CPUARMState, elr_el[s->current_el]));
1875 
1876     translator_io_start(&s->base);
1877 
1878     gen_helper_exception_return(tcg_env, dst);
1879     /* Must exit loop to check unmasked IRQs */
1880     s->base.is_jmp = DISAS_EXIT;
1881     return true;
1882 }
1883 
1884 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1885 {
1886     TCGv_i64 dst;
1887 
1888     if (!dc_isar_feature(aa64_pauth, s)) {
1889         return false;
1890     }
1891     if (s->current_el == 0) {
1892         return false;
1893     }
1894     /* The FGT trap takes precedence over an auth trap. */
1895     if (s->trap_eret) {
1896         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1897         return true;
1898     }
1899     dst = tcg_temp_new_i64();
1900     tcg_gen_ld_i64(dst, tcg_env,
1901                    offsetof(CPUARMState, elr_el[s->current_el]));
1902 
1903     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1904 
1905     translator_io_start(&s->base);
1906 
1907     gen_helper_exception_return(tcg_env, dst);
1908     /* Must exit loop to check unmasked IRQs */
1909     s->base.is_jmp = DISAS_EXIT;
1910     return true;
1911 }
1912 
1913 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1914 {
1915     return true;
1916 }
1917 
1918 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1919 {
1920     /*
1921      * When running in MTTCG we don't generate jumps to the yield and
1922      * WFE helpers as it won't affect the scheduling of other vCPUs.
1923      * If we wanted to more completely model WFE/SEV so we don't busy
1924      * spin unnecessarily we would need to do something more involved.
1925      */
1926     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1927         s->base.is_jmp = DISAS_YIELD;
1928     }
1929     return true;
1930 }
1931 
1932 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1933 {
1934     s->base.is_jmp = DISAS_WFI;
1935     return true;
1936 }
1937 
1938 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1939 {
1940     /*
1941      * When running in MTTCG we don't generate jumps to the yield and
1942      * WFE helpers as it won't affect the scheduling of other vCPUs.
1943      * If we wanted to more completely model WFE/SEV so we don't busy
1944      * spin unnecessarily we would need to do something more involved.
1945      */
1946     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1947         s->base.is_jmp = DISAS_WFE;
1948     }
1949     return true;
1950 }
1951 
1952 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1953 {
1954     if (!dc_isar_feature(aa64_wfxt, s)) {
1955         return false;
1956     }
1957 
1958     /*
1959      * Because we need to pass the register value to the helper,
1960      * it's easier to emit the code now, unlike trans_WFI which
1961      * defers it to aarch64_tr_tb_stop(). That means we need to
1962      * check ss_active so that single-stepping a WFIT doesn't halt.
1963      */
1964     if (s->ss_active) {
1965         /* Act like a NOP under architectural singlestep */
1966         return true;
1967     }
1968 
1969     gen_a64_update_pc(s, 4);
1970     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1971     /* Go back to the main loop to check for interrupts */
1972     s->base.is_jmp = DISAS_EXIT;
1973     return true;
1974 }
1975 
1976 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1977 {
1978     if (!dc_isar_feature(aa64_wfxt, s)) {
1979         return false;
1980     }
1981 
1982     /*
1983      * We rely here on our WFE implementation being a NOP, so we
1984      * don't need to do anything different to handle the WFET timeout
1985      * from what trans_WFE does.
1986      */
1987     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1988         s->base.is_jmp = DISAS_WFE;
1989     }
1990     return true;
1991 }
1992 
1993 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1994 {
1995     if (s->pauth_active) {
1996         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1997     }
1998     return true;
1999 }
2000 
2001 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2002 {
2003     if (s->pauth_active) {
2004         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2005     }
2006     return true;
2007 }
2008 
2009 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2010 {
2011     if (s->pauth_active) {
2012         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2013     }
2014     return true;
2015 }
2016 
2017 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2018 {
2019     if (s->pauth_active) {
2020         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2021     }
2022     return true;
2023 }
2024 
2025 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2026 {
2027     if (s->pauth_active) {
2028         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2029     }
2030     return true;
2031 }
2032 
2033 static bool trans_ESB(DisasContext *s, arg_ESB *a)
2034 {
2035     /* Without RAS, we must implement this as a NOP. */
2036     if (dc_isar_feature(aa64_ras, s)) {
2037         /*
2038          * QEMU does not have a source of physical SErrors,
2039          * so we are only concerned with virtual SErrors.
2040          * The pseudocode in the ARM for this case is
2041          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2042          *      AArch64.vESBOperation();
2043          * Most of the condition can be evaluated at translation time.
2044          * Test for EL2 present, and defer test for SEL2 to runtime.
2045          */
2046         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2047             gen_helper_vesb(tcg_env);
2048         }
2049     }
2050     return true;
2051 }
2052 
2053 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2054 {
2055     if (s->pauth_active) {
2056         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2057     }
2058     return true;
2059 }
2060 
2061 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2062 {
2063     if (s->pauth_active) {
2064         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2065     }
2066     return true;
2067 }
2068 
2069 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2070 {
2071     if (s->pauth_active) {
2072         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2073     }
2074     return true;
2075 }
2076 
2077 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2078 {
2079     if (s->pauth_active) {
2080         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2081     }
2082     return true;
2083 }
2084 
2085 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2086 {
2087     if (s->pauth_active) {
2088         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2089     }
2090     return true;
2091 }
2092 
2093 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2094 {
2095     if (s->pauth_active) {
2096         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2097     }
2098     return true;
2099 }
2100 
2101 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2102 {
2103     if (s->pauth_active) {
2104         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2105     }
2106     return true;
2107 }
2108 
2109 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2110 {
2111     if (s->pauth_active) {
2112         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2113     }
2114     return true;
2115 }
2116 
2117 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2118 {
2119     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2120     return true;
2121 }
2122 
2123 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2124 {
2125     /* We handle DSB and DMB the same way */
2126     TCGBar bar;
2127 
2128     switch (a->types) {
2129     case 1: /* MBReqTypes_Reads */
2130         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2131         break;
2132     case 2: /* MBReqTypes_Writes */
2133         bar = TCG_BAR_SC | TCG_MO_ST_ST;
2134         break;
2135     default: /* MBReqTypes_All */
2136         bar = TCG_BAR_SC | TCG_MO_ALL;
2137         break;
2138     }
2139     tcg_gen_mb(bar);
2140     return true;
2141 }
2142 
2143 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2144 {
2145     if (!dc_isar_feature(aa64_xs, s)) {
2146         return false;
2147     }
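    /*
     * The nXS qualifier is not modelled separately here; emit the same
     * full-system barrier as a plain DSB.
     */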
2148     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2149     return true;
2150 }
2151 
2152 static bool trans_ISB(DisasContext *s, arg_ISB *a)
2153 {
2154     /*
2155      * We need to break the TB after this insn to execute
2156      * self-modifying code correctly and also to take
2157      * any pending interrupts immediately.
2158      */
2159     reset_btype(s);
2160     gen_goto_tb(s, 0, 4);
2161     return true;
2162 }
2163 
2164 static bool trans_SB(DisasContext *s, arg_SB *a)
2165 {
2166     if (!dc_isar_feature(aa64_sb, s)) {
2167         return false;
2168     }
2169     /*
2170      * TODO: There is no speculation barrier opcode for TCG;
2171      * MB and end the TB instead.
2172      */
2173     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2174     gen_goto_tb(s, 0, 4);
2175     return true;
2176 }
2177 
2178 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2179 {
2180     if (!dc_isar_feature(aa64_condm_4, s)) {
2181         return false;
2182     }
2183     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2184     return true;
2185 }
2186 
2187 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2188 {
2189     TCGv_i32 z;
2190 
2191     if (!dc_isar_feature(aa64_condm_5, s)) {
2192         return false;
2193     }
2194 
2195     z = tcg_temp_new_i32();
2196 
2197     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2198 
2199     /*
2200      * (!C & !Z) << 31
2201      * (!(C | Z)) << 31
2202      * ~((C | Z) << 31)
2203      * ~-(C | Z)
2204      * (C | Z) - 1
2205      */
2206     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2207     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2208 
2209     /* !(Z & C) */
2210     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2211     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2212 
2213     /* (!C & Z) << 31 -> -(Z & ~C) */
2214     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2215     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2216 
2217     /* C | Z */
2218     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2219 
2220     return true;
2221 }
2222 
2223 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2224 {
2225     if (!dc_isar_feature(aa64_condm_5, s)) {
2226         return false;
2227     }
2228 
2229     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2230     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2231 
2232     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2233     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2234 
2235     tcg_gen_movi_i32(cpu_NF, 0);
2236     tcg_gen_movi_i32(cpu_VF, 0);
2237 
2238     return true;
2239 }
2240 
2241 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2242 {
2243     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2244         return false;
2245     }
2246     if (a->imm & 1) {
2247         set_pstate_bits(PSTATE_UAO);
2248     } else {
2249         clear_pstate_bits(PSTATE_UAO);
2250     }
2251     gen_rebuild_hflags(s);
2252     s->base.is_jmp = DISAS_TOO_MANY;
2253     return true;
2254 }
2255 
2256 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2257 {
2258     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2259         return false;
2260     }
2261     if (a->imm & 1) {
2262         set_pstate_bits(PSTATE_PAN);
2263     } else {
2264         clear_pstate_bits(PSTATE_PAN);
2265     }
2266     gen_rebuild_hflags(s);
2267     s->base.is_jmp = DISAS_TOO_MANY;
2268     return true;
2269 }
2270 
2271 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2272 {
2273     if (s->current_el == 0) {
2274         return false;
2275     }
2276     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2277     s->base.is_jmp = DISAS_TOO_MANY;
2278     return true;
2279 }
2280 
2281 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2282 {
2283     if (!dc_isar_feature(aa64_ssbs, s)) {
2284         return false;
2285     }
2286     if (a->imm & 1) {
2287         set_pstate_bits(PSTATE_SSBS);
2288     } else {
2289         clear_pstate_bits(PSTATE_SSBS);
2290     }
2291     /* Don't need to rebuild hflags since SSBS is a nop */
2292     s->base.is_jmp = DISAS_TOO_MANY;
2293     return true;
2294 }
2295 
2296 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2297 {
2298     if (!dc_isar_feature(aa64_dit, s)) {
2299         return false;
2300     }
2301     if (a->imm & 1) {
2302         set_pstate_bits(PSTATE_DIT);
2303     } else {
2304         clear_pstate_bits(PSTATE_DIT);
2305     }
2306     /* There's no need to rebuild hflags because DIT is a nop */
2307     s->base.is_jmp = DISAS_TOO_MANY;
2308     return true;
2309 }
2310 
2311 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2312 {
2313     if (dc_isar_feature(aa64_mte, s)) {
2314         /* Full MTE is enabled -- set the TCO bit as directed. */
2315         if (a->imm & 1) {
2316             set_pstate_bits(PSTATE_TCO);
2317         } else {
2318             clear_pstate_bits(PSTATE_TCO);
2319         }
2320         gen_rebuild_hflags(s);
2321         /* Many factors, including TCO, go into MTE_ACTIVE. */
2322         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2323         return true;
2324     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2325         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2326         return true;
2327     } else {
2328         /* Insn not present */
2329         return false;
2330     }
2331 }
2332 
2333 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2334 {
2335     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2336     s->base.is_jmp = DISAS_TOO_MANY;
2337     return true;
2338 }
2339 
2340 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2341 {
2342     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2343     /* Exit the cpu loop to re-evaluate pending IRQs. */
2344     s->base.is_jmp = DISAS_UPDATE_EXIT;
2345     return true;
2346 }
2347 
2348 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2349 {
2350     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2351         return false;
2352     }
2353 
2354     if (a->imm == 0) {
2355         clear_pstate_bits(PSTATE_ALLINT);
2356     } else if (s->current_el > 1) {
2357         set_pstate_bits(PSTATE_ALLINT);
2358     } else {
2359         gen_helper_msr_set_allint_el1(tcg_env);
2360     }
2361 
2362     /* Exit the cpu loop to re-evaluate pending IRQs. */
2363     s->base.is_jmp = DISAS_UPDATE_EXIT;
2364     return true;
2365 }
2366 
2367 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2368 {
2369     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2370         return false;
2371     }
2372     if (sme_access_check(s)) {
2373         int old = s->pstate_sm | (s->pstate_za << 1);
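        /* imm is 0 or 1; multiplying by 3 replicates it into {SM, ZA}. */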
2374         int new = a->imm * 3;
2375 
2376         if ((old ^ new) & a->mask) {
2377             /* At least one bit changes. */
2378             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2379                                 tcg_constant_i32(a->mask));
2380             s->base.is_jmp = DISAS_TOO_MANY;
2381         }
2382     }
2383     return true;
2384 }
2385 
2386 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2387 {
2388     TCGv_i32 tmp = tcg_temp_new_i32();
2389     TCGv_i32 nzcv = tcg_temp_new_i32();
2390 
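    /* Assemble NZCV into bits [31:28] of the result; other bits are zero. */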
2391     /* build bit 31, N */
2392     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2393     /* build bit 30, Z */
2394     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2395     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2396     /* build bit 29, C */
2397     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2398     /* build bit 28, V */
2399     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2400     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2401     /* generate result */
2402     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2403 }
2404 
2405 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2406 {
2407     TCGv_i32 nzcv = tcg_temp_new_i32();
2408 
2409     /* take NZCV from R[t] */
2410     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2411 
2412     /* bit 31, N */
2413     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2414     /* bit 30, Z */
2415     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2416     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2417     /* bit 29, C */
2418     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2419     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2420     /* bit 28, V */
2421     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
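    /* cpu_VF keeps the V flag in bit 31, hence the shift up from bit 28. */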
2422     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2423 }
2424 
2425 static void gen_sysreg_undef(DisasContext *s, bool isread,
2426                              uint8_t op0, uint8_t op1, uint8_t op2,
2427                              uint8_t crn, uint8_t crm, uint8_t rt)
2428 {
2429     /*
2430      * Generate code to emit an UNDEF with correct syndrome
2431      * information for a failed system register access.
2432      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2433      * but if FEAT_IDST is implemented then read accesses to registers
2434      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2435      * syndrome.
2436      */
2437     uint32_t syndrome;
2438 
2439     if (isread && dc_isar_feature(aa64_ids, s) &&
2440         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2441         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2442     } else {
2443         syndrome = syn_uncategorized();
2444     }
2445     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2446 }
2447 
2448 /* MRS - move from system register
2449  * MSR (register) - move to system register
2450  * SYS
2451  * SYSL
2452  * These are all essentially the same insn in 'read' and 'write'
2453  * versions, with varying op0 fields.
2454  */
2455 static void handle_sys(DisasContext *s, bool isread,
2456                        unsigned int op0, unsigned int op1, unsigned int op2,
2457                        unsigned int crn, unsigned int crm, unsigned int rt)
2458 {
2459     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2460                                       crn, crm, op0, op1, op2);
2461     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2462     bool need_exit_tb = false;
2463     bool nv_trap_to_el2 = false;
2464     bool nv_redirect_reg = false;
2465     bool skip_fp_access_checks = false;
2466     bool nv2_mem_redirect = false;
2467     TCGv_ptr tcg_ri = NULL;
2468     TCGv_i64 tcg_rt;
2469     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2470 
2471     if (crn == 11 || crn == 15) {
2472         /*
2473          * Check for TIDCP trap, which must take precedence over
2474          * the UNDEF for "no such register" etc.
2475          */
2476         switch (s->current_el) {
2477         case 0:
2478             if (dc_isar_feature(aa64_tidcp1, s)) {
2479                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2480             }
2481             break;
2482         case 1:
2483             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2484             break;
2485         }
2486     }
2487 
2488     if (!ri) {
2489         /* Unknown register; this might be a guest error or a QEMU
2490          * unimplemented feature.
2491          */
2492         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2493                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2494                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2495         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2496         return;
2497     }
2498 
2499     if (s->nv2 && ri->nv2_redirect_offset) {
2500         /*
2501          * Some registers always redirect to memory; some only do so if
2502          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2503          * pairs which share an offset; see the table in R_CSRPQ).
2504          */
2505         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2506             nv2_mem_redirect = s->nv1;
2507         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2508             nv2_mem_redirect = !s->nv1;
2509         } else {
2510             nv2_mem_redirect = true;
2511         }
2512     }
2513 
2514     /* Check access permissions */
2515     if (!cp_access_ok(s->current_el, ri, isread)) {
2516         /*
2517          * FEAT_NV/NV2 handling does not do the usual FP access checks
2518          * for registers only accessible at EL2 (though it *does* do them
2519          * for registers accessible at EL1).
2520          */
2521         skip_fp_access_checks = true;
2522         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2523             /*
2524              * This is one of the few EL2 registers which should redirect
2525              * to the equivalent EL1 register. We do that after running
2526              * the EL2 register's accessfn.
2527              */
2528             nv_redirect_reg = true;
2529             assert(!nv2_mem_redirect);
2530         } else if (nv2_mem_redirect) {
2531             /*
2532              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2533              * UNDEF to EL1.
2534              */
2535         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2536             /*
2537              * This register / instruction exists and is an EL2 register, so
2538              * we must trap to EL2 if accessed in nested virtualization EL1
2539              * instead of UNDEFing. We'll do that after the usual access checks.
2540              * (This makes a difference only for a couple of registers like
2541              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2542              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2543              * an accessfn which does nothing when called from EL1, because
2544              * the trap-to-EL3 controls which would apply to that register
2545              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2546              */
2547             nv_trap_to_el2 = true;
2548         } else {
2549             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2550             return;
2551         }
2552     }
2553 
2554     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2555         /* Emit code to perform further access permissions checks at
2556          * runtime; this may result in an exception.
2557          */
2558         gen_a64_update_pc(s, 0);
2559         tcg_ri = tcg_temp_new_ptr();
2560         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2561                                        tcg_constant_i32(key),
2562                                        tcg_constant_i32(syndrome),
2563                                        tcg_constant_i32(isread));
2564     } else if (ri->type & ARM_CP_RAISES_EXC) {
2565         /*
2566          * The readfn or writefn might raise an exception;
2567          * synchronize the CPU state in case it does.
2568          */
2569         gen_a64_update_pc(s, 0);
2570     }
2571 
2572     if (!skip_fp_access_checks) {
2573         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2574             return;
2575         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2576             return;
2577         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2578             return;
2579         }
2580     }
2581 
2582     if (nv_trap_to_el2) {
2583         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2584         return;
2585     }
2586 
2587     if (nv_redirect_reg) {
2588         /*
2589          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2590          * Conveniently in all cases the encoding of the EL1 register is
2591          * identical to the EL2 register except that opc1 is 0.
2592          * Get the reginfo for the EL1 register to use for the actual access.
2593          * We don't use the EL1 register's access function, and
2594          * fine-grained-traps on EL1 also do not apply here.
2595          */
2596         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2597                                  crn, crm, op0, 0, op2);
2598         ri = get_arm_cp_reginfo(s->cp_regs, key);
2599         assert(ri);
2600         assert(cp_access_ok(s->current_el, ri, isread));
2601         /*
2602          * We might not have done an update_pc earlier, so check we don't
2603          * need it. We could support this in future if necessary.
2604          */
2605         assert(!(ri->type & ARM_CP_RAISES_EXC));
2606     }
2607 
2608     if (nv2_mem_redirect) {
2609         /*
2610          * This system register is being redirected into an EL2 memory access.
2611          * This means it is not an IO operation, doesn't change hflags,
2612          * and need not end the TB, because it has no side effects.
2613          *
2614          * The access is 64-bit single copy atomic, guaranteed aligned because
2615          * of the definition of VNCR_EL2. Its endianness depends on
2616          * SCTLR_EL2.EE, not on the data endianness of EL1.
2617          * It is done under either the EL2 translation regime or the EL2&0
2618          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2619          * PSTATE.PAN is 0.
2620          */
2621         TCGv_i64 ptr = tcg_temp_new_i64();
2622         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2623         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2624         int memidx = arm_to_core_mmu_idx(armmemidx);
2625         uint32_t syn;
2626 
2627         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2628 
2629         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2630         tcg_gen_addi_i64(ptr, ptr,
2631                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2632         tcg_rt = cpu_reg(s, rt);
2633 
2634         syn = syn_data_abort_vncr(0, !isread, 0);
2635         disas_set_insn_syndrome(s, syn);
2636         if (isread) {
2637             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2638         } else {
2639             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2640         }
2641         return;
2642     }
2643 
2644     /* Handle special cases first */
2645     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2646     case 0:
2647         break;
2648     case ARM_CP_NOP:
2649         return;
2650     case ARM_CP_NZCV:
2651         tcg_rt = cpu_reg(s, rt);
2652         if (isread) {
2653             gen_get_nzcv(tcg_rt);
2654         } else {
2655             gen_set_nzcv(tcg_rt);
2656         }
2657         return;
2658     case ARM_CP_CURRENTEL:
2659     {
2660         /*
2661          * Reads as current EL value from pstate, which is
2662          * guaranteed to be constant by the tb flags.
2663          * For nested virt we should report EL2.
2664          */
2665         int el = s->nv ? 2 : s->current_el;
2666         tcg_rt = cpu_reg(s, rt);
2667         tcg_gen_movi_i64(tcg_rt, el << 2);
2668         return;
2669     }
2670     case ARM_CP_DC_ZVA:
2671         /* Writes clear the aligned block of memory which rt points into. */
2672         if (s->mte_active[0]) {
2673             int desc = 0;
2674 
2675             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2676             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2677             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2678 
2679             tcg_rt = tcg_temp_new_i64();
2680             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2681                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2682         } else {
2683             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2684         }
2685         gen_helper_dc_zva(tcg_env, tcg_rt);
2686         return;
2687     case ARM_CP_DC_GVA:
2688         {
2689             TCGv_i64 clean_addr, tag;
2690 
2691             /*
2692              * DC_GVA, like DC_ZVA, requires that we supply the original
2693              * pointer for an invalid page.  Probe that address first.
2694              */
2695             tcg_rt = cpu_reg(s, rt);
2696             clean_addr = clean_data_tbi(s, tcg_rt);
2697             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2698 
2699             if (s->ata[0]) {
2700                 /* Extract the tag from the register to match STZGM.  */
2701                 tag = tcg_temp_new_i64();
2702                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2703                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2704             }
2705         }
2706         return;
2707     case ARM_CP_DC_GZVA:
2708         {
2709             TCGv_i64 clean_addr, tag;
2710 
2711             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2712             tcg_rt = cpu_reg(s, rt);
2713             clean_addr = clean_data_tbi(s, tcg_rt);
2714             gen_helper_dc_zva(tcg_env, clean_addr);
2715 
2716             if (s->ata[0]) {
2717                 /* Extract the tag from the register to match STZGM.  */
2718                 tag = tcg_temp_new_i64();
2719                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2720                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2721             }
2722         }
2723         return;
2724     default:
2725         g_assert_not_reached();
2726     }
2727 
2728     if (ri->type & ARM_CP_IO) {
2729         /* I/O operations must end the TB here (whether read or write) */
2730         need_exit_tb = translator_io_start(&s->base);
2731     }
2732 
2733     tcg_rt = cpu_reg(s, rt);
2734 
2735     if (isread) {
2736         if (ri->type & ARM_CP_CONST) {
2737             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2738         } else if (ri->readfn) {
2739             if (!tcg_ri) {
2740                 tcg_ri = gen_lookup_cp_reg(key);
2741             }
2742             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2743         } else {
2744             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2745         }
2746     } else {
2747         if (ri->type & ARM_CP_CONST) {
2748             /* If not forbidden by access permissions, treat as WI */
2749             return;
2750         } else if (ri->writefn) {
2751             if (!tcg_ri) {
2752                 tcg_ri = gen_lookup_cp_reg(key);
2753             }
2754             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2755         } else {
2756             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2757         }
2758     }
2759 
2760     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2761         /*
2762          * A write to any coprocessor register that ends a TB
2763          * must rebuild the hflags for the next TB.
2764          */
2765         gen_rebuild_hflags(s);
2766         /*
2767          * We default to ending the TB on a coprocessor register write,
2768          * but allow this to be suppressed by the register definition
2769          * (usually only necessary to work around guest bugs).
2770          */
2771         need_exit_tb = true;
2772     }
2773     if (need_exit_tb) {
2774         s->base.is_jmp = DISAS_UPDATE_EXIT;
2775     }
2776 }
2777 
2778 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2779 {
2780     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2781     return true;
2782 }
2783 
2784 static bool trans_SVC(DisasContext *s, arg_i *a)
2785 {
2786     /*
2787      * For SVC, HVC and SMC we advance the single-step state
2788      * machine before taking the exception. This is architecturally
2789      * mandated, to ensure that single-stepping a system call
2790      * instruction works properly.
2791      */
2792     uint32_t syndrome = syn_aa64_svc(a->imm);
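    /* A fine-grained trap on SVC takes precedence and is taken to EL2. */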
2793     if (s->fgt_svc) {
2794         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2795         return true;
2796     }
2797     gen_ss_advance(s);
2798     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2799     return true;
2800 }
2801 
2802 static bool trans_HVC(DisasContext *s, arg_i *a)
2803 {
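    /* HVC executed at EL3 takes its exception to EL3, otherwise to EL2. */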
2804     int target_el = s->current_el == 3 ? 3 : 2;
2805 
2806     if (s->current_el == 0) {
2807         unallocated_encoding(s);
2808         return true;
2809     }
2810     /*
2811      * The pre HVC helper handles cases when HVC gets trapped
2812      * as an undefined insn by runtime configuration.
2813      */
2814     gen_a64_update_pc(s, 0);
2815     gen_helper_pre_hvc(tcg_env);
2816     /* Architecture requires ss advance before we do the actual work */
2817     gen_ss_advance(s);
2818     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2819     return true;
2820 }
2821 
2822 static bool trans_SMC(DisasContext *s, arg_i *a)
2823 {
2824     if (s->current_el == 0) {
2825         unallocated_encoding(s);
2826         return true;
2827     }
2828     gen_a64_update_pc(s, 0);
2829     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2830     /* Architecture requires ss advance before we do the actual work */
2831     gen_ss_advance(s);
2832     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2833     return true;
2834 }
2835 
2836 static bool trans_BRK(DisasContext *s, arg_i *a)
2837 {
2838     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2839     return true;
2840 }
2841 
2842 static bool trans_HLT(DisasContext *s, arg_i *a)
2843 {
2844     /*
2845      * HLT. This has two purposes.
2846      * Architecturally, it is an external halting debug instruction.
2847      * Since QEMU doesn't implement external debug, we treat this as
2848      * required when halting debug is disabled: it will UNDEF.
2849      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2850      */
2851     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2852         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2853     } else {
2854         unallocated_encoding(s);
2855     }
2856     return true;
2857 }
2858 
2859 /*
2860  * Load/Store exclusive instructions are implemented by remembering
2861  * the value/address loaded, and seeing if these are the same
2862  * when the store is performed. This is not actually the architecturally
2863  * mandated semantics, but it works for typical guest code sequences
2864  * and avoids having to monitor regular stores.
2865  *
2866  * The store exclusive uses the atomic cmpxchg primitives to avoid
2867  * races in multi-threaded linux-user and when MTTCG softmmu is
2868  * enabled.
2869  */
2870 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2871                                int size, bool is_pair)
2872 {
2873     int idx = get_mem_index(s);
2874     TCGv_i64 dirty_addr, clean_addr;
2875     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2876 
2877     s->is_ldex = true;
2878     dirty_addr = cpu_reg_sp(s, rn);
2879     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2880 
2881     g_assert(size <= 3);
2882     if (is_pair) {
2883         g_assert(size >= 2);
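        /*
         * A 32-bit pair is loaded as a single 64-bit access and split
         * between Rt and Rt2; a 64-bit pair uses a single 128-bit access.
         */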
2884         if (size == 2) {
2885             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2886             if (s->be_data == MO_LE) {
2887                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2888                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2889             } else {
2890                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2891                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2892             }
2893         } else {
2894             TCGv_i128 t16 = tcg_temp_new_i128();
2895 
2896             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2897 
2898             if (s->be_data == MO_LE) {
2899                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2900                                       cpu_exclusive_high, t16);
2901             } else {
2902                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2903                                       cpu_exclusive_val, t16);
2904             }
2905             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2906             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2907         }
2908     } else {
2909         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2910         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2911     }
2912     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2913 }
2914 
2915 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2916                                 int rn, int size, int is_pair)
2917 {
2918     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2919      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2920      *     [addr] = {Rt};
2921      *     if (is_pair) {
2922      *         [addr + datasize] = {Rt2};
2923      *     }
2924      *     {Rd} = 0;
2925      * } else {
2926      *     {Rd} = 1;
2927      * }
2928      * env->exclusive_addr = -1;
2929      */
2930     TCGLabel *fail_label = gen_new_label();
2931     TCGLabel *done_label = gen_new_label();
2932     TCGv_i64 tmp, clean_addr;
2933     MemOp memop;
2934 
2935     /*
2936      * FIXME: We are out of spec here.  We have recorded only the address
2937      * from load_exclusive, not the entire range, and we assume that the
2938      * size of the access on both sides matches.  The architecture allows the
2939      * store to be smaller than the load, so long as the stored bytes are
2940      * within the range recorded by the load.
2941      */
2942 
2943     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2944     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2945     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2946 
2947     /*
2948      * The write, and any associated faults, only happen if the virtual
2949      * and physical addresses pass the exclusive monitor check.  These
2950      * faults are exceedingly unlikely, because normally the guest uses
2951      * the exact same address register for the load_exclusive, and we
2952      * would have recognized these faults there.
2953      *
2954      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2955      * unaligned 4-byte write within the range of an aligned 8-byte load.
2956      * With LSE2, the store would need to cross a 16-byte boundary when the
2957      * load did not, which would mean the store is outside the range
2958      * recorded for the monitor, which would have failed a corrected monitor
2959      * check above.  For now, we assume no size change and retain the
2960      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2961      *
2962      * It is possible to trigger an MTE fault, by performing the load with
2963      * a virtual address with a valid tag and performing the store with the
2964      * same virtual address and a different invalid tag.
2965      */
2966     memop = size + is_pair;
2967     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2968         memop |= MO_ALIGN;
2969     }
2970     memop = finalize_memop(s, memop);
2971     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2972 
2973     tmp = tcg_temp_new_i64();
2974     if (is_pair) {
2975         if (size == 2) {
2976             if (s->be_data == MO_LE) {
2977                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2978             } else {
2979                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2980             }
2981             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2982                                        cpu_exclusive_val, tmp,
2983                                        get_mem_index(s), memop);
2984             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2985         } else {
2986             TCGv_i128 t16 = tcg_temp_new_i128();
2987             TCGv_i128 c16 = tcg_temp_new_i128();
2988             TCGv_i64 a, b;
2989 
2990             if (s->be_data == MO_LE) {
2991                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2992                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2993                                         cpu_exclusive_high);
2994             } else {
2995                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2996                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2997                                         cpu_exclusive_val);
2998             }
2999 
3000             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3001                                         get_mem_index(s), memop);
3002 
3003             a = tcg_temp_new_i64();
3004             b = tcg_temp_new_i64();
3005             if (s->be_data == MO_LE) {
3006                 tcg_gen_extr_i128_i64(a, b, t16);
3007             } else {
3008                 tcg_gen_extr_i128_i64(b, a, t16);
3009             }
3010 
3011             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3012             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3013             tcg_gen_or_i64(tmp, a, b);
3014 
3015             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3016         }
3017     } else {
3018         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3019                                    cpu_reg(s, rt), get_mem_index(s), memop);
3020         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3021     }
3022     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3023     tcg_gen_br(done_label);
3024 
3025     gen_set_label(fail_label);
3026     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3027     gen_set_label(done_label);
3028     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3029 }
3030 
3031 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3032                                  int rn, int size)
3033 {
3034     TCGv_i64 tcg_rs = cpu_reg(s, rs);
3035     TCGv_i64 tcg_rt = cpu_reg(s, rt);
3036     int memidx = get_mem_index(s);
3037     TCGv_i64 clean_addr;
3038     MemOp memop;
3039 
3040     if (rn == 31) {
3041         gen_check_sp_alignment(s);
3042     }
3043     memop = check_atomic_align(s, rn, size);
3044     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3045     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3046                                memidx, memop);
3047 }
3048 
3049 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3050                                       int rn, int size)
3051 {
3052     TCGv_i64 s1 = cpu_reg(s, rs);
3053     TCGv_i64 s2 = cpu_reg(s, rs + 1);
3054     TCGv_i64 t1 = cpu_reg(s, rt);
3055     TCGv_i64 t2 = cpu_reg(s, rt + 1);
3056     TCGv_i64 clean_addr;
3057     int memidx = get_mem_index(s);
3058     MemOp memop;
3059 
3060     if (rn == 31) {
3061         gen_check_sp_alignment(s);
3062     }
3063 
3064     /* This is a single atomic access, despite the "pair". */
3065     memop = check_atomic_align(s, rn, size + 1);
3066     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3067 
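    /* A 32-bit pair uses a single 64-bit cmpxchg; a 64-bit pair a 128-bit one. */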
3068     if (size == 2) {
3069         TCGv_i64 cmp = tcg_temp_new_i64();
3070         TCGv_i64 val = tcg_temp_new_i64();
3071 
3072         if (s->be_data == MO_LE) {
3073             tcg_gen_concat32_i64(val, t1, t2);
3074             tcg_gen_concat32_i64(cmp, s1, s2);
3075         } else {
3076             tcg_gen_concat32_i64(val, t2, t1);
3077             tcg_gen_concat32_i64(cmp, s2, s1);
3078         }
3079 
3080         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3081 
3082         if (s->be_data == MO_LE) {
3083             tcg_gen_extr32_i64(s1, s2, cmp);
3084         } else {
3085             tcg_gen_extr32_i64(s2, s1, cmp);
3086         }
3087     } else {
3088         TCGv_i128 cmp = tcg_temp_new_i128();
3089         TCGv_i128 val = tcg_temp_new_i128();
3090 
3091         if (s->be_data == MO_LE) {
3092             tcg_gen_concat_i64_i128(val, t1, t2);
3093             tcg_gen_concat_i64_i128(cmp, s1, s2);
3094         } else {
3095             tcg_gen_concat_i64_i128(val, t2, t1);
3096             tcg_gen_concat_i64_i128(cmp, s2, s1);
3097         }
3098 
3099         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3100 
3101         if (s->be_data == MO_LE) {
3102             tcg_gen_extr_i128_i64(s1, s2, cmp);
3103         } else {
3104             tcg_gen_extr_i128_i64(s2, s1, cmp);
3105         }
3106     }
3107 }
3108 
3109 /*
3110  * Compute the ISS.SF bit for syndrome information if an exception
3111  * is taken on a load or store. This indicates whether the instruction
3112  * is accessing a 32-bit or 64-bit register. This logic is derived
3113  * from the ARMv8 specs for LDR (Shared decode for all encodings).
3114  */
3115 static bool ldst_iss_sf(int size, bool sign, bool ext)
3116 {
3117 
3118     if (sign) {
3119         /*
3120          * Signed loads are 64 bit results if we are not going to
3121          * do a zero-extend from 32 to 64 after the load.
3122          * (For a store, sign and ext are always false.)
3123          */
3124         return !ext;
3125     } else {
3126         /* Unsigned loads/stores work at the specified size */
3127         return size == MO_64;
3128     }
3129 }
3130 
3131 static bool trans_STXR(DisasContext *s, arg_stxr *a)
3132 {
3133     if (a->rn == 31) {
3134         gen_check_sp_alignment(s);
3135     }
3136     if (a->lasr) {
3137         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3138     }
3139     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3140     return true;
3141 }
3142 
3143 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3144 {
3145     if (a->rn == 31) {
3146         gen_check_sp_alignment(s);
3147     }
3148     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3149     if (a->lasr) {
3150         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3151     }
3152     return true;
3153 }
3154 
3155 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3156 {
3157     TCGv_i64 clean_addr;
3158     MemOp memop;
3159     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3160 
3161     /*
3162      * StoreLORelease is the same as Store-Release for QEMU, but
3163      * needs the feature-test.
3164      */
3165     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3166         return false;
3167     }
3168     /* Generate ISS for non-exclusive accesses including LASR.  */
3169     if (a->rn == 31) {
3170         gen_check_sp_alignment(s);
3171     }
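    /* Release semantics: order all earlier accesses before the store. */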
3172     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3173     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3174     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3175                                 true, a->rn != 31, memop);
3176     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3177               iss_sf, a->lasr);
3178     return true;
3179 }
3180 
3181 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3182 {
3183     TCGv_i64 clean_addr;
3184     MemOp memop;
3185     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3186 
3187     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3188     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3189         return false;
3190     }
3191     /* Generate ISS for non-exclusive accesses including LASR.  */
3192     if (a->rn == 31) {
3193         gen_check_sp_alignment(s);
3194     }
3195     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3196     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3197                                 false, a->rn != 31, memop);
3198     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3199               a->rt, iss_sf, a->lasr);
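    /* Acquire semantics: order the load before all later accesses. */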
3200     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3201     return true;
3202 }
3203 
3204 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3205 {
3206     if (a->rn == 31) {
3207         gen_check_sp_alignment(s);
3208     }
3209     if (a->lasr) {
3210         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3211     }
3212     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3213     return true;
3214 }
3215 
3216 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3217 {
3218     if (a->rn == 31) {
3219         gen_check_sp_alignment(s);
3220     }
3221     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3222     if (a->lasr) {
3223         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3224     }
3225     return true;
3226 }
3227 
3228 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3229 {
3230     if (!dc_isar_feature(aa64_atomics, s)) {
3231         return false;
3232     }
3233     if (((a->rt | a->rs) & 1) != 0) {
3234         return false;
3235     }
3236 
3237     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3238     return true;
3239 }
3240 
3241 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3242 {
3243     if (!dc_isar_feature(aa64_atomics, s)) {
3244         return false;
3245     }
3246     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3247     return true;
3248 }
3249 
3250 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3251 {
3252     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3253     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3254     TCGv_i64 clean_addr = tcg_temp_new_i64();
3255     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3256 
3257     gen_pc_plus_diff(s, clean_addr, a->imm);
3258     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3259               false, true, a->rt, iss_sf, false);
3260     return true;
3261 }
3262 
3263 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3264 {
3265     /* Load register (literal), vector version */
3266     TCGv_i64 clean_addr;
3267     MemOp memop;
3268 
3269     if (!fp_access_check(s)) {
3270         return true;
3271     }
3272     memop = finalize_memop_asimd(s, a->sz);
3273     clean_addr = tcg_temp_new_i64();
3274     gen_pc_plus_diff(s, clean_addr, a->imm);
3275     do_fp_ld(s, a->rt, clean_addr, memop);
3276     return true;
3277 }
3278 
3279 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3280                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3281                                  uint64_t offset, bool is_store, MemOp mop)
3282 {
3283     if (a->rn == 31) {
3284         gen_check_sp_alignment(s);
3285     }
3286 
3287     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3288     if (!a->p) {
3289         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3290     }
3291 
3292     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3293                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3294 }
3295 
3296 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3297                                   TCGv_i64 dirty_addr, uint64_t offset)
3298 {
3299     if (a->w) {
3300         if (a->p) {
3301             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3302         }
3303         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3304     }
3305 }
3306 
3307 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3308 {
3309     uint64_t offset = a->imm << a->sz;
3310     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3311     MemOp mop = finalize_memop(s, a->sz);
3312 
3313     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3314     tcg_rt = cpu_reg(s, a->rt);
3315     tcg_rt2 = cpu_reg(s, a->rt2);
3316     /*
3317      * We built mop above for the single logical access -- rebuild it
3318      * now for the paired operation.
3319      *
3320      * With LSE2, non-sign-extending pairs are treated atomically if
3321      * aligned, and if unaligned one of the pair will be completely
3322      * within a 16-byte block and that element will be atomic.
3323      * Otherwise each element is separately atomic.
3324      * In all cases, issue one operation with the correct atomicity.
3325      */
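    /*
     * For example, an STP of two X registers (a->sz == MO_64) becomes a
     * single MO_128 store below, with 8-byte alignment requested when
     * alignment checking is enabled and the pair atomicity attached by
     * finalize_memop_pair().
     */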
3326     mop = a->sz + 1;
3327     if (s->align_mem) {
3328         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3329     }
3330     mop = finalize_memop_pair(s, mop);
3331     if (a->sz == 2) {
3332         TCGv_i64 tmp = tcg_temp_new_i64();
3333 
3334         if (s->be_data == MO_LE) {
3335             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3336         } else {
3337             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3338         }
3339         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3340     } else {
3341         TCGv_i128 tmp = tcg_temp_new_i128();
3342 
3343         if (s->be_data == MO_LE) {
3344             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3345         } else {
3346             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3347         }
3348         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3349     }
3350     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3351     return true;
3352 }
3353 
3354 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3355 {
3356     uint64_t offset = a->imm << a->sz;
3357     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3358     MemOp mop = finalize_memop(s, a->sz);
3359 
3360     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3361     tcg_rt = cpu_reg(s, a->rt);
3362     tcg_rt2 = cpu_reg(s, a->rt2);
3363 
3364     /*
3365      * We built mop above for the single logical access -- rebuild it
3366      * now for the paired operation.
3367      *
3368      * With LSE2, non-sign-extending pairs are treated atomically if
3369      * aligned, and if unaligned one of the pair will be completely
3370      * within a 16-byte block and that element will be atomic.
3371      * Otherwise each element is separately atomic.
3372      * In all cases, issue one operation with the correct atomicity.
3373      *
3374      * This treats sign-extending loads like zero-extending loads,
3375      * since that reuses the most code below.
3376      */
3377     mop = a->sz + 1;
3378     if (s->align_mem) {
3379         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3380     }
3381     mop = finalize_memop_pair(s, mop);
3382     if (a->sz == 2) {
3383         int o2 = s->be_data == MO_LE ? 32 : 0;
3384         int o1 = o2 ^ 32;
3385 
3386         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3387         if (a->sign) {
3388             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3389             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3390         } else {
3391             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3392             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3393         }
3394     } else {
3395         TCGv_i128 tmp = tcg_temp_new_i128();
3396 
3397         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3398         if (s->be_data == MO_LE) {
3399             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3400         } else {
3401             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3402         }
3403     }
3404     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3405     return true;
3406 }
3407 
3408 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3409 {
3410     uint64_t offset = a->imm << a->sz;
3411     TCGv_i64 clean_addr, dirty_addr;
3412     MemOp mop;
3413 
3414     if (!fp_access_check(s)) {
3415         return true;
3416     }
3417 
3418     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3419     mop = finalize_memop_asimd(s, a->sz);
3420     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3421     do_fp_st(s, a->rt, clean_addr, mop);
3422     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3423     do_fp_st(s, a->rt2, clean_addr, mop);
3424     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3425     return true;
3426 }
3427 
3428 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3429 {
3430     uint64_t offset = a->imm << a->sz;
3431     TCGv_i64 clean_addr, dirty_addr;
3432     MemOp mop;
3433 
3434     if (!fp_access_check(s)) {
3435         return true;
3436     }
3437 
3438     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3439     mop = finalize_memop_asimd(s, a->sz);
3440     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3441     do_fp_ld(s, a->rt, clean_addr, mop);
3442     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3443     do_fp_ld(s, a->rt2, clean_addr, mop);
3444     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3445     return true;
3446 }
3447 
3448 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3449 {
3450     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3451     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3452     MemOp mop;
3453     TCGv_i128 tmp;
3454 
3455     /* STGP only comes in one size. */
3456     tcg_debug_assert(a->sz == MO_64);
3457 
3458     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3459         return false;
3460     }
3461 
3462     if (a->rn == 31) {
3463         gen_check_sp_alignment(s);
3464     }
3465 
3466     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3467     if (!a->p) {
3468         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3469     }
3470 
3471     clean_addr = clean_data_tbi(s, dirty_addr);
3472     tcg_rt = cpu_reg(s, a->rt);
3473     tcg_rt2 = cpu_reg(s, a->rt2);
3474 
3475     /*
3476      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3477      * and one tag operation.  We implement it as one single aligned 16-byte
3478      * memory operation for convenience.  Note that the alignment ensures
3479      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3480      */
3481     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3482 
3483     tmp = tcg_temp_new_i128();
3484     if (s->be_data == MO_LE) {
3485         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3486     } else {
3487         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3488     }
3489     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3490 
3491     /* Perform the tag store, if tag access enabled. */
3492     if (s->ata[0]) {
3493         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3494             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3495         } else {
3496             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3497         }
3498     }
3499 
3500     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3501     return true;
3502 }
3503 
3504 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3505                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3506                                  uint64_t offset, bool is_store, MemOp mop)
3507 {
3508     int memidx;
3509 
3510     if (a->rn == 31) {
3511         gen_check_sp_alignment(s);
3512     }
3513 
3514     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3515     if (!a->p) {
3516         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3517     }
3518     memidx = get_a64_user_mem_index(s, a->unpriv);
3519     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3520                                         a->w || a->rn != 31,
3521                                         mop, a->unpriv, memidx);
3522 }
3523 
3524 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3525                                   TCGv_i64 dirty_addr, uint64_t offset)
3526 {
3527     if (a->w) {
3528         if (a->p) {
3529             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3530         }
3531         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3532     }
3533 }
3534 
3535 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3536 {
3537     bool iss_sf, iss_valid = !a->w;
3538     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3539     int memidx = get_a64_user_mem_index(s, a->unpriv);
3540     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3541 
3542     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3543 
3544     tcg_rt = cpu_reg(s, a->rt);
3545     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3546 
3547     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3548                      iss_valid, a->rt, iss_sf, false);
3549     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3550     return true;
3551 }
3552 
3553 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3554 {
3555     bool iss_sf, iss_valid = !a->w;
3556     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3557     int memidx = get_a64_user_mem_index(s, a->unpriv);
3558     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3559 
3560     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3561 
3562     tcg_rt = cpu_reg(s, a->rt);
3563     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3564 
3565     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3566                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3567     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3568     return true;
3569 }
3570 
3571 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3572 {
3573     TCGv_i64 clean_addr, dirty_addr;
3574     MemOp mop;
3575 
3576     if (!fp_access_check(s)) {
3577         return true;
3578     }
3579     mop = finalize_memop_asimd(s, a->sz);
3580     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3581     do_fp_st(s, a->rt, clean_addr, mop);
3582     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3583     return true;
3584 }
3585 
3586 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3587 {
3588     TCGv_i64 clean_addr, dirty_addr;
3589     MemOp mop;
3590 
3591     if (!fp_access_check(s)) {
3592         return true;
3593     }
3594     mop = finalize_memop_asimd(s, a->sz);
3595     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3596     do_fp_ld(s, a->rt, clean_addr, mop);
3597     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3598     return true;
3599 }
3600 
3601 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3602                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3603                              bool is_store, MemOp memop)
3604 {
3605     TCGv_i64 tcg_rm;
3606 
3607     if (a->rn == 31) {
3608         gen_check_sp_alignment(s);
3609     }
3610     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3611 
3612     tcg_rm = read_cpu_reg(s, a->rm, 1);
3613     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3614 
3615     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3616     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3617 }
3618 
3619 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3620 {
3621     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3622     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3623     MemOp memop;
3624 
3625     if (extract32(a->opt, 1, 1) == 0) {
3626         return false;
3627     }
3628 
3629     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3630     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3631     tcg_rt = cpu_reg(s, a->rt);
3632     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3633               a->ext, true, a->rt, iss_sf, false);
3634     return true;
3635 }
3636 
3637 static bool trans_STR(DisasContext *s, arg_ldst *a)
3638 {
3639     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3640     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3641     MemOp memop;
3642 
3643     if (extract32(a->opt, 1, 1) == 0) {
3644         return false;
3645     }
3646 
3647     memop = finalize_memop(s, a->sz);
3648     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3649     tcg_rt = cpu_reg(s, a->rt);
3650     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3651     return true;
3652 }
3653 
3654 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3655 {
3656     TCGv_i64 clean_addr, dirty_addr;
3657     MemOp memop;
3658 
3659     if (extract32(a->opt, 1, 1) == 0) {
3660         return false;
3661     }
3662 
3663     if (!fp_access_check(s)) {
3664         return true;
3665     }
3666 
3667     memop = finalize_memop_asimd(s, a->sz);
3668     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3669     do_fp_ld(s, a->rt, clean_addr, memop);
3670     return true;
3671 }
3672 
3673 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3674 {
3675     TCGv_i64 clean_addr, dirty_addr;
3676     MemOp memop;
3677 
3678     if (extract32(a->opt, 1, 1) == 0) {
3679         return false;
3680     }
3681 
3682     if (!fp_access_check(s)) {
3683         return true;
3684     }
3685 
3686     memop = finalize_memop_asimd(s, a->sz);
3687     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3688     do_fp_st(s, a->rt, clean_addr, memop);
3689     return true;
3690 }
3691 
3692 
3693 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3694                          int sign, bool invert)
3695 {
3696     MemOp mop = a->sz | sign;
3697     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3698 
3699     if (a->rn == 31) {
3700         gen_check_sp_alignment(s);
3701     }
3702     mop = check_atomic_align(s, a->rn, mop);
3703     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3704                                 a->rn != 31, mop);
3705     tcg_rs = read_cpu_reg(s, a->rs, true);
3706     tcg_rt = cpu_reg(s, a->rt);
3707     if (invert) {
3708         tcg_gen_not_i64(tcg_rs, tcg_rs);
3709     }
3710     /*
3711      * The tcg atomic primitives are all full barriers.  Therefore we
3712      * can ignore the Acquire and Release bits of this instruction.
3713      */
3714     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3715 
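    /*
     * Only the signed min/max ops pass MO_SIGN here (see the TRANS_FEAT
     * list below); the architectural result in Xt is the old memory
     * value zero-extended to the access size, so strip the sign
     * extension that the load performed.
     */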
3716     if (mop & MO_SIGN) {
3717         switch (a->sz) {
3718         case MO_8:
3719             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3720             break;
3721         case MO_16:
3722             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3723             break;
3724         case MO_32:
3725             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3726             break;
3727         case MO_64:
3728             break;
3729         default:
3730             g_assert_not_reached();
3731         }
3732     }
3733     return true;
3734 }
3735 
3736 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3737 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3738 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3739 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3740 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3741 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3742 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3743 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3744 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3745 
3746 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3747 {
3748     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3749     TCGv_i64 clean_addr;
3750     MemOp mop;
3751 
3752     if (!dc_isar_feature(aa64_atomics, s) ||
3753         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3754         return false;
3755     }
3756     if (a->rn == 31) {
3757         gen_check_sp_alignment(s);
3758     }
3759     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3760     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3761                                 a->rn != 31, mop);
3762     /*
3763      * LDAPR* are a special case because they are a simple load, not a
3764      * fetch-and-do-something op.
3765      * The architectural consistency requirements here are weaker than
3766      * full load-acquire (we only need "load-acquire processor consistent"),
3767      * but we choose to implement them as full LDAQ.
3768      */
3769     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3770               true, a->rt, iss_sf, true);
3771     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3772     return true;
3773 }
3774 
3775 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3776 {
3777     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3778     MemOp memop;
3779 
3780     /* Load with pointer authentication */
3781     if (!dc_isar_feature(aa64_pauth, s)) {
3782         return false;
3783     }
3784 
3785     if (a->rn == 31) {
3786         gen_check_sp_alignment(s);
3787     }
3788     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3789 
3790     if (s->pauth_active) {
3791         if (!a->m) {
3792             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3793                                       tcg_constant_i64(0));
3794         } else {
3795             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3796                                       tcg_constant_i64(0));
3797         }
3798     }
3799 
3800     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3801 
3802     memop = finalize_memop(s, MO_64);
3803 
3804     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3805     clean_addr = gen_mte_check1(s, dirty_addr, false,
3806                                 a->w || a->rn != 31, memop);
3807 
3808     tcg_rt = cpu_reg(s, a->rt);
3809     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3810               /* extend */ false, /* iss_valid */ !a->w,
3811               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3812 
3813     if (a->w) {
3814         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3815     }
3816     return true;
3817 }
3818 
3819 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3820 {
3821     TCGv_i64 clean_addr, dirty_addr;
3822     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3823     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3824 
3825     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3826         return false;
3827     }
3828 
3829     if (a->rn == 31) {
3830         gen_check_sp_alignment(s);
3831     }
3832 
3833     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3834     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3835     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3836     clean_addr = clean_data_tbi(s, dirty_addr);
3837 
3838     /*
3839      * Load-AcquirePC semantics; we implement as the slightly more
3840      * restrictive Load-Acquire.
3841      */
3842     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3843               a->rt, iss_sf, true);
3844     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3845     return true;
3846 }
3847 
3848 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3849 {
3850     TCGv_i64 clean_addr, dirty_addr;
3851     MemOp mop = a->sz;
3852     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3853 
3854     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3855         return false;
3856     }
3857 
3858     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3859 
3860     if (a->rn == 31) {
3861         gen_check_sp_alignment(s);
3862     }
3863 
3864     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3865     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3866     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3867     clean_addr = clean_data_tbi(s, dirty_addr);
3868 
3869     /* Store-Release semantics */
3870     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3871     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3872     return true;
3873 }
3874 
3875 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3876 {
3877     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3878     MemOp endian, align, mop;
3879 
3880     int total;    /* total bytes */
3881     int elements; /* elements per vector */
3882     int r;
3883     int size = a->sz;
3884 
3885     if (!a->p && a->rm != 0) {
3886         /* For non-postindexed accesses the Rm field must be 0 */
3887         return false;
3888     }
3889     if (size == 3 && !a->q && a->selem != 1) {
3890         return false;
3891     }
3892     if (!fp_access_check(s)) {
3893         return true;
3894     }
3895 
3896     if (a->rn == 31) {
3897         gen_check_sp_alignment(s);
3898     }
3899 
3900     /* For our purposes, bytes are always little-endian.  */
3901     endian = s->be_data;
3902     if (size == 0) {
3903         endian = MO_LE;
3904     }
3905 
3906     total = a->rpt * a->selem * (a->q ? 16 : 8);
3907     tcg_rn = cpu_reg_sp(s, a->rn);
3908 
3909     /*
3910      * Issue the MTE check vs the logical repeat count, before we
3911      * promote consecutive little-endian elements below.
3912      */
3913     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3914                                 finalize_memop_asimd(s, size));
3915 
3916     /*
3917      * Consecutive little-endian elements from a single register
3918      * can be promoted to a larger little-endian operation.
3919      */
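    /*
     * E.g. LD1 {v0.16b}, [x0] has selem == 1 and size == 0, so it is
     * performed as two 64-bit little-endian loads rather than sixteen
     * single-byte loads.
     */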
3920     align = MO_ALIGN;
3921     if (a->selem == 1 && endian == MO_LE) {
3922         align = pow2_align(size);
3923         size = 3;
3924     }
3925     if (!s->align_mem) {
3926         align = 0;
3927     }
3928     mop = endian | size | align;
3929 
3930     elements = (a->q ? 16 : 8) >> size;
3931     tcg_ebytes = tcg_constant_i64(1 << size);
3932     for (r = 0; r < a->rpt; r++) {
3933         int e;
3934         for (e = 0; e < elements; e++) {
3935             int xs;
3936             for (xs = 0; xs < a->selem; xs++) {
3937                 int tt = (a->rt + r + xs) % 32;
3938                 do_vec_ld(s, tt, e, clean_addr, mop);
3939                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3940             }
3941         }
3942     }
3943 
3944     /*
3945      * For non-quad operations, setting a slice of the low 64 bits of
3946      * the register clears the high 64 bits (in the ARM ARM pseudocode
3947      * this is implicit in the fact that 'rval' is a 64 bit wide
3948      * variable).  For quad operations, we might still need to zero
3949      * the high bits of SVE.
3950      */
3951     for (r = 0; r < a->rpt * a->selem; r++) {
3952         int tt = (a->rt + r) % 32;
3953         clear_vec_high(s, a->q, tt);
3954     }
3955 
3956     if (a->p) {
3957         if (a->rm == 31) {
3958             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3959         } else {
3960             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3961         }
3962     }
3963     return true;
3964 }
3965 
3966 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3967 {
3968     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3969     MemOp endian, align, mop;
3970 
3971     int total;    /* total bytes */
3972     int elements; /* elements per vector */
3973     int r;
3974     int size = a->sz;
3975 
3976     if (!a->p && a->rm != 0) {
3977         /* For non-postindexed accesses the Rm field must be 0 */
3978         return false;
3979     }
3980     if (size == 3 && !a->q && a->selem != 1) {
3981         return false;
3982     }
3983     if (!fp_access_check(s)) {
3984         return true;
3985     }
3986 
3987     if (a->rn == 31) {
3988         gen_check_sp_alignment(s);
3989     }
3990 
3991     /* For our purposes, bytes are always little-endian.  */
3992     endian = s->be_data;
3993     if (size == 0) {
3994         endian = MO_LE;
3995     }
3996 
3997     total = a->rpt * a->selem * (a->q ? 16 : 8);
3998     tcg_rn = cpu_reg_sp(s, a->rn);
3999 
4000     /*
4001      * Issue the MTE check vs the logical repeat count, before we
4002      * promote consecutive little-endian elements below.
4003      */
4004     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4005                                 finalize_memop_asimd(s, size));
4006 
4007     /*
4008      * Consecutive little-endian elements from a single register
4009      * can be promoted to a larger little-endian operation.
4010      */
4011     align = MO_ALIGN;
4012     if (a->selem == 1 && endian == MO_LE) {
4013         align = pow2_align(size);
4014         size = 3;
4015     }
4016     if (!s->align_mem) {
4017         align = 0;
4018     }
4019     mop = endian | size | align;
4020 
4021     elements = (a->q ? 16 : 8) >> size;
4022     tcg_ebytes = tcg_constant_i64(1 << size);
4023     for (r = 0; r < a->rpt; r++) {
4024         int e;
4025         for (e = 0; e < elements; e++) {
4026             int xs;
4027             for (xs = 0; xs < a->selem; xs++) {
4028                 int tt = (a->rt + r + xs) % 32;
4029                 do_vec_st(s, tt, e, clean_addr, mop);
4030                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4031             }
4032         }
4033     }
4034 
4035     if (a->p) {
4036         if (a->rm == 31) {
4037             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4038         } else {
4039             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4040         }
4041     }
4042     return true;
4043 }
4044 
4045 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4046 {
4047     int xs, total, rt;
4048     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4049     MemOp mop;
4050 
4051     if (!a->p && a->rm != 0) {
4052         return false;
4053     }
4054     if (!fp_access_check(s)) {
4055         return true;
4056     }
4057 
4058     if (a->rn == 31) {
4059         gen_check_sp_alignment(s);
4060     }
4061 
4062     total = a->selem << a->scale;
4063     tcg_rn = cpu_reg_sp(s, a->rn);
4064 
4065     mop = finalize_memop_asimd(s, a->scale);
4066     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4067                                 total, mop);
4068 
4069     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4070     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4071         do_vec_st(s, rt, a->index, clean_addr, mop);
4072         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4073     }
4074 
4075     if (a->p) {
4076         if (a->rm == 31) {
4077             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4078         } else {
4079             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4080         }
4081     }
4082     return true;
4083 }
4084 
4085 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4086 {
4087     int xs, total, rt;
4088     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4089     MemOp mop;
4090 
4091     if (!a->p && a->rm != 0) {
4092         return false;
4093     }
4094     if (!fp_access_check(s)) {
4095         return true;
4096     }
4097 
4098     if (a->rn == 31) {
4099         gen_check_sp_alignment(s);
4100     }
4101 
4102     total = a->selem << a->scale;
4103     tcg_rn = cpu_reg_sp(s, a->rn);
4104 
4105     mop = finalize_memop_asimd(s, a->scale);
4106     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4107                                 total, mop);
4108 
4109     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4110     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4111         do_vec_ld(s, rt, a->index, clean_addr, mop);
4112         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4113     }
4114 
4115     if (a->p) {
4116         if (a->rm == 31) {
4117             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4118         } else {
4119             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4120         }
4121     }
4122     return true;
4123 }
4124 
4125 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4126 {
4127     int xs, total, rt;
4128     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4129     MemOp mop;
4130 
4131     if (!a->p && a->rm != 0) {
4132         return false;
4133     }
4134     if (!fp_access_check(s)) {
4135         return true;
4136     }
4137 
4138     if (a->rn == 31) {
4139         gen_check_sp_alignment(s);
4140     }
4141 
4142     total = a->selem << a->scale;
4143     tcg_rn = cpu_reg_sp(s, a->rn);
4144 
4145     mop = finalize_memop_asimd(s, a->scale);
4146     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4147                                 total, mop);
4148 
4149     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4150     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4151         /* Load and replicate to all elements */
4152         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4153 
4154         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4155         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4156                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4157         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4158     }
4159 
4160     if (a->p) {
4161         if (a->rm == 31) {
4162             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4163         } else {
4164             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4165         }
4166     }
4167     return true;
4168 }
4169 
4170 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4171 {
4172     TCGv_i64 addr, clean_addr, tcg_rt;
4173     int size = 4 << s->dcz_blocksize;
4174 
4175     if (!dc_isar_feature(aa64_mte, s)) {
4176         return false;
4177     }
4178     if (s->current_el == 0) {
4179         return false;
4180     }
4181 
4182     if (a->rn == 31) {
4183         gen_check_sp_alignment(s);
4184     }
4185 
4186     addr = read_cpu_reg_sp(s, a->rn, true);
4187     tcg_gen_addi_i64(addr, addr, a->imm);
4188     tcg_rt = cpu_reg(s, a->rt);
4189 
4190     if (s->ata[0]) {
4191         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4192     }
4193     /*
4194      * The non-tags portion of STZGM is mostly like DC_ZVA,
4195      * except the alignment happens before the access.
4196      */
4197     clean_addr = clean_data_tbi(s, addr);
4198     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4199     gen_helper_dc_zva(tcg_env, clean_addr);
4200     return true;
4201 }
4202 
4203 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4204 {
4205     TCGv_i64 addr, clean_addr, tcg_rt;
4206 
4207     if (!dc_isar_feature(aa64_mte, s)) {
4208         return false;
4209     }
4210     if (s->current_el == 0) {
4211         return false;
4212     }
4213 
4214     if (a->rn == 31) {
4215         gen_check_sp_alignment(s);
4216     }
4217 
4218     addr = read_cpu_reg_sp(s, a->rn, true);
4219     tcg_gen_addi_i64(addr, addr, a->imm);
4220     tcg_rt = cpu_reg(s, a->rt);
4221 
4222     if (s->ata[0]) {
4223         gen_helper_stgm(tcg_env, addr, tcg_rt);
4224     } else {
4225         MMUAccessType acc = MMU_DATA_STORE;
4226         int size = 4 << s->gm_blocksize;
4227 
4228         clean_addr = clean_data_tbi(s, addr);
4229         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4230         gen_probe_access(s, clean_addr, acc, size);
4231     }
4232     return true;
4233 }
4234 
4235 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4236 {
4237     TCGv_i64 addr, clean_addr, tcg_rt;
4238 
4239     if (!dc_isar_feature(aa64_mte, s)) {
4240         return false;
4241     }
4242     if (s->current_el == 0) {
4243         return false;
4244     }
4245 
4246     if (a->rn == 31) {
4247         gen_check_sp_alignment(s);
4248     }
4249 
4250     addr = read_cpu_reg_sp(s, a->rn, true);
4251     tcg_gen_addi_i64(addr, addr, a->imm);
4252     tcg_rt = cpu_reg(s, a->rt);
4253 
4254     if (s->ata[0]) {
4255         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4256     } else {
4257         MMUAccessType acc = MMU_DATA_LOAD;
4258         int size = 4 << s->gm_blocksize;
4259 
4260         clean_addr = clean_data_tbi(s, addr);
4261         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4262         gen_probe_access(s, clean_addr, acc, size);
4263         /* The result tags are zeros.  */
4264         tcg_gen_movi_i64(tcg_rt, 0);
4265     }
4266     return true;
4267 }
4268 
4269 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4270 {
4271     TCGv_i64 addr, clean_addr, tcg_rt;
4272 
4273     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4274         return false;
4275     }
4276 
4277     if (a->rn == 31) {
4278         gen_check_sp_alignment(s);
4279     }
4280 
4281     addr = read_cpu_reg_sp(s, a->rn, true);
4282     if (!a->p) {
4283         /* pre-index or signed offset */
4284         tcg_gen_addi_i64(addr, addr, a->imm);
4285     }
4286 
4287     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4288     tcg_rt = cpu_reg(s, a->rt);
4289     if (s->ata[0]) {
4290         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4291     } else {
4292         /*
4293          * Tag access disabled: we must check for aborts on the load
4294          * from [rn+offset], and then insert a 0 tag into rt.
4295          */
4296         clean_addr = clean_data_tbi(s, addr);
4297         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4298         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4299     }
4300 
4301     if (a->w) {
4302         /* pre-index or post-index */
4303         if (a->p) {
4304             /* post-index */
4305             tcg_gen_addi_i64(addr, addr, a->imm);
4306         }
4307         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4308     }
4309     return true;
4310 }
4311 
4312 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4313 {
4314     TCGv_i64 addr, tcg_rt;
4315 
4316     if (a->rn == 31) {
4317         gen_check_sp_alignment(s);
4318     }
4319 
4320     addr = read_cpu_reg_sp(s, a->rn, true);
4321     if (!a->p) {
4322         /* pre-index or signed offset */
4323         tcg_gen_addi_i64(addr, addr, a->imm);
4324     }
4325     tcg_rt = cpu_reg_sp(s, a->rt);
4326     if (!s->ata[0]) {
4327         /*
4328          * For STG and ST2G, we need to check alignment and probe memory.
4329          * TODO: For STZG and STZ2G, we could rely on the stores below,
4330          * at least for system mode; user-only won't enforce alignment.
4331          */
4332         if (is_pair) {
4333             gen_helper_st2g_stub(tcg_env, addr);
4334         } else {
4335             gen_helper_stg_stub(tcg_env, addr);
4336         }
4337     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4338         if (is_pair) {
4339             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4340         } else {
4341             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4342         }
4343     } else {
4344         if (is_pair) {
4345             gen_helper_st2g(tcg_env, addr, tcg_rt);
4346         } else {
4347             gen_helper_stg(tcg_env, addr, tcg_rt);
4348         }
4349     }
4350 
4351     if (is_zero) {
4352         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4353         TCGv_i64 zero64 = tcg_constant_i64(0);
4354         TCGv_i128 zero128 = tcg_temp_new_i128();
4355         int mem_index = get_mem_index(s);
4356         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4357 
4358         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4359 
4360         /* This is 1 or 2 atomic 16-byte operations. */
4361         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4362         if (is_pair) {
4363             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4364             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4365         }
4366     }
4367 
4368     if (a->w) {
4369         /* pre-index or post-index */
4370         if (a->p) {
4371             /* post-index */
4372             tcg_gen_addi_i64(addr, addr, a->imm);
4373         }
4374         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4375     }
4376     return true;
4377 }
4378 
4379 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4380 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4381 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4382 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4383 
4384 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4385 
4386 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4387                    bool is_setg, SetFn fn)
4388 {
4389     int memidx;
4390     uint32_t syndrome, desc = 0;
4391 
4392     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4393         return false;
4394     }
4395 
4396     /*
4397      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4398      * us to pull this check before the CheckMOPSEnabled() test
4399      * (which we do in the helper function)
4400      */
4401     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4402         a->rd == 31 || a->rn == 31) {
4403         return false;
4404     }
4405 
4406     memidx = get_a64_user_mem_index(s, a->unpriv);
4407 
4408     /*
4409      * We pass option_a == true, matching our implementation;
4410      * we pass wrong_option == false: helper function may set that bit.
4411      */
4412     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4413                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4414 
4415     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4416         /* We may need to do MTE tag checking, so assemble the descriptor */
4417         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4418         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4419         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4420         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4421     }
4422     /* The helper function always needs the memidx even with MTE disabled */
4423     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4424 
4425     /*
4426      * The helper needs the register numbers, but since they're in
4427      * the syndrome anyway, we let it extract them from there rather
4428      * than passing in an extra three integer arguments.
4429      */
4430     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4431     return true;
4432 }
4433 
4434 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4435 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4436 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4437 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4438 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4439 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4440 
4441 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4442 
4443 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4444 {
4445     int rmemidx, wmemidx;
4446     uint32_t syndrome, rdesc = 0, wdesc = 0;
4447     bool wunpriv = extract32(a->options, 0, 1);
4448     bool runpriv = extract32(a->options, 1, 1);
4449 
4450     /*
4451      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4452      * us to pull this check before the CheckMOPSEnabled() test
4453      * (which we do in the helper function)
4454      */
4455     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4456         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4457         return false;
4458     }
4459 
4460     rmemidx = get_a64_user_mem_index(s, runpriv);
4461     wmemidx = get_a64_user_mem_index(s, wunpriv);
4462 
4463     /*
4464      * We pass option_a == true, matching our implementation;
4465      * we pass wrong_option == false: helper function may set that bit.
4466      */
4467     syndrome = syn_mop(false, false, a->options, is_epilogue,
4468                        false, true, a->rd, a->rs, a->rn);
4469 
4470     /* If we need to do MTE tag checking, assemble the descriptors */
4471     if (s->mte_active[runpriv]) {
4472         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4473         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4474     }
4475     if (s->mte_active[wunpriv]) {
4476         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4477         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4478         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4479     }
4480     /* The helper function needs these parts of the descriptor regardless */
4481     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4482     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4483 
4484     /*
4485      * The helper needs the register numbers, but since they're in
4486      * the syndrome anyway, we let it extract them from there rather
4487      * than passing in an extra three integer arguments.
4488      */
4489     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4490        tcg_constant_i32(rdesc));
4491     return true;
4492 }
4493 
4494 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4495 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4496 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4497 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4498 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4499 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4500 
4501 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4502 
4503 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4504                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4505 {
4506     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4507     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4508     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4509 
4510     fn(tcg_rd, tcg_rn, tcg_imm);
4511     if (!a->sf) {
4512         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4513     }
4514     return true;
4515 }
4516 
4517 /*
4518  * PC-rel. addressing
4519  */
4520 
4521 static bool trans_ADR(DisasContext *s, arg_ri *a)
4522 {
4523     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4524     return true;
4525 }
4526 
4527 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4528 {
4529     int64_t offset = (int64_t)a->imm << 12;
4530 
4531     /* The page offset is ok for CF_PCREL. */
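    /*
     * That is, rd = (pc & ~0xfff) + (imm << 12), computed as a pure
     * PC-relative delta: the page offset of pc_curr does not depend on
     * where the code happens to be mapped, so folding it into the delta
     * remains correct when translating with CF_PCREL.
     */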
4532     offset -= s->pc_curr & 0xfff;
4533     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4534     return true;
4535 }
4536 
4537 /*
4538  * Add/subtract (immediate)
4539  */
4540 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4541 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4542 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4543 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4544 
4545 /*
4546  * Add/subtract (immediate, with tags)
4547  */
4548 
4549 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4550                                       bool sub_op)
4551 {
4552     TCGv_i64 tcg_rn, tcg_rd;
4553     int imm;
4554 
4555     imm = a->uimm6 << LOG2_TAG_GRANULE;
4556     if (sub_op) {
4557         imm = -imm;
4558     }
4559 
4560     tcg_rn = cpu_reg_sp(s, a->rn);
4561     tcg_rd = cpu_reg_sp(s, a->rd);
4562 
4563     if (s->ata[0]) {
4564         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4565                            tcg_constant_i32(imm),
4566                            tcg_constant_i32(a->uimm4));
4567     } else {
4568         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4569         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4570     }
4571     return true;
4572 }
4573 
4574 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4575 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4576 
4577 /* The input should be a value in the bottom e bits (with higher
4578  * bits zero); returns that value replicated into every element
4579  * of size e in a 64 bit integer.
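 * For example, bitfield_replicate(0x3, 4) returns 0x3333333333333333.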
4580  */
4581 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4582 {
4583     assert(e != 0);
4584     while (e < 64) {
4585         mask |= mask << e;
4586         e *= 2;
4587     }
4588     return mask;
4589 }
4590 
4591 /*
4592  * Logical (immediate)
4593  */
4594 
4595 /*
4596  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4597  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4598  * value (ie should cause a guest UNDEF exception), and true if they are
4599  * valid, in which case the decoded bit pattern is written to result.
4600  */
4601 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4602                             unsigned int imms, unsigned int immr)
4603 {
4604     uint64_t mask;
4605     unsigned e, levels, s, r;
4606     int len;
4607 
4608     assert(immn < 2 && imms < 64 && immr < 64);
4609 
4610     /* The bit patterns we create here are 64 bit patterns which
4611      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4612      * 64 bits each. Each element contains the same value: a run
4613      * of between 1 and e-1 non-zero bits, rotated within the
4614      * element by between 0 and e-1 bits.
4615      *
4616      * The element size and run length are encoded into immn (1 bit)
4617      * and imms (6 bits) as follows:
4618      * 64 bit elements: immn = 1, imms = <length of run - 1>
4619      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4620      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4621      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4622      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4623      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4624      * Notice that immn = 0, imms = 11111x is the only combination
4625      * not covered by one of the above options; this is reserved.
4626      * Further, <length of run - 1> all-ones is a reserved pattern.
4627      *
4628      * In all cases the rotation is by immr % e (and immr is 6 bits).
4629      */
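    /*
     * Worked example: immn = 0, imms = 001111, immr = 0 encodes 32-bit
     * elements containing a run of 16 set bits with no rotation, so the
     * decoded wmask is 0x0000ffff0000ffff.  With immn = 1,
     * imms = 000111, immr = 4 the element is 64 bits wide with 8 set
     * bits rotated right by 4, giving 0xf00000000000000f.
     */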
4630 
4631     /* First determine the element size */
4632     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4633     if (len < 1) {
4634         /* This is the immn == 0, imms == 11111x case */
4635         return false;
4636     }
4637     e = 1 << len;
4638 
4639     levels = e - 1;
4640     s = imms & levels;
4641     r = immr & levels;
4642 
4643     if (s == levels) {
4644         /* <length of run - 1> mustn't be all-ones. */
4645         return false;
4646     }
4647 
4648     /* Create the value of one element: s+1 set bits rotated
4649      * by r within the element (which is e bits wide)...
4650      */
4651     mask = MAKE_64BIT_MASK(0, s + 1);
4652     if (r) {
4653         mask = (mask >> r) | (mask << (e - r));
4654         mask &= MAKE_64BIT_MASK(0, e);
4655     }
4656     /* ...then replicate the element over the whole 64 bit value */
4657     mask = bitfield_replicate(mask, e);
4658     *result = mask;
4659     return true;
4660 }
4661 
4662 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4663                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4664 {
4665     TCGv_i64 tcg_rd, tcg_rn;
4666     uint64_t imm;
4667 
4668     /* Some immediate field values are reserved. */
4669     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4670                                 extract32(a->dbm, 0, 6),
4671                                 extract32(a->dbm, 6, 6))) {
4672         return false;
4673     }
4674     if (!a->sf) {
4675         imm &= 0xffffffffull;
4676     }
4677 
4678     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4679     tcg_rn = cpu_reg(s, a->rn);
4680 
4681     fn(tcg_rd, tcg_rn, imm);
4682     if (set_cc) {
4683         gen_logic_CC(a->sf, tcg_rd);
4684     }
4685     if (!a->sf) {
4686         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4687     }
4688     return true;
4689 }
4690 
4691 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4692 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4693 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4694 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4695 
4696 /*
4697  * Move wide (immediate)
4698  */
4699 
4700 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4701 {
4702     int pos = a->hw << 4;
4703     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4704     return true;
4705 }
4706 
4707 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4708 {
4709     int pos = a->hw << 4;
4710     uint64_t imm = a->imm;
4711 
4712     imm = ~(imm << pos);
4713     if (!a->sf) {
4714         imm = (uint32_t)imm;
4715     }
4716     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4717     return true;
4718 }
4719 
4720 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4721 {
4722     int pos = a->hw << 4;
4723     TCGv_i64 tcg_rd, tcg_im;
4724 
4725     tcg_rd = cpu_reg(s, a->rd);
4726     tcg_im = tcg_constant_i64(a->imm);
4727     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4728     if (!a->sf) {
4729         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4730     }
4731     return true;
4732 }
4733 
4734 /*
4735  * Bitfield
4736  */
4737 
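/*
 * A worked example of the usual aliases: UBFX Wd, Wn, #lsb, #width is
 * UBFM with immr = lsb and imms = lsb + width - 1, taking the si >= ri
 * extract path below, while LSL Xd, Xn, #sh (for sh > 0) is UBFM with
 * immr = 64 - sh and imms = 63 - sh, taking the si < ri deposit path.
 */
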
4738 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4739 {
4740     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4741     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4742     unsigned int bitsize = a->sf ? 64 : 32;
4743     unsigned int ri = a->immr;
4744     unsigned int si = a->imms;
4745     unsigned int pos, len;
4746 
4747     if (si >= ri) {
4748         /* Wd<s-r:0> = Wn<s:r> */
4749         len = (si - ri) + 1;
4750         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4751         if (!a->sf) {
4752             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4753         }
4754     } else {
4755         /* Wd<32+s-r,32-r> = Wn<s:0> */
4756         len = si + 1;
4757         pos = (bitsize - ri) & (bitsize - 1);
4758 
4759         if (len < ri) {
4760             /*
4761              * Sign extend the destination field from len to fill the
4762              * balance of the word.  Let the deposit below insert all
4763              * of those sign bits.
4764              */
4765             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4766             len = ri;
4767         }
4768 
4769         /*
4770          * We start with zero, and we haven't modified any bits outside
4771          * bitsize, therefore no final zero-extension is needed for !sf.
4772          */
4773         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4774     }
4775     return true;
4776 }
4777 
4778 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4779 {
4780     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4781     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4782     unsigned int bitsize = a->sf ? 64 : 32;
4783     unsigned int ri = a->immr;
4784     unsigned int si = a->imms;
4785     unsigned int pos, len;
4786 
4790     if (si >= ri) {
4791         /* Wd<s-r:0> = Wn<s:r> */
4792         len = (si - ri) + 1;
4793         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4794     } else {
4795         /* Wd<32+s-r,32-r> = Wn<s:0> */
4796         len = si + 1;
4797         pos = (bitsize - ri) & (bitsize - 1);
4798         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4799     }
4800     return true;
4801 }
4802 
4803 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4804 {
4805     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4806     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4807     unsigned int bitsize = a->sf ? 64 : 32;
4808     unsigned int ri = a->immr;
4809     unsigned int si = a->imms;
4810     unsigned int pos, len;
4811 
4815     if (si >= ri) {
4816         /* Wd<s-r:0> = Wn<s:r> */
4817         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4818         len = (si - ri) + 1;
4819         pos = 0;
4820     } else {
4821         /* Wd<32+s-r,32-r> = Wn<s:0> */
4822         len = si + 1;
4823         pos = (bitsize - ri) & (bitsize - 1);
4824     }
4825 
4826     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4827     if (!a->sf) {
4828         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4829     }
4830     return true;
4831 }
4832 
4833 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4834 {
4835     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4836 
4837     tcg_rd = cpu_reg(s, a->rd);
4838 
4839     if (unlikely(a->imm == 0)) {
4840         /*
4841          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4842          * so an extract from bit 0 is a special case.
4843          */
4844         if (a->sf) {
4845             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4846         } else {
4847             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4848         }
4849     } else {
4850         tcg_rm = cpu_reg(s, a->rm);
4851         tcg_rn = cpu_reg(s, a->rn);
4852 
4853         if (a->sf) {
4854             /* Specialization to ROR happens in EXTRACT2.  */
4855             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4856         } else {
4857             TCGv_i32 t0 = tcg_temp_new_i32();
4858 
4859             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4860             if (a->rm == a->rn) {
4861                 tcg_gen_rotri_i32(t0, t0, a->imm);
4862             } else {
4863                 TCGv_i32 t1 = tcg_temp_new_i32();
4864                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4865                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4866             }
4867             tcg_gen_extu_i32_i64(tcg_rd, t0);
4868         }
4869     }
4870     return true;
4871 }
4872 
4873 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4874 {
4875     if (fp_access_check(s)) {
4876         int len = (a->len + 1) * 16;
4877 
4878         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4879                            vec_full_reg_offset(s, a->rm), tcg_env,
4880                            a->q ? 16 : 8, vec_full_reg_size(s),
4881                            (len << 6) | (a->tbx << 5) | a->rn,
4882                            gen_helper_simd_tblx);
4883     }
4884     return true;
4885 }
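
/*
 * rn, the TBX flag and the table length in bytes are packed into the
 * simd_data immediate for gen_helper_simd_tblx, which needs the register
 * number in order to locate the 1-4 consecutive table registers.
 */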
4886 
4887 typedef int simd_permute_idx_fn(int i, int part, int elements);
4888 
4889 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4890                             simd_permute_idx_fn *fn, int part)
4891 {
4892     MemOp esz = a->esz;
4893     int datasize = a->q ? 16 : 8;
4894     int elements = datasize >> esz;
4895     TCGv_i64 tcg_res[2], tcg_ele;
4896 
4897     if (esz == MO_64 && !a->q) {
4898         return false;
4899     }
4900     if (!fp_access_check(s)) {
4901         return true;
4902     }
4903 
4904     tcg_res[0] = tcg_temp_new_i64();
4905     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4906     tcg_ele = tcg_temp_new_i64();
4907 
4908     for (int i = 0; i < elements; i++) {
4909         int o, w, idx;
4910 
4911         idx = fn(i, part, elements);
4912         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4913                          idx & (elements - 1), esz);
4914 
4915         w = (i << (esz + 3)) / 64;
4916         o = (i << (esz + 3)) % 64;
4917         if (o == 0) {
4918             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4919         } else {
4920             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4921         }
4922     }
4923 
4924     for (int i = a->q; i >= 0; --i) {
4925         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4926     }
4927     clear_vec_high(s, a->q, a->rd);
4928     return true;
4929 }
4930 
4931 static int permute_load_uzp(int i, int part, int elements)
4932 {
4933     return 2 * i + part;
4934 }
4935 
4936 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4937 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4938 
4939 static int permute_load_trn(int i, int part, int elements)
4940 {
4941     return (i & 1) * elements + (i & ~1) + part;
4942 }
4943 
4944 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4945 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4946 
4947 static int permute_load_zip(int i, int part, int elements)
4948 {
4949     return (i & 1) * elements + ((part * elements + i) >> 1);
4950 }
4951 
4952 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4953 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
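
/*
 * Each index function returns a flat index into the concatenation rn:rm,
 * with bit 'elements' selecting rm.  E.g. ZIP1 on 16-bit elements of a
 * 128-bit vector (elements = 8) yields 0, 8, 1, 9, 2, 10, 3, 11, i.e.
 * rn[0], rm[0], rn[1], rm[1], ...
 */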
4954 
4955 /*
4956  * Cryptographic AES, SHA, SHA512
4957  */
4958 
4959 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4960 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4961 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4962 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4963 
4964 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4965 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4966 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4967 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4968 
4969 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4970 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4971 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4972 
4973 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4974 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4975 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4976 
4977 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4978 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4979 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4980 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4981 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4982 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4983 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4984 
4985 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4986 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4987 
4988 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4989 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4990 
4991 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4992 {
4993     if (!dc_isar_feature(aa64_sm3, s)) {
4994         return false;
4995     }
4996     if (fp_access_check(s)) {
4997         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4998         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4999         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
5000         TCGv_i32 tcg_res = tcg_temp_new_i32();
5001 
5002         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5003         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5004         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5005 
5006         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5007         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5008         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5009         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5010 
5011         /* Clear the whole register first, then store bits [127:96]. */
5012         clear_vec(s, a->rd);
5013         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5014     }
5015     return true;
5016 }
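
/*
 * The rotate-right amounts of 20 and 25 above are rotate-left by 12 and 7
 * on 32-bit values, matching the SM3 SS1 computation; only element 3 of
 * each source participates, and the rest of the destination is cleared.
 */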
5017 
5018 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5019 {
5020     if (fp_access_check(s)) {
5021         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5022     }
5023     return true;
5024 }
5025 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5026 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5027 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5028 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5029 
5030 static bool trans_XAR(DisasContext *s, arg_XAR *a)
5031 {
5032     if (!dc_isar_feature(aa64_sha3, s)) {
5033         return false;
5034     }
5035     if (fp_access_check(s)) {
5036         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5037                      vec_full_reg_offset(s, a->rn),
5038                      vec_full_reg_offset(s, a->rm), a->imm, 16,
5039                      vec_full_reg_size(s));
5040     }
5041     return true;
5042 }
5043 
5044 /*
5045  * Advanced SIMD copy
5046  */
5047 
5048 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5049 {
5050     unsigned esz = ctz32(imm);
5051     if (esz <= MO_64) {
5052         *pesz = esz;
5053         *pidx = imm >> (esz + 1);
5054         return true;
5055     }
5056     return false;
5057 }
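
/*
 * The AdvSIMD copy insns pack the element size and index into a single
 * immediate: the size is the position of the lowest set bit, the index is
 * the bits above it.  E.g. imm = 0b10010 gives esz = MO_16 and
 * idx = 0b100 = 4, i.e. halfword element 4.
 */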
5058 
5059 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5060 {
5061     MemOp esz;
5062     unsigned idx;
5063 
5064     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5065         return false;
5066     }
5067     if (fp_access_check(s)) {
5068         /*
5069          * This instruction just extracts the specified element and
5070          * zero-extends it into the bottom of the destination register.
5071          */
5072         TCGv_i64 tmp = tcg_temp_new_i64();
5073         read_vec_element(s, tmp, a->rn, idx, esz);
5074         write_fp_dreg(s, a->rd, tmp);
5075     }
5076     return true;
5077 }
5078 
5079 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5080 {
5081     MemOp esz;
5082     unsigned idx;
5083 
5084     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5085         return false;
5086     }
5087     if (esz == MO_64 && !a->q) {
5088         return false;
5089     }
5090     if (fp_access_check(s)) {
5091         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5092                              vec_reg_offset(s, a->rn, idx, esz),
5093                              a->q ? 16 : 8, vec_full_reg_size(s));
5094     }
5095     return true;
5096 }
5097 
5098 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5099 {
5100     MemOp esz;
5101     unsigned idx;
5102 
5103     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5104         return false;
5105     }
5106     if (esz == MO_64 && !a->q) {
5107         return false;
5108     }
5109     if (fp_access_check(s)) {
5110         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5111                              a->q ? 16 : 8, vec_full_reg_size(s),
5112                              cpu_reg(s, a->rn));
5113     }
5114     return true;
5115 }
5116 
5117 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5118 {
5119     MemOp esz;
5120     unsigned idx;
5121 
5122     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5123         return false;
5124     }
5125     if (is_signed) {
5126         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5127             return false;
5128         }
5129     } else {
5130         if (esz == MO_64 ? !a->q : a->q) {
5131             return false;
5132         }
5133     }
5134     if (fp_access_check(s)) {
5135         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5136         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5137         if (is_signed && !a->q) {
5138             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5139         }
5140     }
5141     return true;
5142 }
5143 
5144 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5145 TRANS(UMOV, do_smov_umov, a, 0)
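
/*
 * Folding MO_SIGN into the memop makes read_vec_element sign-extend for
 * SMOV; the explicit ext32u then re-truncates the 32-bit form (SMOV Wd)
 * so that the upper half of the X register is zeroed as required.
 */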
5146 
5147 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5148 {
5149     MemOp esz;
5150     unsigned idx;
5151 
5152     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5153         return false;
5154     }
5155     if (fp_access_check(s)) {
5156         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5157         clear_vec_high(s, true, a->rd);
5158     }
5159     return true;
5160 }
5161 
5162 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5163 {
5164     MemOp esz;
5165     unsigned didx, sidx;
5166 
5167     if (!decode_esz_idx(a->di, &esz, &didx)) {
5168         return false;
5169     }
5170     sidx = a->si >> esz;
5171     if (fp_access_check(s)) {
5172         TCGv_i64 tmp = tcg_temp_new_i64();
5173 
5174         read_vec_element(s, tmp, a->rn, sidx, esz);
5175         write_vec_element(s, tmp, a->rd, didx, esz);
5176 
5177         /* INS is considered a 128-bit write for SVE. */
5178         clear_vec_high(s, true, a->rd);
5179     }
5180     return true;
5181 }
5182 
5183 /*
5184  * Advanced SIMD three same
5185  */
5186 
5187 typedef struct FPScalar {
5188     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5189     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5190     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5191 } FPScalar;
5192 
5193 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5194                                         const FPScalar *f, int mergereg,
5195                                         ARMFPStatusFlavour fpsttype)
5196 {
5197     switch (a->esz) {
5198     case MO_64:
5199         if (fp_access_check(s)) {
5200             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5201             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5202             f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5203             write_fp_dreg_merging(s, a->rd, mergereg, t0);
5204         }
5205         break;
5206     case MO_32:
5207         if (fp_access_check(s)) {
5208             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5209             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5210             f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5211             write_fp_sreg_merging(s, a->rd, mergereg, t0);
5212         }
5213         break;
5214     case MO_16:
5215         if (!dc_isar_feature(aa64_fp16, s)) {
5216             return false;
5217         }
5218         if (fp_access_check(s)) {
5219             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5220             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5221             f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5222             write_fp_hreg_merging(s, a->rd, mergereg, t0);
5223         }
5224         break;
5225     default:
5226         return false;
5227     }
5228     return true;
5229 }
5230 
5231 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5232                           int mergereg)
5233 {
5234     return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5235                                        a->esz == MO_16 ?
5236                                        FPST_A64_F16 : FPST_A64);
5237 }
5238 
5239 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5240                                  const FPScalar *fnormal, const FPScalar *fah,
5241                                  int mergereg)
5242 {
5243     return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5244                                        mergereg, select_ah_fpst(s, a->esz));
5245 }
5246 
5247 /* Some insns need to call different helpers when FPCR.AH == 1 */
5248 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5249                               const FPScalar *fnormal,
5250                               const FPScalar *fah,
5251                               int mergereg)
5252 {
5253     return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5254 }
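
/*
 * do_fp3_scalar_2fn only swaps the helper when FPCR.AH == 1 and keeps the
 * standard FP status, whereas do_fp3_scalar_ah_2fn additionally switches
 * to the FPCR.AH float_status via select_ah_fpst().
 */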
5255 
5256 static const FPScalar f_scalar_fadd = {
5257     gen_helper_vfp_addh,
5258     gen_helper_vfp_adds,
5259     gen_helper_vfp_addd,
5260 };
5261 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5262 
5263 static const FPScalar f_scalar_fsub = {
5264     gen_helper_vfp_subh,
5265     gen_helper_vfp_subs,
5266     gen_helper_vfp_subd,
5267 };
5268 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5269 
5270 static const FPScalar f_scalar_fdiv = {
5271     gen_helper_vfp_divh,
5272     gen_helper_vfp_divs,
5273     gen_helper_vfp_divd,
5274 };
5275 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5276 
5277 static const FPScalar f_scalar_fmul = {
5278     gen_helper_vfp_mulh,
5279     gen_helper_vfp_muls,
5280     gen_helper_vfp_muld,
5281 };
5282 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5283 
5284 static const FPScalar f_scalar_fmax = {
5285     gen_helper_vfp_maxh,
5286     gen_helper_vfp_maxs,
5287     gen_helper_vfp_maxd,
5288 };
5289 static const FPScalar f_scalar_fmax_ah = {
5290     gen_helper_vfp_ah_maxh,
5291     gen_helper_vfp_ah_maxs,
5292     gen_helper_vfp_ah_maxd,
5293 };
5294 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5295 
5296 static const FPScalar f_scalar_fmin = {
5297     gen_helper_vfp_minh,
5298     gen_helper_vfp_mins,
5299     gen_helper_vfp_mind,
5300 };
5301 static const FPScalar f_scalar_fmin_ah = {
5302     gen_helper_vfp_ah_minh,
5303     gen_helper_vfp_ah_mins,
5304     gen_helper_vfp_ah_mind,
5305 };
5306 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5307 
5308 static const FPScalar f_scalar_fmaxnm = {
5309     gen_helper_vfp_maxnumh,
5310     gen_helper_vfp_maxnums,
5311     gen_helper_vfp_maxnumd,
5312 };
5313 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5314 
5315 static const FPScalar f_scalar_fminnm = {
5316     gen_helper_vfp_minnumh,
5317     gen_helper_vfp_minnums,
5318     gen_helper_vfp_minnumd,
5319 };
5320 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5321 
5322 static const FPScalar f_scalar_fmulx = {
5323     gen_helper_advsimd_mulxh,
5324     gen_helper_vfp_mulxs,
5325     gen_helper_vfp_mulxd,
5326 };
5327 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5328 
5329 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5330 {
5331     gen_helper_vfp_mulh(d, n, m, s);
5332     gen_vfp_negh(d, d);
5333 }
5334 
5335 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5336 {
5337     gen_helper_vfp_muls(d, n, m, s);
5338     gen_vfp_negs(d, d);
5339 }
5340 
5341 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5342 {
5343     gen_helper_vfp_muld(d, n, m, s);
5344     gen_vfp_negd(d, d);
5345 }
5346 
5347 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5348 {
5349     gen_helper_vfp_mulh(d, n, m, s);
5350     gen_vfp_ah_negh(d, d);
5351 }
5352 
5353 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5354 {
5355     gen_helper_vfp_muls(d, n, m, s);
5356     gen_vfp_ah_negs(d, d);
5357 }
5358 
5359 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5360 {
5361     gen_helper_vfp_muld(d, n, m, s);
5362     gen_vfp_ah_negd(d, d);
5363 }
5364 
5365 static const FPScalar f_scalar_fnmul = {
5366     gen_fnmul_h,
5367     gen_fnmul_s,
5368     gen_fnmul_d,
5369 };
5370 static const FPScalar f_scalar_ah_fnmul = {
5371     gen_fnmul_ah_h,
5372     gen_fnmul_ah_s,
5373     gen_fnmul_ah_d,
5374 };
5375 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5376 
5377 static const FPScalar f_scalar_fcmeq = {
5378     gen_helper_advsimd_ceq_f16,
5379     gen_helper_neon_ceq_f32,
5380     gen_helper_neon_ceq_f64,
5381 };
5382 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5383 
5384 static const FPScalar f_scalar_fcmge = {
5385     gen_helper_advsimd_cge_f16,
5386     gen_helper_neon_cge_f32,
5387     gen_helper_neon_cge_f64,
5388 };
5389 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5390 
5391 static const FPScalar f_scalar_fcmgt = {
5392     gen_helper_advsimd_cgt_f16,
5393     gen_helper_neon_cgt_f32,
5394     gen_helper_neon_cgt_f64,
5395 };
5396 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5397 
5398 static const FPScalar f_scalar_facge = {
5399     gen_helper_advsimd_acge_f16,
5400     gen_helper_neon_acge_f32,
5401     gen_helper_neon_acge_f64,
5402 };
5403 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5404 
5405 static const FPScalar f_scalar_facgt = {
5406     gen_helper_advsimd_acgt_f16,
5407     gen_helper_neon_acgt_f32,
5408     gen_helper_neon_acgt_f64,
5409 };
5410 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5411 
5412 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5413 {
5414     gen_helper_vfp_subh(d, n, m, s);
5415     gen_vfp_absh(d, d);
5416 }
5417 
5418 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5419 {
5420     gen_helper_vfp_subs(d, n, m, s);
5421     gen_vfp_abss(d, d);
5422 }
5423 
5424 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5425 {
5426     gen_helper_vfp_subd(d, n, m, s);
5427     gen_vfp_absd(d, d);
5428 }
5429 
5430 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5431 {
5432     gen_helper_vfp_subh(d, n, m, s);
5433     gen_vfp_ah_absh(d, d);
5434 }
5435 
5436 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5437 {
5438     gen_helper_vfp_subs(d, n, m, s);
5439     gen_vfp_ah_abss(d, d);
5440 }
5441 
5442 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5443 {
5444     gen_helper_vfp_subd(d, n, m, s);
5445     gen_vfp_ah_absd(d, d);
5446 }
5447 
5448 static const FPScalar f_scalar_fabd = {
5449     gen_fabd_h,
5450     gen_fabd_s,
5451     gen_fabd_d,
5452 };
5453 static const FPScalar f_scalar_ah_fabd = {
5454     gen_fabd_ah_h,
5455     gen_fabd_ah_s,
5456     gen_fabd_ah_d,
5457 };
5458 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5459 
5460 static const FPScalar f_scalar_frecps = {
5461     gen_helper_recpsf_f16,
5462     gen_helper_recpsf_f32,
5463     gen_helper_recpsf_f64,
5464 };
5465 static const FPScalar f_scalar_ah_frecps = {
5466     gen_helper_recpsf_ah_f16,
5467     gen_helper_recpsf_ah_f32,
5468     gen_helper_recpsf_ah_f64,
5469 };
5470 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5471       &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5472 
5473 static const FPScalar f_scalar_frsqrts = {
5474     gen_helper_rsqrtsf_f16,
5475     gen_helper_rsqrtsf_f32,
5476     gen_helper_rsqrtsf_f64,
5477 };
5478 static const FPScalar f_scalar_ah_frsqrts = {
5479     gen_helper_rsqrtsf_ah_f16,
5480     gen_helper_rsqrtsf_ah_f32,
5481     gen_helper_rsqrtsf_ah_f64,
5482 };
5483 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5484       &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5485 
5486 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5487                        const FPScalar *f, bool swap)
5488 {
5489     switch (a->esz) {
5490     case MO_64:
5491         if (fp_access_check(s)) {
5492             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5493             TCGv_i64 t1 = tcg_constant_i64(0);
5494             if (swap) {
5495                 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5496             } else {
5497                 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5498             }
5499             write_fp_dreg(s, a->rd, t0);
5500         }
5501         break;
5502     case MO_32:
5503         if (fp_access_check(s)) {
5504             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5505             TCGv_i32 t1 = tcg_constant_i32(0);
5506             if (swap) {
5507                 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5508             } else {
5509                 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5510             }
5511             write_fp_sreg(s, a->rd, t0);
5512         }
5513         break;
5514     case MO_16:
5515         if (!dc_isar_feature(aa64_fp16, s)) {
5516             return false;
5517         }
5518         if (fp_access_check(s)) {
5519             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5520             TCGv_i32 t1 = tcg_constant_i32(0);
5521             if (swap) {
5522                 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5523             } else {
5524                 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5525             }
5526             write_fp_sreg(s, a->rd, t0);
5527         }
5528         break;
5529     default:
5530         return false;
5531     }
5532     return true;
5533 }
5534 
5535 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5536 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5537 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5538 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5539 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
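
/*
 * FCMLT (zero) and FCMLE (zero) have no helpers of their own: they reuse
 * the FCMGT/FCMGE helpers with the operands swapped around the zero
 * constant.
 */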
5540 
5541 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5542                 MemOp sgn_n, MemOp sgn_m,
5543                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5544                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5545 {
5546     TCGv_i64 t0, t1, t2, qc;
5547     MemOp esz = a->esz;
5548 
5549     if (!fp_access_check(s)) {
5550         return true;
5551     }
5552 
5553     t0 = tcg_temp_new_i64();
5554     t1 = tcg_temp_new_i64();
5555     t2 = tcg_temp_new_i64();
5556     qc = tcg_temp_new_i64();
5557     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5558     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5559     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5560 
5561     if (esz == MO_64) {
5562         gen_d(t0, qc, t1, t2);
5563     } else {
5564         gen_bhs(t0, qc, t1, t2, esz);
5565         tcg_gen_ext_i64(t0, t0, esz);
5566     }
5567 
5568     write_fp_dreg(s, a->rd, t0);
5569     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5570     return true;
5571 }
5572 
5573 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5574 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5575 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5576 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5577 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5578 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
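
/*
 * The gen_*_bhs/gen_*_d generators take the cumulative saturation flag as
 * an in/out operand, which is why vfp.qc is loaded before and stored back
 * after the operation rather than being updated from a helper.
 */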
5579 
5580 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5581                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5582 {
5583     if (fp_access_check(s)) {
5584         TCGv_i64 t0 = tcg_temp_new_i64();
5585         TCGv_i64 t1 = tcg_temp_new_i64();
5586 
5587         read_vec_element(s, t0, a->rn, 0, MO_64);
5588         read_vec_element(s, t1, a->rm, 0, MO_64);
5589         fn(t0, t0, t1);
5590         write_fp_dreg(s, a->rd, t0);
5591     }
5592     return true;
5593 }
5594 
5595 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5596 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5597 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5598 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5599 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5600 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5601 
5602 typedef struct ENVScalar2 {
5603     NeonGenTwoOpEnvFn *gen_bhs[3];
5604     NeonGenTwo64OpEnvFn *gen_d;
5605 } ENVScalar2;
5606 
5607 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5608 {
5609     if (!fp_access_check(s)) {
5610         return true;
5611     }
5612     if (a->esz == MO_64) {
5613         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5614         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5615         f->gen_d(t0, tcg_env, t0, t1);
5616         write_fp_dreg(s, a->rd, t0);
5617     } else {
5618         TCGv_i32 t0 = tcg_temp_new_i32();
5619         TCGv_i32 t1 = tcg_temp_new_i32();
5620 
5621         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5622         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5623         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5624         write_fp_sreg(s, a->rd, t0);
5625     }
5626     return true;
5627 }
5628 
5629 static const ENVScalar2 f_scalar_sqshl = {
5630     { gen_helper_neon_qshl_s8,
5631       gen_helper_neon_qshl_s16,
5632       gen_helper_neon_qshl_s32 },
5633     gen_helper_neon_qshl_s64,
5634 };
5635 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5636 
5637 static const ENVScalar2 f_scalar_uqshl = {
5638     { gen_helper_neon_qshl_u8,
5639       gen_helper_neon_qshl_u16,
5640       gen_helper_neon_qshl_u32 },
5641     gen_helper_neon_qshl_u64,
5642 };
5643 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5644 
5645 static const ENVScalar2 f_scalar_sqrshl = {
5646     { gen_helper_neon_qrshl_s8,
5647       gen_helper_neon_qrshl_s16,
5648       gen_helper_neon_qrshl_s32 },
5649     gen_helper_neon_qrshl_s64,
5650 };
5651 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5652 
5653 static const ENVScalar2 f_scalar_uqrshl = {
5654     { gen_helper_neon_qrshl_u8,
5655       gen_helper_neon_qrshl_u16,
5656       gen_helper_neon_qrshl_u32 },
5657     gen_helper_neon_qrshl_u64,
5658 };
5659 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5660 
5661 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5662                               const ENVScalar2 *f)
5663 {
5664     if (a->esz == MO_16 || a->esz == MO_32) {
5665         return do_env_scalar2(s, a, f);
5666     }
5667     return false;
5668 }
5669 
5670 static const ENVScalar2 f_scalar_sqdmulh = {
5671     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5672 };
5673 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5674 
5675 static const ENVScalar2 f_scalar_sqrdmulh = {
5676     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5677 };
5678 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5679 
5680 typedef struct ENVScalar3 {
5681     NeonGenThreeOpEnvFn *gen_hs[2];
5682 } ENVScalar3;
5683 
5684 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5685                               const ENVScalar3 *f)
5686 {
5687     TCGv_i32 t0, t1, t2;
5688 
5689     if (a->esz != MO_16 && a->esz != MO_32) {
5690         return false;
5691     }
5692     if (!fp_access_check(s)) {
5693         return true;
5694     }
5695 
5696     t0 = tcg_temp_new_i32();
5697     t1 = tcg_temp_new_i32();
5698     t2 = tcg_temp_new_i32();
5699     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5700     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5701     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5702     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5703     write_fp_sreg(s, a->rd, t0);
5704     return true;
5705 }
5706 
5707 static const ENVScalar3 f_scalar_sqrdmlah = {
5708     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5709 };
5710 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5711 
5712 static const ENVScalar3 f_scalar_sqrdmlsh = {
5713     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5714 };
5715 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5716 
5717 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5718 {
5719     if (fp_access_check(s)) {
5720         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5721         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5722         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5723         write_fp_dreg(s, a->rd, t0);
5724     }
5725     return true;
5726 }
5727 
5728 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5729 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5730 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5731 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5732 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5733 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5734 
5735 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
5736                                         int data,
5737                                         gen_helper_gvec_3_ptr * const fns[3],
5738                                         ARMFPStatusFlavour fpsttype)
5739 {
5740     MemOp esz = a->esz;
5741     int check = fp_access_check_vector_hsd(s, a->q, esz);
5742 
5743     if (check <= 0) {
5744         return check == 0;
5745     }
5746 
5747     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
5748                       data, fns[esz - 1]);
5749     return true;
5750 }
5751 
5752 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5753                           gen_helper_gvec_3_ptr * const fns[3])
5754 {
5755     return do_fp3_vector_with_fpsttype(s, a, data, fns,
5756                                        a->esz == MO_16 ?
5757                                        FPST_A64_F16 : FPST_A64);
5758 }
5759 
5760 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5761                               gen_helper_gvec_3_ptr * const fnormal[3],
5762                               gen_helper_gvec_3_ptr * const fah[3])
5763 {
5764     return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
5765 }
5766 
5767 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5768                                  gen_helper_gvec_3_ptr * const fnormal[3],
5769                                  gen_helper_gvec_3_ptr * const fah[3])
5770 {
5771     return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
5772                                        select_ah_fpst(s, a->esz));
5773 }
5774 
5775 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5776     gen_helper_gvec_fadd_h,
5777     gen_helper_gvec_fadd_s,
5778     gen_helper_gvec_fadd_d,
5779 };
5780 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5781 
5782 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5783     gen_helper_gvec_fsub_h,
5784     gen_helper_gvec_fsub_s,
5785     gen_helper_gvec_fsub_d,
5786 };
5787 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5788 
5789 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5790     gen_helper_gvec_fdiv_h,
5791     gen_helper_gvec_fdiv_s,
5792     gen_helper_gvec_fdiv_d,
5793 };
5794 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5795 
5796 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5797     gen_helper_gvec_fmul_h,
5798     gen_helper_gvec_fmul_s,
5799     gen_helper_gvec_fmul_d,
5800 };
5801 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5802 
5803 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5804     gen_helper_gvec_fmax_h,
5805     gen_helper_gvec_fmax_s,
5806     gen_helper_gvec_fmax_d,
5807 };
5808 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
5809     gen_helper_gvec_ah_fmax_h,
5810     gen_helper_gvec_ah_fmax_s,
5811     gen_helper_gvec_ah_fmax_d,
5812 };
5813 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
5814 
5815 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5816     gen_helper_gvec_fmin_h,
5817     gen_helper_gvec_fmin_s,
5818     gen_helper_gvec_fmin_d,
5819 };
5820 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
5821     gen_helper_gvec_ah_fmin_h,
5822     gen_helper_gvec_ah_fmin_s,
5823     gen_helper_gvec_ah_fmin_d,
5824 };
5825 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
5826 
5827 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5828     gen_helper_gvec_fmaxnum_h,
5829     gen_helper_gvec_fmaxnum_s,
5830     gen_helper_gvec_fmaxnum_d,
5831 };
5832 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5833 
5834 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5835     gen_helper_gvec_fminnum_h,
5836     gen_helper_gvec_fminnum_s,
5837     gen_helper_gvec_fminnum_d,
5838 };
5839 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5840 
5841 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5842     gen_helper_gvec_fmulx_h,
5843     gen_helper_gvec_fmulx_s,
5844     gen_helper_gvec_fmulx_d,
5845 };
5846 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5847 
5848 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5849     gen_helper_gvec_vfma_h,
5850     gen_helper_gvec_vfma_s,
5851     gen_helper_gvec_vfma_d,
5852 };
5853 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5854 
5855 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5856     gen_helper_gvec_vfms_h,
5857     gen_helper_gvec_vfms_s,
5858     gen_helper_gvec_vfms_d,
5859 };
5860 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
5861     gen_helper_gvec_ah_vfms_h,
5862     gen_helper_gvec_ah_vfms_s,
5863     gen_helper_gvec_ah_vfms_d,
5864 };
5865 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
5866 
5867 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5868     gen_helper_gvec_fceq_h,
5869     gen_helper_gvec_fceq_s,
5870     gen_helper_gvec_fceq_d,
5871 };
5872 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5873 
5874 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5875     gen_helper_gvec_fcge_h,
5876     gen_helper_gvec_fcge_s,
5877     gen_helper_gvec_fcge_d,
5878 };
5879 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5880 
5881 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5882     gen_helper_gvec_fcgt_h,
5883     gen_helper_gvec_fcgt_s,
5884     gen_helper_gvec_fcgt_d,
5885 };
5886 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5887 
5888 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5889     gen_helper_gvec_facge_h,
5890     gen_helper_gvec_facge_s,
5891     gen_helper_gvec_facge_d,
5892 };
5893 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5894 
5895 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5896     gen_helper_gvec_facgt_h,
5897     gen_helper_gvec_facgt_s,
5898     gen_helper_gvec_facgt_d,
5899 };
5900 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5901 
5902 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5903     gen_helper_gvec_fabd_h,
5904     gen_helper_gvec_fabd_s,
5905     gen_helper_gvec_fabd_d,
5906 };
5907 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
5908     gen_helper_gvec_ah_fabd_h,
5909     gen_helper_gvec_ah_fabd_s,
5910     gen_helper_gvec_ah_fabd_d,
5911 };
5912 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
5913 
5914 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5915     gen_helper_gvec_recps_h,
5916     gen_helper_gvec_recps_s,
5917     gen_helper_gvec_recps_d,
5918 };
5919 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
5920     gen_helper_gvec_ah_recps_h,
5921     gen_helper_gvec_ah_recps_s,
5922     gen_helper_gvec_ah_recps_d,
5923 };
5924 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
5925 
5926 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5927     gen_helper_gvec_rsqrts_h,
5928     gen_helper_gvec_rsqrts_s,
5929     gen_helper_gvec_rsqrts_d,
5930 };
5931 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
5932     gen_helper_gvec_ah_rsqrts_h,
5933     gen_helper_gvec_ah_rsqrts_s,
5934     gen_helper_gvec_ah_rsqrts_d,
5935 };
5936 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
5937 
5938 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5939     gen_helper_gvec_faddp_h,
5940     gen_helper_gvec_faddp_s,
5941     gen_helper_gvec_faddp_d,
5942 };
5943 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5944 
5945 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5946     gen_helper_gvec_fmaxp_h,
5947     gen_helper_gvec_fmaxp_s,
5948     gen_helper_gvec_fmaxp_d,
5949 };
5950 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
5951     gen_helper_gvec_ah_fmaxp_h,
5952     gen_helper_gvec_ah_fmaxp_s,
5953     gen_helper_gvec_ah_fmaxp_d,
5954 };
5955 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
5956 
5957 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5958     gen_helper_gvec_fminp_h,
5959     gen_helper_gvec_fminp_s,
5960     gen_helper_gvec_fminp_d,
5961 };
5962 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
5963     gen_helper_gvec_ah_fminp_h,
5964     gen_helper_gvec_ah_fminp_s,
5965     gen_helper_gvec_ah_fminp_d,
5966 };
5967 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
5968 
5969 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5970     gen_helper_gvec_fmaxnump_h,
5971     gen_helper_gvec_fmaxnump_s,
5972     gen_helper_gvec_fmaxnump_d,
5973 };
5974 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5975 
5976 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5977     gen_helper_gvec_fminnump_h,
5978     gen_helper_gvec_fminnump_s,
5979     gen_helper_gvec_fminnump_d,
5980 };
5981 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5982 
5983 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5984 {
5985     if (fp_access_check(s)) {
5986         int data = (is_2 << 1) | is_s;
5987         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5988                            vec_full_reg_offset(s, a->rn),
5989                            vec_full_reg_offset(s, a->rm), tcg_env,
5990                            a->q ? 16 : 8, vec_full_reg_size(s),
5991                            data, gen_helper_gvec_fmlal_a64);
5992     }
5993     return true;
5994 }
5995 
5996 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5997 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5998 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5999 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
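
/*
 * The data argument packs the subtract flag (FMLSL*) into bit 0 and the
 * top-half flag (FMLAL2/FMLSL2) into bit 1 for gen_helper_gvec_fmlal_a64.
 */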
6000 
6001 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6002 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6003 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6004 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6005 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6006 
6007 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6008 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6009 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6010 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6011 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6012 
6013 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6014 {
6015     if (fp_access_check(s)) {
6016         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6017     }
6018     return true;
6019 }
6020 
6021 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6022 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6023 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
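
/*
 * tcg_gen_gvec_bitsel computes d = (a & b) | (~a & c) with the first
 * source operand as the selector, so BSL selects with rd, while BIT and
 * BIF select with rm and differ only in which of rn/rd supplies the
 * selected bits.
 */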
6024 
6025 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6026 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6027 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6028 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6029 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6030 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6031 
6032 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6033 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6034 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6035 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6036 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6037 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6038 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6039 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6040 
6041 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6042 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6043 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6044 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6045 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6046 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6047 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6048 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6049 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6050 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6051 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6052 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6053 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6054 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6055 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6056 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6057 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6058 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6059 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6060 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6061 
6062 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6063 {
6064     if (a->esz == MO_64 && !a->q) {
6065         return false;
6066     }
6067     if (fp_access_check(s)) {
6068         tcg_gen_gvec_cmp(cond, a->esz,
6069                          vec_full_reg_offset(s, a->rd),
6070                          vec_full_reg_offset(s, a->rn),
6071                          vec_full_reg_offset(s, a->rm),
6072                          a->q ? 16 : 8, vec_full_reg_size(s));
6073     }
6074     return true;
6075 }
6076 
6077 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6078 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6079 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6080 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6081 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6082 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6083 
6084 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6085 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6086 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6087 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6088 
6089 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6090                           gen_helper_gvec_4 *fn)
6091 {
6092     if (fp_access_check(s)) {
6093         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6094     }
6095     return true;
6096 }
6097 
6098 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6099                               gen_helper_gvec_4_ptr *fn)
6100 {
6101     if (fp_access_check(s)) {
6102         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6103     }
6104     return true;
6105 }
6106 
6107 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
6108 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
6109 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
6110 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6111 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6112 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6113 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6114 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6115 
6116 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6117 {
6118     if (!dc_isar_feature(aa64_bf16, s)) {
6119         return false;
6120     }
6121     if (fp_access_check(s)) {
6122         /* Q bit selects BFMLALB vs BFMLALT. */
6123         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6124                           s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6125                           gen_helper_gvec_bfmlal);
6126     }
6127     return true;
6128 }
6129 
6130 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6131     gen_helper_gvec_fcaddh,
6132     gen_helper_gvec_fcadds,
6133     gen_helper_gvec_fcaddd,
6134 };
6135 /*
6136  * Encode FPCR.AH into the data so the helper knows whether the
6137  * negations it does should avoid flipping the sign bit on a NaN
6138  */
6139 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6140            f_vector_fcadd)
6141 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6142            f_vector_fcadd)
6143 
6144 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6145 {
6146     static gen_helper_gvec_4_ptr * const fn[] = {
6147         [MO_16] = gen_helper_gvec_fcmlah,
6148         [MO_32] = gen_helper_gvec_fcmlas,
6149         [MO_64] = gen_helper_gvec_fcmlad,
6150     };
6151     int check;
6152 
6153     if (!dc_isar_feature(aa64_fcma, s)) {
6154         return false;
6155     }
6156 
6157     check = fp_access_check_vector_hsd(s, a->q, a->esz);
6158     if (check <= 0) {
6159         return check == 0;
6160     }
6161 
6162     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6163                       a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6164                       a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6165     return true;
6166 }
6167 
6168 /*
6169  * Widening vector x vector/indexed.
6170  *
6171  * These read from the top or bottom half of a 128-bit vector.
6172  * After widening, optionally accumulate with a 128-bit vector.
6173  * Implement these inline, as the number of elements is limited
6174  * and the related SVE and SME operations on larger vectors use
6175  * even/odd elements instead of top/bottom half.
6176  *
6177  * If idx >= 0, operand 2 is indexed, otherwise vector.
6178  * If acc, operand 0 is loaded with rd.
6179  */
6180 
6181 /* For low half, iterating up. */
6182 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6183                             int rd, int rn, int rm, int idx,
6184                             NeonGenTwo64OpFn *fn, bool acc)
6185 {
6186     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6187     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6188     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6189     MemOp esz = memop & MO_SIZE;
6190     int half = 8 >> esz;
6191     int top_swap, top_half;
6192 
6193     /* There are no 64x64->128 bit operations. */
6194     if (esz >= MO_64) {
6195         return false;
6196     }
6197     if (!fp_access_check(s)) {
6198         return true;
6199     }
6200 
6201     if (idx >= 0) {
6202         read_vec_element(s, tcg_op2, rm, idx, memop);
6203     }
6204 
6205     /*
6206      * For top half inputs, iterate forward; backward for bottom half.
6207      * This means the store to the destination will not occur until
6208      * overlapping inputs are consumed.
6209      * Use top_swap to conditionally invert the forward iteration index.
6210      */
6211     top_swap = top ? 0 : half - 1;
6212     top_half = top ? half : 0;
6213 
6214     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6215         int elt = elt_fwd ^ top_swap;
6216 
6217         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6218         if (idx < 0) {
6219             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6220         }
6221         if (acc) {
6222             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6223         }
6224         fn(tcg_op0, tcg_op1, tcg_op2);
6225         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6226     }
6227     clear_vec_high(s, 1, rd);
6228     return true;
6229 }
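
/*
 * E.g. a bottom-half operation on 16-bit elements has half = 4 and
 * top_swap = 3, so the loop visits elements 3, 2, 1, 0: the widened
 * result for element 3 lands in the top half of rd and is stored before
 * elements 0..2 of the inputs are read, so rd may safely overlap rn or rm.
 */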
6230 
6231 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6232 {
6233     TCGv_i64 t = tcg_temp_new_i64();
6234     tcg_gen_mul_i64(t, n, m);
6235     tcg_gen_add_i64(d, d, t);
6236 }
6237 
6238 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6239 {
6240     TCGv_i64 t = tcg_temp_new_i64();
6241     tcg_gen_mul_i64(t, n, m);
6242     tcg_gen_sub_i64(d, d, t);
6243 }
6244 
6245 TRANS(SMULL_v, do_3op_widening,
6246       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6247       tcg_gen_mul_i64, false)
6248 TRANS(UMULL_v, do_3op_widening,
6249       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6250       tcg_gen_mul_i64, false)
6251 TRANS(SMLAL_v, do_3op_widening,
6252       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6253       gen_muladd_i64, true)
6254 TRANS(UMLAL_v, do_3op_widening,
6255       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6256       gen_muladd_i64, true)
6257 TRANS(SMLSL_v, do_3op_widening,
6258       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6259       gen_mulsub_i64, true)
6260 TRANS(UMLSL_v, do_3op_widening,
6261       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6262       gen_mulsub_i64, true)
6263 
6264 TRANS(SMULL_vi, do_3op_widening,
6265       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6266       tcg_gen_mul_i64, false)
6267 TRANS(UMULL_vi, do_3op_widening,
6268       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6269       tcg_gen_mul_i64, false)
6270 TRANS(SMLAL_vi, do_3op_widening,
6271       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6272       gen_muladd_i64, true)
6273 TRANS(UMLAL_vi, do_3op_widening,
6274       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6275       gen_muladd_i64, true)
6276 TRANS(SMLSL_vi, do_3op_widening,
6277       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6278       gen_mulsub_i64, true)
6279 TRANS(UMLSL_vi, do_3op_widening,
6280       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6281       gen_mulsub_i64, true)
6282 
6283 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6284 {
6285     TCGv_i64 t1 = tcg_temp_new_i64();
6286     TCGv_i64 t2 = tcg_temp_new_i64();
6287 
6288     tcg_gen_sub_i64(t1, n, m);
6289     tcg_gen_sub_i64(t2, m, n);
6290     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6291 }
6292 
6293 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6294 {
6295     TCGv_i64 t1 = tcg_temp_new_i64();
6296     TCGv_i64 t2 = tcg_temp_new_i64();
6297 
6298     tcg_gen_sub_i64(t1, n, m);
6299     tcg_gen_sub_i64(t2, m, n);
6300     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6301 }
6302 
6303 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6304 {
6305     TCGv_i64 t = tcg_temp_new_i64();
6306     gen_sabd_i64(t, n, m);
6307     tcg_gen_add_i64(d, d, t);
6308 }
6309 
6310 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6311 {
6312     TCGv_i64 t = tcg_temp_new_i64();
6313     gen_uabd_i64(t, n, m);
6314     tcg_gen_add_i64(d, d, t);
6315 }
6316 
6317 TRANS(SADDL_v, do_3op_widening,
6318       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6319       tcg_gen_add_i64, false)
6320 TRANS(UADDL_v, do_3op_widening,
6321       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6322       tcg_gen_add_i64, false)
6323 TRANS(SSUBL_v, do_3op_widening,
6324       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6325       tcg_gen_sub_i64, false)
6326 TRANS(USUBL_v, do_3op_widening,
6327       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6328       tcg_gen_sub_i64, false)
6329 TRANS(SABDL_v, do_3op_widening,
6330       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6331       gen_sabd_i64, false)
6332 TRANS(UABDL_v, do_3op_widening,
6333       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6334       gen_uabd_i64, false)
6335 TRANS(SABAL_v, do_3op_widening,
6336       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6337       gen_saba_i64, true)
6338 TRANS(UABAL_v, do_3op_widening,
6339       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6340       gen_uaba_i64, true)
6341 
6342 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6343 {
6344     tcg_gen_mul_i64(d, n, m);
6345     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6346 }
6347 
6348 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6349 {
6350     tcg_gen_mul_i64(d, n, m);
6351     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6352 }
6353 
6354 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6355 {
6356     TCGv_i64 t = tcg_temp_new_i64();
6357 
6358     tcg_gen_mul_i64(t, n, m);
6359     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6360     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6361 }
6362 
6363 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6364 {
6365     TCGv_i64 t = tcg_temp_new_i64();
6366 
6367     tcg_gen_mul_i64(t, n, m);
6368     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6369     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6370 }
6371 
6372 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6373 {
6374     TCGv_i64 t = tcg_temp_new_i64();
6375 
6376     tcg_gen_mul_i64(t, n, m);
6377     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6378     tcg_gen_neg_i64(t, t);
6379     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6380 }
6381 
6382 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6383 {
6384     TCGv_i64 t = tcg_temp_new_i64();
6385 
6386     tcg_gen_mul_i64(t, n, m);
6387     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6388     tcg_gen_neg_i64(t, t);
6389     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6390 }
6391 
6392 TRANS(SQDMULL_v, do_3op_widening,
6393       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6394       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6395 TRANS(SQDMLAL_v, do_3op_widening,
6396       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6397       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6398 TRANS(SQDMLSL_v, do_3op_widening,
6399       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6400       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6401 
6402 TRANS(SQDMULL_vi, do_3op_widening,
6403       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6404       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6405 TRANS(SQDMLAL_vi, do_3op_widening,
6406       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6407       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6408 TRANS(SQDMLSL_vi, do_3op_widening,
6409       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6410       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6411 
6412 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6413                            MemOp sign, bool sub)
6414 {
6415     TCGv_i64 tcg_op0, tcg_op1;
6416     MemOp esz = a->esz;
6417     int half = 8 >> esz;
6418     bool top = a->q;
6419     int top_swap = top ? 0 : half - 1;
6420     int top_half = top ? half : 0;
6421 
6422     /* There are no 64x64->128 bit operations. */
6423     if (esz >= MO_64) {
6424         return false;
6425     }
6426     if (!fp_access_check(s)) {
6427         return true;
6428     }
6429     tcg_op0 = tcg_temp_new_i64();
6430     tcg_op1 = tcg_temp_new_i64();
6431 
6432     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6433         int elt = elt_fwd ^ top_swap;
6434 
6435         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6436         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6437         if (sub) {
6438             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6439         } else {
6440             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6441         }
6442         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6443     }
6444     clear_vec_high(s, 1, a->rd);
6445     return true;
6446 }
6447 
6448 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6449 TRANS(UADDW, do_addsub_wide, a, 0, false)
6450 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6451 TRANS(USUBW, do_addsub_wide, a, 0, true)
6452 
6453 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6454                                  bool sub, bool round)
6455 {
6456     TCGv_i64 tcg_op0, tcg_op1;
6457     MemOp esz = a->esz;
6458     int half = 8 >> esz;
6459     bool top = a->q;
6460     int ebits = 8 << esz;
6461     uint64_t rbit = 1ull << (ebits - 1);
6462     int top_swap, top_half;
6463 
6464     /* There are no 128x128->64 bit operations. */
6465     if (esz >= MO_64) {
6466         return false;
6467     }
6468     if (!fp_access_check(s)) {
6469         return true;
6470     }
6471     tcg_op0 = tcg_temp_new_i64();
6472     tcg_op1 = tcg_temp_new_i64();
6473 
6474     /*
6475      * For the top-half (Q=1) variant, iterate backward; forward for the
6476      * bottom half.  This means the store to the destination will not
6477      * occur until the overlapping inputs have been consumed.
6478      */
6479     top_swap = top ? half - 1 : 0;
6480     top_half = top ? half : 0;
6481 
6482     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6483         int elt = elt_fwd ^ top_swap;
6484 
6485         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6486         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6487         if (sub) {
6488             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6489         } else {
6490             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6491         }
6492         if (round) {
6493             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6494         }
6495         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6496         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6497     }
6498     clear_vec_high(s, top, a->rd);
6499     return true;
6500 }
6501 
6502 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6503 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6504 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6505 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6506 
6507 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6508 {
6509     if (fp_access_check(s)) {
6510         /* The Q field specifies lo/hi half input for these insns.  */
6511         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6512     }
6513     return true;
6514 }
6515 
6516 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6517 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6518 
6519 /*
6520  * Advanced SIMD scalar/vector x indexed element
6521  */
6522 
6523 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6524 {
6525     switch (a->esz) {
6526     case MO_64:
6527         if (fp_access_check(s)) {
6528             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6529             TCGv_i64 t1 = tcg_temp_new_i64();
6530 
6531             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6532             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6533             write_fp_dreg_merging(s, a->rd, a->rn, t0);
6534         }
6535         break;
6536     case MO_32:
6537         if (fp_access_check(s)) {
6538             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6539             TCGv_i32 t1 = tcg_temp_new_i32();
6540 
6541             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6542             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6543             write_fp_sreg_merging(s, a->rd, a->rn, t0);
6544         }
6545         break;
6546     case MO_16:
6547         if (!dc_isar_feature(aa64_fp16, s)) {
6548             return false;
6549         }
6550         if (fp_access_check(s)) {
6551             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6552             TCGv_i32 t1 = tcg_temp_new_i32();
6553 
6554             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6555             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6556             write_fp_hreg_merging(s, a->rd, a->rn, t0);
6557         }
6558         break;
6559     default:
6560         g_assert_not_reached();
6561     }
6562     return true;
6563 }
6564 
6565 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6566 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6567 
6568 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6569 {
6570     switch (a->esz) {
6571     case MO_64:
6572         if (fp_access_check(s)) {
6573             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6574             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6575             TCGv_i64 t2 = tcg_temp_new_i64();
6576 
6577             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6578             if (neg) {
6579                 gen_vfp_maybe_ah_negd(s, t1, t1);
6580             }
6581             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6582             write_fp_dreg_merging(s, a->rd, a->rd, t0);
6583         }
6584         break;
6585     case MO_32:
6586         if (fp_access_check(s)) {
6587             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6588             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6589             TCGv_i32 t2 = tcg_temp_new_i32();
6590 
6591             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6592             if (neg) {
6593                 gen_vfp_maybe_ah_negs(s, t1, t1);
6594             }
6595             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6596             write_fp_sreg_merging(s, a->rd, a->rd, t0);
6597         }
6598         break;
6599     case MO_16:
6600         if (!dc_isar_feature(aa64_fp16, s)) {
6601             return false;
6602         }
6603         if (fp_access_check(s)) {
6604             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6605             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6606             TCGv_i32 t2 = tcg_temp_new_i32();
6607 
6608             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6609             if (neg) {
6610                 gen_vfp_maybe_ah_negh(s, t1, t1);
6611             }
6612             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6613                                        fpstatus_ptr(FPST_A64_F16));
6614             write_fp_hreg_merging(s, a->rd, a->rd, t0);
6615         }
6616         break;
6617     default:
6618         g_assert_not_reached();
6619     }
6620     return true;
6621 }
6622 
6623 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6624 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6625 
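/*
 * SQDMULH/SQRDMULH (scalar, by element): only 16-bit and 32-bit element
 * sizes are valid.  The gen_bhs[] table is indexed directly by esz (the
 * byte entry is never used here).
 */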
6626 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6627                                   const ENVScalar2 *f)
6628 {
6629     if (a->esz < MO_16 || a->esz > MO_32) {
6630         return false;
6631     }
6632     if (fp_access_check(s)) {
6633         TCGv_i32 t0 = tcg_temp_new_i32();
6634         TCGv_i32 t1 = tcg_temp_new_i32();
6635 
6636         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6637         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6638         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6639         write_fp_sreg(s, a->rd, t0);
6640     }
6641     return true;
6642 }
6643 
6644 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6645 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6646 
6647 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6648                                   const ENVScalar3 *f)
6649 {
6650     if (a->esz < MO_16 || a->esz > MO_32) {
6651         return false;
6652     }
6653     if (fp_access_check(s)) {
6654         TCGv_i32 t0 = tcg_temp_new_i32();
6655         TCGv_i32 t1 = tcg_temp_new_i32();
6656         TCGv_i32 t2 = tcg_temp_new_i32();
6657 
6658         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6659         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6660         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6661         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6662         write_fp_sreg(s, a->rd, t0);
6663     }
6664     return true;
6665 }
6666 
6667 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6668 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6669 
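/*
 * Scalar widening multiply-add by indexed element: the inputs are narrow
 * signed elements, while the optional accumulator and the result are
 * double-width (esz + 1).
 */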
6670 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6671                                           NeonGenTwo64OpFn *fn, bool acc)
6672 {
6673     if (fp_access_check(s)) {
6674         TCGv_i64 t0 = tcg_temp_new_i64();
6675         TCGv_i64 t1 = tcg_temp_new_i64();
6676         TCGv_i64 t2 = tcg_temp_new_i64();
6677 
6678         if (acc) {
6679             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6680         }
6681         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6682         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6683         fn(t0, t1, t2);
6684 
6685         /* Clear the whole register first, then store scalar. */
6686         clear_vec(s, a->rd);
6687         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6688     }
6689     return true;
6690 }
6691 
6692 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6693       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6694 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6695       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6696 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6697       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6698 
6699 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6700                               gen_helper_gvec_3_ptr * const fns[3])
6701 {
6702     MemOp esz = a->esz;
6703     int check = fp_access_check_vector_hsd(s, a->q, esz);
6704 
6705     if (check <= 0) {
6706         return check == 0;
6707     }
6708 
6709     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6710                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6711                       a->idx, fns[esz - 1]);
6712     return true;
6713 }
6714 
6715 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6716     gen_helper_gvec_fmul_idx_h,
6717     gen_helper_gvec_fmul_idx_s,
6718     gen_helper_gvec_fmul_idx_d,
6719 };
6720 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6721 
6722 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6723     gen_helper_gvec_fmulx_idx_h,
6724     gen_helper_gvec_fmulx_idx_s,
6725     gen_helper_gvec_fmulx_idx_d,
6726 };
6727 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6728 
6729 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6730 {
6731     static gen_helper_gvec_4_ptr * const fns[3][3] = {
6732         { gen_helper_gvec_fmla_idx_h,
6733           gen_helper_gvec_fmla_idx_s,
6734           gen_helper_gvec_fmla_idx_d },
6735         { gen_helper_gvec_fmls_idx_h,
6736           gen_helper_gvec_fmls_idx_s,
6737           gen_helper_gvec_fmls_idx_d },
6738         { gen_helper_gvec_ah_fmls_idx_h,
6739           gen_helper_gvec_ah_fmls_idx_s,
6740           gen_helper_gvec_ah_fmls_idx_d },
6741     };
6742     MemOp esz = a->esz;
6743     int check = fp_access_check_vector_hsd(s, a->q, esz);
6744 
6745     if (check <= 0) {
6746         return check == 0;
6747     }
6748 
6749     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6750                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6751                       a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
6752     return true;
6753 }
6754 
6755 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6756 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6757 
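/*
 * FMLAL/FMLSL (by element): widening half- to single-precision fused
 * multiply-add.  The helper's data word packs the element index together
 * with the subtract (S) and upper-half ("2") selectors.
 */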
6758 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6759 {
6760     if (fp_access_check(s)) {
6761         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6762         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6763                            vec_full_reg_offset(s, a->rn),
6764                            vec_full_reg_offset(s, a->rm), tcg_env,
6765                            a->q ? 16 : 8, vec_full_reg_size(s),
6766                            data, gen_helper_gvec_fmlal_idx_a64);
6767     }
6768     return true;
6769 }
6770 
6771 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6772 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6773 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6774 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6775 
6776 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6777                                gen_helper_gvec_3 * const fns[2])
6778 {
6779     assert(a->esz == MO_16 || a->esz == MO_32);
6780     if (fp_access_check(s)) {
6781         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6782     }
6783     return true;
6784 }
6785 
6786 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6787     gen_helper_gvec_mul_idx_h,
6788     gen_helper_gvec_mul_idx_s,
6789 };
6790 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6791 
6792 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6793 {
6794     static gen_helper_gvec_4 * const fns[2][2] = {
6795         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6796         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6797     };
6798 
6799     assert(a->esz == MO_16 || a->esz == MO_32);
6800     if (fp_access_check(s)) {
6801         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6802                          a->idx, fns[a->esz - 1][sub]);
6803     }
6804     return true;
6805 }
6806 
6807 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6808 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6809 
6810 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6811                                   gen_helper_gvec_4 * const fns[2])
6812 {
6813     assert(a->esz == MO_16 || a->esz == MO_32);
6814     if (fp_access_check(s)) {
6815         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6816                            vec_full_reg_offset(s, a->rn),
6817                            vec_full_reg_offset(s, a->rm),
6818                            offsetof(CPUARMState, vfp.qc),
6819                            a->q ? 16 : 8, vec_full_reg_size(s),
6820                            a->idx, fns[a->esz - 1]);
6821     }
6822     return true;
6823 }
6824 
6825 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6826     gen_helper_neon_sqdmulh_idx_h,
6827     gen_helper_neon_sqdmulh_idx_s,
6828 };
6829 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6830 
6831 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6832     gen_helper_neon_sqrdmulh_idx_h,
6833     gen_helper_neon_sqrdmulh_idx_s,
6834 };
6835 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6836 
6837 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6838     gen_helper_neon_sqrdmlah_idx_h,
6839     gen_helper_neon_sqrdmlah_idx_s,
6840 };
6841 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6842            f_vector_idx_sqrdmlah)
6843 
6844 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6845     gen_helper_neon_sqrdmlsh_idx_h,
6846     gen_helper_neon_sqrdmlsh_idx_s,
6847 };
6848 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6849            f_vector_idx_sqrdmlsh)
6850 
6851 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6852                               gen_helper_gvec_4 *fn)
6853 {
6854     if (fp_access_check(s)) {
6855         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6856     }
6857     return true;
6858 }
6859 
6860 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6861                                   gen_helper_gvec_4_ptr *fn)
6862 {
6863     if (fp_access_check(s)) {
6864         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6865     }
6866     return true;
6867 }
6868 
6869 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6870 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6871 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6872            gen_helper_gvec_sudot_idx_b)
6873 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6874            gen_helper_gvec_usdot_idx_b)
6875 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6876            gen_helper_gvec_bfdot_idx)
6877 
6878 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6879 {
6880     if (!dc_isar_feature(aa64_bf16, s)) {
6881         return false;
6882     }
6883     if (fp_access_check(s)) {
6884         /* Q bit selects BFMLALB vs BFMLALT. */
6885         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6886                           s->fpcr_ah ? FPST_AH : FPST_A64,
6887                           (a->idx << 1) | a->q,
6888                           gen_helper_gvec_bfmlal_idx);
6889     }
6890     return true;
6891 }
6892 
6893 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6894 {
6895     gen_helper_gvec_4_ptr *fn;
6896 
6897     if (!dc_isar_feature(aa64_fcma, s)) {
6898         return false;
6899     }
6900     switch (a->esz) {
6901     case MO_16:
6902         if (!dc_isar_feature(aa64_fp16, s)) {
6903             return false;
6904         }
6905         fn = gen_helper_gvec_fcmlah_idx;
6906         break;
6907     case MO_32:
6908         fn = gen_helper_gvec_fcmlas_idx;
6909         break;
6910     default:
6911         g_assert_not_reached();
6912     }
6913     if (fp_access_check(s)) {
6914         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6915                           a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6916                           (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
6917     }
6918     return true;
6919 }
6920 
6921 /*
6922  * Advanced SIMD scalar pairwise
6923  */
6924 
6925 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6926 {
6927     switch (a->esz) {
6928     case MO_64:
6929         if (fp_access_check(s)) {
6930             TCGv_i64 t0 = tcg_temp_new_i64();
6931             TCGv_i64 t1 = tcg_temp_new_i64();
6932 
6933             read_vec_element(s, t0, a->rn, 0, MO_64);
6934             read_vec_element(s, t1, a->rn, 1, MO_64);
6935             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6936             write_fp_dreg(s, a->rd, t0);
6937         }
6938         break;
6939     case MO_32:
6940         if (fp_access_check(s)) {
6941             TCGv_i32 t0 = tcg_temp_new_i32();
6942             TCGv_i32 t1 = tcg_temp_new_i32();
6943 
6944             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6945             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6946             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6947             write_fp_sreg(s, a->rd, t0);
6948         }
6949         break;
6950     case MO_16:
6951         if (!dc_isar_feature(aa64_fp16, s)) {
6952             return false;
6953         }
6954         if (fp_access_check(s)) {
6955             TCGv_i32 t0 = tcg_temp_new_i32();
6956             TCGv_i32 t1 = tcg_temp_new_i32();
6957 
6958             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6959             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6960             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6961             write_fp_sreg(s, a->rd, t0);
6962         }
6963         break;
6964     default:
6965         g_assert_not_reached();
6966     }
6967     return true;
6968 }
6969 
6970 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
6971                                    const FPScalar *fnormal,
6972                                    const FPScalar *fah)
6973 {
6974     return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
6975 }
6976 
6977 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6978 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
6979 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
6980 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6981 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6982 
6983 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6984 {
6985     if (fp_access_check(s)) {
6986         TCGv_i64 t0 = tcg_temp_new_i64();
6987         TCGv_i64 t1 = tcg_temp_new_i64();
6988 
6989         read_vec_element(s, t0, a->rn, 0, MO_64);
6990         read_vec_element(s, t1, a->rn, 1, MO_64);
6991         tcg_gen_add_i64(t0, t0, t1);
6992         write_fp_dreg(s, a->rd, t0);
6993     }
6994     return true;
6995 }
6996 
6997 /*
6998  * Floating-point conditional select
6999  */
7000 
7001 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7002 {
7003     TCGv_i64 t_true, t_false;
7004     DisasCompare64 c;
7005     int check = fp_access_check_scalar_hsd(s, a->esz);
7006 
7007     if (check <= 0) {
7008         return check == 0;
7009     }
7010 
7011     /* Zero extend sreg & hreg inputs to 64 bits now.  */
7012     t_true = tcg_temp_new_i64();
7013     t_false = tcg_temp_new_i64();
7014     read_vec_element(s, t_true, a->rn, 0, a->esz);
7015     read_vec_element(s, t_false, a->rm, 0, a->esz);
7016 
7017     a64_test_cc(&c, a->cond);
7018     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7019                         t_true, t_false);
7020 
7021     /*
7022      * Note that sregs & hregs write back zeros to the high bits,
7023      * and we've already done the zero-extension.
7024      */
7025     write_fp_dreg(s, a->rd, t_true);
7026     return true;
7027 }
7028 
7029 /*
7030  * Advanced SIMD Extract
7031  */
7032 
7033 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7034 {
7035     if (fp_access_check(s)) {
7036         TCGv_i64 lo = read_fp_dreg(s, a->rn);
7037         if (a->imm != 0) {
7038             TCGv_i64 hi = read_fp_dreg(s, a->rm);
7039             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7040         }
7041         write_fp_dreg(s, a->rd, lo);
7042     }
7043     return true;
7044 }
7045 
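/*
 * EXT (128-bit): treat rn:rm as four consecutive 64-bit elements and
 * extract the 128-bit window that starts at byte offset imm.  "elt" is
 * the starting 64-bit element and "pos" the bit offset within it.
 */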
7046 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7047 {
7048     TCGv_i64 lo, hi;
7049     int pos = (a->imm & 7) * 8;
7050     int elt = a->imm >> 3;
7051 
7052     if (!fp_access_check(s)) {
7053         return true;
7054     }
7055 
7056     lo = tcg_temp_new_i64();
7057     hi = tcg_temp_new_i64();
7058 
7059     read_vec_element(s, lo, a->rn, elt, MO_64);
7060     elt++;
7061     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7062     elt++;
7063 
7064     if (pos != 0) {
7065         TCGv_i64 hh = tcg_temp_new_i64();
7066         tcg_gen_extract2_i64(lo, lo, hi, pos);
7067         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7068         tcg_gen_extract2_i64(hi, hi, hh, pos);
7069     }
7070 
7071     write_vec_element(s, lo, a->rd, 0, MO_64);
7072     write_vec_element(s, hi, a->rd, 1, MO_64);
7073     clear_vec_high(s, true, a->rd);
7074     return true;
7075 }
7076 
7077 /*
7078  * Floating-point data-processing (3 source)
7079  */
7080 
7081 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7082 {
7083     TCGv_ptr fpst;
7084 
7085     /*
7086      * These are fused multiply-add.  Note that doing the negations here
7087      * as separate steps is correct: an input NaN should come out with
7088      * its sign bit flipped if it is a negated input.
7089      */
7090     switch (a->esz) {
7091     case MO_64:
7092         if (fp_access_check(s)) {
7093             TCGv_i64 tn = read_fp_dreg(s, a->rn);
7094             TCGv_i64 tm = read_fp_dreg(s, a->rm);
7095             TCGv_i64 ta = read_fp_dreg(s, a->ra);
7096 
7097             if (neg_a) {
7098                 gen_vfp_maybe_ah_negd(s, ta, ta);
7099             }
7100             if (neg_n) {
7101                 gen_vfp_maybe_ah_negd(s, tn, tn);
7102             }
7103             fpst = fpstatus_ptr(FPST_A64);
7104             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7105             write_fp_dreg_merging(s, a->rd, a->ra, ta);
7106         }
7107         break;
7108 
7109     case MO_32:
7110         if (fp_access_check(s)) {
7111             TCGv_i32 tn = read_fp_sreg(s, a->rn);
7112             TCGv_i32 tm = read_fp_sreg(s, a->rm);
7113             TCGv_i32 ta = read_fp_sreg(s, a->ra);
7114 
7115             if (neg_a) {
7116                 gen_vfp_maybe_ah_negs(s, ta, ta);
7117             }
7118             if (neg_n) {
7119                 gen_vfp_maybe_ah_negs(s, tn, tn);
7120             }
7121             fpst = fpstatus_ptr(FPST_A64);
7122             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7123             write_fp_sreg_merging(s, a->rd, a->ra, ta);
7124         }
7125         break;
7126 
7127     case MO_16:
7128         if (!dc_isar_feature(aa64_fp16, s)) {
7129             return false;
7130         }
7131         if (fp_access_check(s)) {
7132             TCGv_i32 tn = read_fp_hreg(s, a->rn);
7133             TCGv_i32 tm = read_fp_hreg(s, a->rm);
7134             TCGv_i32 ta = read_fp_hreg(s, a->ra);
7135 
7136             if (neg_a) {
7137                 gen_vfp_maybe_ah_negh(s, ta, ta);
7138             }
7139             if (neg_n) {
7140                 gen_vfp_maybe_ah_negh(s, tn, tn);
7141             }
7142             fpst = fpstatus_ptr(FPST_A64_F16);
7143             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7144             write_fp_hreg_merging(s, a->rd, a->ra, ta);
7145         }
7146         break;
7147 
7148     default:
7149         return false;
7150     }
7151     return true;
7152 }
7153 
7154 TRANS(FMADD, do_fmadd, a, false, false)
7155 TRANS(FNMADD, do_fmadd, a, true, true)
7156 TRANS(FMSUB, do_fmadd, a, false, true)
7157 TRANS(FNMSUB, do_fmadd, a, true, false)
7158 
7159 /*
7160  * Advanced SIMD Across Lanes
7161  */
7162 
7163 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7164                              MemOp src_sign, NeonGenTwo64OpFn *fn)
7165 {
7166     TCGv_i64 tcg_res, tcg_elt;
7167     MemOp src_mop = a->esz | src_sign;
7168     int elements = (a->q ? 16 : 8) >> a->esz;
7169 
7170     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7171     if (elements < 4) {
7172         return false;
7173     }
7174     if (!fp_access_check(s)) {
7175         return true;
7176     }
7177 
7178     tcg_res = tcg_temp_new_i64();
7179     tcg_elt = tcg_temp_new_i64();
7180 
7181     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7182     for (int i = 1; i < elements; i++) {
7183         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7184         fn(tcg_res, tcg_res, tcg_elt);
7185     }
7186 
7187     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
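    /* The result element size is esz, or esz + 1 for the widening SADDLV/UADDLV forms. */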
7188     write_fp_dreg(s, a->rd, tcg_res);
7189     return true;
7190 }
7191 
7192 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7193 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7194 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7195 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7196 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7197 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7198 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7199 
7200 /*
7201  * do_fp_reduction helper
7202  *
7203  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7204  * important for correct NaN propagation that we do these
7205  * operations in exactly the order specified by the pseudocode.
7206  *
7207  * This is a recursive function.
7208  */
7209 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7210                                 int ebase, int ecount, TCGv_ptr fpst,
7211                                 NeonGenTwoSingleOpFn *fn)
7212 {
7213     if (ecount == 1) {
7214         TCGv_i32 tcg_elem = tcg_temp_new_i32();
7215         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7216         return tcg_elem;
7217     } else {
7218         int half = ecount >> 1;
7219         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7220 
7221         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7222         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7223         tcg_res = tcg_temp_new_i32();
7224 
7225         fn(tcg_res, tcg_lo, tcg_hi, fpst);
7226         return tcg_res;
7227     }
7228 }
7229 
7230 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7231                             NeonGenTwoSingleOpFn *fnormal,
7232                             NeonGenTwoSingleOpFn *fah)
7233 {
7234     if (fp_access_check(s)) {
7235         MemOp esz = a->esz;
7236         int elts = (a->q ? 16 : 8) >> esz;
7237         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7238         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7239                                        s->fpcr_ah ? fah : fnormal);
7240         write_fp_sreg(s, a->rd, res);
7241     }
7242     return true;
7243 }
7244 
7245 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7246            gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7247 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7248            gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7249 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7250            gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7251 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7252            gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7253 
7254 TRANS(FMAXNMV_s, do_fp_reduction, a,
7255       gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7256 TRANS(FMINNMV_s, do_fp_reduction, a,
7257       gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7258 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7259 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7260 
7261 /*
7262  * Floating-point Immediate
7263  */
7264 
7265 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7266 {
7267     int check = fp_access_check_scalar_hsd(s, a->esz);
7268     uint64_t imm;
7269 
7270     if (check <= 0) {
7271         return check == 0;
7272     }
7273 
7274     imm = vfp_expand_imm(a->esz, a->imm);
7275     write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7276     return true;
7277 }
7278 
7279 /*
7280  * Floating point compare, conditional compare
7281  */
7282 
7283 static void handle_fp_compare(DisasContext *s, int size,
7284                               unsigned int rn, unsigned int rm,
7285                               bool cmp_with_zero, bool signal_all_nans)
7286 {
7287     TCGv_i64 tcg_flags = tcg_temp_new_i64();
7288     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7289 
7290     if (size == MO_64) {
7291         TCGv_i64 tcg_vn, tcg_vm;
7292 
7293         tcg_vn = read_fp_dreg(s, rn);
7294         if (cmp_with_zero) {
7295             tcg_vm = tcg_constant_i64(0);
7296         } else {
7297             tcg_vm = read_fp_dreg(s, rm);
7298         }
7299         if (signal_all_nans) {
7300             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7301         } else {
7302             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7303         }
7304     } else {
7305         TCGv_i32 tcg_vn = tcg_temp_new_i32();
7306         TCGv_i32 tcg_vm = tcg_temp_new_i32();
7307 
7308         read_vec_element_i32(s, tcg_vn, rn, 0, size);
7309         if (cmp_with_zero) {
7310             tcg_gen_movi_i32(tcg_vm, 0);
7311         } else {
7312             read_vec_element_i32(s, tcg_vm, rm, 0, size);
7313         }
7314 
7315         switch (size) {
7316         case MO_32:
7317             if (signal_all_nans) {
7318                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7319             } else {
7320                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7321             }
7322             break;
7323         case MO_16:
7324             if (signal_all_nans) {
7325                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7326             } else {
7327                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7328             }
7329             break;
7330         default:
7331             g_assert_not_reached();
7332         }
7333     }
7334 
7335     gen_set_nzcv(tcg_flags);
7336 }
7337 
7338 /* FCMP, FCMPE */
7339 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7340 {
7341     int check = fp_access_check_scalar_hsd(s, a->esz);
7342 
7343     if (check <= 0) {
7344         return check == 0;
7345     }
7346 
7347     handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7348     return true;
7349 }
7350 
7351 /* FCCMP, FCCMPE */
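/*
 * If the condition does not hold, NZCV is set directly from the nzcv
 * immediate and the FP comparison is skipped.
 */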
7352 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7353 {
7354     TCGLabel *label_continue = NULL;
7355     int check = fp_access_check_scalar_hsd(s, a->esz);
7356 
7357     if (check <= 0) {
7358         return check == 0;
7359     }
7360 
7361     if (a->cond < 0x0e) { /* not always */
7362         TCGLabel *label_match = gen_new_label();
7363         label_continue = gen_new_label();
7364         arm_gen_test_cc(a->cond, label_match);
7365         /* nomatch: */
7366         gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7367         tcg_gen_br(label_continue);
7368         gen_set_label(label_match);
7369     }
7370 
7371     handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7372 
7373     if (label_continue) {
7374         gen_set_label(label_continue);
7375     }
7376     return true;
7377 }
7378 
7379 /*
7380  * Advanced SIMD Modified Immediate
7381  */
7382 
7383 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7384 {
7385     if (!dc_isar_feature(aa64_fp16, s)) {
7386         return false;
7387     }
7388     if (fp_access_check(s)) {
7389         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7390                              a->q ? 16 : 8, vec_full_reg_size(s),
7391                              vfp_expand_imm(MO_16, a->abcdefgh));
7392     }
7393     return true;
7394 }
7395 
7396 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7397                      int64_t c, uint32_t oprsz, uint32_t maxsz)
7398 {
7399     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7400 }
7401 
7402 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7403 {
7404     GVecGen2iFn *fn;
7405 
7406     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7407     if ((a->cmode & 1) && a->cmode < 12) {
7408         /* For op=1, the imm will be inverted, so BIC becomes AND. */
7409         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7410     } else {
7411         /* There is one unallocated cmode/op combination in this space */
7412         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7413             return false;
7414         }
7415         fn = gen_movi;
7416     }
7417 
7418     if (fp_access_check(s)) {
7419         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7420         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7421     }
7422     return true;
7423 }
7424 
7425 /*
7426  * Advanced SIMD Shift by Immediate
7427  */
7428 
7429 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7430 {
7431     if (fp_access_check(s)) {
7432         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7433     }
7434     return true;
7435 }
7436 
7437 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7438 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7439 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7440 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7441 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7442 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7443 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7444 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7445 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7446 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7447 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7448 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7449 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7450 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7451 
7452 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7453 {
7454     TCGv_i64 tcg_rn, tcg_rd;
7455     int esz = a->esz;
7456     int esize;
7457 
7458     if (!fp_access_check(s)) {
7459         return true;
7460     }
7461 
7462     /*
7463      * For the LL variants the store is larger than the load,
7464      * so if rd == rn we would overwrite parts of our input.
7465      * So load everything right now and use shifts in the main loop.
7466      */
7467     tcg_rd = tcg_temp_new_i64();
7468     tcg_rn = tcg_temp_new_i64();
7469     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7470 
7471     esize = 8 << esz;
7472     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7473         if (is_u) {
7474             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7475         } else {
7476             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7477         }
7478         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7479         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7480     }
7481     clear_vec_high(s, true, a->rd);
7482     return true;
7483 }
7484 
7485 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7486 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7487 
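/*
 * 64-bit shift by immediate: the architectural shift amount may be 64,
 * which TCG shifts do not accept.  An arithmetic shift by 64 equals a
 * shift by 63 (all sign bits); a logical shift by 64 yields zero.
 */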
7488 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7489 {
7490     assert(shift >= 0 && shift <= 64);
7491     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7492 }
7493 
7494 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7495 {
7496     assert(shift >= 0 && shift <= 64);
7497     if (shift == 64) {
7498         tcg_gen_movi_i64(dst, 0);
7499     } else {
7500         tcg_gen_shri_i64(dst, src, shift);
7501     }
7502 }
7503 
7504 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7505 {
7506     gen_sshr_d(src, src, shift);
7507     tcg_gen_add_i64(dst, dst, src);
7508 }
7509 
7510 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7511 {
7512     gen_ushr_d(src, src, shift);
7513     tcg_gen_add_i64(dst, dst, src);
7514 }
7515 
7516 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7517 {
7518     assert(shift >= 0 && shift <= 32);
7519     if (shift) {
7520         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7521         tcg_gen_add_i64(dst, src, rnd);
7522         tcg_gen_sari_i64(dst, dst, shift);
7523     } else {
7524         tcg_gen_mov_i64(dst, src);
7525     }
7526 }
7527 
7528 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7529 {
7530     assert(shift >= 0 && shift <= 32);
7531     if (shift) {
7532         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7533         tcg_gen_add_i64(dst, src, rnd);
7534         tcg_gen_shri_i64(dst, dst, shift);
7535     } else {
7536         tcg_gen_mov_i64(dst, src);
7537     }
7538 }
7539 
7540 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7541 {
7542     assert(shift >= 0 && shift <= 64);
7543     if (shift == 0) {
7544         tcg_gen_mov_i64(dst, src);
7545     } else if (shift == 64) {
7546         /* The sign extension (0 or -1) plus the rounding bit (the sign bit: 0 or 1) is always zero. */
7547         tcg_gen_movi_i64(dst, 0);
7548     } else {
7549         TCGv_i64 rnd = tcg_temp_new_i64();
7550         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7551         tcg_gen_sari_i64(dst, src, shift);
7552         tcg_gen_add_i64(dst, dst, rnd);
7553     }
7554 }
7555 
7556 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7557 {
7558     assert(shift >= 0 && shift <= 64);
7559     if (shift == 0) {
7560         tcg_gen_mov_i64(dst, src);
7561     } else if (shift == 64) {
7562         /* Rounding will propagate bit 63 into bit 64. */
7563         tcg_gen_shri_i64(dst, src, 63);
7564     } else {
7565         TCGv_i64 rnd = tcg_temp_new_i64();
7566         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7567         tcg_gen_shri_i64(dst, src, shift);
7568         tcg_gen_add_i64(dst, dst, rnd);
7569     }
7570 }
7571 
7572 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7573 {
7574     gen_srshr_d(src, src, shift);
7575     tcg_gen_add_i64(dst, dst, src);
7576 }
7577 
7578 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7579 {
7580     gen_urshr_d(src, src, shift);
7581     tcg_gen_add_i64(dst, dst, src);
7582 }
7583 
7584 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7585 {
7586     /* If shift is 64, dst is unchanged. */
7587     if (shift != 64) {
7588         tcg_gen_shri_i64(src, src, shift);
7589         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7590     }
7591 }
7592 
7593 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7594 {
7595     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7596 }
7597 
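/*
 * Shift-and-narrow: read double-width source elements, narrow each with
 * fn, assemble the results in a 64-bit temporary with deposits, then
 * store it to the low or high half of rd as selected by Q.
 */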
7598 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7599                                     WideShiftImmFn * const fns[3], MemOp sign)
7600 {
7601     TCGv_i64 tcg_rn, tcg_rd;
7602     int esz = a->esz;
7603     int esize;
7604     WideShiftImmFn *fn;
7605 
7606     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7607 
7608     if (!fp_access_check(s)) {
7609         return true;
7610     }
7611 
7612     tcg_rn = tcg_temp_new_i64();
7613     tcg_rd = tcg_temp_new_i64();
7614     tcg_gen_movi_i64(tcg_rd, 0);
7615 
7616     fn = fns[esz];
7617     esize = 8 << esz;
7618     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7619         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7620         fn(tcg_rn, tcg_rn, a->imm);
7621         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7622     }
7623 
7624     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7625     clear_vec_high(s, a->q, a->rd);
7626     return true;
7627 }
7628 
7629 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7630 {
7631     tcg_gen_sari_i64(d, s, i);
7632     tcg_gen_ext16u_i64(d, d);
7633     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7634 }
7635 
7636 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7637 {
7638     tcg_gen_sari_i64(d, s, i);
7639     tcg_gen_ext32u_i64(d, d);
7640     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7641 }
7642 
7643 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7644 {
7645     gen_sshr_d(d, s, i);
7646     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7647 }
7648 
7649 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7650 {
7651     tcg_gen_shri_i64(d, s, i);
7652     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7653 }
7654 
7655 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7656 {
7657     tcg_gen_shri_i64(d, s, i);
7658     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7659 }
7660 
7661 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7662 {
7663     gen_ushr_d(d, s, i);
7664     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7665 }
7666 
7667 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7668 {
7669     tcg_gen_sari_i64(d, s, i);
7670     tcg_gen_ext16u_i64(d, d);
7671     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7672 }
7673 
7674 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7675 {
7676     tcg_gen_sari_i64(d, s, i);
7677     tcg_gen_ext32u_i64(d, d);
7678     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7679 }
7680 
7681 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7682 {
7683     gen_sshr_d(d, s, i);
7684     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7685 }
7686 
7687 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7688 {
7689     gen_srshr_bhs(d, s, i);
7690     tcg_gen_ext16u_i64(d, d);
7691     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7692 }
7693 
7694 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7695 {
7696     gen_srshr_bhs(d, s, i);
7697     tcg_gen_ext32u_i64(d, d);
7698     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7699 }
7700 
7701 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7702 {
7703     gen_srshr_d(d, s, i);
7704     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7705 }
7706 
7707 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7708 {
7709     gen_urshr_bhs(d, s, i);
7710     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7711 }
7712 
7713 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7714 {
7715     gen_urshr_bhs(d, s, i);
7716     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7717 }
7718 
7719 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7720 {
7721     gen_urshr_d(d, s, i);
7722     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7723 }
7724 
7725 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7726 {
7727     gen_srshr_bhs(d, s, i);
7728     tcg_gen_ext16u_i64(d, d);
7729     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7730 }
7731 
7732 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7733 {
7734     gen_srshr_bhs(d, s, i);
7735     tcg_gen_ext32u_i64(d, d);
7736     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7737 }
7738 
7739 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7740 {
7741     gen_srshr_d(d, s, i);
7742     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7743 }
7744 
7745 static WideShiftImmFn * const shrn_fns[] = {
7746     tcg_gen_shri_i64,
7747     tcg_gen_shri_i64,
7748     gen_ushr_d,
7749 };
7750 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7751 
7752 static WideShiftImmFn * const rshrn_fns[] = {
7753     gen_urshr_bhs,
7754     gen_urshr_bhs,
7755     gen_urshr_d,
7756 };
7757 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7758 
7759 static WideShiftImmFn * const sqshrn_fns[] = {
7760     gen_sqshrn_b,
7761     gen_sqshrn_h,
7762     gen_sqshrn_s,
7763 };
7764 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7765 
7766 static WideShiftImmFn * const uqshrn_fns[] = {
7767     gen_uqshrn_b,
7768     gen_uqshrn_h,
7769     gen_uqshrn_s,
7770 };
7771 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7772 
7773 static WideShiftImmFn * const sqshrun_fns[] = {
7774     gen_sqshrun_b,
7775     gen_sqshrun_h,
7776     gen_sqshrun_s,
7777 };
7778 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7779 
7780 static WideShiftImmFn * const sqrshrn_fns[] = {
7781     gen_sqrshrn_b,
7782     gen_sqrshrn_h,
7783     gen_sqrshrn_s,
7784 };
7785 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7786 
7787 static WideShiftImmFn * const uqrshrn_fns[] = {
7788     gen_uqrshrn_b,
7789     gen_uqrshrn_h,
7790     gen_uqrshrn_s,
7791 };
7792 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7793 
7794 static WideShiftImmFn * const sqrshrun_fns[] = {
7795     gen_sqrshrun_b,
7796     gen_sqrshrun_h,
7797     gen_sqrshrun_s,
7798 };
7799 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7800 
7801 /*
7802  * Advanced SIMD Scalar Shift by Immediate
7803  */
7804 
7805 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7806                                 WideShiftImmFn *fn, bool accumulate,
7807                                 MemOp sign)
7808 {
7809     if (fp_access_check(s)) {
7810         TCGv_i64 rd = tcg_temp_new_i64();
7811         TCGv_i64 rn = tcg_temp_new_i64();
7812 
7813         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7814         if (accumulate) {
7815             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7816         }
7817         fn(rd, rn, a->imm);
7818         write_fp_dreg(s, a->rd, rd);
7819     }
7820     return true;
7821 }
7822 
7823 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7824 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7825 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7826 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7827 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7828 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7829 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7830 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7831 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7832 
7833 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7834 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7835 
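/*
 * Apply a 32-bit Neon env helper to the low 32 bits of a 64-bit value,
 * passing the shift amount as a constant and zero-extending the result.
 */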
7836 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7837                               NeonGenTwoOpEnvFn *fn)
7838 {
7839     TCGv_i32 t = tcg_temp_new_i32();
7840     tcg_gen_extrl_i64_i32(t, s);
7841     fn(t, tcg_env, t, tcg_constant_i32(i));
7842     tcg_gen_extu_i32_i64(d, t);
7843 }
7844 
7845 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7846 {
7847     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7848 }
7849 
7850 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7851 {
7852     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7853 }
7854 
7855 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7856 {
7857     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7858 }
7859 
7860 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7861 {
7862     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7863 }
7864 
7865 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7866 {
7867     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7868 }
7869 
7870 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7871 {
7872     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7873 }
7874 
7875 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7876 {
7877     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7878 }
7879 
7880 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7881 {
7882     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7883 }
7884 
7885 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7886 {
7887     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7888 }
7889 
7890 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7891 {
7892     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7893 }
7894 
7895 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7896 {
7897     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7898 }
7899 
7900 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7901 {
7902     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7903 }
7904 
7905 static WideShiftImmFn * const f_scalar_sqshli[] = {
7906     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7907 };
7908 
7909 static WideShiftImmFn * const f_scalar_uqshli[] = {
7910     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7911 };
7912 
7913 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7914     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7915 };
7916 
7917 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7918 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7919 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7920 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7921 
7922 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7923                                        WideShiftImmFn * const fns[3],
7924                                        MemOp sign, bool zext)
7925 {
7926     MemOp esz = a->esz;
7927 
7928     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7929 
7930     if (fp_access_check(s)) {
7931         TCGv_i64 rd = tcg_temp_new_i64();
7932         TCGv_i64 rn = tcg_temp_new_i64();
7933 
7934         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7935         fns[esz](rd, rn, a->imm);
7936         if (zext) {
7937             tcg_gen_ext_i64(rd, rd, esz);
7938         }
7939         write_fp_dreg(s, a->rd, rd);
7940     }
7941     return true;
7942 }
7943 
7944 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7945 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7946 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7947 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7948 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7949 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7950 
7951 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7952 {
7953     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7954     tcg_rd = cpu_reg(s, a->rd);
7955 
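    /*
     * For 32-bit signed division, explicitly sign-extend the inputs:
     * read_cpu_reg() only zero-extends when sf is 0.
     */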
7956     if (!a->sf && is_signed) {
7957         tcg_n = tcg_temp_new_i64();
7958         tcg_m = tcg_temp_new_i64();
7959         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7960         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7961     } else {
7962         tcg_n = read_cpu_reg(s, a->rn, a->sf);
7963         tcg_m = read_cpu_reg(s, a->rm, a->sf);
7964     }
7965 
7966     if (is_signed) {
7967         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7968     } else {
7969         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7970     }
7971 
7972     if (!a->sf) { /* zero extend final result */
7973         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7974     }
7975     return true;
7976 }
7977 
7978 TRANS(SDIV, do_div, a, true)
7979 TRANS(UDIV, do_div, a, false)
7980 
7981 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7982  * Note that it is the caller's responsibility to ensure that the
7983  * shift amount is in range (ie 0..31 or 0..63) and to provide the
7984  * ARM-mandated semantics for out-of-range shifts.
7985  */
7986 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7987                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7988 {
7989     switch (shift_type) {
7990     case A64_SHIFT_TYPE_LSL:
7991         tcg_gen_shl_i64(dst, src, shift_amount);
7992         break;
7993     case A64_SHIFT_TYPE_LSR:
7994         tcg_gen_shr_i64(dst, src, shift_amount);
7995         break;
7996     case A64_SHIFT_TYPE_ASR:
7997         if (!sf) {
7998             tcg_gen_ext32s_i64(dst, src);
7999         }
8000         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8001         break;
8002     case A64_SHIFT_TYPE_ROR:
8003         if (sf) {
8004             tcg_gen_rotr_i64(dst, src, shift_amount);
8005         } else {
8006             TCGv_i32 t0, t1;
8007             t0 = tcg_temp_new_i32();
8008             t1 = tcg_temp_new_i32();
8009             tcg_gen_extrl_i64_i32(t0, src);
8010             tcg_gen_extrl_i64_i32(t1, shift_amount);
8011             tcg_gen_rotr_i32(t0, t0, t1);
8012             tcg_gen_extu_i32_i64(dst, t0);
8013         }
8014         break;
8015     default:
8016         assert(FALSE); /* all shift types should be handled */
8017         break;
8018     }
8019 
8020     if (!sf) { /* zero extend final result */
8021         tcg_gen_ext32u_i64(dst, dst);
8022     }
8023 }
8024 
8025 /* Shift a TCGv src by immediate, put result in dst.
8026  * The shift amount must be in range (this should always be true as the
8027  * relevant instructions will UNDEF on bad shift immediates).
8028  */
8029 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8030                           enum a64_shift_type shift_type, unsigned int shift_i)
8031 {
8032     assert(shift_i < (sf ? 64 : 32));
8033 
8034     if (shift_i == 0) {
8035         tcg_gen_mov_i64(dst, src);
8036     } else {
8037         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8038     }
8039 }
8040 
8041 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8042                          enum a64_shift_type shift_type)
8043 {
8044     TCGv_i64 tcg_shift = tcg_temp_new_i64();
8045     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8046     TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8047 
8048     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8049     shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8050     return true;
8051 }
8052 
8053 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8054 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8055 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8056 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8057 
8058 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8059 {
8060     TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8061     TCGv_i32 tcg_bytes;
8062 
8063     switch (a->esz) {
8064     case MO_8:
8065     case MO_16:
8066     case MO_32:
8067         tcg_val = tcg_temp_new_i64();
8068         tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8069         break;
8070     case MO_64:
8071         tcg_val = cpu_reg(s, a->rm);
8072         break;
8073     default:
8074         g_assert_not_reached();
8075     }
8076     tcg_acc = cpu_reg(s, a->rn);
8077     tcg_bytes = tcg_constant_i32(1 << a->esz);
8078     tcg_rd = cpu_reg(s, a->rd);
8079 
8080     if (crc32c) {
8081         gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8082     } else {
8083         gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8084     }
8085     return true;
8086 }
8087 
8088 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8089 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8090 
8091 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8092 {
8093     TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8094     TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8095     TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8096 
8097     tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8098     tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8099 
8100     if (setflag) {
8101         gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8102     } else {
8103         tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8104     }
8105     return true;
8106 }
8107 
8108 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8109 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
8110 
8111 static bool trans_IRG(DisasContext *s, arg_rrr *a)
8112 {
8113     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8114         TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8115         TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8116 
8117         if (s->ata[0]) {
8118             gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8119         } else {
8120             gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8121         }
8122         return true;
8123     }
8124     return false;
8125 }
8126 
8127 static bool trans_GMI(DisasContext *s, arg_rrr *a)
8128 {
8129     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8130         TCGv_i64 t = tcg_temp_new_i64();
8131 
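        /*
         * GMI: extract the allocation tag from bits [59:56] of Xn|SP,
         * then OR (1 << tag) with Xm to form the updated exclusion mask
         * in Xd.
         */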
8132         tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8133         tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8134         tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8135         return true;
8136     }
8137     return false;
8138 }
8139 
8140 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8141 {
8142     if (dc_isar_feature(aa64_pauth, s)) {
8143         gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8144                          cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8145         return true;
8146     }
8147     return false;
8148 }
8149 
8150 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8151 
8152 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8153 {
8154     fn(cpu_reg(s, rd), cpu_reg(s, rn));
8155     return true;
8156 }
8157 
8158 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8159 {
8160     TCGv_i32 t32 = tcg_temp_new_i32();
8161 
8162     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8163     gen_helper_rbit(t32, t32);
8164     tcg_gen_extu_i32_i64(tcg_rd, t32);
8165 }
8166 
8167 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8168 {
8169     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8170 
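    /*
     * Byte-swap each 16-bit lane: ((rn >> 8) & mask) recovers the high
     * bytes, ((rn & mask) << 8) moves the low bytes up, and the OR
     * recombines them.  The mask is 0x00ff repeated across the lanes.
     */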
8171     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8172     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8173     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8174     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8175     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8176 }
8177 
8178 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8179 {
8180     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8181 }
8182 
8183 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8184 {
8185     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8186 }
8187 
8188 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8189 {
8190     tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8191 }
8192 
8193 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8194 {
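    /*
     * Byte-reverse all 64 bits, then rotate by 32: the net effect is a
     * byte reverse within each 32-bit half, which is what REV32 requires.
     */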
8195     tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8196     tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8197 }
8198 
8199 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8200 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8201 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8202 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
8203 
8204 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8205 {
8206     TCGv_i32 t32 = tcg_temp_new_i32();
8207 
8208     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8209     tcg_gen_clzi_i32(t32, t32, 32);
8210     tcg_gen_extu_i32_i64(tcg_rd, t32);
8211 }
8212 
8213 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8214 {
8215     tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8216 }
8217 
8218 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8219 {
8220     TCGv_i32 t32 = tcg_temp_new_i32();
8221 
8222     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8223     tcg_gen_clrsb_i32(t32, t32);
8224     tcg_gen_extu_i32_i64(tcg_rd, t32);
8225 }
8226 
8227 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8228 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8229 
8230 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8231 {
8232     TCGv_i64 tcg_rd, tcg_rn;
8233 
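    /*
     * The "Z" variants (PACIZA etc.) use a zero modifier and require the
     * Rn field to be 31; the other forms take the modifier from Xn|SP.
     */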
8234     if (a->z) {
8235         if (a->rn != 31) {
8236             return false;
8237         }
8238         tcg_rn = tcg_constant_i64(0);
8239     } else {
8240         tcg_rn = cpu_reg_sp(s, a->rn);
8241     }
8242     if (s->pauth_active) {
8243         tcg_rd = cpu_reg(s, a->rd);
8244         fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8245     }
8246     return true;
8247 }
8248 
8249 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8250 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8251 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8252 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8253 
8254 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8255 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8256 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8257 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8258 
8259 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8260 {
8261     if (s->pauth_active) {
8262         TCGv_i64 tcg_rd = cpu_reg(s, rd);
8263         fn(tcg_rd, tcg_env, tcg_rd);
8264     }
8265     return true;
8266 }
8267 
8268 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8269 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8270 
8271 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8272                          ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8273 {
8274     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8275 
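    /*
     * For the 32-bit variants a shift amount with bit 5 set (i.e. >= 32)
     * is an unallocated encoding.
     */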
8276     if (!a->sf && (a->sa & (1 << 5))) {
8277         return false;
8278     }
8279 
8280     tcg_rd = cpu_reg(s, a->rd);
8281     tcg_rn = cpu_reg(s, a->rn);
8282 
8283     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8284     if (a->sa) {
8285         shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8286     }
8287 
8288     (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8289     if (!a->sf) {
8290         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8291     }
8292     if (setflags) {
8293         gen_logic_CC(a->sf, tcg_rd);
8294     }
8295     return true;
8296 }
8297 
8298 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8299 {
8300     /*
8301      * Unshifted ORR and ORN with WZR/XZR are the standard encodings for
8302      * register-register MOV and MVN, so they are worth special casing.
8303      */
8304     if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8305         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8306         TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8307 
8308         if (a->n) {
8309             tcg_gen_not_i64(tcg_rd, tcg_rm);
8310             if (!a->sf) {
8311                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8312             }
8313         } else {
8314             if (a->sf) {
8315                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
8316             } else {
8317                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8318             }
8319         }
8320         return true;
8321     }
8322 
8323     return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8324 }
8325 
8326 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8327 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8328 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8329 
8330 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8331                           bool sub_op, bool setflags)
8332 {
8333     TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8334 
8335     if (a->sa > 4) {
8336         return false;
8337     }
8338 
8339     /* non-flag-setting ops may use SP */
8340     if (!setflags) {
8341         tcg_rd = cpu_reg_sp(s, a->rd);
8342     } else {
8343         tcg_rd = cpu_reg(s, a->rd);
8344     }
8345     tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8346 
8347     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8348     ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8349 
8350     tcg_result = tcg_temp_new_i64();
8351     if (!setflags) {
8352         if (sub_op) {
8353             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8354         } else {
8355             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8356         }
8357     } else {
8358         if (sub_op) {
8359             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8360         } else {
8361             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8362         }
8363     }
8364 
8365     if (a->sf) {
8366         tcg_gen_mov_i64(tcg_rd, tcg_result);
8367     } else {
8368         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8369     }
8370     return true;
8371 }
8372 
8373 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8374 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8375 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8376 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8377 
8378 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8379                           bool sub_op, bool setflags)
8380 {
8381     TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8382 
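    /*
     * Shift type ROR and, for the 32-bit variants, shift amounts >= 32
     * are unallocated encodings for shifted-register ADD/SUB.
     */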
8383     if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8384         return false;
8385     }
8386 
8387     tcg_rd = cpu_reg(s, a->rd);
8388     tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8389     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8390 
8391     shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8392 
8393     tcg_result = tcg_temp_new_i64();
8394     if (!setflags) {
8395         if (sub_op) {
8396             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8397         } else {
8398             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8399         }
8400     } else {
8401         if (sub_op) {
8402             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8403         } else {
8404             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8405         }
8406     }
8407 
8408     if (a->sf) {
8409         tcg_gen_mov_i64(tcg_rd, tcg_result);
8410     } else {
8411         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8412     }
8413     return true;
8414 }
8415 
8416 TRANS(ADD_r, do_addsub_reg, a, false, false)
8417 TRANS(SUB_r, do_addsub_reg, a, true, false)
8418 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8419 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8420 
8421 static bool do_mulh(DisasContext *s, arg_rrr *a,
8422                     void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8423 {
8424     TCGv_i64 discard = tcg_temp_new_i64();
8425     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8426     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8427     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8428 
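    /*
     * muls2/mulu2 compute the full 128-bit product as (low, high); only
     * the high half is architecturally visible for SMULH/UMULH, so the
     * low half goes into a discarded temporary.
     */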
8429     fn(discard, tcg_rd, tcg_rn, tcg_rm);
8430     return true;
8431 }
8432 
8433 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8434 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8435 
8436 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8437                       bool sf, bool is_sub, MemOp mop)
8438 {
8439     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8440     TCGv_i64 tcg_op1, tcg_op2;
8441 
8442     if (mop == MO_64) {
8443         tcg_op1 = cpu_reg(s, a->rn);
8444         tcg_op2 = cpu_reg(s, a->rm);
8445     } else {
8446         tcg_op1 = tcg_temp_new_i64();
8447         tcg_op2 = tcg_temp_new_i64();
8448         tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8449         tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8450     }
8451 
8452     if (a->ra == 31 && !is_sub) {
8453         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8454         tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8455     } else {
8456         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8457         TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8458 
8459         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8460         if (is_sub) {
8461             tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8462         } else {
8463             tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8464         }
8465     }
8466 
8467     if (!sf) {
8468         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8469     }
8470     return true;
8471 }
8472 
8473 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8474 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8475 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8476 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8477 
8478 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8479 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8480 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8481 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8482 
8483 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8484                        bool is_sub, bool setflags)
8485 {
8486     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8487 
8488     tcg_rd = cpu_reg(s, a->rd);
8489     tcg_rn = cpu_reg(s, a->rn);
8490 
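    /*
     * SBC(S) is computed as ADC(S) with the second operand inverted:
     * Rd = Rn + NOT(Rm) + C.
     */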
8491     if (is_sub) {
8492         tcg_y = tcg_temp_new_i64();
8493         tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8494     } else {
8495         tcg_y = cpu_reg(s, a->rm);
8496     }
8497 
8498     if (setflags) {
8499         gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8500     } else {
8501         gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8502     }
8503     return true;
8504 }
8505 
8506 TRANS(ADC, do_adc_sbc, a, false, false)
8507 TRANS(SBC, do_adc_sbc, a, true, false)
8508 TRANS(ADCS, do_adc_sbc, a, false, true)
8509 TRANS(SBCS, do_adc_sbc, a, true, true)
8510 
8511 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8512 {
8513     int mask = a->mask;
8514     TCGv_i64 tcg_rn;
8515     TCGv_i32 nzcv;
8516 
8517     if (!dc_isar_feature(aa64_condm_4, s)) {
8518         return false;
8519     }
8520 
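    /*
     * RMIF rotates Xn right by #imm and inserts the low four bits of the
     * result into NZCV, but only for the flags selected by #mask.
     */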
8521     tcg_rn = read_cpu_reg(s, a->rn, 1);
8522     tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8523 
8524     nzcv = tcg_temp_new_i32();
8525     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8526 
8527     if (mask & 8) { /* N */
8528         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8529     }
8530     if (mask & 4) { /* Z */
8531         tcg_gen_not_i32(cpu_ZF, nzcv);
8532         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8533     }
8534     if (mask & 2) { /* C */
8535         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8536     }
8537     if (mask & 1) { /* V */
8538         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8539     }
8540     return true;
8541 }
8542 
8543 static bool do_setf(DisasContext *s, int rn, int shift)
8544 {
8545     TCGv_i32 tmp = tcg_temp_new_i32();
8546 
8547     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8548     tcg_gen_shli_i32(cpu_NF, tmp, shift);
8549     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8550     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8551     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8552     return true;
8553 }
8554 
8555 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8556 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
8557 
8558 /* CCMP, CCMN */
8559 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8560 {
8561     TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8562     TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8563     TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8564     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8565     TCGv_i64 tcg_rn, tcg_y;
8566     DisasCompare c;
8567     unsigned nzcv;
8568     bool has_andc;
8569 
8570     /* Set T0 = !COND.  */
8571     arm_test_cc(&c, a->cond);
8572     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8573 
8574     /* Load the arguments for the new comparison.  */
8575     if (a->imm) {
8576         tcg_y = tcg_constant_i64(a->y);
8577     } else {
8578         tcg_y = cpu_reg(s, a->y);
8579     }
8580     tcg_rn = cpu_reg(s, a->rn);
8581 
8582     /* Set the flags for the new comparison.  */
8583     if (a->op) {
8584         gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8585     } else {
8586         gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8587     }
8588 
8589     /*
8590      * If COND was false, force the flags to #nzcv.  Compute two masks
8591      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8592      * For tcg hosts that support ANDC, we can make do with just T1.
8593      * In either case, allow the tcg optimizer to delete any unused mask.
8594      */
8595     tcg_gen_neg_i32(tcg_t1, tcg_t0);
8596     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
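    /*
     * T0 = !COND is 0 or 1, so the negation gives T1 = (COND ? 0 : -1)
     * and the subtraction gives T2 = T0 - 1 = (COND ? -1 : 0).  When COND
     * is true all of T0/T1/T2 make the operations below no-ops, so only
     * the COND-false case overrides the flags with #nzcv.
     */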
8597 
8598     nzcv = a->nzcv;
8599     has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0);
8600     if (nzcv & 8) { /* N */
8601         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8602     } else {
8603         if (has_andc) {
8604             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8605         } else {
8606             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8607         }
8608     }
8609     if (nzcv & 4) { /* Z */
8610         if (has_andc) {
8611             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8612         } else {
8613             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8614         }
8615     } else {
8616         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8617     }
8618     if (nzcv & 2) { /* C */
8619         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8620     } else {
8621         if (has_andc) {
8622             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8623         } else {
8624             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8625         }
8626     }
8627     if (nzcv & 1) { /* V */
8628         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8629     } else {
8630         if (has_andc) {
8631             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8632         } else {
8633             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8634         }
8635     }
8636     return true;
8637 }
8638 
8639 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8640 {
8641     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8642     TCGv_i64 zero = tcg_constant_i64(0);
8643     DisasCompare64 c;
8644 
8645     a64_test_cc(&c, a->cond);
8646 
8647     if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8648         /* CSET & CSETM.  */
8649         if (a->else_inv) {
8650             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8651                                    tcg_rd, c.value, zero);
8652         } else {
8653             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8654                                 tcg_rd, c.value, zero);
8655         }
8656     } else {
8657         TCGv_i64 t_true = cpu_reg(s, a->rn);
8658         TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8659 
8660         if (a->else_inv && a->else_inc) {
8661             tcg_gen_neg_i64(t_false, t_false);
8662         } else if (a->else_inv) {
8663             tcg_gen_not_i64(t_false, t_false);
8664         } else if (a->else_inc) {
8665             tcg_gen_addi_i64(t_false, t_false, 1);
8666         }
8667         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8668     }
8669 
8670     if (!a->sf) {
8671         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8672     }
8673     return true;
8674 }
8675 
8676 typedef struct FPScalar1Int {
8677     void (*gen_h)(TCGv_i32, TCGv_i32);
8678     void (*gen_s)(TCGv_i32, TCGv_i32);
8679     void (*gen_d)(TCGv_i64, TCGv_i64);
8680 } FPScalar1Int;
8681 
8682 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8683                               const FPScalar1Int *f,
8684                               bool merging)
8685 {
8686     switch (a->esz) {
8687     case MO_64:
8688         if (fp_access_check(s)) {
8689             TCGv_i64 t = read_fp_dreg(s, a->rn);
8690             f->gen_d(t, t);
8691             if (merging) {
8692                 write_fp_dreg_merging(s, a->rd, a->rd, t);
8693             } else {
8694                 write_fp_dreg(s, a->rd, t);
8695             }
8696         }
8697         break;
8698     case MO_32:
8699         if (fp_access_check(s)) {
8700             TCGv_i32 t = read_fp_sreg(s, a->rn);
8701             f->gen_s(t, t);
8702             if (merging) {
8703                 write_fp_sreg_merging(s, a->rd, a->rd, t);
8704             } else {
8705                 write_fp_sreg(s, a->rd, t);
8706             }
8707         }
8708         break;
8709     case MO_16:
8710         if (!dc_isar_feature(aa64_fp16, s)) {
8711             return false;
8712         }
8713         if (fp_access_check(s)) {
8714             TCGv_i32 t = read_fp_hreg(s, a->rn);
8715             f->gen_h(t, t);
8716             if (merging) {
8717                 write_fp_hreg_merging(s, a->rd, a->rd, t);
8718             } else {
8719                 write_fp_sreg(s, a->rd, t);
8720             }
8721         }
8722         break;
8723     default:
8724         return false;
8725     }
8726     return true;
8727 }
8728 
8729 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
8730                                   const FPScalar1Int *fnormal,
8731                                   const FPScalar1Int *fah)
8732 {
8733     return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
8734 }
8735 
8736 static const FPScalar1Int f_scalar_fmov = {
8737     tcg_gen_mov_i32,
8738     tcg_gen_mov_i32,
8739     tcg_gen_mov_i64,
8740 };
8741 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
8742 
8743 static const FPScalar1Int f_scalar_fabs = {
8744     gen_vfp_absh,
8745     gen_vfp_abss,
8746     gen_vfp_absd,
8747 };
8748 static const FPScalar1Int f_scalar_ah_fabs = {
8749     gen_vfp_ah_absh,
8750     gen_vfp_ah_abss,
8751     gen_vfp_ah_absd,
8752 };
8753 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
8754 
8755 static const FPScalar1Int f_scalar_fneg = {
8756     gen_vfp_negh,
8757     gen_vfp_negs,
8758     gen_vfp_negd,
8759 };
8760 static const FPScalar1Int f_scalar_ah_fneg = {
8761     gen_vfp_ah_negh,
8762     gen_vfp_ah_negs,
8763     gen_vfp_ah_negd,
8764 };
8765 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
8766 
8767 typedef struct FPScalar1 {
8768     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8769     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8770     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8771 } FPScalar1;
8772 
8773 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
8774                                         const FPScalar1 *f, int rmode,
8775                                         ARMFPStatusFlavour fpsttype)
8776 {
8777     TCGv_i32 tcg_rmode = NULL;
8778     TCGv_ptr fpst;
8779     TCGv_i64 t64;
8780     TCGv_i32 t32;
8781     int check = fp_access_check_scalar_hsd(s, a->esz);
8782 
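    /*
     * A negative check value means an unsupported element size (UNDEF,
     * return false); zero means the fp access check failed and has
     * already raised an exception, so return true with nothing emitted.
     */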
8783     if (check <= 0) {
8784         return check == 0;
8785     }
8786 
8787     fpst = fpstatus_ptr(fpsttype);
8788     if (rmode >= 0) {
8789         tcg_rmode = gen_set_rmode(rmode, fpst);
8790     }
8791 
8792     switch (a->esz) {
8793     case MO_64:
8794         t64 = read_fp_dreg(s, a->rn);
8795         f->gen_d(t64, t64, fpst);
8796         write_fp_dreg_merging(s, a->rd, a->rd, t64);
8797         break;
8798     case MO_32:
8799         t32 = read_fp_sreg(s, a->rn);
8800         f->gen_s(t32, t32, fpst);
8801         write_fp_sreg_merging(s, a->rd, a->rd, t32);
8802         break;
8803     case MO_16:
8804         t32 = read_fp_hreg(s, a->rn);
8805         f->gen_h(t32, t32, fpst);
8806         write_fp_hreg_merging(s, a->rd, a->rd, t32);
8807         break;
8808     default:
8809         g_assert_not_reached();
8810     }
8811 
8812     if (rmode >= 0) {
8813         gen_restore_rmode(tcg_rmode, fpst);
8814     }
8815     return true;
8816 }
8817 
8818 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8819                           const FPScalar1 *f, int rmode)
8820 {
8821     return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
8822                                        a->esz == MO_16 ?
8823                                        FPST_A64_F16 : FPST_A64);
8824 }
8825 
8826 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
8827                              const FPScalar1 *f, int rmode)
8828 {
8829     return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
8830 }
8831 
8832 static const FPScalar1 f_scalar_fsqrt = {
8833     gen_helper_vfp_sqrth,
8834     gen_helper_vfp_sqrts,
8835     gen_helper_vfp_sqrtd,
8836 };
8837 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8838 
8839 static const FPScalar1 f_scalar_frint = {
8840     gen_helper_advsimd_rinth,
8841     gen_helper_rints,
8842     gen_helper_rintd,
8843 };
8844 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8845 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8846 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8847 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8848 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8849 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8850 
8851 static const FPScalar1 f_scalar_frintx = {
8852     gen_helper_advsimd_rinth_exact,
8853     gen_helper_rints_exact,
8854     gen_helper_rintd_exact,
8855 };
8856 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8857 
8858 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
8859 {
8860     ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
8861     TCGv_i32 t32;
8862     int check;
8863 
8864     if (!dc_isar_feature(aa64_bf16, s)) {
8865         return false;
8866     }
8867 
8868     check = fp_access_check_scalar_hsd(s, a->esz);
8869 
8870     if (check <= 0) {
8871         return check == 0;
8872     }
8873 
8874     t32 = read_fp_sreg(s, a->rn);
8875     gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
8876     write_fp_hreg_merging(s, a->rd, a->rd, t32);
8877     return true;
8878 }
8879 
8880 static const FPScalar1 f_scalar_frint32 = {
8881     NULL,
8882     gen_helper_frint32_s,
8883     gen_helper_frint32_d,
8884 };
8885 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8886            &f_scalar_frint32, FPROUNDING_ZERO)
8887 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8888 
8889 static const FPScalar1 f_scalar_frint64 = {
8890     NULL,
8891     gen_helper_frint64_s,
8892     gen_helper_frint64_d,
8893 };
8894 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8895            &f_scalar_frint64, FPROUNDING_ZERO)
8896 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8897 
8898 static const FPScalar1 f_scalar_frecpe = {
8899     gen_helper_recpe_f16,
8900     gen_helper_recpe_f32,
8901     gen_helper_recpe_f64,
8902 };
8903 static const FPScalar1 f_scalar_frecpe_rpres = {
8904     gen_helper_recpe_f16,
8905     gen_helper_recpe_rpres_f32,
8906     gen_helper_recpe_f64,
8907 };
8908 TRANS(FRECPE_s, do_fp1_scalar_ah, a,
8909       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8910       &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
8911 
8912 static const FPScalar1 f_scalar_frecpx = {
8913     gen_helper_frecpx_f16,
8914     gen_helper_frecpx_f32,
8915     gen_helper_frecpx_f64,
8916 };
8917 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
8918 
8919 static const FPScalar1 f_scalar_frsqrte = {
8920     gen_helper_rsqrte_f16,
8921     gen_helper_rsqrte_f32,
8922     gen_helper_rsqrte_f64,
8923 };
8924 static const FPScalar1 f_scalar_frsqrte_rpres = {
8925     gen_helper_rsqrte_f16,
8926     gen_helper_rsqrte_rpres_f32,
8927     gen_helper_rsqrte_f64,
8928 };
8929 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
8930       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8931       &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
8932 
8933 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8934 {
8935     if (fp_access_check(s)) {
8936         TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8937         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8938         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8939 
8940         gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8941         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
8942     }
8943     return true;
8944 }
8945 
8946 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8947 {
8948     if (fp_access_check(s)) {
8949         TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8950         TCGv_i32 ahp = get_ahp_flag();
8951         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8952 
8953         gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8954         /* write_fp_hreg_merging is OK here because top half of result is zero */
8955         write_fp_hreg_merging(s, a->rd, a->rd, tmp);
8956     }
8957     return true;
8958 }
8959 
8960 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8961 {
8962     if (fp_access_check(s)) {
8963         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8964         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8965         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8966 
8967         gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8968         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8969     }
8970     return true;
8971 }
8972 
8973 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8974 {
8975     if (fp_access_check(s)) {
8976         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8977         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8978         TCGv_i32 ahp = get_ahp_flag();
8979         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8980 
8981         gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8982         /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
8983         write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
8984     }
8985     return true;
8986 }
8987 
8988 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8989 {
8990     if (fp_access_check(s)) {
8991         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8992         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8993         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8994         TCGv_i32 tcg_ahp = get_ahp_flag();
8995 
8996         gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8997         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8998     }
8999     return true;
9000 }
9001 
9002 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9003 {
9004     if (fp_access_check(s)) {
9005         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9006         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9007         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9008         TCGv_i32 tcg_ahp = get_ahp_flag();
9009 
9010         gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9011         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9012     }
9013     return true;
9014 }
9015 
9016 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9017                            TCGv_i64 tcg_int, bool is_signed)
9018 {
9019     TCGv_ptr tcg_fpstatus;
9020     TCGv_i32 tcg_shift, tcg_single;
9021     TCGv_i64 tcg_double;
9022 
9023     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9024     tcg_shift = tcg_constant_i32(shift);
9025 
9026     switch (esz) {
9027     case MO_64:
9028         tcg_double = tcg_temp_new_i64();
9029         if (is_signed) {
9030             gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9031         } else {
9032             gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9033         }
9034         write_fp_dreg_merging(s, rd, rd, tcg_double);
9035         break;
9036 
9037     case MO_32:
9038         tcg_single = tcg_temp_new_i32();
9039         if (is_signed) {
9040             gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9041         } else {
9042             gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9043         }
9044         write_fp_sreg_merging(s, rd, rd, tcg_single);
9045         break;
9046 
9047     case MO_16:
9048         tcg_single = tcg_temp_new_i32();
9049         if (is_signed) {
9050             gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9051         } else {
9052             gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9053         }
9054         write_fp_hreg_merging(s, rd, rd, tcg_single);
9055         break;
9056 
9057     default:
9058         g_assert_not_reached();
9059     }
9060     return true;
9061 }
9062 
9063 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9064 {
9065     TCGv_i64 tcg_int;
9066     int check = fp_access_check_scalar_hsd(s, a->esz);
9067 
9068     if (check <= 0) {
9069         return check == 0;
9070     }
9071 
9072     if (a->sf) {
9073         tcg_int = cpu_reg(s, a->rn);
9074     } else {
9075         tcg_int = read_cpu_reg(s, a->rn, true);
9076         if (is_signed) {
9077             tcg_gen_ext32s_i64(tcg_int, tcg_int);
9078         } else {
9079             tcg_gen_ext32u_i64(tcg_int, tcg_int);
9080         }
9081     }
9082     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9083 }
9084 
9085 TRANS(SCVTF_g, do_cvtf_g, a, true)
9086 TRANS(UCVTF_g, do_cvtf_g, a, false)
9087 
9088 /*
9089  * [US]CVTF (vector), scalar version.
9090  * Which sounds weird, but really just means the input comes from an fp
9091  * register instead of from a general register.  Input and output element
9092  * sizes are always equal.
9093  */
9094 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9095 {
9096     TCGv_i64 tcg_int;
9097     int check = fp_access_check_scalar_hsd(s, a->esz);
9098 
9099     if (check <= 0) {
9100         return check == 0;
9101     }
9102 
9103     tcg_int = tcg_temp_new_i64();
9104     read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9105     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9106 }
9107 
9108 TRANS(SCVTF_f, do_cvtf_f, a, true)
9109 TRANS(UCVTF_f, do_cvtf_f, a, false)
9110 
9111 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9112                            TCGv_i64 tcg_out, int shift, int rn,
9113                            ARMFPRounding rmode)
9114 {
9115     TCGv_ptr tcg_fpstatus;
9116     TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9117 
9118     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9119     tcg_shift = tcg_constant_i32(shift);
9120     tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9121 
9122     switch (esz) {
9123     case MO_64:
9124         read_vec_element(s, tcg_out, rn, 0, MO_64);
9125         switch (out) {
9126         case MO_64 | MO_SIGN:
9127             gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9128             break;
9129         case MO_64:
9130             gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9131             break;
9132         case MO_32 | MO_SIGN:
9133             gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9134             break;
9135         case MO_32:
9136             gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9137             break;
9138         default:
9139             g_assert_not_reached();
9140         }
9141         break;
9142 
9143     case MO_32:
9144         tcg_single = read_fp_sreg(s, rn);
9145         switch (out) {
9146         case MO_64 | MO_SIGN:
9147             gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9148             break;
9149         case MO_64:
9150             gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9151             break;
9152         case MO_32 | MO_SIGN:
9153             gen_helper_vfp_tosls(tcg_single, tcg_single,
9154                                  tcg_shift, tcg_fpstatus);
9155             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9156             break;
9157         case MO_32:
9158             gen_helper_vfp_touls(tcg_single, tcg_single,
9159                                  tcg_shift, tcg_fpstatus);
9160             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9161             break;
9162         default:
9163             g_assert_not_reached();
9164         }
9165         break;
9166 
9167     case MO_16:
9168         tcg_single = read_fp_hreg(s, rn);
9169         switch (out) {
9170         case MO_64 | MO_SIGN:
9171             gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9172             break;
9173         case MO_64:
9174             gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9175             break;
9176         case MO_32 | MO_SIGN:
9177             gen_helper_vfp_toslh(tcg_single, tcg_single,
9178                                  tcg_shift, tcg_fpstatus);
9179             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9180             break;
9181         case MO_32:
9182             gen_helper_vfp_toulh(tcg_single, tcg_single,
9183                                  tcg_shift, tcg_fpstatus);
9184             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9185             break;
9186         case MO_16 | MO_SIGN:
9187             gen_helper_vfp_toshh(tcg_single, tcg_single,
9188                                  tcg_shift, tcg_fpstatus);
9189             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9190             break;
9191         case MO_16:
9192             gen_helper_vfp_touhh(tcg_single, tcg_single,
9193                                  tcg_shift, tcg_fpstatus);
9194             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9195             break;
9196         default:
9197             g_assert_not_reached();
9198         }
9199         break;
9200 
9201     default:
9202         g_assert_not_reached();
9203     }
9204 
9205     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9206 }
9207 
9208 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9209                       ARMFPRounding rmode, bool is_signed)
9210 {
9211     TCGv_i64 tcg_int;
9212     int check = fp_access_check_scalar_hsd(s, a->esz);
9213 
9214     if (check <= 0) {
9215         return check == 0;
9216     }
9217 
9218     tcg_int = cpu_reg(s, a->rd);
9219     do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9220                    a->esz, tcg_int, a->shift, a->rn, rmode);
9221 
9222     if (!a->sf) {
9223         tcg_gen_ext32u_i64(tcg_int, tcg_int);
9224     }
9225     return true;
9226 }
9227 
9228 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9229 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9230 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9231 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9232 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9233 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9234 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9235 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9236 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9237 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9238 
9239 /*
9240  * FCVT* (vector), scalar version.
9241  * Which sounds weird, but really just means the output goes to an fp
9242  * register instead of to a general register.  Input and output element
9243  * sizes are always equal.
9244  */
9245 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9246                       ARMFPRounding rmode, bool is_signed)
9247 {
9248     TCGv_i64 tcg_int;
9249     int check = fp_access_check_scalar_hsd(s, a->esz);
9250 
9251     if (check <= 0) {
9252         return check == 0;
9253     }
9254 
9255     tcg_int = tcg_temp_new_i64();
9256     do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9257                    a->esz, tcg_int, a->shift, a->rn, rmode);
9258 
9259     if (!s->fpcr_nep) {
9260         clear_vec(s, a->rd);
9261     }
9262     write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9263     return true;
9264 }
9265 
9266 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9267 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9268 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9269 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9270 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9271 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9272 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9273 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9274 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9275 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9276 
9277 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9278 {
9279     if (!dc_isar_feature(aa64_jscvt, s)) {
9280         return false;
9281     }
9282     if (fp_access_check(s)) {
9283         TCGv_i64 t = read_fp_dreg(s, a->rn);
9284         TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9285 
9286         gen_helper_fjcvtzs(t, t, fpstatus);
9287 
9288         tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9289         tcg_gen_extrh_i64_i32(cpu_ZF, t);
9290         tcg_gen_movi_i32(cpu_CF, 0);
9291         tcg_gen_movi_i32(cpu_NF, 0);
9292         tcg_gen_movi_i32(cpu_VF, 0);
9293     }
9294     return true;
9295 }
9296 
9297 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9298 {
9299     if (!dc_isar_feature(aa64_fp16, s)) {
9300         return false;
9301     }
9302     if (fp_access_check(s)) {
9303         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9304         TCGv_i64 tmp = tcg_temp_new_i64();
9305         tcg_gen_ext16u_i64(tmp, tcg_rn);
9306         write_fp_dreg(s, a->rd, tmp);
9307     }
9308     return true;
9309 }
9310 
9311 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9312 {
9313     if (fp_access_check(s)) {
9314         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9315         TCGv_i64 tmp = tcg_temp_new_i64();
9316         tcg_gen_ext32u_i64(tmp, tcg_rn);
9317         write_fp_dreg(s, a->rd, tmp);
9318     }
9319     return true;
9320 }
9321 
9322 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9323 {
9324     if (fp_access_check(s)) {
9325         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9326         write_fp_dreg(s, a->rd, tcg_rn);
9327     }
9328     return true;
9329 }
9330 
9331 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9332 {
9333     if (fp_access_check(s)) {
9334         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9335         tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9336         clear_vec_high(s, true, a->rd);
9337     }
9338     return true;
9339 }
9340 
9341 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9342 {
9343     if (!dc_isar_feature(aa64_fp16, s)) {
9344         return false;
9345     }
9346     if (fp_access_check(s)) {
9347         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9348         tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9349     }
9350     return true;
9351 }
9352 
9353 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9354 {
9355     if (fp_access_check(s)) {
9356         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9357         tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9358     }
9359     return true;
9360 }
9361 
9362 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9363 {
9364     if (fp_access_check(s)) {
9365         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9366         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9367     }
9368     return true;
9369 }
9370 
9371 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9372 {
9373     if (fp_access_check(s)) {
9374         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9375         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9376     }
9377     return true;
9378 }
9379 
9380 typedef struct ENVScalar1 {
9381     NeonGenOneOpEnvFn *gen_bhs[3];
9382     NeonGenOne64OpEnvFn *gen_d;
9383 } ENVScalar1;
9384 
9385 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9386 {
9387     if (!fp_access_check(s)) {
9388         return true;
9389     }
9390     if (a->esz == MO_64) {
9391         TCGv_i64 t = read_fp_dreg(s, a->rn);
9392         f->gen_d(t, tcg_env, t);
9393         write_fp_dreg(s, a->rd, t);
9394     } else {
9395         TCGv_i32 t = tcg_temp_new_i32();
9396 
9397         read_vec_element_i32(s, t, a->rn, 0, a->esz);
9398         f->gen_bhs[a->esz](t, tcg_env, t);
9399         write_fp_sreg(s, a->rd, t);
9400     }
9401     return true;
9402 }
9403 
9404 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9405 {
9406     if (a->esz == MO_64 && !a->q) {
9407         return false;
9408     }
9409     if (!fp_access_check(s)) {
9410         return true;
9411     }
9412     if (a->esz == MO_64) {
9413         TCGv_i64 t = tcg_temp_new_i64();
9414 
9415         for (int i = 0; i < 2; ++i) {
9416             read_vec_element(s, t, a->rn, i, MO_64);
9417             f->gen_d(t, tcg_env, t);
9418             write_vec_element(s, t, a->rd, i, MO_64);
9419         }
9420     } else {
9421         TCGv_i32 t = tcg_temp_new_i32();
9422         int n = (a->q ? 16 : 8) >> a->esz;
9423 
9424         for (int i = 0; i < n; ++i) {
9425             read_vec_element_i32(s, t, a->rn, i, a->esz);
9426             f->gen_bhs[a->esz](t, tcg_env, t);
9427             write_vec_element_i32(s, t, a->rd, i, a->esz);
9428         }
9429     }
9430     clear_vec_high(s, a->q, a->rd);
9431     return true;
9432 }
9433 
9434 static const ENVScalar1 f_scalar_sqabs = {
9435     { gen_helper_neon_qabs_s8,
9436       gen_helper_neon_qabs_s16,
9437       gen_helper_neon_qabs_s32 },
9438     gen_helper_neon_qabs_s64,
9439 };
9440 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9441 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9442 
9443 static const ENVScalar1 f_scalar_sqneg = {
9444     { gen_helper_neon_qneg_s8,
9445       gen_helper_neon_qneg_s16,
9446       gen_helper_neon_qneg_s32 },
9447     gen_helper_neon_qneg_s64,
9448 };
9449 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9450 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9451 
9452 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9453 {
9454     if (fp_access_check(s)) {
9455         TCGv_i64 t = read_fp_dreg(s, a->rn);
9456         f(t, t);
9457         write_fp_dreg(s, a->rd, t);
9458     }
9459     return true;
9460 }
9461 
9462 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9463 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9464 
9465 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9466 {
9467     if (fp_access_check(s)) {
9468         TCGv_i64 t = read_fp_dreg(s, a->rn);
9469         tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9470         write_fp_dreg(s, a->rd, t);
9471     }
9472     return true;
9473 }
9474 
9475 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9476 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9477 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9478 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9479 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9480 
9481 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9482                                    ArithOneOp * const fn[3])
9483 {
9484     if (a->esz == MO_64) {
9485         return false;
9486     }
9487     if (fp_access_check(s)) {
9488         TCGv_i64 t = tcg_temp_new_i64();
9489 
9490         read_vec_element(s, t, a->rn, 0, a->esz + 1);
9491         fn[a->esz](t, t);
9492         clear_vec(s, a->rd);
9493         write_vec_element(s, t, a->rd, 0, a->esz);
9494     }
9495     return true;
9496 }
9497 
9498 #define WRAP_ENV(NAME) \
9499     static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9500     { gen_helper_##NAME(d, tcg_env, n); }
9501 
9502 WRAP_ENV(neon_unarrow_sat8)
9503 WRAP_ENV(neon_unarrow_sat16)
9504 WRAP_ENV(neon_unarrow_sat32)
9505 
9506 static ArithOneOp * const f_scalar_sqxtun[] = {
9507     gen_neon_unarrow_sat8,
9508     gen_neon_unarrow_sat16,
9509     gen_neon_unarrow_sat32,
9510 };
9511 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9512 
9513 WRAP_ENV(neon_narrow_sat_s8)
9514 WRAP_ENV(neon_narrow_sat_s16)
9515 WRAP_ENV(neon_narrow_sat_s32)
9516 
9517 static ArithOneOp * const f_scalar_sqxtn[] = {
9518     gen_neon_narrow_sat_s8,
9519     gen_neon_narrow_sat_s16,
9520     gen_neon_narrow_sat_s32,
9521 };
9522 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9523 
9524 WRAP_ENV(neon_narrow_sat_u8)
9525 WRAP_ENV(neon_narrow_sat_u16)
9526 WRAP_ENV(neon_narrow_sat_u32)
9527 
9528 static ArithOneOp * const f_scalar_uqxtn[] = {
9529     gen_neon_narrow_sat_u8,
9530     gen_neon_narrow_sat_u16,
9531     gen_neon_narrow_sat_u32,
9532 };
9533 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9534 
9535 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
9536 {
9537     if (fp_access_check(s)) {
9538         /*
9539          * 64 bit to 32 bit float conversion
9540          * with von Neumann rounding (round to odd)
9541          */
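        /*
         * Round-to-odd keeps the sticky information in the low bit of the
         * result, so a subsequent narrowing (e.g. to float16) does not
         * suffer double rounding.
         */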
9542         TCGv_i64 src = read_fp_dreg(s, a->rn);
9543         TCGv_i32 dst = tcg_temp_new_i32();
9544         gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
9545         write_fp_sreg_merging(s, a->rd, a->rd, dst);
9546     }
9547     return true;
9548 }
9549 
9550 #undef WRAP_ENV
9551 
9552 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9553 {
9554     if (!a->q && a->esz == MO_64) {
9555         return false;
9556     }
9557     if (fp_access_check(s)) {
9558         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9559     }
9560     return true;
9561 }
9562 
9563 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9564 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9565 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9566 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9567 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9568 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9569 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9570 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9571 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9572 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9573 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9574 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9575 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9576 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9577 
9578 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9579 {
9580     if (a->esz == MO_64) {
9581         return false;
9582     }
9583     if (fp_access_check(s)) {
9584         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9585     }
9586     return true;
9587 }
9588 
9589 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9590 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9591 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9592 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9593 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9594 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9595 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9596 
9597 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9598                                    ArithOneOp * const fn[3])
9599 {
9600     if (a->esz == MO_64) {
9601         return false;
9602     }
9603     if (fp_access_check(s)) {
9604         TCGv_i64 t0 = tcg_temp_new_i64();
9605         TCGv_i64 t1 = tcg_temp_new_i64();
9606 
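        /*
         * Each 64-bit source half narrows to 32 bits; for the "2" forms
         * (a->q set) the two results are written to the upper half of Vd
         * (32-bit elements 2 and 3), otherwise to the lower half
         * (elements 0 and 1).
         */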
9607         read_vec_element(s, t0, a->rn, 0, MO_64);
9608         read_vec_element(s, t1, a->rn, 1, MO_64);
9609         fn[a->esz](t0, t0);
9610         fn[a->esz](t1, t1);
9611         write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9612         write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9613         clear_vec_high(s, a->q, a->rd);
9614     }
9615     return true;
9616 }
9617 
9618 static ArithOneOp * const f_scalar_xtn[] = {
9619     gen_helper_neon_narrow_u8,
9620     gen_helper_neon_narrow_u16,
9621     tcg_gen_ext32u_i64,
9622 };
9623 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9624 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9625 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9626 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9627 
9628 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9629 {
9630     TCGv_i32 tcg_lo = tcg_temp_new_i32();
9631     TCGv_i32 tcg_hi = tcg_temp_new_i32();
9632     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9633     TCGv_i32 ahp = get_ahp_flag();
9634 
9635     tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9636     gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9637     gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9638     tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9639     tcg_gen_extu_i32_i64(d, tcg_lo);
9640 }
9641 
9642 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9643 {
9644     TCGv_i32 tmp = tcg_temp_new_i32();
9645     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9646 
9647     gen_helper_vfp_fcvtsd(tmp, n, fpst);
9648     tcg_gen_extu_i32_i64(d, tmp);
9649 }
9650 
9651 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9652 {
9653     /*
9654      * 64 bit to 32 bit float conversion
9655      * with von Neumann rounding (round to odd)
9656      */
9657     TCGv_i32 tmp = tcg_temp_new_i32();
9658     gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9659     tcg_gen_extu_i32_i64(d, tmp);
9660 }
9661 
9662 static ArithOneOp * const f_vector_fcvtn[] = {
9663     NULL,
9664     gen_fcvtn_hs,
9665     gen_fcvtn_sd,
9666 };
9667 static ArithOneOp * const f_scalar_fcvtxn[] = {
9668     NULL,
9669     NULL,
9670     gen_fcvtxn_sd,
9671 };
9672 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9673 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9674 
9675 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9676 {
9677     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9678     TCGv_i32 tmp = tcg_temp_new_i32();
9679     gen_helper_bfcvt_pair(tmp, n, fpst);
9680     tcg_gen_extu_i32_i64(d, tmp);
9681 }
9682 
9683 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
9684 {
9685     TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
9686     TCGv_i32 tmp = tcg_temp_new_i32();
9687     gen_helper_bfcvt_pair(tmp, n, fpst);
9688     tcg_gen_extu_i32_i64(d, tmp);
9689 }
9690 
9691 static ArithOneOp * const f_vector_bfcvtn[2][3] = {
9692     {
9693         NULL,
9694         gen_bfcvtn_hs,
9695         NULL,
9696     }, {
9697         NULL,
9698         gen_bfcvtn_ah_hs,
9699         NULL,
9700     }
9701 };
9702 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
9703            f_vector_bfcvtn[s->fpcr_ah])
9704 
9705 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9706 {
9707     static NeonGenWidenFn * const widenfns[3] = {
9708         gen_helper_neon_widen_u8,
9709         gen_helper_neon_widen_u16,
9710         tcg_gen_extu_i32_i64,
9711     };
9712     NeonGenWidenFn *widenfn;
9713     TCGv_i64 tcg_res[2];
9714     TCGv_i32 tcg_op;
9715     int part, pass;
9716 
9717     if (a->esz == MO_64) {
9718         return false;
9719     }
9720     if (!fp_access_check(s)) {
9721         return true;
9722     }
9723 
9724     tcg_op = tcg_temp_new_i32();
9725     widenfn = widenfns[a->esz];
9726     part = a->q ? 2 : 0;
9727 
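    /*
     * SHLL/SHLL2 widen each source element and then shift the widened
     * value left by the original element width (8 << esz bits); SHLL2
     * (a->q set) takes its input from the upper half of the source.
     */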
9728     for (pass = 0; pass < 2; pass++) {
9729         read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9730         tcg_res[pass] = tcg_temp_new_i64();
9731         widenfn(tcg_res[pass], tcg_op);
9732         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9733     }
9734 
9735     for (pass = 0; pass < 2; pass++) {
9736         write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9737     }
9738     return true;
9739 }
9740 
9741 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9742 {
9743     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9744 
9745     if (check <= 0) {
9746         return check == 0;
9747     }
9748 
9749     gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9750     return true;
9751 }
9752 
9753 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9754 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9755 
9756 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9757                           const FPScalar1 *f, int rmode)
9758 {
9759     TCGv_i32 tcg_rmode = NULL;
9760     TCGv_ptr fpst;
9761     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9762 
9763     if (check <= 0) {
9764         return check == 0;
9765     }
9766 
9767     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9768     if (rmode >= 0) {
9769         tcg_rmode = gen_set_rmode(rmode, fpst);
9770     }
9771 
9772     if (a->esz == MO_64) {
9773         TCGv_i64 t64 = tcg_temp_new_i64();
9774 
9775         for (int pass = 0; pass < 2; ++pass) {
9776             read_vec_element(s, t64, a->rn, pass, MO_64);
9777             f->gen_d(t64, t64, fpst);
9778             write_vec_element(s, t64, a->rd, pass, MO_64);
9779         }
9780     } else {
9781         TCGv_i32 t32 = tcg_temp_new_i32();
9782         void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9783             = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9784 
9785         for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9786             read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9787             gen(t32, t32, fpst);
9788             write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9789         }
9790     }
9791     clear_vec_high(s, a->q, a->rd);
9792 
9793     if (rmode >= 0) {
9794         gen_restore_rmode(tcg_rmode, fpst);
9795     }
9796     return true;
9797 }
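
/*
 * For instance, a full-width half-precision operation (a->q == 1,
 * a->esz == MO_16) runs (16 >> 1) == 8 passes of the 32-bit loop, while
 * the 64-bit-vector form (a->q == 0) runs 4; double-precision elements
 * always take the dedicated two-pass MO_64 path above.
 */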
9798 
9799 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9800 
9801 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9802 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9803 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9804 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9805 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9806 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9807 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9808 
9809 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9810            &f_scalar_frint32, FPROUNDING_ZERO)
9811 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9812 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9813            &f_scalar_frint64, FPROUNDING_ZERO)
9814 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9815 
9816 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
9817                                            bool is_q, int rd, int rn, int data,
9818                                            gen_helper_gvec_2_ptr * const fns[3],
9819                                            ARMFPStatusFlavour fpsttype)
9820 {
9821     int check = fp_access_check_vector_hsd(s, is_q, esz);
9822     TCGv_ptr fpst;
9823 
9824     if (check <= 0) {
9825         return check == 0;
9826     }
9827 
9828     fpst = fpstatus_ptr(fpsttype);
9829     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9830                        vec_full_reg_offset(s, rn), fpst,
9831                        is_q ? 16 : 8, vec_full_reg_size(s),
9832                        data, fns[esz - 1]);
9833     return true;
9834 }
9835 
9836 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9837                              int rd, int rn, int data,
9838                              gen_helper_gvec_2_ptr * const fns[3])
9839 {
9840     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
9841                                           esz == MO_16 ? FPST_A64_F16 :
9842                                           FPST_A64);
9843 }
9844 
9845 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
9846                                 int rd, int rn, int data,
9847                                 gen_helper_gvec_2_ptr * const fns[3])
9848 {
9849     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
9850                                           fns, select_ah_fpst(s, esz));
9851 }
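
/*
 * The helper tables below are laid out as { _h, _s, _d }, matching the
 * fns[esz - 1] lookup above: MO_16 selects index 0, MO_32 index 1 and
 * MO_64 index 2.  MO_8 never reaches the lookup because
 * fp_access_check_vector_hsd() rejects it first.
 */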
9852 
9853 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9854     gen_helper_gvec_vcvt_sh,
9855     gen_helper_gvec_vcvt_sf,
9856     gen_helper_gvec_vcvt_sd,
9857 };
9858 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9859       a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9860 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9861       a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9862 
9863 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9864     gen_helper_gvec_vcvt_uh,
9865     gen_helper_gvec_vcvt_uf,
9866     gen_helper_gvec_vcvt_ud,
9867 };
9868 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9869       a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9870 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9871       a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9872 
9873 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9874     gen_helper_gvec_vcvt_rz_hs,
9875     gen_helper_gvec_vcvt_rz_fs,
9876     gen_helper_gvec_vcvt_rz_ds,
9877 };
9878 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9879       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9880 
9881 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9882     gen_helper_gvec_vcvt_rz_hu,
9883     gen_helper_gvec_vcvt_rz_fu,
9884     gen_helper_gvec_vcvt_rz_du,
9885 };
9886 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9887       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
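
/*
 * For the *_vf (fixed-point) forms above, the data argument carries
 * a->shift, the number of fractional bits.  As a worked example,
 * SCVTF Vd.4S, Vn.4S, #8 divides each converted int32 by 2^8, so a lane
 * holding 384 (0x180) becomes 1.5; the *_vi forms pass 0 and therefore
 * perform a plain integer conversion.
 */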
9888 
9889 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9890     gen_helper_gvec_vcvt_rm_sh,
9891     gen_helper_gvec_vcvt_rm_ss,
9892     gen_helper_gvec_vcvt_rm_sd,
9893 };
9894 
9895 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9896     gen_helper_gvec_vcvt_rm_uh,
9897     gen_helper_gvec_vcvt_rm_us,
9898     gen_helper_gvec_vcvt_rm_ud,
9899 };
9900 
9901 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9902       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9903 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9904       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9905 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9906       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9907 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9908       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9909 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9910       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9911 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9912       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9913 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9914       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9915 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9916       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9917 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9918       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9919 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9920       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9921 
9922 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9923     gen_helper_gvec_fceq0_h,
9924     gen_helper_gvec_fceq0_s,
9925     gen_helper_gvec_fceq0_d,
9926 };
9927 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9928 
9929 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9930     gen_helper_gvec_fcgt0_h,
9931     gen_helper_gvec_fcgt0_s,
9932     gen_helper_gvec_fcgt0_d,
9933 };
9934 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9935 
9936 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9937     gen_helper_gvec_fcge0_h,
9938     gen_helper_gvec_fcge0_s,
9939     gen_helper_gvec_fcge0_d,
9940 };
9941 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9942 
9943 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9944     gen_helper_gvec_fclt0_h,
9945     gen_helper_gvec_fclt0_s,
9946     gen_helper_gvec_fclt0_d,
9947 };
9948 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9949 
9950 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9951     gen_helper_gvec_fcle0_h,
9952     gen_helper_gvec_fcle0_s,
9953     gen_helper_gvec_fcle0_d,
9954 };
9955 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9956 
9957 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9958     gen_helper_gvec_frecpe_h,
9959     gen_helper_gvec_frecpe_s,
9960     gen_helper_gvec_frecpe_d,
9961 };
9962 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
9963     gen_helper_gvec_frecpe_h,
9964     gen_helper_gvec_frecpe_rpres_s,
9965     gen_helper_gvec_frecpe_d,
9966 };
9967 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9968       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
9969 
9970 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9971     gen_helper_gvec_frsqrte_h,
9972     gen_helper_gvec_frsqrte_s,
9973     gen_helper_gvec_frsqrte_d,
9974 };
9975 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
9976     gen_helper_gvec_frsqrte_h,
9977     gen_helper_gvec_frsqrte_rpres_s,
9978     gen_helper_gvec_frsqrte_d,
9979 };
9980 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9981       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
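
/*
 * The _rpres tables replace only the single-precision entry: with
 * FPCR.AH == 1, FEAT_RPRES provides higher-precision single-precision
 * reciprocal and reciprocal-square-root estimates (roughly 12 bits of
 * mantissa rather than 8), while the half- and double-precision
 * estimates keep the base helpers.
 */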
9982 
9983 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9984 {
9985     /* Handle 2-reg-misc ops which are widening (so each size element
9986      * in the source becomes a 2*size element in the destination).
9987      * The only instruction like this is FCVTL.
9988      */
9989     int pass;
9990     TCGv_ptr fpst;
9991 
9992     if (!fp_access_check(s)) {
9993         return true;
9994     }
9995 
9996     if (a->esz == MO_64) {
9997         /* 32 -> 64 bit fp conversion */
9998         TCGv_i64 tcg_res[2];
9999         TCGv_i32 tcg_op = tcg_temp_new_i32();
10000         int srcelt = a->q ? 2 : 0;
10001 
10002         fpst = fpstatus_ptr(FPST_A64);
10003 
10004         for (pass = 0; pass < 2; pass++) {
10005             tcg_res[pass] = tcg_temp_new_i64();
10006             read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10007             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10008         }
10009         for (pass = 0; pass < 2; pass++) {
10010             write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10011         }
10012     } else {
10013         /* 16 -> 32 bit fp conversion */
10014         int srcelt = a->q ? 4 : 0;
10015         TCGv_i32 tcg_res[4];
10016         TCGv_i32 ahp = get_ahp_flag();
10017 
10018         fpst = fpstatus_ptr(FPST_A64_F16);
10019 
10020         for (pass = 0; pass < 4; pass++) {
10021             tcg_res[pass] = tcg_temp_new_i32();
10022             read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10023             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10024                                            fpst, ahp);
10025         }
10026         for (pass = 0; pass < 4; pass++) {
10027             write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10028         }
10029     }
10030     clear_vec_high(s, true, a->rd);
10031     return true;
10032 }
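
/*
 * For instance, FCVTL2 Vd.2D, Vn.4S takes the a->esz == MO_64, a->q == 1
 * path: srcelt == 2, so the two passes convert source single-precision
 * elements 2 and 3 into the two double-precision destination elements.
 * The half-to-single path works the same way with four passes, e.g.
 * FCVTL2 Vd.4S, Vn.8H converts source elements 4..7.
 */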
10033 
10034 static bool trans_OK(DisasContext *s, arg_OK *a)
10035 {
10036     return true;
10037 }
10038 
10039 static bool trans_FAIL(DisasContext *s, arg_OK *a)
10040 {
10041     s->is_nonstreaming = true;
10042     return true;
10043 }
10044 
10045 /**
10046  * btype_destination_ok:
10047  * @insn: The instruction at the branch destination
10048  * @bt: SCTLR_ELx.BT
10049  * @btype: PSTATE.BTYPE, which is known to be non-zero here
10050  *
10051  * On a guarded page, there are a limited number of insns
10052  * that may be present at the branch target:
10053  *   - branch target identifiers,
10054  *   - paciasp, pacibsp,
10055  *   - BRK insn
10056  *   - HLT insn
10057  * Anything else causes a Branch Target Exception.
10058  *
10059  * Return true if the branch is compatible, false to raise BTITRAP.
10060  */
10061 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10062 {
10063     if ((insn & 0xfffff01fu) == 0xd503201fu) {
10064         /* HINT space */
10065         switch (extract32(insn, 5, 7)) {
10066         case 0b011001: /* PACIASP */
10067         case 0b011011: /* PACIBSP */
10068             /*
10069              * If SCTLR_ELx.BT, then PACI*SP are not compatible
10070              * with btype == 3.  Otherwise all btype are ok.
10071              */
10072             return !bt || btype != 3;
10073         case 0b100000: /* BTI */
10074             /* Not compatible with any btype.  */
10075             return false;
10076         case 0b100010: /* BTI c */
10077             /* Not compatible with btype == 3 */
10078             return btype != 3;
10079         case 0b100100: /* BTI j */
10080             /* Not compatible with btype == 2 */
10081             return btype != 2;
10082         case 0b100110: /* BTI jc */
10083             /* Compatible with any btype.  */
10084             return true;
10085         }
10086     } else {
10087         switch (insn & 0xffe0001fu) {
10088         case 0xd4200000u: /* BRK */
10089         case 0xd4400000u: /* HLT */
10090             /* Give priority to the breakpoint exception.  */
10091             return true;
10092         }
10093     }
10094     return false;
10095 }
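
/*
 * As a worked example of the classification above: PACIASP encodes as
 * 0xd503233f, so (insn & 0xfffff01f) == 0xd503201f puts it in the HINT
 * space and extract32(insn, 5, 7) == 0b011001 accepts it unless
 * SCTLR_ELx.BT is set with btype == 3.  A plain NOP (0xd503201f) matches
 * no case and falls through to "return false", raising the Branch Target
 * Exception.
 */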
10096 
10097 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10098                                           CPUState *cpu)
10099 {
10100     DisasContext *dc = container_of(dcbase, DisasContext, base);
10101     CPUARMState *env = cpu_env(cpu);
10102     ARMCPU *arm_cpu = env_archcpu(env);
10103     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10104     int bound, core_mmu_idx;
10105 
10106     dc->isar = &arm_cpu->isar;
10107     dc->condjmp = 0;
10108     dc->pc_save = dc->base.pc_first;
10109     dc->aarch64 = true;
10110     dc->thumb = false;
10111     dc->sctlr_b = 0;
10112     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10113     dc->condexec_mask = 0;
10114     dc->condexec_cond = 0;
10115     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10116     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10117     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10118     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10119     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10120     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10121 #if !defined(CONFIG_USER_ONLY)
10122     dc->user = (dc->current_el == 0);
10123 #endif
10124     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10125     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10126     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10127     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10128     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10129     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10130     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10131     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10132     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10133     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10134     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10135     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10136     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10137     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10138     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10139     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10140     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10141     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10142     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10143     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10144     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10145     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10146     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10147     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10148     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10149     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
10150     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10151     dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10152     dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10153     dc->vec_len = 0;
10154     dc->vec_stride = 0;
10155     dc->cp_regs = arm_cpu->cp_regs;
10156     dc->features = env->features;
10157     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10158     dc->gm_blocksize = arm_cpu->gm_blocksize;
10159 
10160 #ifdef CONFIG_USER_ONLY
10161     /* In sve_probe_page, we assume TBI is enabled. */
10162     tcg_debug_assert(dc->tbid & 1);
10163 #endif
10164 
10165     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10166 
10167     /* Single step state. The code-generation logic here is:
10168      *  SS_ACTIVE == 0:
10169      *   generate code with no special handling for single-stepping (except
10170      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10171      *   this happens anyway because those changes are all system register or
10172      *   PSTATE writes).
10173      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10174      *   emit code for one insn
10175      *   emit code to clear PSTATE.SS
10176      *   emit code to generate software step exception for completed step
10177      *   end TB (as usual for having generated an exception)
10178      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10179      *   emit code to generate a software step exception
10180      *   end the TB
10181      */
10182     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10183     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10184     dc->is_ldex = false;
10185 
10186     /* Bound the number of insns to execute to those left on the page.  */
10187     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
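    /*
     * Worked example, assuming 4KiB target pages: a pc_first 8 bytes
     * before the end of a page gives -(pc | ~0xfff) == 8, i.e. a bound
     * of 2 insns, while a pc_first at the start of a page allows
     * 0x1000 / 4 == 1024 insns.
     */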
10188 
10189     /* If architectural single step active, limit to 1.  */
10190     if (dc->ss_active) {
10191         bound = 1;
10192     }
10193     dc->base.max_insns = MIN(dc->base.max_insns, bound);
10194 }
10195 
10196 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10197 {
10198 }
10199 
10200 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10201 {
10202     DisasContext *dc = container_of(dcbase, DisasContext, base);
10203     target_ulong pc_arg = dc->base.pc_next;
10204 
10205     if (tb_cflags(dcbase->tb) & CF_PCREL) {
10206         pc_arg &= ~TARGET_PAGE_MASK;
10207     }
10208     tcg_gen_insn_start(pc_arg, 0, 0);
10209     dc->insn_start_updated = false;
10210 }
10211 
10212 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10213 {
10214     DisasContext *s = container_of(dcbase, DisasContext, base);
10215     CPUARMState *env = cpu_env(cpu);
10216     uint64_t pc = s->base.pc_next;
10217     uint32_t insn;
10218 
10219     /* Singlestep exceptions have the highest priority. */
10220     if (s->ss_active && !s->pstate_ss) {
10221         /* Singlestep state is Active-pending.
10222          * If we're in this state at the start of a TB then either
10223          *  a) we just took an exception to an EL which is being debugged
10224          *     and this is the first insn in the exception handler
10225          *  b) debug exceptions were masked and we just unmasked them
10226          *     without changing EL (eg by clearing PSTATE.D)
10227          * In either case we're going to take a swstep exception in the
10228          * "did not step an insn" case, and so the syndrome ISV and EX
10229          * bits should be zero.
10230          */
10231         assert(s->base.num_insns == 1);
10232         gen_swstep_exception(s, 0, 0);
10233         s->base.is_jmp = DISAS_NORETURN;
10234         s->base.pc_next = pc + 4;
10235         return;
10236     }
10237 
10238     if (pc & 3) {
10239         /*
10240          * PC alignment fault.  This has priority over the instruction abort
10241          * that we would receive from a translation fault via arm_ldl_code.
10242          * This should only be possible after an indirect branch, at the
10243          * start of the TB.
10244          */
10245         assert(s->base.num_insns == 1);
10246         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
10247         s->base.is_jmp = DISAS_NORETURN;
10248         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10249         return;
10250     }
10251 
10252     s->pc_curr = pc;
10253     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10254     s->insn = insn;
10255     s->base.pc_next = pc + 4;
10256 
10257     s->fp_access_checked = 0;
10258     s->sve_access_checked = 0;
10259 
10260     if (s->pstate_il) {
10261         /*
10262          * Illegal execution state. This has priority over BTI
10263          * exceptions, but comes after instruction abort exceptions.
10264          */
10265         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10266         return;
10267     }
10268 
10269     if (dc_isar_feature(aa64_bti, s)) {
10270         if (s->base.num_insns == 1) {
10271             /* First insn can have btype set to non-zero.  */
10272             tcg_debug_assert(s->btype >= 0);
10273 
10274             /*
10275              * Note that the Branch Target Exception has fairly high
10276              * priority -- below debugging exceptions but above most
10277              * everything else.  This allows us to handle it now
10278              * instead of waiting until the insn is otherwise decoded.
10279              *
10280              * We can check all but the guarded page check here;
10281              * defer the latter to a helper.
10282              */
10283             if (s->btype != 0
10284                 && !btype_destination_ok(insn, s->bt, s->btype)) {
10285                 gen_helper_guarded_page_check(tcg_env);
10286             }
10287         } else {
10288             /* Not the first insn: btype must be 0.  */
10289             tcg_debug_assert(s->btype == 0);
10290         }
10291     }
10292 
10293     s->is_nonstreaming = false;
10294     if (s->sme_trap_nonstreaming) {
10295         disas_sme_fa64(s, insn);
10296     }
10297 
10298     if (!disas_a64(s, insn) &&
10299         !disas_sme(s, insn) &&
10300         !disas_sve(s, insn)) {
10301         unallocated_encoding(s);
10302     }
10303 
10304     /*
10305      * After execution of most insns, btype is reset to 0.
10306      * Note that we set btype == -1 when the insn sets btype.
10307      */
10308     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10309         reset_btype(s);
10310     }
10311 }
10312 
10313 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10314 {
10315     DisasContext *dc = container_of(dcbase, DisasContext, base);
10316 
10317     if (unlikely(dc->ss_active)) {
10318         /* Note that this means single stepping WFI doesn't halt the CPU.
10319          * For conditional branch insns this is harmless unreachable code as
10320          * gen_goto_tb() has already handled emitting the debug exception
10321          * (and thus a tb-jump is not possible when singlestepping).
10322          */
10323         switch (dc->base.is_jmp) {
10324         default:
10325             gen_a64_update_pc(dc, 4);
10326             /* fall through */
10327         case DISAS_EXIT:
10328         case DISAS_JUMP:
10329             gen_step_complete_exception(dc);
10330             break;
10331         case DISAS_NORETURN:
10332             break;
10333         }
10334     } else {
10335         switch (dc->base.is_jmp) {
10336         case DISAS_NEXT:
10337         case DISAS_TOO_MANY:
10338             gen_goto_tb(dc, 1, 4);
10339             break;
10340         default:
10341         case DISAS_UPDATE_EXIT:
10342             gen_a64_update_pc(dc, 4);
10343             /* fall through */
10344         case DISAS_EXIT:
10345             tcg_gen_exit_tb(NULL, 0);
10346             break;
10347         case DISAS_UPDATE_NOCHAIN:
10348             gen_a64_update_pc(dc, 4);
10349             /* fall through */
10350         case DISAS_JUMP:
10351             tcg_gen_lookup_and_goto_ptr();
10352             break;
10353         case DISAS_NORETURN:
10354         case DISAS_SWI:
10355             break;
10356         case DISAS_WFE:
10357             gen_a64_update_pc(dc, 4);
10358             gen_helper_wfe(tcg_env);
10359             break;
10360         case DISAS_YIELD:
10361             gen_a64_update_pc(dc, 4);
10362             gen_helper_yield(tcg_env);
10363             break;
10364         case DISAS_WFI:
10365             /*
10366              * This is a special case because we don't want to just halt
10367              * the CPU if trying to debug across a WFI.
10368              */
10369             gen_a64_update_pc(dc, 4);
10370             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10371             /*
10372              * The helper doesn't necessarily throw an exception, but we
10373              * must go back to the main loop to check for interrupts anyway.
10374              */
10375             tcg_gen_exit_tb(NULL, 0);
10376             break;
10377         }
10378     }
10379 }
10380 
10381 const TranslatorOps aarch64_translator_ops = {
10382     .init_disas_context = aarch64_tr_init_disas_context,
10383     .tb_start           = aarch64_tr_tb_start,
10384     .insn_start         = aarch64_tr_insn_start,
10385     .translate_insn     = aarch64_tr_translate_insn,
10386     .tb_stop            = aarch64_tr_tb_stop,
10387 };
10388