xref: /qemu/target/arm/tcg/translate-a64.c (revision 70ce076fa6dff60585c229a4b641b13e64bf03cf)
1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "exec/exec-all.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28 
29 static TCGv_i64 cpu_X[32];
30 static TCGv_i64 cpu_pc;
31 
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34 
35 static const char *regnames[] = {
36     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41 
42 enum a64_shift_type {
43     A64_SHIFT_TYPE_LSL = 0,
44     A64_SHIFT_TYPE_LSR = 1,
45     A64_SHIFT_TYPE_ASR = 2,
46     A64_SHIFT_TYPE_ROR = 3
47 };
48 
49 /*
50  * Helpers for extracting complex instruction fields
51  */
52 
53 /*
54  * For load/store with an unsigned 12 bit immediate scaled by the element
55  * size. The input has the immediate field in bits [14:3] and the element
56  * size in [2:0].
57  */
58 static int uimm_scaled(DisasContext *s, int x)
59 {
60     unsigned imm = x >> 3;
61     unsigned scale = extract32(x, 0, 3);
62     return imm << scale;
63 }
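
/*
 * Worked example (illustrative values, not a specific encoding): if the
 * immediate field holds 2 and the element size field holds 3 (an 8-byte
 * element), then x = (2 << 3) | 3 and uimm_scaled() returns 2 << 3 = 16,
 * i.e. the byte offset is imm * element_size.
 */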
64 
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68     return x << LOG2_TAG_GRANULE;
69 }
70 
71 /*
72  * Include the generated decoders.
73  */
74 
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77 
78 /* initialize TCG globals.  */
79 void a64_translate_init(void)
80 {
81     int i;
82 
83     cpu_pc = tcg_global_mem_new_i64(tcg_env,
84                                     offsetof(CPUARMState, pc),
85                                     "pc");
86     for (i = 0; i < 32; i++) {
87         cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
88                                           offsetof(CPUARMState, xregs[i]),
89                                           regnames[i]);
90     }
91 
92     cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
93         offsetof(CPUARMState, exclusive_high), "exclusive_high");
94 }
95 
96 /*
97  * Return the core mmu_idx to use for A64 load/store insns which
98  * have an "unprivileged load/store" variant. Those insns access
99  * EL0 if executed from an EL which has control over EL0 (usually
100  * EL1) but behave like normal loads and stores if executed from
101  * elsewhere (eg EL3).
102  *
103  * @unpriv : true for the unprivileged encoding; false for the
104  *           normal encoding (in which case we will return the same
105  *           thing as get_mem_index()).
106  */
107 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
108 {
109     /*
110      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
111      * which is the usual mmu_idx for this cpu state.
112      */
113     ARMMMUIdx useridx = s->mmu_idx;
114 
115     if (unpriv && s->unpriv) {
116         /*
117          * We have pre-computed the condition for AccType_UNPRIV.
118          * Therefore we should never get here with a mmu_idx for
119          * which we do not know the corresponding user mmu_idx.
120          */
121         switch (useridx) {
122         case ARMMMUIdx_E10_1:
123         case ARMMMUIdx_E10_1_PAN:
124             useridx = ARMMMUIdx_E10_0;
125             break;
126         case ARMMMUIdx_E20_2:
127         case ARMMMUIdx_E20_2_PAN:
128             useridx = ARMMMUIdx_E20_0;
129             break;
130         default:
131             g_assert_not_reached();
132         }
133     }
134     return arm_to_core_mmu_idx(useridx);
135 }
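
/*
 * For instance (a sketch, not an exhaustive list of regimes): an LDTR
 * executed at EL1 under ARMMMUIdx_E10_1 is translated with
 * ARMMMUIdx_E10_0, i.e. with EL0 permissions, while the same insn
 * executed at EL3 keeps the index returned by get_mem_index().
 */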
136 
137 static void set_btype_raw(int val)
138 {
139     tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
140                    offsetof(CPUARMState, btype));
141 }
142 
143 static void set_btype(DisasContext *s, int val)
144 {
145     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
146     tcg_debug_assert(val >= 1 && val <= 3);
147     set_btype_raw(val);
148     s->btype = -1;
149 }
150 
151 static void reset_btype(DisasContext *s)
152 {
153     if (s->btype != 0) {
154         set_btype_raw(0);
155         s->btype = 0;
156     }
157 }
158 
159 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
160 {
161     assert(s->pc_save != -1);
162     if (tb_cflags(s->base.tb) & CF_PCREL) {
163         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
164     } else {
165         tcg_gen_movi_i64(dest, s->pc_curr + diff);
166     }
167 }
168 
169 void gen_a64_update_pc(DisasContext *s, target_long diff)
170 {
171     gen_pc_plus_diff(s, cpu_pc, diff);
172     s->pc_save = s->pc_curr + diff;
173 }
174 
175 /*
176  * Handle Top Byte Ignore (TBI) bits.
177  *
178  * If address tagging is enabled via the TCR TBI bits:
179  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
180  *    then the address is zero-extended, clearing bits [63:56]
181  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
182  *    and TBI1 controls addresses with bit 55 == 1.
183  *    If the appropriate TBI bit is set for the address then
184  *    the address is sign-extended from bit 55 into bits [63:56]
185  *
186  * Here we have concatenated TBI{1,0} into tbi.
187  */
188 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
189                                 TCGv_i64 src, int tbi)
190 {
191     if (tbi == 0) {
192         /* Load unmodified address */
193         tcg_gen_mov_i64(dst, src);
194     } else if (!regime_has_2_ranges(s->mmu_idx)) {
195         /* Force tag byte to all zero */
196         tcg_gen_extract_i64(dst, src, 0, 56);
197     } else {
198         /* Sign-extend from bit 55.  */
199         tcg_gen_sextract_i64(dst, src, 0, 56);
200 
201         switch (tbi) {
202         case 1:
203             /* tbi0 but !tbi1: only use the extension if positive */
204             tcg_gen_and_i64(dst, dst, src);
205             break;
206         case 2:
207             /* !tbi0 but tbi1: only use the extension if negative */
208             tcg_gen_or_i64(dst, dst, src);
209             break;
210         case 3:
211             /* tbi0 and tbi1: always use the extension */
212             break;
213         default:
214             g_assert_not_reached();
215         }
216     }
217 }
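
/*
 * Sketch of the and/or trick above with an example value: with tbi == 1
 * (TBI0 only), an address with bit 55 == 0 and a non-zero tag byte
 * sign-extends to a value whose bits [63:56] are zero, and AND-ing that
 * with the original clears the tag; for a bit-55 == 1 address the
 * extension is all-ones in [63:56], so the AND leaves the original tag
 * untouched, matching "only use the extension if positive".
 */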
218 
219 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
220 {
221     /*
222      * If address tagging is enabled for instructions via the TCR TBI bits,
223      * then loading an address into the PC will clear out any tag.
224      */
225     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
226     s->pc_save = -1;
227 }
228 
229 /*
230  * Handle MTE and/or TBI.
231  *
232  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
233  * for the tag to be present in the FAR_ELx register.  But for user-only
234  * mode we do not have a TLB with which to implement this, so we must
235  * remove the top byte now.
236  *
237  * Always return a fresh temporary that we can increment independently
238  * of the write-back address.
239  */
240 
241 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
242 {
243     TCGv_i64 clean = tcg_temp_new_i64();
244 #ifdef CONFIG_USER_ONLY
245     gen_top_byte_ignore(s, clean, addr, s->tbid);
246 #else
247     tcg_gen_mov_i64(clean, addr);
248 #endif
249     return clean;
250 }
251 
252 /* Insert a zero tag into src, with the result at dst. */
253 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
254 {
255     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
256 }
257 
258 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
259                              MMUAccessType acc, int log2_size)
260 {
261     gen_helper_probe_access(tcg_env, ptr,
262                             tcg_constant_i32(acc),
263                             tcg_constant_i32(get_mem_index(s)),
264                             tcg_constant_i32(1 << log2_size));
265 }
266 
267 /*
268  * For MTE, check a single logical or atomic access.  This probes a single
269  * address, the exact one specified.  The size and alignment of the access
270  * is not relevant to MTE, per se, but watchpoints do require the size,
271  * and we want to recognize those before making any other changes to state.
272  */
273 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
274                                       bool is_write, bool tag_checked,
275                                       MemOp memop, bool is_unpriv,
276                                       int core_idx)
277 {
278     if (tag_checked && s->mte_active[is_unpriv]) {
279         TCGv_i64 ret;
280         int desc = 0;
281 
282         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
283         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
284         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
285         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
286         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
287         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
288 
289         ret = tcg_temp_new_i64();
290         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
291 
292         return ret;
293     }
294     return clean_data_tbi(s, addr);
295 }
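
/*
 * As an illustration of the descriptor built above: a 4-byte tag-checked
 * read would set WRITE = 0 and SIZEM1 = 3, alongside the mmu index and
 * the TBI/TCMA state; the precise bit layout comes from the MTEDESC
 * FIELD() definitions and is not repeated here.
 */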
296 
297 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
298                         bool tag_checked, MemOp memop)
299 {
300     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
301                                  false, get_mem_index(s));
302 }
303 
304 /*
305  * For MTE, check multiple logical sequential accesses.
306  */
307 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
308                         bool tag_checked, int total_size, MemOp single_mop)
309 {
310     if (tag_checked && s->mte_active[0]) {
311         TCGv_i64 ret;
312         int desc = 0;
313 
314         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
315         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
316         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
317         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
318         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
319         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
320 
321         ret = tcg_temp_new_i64();
322         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
323 
324         return ret;
325     }
326     return clean_data_tbi(s, addr);
327 }
328 
329 /*
330  * Generate the special alignment check that applies to AccType_ATOMIC
331  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
332  * naturally aligned, but it must not cross a 16-byte boundary.
333  * See AArch64.CheckAlignment().
334  */
335 static void check_lse2_align(DisasContext *s, int rn, int imm,
336                              bool is_write, MemOp mop)
337 {
338     TCGv_i32 tmp;
339     TCGv_i64 addr;
340     TCGLabel *over_label;
341     MMUAccessType type;
342     int mmu_idx;
343 
344     tmp = tcg_temp_new_i32();
345     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
346     tcg_gen_addi_i32(tmp, tmp, imm & 15);
347     tcg_gen_andi_i32(tmp, tmp, 15);
348     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
349 
350     over_label = gen_new_label();
351     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
352 
353     addr = tcg_temp_new_i64();
354     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
355 
356     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
357     mmu_idx = get_mem_index(s);
358     gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
359                                 tcg_constant_i32(mmu_idx));
360 
361     gen_set_label(over_label);
362 
363 }
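
/*
 * Worked example of the check above (illustrative values): for an 8-byte
 * access where (Xn + imm) % 16 == 12, the computed value is 12 + 8 = 20,
 * which is > 16, so the access crosses a 16-byte boundary and the
 * unaligned_access helper raises the fault; at offset 8 the value is
 * 8 + 8 = 16 <= 16 and the branch skips the helper.
 */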
364 
365 /* Handle the alignment check for AccType_ATOMIC instructions. */
366 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
367 {
368     MemOp size = mop & MO_SIZE;
369 
370     if (size == MO_8) {
371         return mop;
372     }
373 
374     /*
375      * If size == MO_128, this is a LDXP, and the operation is single-copy
376      * atomic for each doubleword, not the entire quadword; it still must
377      * be quadword aligned.
378      */
379     if (size == MO_128) {
380         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
381                                    MO_ATOM_IFALIGN_PAIR);
382     }
383     if (dc_isar_feature(aa64_lse2, s)) {
384         check_lse2_align(s, rn, 0, true, mop);
385     } else {
386         mop |= MO_ALIGN;
387     }
388     return finalize_memop(s, mop);
389 }
390 
391 /* Handle the alignment check for AccType_ORDERED instructions. */
392 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
393                                  bool is_write, MemOp mop)
394 {
395     MemOp size = mop & MO_SIZE;
396 
397     if (size == MO_8) {
398         return mop;
399     }
400     if (size == MO_128) {
401         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
402                                    MO_ATOM_IFALIGN_PAIR);
403     }
404     if (!dc_isar_feature(aa64_lse2, s)) {
405         mop |= MO_ALIGN;
406     } else if (!s->naa) {
407         check_lse2_align(s, rn, imm, is_write, mop);
408     }
409     return finalize_memop(s, mop);
410 }
411 
412 typedef struct DisasCompare64 {
413     TCGCond cond;
414     TCGv_i64 value;
415 } DisasCompare64;
416 
417 static void a64_test_cc(DisasCompare64 *c64, int cc)
418 {
419     DisasCompare c32;
420 
421     arm_test_cc(&c32, cc);
422 
423     /*
424      * Sign-extend the 32-bit value so that the GE/LT comparisons work
425      * properly.  The NE/EQ comparisons are also fine with this choice.
426      */
427     c64->cond = c32.cond;
428     c64->value = tcg_temp_new_i64();
429     tcg_gen_ext_i32_i64(c64->value, c32.value);
430 }
431 
432 static void gen_rebuild_hflags(DisasContext *s)
433 {
434     gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
435 }
436 
437 static void gen_exception_internal(int excp)
438 {
439     assert(excp_is_internal(excp));
440     gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
441 }
442 
443 static void gen_exception_internal_insn(DisasContext *s, int excp)
444 {
445     gen_a64_update_pc(s, 0);
446     gen_exception_internal(excp);
447     s->base.is_jmp = DISAS_NORETURN;
448 }
449 
450 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
451 {
452     gen_a64_update_pc(s, 0);
453     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
454     s->base.is_jmp = DISAS_NORETURN;
455 }
456 
457 static void gen_step_complete_exception(DisasContext *s)
458 {
459     /* We just completed a step of an insn. Move from Active-not-pending
460      * to Active-pending, and then also take the swstep exception.
461      * This corresponds to making the (IMPDEF) choice to prioritize
462      * swstep exceptions over asynchronous exceptions taken to an exception
463      * level where debug is disabled. This choice has the advantage that
464      * we do not need to maintain internal state corresponding to the
465      * ISV/EX syndrome bits between completion of the step and generation
466      * of the exception, and our syndrome information is always correct.
467      */
468     gen_ss_advance(s);
469     gen_swstep_exception(s, 1, s->is_ldex);
470     s->base.is_jmp = DISAS_NORETURN;
471 }
472 
473 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
474 {
475     if (s->ss_active) {
476         return false;
477     }
478     return translator_use_goto_tb(&s->base, dest);
479 }
480 
481 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
482 {
483     if (use_goto_tb(s, s->pc_curr + diff)) {
484         /*
485          * For pcrel, the pc must always be up-to-date on entry to
486          * the linked TB, so that it can use simple additions for all
487          * further adjustments.  For !pcrel, the linked TB is compiled
488          * to know its full virtual address, so we can delay the
489          * update to pc to the unlinked path.  A long chain of links
490          * can thus avoid many updates to the PC.
491          */
492         if (tb_cflags(s->base.tb) & CF_PCREL) {
493             gen_a64_update_pc(s, diff);
494             tcg_gen_goto_tb(n);
495         } else {
496             tcg_gen_goto_tb(n);
497             gen_a64_update_pc(s, diff);
498         }
499         tcg_gen_exit_tb(s->base.tb, n);
500         s->base.is_jmp = DISAS_NORETURN;
501     } else {
502         gen_a64_update_pc(s, diff);
503         if (s->ss_active) {
504             gen_step_complete_exception(s);
505         } else {
506             tcg_gen_lookup_and_goto_ptr();
507             s->base.is_jmp = DISAS_NORETURN;
508         }
509     }
510 }
511 
512 /*
513  * Register access functions
514  *
515  * These functions are used for directly accessing a register where
516  * changes to the final register value are likely to be made. If you
517  * need to use a register for temporary calculation (e.g. index type
518  * operations) use the read_* form.
519  *
520  * B1.2.1 Register mappings
521  *
522  * In the instruction encoding, register 31 can refer to ZR (zero register) or
523  * the SP (stack pointer) depending on context. In QEMU's case we map SP
524  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
525  * This is the point of the _sp forms.
526  */
527 TCGv_i64 cpu_reg(DisasContext *s, int reg)
528 {
529     if (reg == 31) {
530         TCGv_i64 t = tcg_temp_new_i64();
531         tcg_gen_movi_i64(t, 0);
532         return t;
533     } else {
534         return cpu_X[reg];
535     }
536 }
537 
538 /* register access for when 31 == SP */
539 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
540 {
541     return cpu_X[reg];
542 }
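
/*
 * Usage sketch (example insns for illustration only): decoding
 * "ADD X0, X1, XZR" reads register number 31 via cpu_reg() and gets a
 * zero temporary, while "ADD SP, SP, #16" accesses the same register
 * number via cpu_reg_sp() and so reads and writes cpu_X[31], the stack
 * pointer.
 */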
543 
544 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
545  * representing the register contents. This TCGv is an auto-freed
546  * temporary so it need not be explicitly freed, and may be modified.
547  */
548 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
549 {
550     TCGv_i64 v = tcg_temp_new_i64();
551     if (reg != 31) {
552         if (sf) {
553             tcg_gen_mov_i64(v, cpu_X[reg]);
554         } else {
555             tcg_gen_ext32u_i64(v, cpu_X[reg]);
556         }
557     } else {
558         tcg_gen_movi_i64(v, 0);
559     }
560     return v;
561 }
562 
563 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
564 {
565     TCGv_i64 v = tcg_temp_new_i64();
566     if (sf) {
567         tcg_gen_mov_i64(v, cpu_X[reg]);
568     } else {
569         tcg_gen_ext32u_i64(v, cpu_X[reg]);
570     }
571     return v;
572 }
573 
574 /* Return the offset into CPUARMState of a slice (from
575  * the least significant end) of FP register Qn (ie
576  * Dn, Sn, Hn or Bn).
577  * (Note that this is not the same mapping as for A32; see cpu.h)
578  */
579 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
580 {
581     return vec_reg_offset(s, regno, 0, size);
582 }
583 
584 /* Offset of the high half of the 128 bit vector Qn */
585 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
586 {
587     return vec_reg_offset(s, regno, 1, MO_64);
588 }
589 
590 /* Convenience accessors for reading and writing single and double
591  * FP registers. Writing clears the upper parts of the associated
592  * 128 bit vector register, as required by the architecture.
593  * Note that, like the GP register accessors, the values returned
594  * by the read functions are temporaries and need not be manually freed.
595  */
596 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
597 {
598     TCGv_i64 v = tcg_temp_new_i64();
599 
600     tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
601     return v;
602 }
603 
604 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
605 {
606     TCGv_i32 v = tcg_temp_new_i32();
607 
608     tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
609     return v;
610 }
611 
612 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
613 {
614     TCGv_i32 v = tcg_temp_new_i32();
615 
616     tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
617     return v;
618 }
619 
620 static void clear_vec(DisasContext *s, int rd)
621 {
622     unsigned ofs = fp_reg_offset(s, rd, MO_64);
623     unsigned vsz = vec_full_reg_size(s);
624 
625     tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
626 }
627 
628 /*
629  * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
630  * If SVE is not enabled, then there are only 128 bits in the vector.
631  */
632 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
633 {
634     unsigned ofs = fp_reg_offset(s, rd, MO_64);
635     unsigned vsz = vec_full_reg_size(s);
636 
637     /* Nop move, with side effect of clearing the tail. */
638     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
639 }
640 
641 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
642 {
643     unsigned ofs = fp_reg_offset(s, reg, MO_64);
644 
645     tcg_gen_st_i64(v, tcg_env, ofs);
646     clear_vec_high(s, false, reg);
647 }
648 
649 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
650 {
651     TCGv_i64 tmp = tcg_temp_new_i64();
652 
653     tcg_gen_extu_i32_i64(tmp, v);
654     write_fp_dreg(s, reg, tmp);
655 }
656 
657 /*
658  * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
659  * - if FPCR.NEP == 0, clear the high elements of reg
660  * - if FPCR.NEP == 1, set the high elements of reg from mergereg
661  *   (i.e. merge the result with those high elements)
662  * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
663  */
664 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
665                                   TCGv_i64 v)
666 {
667     if (!s->fpcr_nep) {
668         write_fp_dreg(s, reg, v);
669         return;
670     }
671 
672     /*
673      * Move from mergereg to reg; this sets the high elements and
674      * clears the bits above 128 as a side effect.
675      */
676     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
677                      vec_full_reg_offset(s, mergereg),
678                      16, vec_full_reg_size(s));
679     tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
680 }
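
/*
 * Sketch of the FPCR.NEP == 1 path above, using a scalar FADD D0, D1, D2
 * as an example: the 128-bit destination is first filled from mergereg
 * (which also zeroes any SVE bits above 128), then only the low 64 bits
 * are overwritten with the computed result, so the high elements are
 * merged rather than cleared.  The per-insn choice of mergereg is made
 * by the callers.
 */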
681 
682 /*
683  * Write a single-prec result, but only clear the higher elements
684  * of the destination register if FPCR.NEP is 0; otherwise preserve them.
685  */
686 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
687                                   TCGv_i32 v)
688 {
689     if (!s->fpcr_nep) {
690         write_fp_sreg(s, reg, v);
691         return;
692     }
693 
694     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
695                      vec_full_reg_offset(s, mergereg),
696                      16, vec_full_reg_size(s));
697     tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
698 }
699 
700 /*
701  * Write a half-prec result, but only clear the higher elements
702  * of the destination register if FPCR.NEP is 0; otherwise preserve them.
703  * The caller must ensure that the top 16 bits of v are zero.
704  */
705 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
706                                   TCGv_i32 v)
707 {
708     if (!s->fpcr_nep) {
709         write_fp_sreg(s, reg, v);
710         return;
711     }
712 
713     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
714                      vec_full_reg_offset(s, mergereg),
715                      16, vec_full_reg_size(s));
716     tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
717 }
718 
719 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
720 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
721                          GVecGen2Fn *gvec_fn, int vece)
722 {
723     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
724             is_q ? 16 : 8, vec_full_reg_size(s));
725 }
726 
727 /* Expand a 2-operand + immediate AdvSIMD vector operation using
728  * an expander function.
729  */
730 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
731                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
732 {
733     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
734             imm, is_q ? 16 : 8, vec_full_reg_size(s));
735 }
736 
737 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
738 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
739                          GVecGen3Fn *gvec_fn, int vece)
740 {
741     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
742             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
743 }
744 
745 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
746 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
747                          int rx, GVecGen4Fn *gvec_fn, int vece)
748 {
749     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
750             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
751             is_q ? 16 : 8, vec_full_reg_size(s));
752 }
753 
754 /* Expand a 2-operand operation using an out-of-line helper.  */
755 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
756                              int rn, int data, gen_helper_gvec_2 *fn)
757 {
758     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
759                        vec_full_reg_offset(s, rn),
760                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
761 }
762 
763 /* Expand a 3-operand operation using an out-of-line helper.  */
764 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
765                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
766 {
767     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
768                        vec_full_reg_offset(s, rn),
769                        vec_full_reg_offset(s, rm),
770                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
771 }
772 
773 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
774  * an out-of-line helper.
775  */
776 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
777                               int rm, ARMFPStatusFlavour fpsttype, int data,
778                               gen_helper_gvec_3_ptr *fn)
779 {
780     TCGv_ptr fpst = fpstatus_ptr(fpsttype);
781     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
782                        vec_full_reg_offset(s, rn),
783                        vec_full_reg_offset(s, rm), fpst,
784                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
785 }
786 
787 /* Expand a 4-operand operation using an out-of-line helper.  */
788 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
789                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
790 {
791     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
792                        vec_full_reg_offset(s, rn),
793                        vec_full_reg_offset(s, rm),
794                        vec_full_reg_offset(s, ra),
795                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
796 }
797 
798 /*
799  * Expand a 4-operand operation using an out-of-line helper that takes
800  * a pointer to the CPU env.
801  */
802 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
803                              int rm, int ra, int data,
804                              gen_helper_gvec_4_ptr *fn)
805 {
806     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
807                        vec_full_reg_offset(s, rn),
808                        vec_full_reg_offset(s, rm),
809                        vec_full_reg_offset(s, ra),
810                        tcg_env,
811                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
812 }
813 
814 /*
815  * Expand a 4-operand + fpstatus pointer + simd data value operation using
816  * an out-of-line helper.
817  */
818 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
819                               int rm, int ra, ARMFPStatusFlavour fpsttype,
820                               int data,
821                               gen_helper_gvec_4_ptr *fn)
822 {
823     TCGv_ptr fpst = fpstatus_ptr(fpsttype);
824     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
825                        vec_full_reg_offset(s, rn),
826                        vec_full_reg_offset(s, rm),
827                        vec_full_reg_offset(s, ra), fpst,
828                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
829 }
830 
831 /*
832  * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
833  * These functions implement
834  *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
835  * which for float32 is
836  *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
837  * and similarly for the other float sizes.
838  */
839 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
840 {
841     TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
842 
843     gen_vfp_negh(chs_s, s);
844     gen_vfp_absh(abs_s, s);
845     tcg_gen_movcond_i32(TCG_COND_GTU, d,
846                         abs_s, tcg_constant_i32(0x7c00),
847                         s, chs_s);
848 }
849 
850 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
851 {
852     TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
853 
854     gen_vfp_negs(chs_s, s);
855     gen_vfp_abss(abs_s, s);
856     tcg_gen_movcond_i32(TCG_COND_GTU, d,
857                         abs_s, tcg_constant_i32(0x7f800000UL),
858                         s, chs_s);
859 }
860 
861 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
862 {
863     TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
864 
865     gen_vfp_negd(chs_s, s);
866     gen_vfp_absd(abs_s, s);
867     tcg_gen_movcond_i64(TCG_COND_GTU, d,
868                         abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
869                         s, chs_s);
870 }
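
/*
 * Example of the movcond pattern above, float32 case: for s = 0x7fc00000
 * (a NaN) the absolute value 0x7fc00000 is greater than 0x7f800000, so d
 * receives s unchanged; for s = 0xc0000000 (-2.0f) the absolute value is
 * 0x40000000, the comparison fails, and d receives the sign-flipped
 * 0x40000000 (+2.0f).
 */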
871 
872 /*
873  * These functions implement
874  *  d = floatN_is_any_nan(s) ? s : floatN_abs(s)
875  * which for float32 is
876  *  d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
877  * and similarly for the other float sizes.
878  */
879 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
880 {
881     TCGv_i32 abs_s = tcg_temp_new_i32();
882 
883     gen_vfp_absh(abs_s, s);
884     tcg_gen_movcond_i32(TCG_COND_GTU, d,
885                         abs_s, tcg_constant_i32(0x7c00),
886                         s, abs_s);
887 }
888 
889 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
890 {
891     TCGv_i32 abs_s = tcg_temp_new_i32();
892 
893     gen_vfp_abss(abs_s, s);
894     tcg_gen_movcond_i32(TCG_COND_GTU, d,
895                         abs_s, tcg_constant_i32(0x7f800000UL),
896                         s, abs_s);
897 }
898 
899 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
900 {
901     TCGv_i64 abs_s = tcg_temp_new_i64();
902 
903     gen_vfp_absd(abs_s, s);
904     tcg_gen_movcond_i64(TCG_COND_GTU, d,
905                         abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
906                         s, abs_s);
907 }
908 
909 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
910 {
911     if (dc->fpcr_ah) {
912         gen_vfp_ah_negh(d, s);
913     } else {
914         gen_vfp_negh(d, s);
915     }
916 }
917 
918 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
919 {
920     if (dc->fpcr_ah) {
921         gen_vfp_ah_negs(d, s);
922     } else {
923         gen_vfp_negs(d, s);
924     }
925 }
926 
927 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
928 {
929     if (dc->fpcr_ah) {
930         gen_vfp_ah_negd(d, s);
931     } else {
932         gen_vfp_negd(d, s);
933     }
934 }
935 
936 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
937  * than the 32 bit equivalent.
938  */
939 static inline void gen_set_NZ64(TCGv_i64 result)
940 {
941     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
942     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
943 }
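
/*
 * Sketch of why this works: NF receives the high 32 bits, so its bit 31
 * is the result's sign bit 63, and ZF receives (low | high), which is
 * zero exactly when the whole 64-bit result is zero.  For example a
 * result of 0x0000000100000000 leaves ZF == 1 (non-zero, so Z is clear)
 * and NF == 0x00000001 (bit 31 clear, so N is clear).
 */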
944 
945 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
946 static inline void gen_logic_CC(int sf, TCGv_i64 result)
947 {
948     if (sf) {
949         gen_set_NZ64(result);
950     } else {
951         tcg_gen_extrl_i64_i32(cpu_ZF, result);
952         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
953     }
954     tcg_gen_movi_i32(cpu_CF, 0);
955     tcg_gen_movi_i32(cpu_VF, 0);
956 }
957 
958 /* dest = T0 + T1; compute C, N, V and Z flags */
959 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
960 {
961     TCGv_i64 result, flag, tmp;
962     result = tcg_temp_new_i64();
963     flag = tcg_temp_new_i64();
964     tmp = tcg_temp_new_i64();
965 
966     tcg_gen_movi_i64(tmp, 0);
967     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
968 
969     tcg_gen_extrl_i64_i32(cpu_CF, flag);
970 
971     gen_set_NZ64(result);
972 
973     tcg_gen_xor_i64(flag, result, t0);
974     tcg_gen_xor_i64(tmp, t0, t1);
975     tcg_gen_andc_i64(flag, flag, tmp);
976     tcg_gen_extrh_i64_i32(cpu_VF, flag);
977 
978     tcg_gen_mov_i64(dest, result);
979 }
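
/*
 * The V flag above is bit 63 of (result ^ t0) & ~(t0 ^ t1): signed
 * overflow happens only when the operands have the same sign and the
 * result's sign differs.  For example 0x7fffffffffffffff + 1 produces
 * 0x8000000000000000; (result ^ t0) has bit 63 set, (t0 ^ t1) does not,
 * so the sign bit of cpu_VF ends up set.
 */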
980 
981 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
982 {
983     TCGv_i32 t0_32 = tcg_temp_new_i32();
984     TCGv_i32 t1_32 = tcg_temp_new_i32();
985     TCGv_i32 tmp = tcg_temp_new_i32();
986 
987     tcg_gen_movi_i32(tmp, 0);
988     tcg_gen_extrl_i64_i32(t0_32, t0);
989     tcg_gen_extrl_i64_i32(t1_32, t1);
990     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
991     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
992     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
993     tcg_gen_xor_i32(tmp, t0_32, t1_32);
994     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
995     tcg_gen_extu_i32_i64(dest, cpu_NF);
996 }
997 
998 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
999 {
1000     if (sf) {
1001         gen_add64_CC(dest, t0, t1);
1002     } else {
1003         gen_add32_CC(dest, t0, t1);
1004     }
1005 }
1006 
1007 /* dest = T0 - T1; compute C, N, V and Z flags */
1008 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1009 {
1010     /* 64 bit arithmetic */
1011     TCGv_i64 result, flag, tmp;
1012 
1013     result = tcg_temp_new_i64();
1014     flag = tcg_temp_new_i64();
1015     tcg_gen_sub_i64(result, t0, t1);
1016 
1017     gen_set_NZ64(result);
1018 
1019     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
1020     tcg_gen_extrl_i64_i32(cpu_CF, flag);
1021 
1022     tcg_gen_xor_i64(flag, result, t0);
1023     tmp = tcg_temp_new_i64();
1024     tcg_gen_xor_i64(tmp, t0, t1);
1025     tcg_gen_and_i64(flag, flag, tmp);
1026     tcg_gen_extrh_i64_i32(cpu_VF, flag);
1027     tcg_gen_mov_i64(dest, result);
1028 }
1029 
1030 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1031 {
1032     /* 32 bit arithmetic */
1033     TCGv_i32 t0_32 = tcg_temp_new_i32();
1034     TCGv_i32 t1_32 = tcg_temp_new_i32();
1035     TCGv_i32 tmp;
1036 
1037     tcg_gen_extrl_i64_i32(t0_32, t0);
1038     tcg_gen_extrl_i64_i32(t1_32, t1);
1039     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
1040     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1041     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
1042     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1043     tmp = tcg_temp_new_i32();
1044     tcg_gen_xor_i32(tmp, t0_32, t1_32);
1045     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
1046     tcg_gen_extu_i32_i64(dest, cpu_NF);
1047 }
1048 
1049 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1050 {
1051     if (sf) {
1052         gen_sub64_CC(dest, t0, t1);
1053     } else {
1054         gen_sub32_CC(dest, t0, t1);
1055     }
1056 }
1057 
1058 /* dest = T0 + T1 + CF; do not compute flags. */
1059 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1060 {
1061     TCGv_i64 flag = tcg_temp_new_i64();
1062     tcg_gen_extu_i32_i64(flag, cpu_CF);
1063     tcg_gen_add_i64(dest, t0, t1);
1064     tcg_gen_add_i64(dest, dest, flag);
1065 
1066     if (!sf) {
1067         tcg_gen_ext32u_i64(dest, dest);
1068     }
1069 }
1070 
1071 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
1072 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1073 {
1074     if (sf) {
1075         TCGv_i64 result = tcg_temp_new_i64();
1076         TCGv_i64 cf_64 = tcg_temp_new_i64();
1077         TCGv_i64 vf_64 = tcg_temp_new_i64();
1078         TCGv_i64 tmp = tcg_temp_new_i64();
1079         TCGv_i64 zero = tcg_constant_i64(0);
1080 
1081         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
1082         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
1083         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
1084         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
1085         gen_set_NZ64(result);
1086 
1087         tcg_gen_xor_i64(vf_64, result, t0);
1088         tcg_gen_xor_i64(tmp, t0, t1);
1089         tcg_gen_andc_i64(vf_64, vf_64, tmp);
1090         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
1091 
1092         tcg_gen_mov_i64(dest, result);
1093     } else {
1094         TCGv_i32 t0_32 = tcg_temp_new_i32();
1095         TCGv_i32 t1_32 = tcg_temp_new_i32();
1096         TCGv_i32 tmp = tcg_temp_new_i32();
1097         TCGv_i32 zero = tcg_constant_i32(0);
1098 
1099         tcg_gen_extrl_i64_i32(t0_32, t0);
1100         tcg_gen_extrl_i64_i32(t1_32, t1);
1101         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
1102         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
1103 
1104         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1105         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1106         tcg_gen_xor_i32(tmp, t0_32, t1_32);
1107         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1108         tcg_gen_extu_i32_i64(dest, cpu_NF);
1109     }
1110 }
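
/*
 * Sketch of the 64-bit path above: the first add2 computes t0 + CF with
 * the carry-out landing in cf_64, the second adds t1 and accumulates any
 * further carry, so cf_64 is 0 or 1 and becomes the new C flag.  For
 * example 0xffffffffffffffff + 0 with CF = 1 wraps to 0 and sets C.
 */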
1111 
1112 /*
1113  * Load/Store generators
1114  */
1115 
1116 /*
1117  * Store from GPR register to memory.
1118  */
1119 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
1120                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
1121                              bool iss_valid,
1122                              unsigned int iss_srt,
1123                              bool iss_sf, bool iss_ar)
1124 {
1125     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
1126 
1127     if (iss_valid) {
1128         uint32_t syn;
1129 
1130         syn = syn_data_abort_with_iss(0,
1131                                       (memop & MO_SIZE),
1132                                       false,
1133                                       iss_srt,
1134                                       iss_sf,
1135                                       iss_ar,
1136                                       0, 0, 0, 0, 0, false);
1137         disas_set_insn_syndrome(s, syn);
1138     }
1139 }
1140 
1141 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
1142                       TCGv_i64 tcg_addr, MemOp memop,
1143                       bool iss_valid,
1144                       unsigned int iss_srt,
1145                       bool iss_sf, bool iss_ar)
1146 {
1147     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
1148                      iss_valid, iss_srt, iss_sf, iss_ar);
1149 }
1150 
1151 /*
1152  * Load from memory to GPR register
1153  */
1154 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1155                              MemOp memop, bool extend, int memidx,
1156                              bool iss_valid, unsigned int iss_srt,
1157                              bool iss_sf, bool iss_ar)
1158 {
1159     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
1160 
1161     if (extend && (memop & MO_SIGN)) {
1162         g_assert((memop & MO_SIZE) <= MO_32);
1163         tcg_gen_ext32u_i64(dest, dest);
1164     }
1165 
1166     if (iss_valid) {
1167         uint32_t syn;
1168 
1169         syn = syn_data_abort_with_iss(0,
1170                                       (memop & MO_SIZE),
1171                                       (memop & MO_SIGN) != 0,
1172                                       iss_srt,
1173                                       iss_sf,
1174                                       iss_ar,
1175                                       0, 0, 0, 0, 0, false);
1176         disas_set_insn_syndrome(s, syn);
1177     }
1178 }
1179 
1180 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1181                       MemOp memop, bool extend,
1182                       bool iss_valid, unsigned int iss_srt,
1183                       bool iss_sf, bool iss_ar)
1184 {
1185     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1186                      iss_valid, iss_srt, iss_sf, iss_ar);
1187 }
1188 
1189 /*
1190  * Store from FP register to memory
1191  */
1192 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1193 {
1194     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1195     TCGv_i64 tmplo = tcg_temp_new_i64();
1196 
1197     tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1198 
1199     if ((mop & MO_SIZE) < MO_128) {
1200         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1201     } else {
1202         TCGv_i64 tmphi = tcg_temp_new_i64();
1203         TCGv_i128 t16 = tcg_temp_new_i128();
1204 
1205         tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1206         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1207 
1208         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1209     }
1210 }
1211 
1212 /*
1213  * Load from memory to FP register
1214  */
1215 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1216 {
1217     /* This always zero-extends and writes to a full 128 bit wide vector */
1218     TCGv_i64 tmplo = tcg_temp_new_i64();
1219     TCGv_i64 tmphi = NULL;
1220 
1221     if ((mop & MO_SIZE) < MO_128) {
1222         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1223     } else {
1224         TCGv_i128 t16 = tcg_temp_new_i128();
1225 
1226         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1227 
1228         tmphi = tcg_temp_new_i64();
1229         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1230     }
1231 
1232     tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1233 
1234     if (tmphi) {
1235         tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1236     }
1237     clear_vec_high(s, tmphi != NULL, destidx);
1238 }
1239 
1240 /*
1241  * Vector load/store helpers.
1242  *
1243  * The principal difference between this and an FP load is that we don't
1244  * zero extend as we are filling a partial chunk of the vector register.
1245  * These functions don't support 128 bit loads/stores, which would be
1246  * normal load/store operations.
1247  *
1248  * The _i32 versions are useful when operating on 32 bit quantities
1249  * (eg for floating point single or using Neon helper functions).
1250  */
1251 
1252 /* Get value of an element within a vector register */
1253 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1254                              int element, MemOp memop)
1255 {
1256     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1257     switch ((unsigned)memop) {
1258     case MO_8:
1259         tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1260         break;
1261     case MO_16:
1262         tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1263         break;
1264     case MO_32:
1265         tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1266         break;
1267     case MO_8|MO_SIGN:
1268         tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1269         break;
1270     case MO_16|MO_SIGN:
1271         tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1272         break;
1273     case MO_32|MO_SIGN:
1274         tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1275         break;
1276     case MO_64:
1277     case MO_64|MO_SIGN:
1278         tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1279         break;
1280     default:
1281         g_assert_not_reached();
1282     }
1283 }
1284 
1285 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1286                                  int element, MemOp memop)
1287 {
1288     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1289     switch (memop) {
1290     case MO_8:
1291         tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1292         break;
1293     case MO_16:
1294         tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1295         break;
1296     case MO_8|MO_SIGN:
1297         tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1298         break;
1299     case MO_16|MO_SIGN:
1300         tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1301         break;
1302     case MO_32:
1303     case MO_32|MO_SIGN:
1304         tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1305         break;
1306     default:
1307         g_assert_not_reached();
1308     }
1309 }
1310 
1311 /* Set value of an element within a vector register */
1312 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1313                               int element, MemOp memop)
1314 {
1315     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1316     switch (memop) {
1317     case MO_8:
1318         tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1319         break;
1320     case MO_16:
1321         tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1322         break;
1323     case MO_32:
1324         tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1325         break;
1326     case MO_64:
1327         tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1328         break;
1329     default:
1330         g_assert_not_reached();
1331     }
1332 }
1333 
1334 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1335                                   int destidx, int element, MemOp memop)
1336 {
1337     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1338     switch (memop) {
1339     case MO_8:
1340         tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1341         break;
1342     case MO_16:
1343         tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1344         break;
1345     case MO_32:
1346         tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1347         break;
1348     default:
1349         g_assert_not_reached();
1350     }
1351 }
1352 
1353 /* Store from vector register to memory */
1354 static void do_vec_st(DisasContext *s, int srcidx, int element,
1355                       TCGv_i64 tcg_addr, MemOp mop)
1356 {
1357     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1358 
1359     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1360     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1361 }
1362 
1363 /* Load from memory to vector register */
1364 static void do_vec_ld(DisasContext *s, int destidx, int element,
1365                       TCGv_i64 tcg_addr, MemOp mop)
1366 {
1367     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1368 
1369     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1370     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1371 }
1372 
1373 /* Check that FP/Neon access is enabled. If it is, return
1374  * true. If not, emit code to generate an appropriate exception,
1375  * and return false; the caller should not emit any code for
1376  * the instruction. Note that this check must happen after all
1377  * unallocated-encoding checks (otherwise the syndrome information
1378  * for the resulting exception will be incorrect).
1379  */
1380 static bool fp_access_check_only(DisasContext *s)
1381 {
1382     if (s->fp_excp_el) {
1383         assert(!s->fp_access_checked);
1384         s->fp_access_checked = true;
1385 
1386         gen_exception_insn_el(s, 0, EXCP_UDEF,
1387                               syn_fp_access_trap(1, 0xe, false, 0),
1388                               s->fp_excp_el);
1389         return false;
1390     }
1391     s->fp_access_checked = true;
1392     return true;
1393 }
1394 
1395 static bool fp_access_check(DisasContext *s)
1396 {
1397     if (!fp_access_check_only(s)) {
1398         return false;
1399     }
1400     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1401         gen_exception_insn(s, 0, EXCP_UDEF,
1402                            syn_smetrap(SME_ET_Streaming, false));
1403         return false;
1404     }
1405     return true;
1406 }
1407 
1408 /*
1409  * Return <0 for non-supported element sizes, with MO_16 controlled by
1410  * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
1411  */
1412 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
1413 {
1414     switch (esz) {
1415     case MO_64:
1416     case MO_32:
1417         break;
1418     case MO_16:
1419         if (!dc_isar_feature(aa64_fp16, s)) {
1420             return -1;
1421         }
1422         break;
1423     default:
1424         return -1;
1425     }
1426     return fp_access_check(s);
1427 }
1428 
1429 /* Likewise, but vector MO_64 must have two elements. */
1430 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
1431 {
1432     switch (esz) {
1433     case MO_64:
1434         if (!is_q) {
1435             return -1;
1436         }
1437         break;
1438     case MO_32:
1439         break;
1440     case MO_16:
1441         if (!dc_isar_feature(aa64_fp16, s)) {
1442             return -1;
1443         }
1444         break;
1445     default:
1446         return -1;
1447     }
1448     return fp_access_check(s);
1449 }
1450 
1451 /*
1452  * Check that SVE access is enabled.  If it is, return true.
1453  * If not, emit code to generate an appropriate exception and return false.
1454  * This function corresponds to CheckSVEEnabled().
1455  */
1456 bool sve_access_check(DisasContext *s)
1457 {
1458     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1459         assert(dc_isar_feature(aa64_sme, s));
1460         if (!sme_sm_enabled_check(s)) {
1461             goto fail_exit;
1462         }
1463     } else if (s->sve_excp_el) {
1464         gen_exception_insn_el(s, 0, EXCP_UDEF,
1465                               syn_sve_access_trap(), s->sve_excp_el);
1466         goto fail_exit;
1467     }
1468     s->sve_access_checked = true;
1469     return fp_access_check(s);
1470 
1471  fail_exit:
1472     /* Assert that we only raise one exception per instruction. */
1473     assert(!s->sve_access_checked);
1474     s->sve_access_checked = true;
1475     return false;
1476 }
1477 
1478 /*
1479  * Check that SME access is enabled, raise an exception if not.
1480  * Note that this function corresponds to CheckSMEAccess and is
1481  * only used directly for cpregs.
1482  */
1483 static bool sme_access_check(DisasContext *s)
1484 {
1485     if (s->sme_excp_el) {
1486         gen_exception_insn_el(s, 0, EXCP_UDEF,
1487                               syn_smetrap(SME_ET_AccessTrap, false),
1488                               s->sme_excp_el);
1489         return false;
1490     }
1491     return true;
1492 }
1493 
1494 /* This function corresponds to CheckSMEEnabled. */
1495 bool sme_enabled_check(DisasContext *s)
1496 {
1497     /*
1498      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1499      * to be zero when fp_excp_el has priority.  This is because we need
1500      * sme_excp_el by itself for cpregs access checks.
1501      */
1502     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1503         s->fp_access_checked = true;
1504         return sme_access_check(s);
1505     }
1506     return fp_access_check_only(s);
1507 }
1508 
1509 /* Common subroutine for CheckSMEAnd*Enabled. */
1510 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1511 {
1512     if (!sme_enabled_check(s)) {
1513         return false;
1514     }
1515     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1516         gen_exception_insn(s, 0, EXCP_UDEF,
1517                            syn_smetrap(SME_ET_NotStreaming, false));
1518         return false;
1519     }
1520     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1521         gen_exception_insn(s, 0, EXCP_UDEF,
1522                            syn_smetrap(SME_ET_InactiveZA, false));
1523         return false;
1524     }
1525     return true;
1526 }
1527 
1528 /*
1529  * Expanders for AdvSIMD translation functions.
1530  */
1531 
1532 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1533                             gen_helper_gvec_2 *fn)
1534 {
1535     if (!a->q && a->esz == MO_64) {
1536         return false;
1537     }
1538     if (fp_access_check(s)) {
1539         gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1540     }
1541     return true;
1542 }
1543 
1544 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1545                             gen_helper_gvec_3 *fn)
1546 {
1547     if (!a->q && a->esz == MO_64) {
1548         return false;
1549     }
1550     if (fp_access_check(s)) {
1551         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1552     }
1553     return true;
1554 }
1555 
1556 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1557 {
1558     if (!a->q && a->esz == MO_64) {
1559         return false;
1560     }
1561     if (fp_access_check(s)) {
1562         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1563     }
1564     return true;
1565 }
1566 
1567 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1568 {
1569     if (a->esz == MO_64) {
1570         return false;
1571     }
1572     if (fp_access_check(s)) {
1573         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1574     }
1575     return true;
1576 }
1577 
1578 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1579 {
1580     if (a->esz == MO_8) {
1581         return false;
1582     }
1583     return do_gvec_fn3_no64(s, a, fn);
1584 }
1585 
1586 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1587 {
1588     if (!a->q && a->esz == MO_64) {
1589         return false;
1590     }
1591     if (fp_access_check(s)) {
1592         gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1593     }
1594     return true;
1595 }
1596 
1597 /*
1598  * This utility function is for doing register extension with an
1599  * optional shift. You will likely want to pass a temporary for the
1600  * destination register. See DecodeRegExtend() in the ARM ARM.
1601  */
1602 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1603                               int option, unsigned int shift)
1604 {
1605     int extsize = extract32(option, 0, 2);
1606     bool is_signed = extract32(option, 2, 1);
1607 
1608     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1609     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1610 }
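
/*
 * Example (illustrative insn): for "ADD X0, X1, W2, UXTW #2" the option
 * field is 0b010, so extsize is MO_32 and is_signed is false; W2 is
 * zero-extended from 32 bits and shifted left by 2 before being used.
 * Option 0b100 (SXTB) would instead sign-extend from 8 bits.
 */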
1611 
1612 static inline void gen_check_sp_alignment(DisasContext *s)
1613 {
1614     /* The AArch64 architecture mandates that (if enabled via PSTATE
1615      * or SCTLR bits) there is a check that SP is 16-aligned on every
1616      * SP-relative load or store (with an exception generated if it is not).
1617      * In line with general QEMU practice regarding misaligned accesses,
1618      * we omit these checks for the sake of guest program performance.
1619      * This function is provided as a hook so we can more easily add these
1620      * checks in future (possibly as a "favour catching guest program bugs
1621      * over speed" user selectable option).
1622      */
1623 }
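/*
 * If such a check were ever wired up, a minimal sketch (an assumption,
 * not the current implementation) might emit, per SP-relative access:
 *
 *     TCGv_i64 tmp = tcg_temp_new_i64();
 *     tcg_gen_andi_i64(tmp, cpu_reg_sp(s, 31), 0xf);
 *     // branch on tmp != 0 to code raising the SP alignment fault
 *
 * i.e. test the low four bits of SP before emitting the access itself.
 */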
1624 
1625 /*
1626  * The instruction disassembly implemented here matches
1627  * the instruction encoding classifications in chapter C4
1628  * of the ARM Architecture Reference Manual (DDI0487B_a);
1629  * classification names and decode diagrams here should generally
1630  * match up with those in the manual.
1631  */
1632 
1633 static bool trans_B(DisasContext *s, arg_i *a)
1634 {
1635     reset_btype(s);
1636     gen_goto_tb(s, 0, a->imm);
1637     return true;
1638 }
1639 
1640 static bool trans_BL(DisasContext *s, arg_i *a)
1641 {
1642     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1643     reset_btype(s);
1644     gen_goto_tb(s, 0, a->imm);
1645     return true;
1646 }
1647 
1648 
1649 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1650 {
1651     DisasLabel match;
1652     TCGv_i64 tcg_cmp;
1653 
1654     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1655     reset_btype(s);
1656 
1657     match = gen_disas_label(s);
1658     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1659                         tcg_cmp, 0, match.label);
1660     gen_goto_tb(s, 0, 4);
1661     set_disas_label(s, match);
1662     gen_goto_tb(s, 1, a->imm);
1663     return true;
1664 }
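/*
 * Illustration (not part of the decode): "CBZ X0, <target>" reaches
 * trans_CBZ with a->nz == 0, so the brcondi above branches to 'match'
 * when X0 == 0.  The not-taken path chains through TB exit 0 to PC + 4;
 * the taken path chains through TB exit 1 to PC + a->imm.  CBNZ simply
 * inverts the condition via a->nz == 1.
 */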
1665 
1666 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1667 {
1668     DisasLabel match;
1669     TCGv_i64 tcg_cmp;
1670 
1671     tcg_cmp = tcg_temp_new_i64();
1672     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1673 
1674     reset_btype(s);
1675 
1676     match = gen_disas_label(s);
1677     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1678                         tcg_cmp, 0, match.label);
1679     gen_goto_tb(s, 0, 4);
1680     set_disas_label(s, match);
1681     gen_goto_tb(s, 1, a->imm);
1682     return true;
1683 }
1684 
1685 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1686 {
1687     /* BC.cond is only present with FEAT_HBC */
1688     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1689         return false;
1690     }
1691     reset_btype(s);
1692     if (a->cond < 0x0e) {
1693         /* genuinely conditional branches */
1694         DisasLabel match = gen_disas_label(s);
1695         arm_gen_test_cc(a->cond, match.label);
1696         gen_goto_tb(s, 0, 4);
1697         set_disas_label(s, match);
1698         gen_goto_tb(s, 1, a->imm);
1699     } else {
1700         /* 0xe and 0xf are both "always" conditions */
1701         gen_goto_tb(s, 0, a->imm);
1702     }
1703     return true;
1704 }
1705 
1706 static void set_btype_for_br(DisasContext *s, int rn)
1707 {
1708     if (dc_isar_feature(aa64_bti, s)) {
1709         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1710         if (rn == 16 || rn == 17) {
1711             set_btype(s, 1);
1712         } else {
1713             TCGv_i64 pc = tcg_temp_new_i64();
1714             gen_pc_plus_diff(s, pc, 0);
1715             gen_helper_guarded_page_br(tcg_env, pc);
1716             s->btype = -1;
1717         }
1718     }
1719 }
1720 
1721 static void set_btype_for_blr(DisasContext *s)
1722 {
1723     if (dc_isar_feature(aa64_bti, s)) {
1724         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1725         set_btype(s, 2);
1726     }
1727 }
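/*
 * Resulting PSTATE.BTYPE, by example (illustrative only, FEAT_BTI
 * implemented): "BR X16" or "BR X17" sets BTYPE to 1; "BR X5" cannot be
 * resolved at translation time, so guarded_page_br picks 1 or 3 at
 * runtime (hence btype = -1 above); any BLR variant sets BTYPE to 2.
 */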
1728 
1729 static bool trans_BR(DisasContext *s, arg_r *a)
1730 {
1731     set_btype_for_br(s, a->rn);
1732     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1733     s->base.is_jmp = DISAS_JUMP;
1734     return true;
1735 }
1736 
1737 static bool trans_BLR(DisasContext *s, arg_r *a)
1738 {
1739     TCGv_i64 dst = cpu_reg(s, a->rn);
1740     TCGv_i64 lr = cpu_reg(s, 30);
1741     if (dst == lr) {
1742         TCGv_i64 tmp = tcg_temp_new_i64();
1743         tcg_gen_mov_i64(tmp, dst);
1744         dst = tmp;
1745     }
1746     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1747     gen_a64_set_pc(s, dst);
1748     set_btype_for_blr(s);
1749     s->base.is_jmp = DISAS_JUMP;
1750     return true;
1751 }
1752 
1753 static bool trans_RET(DisasContext *s, arg_r *a)
1754 {
1755     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1756     s->base.is_jmp = DISAS_JUMP;
1757     return true;
1758 }
1759 
1760 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1761                                    TCGv_i64 modifier, bool use_key_a)
1762 {
1763     TCGv_i64 truedst;
1764     /*
1765      * Return the branch target for a BRAA/RETA/etc, which is either
1766      * just the destination dst, or that value with the pauth check
1767      * done and the code removed from the high bits.
1768      */
1769     if (!s->pauth_active) {
1770         return dst;
1771     }
1772 
1773     truedst = tcg_temp_new_i64();
1774     if (use_key_a) {
1775         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1776     } else {
1777         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1778     }
1779     return truedst;
1780 }
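/*
 * Usage notes (illustrative only): BRAAZ/BLRAAZ pass a zero modifier and
 * key A, BRAA/BLRAA pass Xm|SP as the modifier, and RETAA/RETAB use X30
 * as the address with SP as the modifier.  When pauth is not active the
 * candidate address is used unchanged, as returned above.
 */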
1781 
1782 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1783 {
1784     TCGv_i64 dst;
1785 
1786     if (!dc_isar_feature(aa64_pauth, s)) {
1787         return false;
1788     }
1789 
1790     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1791     set_btype_for_br(s, a->rn);
1792     gen_a64_set_pc(s, dst);
1793     s->base.is_jmp = DISAS_JUMP;
1794     return true;
1795 }
1796 
1797 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1798 {
1799     TCGv_i64 dst, lr;
1800 
1801     if (!dc_isar_feature(aa64_pauth, s)) {
1802         return false;
1803     }
1804 
1805     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1806     lr = cpu_reg(s, 30);
1807     if (dst == lr) {
1808         TCGv_i64 tmp = tcg_temp_new_i64();
1809         tcg_gen_mov_i64(tmp, dst);
1810         dst = tmp;
1811     }
1812     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1813     gen_a64_set_pc(s, dst);
1814     set_btype_for_blr(s);
1815     s->base.is_jmp = DISAS_JUMP;
1816     return true;
1817 }
1818 
1819 static bool trans_RETA(DisasContext *s, arg_reta *a)
1820 {
1821     TCGv_i64 dst;
1822 
1823     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1824     gen_a64_set_pc(s, dst);
1825     s->base.is_jmp = DISAS_JUMP;
1826     return true;
1827 }
1828 
1829 static bool trans_BRA(DisasContext *s, arg_bra *a)
1830 {
1831     TCGv_i64 dst;
1832 
1833     if (!dc_isar_feature(aa64_pauth, s)) {
1834         return false;
1835     }
1836     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1837     gen_a64_set_pc(s, dst);
1838     set_btype_for_br(s, a->rn);
1839     s->base.is_jmp = DISAS_JUMP;
1840     return true;
1841 }
1842 
1843 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1844 {
1845     TCGv_i64 dst, lr;
1846 
1847     if (!dc_isar_feature(aa64_pauth, s)) {
1848         return false;
1849     }
1850     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1851     lr = cpu_reg(s, 30);
1852     if (dst == lr) {
1853         TCGv_i64 tmp = tcg_temp_new_i64();
1854         tcg_gen_mov_i64(tmp, dst);
1855         dst = tmp;
1856     }
1857     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1858     gen_a64_set_pc(s, dst);
1859     set_btype_for_blr(s);
1860     s->base.is_jmp = DISAS_JUMP;
1861     return true;
1862 }
1863 
1864 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1865 {
1866     TCGv_i64 dst;
1867 
1868     if (s->current_el == 0) {
1869         return false;
1870     }
1871     if (s->trap_eret) {
1872         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1873         return true;
1874     }
1875     dst = tcg_temp_new_i64();
1876     tcg_gen_ld_i64(dst, tcg_env,
1877                    offsetof(CPUARMState, elr_el[s->current_el]));
1878 
1879     translator_io_start(&s->base);
1880 
1881     gen_helper_exception_return(tcg_env, dst);
1882     /* Must exit loop to check unmasked IRQs */
1883     s->base.is_jmp = DISAS_EXIT;
1884     return true;
1885 }
1886 
1887 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1888 {
1889     TCGv_i64 dst;
1890 
1891     if (!dc_isar_feature(aa64_pauth, s)) {
1892         return false;
1893     }
1894     if (s->current_el == 0) {
1895         return false;
1896     }
1897     /* The FGT trap takes precedence over an auth trap. */
1898     if (s->trap_eret) {
1899         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1900         return true;
1901     }
1902     dst = tcg_temp_new_i64();
1903     tcg_gen_ld_i64(dst, tcg_env,
1904                    offsetof(CPUARMState, elr_el[s->current_el]));
1905 
1906     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1907 
1908     translator_io_start(&s->base);
1909 
1910     gen_helper_exception_return(tcg_env, dst);
1911     /* Must exit loop to check unmasked IRQs */
1912     s->base.is_jmp = DISAS_EXIT;
1913     return true;
1914 }
1915 
1916 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1917 {
1918     return true;
1919 }
1920 
1921 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1922 {
1923     /*
1924      * When running in MTTCG we don't generate jumps to the yield and
1925      * WFE helpers as it won't affect the scheduling of other vCPUs.
1926      * If we wanted to more completely model WFE/SEV so we don't busy
1927      * spin unnecessarily we would need to do something more involved.
1928      */
1929     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1930         s->base.is_jmp = DISAS_YIELD;
1931     }
1932     return true;
1933 }
1934 
1935 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1936 {
1937     s->base.is_jmp = DISAS_WFI;
1938     return true;
1939 }
1940 
1941 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1942 {
1943     /*
1944      * When running in MTTCG we don't generate jumps to the yield and
1945      * WFE helpers as it won't affect the scheduling of other vCPUs.
1946      * If we wanted to more completely model WFE/SEV so we don't busy
1947      * spin unnecessarily we would need to do something more involved.
1948      */
1949     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1950         s->base.is_jmp = DISAS_WFE;
1951     }
1952     return true;
1953 }
1954 
1955 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1956 {
1957     if (!dc_isar_feature(aa64_wfxt, s)) {
1958         return false;
1959     }
1960 
1961     /*
1962      * Because we need to pass the register value to the helper,
1963      * it's easier to emit the code now, unlike trans_WFI which
1964      * defers it to aarch64_tr_tb_stop(). That means we need to
1965      * check ss_active so that single-stepping a WFIT doesn't halt.
1966      */
1967     if (s->ss_active) {
1968         /* Act like a NOP under architectural singlestep */
1969         return true;
1970     }
1971 
1972     gen_a64_update_pc(s, 4);
1973     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1974     /* Go back to the main loop to check for interrupts */
1975     s->base.is_jmp = DISAS_EXIT;
1976     return true;
1977 }
1978 
1979 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1980 {
1981     if (!dc_isar_feature(aa64_wfxt, s)) {
1982         return false;
1983     }
1984 
1985     /*
1986      * We rely here on our WFE implementation being a NOP, so we
1987      * don't need to do anything different to handle the WFET timeout
1988      * from what trans_WFE does.
1989      */
1990     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1991         s->base.is_jmp = DISAS_WFE;
1992     }
1993     return true;
1994 }
1995 
1996 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1997 {
1998     if (s->pauth_active) {
1999         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
2000     }
2001     return true;
2002 }
2003 
2004 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2005 {
2006     if (s->pauth_active) {
2007         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2008     }
2009     return true;
2010 }
2011 
2012 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2013 {
2014     if (s->pauth_active) {
2015         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2016     }
2017     return true;
2018 }
2019 
2020 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2021 {
2022     if (s->pauth_active) {
2023         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2024     }
2025     return true;
2026 }
2027 
2028 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2029 {
2030     if (s->pauth_active) {
2031         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2032     }
2033     return true;
2034 }
2035 
2036 static bool trans_ESB(DisasContext *s, arg_ESB *a)
2037 {
2038     /* Without RAS, we must implement this as a NOP. */
2039     if (dc_isar_feature(aa64_ras, s)) {
2040         /*
2041          * QEMU does not have a source of physical SErrors,
2042          * so we are only concerned with virtual SErrors.
2043          * The pseudocode in the ARM for this case is
2044          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2045          *      AArch64.vESBOperation();
2046          * Most of the condition can be evaluated at translation time.
2047          * Test for EL2 present, and defer test for SEL2 to runtime.
2048          */
2049         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2050             gen_helper_vesb(tcg_env);
2051         }
2052     }
2053     return true;
2054 }
2055 
2056 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2057 {
2058     if (s->pauth_active) {
2059         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2060     }
2061     return true;
2062 }
2063 
2064 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2065 {
2066     if (s->pauth_active) {
2067         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2068     }
2069     return true;
2070 }
2071 
2072 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2073 {
2074     if (s->pauth_active) {
2075         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2076     }
2077     return true;
2078 }
2079 
2080 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2081 {
2082     if (s->pauth_active) {
2083         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2084     }
2085     return true;
2086 }
2087 
2088 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2089 {
2090     if (s->pauth_active) {
2091         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2092     }
2093     return true;
2094 }
2095 
2096 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2097 {
2098     if (s->pauth_active) {
2099         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2100     }
2101     return true;
2102 }
2103 
2104 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2105 {
2106     if (s->pauth_active) {
2107         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2108     }
2109     return true;
2110 }
2111 
2112 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2113 {
2114     if (s->pauth_active) {
2115         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2116     }
2117     return true;
2118 }
2119 
2120 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2121 {
2122     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2123     return true;
2124 }
2125 
2126 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2127 {
2128     /* We handle DSB and DMB the same way */
2129     TCGBar bar;
2130 
2131     switch (a->types) {
2132     case 1: /* MBReqTypes_Reads */
2133         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2134         break;
2135     case 2: /* MBReqTypes_Writes */
2136         bar = TCG_BAR_SC | TCG_MO_ST_ST;
2137         break;
2138     default: /* MBReqTypes_All */
2139         bar = TCG_BAR_SC | TCG_MO_ALL;
2140         break;
2141     }
2142     tcg_gen_mb(bar);
2143     return true;
2144 }
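/*
 * Mapping by example (illustrative only): "DMB ISHLD" decodes with
 * a->types == 1 (MBReqTypes_Reads) and becomes a load-load/load-store
 * barrier; "DMB ISHST" has a->types == 2 (MBReqTypes_Writes) and becomes
 * a store-store barrier; anything else, e.g. "DMB ISH" or "DSB SY",
 * emits the full TCG_MO_ALL barrier.
 */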
2145 
2146 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2147 {
2148     if (!dc_isar_feature(aa64_xs, s)) {
2149         return false;
2150     }
2151     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2152     return true;
2153 }
2154 
2155 static bool trans_ISB(DisasContext *s, arg_ISB *a)
2156 {
2157     /*
2158      * We need to break the TB after this insn to execute
2159      * self-modifying code correctly and also to take
2160      * any pending interrupts immediately.
2161      */
2162     reset_btype(s);
2163     gen_goto_tb(s, 0, 4);
2164     return true;
2165 }
2166 
2167 static bool trans_SB(DisasContext *s, arg_SB *a)
2168 {
2169     if (!dc_isar_feature(aa64_sb, s)) {
2170         return false;
2171     }
2172     /*
2173      * TODO: There is no speculation barrier opcode for TCG;
2174      * MB and end the TB instead.
2175      */
2176     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2177     gen_goto_tb(s, 0, 4);
2178     return true;
2179 }
2180 
2181 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2182 {
2183     if (!dc_isar_feature(aa64_condm_4, s)) {
2184         return false;
2185     }
2186     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2187     return true;
2188 }
2189 
2190 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2191 {
2192     TCGv_i32 z;
2193 
2194     if (!dc_isar_feature(aa64_condm_5, s)) {
2195         return false;
2196     }
2197 
2198     z = tcg_temp_new_i32();
2199 
2200     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2201 
2202     /*
2203      * (!C & !Z) << 31
2204      * (!(C | Z)) << 31
2205      * ~((C | Z) << 31)
2206      * ~-(C | Z)
2207      * (C | Z) - 1
2208      */
2209     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2210     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2211 
2212     /* !(Z & C) */
2213     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2214     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2215 
2216     /* (!C & Z) << 31 -> -(Z & ~C) */
2217     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2218     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2219 
2220     /* C | Z */
2221     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2222 
2223     return true;
2224 }
2225 
2226 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2227 {
2228     if (!dc_isar_feature(aa64_condm_5, s)) {
2229         return false;
2230     }
2231 
2232     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2233     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2234 
2235     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2236     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2237 
2238     tcg_gen_movi_i32(cpu_NF, 0);
2239     tcg_gen_movi_i32(cpu_VF, 0);
2240 
2241     return true;
2242 }
2243 
2244 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2245 {
2246     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2247         return false;
2248     }
2249     if (a->imm & 1) {
2250         set_pstate_bits(PSTATE_UAO);
2251     } else {
2252         clear_pstate_bits(PSTATE_UAO);
2253     }
2254     gen_rebuild_hflags(s);
2255     s->base.is_jmp = DISAS_TOO_MANY;
2256     return true;
2257 }
2258 
2259 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2260 {
2261     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2262         return false;
2263     }
2264     if (a->imm & 1) {
2265         set_pstate_bits(PSTATE_PAN);
2266     } else {
2267         clear_pstate_bits(PSTATE_PAN);
2268     }
2269     gen_rebuild_hflags(s);
2270     s->base.is_jmp = DISAS_TOO_MANY;
2271     return true;
2272 }
2273 
2274 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2275 {
2276     if (s->current_el == 0) {
2277         return false;
2278     }
2279     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2280     s->base.is_jmp = DISAS_TOO_MANY;
2281     return true;
2282 }
2283 
2284 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2285 {
2286     if (!dc_isar_feature(aa64_ssbs, s)) {
2287         return false;
2288     }
2289     if (a->imm & 1) {
2290         set_pstate_bits(PSTATE_SSBS);
2291     } else {
2292         clear_pstate_bits(PSTATE_SSBS);
2293     }
2294     /* Don't need to rebuild hflags since SSBS is a nop */
2295     s->base.is_jmp = DISAS_TOO_MANY;
2296     return true;
2297 }
2298 
2299 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2300 {
2301     if (!dc_isar_feature(aa64_dit, s)) {
2302         return false;
2303     }
2304     if (a->imm & 1) {
2305         set_pstate_bits(PSTATE_DIT);
2306     } else {
2307         clear_pstate_bits(PSTATE_DIT);
2308     }
2309     /* There's no need to rebuild hflags because DIT is a nop */
2310     s->base.is_jmp = DISAS_TOO_MANY;
2311     return true;
2312 }
2313 
2314 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2315 {
2316     if (dc_isar_feature(aa64_mte, s)) {
2317         /* Full MTE is enabled -- set the TCO bit as directed. */
2318         if (a->imm & 1) {
2319             set_pstate_bits(PSTATE_TCO);
2320         } else {
2321             clear_pstate_bits(PSTATE_TCO);
2322         }
2323         gen_rebuild_hflags(s);
2324         /* Many factors, including TCO, go into MTE_ACTIVE. */
2325         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2326         return true;
2327     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2328         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2329         return true;
2330     } else {
2331         /* Insn not present */
2332         return false;
2333     }
2334 }
2335 
2336 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2337 {
2338     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2339     s->base.is_jmp = DISAS_TOO_MANY;
2340     return true;
2341 }
2342 
2343 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2344 {
2345     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2346     /* Exit the cpu loop to re-evaluate pending IRQs. */
2347     s->base.is_jmp = DISAS_UPDATE_EXIT;
2348     return true;
2349 }
2350 
2351 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2352 {
2353     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2354         return false;
2355     }
2356 
2357     if (a->imm == 0) {
2358         clear_pstate_bits(PSTATE_ALLINT);
2359     } else if (s->current_el > 1) {
2360         set_pstate_bits(PSTATE_ALLINT);
2361     } else {
2362         gen_helper_msr_set_allint_el1(tcg_env);
2363     }
2364 
2365     /* Exit the cpu loop to re-evaluate pending IRQs. */
2366     s->base.is_jmp = DISAS_UPDATE_EXIT;
2367     return true;
2368 }
2369 
2370 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2371 {
2372     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2373         return false;
2374     }
2375     if (sme_access_check(s)) {
2376         int old = s->pstate_sm | (s->pstate_za << 1);
2377         int new = a->imm * 3;
2378 
2379         if ((old ^ new) & a->mask) {
2380             /* At least one bit changes. */
2381             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2382                                 tcg_constant_i32(a->mask));
2383             s->base.is_jmp = DISAS_TOO_MANY;
2384         }
2385     }
2386     return true;
2387 }
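/*
 * Example (illustrative only): SMSTART is MSR SVCRSMZA, #1, so
 * a->mask == 3 and a->imm == 1 giving new == 3; if the translation-time
 * state was SM=0/ZA=1 (old == 2), only the SM bit differs under the mask
 * and set_svcr is called.  "SMSTOP ZA" (mask == 2, imm == 0) clears only
 * the ZA bit and leaves streaming mode untouched.
 */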
2388 
2389 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2390 {
2391     TCGv_i32 tmp = tcg_temp_new_i32();
2392     TCGv_i32 nzcv = tcg_temp_new_i32();
2393 
2394     /* build bit 31, N */
2395     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2396     /* build bit 30, Z */
2397     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2398     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2399     /* build bit 29, C */
2400     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2401     /* build bit 28, V */
2402     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2403     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2404     /* generate result */
2405     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2406 }
2407 
2408 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2409 {
2410     TCGv_i32 nzcv = tcg_temp_new_i32();
2411 
2412     /* take NZCV from R[t] */
2413     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2414 
2415     /* bit 31, N */
2416     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2417     /* bit 30, Z */
2418     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2419     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2420     /* bit 29, C */
2421     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2422     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2423     /* bit 28, V */
2424     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2425     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2426 }
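/*
 * Worked example (illustrative only): writing 0x60000000 via
 * gen_set_nzcv sets Z and C: cpu_ZF becomes 0 (the Z flag is encoded as
 * "ZF == 0"), cpu_CF becomes 1, and bit 31 of cpu_NF and of cpu_VF ends
 * up clear.  gen_get_nzcv performs the inverse packing into bits [31:28]
 * of the destination register, with all other bits zero.
 */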
2427 
2428 static void gen_sysreg_undef(DisasContext *s, bool isread,
2429                              uint8_t op0, uint8_t op1, uint8_t op2,
2430                              uint8_t crn, uint8_t crm, uint8_t rt)
2431 {
2432     /*
2433      * Generate code to emit an UNDEF with correct syndrome
2434      * information for a failed system register access.
2435      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2436      * but if FEAT_IDST is implemented then read accesses to registers
2437      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2438      * syndrome.
2439      */
2440     uint32_t syndrome;
2441 
2442     if (isread && dc_isar_feature(aa64_ids, s) &&
2443         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2444         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2445     } else {
2446         syndrome = syn_uncategorized();
2447     }
2448     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2449 }
2450 
2451 /* MRS - move from system register
2452  * MSR (register) - move to system register
2453  * SYS
2454  * SYSL
2455  * These are all essentially the same insn in 'read' and 'write'
2456  * versions, with varying op0 fields.
2457  */
2458 static void handle_sys(DisasContext *s, bool isread,
2459                        unsigned int op0, unsigned int op1, unsigned int op2,
2460                        unsigned int crn, unsigned int crm, unsigned int rt)
2461 {
2462     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2463                                       crn, crm, op0, op1, op2);
2464     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2465     bool need_exit_tb = false;
2466     bool nv_trap_to_el2 = false;
2467     bool nv_redirect_reg = false;
2468     bool skip_fp_access_checks = false;
2469     bool nv2_mem_redirect = false;
2470     TCGv_ptr tcg_ri = NULL;
2471     TCGv_i64 tcg_rt;
2472     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2473 
2474     if (crn == 11 || crn == 15) {
2475         /*
2476          * Check for TIDCP trap, which must take precedence over
2477          * the UNDEF for "no such register" etc.
2478          */
2479         switch (s->current_el) {
2480         case 0:
2481             if (dc_isar_feature(aa64_tidcp1, s)) {
2482                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2483             }
2484             break;
2485         case 1:
2486             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2487             break;
2488         }
2489     }
2490 
2491     if (!ri) {
2492         /* Unknown register; this might be a guest error or a QEMU
2493          * unimplemented feature.
2494          */
2495         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2496                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2497                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2498         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2499         return;
2500     }
2501 
2502     if (s->nv2 && ri->nv2_redirect_offset) {
2503         /*
2504          * Some registers always redirect to memory; some only do so if
2505          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2506          * pairs which share an offset; see the table in R_CSRPQ).
2507          */
2508         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2509             nv2_mem_redirect = s->nv1;
2510         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2511             nv2_mem_redirect = !s->nv1;
2512         } else {
2513             nv2_mem_redirect = true;
2514         }
2515     }
2516 
2517     /* Check access permissions */
2518     if (!cp_access_ok(s->current_el, ri, isread)) {
2519         /*
2520          * FEAT_NV/NV2 handling does not do the usual FP access checks
2521          * for registers only accessible at EL2 (though it *does* do them
2522          * for registers accessible at EL1).
2523          */
2524         skip_fp_access_checks = true;
2525         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2526             /*
2527              * This is one of the few EL2 registers which should redirect
2528              * to the equivalent EL1 register. We do that after running
2529              * the EL2 register's accessfn.
2530              */
2531             nv_redirect_reg = true;
2532             assert(!nv2_mem_redirect);
2533         } else if (nv2_mem_redirect) {
2534             /*
2535              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2536              * UNDEF to EL1.
2537              */
2538         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2539             /*
2540              * This register / instruction exists and is an EL2 register, so
2541              * we must trap to EL2 if accessed in nested virtualization EL1
2542              * instead of UNDEFing. We'll do that after the usual access checks.
2543              * (This makes a difference only for a couple of registers like
2544              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2545              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2546              * an accessfn which does nothing when called from EL1, because
2547              * the trap-to-EL3 controls which would apply to that register
2548              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2549              */
2550             nv_trap_to_el2 = true;
2551         } else {
2552             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2553             return;
2554         }
2555     }
2556 
2557     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2558         /* Emit code to perform further access permissions checks at
2559          * runtime; this may result in an exception.
2560          */
2561         gen_a64_update_pc(s, 0);
2562         tcg_ri = tcg_temp_new_ptr();
2563         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2564                                        tcg_constant_i32(key),
2565                                        tcg_constant_i32(syndrome),
2566                                        tcg_constant_i32(isread));
2567     } else if (ri->type & ARM_CP_RAISES_EXC) {
2568         /*
2569          * The readfn or writefn might raise an exception;
2570          * synchronize the CPU state in case it does.
2571          */
2572         gen_a64_update_pc(s, 0);
2573     }
2574 
2575     if (!skip_fp_access_checks) {
2576         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2577             return;
2578         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2579             return;
2580         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2581             return;
2582         }
2583     }
2584 
2585     if (nv_trap_to_el2) {
2586         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2587         return;
2588     }
2589 
2590     if (nv_redirect_reg) {
2591         /*
2592          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2593          * Conveniently in all cases the encoding of the EL1 register is
2594          * identical to the EL2 register except that opc1 is 0.
2595          * Get the reginfo for the EL1 register to use for the actual access.
2596          * We don't use the EL1 register's access function, and
2597          * fine-grained-traps on EL1 also do not apply here.
2598          */
2599         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2600                                  crn, crm, op0, 0, op2);
2601         ri = get_arm_cp_reginfo(s->cp_regs, key);
2602         assert(ri);
2603         assert(cp_access_ok(s->current_el, ri, isread));
2604         /*
2605          * We might not have done an update_pc earlier, so check we don't
2606          * need it. We could support this in future if necessary.
2607          */
2608         assert(!(ri->type & ARM_CP_RAISES_EXC));
2609     }
2610 
2611     if (nv2_mem_redirect) {
2612         /*
2613          * This system register is being redirected into an EL2 memory access.
2614          * This means it is not an IO operation, doesn't change hflags,
2615          * and need not end the TB, because it has no side effects.
2616          *
2617          * The access is 64-bit single copy atomic, guaranteed aligned because
2618          * of the definition of VCNR_EL2. Its endianness depends on
2619          * SCTLR_EL2.EE, not on the data endianness of EL1.
2620          * It is done under either the EL2 translation regime or the EL2&0
2621          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2622          * PSTATE.PAN is 0.
2623          */
2624         TCGv_i64 ptr = tcg_temp_new_i64();
2625         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2626         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2627         int memidx = arm_to_core_mmu_idx(armmemidx);
2628         uint32_t syn;
2629 
2630         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2631 
2632         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2633         tcg_gen_addi_i64(ptr, ptr,
2634                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2635         tcg_rt = cpu_reg(s, rt);
2636 
2637         syn = syn_data_abort_vncr(0, !isread, 0);
2638         disas_set_insn_syndrome(s, syn);
2639         if (isread) {
2640             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2641         } else {
2642             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2643         }
2644         return;
2645     }
2646 
2647     /* Handle special cases first */
2648     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2649     case 0:
2650         break;
2651     case ARM_CP_NOP:
2652         return;
2653     case ARM_CP_NZCV:
2654         tcg_rt = cpu_reg(s, rt);
2655         if (isread) {
2656             gen_get_nzcv(tcg_rt);
2657         } else {
2658             gen_set_nzcv(tcg_rt);
2659         }
2660         return;
2661     case ARM_CP_CURRENTEL:
2662     {
2663         /*
2664          * Reads as current EL value from pstate, which is
2665          * guaranteed to be constant by the tb flags.
2666          * For nested virt we should report EL2.
2667          */
2668         int el = s->nv ? 2 : s->current_el;
2669         tcg_rt = cpu_reg(s, rt);
2670         tcg_gen_movi_i64(tcg_rt, el << 2);
2671         return;
2672     }
2673     case ARM_CP_DC_ZVA:
2674         /* Writes clear the aligned block of memory which rt points into. */
2675         if (s->mte_active[0]) {
2676             int desc = 0;
2677 
2678             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2679             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2680             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2681 
2682             tcg_rt = tcg_temp_new_i64();
2683             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2684                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2685         } else {
2686             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2687         }
2688         gen_helper_dc_zva(tcg_env, tcg_rt);
2689         return;
2690     case ARM_CP_DC_GVA:
2691         {
2692             TCGv_i64 clean_addr, tag;
2693 
2694             /*
2695              * DC_GVA, like DC_ZVA, requires that we supply the original
2696              * pointer for an invalid page.  Probe that address first.
2697              */
2698             tcg_rt = cpu_reg(s, rt);
2699             clean_addr = clean_data_tbi(s, tcg_rt);
2700             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2701 
2702             if (s->ata[0]) {
2703                 /* Extract the tag from the register to match STZGM.  */
2704                 tag = tcg_temp_new_i64();
2705                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2706                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2707             }
2708         }
2709         return;
2710     case ARM_CP_DC_GZVA:
2711         {
2712             TCGv_i64 clean_addr, tag;
2713 
2714             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2715             tcg_rt = cpu_reg(s, rt);
2716             clean_addr = clean_data_tbi(s, tcg_rt);
2717             gen_helper_dc_zva(tcg_env, clean_addr);
2718 
2719             if (s->ata[0]) {
2720                 /* Extract the tag from the register to match STZGM.  */
2721                 tag = tcg_temp_new_i64();
2722                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2723                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2724             }
2725         }
2726         return;
2727     default:
2728         g_assert_not_reached();
2729     }
2730 
2731     if (ri->type & ARM_CP_IO) {
2732         /* I/O operations must end the TB here (whether read or write) */
2733         need_exit_tb = translator_io_start(&s->base);
2734     }
2735 
2736     tcg_rt = cpu_reg(s, rt);
2737 
2738     if (isread) {
2739         if (ri->type & ARM_CP_CONST) {
2740             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2741         } else if (ri->readfn) {
2742             if (!tcg_ri) {
2743                 tcg_ri = gen_lookup_cp_reg(key);
2744             }
2745             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2746         } else {
2747             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2748         }
2749     } else {
2750         if (ri->type & ARM_CP_CONST) {
2751             /* If not forbidden by access permissions, treat as WI */
2752             return;
2753         } else if (ri->writefn) {
2754             if (!tcg_ri) {
2755                 tcg_ri = gen_lookup_cp_reg(key);
2756             }
2757             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2758         } else {
2759             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2760         }
2761     }
2762 
2763     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2764         /*
2765          * A write to any coprocessor register that ends a TB
2766          * must rebuild the hflags for the next TB.
2767          */
2768         gen_rebuild_hflags(s);
2769         /*
2770          * We default to ending the TB on a coprocessor register write,
2771          * but allow this to be suppressed by the register definition
2772          * (usually only necessary to work around guest bugs).
2773          */
2774         need_exit_tb = true;
2775     }
2776     if (need_exit_tb) {
2777         s->base.is_jmp = DISAS_UPDATE_EXIT;
2778     }
2779 }
2780 
2781 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2782 {
2783     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2784     return true;
2785 }
2786 
2787 static bool trans_SVC(DisasContext *s, arg_i *a)
2788 {
2789     /*
2790      * For SVC, HVC and SMC we advance the single-step state
2791      * machine before taking the exception. This is architecturally
2792      * mandated, to ensure that single-stepping a system call
2793      * instruction works properly.
2794      */
2795     uint32_t syndrome = syn_aa64_svc(a->imm);
2796     if (s->fgt_svc) {
2797         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2798         return true;
2799     }
2800     gen_ss_advance(s);
2801     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2802     return true;
2803 }
2804 
2805 static bool trans_HVC(DisasContext *s, arg_i *a)
2806 {
2807     int target_el = s->current_el == 3 ? 3 : 2;
2808 
2809     if (s->current_el == 0) {
2810         unallocated_encoding(s);
2811         return true;
2812     }
2813     /*
2814      * The pre HVC helper handles cases when HVC gets trapped
2815      * as an undefined insn by runtime configuration.
2816      */
2817     gen_a64_update_pc(s, 0);
2818     gen_helper_pre_hvc(tcg_env);
2819     /* Architecture requires ss advance before we do the actual work */
2820     gen_ss_advance(s);
2821     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2822     return true;
2823 }
2824 
2825 static bool trans_SMC(DisasContext *s, arg_i *a)
2826 {
2827     if (s->current_el == 0) {
2828         unallocated_encoding(s);
2829         return true;
2830     }
2831     gen_a64_update_pc(s, 0);
2832     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2833     /* Architecture requires ss advance before we do the actual work */
2834     gen_ss_advance(s);
2835     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2836     return true;
2837 }
2838 
2839 static bool trans_BRK(DisasContext *s, arg_i *a)
2840 {
2841     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2842     return true;
2843 }
2844 
2845 static bool trans_HLT(DisasContext *s, arg_i *a)
2846 {
2847     /*
2848      * HLT. This has two purposes.
2849      * Architecturally, it is an external halting debug instruction.
2850      * Since QEMU doesn't implement external debug, we treat this as
2851      * the architecture requires when halting debug is disabled: it UNDEFs.
2852      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2853      */
2854     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2855         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2856     } else {
2857         unallocated_encoding(s);
2858     }
2859     return true;
2860 }
2861 
2862 /*
2863  * Load/Store exclusive instructions are implemented by remembering
2864  * the value/address loaded, and seeing if these are the same
2865  * when the store is performed. This is not actually the architecturally
2866  * mandated semantics, but it works for typical guest code sequences
2867  * and avoids having to monitor regular stores.
2868  *
2869  * The store exclusive uses the atomic cmpxchg primitives to avoid
2870  * races in multi-threaded linux-user and when MTTCG softmmu is
2871  * enabled.
2872  */
2873 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2874                                int size, bool is_pair)
2875 {
2876     int idx = get_mem_index(s);
2877     TCGv_i64 dirty_addr, clean_addr;
2878     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2879 
2880     s->is_ldex = true;
2881     dirty_addr = cpu_reg_sp(s, rn);
2882     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2883 
2884     g_assert(size <= 3);
2885     if (is_pair) {
2886         g_assert(size >= 2);
2887         if (size == 2) {
2888             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2889             if (s->be_data == MO_LE) {
2890                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2891                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2892             } else {
2893                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2894                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2895             }
2896         } else {
2897             TCGv_i128 t16 = tcg_temp_new_i128();
2898 
2899             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2900 
2901             if (s->be_data == MO_LE) {
2902                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2903                                       cpu_exclusive_high, t16);
2904             } else {
2905                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2906                                       cpu_exclusive_val, t16);
2907             }
2908             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2909             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2910         }
2911     } else {
2912         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2913         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2914     }
2915     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2916 }
2917 
2918 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2919                                 int rn, int size, int is_pair)
2920 {
2921     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2922      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2923      *     [addr] = {Rt};
2924      *     if (is_pair) {
2925      *         [addr + datasize] = {Rt2};
2926      *     }
2927      *     {Rd} = 0;
2928      * } else {
2929      *     {Rd} = 1;
2930      * }
2931      * env->exclusive_addr = -1;
2932      */
2933     TCGLabel *fail_label = gen_new_label();
2934     TCGLabel *done_label = gen_new_label();
2935     TCGv_i64 tmp, clean_addr;
2936     MemOp memop;
2937 
2938     /*
2939      * FIXME: We are out of spec here.  We have recorded only the address
2940      * from load_exclusive, not the entire range, and we assume that the
2941      * size of the access on both sides match.  The architecture allows the
2942      * store to be smaller than the load, so long as the stored bytes are
2943      * within the range recorded by the load.
2944      */
2945 
2946     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2947     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2948     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2949 
2950     /*
2951      * The write, and any associated faults, only happen if the virtual
2952      * and physical addresses pass the exclusive monitor check.  These
2953      * faults are exceedingly unlikely, because normally the guest uses
2954      * the exact same address register for the load_exclusive, and we
2955      * would have recognized these faults there.
2956      *
2957      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2958      * unaligned 4-byte write within the range of an aligned 8-byte load.
2959      * With LSE2, the store would need to cross a 16-byte boundary when the
2960      * load did not, which would mean the store is outside the range
2961      * recorded for the monitor, which would have failed a corrected monitor
2962      * check above.  For now, we assume no size change and retain the
2963      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2964      *
2965      * It is possible to trigger an MTE fault, by performing the load with
2966      * a virtual address with a valid tag and performing the store with the
2967      * same virtual address and a different invalid tag.
2968      */
2969     memop = size + is_pair;
2970     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2971         memop |= MO_ALIGN;
2972     }
2973     memop = finalize_memop(s, memop);
2974     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2975 
2976     tmp = tcg_temp_new_i64();
2977     if (is_pair) {
2978         if (size == 2) {
2979             if (s->be_data == MO_LE) {
2980                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2981             } else {
2982                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2983             }
2984             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2985                                        cpu_exclusive_val, tmp,
2986                                        get_mem_index(s), memop);
2987             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2988         } else {
2989             TCGv_i128 t16 = tcg_temp_new_i128();
2990             TCGv_i128 c16 = tcg_temp_new_i128();
2991             TCGv_i64 a, b;
2992 
2993             if (s->be_data == MO_LE) {
2994                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2995                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2996                                         cpu_exclusive_high);
2997             } else {
2998                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2999                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
3000                                         cpu_exclusive_val);
3001             }
3002 
3003             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3004                                         get_mem_index(s), memop);
3005 
3006             a = tcg_temp_new_i64();
3007             b = tcg_temp_new_i64();
3008             if (s->be_data == MO_LE) {
3009                 tcg_gen_extr_i128_i64(a, b, t16);
3010             } else {
3011                 tcg_gen_extr_i128_i64(b, a, t16);
3012             }
3013 
3014             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3015             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3016             tcg_gen_or_i64(tmp, a, b);
3017 
3018             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3019         }
3020     } else {
3021         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3022                                    cpu_reg(s, rt), get_mem_index(s), memop);
3023         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3024     }
3025     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3026     tcg_gen_br(done_label);
3027 
3028     gen_set_label(fail_label);
3029     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3030     gen_set_label(done_label);
3031     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3032 }
3033 
3034 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3035                                  int rn, int size)
3036 {
3037     TCGv_i64 tcg_rs = cpu_reg(s, rs);
3038     TCGv_i64 tcg_rt = cpu_reg(s, rt);
3039     int memidx = get_mem_index(s);
3040     TCGv_i64 clean_addr;
3041     MemOp memop;
3042 
3043     if (rn == 31) {
3044         gen_check_sp_alignment(s);
3045     }
3046     memop = check_atomic_align(s, rn, size);
3047     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3048     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3049                                memidx, memop);
3050 }
3051 
3052 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3053                                       int rn, int size)
3054 {
3055     TCGv_i64 s1 = cpu_reg(s, rs);
3056     TCGv_i64 s2 = cpu_reg(s, rs + 1);
3057     TCGv_i64 t1 = cpu_reg(s, rt);
3058     TCGv_i64 t2 = cpu_reg(s, rt + 1);
3059     TCGv_i64 clean_addr;
3060     int memidx = get_mem_index(s);
3061     MemOp memop;
3062 
3063     if (rn == 31) {
3064         gen_check_sp_alignment(s);
3065     }
3066 
3067     /* This is a single atomic access, despite the "pair". */
3068     memop = check_atomic_align(s, rn, size + 1);
3069     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3070 
3071     if (size == 2) {
3072         TCGv_i64 cmp = tcg_temp_new_i64();
3073         TCGv_i64 val = tcg_temp_new_i64();
3074 
3075         if (s->be_data == MO_LE) {
3076             tcg_gen_concat32_i64(val, t1, t2);
3077             tcg_gen_concat32_i64(cmp, s1, s2);
3078         } else {
3079             tcg_gen_concat32_i64(val, t2, t1);
3080             tcg_gen_concat32_i64(cmp, s2, s1);
3081         }
3082 
3083         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3084 
3085         if (s->be_data == MO_LE) {
3086             tcg_gen_extr32_i64(s1, s2, cmp);
3087         } else {
3088             tcg_gen_extr32_i64(s2, s1, cmp);
3089         }
3090     } else {
3091         TCGv_i128 cmp = tcg_temp_new_i128();
3092         TCGv_i128 val = tcg_temp_new_i128();
3093 
3094         if (s->be_data == MO_LE) {
3095             tcg_gen_concat_i64_i128(val, t1, t2);
3096             tcg_gen_concat_i64_i128(cmp, s1, s2);
3097         } else {
3098             tcg_gen_concat_i64_i128(val, t2, t1);
3099             tcg_gen_concat_i64_i128(cmp, s2, s1);
3100         }
3101 
3102         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3103 
3104         if (s->be_data == MO_LE) {
3105             tcg_gen_extr_i128_i64(s1, s2, cmp);
3106         } else {
3107             tcg_gen_extr_i128_i64(s2, s1, cmp);
3108         }
3109     }
3110 }
3111 
3112 /*
3113  * Compute the ISS.SF bit for syndrome information if an exception
3114  * is taken on a load or store. This indicates whether the instruction
3115  * is accessing a 32-bit or 64-bit register. This logic is derived
3116  * from the ARMv8 specs for LDR (Shared decode for all encodings).
3117  */
3118 static bool ldst_iss_sf(int size, bool sign, bool ext)
3119 {
3120 
3121     if (sign) {
3122         /*
3123          * Signed loads are 64 bit results if we are not going to
3124          * do a zero-extend from 32 to 64 after the load.
3125          * (For a store, sign and ext are always false.)
3126          */
3127         return !ext;
3128     } else {
3129         /* Unsigned loads/stores work at the specified size */
3130         return size == MO_64;
3131     }
3132 }
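/*
 * Examples (illustrative only): LDR Wt has size == MO_32, sign == false
 * -> SF == 0; LDR Xt has MO_64 -> SF == 1; LDRSB Wt sign-extends to 32
 * bits and then zero-extends to 64 (sign, ext) -> SF == 0; LDRSB Xt
 * sign-extends all the way to 64 bits (sign, !ext) -> SF == 1.
 */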
3133 
3134 static bool trans_STXR(DisasContext *s, arg_stxr *a)
3135 {
3136     if (a->rn == 31) {
3137         gen_check_sp_alignment(s);
3138     }
3139     if (a->lasr) {
3140         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3141     }
3142     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3143     return true;
3144 }
3145 
3146 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3147 {
3148     if (a->rn == 31) {
3149         gen_check_sp_alignment(s);
3150     }
3151     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3152     if (a->lasr) {
3153         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3154     }
3155     return true;
3156 }
3157 
3158 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3159 {
3160     TCGv_i64 clean_addr;
3161     MemOp memop;
3162     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3163 
3164     /*
3165      * StoreLORelease is the same as Store-Release for QEMU, but
3166      * needs the feature-test.
3167      */
3168     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3169         return false;
3170     }
3171     /* Generate ISS for non-exclusive accesses including LASR.  */
3172     if (a->rn == 31) {
3173         gen_check_sp_alignment(s);
3174     }
3175     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3176     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3177     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3178                                 true, a->rn != 31, memop);
3179     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3180               iss_sf, a->lasr);
3181     return true;
3182 }
3183 
3184 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3185 {
3186     TCGv_i64 clean_addr;
3187     MemOp memop;
3188     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3189 
3190     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3191     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3192         return false;
3193     }
3194     /* Generate ISS for non-exclusive accesses including LASR.  */
3195     if (a->rn == 31) {
3196         gen_check_sp_alignment(s);
3197     }
3198     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3199     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3200                                 false, a->rn != 31, memop);
3201     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3202               a->rt, iss_sf, a->lasr);
3203     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3204     return true;
3205 }
3206 
3207 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3208 {
3209     if (a->rn == 31) {
3210         gen_check_sp_alignment(s);
3211     }
3212     if (a->lasr) {
3213         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3214     }
3215     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3216     return true;
3217 }
3218 
3219 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3220 {
3221     if (a->rn == 31) {
3222         gen_check_sp_alignment(s);
3223     }
3224     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3225     if (a->lasr) {
3226         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3227     }
3228     return true;
3229 }
3230 
3231 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3232 {
3233     if (!dc_isar_feature(aa64_atomics, s)) {
3234         return false;
3235     }
3236     if (((a->rt | a->rs) & 1) != 0) {
3237         return false;
3238     }
3239 
3240     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3241     return true;
3242 }
3243 
3244 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3245 {
3246     if (!dc_isar_feature(aa64_atomics, s)) {
3247         return false;
3248     }
3249     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3250     return true;
3251 }
3252 
3253 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3254 {
3255     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3256     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3257     TCGv_i64 clean_addr = tcg_temp_new_i64();
3258     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3259 
3260     gen_pc_plus_diff(s, clean_addr, a->imm);
3261     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3262               false, true, a->rt, iss_sf, false);
3263     return true;
3264 }
3265 
3266 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3267 {
3268     /* Load register (literal), vector version */
3269     TCGv_i64 clean_addr;
3270     MemOp memop;
3271 
3272     if (!fp_access_check(s)) {
3273         return true;
3274     }
3275     memop = finalize_memop_asimd(s, a->sz);
3276     clean_addr = tcg_temp_new_i64();
3277     gen_pc_plus_diff(s, clean_addr, a->imm);
3278     do_fp_ld(s, a->rt, clean_addr, memop);
3279     return true;
3280 }
3281 
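/*
 * Shared address setup for the load/store pair insns: when a->p is
 * clear (signed offset or pre-index) the immediate is applied before
 * the access, and when a->w is set the updated address is written back
 * to Rn by op_addr_ldstpair_post, after the access for post-index.
 */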
3282 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3283                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3284                                  uint64_t offset, bool is_store, MemOp mop)
3285 {
3286     if (a->rn == 31) {
3287         gen_check_sp_alignment(s);
3288     }
3289 
3290     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3291     if (!a->p) {
3292         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3293     }
3294 
3295     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3296                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3297 }
3298 
3299 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3300                                   TCGv_i64 dirty_addr, uint64_t offset)
3301 {
3302     if (a->w) {
3303         if (a->p) {
3304             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3305         }
3306         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3307     }
3308 }
3309 
3310 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3311 {
3312     uint64_t offset = a->imm << a->sz;
3313     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3314     MemOp mop = finalize_memop(s, a->sz);
3315 
3316     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3317     tcg_rt = cpu_reg(s, a->rt);
3318     tcg_rt2 = cpu_reg(s, a->rt2);
3319     /*
3320      * We built mop above for the single logical access -- rebuild it
3321      * now for the paired operation.
3322      *
3323      * With LSE2, non-sign-extending pairs are treated atomically if
3324      * aligned, and if unaligned one of the pair will be completely
3325      * within a 16-byte block and that element will be atomic.
3326      * Otherwise each element is separately atomic.
3327      * In all cases, issue one operation with the correct atomicity.
3328      */
3329     mop = a->sz + 1;
3330     if (s->align_mem) {
3331         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3332     }
3333     mop = finalize_memop_pair(s, mop);
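    /*
     * Illustrative example: for a 64-bit pair (a->sz == MO_64) the
     * rebuilt mop describes a single 16-byte (MO_128) access, carrying
     * MO_ALIGN_8 when alignment checking is enabled, so both registers
     * are stored by the one tcg_gen_qemu_st_i128() below.
     */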
3334     if (a->sz == 2) {
3335         TCGv_i64 tmp = tcg_temp_new_i64();
3336 
3337         if (s->be_data == MO_LE) {
3338             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3339         } else {
3340             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3341         }
3342         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3343     } else {
3344         TCGv_i128 tmp = tcg_temp_new_i128();
3345 
3346         if (s->be_data == MO_LE) {
3347             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3348         } else {
3349             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3350         }
3351         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3352     }
3353     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3354     return true;
3355 }
3356 
3357 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3358 {
3359     uint64_t offset = a->imm << a->sz;
3360     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3361     MemOp mop = finalize_memop(s, a->sz);
3362 
3363     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3364     tcg_rt = cpu_reg(s, a->rt);
3365     tcg_rt2 = cpu_reg(s, a->rt2);
3366 
3367     /*
3368      * We built mop above for the single logical access -- rebuild it
3369      * now for the paired operation.
3370      *
3371      * With LSE2, non-sign-extending pairs are treated atomically if
3372      * aligned, and if unaligned one of the pair will be completely
3373      * within a 16-byte block and that element will be atomic.
3374      * Otherwise each element is separately atomic.
3375      * In all cases, issue one operation with the correct atomicity.
3376      *
3377      * This treats sign-extending loads like zero-extending loads,
3378      * since that reuses the most code below.
3379      */
3380     mop = a->sz + 1;
3381     if (s->align_mem) {
3382         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3383     }
3384     mop = finalize_memop_pair(s, mop);
3385     if (a->sz == 2) {
3386         int o2 = s->be_data == MO_LE ? 32 : 0;
3387         int o1 = o2 ^ 32;
3388 
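        /*
         * The 32-bit pair is loaded as one 64-bit access; Rt then takes
         * bits [o1 +: 32] and Rt2 bits [o2 +: 32], so for little-endian
         * data Rt receives the low half and Rt2 the high half.
         */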
3389         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3390         if (a->sign) {
3391             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3392             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3393         } else {
3394             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3395             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3396         }
3397     } else {
3398         TCGv_i128 tmp = tcg_temp_new_i128();
3399 
3400         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3401         if (s->be_data == MO_LE) {
3402             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3403         } else {
3404             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3405         }
3406     }
3407     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3408     return true;
3409 }
3410 
3411 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3412 {
3413     uint64_t offset = a->imm << a->sz;
3414     TCGv_i64 clean_addr, dirty_addr;
3415     MemOp mop;
3416 
3417     if (!fp_access_check(s)) {
3418         return true;
3419     }
3420 
3421     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3422     mop = finalize_memop_asimd(s, a->sz);
3423     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3424     do_fp_st(s, a->rt, clean_addr, mop);
3425     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3426     do_fp_st(s, a->rt2, clean_addr, mop);
3427     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3428     return true;
3429 }
3430 
3431 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3432 {
3433     uint64_t offset = a->imm << a->sz;
3434     TCGv_i64 clean_addr, dirty_addr;
3435     MemOp mop;
3436 
3437     if (!fp_access_check(s)) {
3438         return true;
3439     }
3440 
3441     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3442     mop = finalize_memop_asimd(s, a->sz);
3443     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3444     do_fp_ld(s, a->rt, clean_addr, mop);
3445     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3446     do_fp_ld(s, a->rt2, clean_addr, mop);
3447     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3448     return true;
3449 }
3450 
3451 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3452 {
3453     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3454     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3455     MemOp mop;
3456     TCGv_i128 tmp;
3457 
3458     /* STGP only comes in one size. */
3459     tcg_debug_assert(a->sz == MO_64);
3460 
3461     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3462         return false;
3463     }
3464 
3465     if (a->rn == 31) {
3466         gen_check_sp_alignment(s);
3467     }
3468 
3469     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3470     if (!a->p) {
3471         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3472     }
3473 
3474     clean_addr = clean_data_tbi(s, dirty_addr);
3475     tcg_rt = cpu_reg(s, a->rt);
3476     tcg_rt2 = cpu_reg(s, a->rt2);
3477 
3478     /*
3479      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3480      * and one tag operation.  We implement it as one single aligned 16-byte
3481      * memory operation for convenience.  Note that the alignment ensures
3482      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3483      */
3484     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3485 
3486     tmp = tcg_temp_new_i128();
3487     if (s->be_data == MO_LE) {
3488         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3489     } else {
3490         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3491     }
3492     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3493 
3494     /* Perform the tag store, if tag access enabled. */
3495     if (s->ata[0]) {
3496         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3497             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3498         } else {
3499             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3500         }
3501     }
3502 
3503     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3504     return true;
3505 }
3506 
3507 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3508                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3509                                  uint64_t offset, bool is_store, MemOp mop)
3510 {
3511     int memidx;
3512 
3513     if (a->rn == 31) {
3514         gen_check_sp_alignment(s);
3515     }
3516 
3517     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3518     if (!a->p) {
3519         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3520     }
3521     memidx = get_a64_user_mem_index(s, a->unpriv);
3522     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3523                                         a->w || a->rn != 31,
3524                                         mop, a->unpriv, memidx);
3525 }
3526 
3527 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3528                                   TCGv_i64 dirty_addr, uint64_t offset)
3529 {
3530     if (a->w) {
3531         if (a->p) {
3532             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3533         }
3534         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3535     }
3536 }
3537 
3538 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3539 {
3540     bool iss_sf, iss_valid = !a->w;
3541     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3542     int memidx = get_a64_user_mem_index(s, a->unpriv);
3543     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3544 
3545     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3546 
3547     tcg_rt = cpu_reg(s, a->rt);
3548     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3549 
3550     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3551                      iss_valid, a->rt, iss_sf, false);
3552     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3553     return true;
3554 }
3555 
3556 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3557 {
3558     bool iss_sf, iss_valid = !a->w;
3559     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3560     int memidx = get_a64_user_mem_index(s, a->unpriv);
3561     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3562 
3563     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3564 
3565     tcg_rt = cpu_reg(s, a->rt);
3566     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3567 
3568     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3569                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3570     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3571     return true;
3572 }
3573 
3574 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3575 {
3576     TCGv_i64 clean_addr, dirty_addr;
3577     MemOp mop;
3578 
3579     if (!fp_access_check(s)) {
3580         return true;
3581     }
3582     mop = finalize_memop_asimd(s, a->sz);
3583     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3584     do_fp_st(s, a->rt, clean_addr, mop);
3585     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3586     return true;
3587 }
3588 
3589 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3590 {
3591     TCGv_i64 clean_addr, dirty_addr;
3592     MemOp mop;
3593 
3594     if (!fp_access_check(s)) {
3595         return true;
3596     }
3597     mop = finalize_memop_asimd(s, a->sz);
3598     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3599     do_fp_ld(s, a->rt, clean_addr, mop);
3600     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3601     return true;
3602 }
3603 
3604 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3605                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3606                              bool is_store, MemOp memop)
3607 {
3608     TCGv_i64 tcg_rm;
3609 
3610     if (a->rn == 31) {
3611         gen_check_sp_alignment(s);
3612     }
3613     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3614 
3615     tcg_rm = read_cpu_reg(s, a->rm, 1);
3616     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3617 
3618     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3619     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3620 }
3621 
3622 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3623 {
3624     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3625     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3626     MemOp memop;
3627 
3628     if (extract32(a->opt, 1, 1) == 0) {
3629         return false;
3630     }
3631 
3632     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3633     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3634     tcg_rt = cpu_reg(s, a->rt);
3635     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3636               a->ext, true, a->rt, iss_sf, false);
3637     return true;
3638 }
3639 
3640 static bool trans_STR(DisasContext *s, arg_ldst *a)
3641 {
3642     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3643     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3644     MemOp memop;
3645 
3646     if (extract32(a->opt, 1, 1) == 0) {
3647         return false;
3648     }
3649 
3650     memop = finalize_memop(s, a->sz);
3651     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3652     tcg_rt = cpu_reg(s, a->rt);
3653     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3654     return true;
3655 }
3656 
3657 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3658 {
3659     TCGv_i64 clean_addr, dirty_addr;
3660     MemOp memop;
3661 
3662     if (extract32(a->opt, 1, 1) == 0) {
3663         return false;
3664     }
3665 
3666     if (!fp_access_check(s)) {
3667         return true;
3668     }
3669 
3670     memop = finalize_memop_asimd(s, a->sz);
3671     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3672     do_fp_ld(s, a->rt, clean_addr, memop);
3673     return true;
3674 }
3675 
3676 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3677 {
3678     TCGv_i64 clean_addr, dirty_addr;
3679     MemOp memop;
3680 
3681     if (extract32(a->opt, 1, 1) == 0) {
3682         return false;
3683     }
3684 
3685     if (!fp_access_check(s)) {
3686         return true;
3687     }
3688 
3689     memop = finalize_memop_asimd(s, a->sz);
3690     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3691     do_fp_st(s, a->rt, clean_addr, memop);
3692     return true;
3693 }
3694 
3695 
3696 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3697                          int sign, bool invert)
3698 {
3699     MemOp mop = a->sz | sign;
3700     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3701 
3702     if (a->rn == 31) {
3703         gen_check_sp_alignment(s);
3704     }
3705     mop = check_atomic_align(s, a->rn, mop);
3706     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3707                                 a->rn != 31, mop);
3708     tcg_rs = read_cpu_reg(s, a->rs, true);
3709     tcg_rt = cpu_reg(s, a->rt);
3710     if (invert) {
3711         tcg_gen_not_i64(tcg_rs, tcg_rs);
3712     }
3713     /*
3714      * The tcg atomic primitives are all full barriers.  Therefore we
3715      * can ignore the Acquire and Release bits of this instruction.
3716      */
3717     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3718 
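    /*
     * For the signed min/max ops the value was loaded sign-extended so
     * the 64-bit comparison sees the right sign, but the architectural
     * result in Rt is the zero-extended memory value, so undo that
     * extension here.
     */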
3719     if (mop & MO_SIGN) {
3720         switch (a->sz) {
3721         case MO_8:
3722             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3723             break;
3724         case MO_16:
3725             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3726             break;
3727         case MO_32:
3728             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3729             break;
3730         case MO_64:
3731             break;
3732         default:
3733             g_assert_not_reached();
3734         }
3735     }
3736     return true;
3737 }
3738 
3739 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3740 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3741 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3742 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3743 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3744 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3745 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3746 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3747 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3748 
3749 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3750 {
3751     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3752     TCGv_i64 clean_addr;
3753     MemOp mop;
3754 
3755     if (!dc_isar_feature(aa64_atomics, s) ||
3756         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3757         return false;
3758     }
3759     if (a->rn == 31) {
3760         gen_check_sp_alignment(s);
3761     }
3762     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3763     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3764                                 a->rn != 31, mop);
3765     /*
3766      * LDAPR* are a special case because they are a simple load, not a
3767      * fetch-and-do-something op.
3768      * The architectural consistency requirements here are weaker than
3769      * full load-acquire (we only need "load-acquire processor consistent"),
3770      * but we choose to implement them as full LDAQ.
3771      */
3772     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3773               true, a->rt, iss_sf, true);
3774     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3775     return true;
3776 }
3777 
3778 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3779 {
3780     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3781     MemOp memop;
3782 
3783     /* Load with pointer authentication */
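    /* LDRAA/LDRAB: authenticate Rn with key A or B (a->m), then load. */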
3784     if (!dc_isar_feature(aa64_pauth, s)) {
3785         return false;
3786     }
3787 
3788     if (a->rn == 31) {
3789         gen_check_sp_alignment(s);
3790     }
3791     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3792 
3793     if (s->pauth_active) {
3794         if (!a->m) {
3795             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3796                                       tcg_constant_i64(0));
3797         } else {
3798             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3799                                       tcg_constant_i64(0));
3800         }
3801     }
3802 
3803     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3804 
3805     memop = finalize_memop(s, MO_64);
3806 
3807     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3808     clean_addr = gen_mte_check1(s, dirty_addr, false,
3809                                 a->w || a->rn != 31, memop);
3810 
3811     tcg_rt = cpu_reg(s, a->rt);
3812     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3813               /* extend */ false, /* iss_valid */ !a->w,
3814               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3815 
3816     if (a->w) {
3817         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3818     }
3819     return true;
3820 }
3821 
3822 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3823 {
3824     TCGv_i64 clean_addr, dirty_addr;
3825     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3826     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3827 
3828     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3829         return false;
3830     }
3831 
3832     if (a->rn == 31) {
3833         gen_check_sp_alignment(s);
3834     }
3835 
3836     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3837     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3838     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3839     clean_addr = clean_data_tbi(s, dirty_addr);
3840 
3841     /*
3842      * Load-AcquirePC semantics; we implement as the slightly more
3843      * restrictive Load-Acquire.
3844      */
3845     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3846               a->rt, iss_sf, true);
3847     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3848     return true;
3849 }
3850 
3851 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3852 {
3853     TCGv_i64 clean_addr, dirty_addr;
3854     MemOp mop = a->sz;
3855     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3856 
3857     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3858         return false;
3859     }
3860 
3861     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3862 
3863     if (a->rn == 31) {
3864         gen_check_sp_alignment(s);
3865     }
3866 
3867     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3868     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3869     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3870     clean_addr = clean_data_tbi(s, dirty_addr);
3871 
3872     /* Store-Release semantics */
3873     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3874     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3875     return true;
3876 }
3877 
3878 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3879 {
3880     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3881     MemOp endian, align, mop;
3882 
3883     int total;    /* total bytes */
3884     int elements; /* elements per vector */
3885     int r;
3886     int size = a->sz;
3887 
3888     if (!a->p && a->rm != 0) {
3889         /* For non-postindexed accesses the Rm field must be 0 */
3890         return false;
3891     }
3892     if (size == 3 && !a->q && a->selem != 1) {
3893         return false;
3894     }
3895     if (!fp_access_check(s)) {
3896         return true;
3897     }
3898 
3899     if (a->rn == 31) {
3900         gen_check_sp_alignment(s);
3901     }
3902 
3903     /* For our purposes, bytes are always little-endian.  */
3904     endian = s->be_data;
3905     if (size == 0) {
3906         endian = MO_LE;
3907     }
3908 
3909     total = a->rpt * a->selem * (a->q ? 16 : 8);
3910     tcg_rn = cpu_reg_sp(s, a->rn);
3911 
3912     /*
3913      * Issue the MTE check vs the logical repeat count, before we
3914      * promote consecutive little-endian elements below.
3915      */
3916     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3917                                 finalize_memop_asimd(s, size));
3918 
3919     /*
3920      * Consecutive little-endian elements from a single register
3921      * can be promoted to a larger little-endian operation.
3922      */
3923     align = MO_ALIGN;
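    /*
     * For example, "LD1 {v0.16b}, [x0]" (selem == 1, size == 0) is
     * promoted here to size == 3 and emitted below as two 8-byte
     * little-endian loads instead of sixteen single-byte loads.
     */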
3924     if (a->selem == 1 && endian == MO_LE) {
3925         align = pow2_align(size);
3926         size = 3;
3927     }
3928     if (!s->align_mem) {
3929         align = 0;
3930     }
3931     mop = endian | size | align;
3932 
3933     elements = (a->q ? 16 : 8) >> size;
3934     tcg_ebytes = tcg_constant_i64(1 << size);
3935     for (r = 0; r < a->rpt; r++) {
3936         int e;
3937         for (e = 0; e < elements; e++) {
3938             int xs;
3939             for (xs = 0; xs < a->selem; xs++) {
3940                 int tt = (a->rt + r + xs) % 32;
3941                 do_vec_ld(s, tt, e, clean_addr, mop);
3942                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3943             }
3944         }
3945     }
3946 
3947     /*
3948      * For non-quad operations, setting a slice of the low 64 bits of
3949      * the register clears the high 64 bits (in the ARM ARM pseudocode
3950      * this is implicit in the fact that 'rval' is a 64 bit wide
3951      * variable).  For quad operations, we might still need to zero
3952      * the high bits of SVE.
3953      */
3954     for (r = 0; r < a->rpt * a->selem; r++) {
3955         int tt = (a->rt + r) % 32;
3956         clear_vec_high(s, a->q, tt);
3957     }
3958 
3959     if (a->p) {
3960         if (a->rm == 31) {
3961             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3962         } else {
3963             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3964         }
3965     }
3966     return true;
3967 }
3968 
3969 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3970 {
3971     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3972     MemOp endian, align, mop;
3973 
3974     int total;    /* total bytes */
3975     int elements; /* elements per vector */
3976     int r;
3977     int size = a->sz;
3978 
3979     if (!a->p && a->rm != 0) {
3980         /* For non-postindexed accesses the Rm field must be 0 */
3981         return false;
3982     }
3983     if (size == 3 && !a->q && a->selem != 1) {
3984         return false;
3985     }
3986     if (!fp_access_check(s)) {
3987         return true;
3988     }
3989 
3990     if (a->rn == 31) {
3991         gen_check_sp_alignment(s);
3992     }
3993 
3994     /* For our purposes, bytes are always little-endian.  */
3995     endian = s->be_data;
3996     if (size == 0) {
3997         endian = MO_LE;
3998     }
3999 
4000     total = a->rpt * a->selem * (a->q ? 16 : 8);
4001     tcg_rn = cpu_reg_sp(s, a->rn);
4002 
4003     /*
4004      * Issue the MTE check vs the logical repeat count, before we
4005      * promote consecutive little-endian elements below.
4006      */
4007     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4008                                 finalize_memop_asimd(s, size));
4009 
4010     /*
4011      * Consecutive little-endian elements from a single register
4012      * can be promoted to a larger little-endian operation.
4013      */
4014     align = MO_ALIGN;
4015     if (a->selem == 1 && endian == MO_LE) {
4016         align = pow2_align(size);
4017         size = 3;
4018     }
4019     if (!s->align_mem) {
4020         align = 0;
4021     }
4022     mop = endian | size | align;
4023 
4024     elements = (a->q ? 16 : 8) >> size;
4025     tcg_ebytes = tcg_constant_i64(1 << size);
4026     for (r = 0; r < a->rpt; r++) {
4027         int e;
4028         for (e = 0; e < elements; e++) {
4029             int xs;
4030             for (xs = 0; xs < a->selem; xs++) {
4031                 int tt = (a->rt + r + xs) % 32;
4032                 do_vec_st(s, tt, e, clean_addr, mop);
4033                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4034             }
4035         }
4036     }
4037 
4038     if (a->p) {
4039         if (a->rm == 31) {
4040             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4041         } else {
4042             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4043         }
4044     }
4045     return true;
4046 }
4047 
4048 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4049 {
4050     int xs, total, rt;
4051     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4052     MemOp mop;
4053 
4054     if (!a->p && a->rm != 0) {
4055         return false;
4056     }
4057     if (!fp_access_check(s)) {
4058         return true;
4059     }
4060 
4061     if (a->rn == 31) {
4062         gen_check_sp_alignment(s);
4063     }
4064 
4065     total = a->selem << a->scale;
4066     tcg_rn = cpu_reg_sp(s, a->rn);
4067 
4068     mop = finalize_memop_asimd(s, a->scale);
4069     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4070                                 total, mop);
4071 
4072     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4073     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4074         do_vec_st(s, rt, a->index, clean_addr, mop);
4075         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4076     }
4077 
4078     if (a->p) {
4079         if (a->rm == 31) {
4080             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4081         } else {
4082             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4083         }
4084     }
4085     return true;
4086 }
4087 
4088 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4089 {
4090     int xs, total, rt;
4091     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4092     MemOp mop;
4093 
4094     if (!a->p && a->rm != 0) {
4095         return false;
4096     }
4097     if (!fp_access_check(s)) {
4098         return true;
4099     }
4100 
4101     if (a->rn == 31) {
4102         gen_check_sp_alignment(s);
4103     }
4104 
4105     total = a->selem << a->scale;
4106     tcg_rn = cpu_reg_sp(s, a->rn);
4107 
4108     mop = finalize_memop_asimd(s, a->scale);
4109     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4110                                 total, mop);
4111 
4112     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4113     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4114         do_vec_ld(s, rt, a->index, clean_addr, mop);
4115         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4116     }
4117 
4118     if (a->p) {
4119         if (a->rm == 31) {
4120             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4121         } else {
4122             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4123         }
4124     }
4125     return true;
4126 }
4127 
4128 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4129 {
4130     int xs, total, rt;
4131     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4132     MemOp mop;
4133 
4134     if (!a->p && a->rm != 0) {
4135         return false;
4136     }
4137     if (!fp_access_check(s)) {
4138         return true;
4139     }
4140 
4141     if (a->rn == 31) {
4142         gen_check_sp_alignment(s);
4143     }
4144 
4145     total = a->selem << a->scale;
4146     tcg_rn = cpu_reg_sp(s, a->rn);
4147 
4148     mop = finalize_memop_asimd(s, a->scale);
4149     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4150                                 total, mop);
4151 
4152     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4153     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4154         /* Load and replicate to all elements */
4155         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4156 
4157         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4158         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4159                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4160         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4161     }
4162 
4163     if (a->p) {
4164         if (a->rm == 31) {
4165             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4166         } else {
4167             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4168         }
4169     }
4170     return true;
4171 }
4172 
4173 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4174 {
4175     TCGv_i64 addr, clean_addr, tcg_rt;
4176     int size = 4 << s->dcz_blocksize;
4177 
4178     if (!dc_isar_feature(aa64_mte, s)) {
4179         return false;
4180     }
4181     if (s->current_el == 0) {
4182         return false;
4183     }
4184 
4185     if (a->rn == 31) {
4186         gen_check_sp_alignment(s);
4187     }
4188 
4189     addr = read_cpu_reg_sp(s, a->rn, true);
4190     tcg_gen_addi_i64(addr, addr, a->imm);
4191     tcg_rt = cpu_reg(s, a->rt);
4192 
4193     if (s->ata[0]) {
4194         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4195     }
4196     /*
4197      * The non-tags portion of STZGM is mostly like DC_ZVA,
4198      * except the alignment happens before the access.
4199      */
4200     clean_addr = clean_data_tbi(s, addr);
4201     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4202     gen_helper_dc_zva(tcg_env, clean_addr);
4203     return true;
4204 }
4205 
4206 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4207 {
4208     TCGv_i64 addr, clean_addr, tcg_rt;
4209 
4210     if (!dc_isar_feature(aa64_mte, s)) {
4211         return false;
4212     }
4213     if (s->current_el == 0) {
4214         return false;
4215     }
4216 
4217     if (a->rn == 31) {
4218         gen_check_sp_alignment(s);
4219     }
4220 
4221     addr = read_cpu_reg_sp(s, a->rn, true);
4222     tcg_gen_addi_i64(addr, addr, a->imm);
4223     tcg_rt = cpu_reg(s, a->rt);
4224 
4225     if (s->ata[0]) {
4226         gen_helper_stgm(tcg_env, addr, tcg_rt);
4227     } else {
4228         MMUAccessType acc = MMU_DATA_STORE;
4229         int size = 4 << s->gm_blocksize;
4230 
4231         clean_addr = clean_data_tbi(s, addr);
4232         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4233         gen_probe_access(s, clean_addr, acc, size);
4234     }
4235     return true;
4236 }
4237 
4238 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4239 {
4240     TCGv_i64 addr, clean_addr, tcg_rt;
4241 
4242     if (!dc_isar_feature(aa64_mte, s)) {
4243         return false;
4244     }
4245     if (s->current_el == 0) {
4246         return false;
4247     }
4248 
4249     if (a->rn == 31) {
4250         gen_check_sp_alignment(s);
4251     }
4252 
4253     addr = read_cpu_reg_sp(s, a->rn, true);
4254     tcg_gen_addi_i64(addr, addr, a->imm);
4255     tcg_rt = cpu_reg(s, a->rt);
4256 
4257     if (s->ata[0]) {
4258         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4259     } else {
4260         MMUAccessType acc = MMU_DATA_LOAD;
4261         int size = 4 << s->gm_blocksize;
4262 
4263         clean_addr = clean_data_tbi(s, addr);
4264         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4265         gen_probe_access(s, clean_addr, acc, size);
4266         /* The result tags are zeros.  */
4267         tcg_gen_movi_i64(tcg_rt, 0);
4268     }
4269     return true;
4270 }
4271 
4272 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4273 {
4274     TCGv_i64 addr, clean_addr, tcg_rt;
4275 
4276     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4277         return false;
4278     }
4279 
4280     if (a->rn == 31) {
4281         gen_check_sp_alignment(s);
4282     }
4283 
4284     addr = read_cpu_reg_sp(s, a->rn, true);
4285     if (!a->p) {
4286         /* pre-index or signed offset */
4287         tcg_gen_addi_i64(addr, addr, a->imm);
4288     }
4289 
4290     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4291     tcg_rt = cpu_reg(s, a->rt);
4292     if (s->ata[0]) {
4293         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4294     } else {
4295         /*
4296          * Tag access disabled: we must check for aborts on the load
4297          * from [rn+offset], and then insert a 0 tag into rt.
4298          */
4299         clean_addr = clean_data_tbi(s, addr);
4300         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4301         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4302     }
4303 
4304     if (a->w) {
4305         /* pre-index or post-index */
4306         if (a->p) {
4307             /* post-index */
4308             tcg_gen_addi_i64(addr, addr, a->imm);
4309         }
4310         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4311     }
4312     return true;
4313 }
4314 
4315 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4316 {
4317     TCGv_i64 addr, tcg_rt;
4318 
4319     if (a->rn == 31) {
4320         gen_check_sp_alignment(s);
4321     }
4322 
4323     addr = read_cpu_reg_sp(s, a->rn, true);
4324     if (!a->p) {
4325         /* pre-index or signed offset */
4326         tcg_gen_addi_i64(addr, addr, a->imm);
4327     }
4328     tcg_rt = cpu_reg_sp(s, a->rt);
4329     if (!s->ata[0]) {
4330         /*
4331          * For STG and ST2G, we need to check alignment and probe memory.
4332          * TODO: For STZG and STZ2G, we could rely on the stores below,
4333          * at least for system mode; user-only won't enforce alignment.
4334          */
4335         if (is_pair) {
4336             gen_helper_st2g_stub(tcg_env, addr);
4337         } else {
4338             gen_helper_stg_stub(tcg_env, addr);
4339         }
4340     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4341         if (is_pair) {
4342             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4343         } else {
4344             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4345         }
4346     } else {
4347         if (is_pair) {
4348             gen_helper_st2g(tcg_env, addr, tcg_rt);
4349         } else {
4350             gen_helper_stg(tcg_env, addr, tcg_rt);
4351         }
4352     }
4353 
4354     if (is_zero) {
4355         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4356         TCGv_i64 zero64 = tcg_constant_i64(0);
4357         TCGv_i128 zero128 = tcg_temp_new_i128();
4358         int mem_index = get_mem_index(s);
4359         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4360 
4361         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4362 
4363         /* This is 1 or 2 atomic 16-byte operations. */
4364         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4365         if (is_pair) {
4366             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4367             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4368         }
4369     }
4370 
4371     if (a->w) {
4372         /* pre-index or post-index */
4373         if (a->p) {
4374             /* post-index */
4375             tcg_gen_addi_i64(addr, addr, a->imm);
4376         }
4377         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4378     }
4379     return true;
4380 }
4381 
4382 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4383 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4384 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4385 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4386 
4387 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4388 
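/*
 * FEAT_MOPS memory set: software issues the prologue/main/epilogue
 * triple (SETP/SETM/SETE, or SETG* for the tag-setting forms) back to
 * back, and each step calls a helper that updates the destination and
 * count registers as the architecture requires.
 */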
4389 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4390                    bool is_setg, SetFn fn)
4391 {
4392     int memidx;
4393     uint32_t syndrome, desc = 0;
4394 
4395     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4396         return false;
4397     }
4398 
4399     /*
4400      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4401      * us to pull this check before the CheckMOPSEnabled() test
4402      * (which we do in the helper function)
4403      */
4404     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4405         a->rd == 31 || a->rn == 31) {
4406         return false;
4407     }
4408 
4409     memidx = get_a64_user_mem_index(s, a->unpriv);
4410 
4411     /*
4412      * We pass option_a == true, matching our implementation;
4413      * we pass wrong_option == false: helper function may set that bit.
4414      */
4415     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4416                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4417 
4418     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4419         /* We may need to do MTE tag checking, so assemble the descriptor */
4420         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4421         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4422         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4423         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4424     }
4425     /* The helper function always needs the memidx even with MTE disabled */
4426     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4427 
4428     /*
4429      * The helper needs the register numbers, but since they're in
4430      * the syndrome anyway, we let it extract them from there rather
4431      * than passing in an extra three integer arguments.
4432      */
4433     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4434     return true;
4435 }
4436 
4437 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4438 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4439 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4440 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4441 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4442 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4443 
4444 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4445 
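/*
 * FEAT_MOPS memory copy: as with the SET* family above, a copy is the
 * prologue/main/epilogue triple CPYP/CPYM/CPYE (CPYF* for the
 * forward-only variants); bits 0 and 1 of a->options are the write-
 * and read-unprivileged flags respectively.
 */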
4446 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4447 {
4448     int rmemidx, wmemidx;
4449     uint32_t syndrome, rdesc = 0, wdesc = 0;
4450     bool wunpriv = extract32(a->options, 0, 1);
4451     bool runpriv = extract32(a->options, 1, 1);
4452 
4453     /*
4454      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4455      * us to pull this check before the CheckMOPSEnabled() test
4456      * (which we do in the helper function)
4457      */
4458     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4459         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4460         return false;
4461     }
4462 
4463     rmemidx = get_a64_user_mem_index(s, runpriv);
4464     wmemidx = get_a64_user_mem_index(s, wunpriv);
4465 
4466     /*
4467      * We pass option_a == true, matching our implementation;
4468      * we pass wrong_option == false: helper function may set that bit.
4469      */
4470     syndrome = syn_mop(false, false, a->options, is_epilogue,
4471                        false, true, a->rd, a->rs, a->rn);
4472 
4473     /* If we need to do MTE tag checking, assemble the descriptors */
4474     if (s->mte_active[runpriv]) {
4475         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4476         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4477     }
4478     if (s->mte_active[wunpriv]) {
4479         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4480         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4481         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4482     }
4483     /* The helper function needs these parts of the descriptor regardless */
4484     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4485     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4486 
4487     /*
4488      * The helper needs the register numbers, but since they're in
4489      * the syndrome anyway, we let it extract them from there rather
4490      * than passing in an extra three integer arguments.
4491      */
4492     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4493        tcg_constant_i32(rdesc));
4494     return true;
4495 }
4496 
4497 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4498 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4499 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4500 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4501 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4502 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4503 
4504 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4505 
4506 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4507                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4508 {
4509     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4510     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4511     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4512 
4513     fn(tcg_rd, tcg_rn, tcg_imm);
4514     if (!a->sf) {
4515         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4516     }
4517     return true;
4518 }
4519 
4520 /*
4521  * PC-rel. addressing
4522  */
4523 
4524 static bool trans_ADR(DisasContext *s, arg_ri *a)
4525 {
4526     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4527     return true;
4528 }
4529 
4530 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4531 {
4532     int64_t offset = (int64_t)a->imm << 12;
4533 
4534     /* The page offset is ok for CF_PCREL. */
4535     offset -= s->pc_curr & 0xfff;
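    /*
     * ADRP targets (PC & ~0xfff) + (imm << 12); cancelling the low 12
     * bits of pc_curr here lets gen_pc_plus_diff emit the result as an
     * ordinary PC-relative addition.
     */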
4536     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4537     return true;
4538 }
4539 
4540 /*
4541  * Add/subtract (immediate)
4542  */
4543 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4544 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4545 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4546 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
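/* The flag-setting forms cannot target SP, hence rd_sp == 0 above. */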
4547 
4548 /*
4549  * Add/subtract (immediate, with tags)
4550  */
4551 
4552 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4553                                       bool sub_op)
4554 {
4555     TCGv_i64 tcg_rn, tcg_rd;
4556     int imm;
4557 
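    /*
     * ADDG/SUBG adjust an address by a granule-scaled offset while also
     * stepping the allocation tag by uimm4; with tag access disabled we
     * just do the address arithmetic and force the tag field to zero.
     */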
4558     imm = a->uimm6 << LOG2_TAG_GRANULE;
4559     if (sub_op) {
4560         imm = -imm;
4561     }
4562 
4563     tcg_rn = cpu_reg_sp(s, a->rn);
4564     tcg_rd = cpu_reg_sp(s, a->rd);
4565 
4566     if (s->ata[0]) {
4567         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4568                            tcg_constant_i32(imm),
4569                            tcg_constant_i32(a->uimm4));
4570     } else {
4571         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4572         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4573     }
4574     return true;
4575 }
4576 
4577 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4578 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4579 
4580 /* The input should be a value in the bottom e bits (with higher
4581  * bits zero); returns that value replicated into every element
4582  * of size e in a 64 bit integer.
4583  */
4584 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4585 {
4586     assert(e != 0);
4587     while (e < 64) {
4588         mask |= mask << e;
4589         e *= 2;
4590     }
4591     return mask;
4592 }
4593 
4594 /*
4595  * Logical (immediate)
4596  */
4597 
4598 /*
4599  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4600  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4601  * value (ie should cause a guest UNDEF exception), and true if they are
4602  * valid, in which case the decoded bit pattern is written to result.
4603  */
4604 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4605                             unsigned int imms, unsigned int immr)
4606 {
4607     uint64_t mask;
4608     unsigned e, levels, s, r;
4609     int len;
4610 
4611     assert(immn < 2 && imms < 64 && immr < 64);
4612 
4613     /* The bit patterns we create here are 64 bit patterns which
4614      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4615      * 64 bits each. Each element contains the same value: a run
4616      * of between 1 and e-1 non-zero bits, rotated within the
4617      * element by between 0 and e-1 bits.
4618      *
4619      * The element size and run length are encoded into immn (1 bit)
4620      * and imms (6 bits) as follows:
4621      * 64 bit elements: immn = 1, imms = <length of run - 1>
4622      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4623      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4624      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4625      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4626      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4627      * Notice that immn = 0, imms = 11111x is the only combination
4628      * not covered by one of the above options; this is reserved.
4629      * Further, <length of run - 1> all-ones is a reserved pattern.
4630      *
4631      * In all cases the rotation is by immr % e (and immr is 6 bits).
4632      */
4633 
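    /*
     * Worked example: immn = 0, imms = 0b100111, immr = 0 selects
     * 16-bit elements each holding a run of eight ones, so the decode
     * below yields the wmask 0x00ff00ff00ff00ff.
     */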
4634     /* First determine the element size */
4635     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4636     if (len < 1) {
4637         /* This is the immn == 0, imms == 11111x case */
4638         return false;
4639     }
4640     e = 1 << len;
4641 
4642     levels = e - 1;
4643     s = imms & levels;
4644     r = immr & levels;
4645 
4646     if (s == levels) {
4647         /* <length of run - 1> mustn't be all-ones. */
4648         return false;
4649     }
4650 
4651     /* Create the value of one element: s+1 set bits rotated
4652      * by r within the element (which is e bits wide)...
4653      */
4654     mask = MAKE_64BIT_MASK(0, s + 1);
4655     if (r) {
4656         mask = (mask >> r) | (mask << (e - r));
4657         mask &= MAKE_64BIT_MASK(0, e);
4658     }
4659     /* ...then replicate the element over the whole 64 bit value */
4660     mask = bitfield_replicate(mask, e);
4661     *result = mask;
4662     return true;
4663 }
4664 
4665 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4666                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4667 {
4668     TCGv_i64 tcg_rd, tcg_rn;
4669     uint64_t imm;
4670 
4671     /* Some immediate field values are reserved. */
4672     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4673                                 extract32(a->dbm, 0, 6),
4674                                 extract32(a->dbm, 6, 6))) {
4675         return false;
4676     }
4677     if (!a->sf) {
4678         imm &= 0xffffffffull;
4679     }
4680 
4681     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4682     tcg_rn = cpu_reg(s, a->rn);
4683 
4684     fn(tcg_rd, tcg_rn, imm);
4685     if (set_cc) {
4686         gen_logic_CC(a->sf, tcg_rd);
4687     }
4688     if (!a->sf) {
4689         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4690     }
4691     return true;
4692 }
4693 
4694 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4695 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4696 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4697 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4698 
4699 /*
4700  * Move wide (immediate)
4701  */
4702 
4703 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4704 {
4705     int pos = a->hw << 4;
4706     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4707     return true;
4708 }
4709 
4710 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4711 {
4712     int pos = a->hw << 4;
4713     uint64_t imm = a->imm;
4714 
4715     imm = ~(imm << pos);
4716     if (!a->sf) {
4717         imm = (uint32_t)imm;
4718     }
4719     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4720     return true;
4721 }
4722 
4723 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4724 {
4725     int pos = a->hw << 4;
4726     TCGv_i64 tcg_rd, tcg_im;
4727 
4728     tcg_rd = cpu_reg(s, a->rd);
4729     tcg_im = tcg_constant_i64(a->imm);
4730     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4731     if (!a->sf) {
4732         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4733     }
4734     return true;
4735 }
4736 
4737 /*
4738  * Bitfield
4739  */
4740 
4741 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4742 {
4743     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4744     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4745     unsigned int bitsize = a->sf ? 64 : 32;
4746     unsigned int ri = a->immr;
4747     unsigned int si = a->imms;
4748     unsigned int pos, len;
4749 
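    /*
     * For example, SXTB (SBFM with immr == 0, imms == 7) takes the
     * si >= ri path and becomes a single 8-bit sextract, while ASR #n
     * is the same path with imms == bitsize - 1.
     */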
4750     if (si >= ri) {
4751         /* Wd<s-r:0> = Wn<s:r> */
4752         len = (si - ri) + 1;
4753         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4754         if (!a->sf) {
4755             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4756         }
4757     } else {
4758         /* Wd<32+s-r,32-r> = Wn<s:0> */
4759         len = si + 1;
4760         pos = (bitsize - ri) & (bitsize - 1);
4761 
4762         if (len < ri) {
4763             /*
4764              * Sign extend the destination field from len to fill the
4765              * balance of the word.  Let the deposit below insert all
4766              * of those sign bits.
4767              */
4768             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4769             len = ri;
4770         }
4771 
4772         /*
4773          * We start with zero, and we haven't modified any bits outside
4774          * bitsize, therefore no final zero-extension is needed for !sf.
4775          */
4776         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4777     }
4778     return true;
4779 }
4780 
4781 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4782 {
4783     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4784     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4785     unsigned int bitsize = a->sf ? 64 : 32;
4786     unsigned int ri = a->immr;
4787     unsigned int si = a->imms;
4788     unsigned int pos, len;
4789 
4793     if (si >= ri) {
4794         /* Wd<s-r:0> = Wn<s:r> */
4795         len = (si - ri) + 1;
4796         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4797     } else {
4798         /* Wd<32+s-r,32-r> = Wn<s:0> */
4799         len = si + 1;
4800         pos = (bitsize - ri) & (bitsize - 1);
4801         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4802     }
4803     return true;
4804 }
4805 
4806 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4807 {
4808     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4809     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4810     unsigned int bitsize = a->sf ? 64 : 32;
4811     unsigned int ri = a->immr;
4812     unsigned int si = a->imms;
4813     unsigned int pos, len;
4814 
4818     if (si >= ri) {
4819         /* Wd<s-r:0> = Wn<s:r> */
4820         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4821         len = (si - ri) + 1;
4822         pos = 0;
4823     } else {
4824         /* Wd<32+s-r,32-r> = Wn<s:0> */
4825         len = si + 1;
4826         pos = (bitsize - ri) & (bitsize - 1);
4827     }
4828 
4829     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4830     if (!a->sf) {
4831         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4832     }
4833     return true;
4834 }
4835 
4836 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4837 {
4838     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4839 
4840     tcg_rd = cpu_reg(s, a->rd);
4841 
4842     if (unlikely(a->imm == 0)) {
4843         /*
4844          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4845          * so an extract from bit 0 is a special case.
4846          */
4847         if (a->sf) {
4848             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4849         } else {
4850             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4851         }
4852     } else {
4853         tcg_rm = cpu_reg(s, a->rm);
4854         tcg_rn = cpu_reg(s, a->rn);
4855 
4856         if (a->sf) {
4857             /* Specialization to ROR happens in EXTRACT2.  */
4858             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4859         } else {
4860             TCGv_i32 t0 = tcg_temp_new_i32();
4861 
4862             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4863             if (a->rm == a->rn) {
4864                 tcg_gen_rotri_i32(t0, t0, a->imm);
4865             } else {
4866                 TCGv_i32 t1 = tcg_temp_new_i32();
4867                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4868                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4869             }
4870             tcg_gen_extu_i32_i64(tcg_rd, t0);
4871         }
4872     }
4873     return true;
4874 }
4875 
4876 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4877 {
4878     if (fp_access_check(s)) {
4879         int len = (a->len + 1) * 16;
4880 
4881         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4882                            vec_full_reg_offset(s, a->rm), tcg_env,
4883                            a->q ? 16 : 8, vec_full_reg_size(s),
4884                            (len << 6) | (a->tbx << 5) | a->rn,
4885                            gen_helper_simd_tblx);
4886     }
4887     return true;
4888 }
4889 
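/*
 * Common expansion for UZP/TRN/ZIP: 'fn' maps output element 'i' to an
 * index into the concatenation of Rn (indices 0..elements-1) and Rm
 * (indices elements..2*elements-1).
 */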
4890 typedef int simd_permute_idx_fn(int i, int part, int elements);
4891 
4892 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4893                             simd_permute_idx_fn *fn, int part)
4894 {
4895     MemOp esz = a->esz;
4896     int datasize = a->q ? 16 : 8;
4897     int elements = datasize >> esz;
4898     TCGv_i64 tcg_res[2], tcg_ele;
4899 
4900     if (esz == MO_64 && !a->q) {
4901         return false;
4902     }
4903     if (!fp_access_check(s)) {
4904         return true;
4905     }
4906 
4907     tcg_res[0] = tcg_temp_new_i64();
4908     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4909     tcg_ele = tcg_temp_new_i64();
4910 
4911     for (int i = 0; i < elements; i++) {
4912         int o, w, idx;
4913 
4914         idx = fn(i, part, elements);
4915         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4916                          idx & (elements - 1), esz);
4917 
4918         w = (i << (esz + 3)) / 64;
4919         o = (i << (esz + 3)) % 64;
4920         if (o == 0) {
4921             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4922         } else {
4923             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4924         }
4925     }
4926 
4927     for (int i = a->q; i >= 0; --i) {
4928         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4929     }
4930     clear_vec_high(s, a->q, a->rd);
4931     return true;
4932 }
4933 
4934 static int permute_load_uzp(int i, int part, int elements)
4935 {
4936     return 2 * i + part;
4937 }
4938 
4939 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4940 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4941 
4942 static int permute_load_trn(int i, int part, int elements)
4943 {
4944     return (i & 1) * elements + (i & ~1) + part;
4945 }
4946 
4947 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4948 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4949 
4950 static int permute_load_zip(int i, int part, int elements)
4951 {
4952     return (i & 1) * elements + ((part * elements + i) >> 1);
4953 }
4954 
4955 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4956 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4957 
4958 /*
4959  * Cryptographic AES, SHA, SHA512
4960  */
4961 
4962 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4963 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4964 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4965 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4966 
4967 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4968 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4969 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4970 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4971 
4972 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4973 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4974 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4975 
4976 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4977 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4978 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4979 
4980 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4981 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4982 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4983 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4984 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4985 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4986 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4987 
4988 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4989 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4990 
4991 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4992 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4993 
4994 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4995 {
4996     if (!dc_isar_feature(aa64_sm3, s)) {
4997         return false;
4998     }
4999     if (fp_access_check(s)) {
5000         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
5001         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
5002         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
5003         TCGv_i32 tcg_res = tcg_temp_new_i32();
5004 
5005         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5006         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5007         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5008 
5009         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5010         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5011         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5012         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5013 
5014         /* Clear the whole register first, then store bits [127:96]. */
5015         clear_vec(s, a->rd);
5016         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5017     }
5018     return true;
5019 }
5020 
5021 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5022 {
5023     if (fp_access_check(s)) {
5024         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5025     }
5026     return true;
5027 }
5028 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5029 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5030 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5031 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5032 
5033 static bool trans_XAR(DisasContext *s, arg_XAR *a)
5034 {
5035     if (!dc_isar_feature(aa64_sha3, s)) {
5036         return false;
5037     }
5038     if (fp_access_check(s)) {
5039         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5040                      vec_full_reg_offset(s, a->rn),
5041                      vec_full_reg_offset(s, a->rm), a->imm, 16,
5042                      vec_full_reg_size(s));
5043     }
5044     return true;
5045 }
5046 
5047 /*
5048  * Advanced SIMD copy
5049  */
5050 
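/*
 * Decode an element size and index from a combined immediate: the
 * lowest set bit gives the element size, the bits above it the index.
 * Fails for element sizes larger than MO_64.
 */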
5051 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5052 {
5053     unsigned esz = ctz32(imm);
5054     if (esz <= MO_64) {
5055         *pesz = esz;
5056         *pidx = imm >> (esz + 1);
5057         return true;
5058     }
5059     return false;
5060 }
5061 
5062 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5063 {
5064     MemOp esz;
5065     unsigned idx;
5066 
5067     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5068         return false;
5069     }
5070     if (fp_access_check(s)) {
5071         /*
5072          * This instruction just extracts the specified element and
5073          * zero-extends it into the bottom of the destination register.
5074          */
5075         TCGv_i64 tmp = tcg_temp_new_i64();
5076         read_vec_element(s, tmp, a->rn, idx, esz);
5077         write_fp_dreg(s, a->rd, tmp);
5078     }
5079     return true;
5080 }
5081 
5082 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5083 {
5084     MemOp esz;
5085     unsigned idx;
5086 
5087     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5088         return false;
5089     }
5090     if (esz == MO_64 && !a->q) {
5091         return false;
5092     }
5093     if (fp_access_check(s)) {
5094         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5095                              vec_reg_offset(s, a->rn, idx, esz),
5096                              a->q ? 16 : 8, vec_full_reg_size(s));
5097     }
5098     return true;
5099 }
5100 
5101 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5102 {
5103     MemOp esz;
5104     unsigned idx;
5105 
5106     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5107         return false;
5108     }
5109     if (esz == MO_64 && !a->q) {
5110         return false;
5111     }
5112     if (fp_access_check(s)) {
5113         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5114                              a->q ? 16 : 8, vec_full_reg_size(s),
5115                              cpu_reg(s, a->rn));
5116     }
5117     return true;
5118 }
5119 
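/*
 * SMOV (is_signed == MO_SIGN) and UMOV: copy a vector element to a
 * general-purpose register, sign- or zero-extending it.  The esz/Q
 * checks reject the unallocated encodings.
 */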
5120 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5121 {
5122     MemOp esz;
5123     unsigned idx;
5124 
5125     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5126         return false;
5127     }
5128     if (is_signed) {
5129         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5130             return false;
5131         }
5132     } else {
5133         if (esz == MO_64 ? !a->q : a->q) {
5134             return false;
5135         }
5136     }
5137     if (fp_access_check(s)) {
5138         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5139         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5140         if (is_signed && !a->q) {
5141             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5142         }
5143     }
5144     return true;
5145 }
5146 
5147 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5148 TRANS(UMOV, do_smov_umov, a, 0)
5149 
5150 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5151 {
5152     MemOp esz;
5153     unsigned idx;
5154 
5155     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5156         return false;
5157     }
5158     if (fp_access_check(s)) {
5159         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5160         clear_vec_high(s, true, a->rd);
5161     }
5162     return true;
5163 }
5164 
5165 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5166 {
5167     MemOp esz;
5168     unsigned didx, sidx;
5169 
5170     if (!decode_esz_idx(a->di, &esz, &didx)) {
5171         return false;
5172     }
5173     sidx = a->si >> esz;
5174     if (fp_access_check(s)) {
5175         TCGv_i64 tmp = tcg_temp_new_i64();
5176 
5177         read_vec_element(s, tmp, a->rn, sidx, esz);
5178         write_vec_element(s, tmp, a->rd, didx, esz);
5179 
5180         /* INS is considered a 128-bit write for SVE. */
5181         clear_vec_high(s, true, a->rd);
5182     }
5183     return true;
5184 }
5185 
5186 /*
5187  * Advanced SIMD three same
5188  */
5189 
5190 typedef struct FPScalar {
5191     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5192     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5193     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5194 } FPScalar;
5195 
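/*
 * Expand a scalar FP 3-operand op across the H/S/D element sizes,
 * using the supplied float_status flavour.  'mergereg' names the
 * register passed to the write_fp_*reg_merging() result writes.
 */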
5196 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5197                                         const FPScalar *f, int mergereg,
5198                                         ARMFPStatusFlavour fpsttype)
5199 {
5200     switch (a->esz) {
5201     case MO_64:
5202         if (fp_access_check(s)) {
5203             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5204             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5205             f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5206             write_fp_dreg_merging(s, a->rd, mergereg, t0);
5207         }
5208         break;
5209     case MO_32:
5210         if (fp_access_check(s)) {
5211             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5212             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5213             f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5214             write_fp_sreg_merging(s, a->rd, mergereg, t0);
5215         }
5216         break;
5217     case MO_16:
5218         if (!dc_isar_feature(aa64_fp16, s)) {
5219             return false;
5220         }
5221         if (fp_access_check(s)) {
5222             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5223             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5224             f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5225             write_fp_hreg_merging(s, a->rd, mergereg, t0);
5226         }
5227         break;
5228     default:
5229         return false;
5230     }
5231     return true;
5232 }
5233 
5234 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5235                           int mergereg)
5236 {
5237     return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5238                                        a->esz == MO_16 ?
5239                                        FPST_A64_F16 : FPST_A64);
5240 }
5241 
5242 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5243                                  const FPScalar *fnormal, const FPScalar *fah,
5244                                  int mergereg)
5245 {
5246     return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5247                                        mergereg, select_ah_fpst(s, a->esz));
5248 }
5249 
5250 /* Some insns need to call different helpers when FPCR.AH == 1 */
5251 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5252                               const FPScalar *fnormal,
5253                               const FPScalar *fah,
5254                               int mergereg)
5255 {
5256     return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5257 }
5258 
5259 static const FPScalar f_scalar_fadd = {
5260     gen_helper_vfp_addh,
5261     gen_helper_vfp_adds,
5262     gen_helper_vfp_addd,
5263 };
5264 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5265 
5266 static const FPScalar f_scalar_fsub = {
5267     gen_helper_vfp_subh,
5268     gen_helper_vfp_subs,
5269     gen_helper_vfp_subd,
5270 };
5271 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5272 
5273 static const FPScalar f_scalar_fdiv = {
5274     gen_helper_vfp_divh,
5275     gen_helper_vfp_divs,
5276     gen_helper_vfp_divd,
5277 };
5278 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5279 
5280 static const FPScalar f_scalar_fmul = {
5281     gen_helper_vfp_mulh,
5282     gen_helper_vfp_muls,
5283     gen_helper_vfp_muld,
5284 };
5285 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5286 
5287 static const FPScalar f_scalar_fmax = {
5288     gen_helper_vfp_maxh,
5289     gen_helper_vfp_maxs,
5290     gen_helper_vfp_maxd,
5291 };
5292 static const FPScalar f_scalar_fmax_ah = {
5293     gen_helper_vfp_ah_maxh,
5294     gen_helper_vfp_ah_maxs,
5295     gen_helper_vfp_ah_maxd,
5296 };
5297 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5298 
5299 static const FPScalar f_scalar_fmin = {
5300     gen_helper_vfp_minh,
5301     gen_helper_vfp_mins,
5302     gen_helper_vfp_mind,
5303 };
5304 static const FPScalar f_scalar_fmin_ah = {
5305     gen_helper_vfp_ah_minh,
5306     gen_helper_vfp_ah_mins,
5307     gen_helper_vfp_ah_mind,
5308 };
5309 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5310 
5311 static const FPScalar f_scalar_fmaxnm = {
5312     gen_helper_vfp_maxnumh,
5313     gen_helper_vfp_maxnums,
5314     gen_helper_vfp_maxnumd,
5315 };
5316 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5317 
5318 static const FPScalar f_scalar_fminnm = {
5319     gen_helper_vfp_minnumh,
5320     gen_helper_vfp_minnums,
5321     gen_helper_vfp_minnumd,
5322 };
5323 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5324 
5325 static const FPScalar f_scalar_fmulx = {
5326     gen_helper_advsimd_mulxh,
5327     gen_helper_vfp_mulxs,
5328     gen_helper_vfp_mulxd,
5329 };
5330 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5331 
5332 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5333 {
5334     gen_helper_vfp_mulh(d, n, m, s);
5335     gen_vfp_negh(d, d);
5336 }
5337 
5338 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5339 {
5340     gen_helper_vfp_muls(d, n, m, s);
5341     gen_vfp_negs(d, d);
5342 }
5343 
5344 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5345 {
5346     gen_helper_vfp_muld(d, n, m, s);
5347     gen_vfp_negd(d, d);
5348 }
5349 
5350 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5351 {
5352     gen_helper_vfp_mulh(d, n, m, s);
5353     gen_vfp_ah_negh(d, d);
5354 }
5355 
5356 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5357 {
5358     gen_helper_vfp_muls(d, n, m, s);
5359     gen_vfp_ah_negs(d, d);
5360 }
5361 
5362 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5363 {
5364     gen_helper_vfp_muld(d, n, m, s);
5365     gen_vfp_ah_negd(d, d);
5366 }
5367 
5368 static const FPScalar f_scalar_fnmul = {
5369     gen_fnmul_h,
5370     gen_fnmul_s,
5371     gen_fnmul_d,
5372 };
5373 static const FPScalar f_scalar_ah_fnmul = {
5374     gen_fnmul_ah_h,
5375     gen_fnmul_ah_s,
5376     gen_fnmul_ah_d,
5377 };
5378 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5379 
5380 static const FPScalar f_scalar_fcmeq = {
5381     gen_helper_advsimd_ceq_f16,
5382     gen_helper_neon_ceq_f32,
5383     gen_helper_neon_ceq_f64,
5384 };
5385 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5386 
5387 static const FPScalar f_scalar_fcmge = {
5388     gen_helper_advsimd_cge_f16,
5389     gen_helper_neon_cge_f32,
5390     gen_helper_neon_cge_f64,
5391 };
5392 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5393 
5394 static const FPScalar f_scalar_fcmgt = {
5395     gen_helper_advsimd_cgt_f16,
5396     gen_helper_neon_cgt_f32,
5397     gen_helper_neon_cgt_f64,
5398 };
5399 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5400 
5401 static const FPScalar f_scalar_facge = {
5402     gen_helper_advsimd_acge_f16,
5403     gen_helper_neon_acge_f32,
5404     gen_helper_neon_acge_f64,
5405 };
5406 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5407 
5408 static const FPScalar f_scalar_facgt = {
5409     gen_helper_advsimd_acgt_f16,
5410     gen_helper_neon_acgt_f32,
5411     gen_helper_neon_acgt_f64,
5412 };
5413 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5414 
5415 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5416 {
5417     gen_helper_vfp_subh(d, n, m, s);
5418     gen_vfp_absh(d, d);
5419 }
5420 
5421 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5422 {
5423     gen_helper_vfp_subs(d, n, m, s);
5424     gen_vfp_abss(d, d);
5425 }
5426 
5427 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5428 {
5429     gen_helper_vfp_subd(d, n, m, s);
5430     gen_vfp_absd(d, d);
5431 }
5432 
5433 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5434 {
5435     gen_helper_vfp_subh(d, n, m, s);
5436     gen_vfp_ah_absh(d, d);
5437 }
5438 
5439 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5440 {
5441     gen_helper_vfp_subs(d, n, m, s);
5442     gen_vfp_ah_abss(d, d);
5443 }
5444 
5445 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5446 {
5447     gen_helper_vfp_subd(d, n, m, s);
5448     gen_vfp_ah_absd(d, d);
5449 }
5450 
5451 static const FPScalar f_scalar_fabd = {
5452     gen_fabd_h,
5453     gen_fabd_s,
5454     gen_fabd_d,
5455 };
5456 static const FPScalar f_scalar_ah_fabd = {
5457     gen_fabd_ah_h,
5458     gen_fabd_ah_s,
5459     gen_fabd_ah_d,
5460 };
5461 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5462 
5463 static const FPScalar f_scalar_frecps = {
5464     gen_helper_recpsf_f16,
5465     gen_helper_recpsf_f32,
5466     gen_helper_recpsf_f64,
5467 };
5468 static const FPScalar f_scalar_ah_frecps = {
5469     gen_helper_recpsf_ah_f16,
5470     gen_helper_recpsf_ah_f32,
5471     gen_helper_recpsf_ah_f64,
5472 };
5473 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5474       &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5475 
5476 static const FPScalar f_scalar_frsqrts = {
5477     gen_helper_rsqrtsf_f16,
5478     gen_helper_rsqrtsf_f32,
5479     gen_helper_rsqrtsf_f64,
5480 };
5481 static const FPScalar f_scalar_ah_frsqrts = {
5482     gen_helper_rsqrtsf_ah_f16,
5483     gen_helper_rsqrtsf_ah_f32,
5484     gen_helper_rsqrtsf_ah_f64,
5485 };
5486 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5487       &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5488 
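/*
 * Scalar FP compare against zero.  'swap' reverses the operand order
 * so that FCMLT0/FCMLE0 can reuse the GT/GE comparison helpers.
 */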
5489 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5490                        const FPScalar *f, bool swap)
5491 {
5492     switch (a->esz) {
5493     case MO_64:
5494         if (fp_access_check(s)) {
5495             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5496             TCGv_i64 t1 = tcg_constant_i64(0);
5497             if (swap) {
5498                 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5499             } else {
5500                 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5501             }
5502             write_fp_dreg(s, a->rd, t0);
5503         }
5504         break;
5505     case MO_32:
5506         if (fp_access_check(s)) {
5507             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5508             TCGv_i32 t1 = tcg_constant_i32(0);
5509             if (swap) {
5510                 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5511             } else {
5512                 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5513             }
5514             write_fp_sreg(s, a->rd, t0);
5515         }
5516         break;
5517     case MO_16:
5518         if (!dc_isar_feature(aa64_fp16, s)) {
5519             return false;
5520         }
5521         if (fp_access_check(s)) {
5522             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5523             TCGv_i32 t1 = tcg_constant_i32(0);
5524             if (swap) {
5525                 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5526             } else {
5527                 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5528             }
5529             write_fp_sreg(s, a->rd, t0);
5530         }
5531         break;
5532     default:
5533         return false;
5534     }
5535     return true;
5536 }
5537 
5538 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5539 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5540 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5541 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5542 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
5543 
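/*
 * Scalar saturating add/sub: operate on the low element of Rn/Rm,
 * update the QC flag, and write the result as a scalar.
 */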
5544 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5545                 MemOp sgn_n, MemOp sgn_m,
5546                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5547                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5548 {
5549     TCGv_i64 t0, t1, t2, qc;
5550     MemOp esz = a->esz;
5551 
5552     if (!fp_access_check(s)) {
5553         return true;
5554     }
5555 
5556     t0 = tcg_temp_new_i64();
5557     t1 = tcg_temp_new_i64();
5558     t2 = tcg_temp_new_i64();
5559     qc = tcg_temp_new_i64();
5560     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5561     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5562     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5563 
5564     if (esz == MO_64) {
5565         gen_d(t0, qc, t1, t2);
5566     } else {
5567         gen_bhs(t0, qc, t1, t2, esz);
5568         tcg_gen_ext_i64(t0, t0, esz);
5569     }
5570 
5571     write_fp_dreg(s, a->rd, t0);
5572     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5573     return true;
5574 }
5575 
5576 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5577 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5578 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5579 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5580 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5581 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5582 
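/* Scalar integer 3-operand ops that exist only at 64 bits (Dd = Dn op Dm). */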
5583 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5584                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5585 {
5586     if (fp_access_check(s)) {
5587         TCGv_i64 t0 = tcg_temp_new_i64();
5588         TCGv_i64 t1 = tcg_temp_new_i64();
5589 
5590         read_vec_element(s, t0, a->rn, 0, MO_64);
5591         read_vec_element(s, t1, a->rm, 0, MO_64);
5592         fn(t0, t0, t1);
5593         write_fp_dreg(s, a->rd, t0);
5594     }
5595     return true;
5596 }
5597 
5598 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5599 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5600 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5601 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5602 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5603 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5604 
5605 typedef struct ENVScalar2 {
5606     NeonGenTwoOpEnvFn *gen_bhs[3];
5607     NeonGenTwo64OpEnvFn *gen_d;
5608 } ENVScalar2;
5609 
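/*
 * Scalar 2-operand ops whose helpers take the CPU env pointer,
 * e.g. to update QC on saturation.
 */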
5610 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5611 {
5612     if (!fp_access_check(s)) {
5613         return true;
5614     }
5615     if (a->esz == MO_64) {
5616         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5617         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5618         f->gen_d(t0, tcg_env, t0, t1);
5619         write_fp_dreg(s, a->rd, t0);
5620     } else {
5621         TCGv_i32 t0 = tcg_temp_new_i32();
5622         TCGv_i32 t1 = tcg_temp_new_i32();
5623 
5624         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5625         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5626         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5627         write_fp_sreg(s, a->rd, t0);
5628     }
5629     return true;
5630 }
5631 
5632 static const ENVScalar2 f_scalar_sqshl = {
5633     { gen_helper_neon_qshl_s8,
5634       gen_helper_neon_qshl_s16,
5635       gen_helper_neon_qshl_s32 },
5636     gen_helper_neon_qshl_s64,
5637 };
5638 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5639 
5640 static const ENVScalar2 f_scalar_uqshl = {
5641     { gen_helper_neon_qshl_u8,
5642       gen_helper_neon_qshl_u16,
5643       gen_helper_neon_qshl_u32 },
5644     gen_helper_neon_qshl_u64,
5645 };
5646 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5647 
5648 static const ENVScalar2 f_scalar_sqrshl = {
5649     { gen_helper_neon_qrshl_s8,
5650       gen_helper_neon_qrshl_s16,
5651       gen_helper_neon_qrshl_s32 },
5652     gen_helper_neon_qrshl_s64,
5653 };
5654 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5655 
5656 static const ENVScalar2 f_scalar_uqrshl = {
5657     { gen_helper_neon_qrshl_u8,
5658       gen_helper_neon_qrshl_u16,
5659       gen_helper_neon_qrshl_u32 },
5660     gen_helper_neon_qrshl_u64,
5661 };
5662 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5663 
5664 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5665                               const ENVScalar2 *f)
5666 {
5667     if (a->esz == MO_16 || a->esz == MO_32) {
5668         return do_env_scalar2(s, a, f);
5669     }
5670     return false;
5671 }
5672 
5673 static const ENVScalar2 f_scalar_sqdmulh = {
5674     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5675 };
5676 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5677 
5678 static const ENVScalar2 f_scalar_sqrdmulh = {
5679     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5680 };
5681 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5682 
5683 typedef struct ENVScalar3 {
5684     NeonGenThreeOpEnvFn *gen_hs[2];
5685 } ENVScalar3;
5686 
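/*
 * Scalar 3-operand (accumulating) ops, 16- and 32-bit elements only,
 * whose helpers take the CPU env pointer.
 */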
5687 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5688                               const ENVScalar3 *f)
5689 {
5690     TCGv_i32 t0, t1, t2;
5691 
5692     if (a->esz != MO_16 && a->esz != MO_32) {
5693         return false;
5694     }
5695     if (!fp_access_check(s)) {
5696         return true;
5697     }
5698 
5699     t0 = tcg_temp_new_i32();
5700     t1 = tcg_temp_new_i32();
5701     t2 = tcg_temp_new_i32();
5702     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5703     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5704     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5705     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5706     write_fp_sreg(s, a->rd, t0);
5707     return true;
5708 }
5709 
5710 static const ENVScalar3 f_scalar_sqrdmlah = {
5711     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5712 };
5713 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5714 
5715 static const ENVScalar3 f_scalar_sqrdmlsh = {
5716     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5717 };
5718 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5719 
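/* Scalar integer compare: produce all-ones (true) or all-zeroes (false). */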
5720 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5721 {
5722     if (fp_access_check(s)) {
5723         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5724         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5725         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5726         write_fp_dreg(s, a->rd, t0);
5727     }
5728     return true;
5729 }
5730 
5731 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5732 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5733 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5734 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5735 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5736 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5737 
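/*
 * Expand a vector FP 3-operand op across the H/S/D element sizes,
 * using the supplied float_status flavour.  Unallocated size/Q
 * combinations are rejected by fp_access_check_vector_hsd().
 */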
5738 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
5739                                         int data,
5740                                         gen_helper_gvec_3_ptr * const fns[3],
5741                                         ARMFPStatusFlavour fpsttype)
5742 {
5743     MemOp esz = a->esz;
5744     int check = fp_access_check_vector_hsd(s, a->q, esz);
5745 
5746     if (check <= 0) {
5747         return check == 0;
5748     }
5749 
5750     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
5751                       data, fns[esz - 1]);
5752     return true;
5753 }
5754 
5755 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5756                           gen_helper_gvec_3_ptr * const fns[3])
5757 {
5758     return do_fp3_vector_with_fpsttype(s, a, data, fns,
5759                                        a->esz == MO_16 ?
5760                                        FPST_A64_F16 : FPST_A64);
5761 }
5762 
5763 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5764                               gen_helper_gvec_3_ptr * const fnormal[3],
5765                               gen_helper_gvec_3_ptr * const fah[3])
5766 {
5767     return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
5768 }
5769 
5770 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5771                                  gen_helper_gvec_3_ptr * const fnormal[3],
5772                                  gen_helper_gvec_3_ptr * const fah[3])
5773 {
5774     return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
5775                                        select_ah_fpst(s, a->esz));
5776 }
5777 
5778 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5779     gen_helper_gvec_fadd_h,
5780     gen_helper_gvec_fadd_s,
5781     gen_helper_gvec_fadd_d,
5782 };
5783 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5784 
5785 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5786     gen_helper_gvec_fsub_h,
5787     gen_helper_gvec_fsub_s,
5788     gen_helper_gvec_fsub_d,
5789 };
5790 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5791 
5792 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5793     gen_helper_gvec_fdiv_h,
5794     gen_helper_gvec_fdiv_s,
5795     gen_helper_gvec_fdiv_d,
5796 };
5797 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5798 
5799 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5800     gen_helper_gvec_fmul_h,
5801     gen_helper_gvec_fmul_s,
5802     gen_helper_gvec_fmul_d,
5803 };
5804 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5805 
5806 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5807     gen_helper_gvec_fmax_h,
5808     gen_helper_gvec_fmax_s,
5809     gen_helper_gvec_fmax_d,
5810 };
5811 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
5812     gen_helper_gvec_ah_fmax_h,
5813     gen_helper_gvec_ah_fmax_s,
5814     gen_helper_gvec_ah_fmax_d,
5815 };
5816 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
5817 
5818 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5819     gen_helper_gvec_fmin_h,
5820     gen_helper_gvec_fmin_s,
5821     gen_helper_gvec_fmin_d,
5822 };
5823 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
5824     gen_helper_gvec_ah_fmin_h,
5825     gen_helper_gvec_ah_fmin_s,
5826     gen_helper_gvec_ah_fmin_d,
5827 };
5828 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
5829 
5830 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5831     gen_helper_gvec_fmaxnum_h,
5832     gen_helper_gvec_fmaxnum_s,
5833     gen_helper_gvec_fmaxnum_d,
5834 };
5835 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5836 
5837 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5838     gen_helper_gvec_fminnum_h,
5839     gen_helper_gvec_fminnum_s,
5840     gen_helper_gvec_fminnum_d,
5841 };
5842 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5843 
5844 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5845     gen_helper_gvec_fmulx_h,
5846     gen_helper_gvec_fmulx_s,
5847     gen_helper_gvec_fmulx_d,
5848 };
5849 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5850 
5851 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5852     gen_helper_gvec_vfma_h,
5853     gen_helper_gvec_vfma_s,
5854     gen_helper_gvec_vfma_d,
5855 };
5856 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5857 
5858 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5859     gen_helper_gvec_vfms_h,
5860     gen_helper_gvec_vfms_s,
5861     gen_helper_gvec_vfms_d,
5862 };
5863 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
5864     gen_helper_gvec_ah_vfms_h,
5865     gen_helper_gvec_ah_vfms_s,
5866     gen_helper_gvec_ah_vfms_d,
5867 };
5868 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
5869 
5870 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5871     gen_helper_gvec_fceq_h,
5872     gen_helper_gvec_fceq_s,
5873     gen_helper_gvec_fceq_d,
5874 };
5875 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5876 
5877 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5878     gen_helper_gvec_fcge_h,
5879     gen_helper_gvec_fcge_s,
5880     gen_helper_gvec_fcge_d,
5881 };
5882 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5883 
5884 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5885     gen_helper_gvec_fcgt_h,
5886     gen_helper_gvec_fcgt_s,
5887     gen_helper_gvec_fcgt_d,
5888 };
5889 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5890 
5891 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5892     gen_helper_gvec_facge_h,
5893     gen_helper_gvec_facge_s,
5894     gen_helper_gvec_facge_d,
5895 };
5896 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5897 
5898 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5899     gen_helper_gvec_facgt_h,
5900     gen_helper_gvec_facgt_s,
5901     gen_helper_gvec_facgt_d,
5902 };
5903 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5904 
5905 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5906     gen_helper_gvec_fabd_h,
5907     gen_helper_gvec_fabd_s,
5908     gen_helper_gvec_fabd_d,
5909 };
5910 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
5911     gen_helper_gvec_ah_fabd_h,
5912     gen_helper_gvec_ah_fabd_s,
5913     gen_helper_gvec_ah_fabd_d,
5914 };
5915 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
5916 
5917 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5918     gen_helper_gvec_recps_h,
5919     gen_helper_gvec_recps_s,
5920     gen_helper_gvec_recps_d,
5921 };
5922 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
5923     gen_helper_gvec_ah_recps_h,
5924     gen_helper_gvec_ah_recps_s,
5925     gen_helper_gvec_ah_recps_d,
5926 };
5927 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
5928 
5929 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5930     gen_helper_gvec_rsqrts_h,
5931     gen_helper_gvec_rsqrts_s,
5932     gen_helper_gvec_rsqrts_d,
5933 };
5934 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
5935     gen_helper_gvec_ah_rsqrts_h,
5936     gen_helper_gvec_ah_rsqrts_s,
5937     gen_helper_gvec_ah_rsqrts_d,
5938 };
5939 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
5940 
5941 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5942     gen_helper_gvec_faddp_h,
5943     gen_helper_gvec_faddp_s,
5944     gen_helper_gvec_faddp_d,
5945 };
5946 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5947 
5948 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5949     gen_helper_gvec_fmaxp_h,
5950     gen_helper_gvec_fmaxp_s,
5951     gen_helper_gvec_fmaxp_d,
5952 };
5953 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
5954     gen_helper_gvec_ah_fmaxp_h,
5955     gen_helper_gvec_ah_fmaxp_s,
5956     gen_helper_gvec_ah_fmaxp_d,
5957 };
5958 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
5959 
5960 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5961     gen_helper_gvec_fminp_h,
5962     gen_helper_gvec_fminp_s,
5963     gen_helper_gvec_fminp_d,
5964 };
5965 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
5966     gen_helper_gvec_ah_fminp_h,
5967     gen_helper_gvec_ah_fminp_s,
5968     gen_helper_gvec_ah_fminp_d,
5969 };
5970 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
5971 
5972 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5973     gen_helper_gvec_fmaxnump_h,
5974     gen_helper_gvec_fmaxnump_s,
5975     gen_helper_gvec_fmaxnump_d,
5976 };
5977 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5978 
5979 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5980     gen_helper_gvec_fminnump_h,
5981     gen_helper_gvec_fminnump_s,
5982     gen_helper_gvec_fminnump_d,
5983 };
5984 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5985 
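/*
 * FMLAL/FMLSL (vector): widening half-to-single fused multiply-add.
 * 'is_s' selects the subtracting (FMLSL) form and 'is_2' the variant
 * using the upper half of the inputs; both are passed via 'data'.
 */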
5986 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5987 {
5988     if (fp_access_check(s)) {
5989         int data = (is_2 << 1) | is_s;
5990         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5991                            vec_full_reg_offset(s, a->rn),
5992                            vec_full_reg_offset(s, a->rm), tcg_env,
5993                            a->q ? 16 : 8, vec_full_reg_size(s),
5994                            data, gen_helper_gvec_fmlal_a64);
5995     }
5996     return true;
5997 }
5998 
5999 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
6000 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
6001 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
6002 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
6003 
6004 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6005 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6006 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6007 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6008 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6009 
6010 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6011 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6012 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6013 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6014 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6015 
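/* BSL/BIT/BIF: bitwise select; they differ only in operand ordering. */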
6016 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6017 {
6018     if (fp_access_check(s)) {
6019         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6020     }
6021     return true;
6022 }
6023 
6024 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6025 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6026 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
6027 
6028 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6029 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6030 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6031 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6032 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6033 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6034 
6035 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6036 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6037 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6038 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6039 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6040 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6041 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6042 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6043 
6044 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6045 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6046 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6047 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6048 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6049 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6050 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6051 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6052 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6053 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6054 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6055 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6056 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6057 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6058 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6059 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6060 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6061 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6062 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6063 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6064 
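/* Vector integer compare, producing an all-ones/all-zeroes mask per element. */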
6065 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6066 {
6067     if (a->esz == MO_64 && !a->q) {
6068         return false;
6069     }
6070     if (fp_access_check(s)) {
6071         tcg_gen_gvec_cmp(cond, a->esz,
6072                          vec_full_reg_offset(s, a->rd),
6073                          vec_full_reg_offset(s, a->rn),
6074                          vec_full_reg_offset(s, a->rm),
6075                          a->q ? 16 : 8, vec_full_reg_size(s));
6076     }
6077     return true;
6078 }
6079 
6080 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6081 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6082 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6083 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6084 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6085 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6086 
6087 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6088 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6089 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6090 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6091 
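/* Dot-product style ops: Rd is both accumulator and destination. */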
6092 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6093                           gen_helper_gvec_4 *fn)
6094 {
6095     if (fp_access_check(s)) {
6096         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6097     }
6098     return true;
6099 }
6100 
6101 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6102                               gen_helper_gvec_4_ptr *fn)
6103 {
6104     if (fp_access_check(s)) {
6105         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6106     }
6107     return true;
6108 }
6109 
6110 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
6111 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
6112 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
6113 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6114 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6115 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6116 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6117 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6118 
6119 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6120 {
6121     if (!dc_isar_feature(aa64_bf16, s)) {
6122         return false;
6123     }
6124     if (fp_access_check(s)) {
6125         /* Q bit selects BFMLALB vs BFMLALT. */
6126         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6127                           s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6128                           gen_helper_gvec_bfmlal);
6129     }
6130     return true;
6131 }
6132 
6133 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6134     gen_helper_gvec_fcaddh,
6135     gen_helper_gvec_fcadds,
6136     gen_helper_gvec_fcaddd,
6137 };
6138 /*
6139  * Encode FPCR.AH into the data so the helper knows whether the
6140  * negations it does should avoid flipping the sign bit on a NaN
6141  */
6142 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6143            f_vector_fcadd)
6144 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6145            f_vector_fcadd)
6146 
6147 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6148 {
6149     static gen_helper_gvec_4_ptr * const fn[] = {
6150         [MO_16] = gen_helper_gvec_fcmlah,
6151         [MO_32] = gen_helper_gvec_fcmlas,
6152         [MO_64] = gen_helper_gvec_fcmlad,
6153     };
6154     int check;
6155 
6156     if (!dc_isar_feature(aa64_fcma, s)) {
6157         return false;
6158     }
6159 
6160     check = fp_access_check_vector_hsd(s, a->q, a->esz);
6161     if (check <= 0) {
6162         return check == 0;
6163     }
6164 
6165     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6166                       a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6167                       a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6168     return true;
6169 }
6170 
6171 /*
6172  * Widening vector x vector/indexed.
6173  *
6174  * These read from the top or bottom half of a 128-bit vector.
6175  * After widening, optionally accumulate with a 128-bit vector.
6176  * Implement these inline, as the number of elements is limited
6177  * and the related SVE and SME operations on larger vectors use
6178  * even/odd elements instead of top/bottom half.
6179  *
6180  * If idx >= 0, operand 2 is indexed, otherwise vector.
6181  * If acc, operand 0 is loaded with rd.
6182  */
6183 
6184 /* For low half, iterating up. */
6185 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6186                             int rd, int rn, int rm, int idx,
6187                             NeonGenTwo64OpFn *fn, bool acc)
6188 {
6189     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6190     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6191     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6192     MemOp esz = memop & MO_SIZE;
6193     int half = 8 >> esz;
6194     int top_swap, top_half;
6195 
6196     /* There are no 64x64->128 bit operations. */
6197     if (esz >= MO_64) {
6198         return false;
6199     }
6200     if (!fp_access_check(s)) {
6201         return true;
6202     }
6203 
6204     if (idx >= 0) {
6205         read_vec_element(s, tcg_op2, rm, idx, memop);
6206     }
6207 
6208     /*
6209      * For top half inputs, iterate forward; backward for bottom half.
6210      * This means the store to the destination will not occur until
6211      * overlapping inputs are consumed.
6212      * Use top_swap to conditionally invert the forward iteration index.
6213      */
6214     top_swap = top ? 0 : half - 1;
6215     top_half = top ? half : 0;
6216 
6217     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6218         int elt = elt_fwd ^ top_swap;
6219 
6220         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6221         if (idx < 0) {
6222             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6223         }
6224         if (acc) {
6225             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6226         }
6227         fn(tcg_op0, tcg_op1, tcg_op2);
6228         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6229     }
6230     clear_vec_high(s, 1, rd);
6231     return true;
6232 }
6233 
6234 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6235 {
6236     TCGv_i64 t = tcg_temp_new_i64();
6237     tcg_gen_mul_i64(t, n, m);
6238     tcg_gen_add_i64(d, d, t);
6239 }
6240 
6241 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6242 {
6243     TCGv_i64 t = tcg_temp_new_i64();
6244     tcg_gen_mul_i64(t, n, m);
6245     tcg_gen_sub_i64(d, d, t);
6246 }
6247 
6248 TRANS(SMULL_v, do_3op_widening,
6249       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6250       tcg_gen_mul_i64, false)
6251 TRANS(UMULL_v, do_3op_widening,
6252       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6253       tcg_gen_mul_i64, false)
6254 TRANS(SMLAL_v, do_3op_widening,
6255       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6256       gen_muladd_i64, true)
6257 TRANS(UMLAL_v, do_3op_widening,
6258       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6259       gen_muladd_i64, true)
6260 TRANS(SMLSL_v, do_3op_widening,
6261       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6262       gen_mulsub_i64, true)
6263 TRANS(UMLSL_v, do_3op_widening,
6264       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6265       gen_mulsub_i64, true)
6266 
6267 TRANS(SMULL_vi, do_3op_widening,
6268       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6269       tcg_gen_mul_i64, false)
6270 TRANS(UMULL_vi, do_3op_widening,
6271       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6272       tcg_gen_mul_i64, false)
6273 TRANS(SMLAL_vi, do_3op_widening,
6274       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6275       gen_muladd_i64, true)
6276 TRANS(UMLAL_vi, do_3op_widening,
6277       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6278       gen_muladd_i64, true)
6279 TRANS(SMLSL_vi, do_3op_widening,
6280       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6281       gen_mulsub_i64, true)
6282 TRANS(UMLSL_vi, do_3op_widening,
6283       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6284       gen_mulsub_i64, true)
6285 
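/*
 * Signed/unsigned absolute difference, and the accumulating forms,
 * on 64-bit temporaries for the widening ABDL/ABAL expansions.
 */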
6286 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6287 {
6288     TCGv_i64 t1 = tcg_temp_new_i64();
6289     TCGv_i64 t2 = tcg_temp_new_i64();
6290 
6291     tcg_gen_sub_i64(t1, n, m);
6292     tcg_gen_sub_i64(t2, m, n);
6293     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6294 }
6295 
6296 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6297 {
6298     TCGv_i64 t1 = tcg_temp_new_i64();
6299     TCGv_i64 t2 = tcg_temp_new_i64();
6300 
6301     tcg_gen_sub_i64(t1, n, m);
6302     tcg_gen_sub_i64(t2, m, n);
6303     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6304 }
6305 
6306 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6307 {
6308     TCGv_i64 t = tcg_temp_new_i64();
6309     gen_sabd_i64(t, n, m);
6310     tcg_gen_add_i64(d, d, t);
6311 }
6312 
6313 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6314 {
6315     TCGv_i64 t = tcg_temp_new_i64();
6316     gen_uabd_i64(t, n, m);
6317     tcg_gen_add_i64(d, d, t);
6318 }
6319 
6320 TRANS(SADDL_v, do_3op_widening,
6321       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6322       tcg_gen_add_i64, false)
6323 TRANS(UADDL_v, do_3op_widening,
6324       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6325       tcg_gen_add_i64, false)
6326 TRANS(SSUBL_v, do_3op_widening,
6327       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6328       tcg_gen_sub_i64, false)
6329 TRANS(USUBL_v, do_3op_widening,
6330       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6331       tcg_gen_sub_i64, false)
6332 TRANS(SABDL_v, do_3op_widening,
6333       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6334       gen_sabd_i64, false)
6335 TRANS(UABDL_v, do_3op_widening,
6336       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6337       gen_uabd_i64, false)
6338 TRANS(SABAL_v, do_3op_widening,
6339       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6340       gen_saba_i64, true)
6341 TRANS(UABAL_v, do_3op_widening,
6342       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6343       gen_uaba_i64, true)
6344 
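/*
 * Signed saturating doubling multiply (long): form the product, then
 * double it with the saturating-add helper, which also sets QC.
 */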
6345 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6346 {
6347     tcg_gen_mul_i64(d, n, m);
6348     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6349 }
6350 
6351 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6352 {
6353     tcg_gen_mul_i64(d, n, m);
6354     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6355 }
6356 
6357 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6358 {
6359     TCGv_i64 t = tcg_temp_new_i64();
6360 
6361     tcg_gen_mul_i64(t, n, m);
6362     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6363     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6364 }
6365 
6366 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6367 {
6368     TCGv_i64 t = tcg_temp_new_i64();
6369 
6370     tcg_gen_mul_i64(t, n, m);
6371     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6372     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6373 }
6374 
6375 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6376 {
6377     TCGv_i64 t = tcg_temp_new_i64();
6378 
6379     tcg_gen_mul_i64(t, n, m);
6380     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6381     tcg_gen_neg_i64(t, t);
6382     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6383 }
6384 
6385 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6386 {
6387     TCGv_i64 t = tcg_temp_new_i64();
6388 
6389     tcg_gen_mul_i64(t, n, m);
6390     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6391     tcg_gen_neg_i64(t, t);
6392     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6393 }
6394 
6395 TRANS(SQDMULL_v, do_3op_widening,
6396       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6397       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6398 TRANS(SQDMLAL_v, do_3op_widening,
6399       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6400       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6401 TRANS(SQDMLSL_v, do_3op_widening,
6402       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6403       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6404 
6405 TRANS(SQDMULL_vi, do_3op_widening,
6406       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6407       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6408 TRANS(SQDMLAL_vi, do_3op_widening,
6409       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6410       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6411 TRANS(SQDMLSL_vi, do_3op_widening,
6412       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6413       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6414 
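/*
 * Wide add/sub (SADDW etc.): Rn elements are already wide; Rm elements
 * are widened from the top or bottom half selected by Q.
 */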
6415 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6416                            MemOp sign, bool sub)
6417 {
6418     TCGv_i64 tcg_op0, tcg_op1;
6419     MemOp esz = a->esz;
6420     int half = 8 >> esz;
6421     bool top = a->q;
6422     int top_swap = top ? 0 : half - 1;
6423     int top_half = top ? half : 0;
6424 
6425     /* There are no 64x64->128 bit operations. */
6426     if (esz >= MO_64) {
6427         return false;
6428     }
6429     if (!fp_access_check(s)) {
6430         return true;
6431     }
6432     tcg_op0 = tcg_temp_new_i64();
6433     tcg_op1 = tcg_temp_new_i64();
6434 
6435     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6436         int elt = elt_fwd ^ top_swap;
6437 
6438         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6439         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6440         if (sub) {
6441             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6442         } else {
6443             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6444         }
6445         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6446     }
6447     clear_vec_high(s, 1, a->rd);
6448     return true;
6449 }
6450 
6451 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6452 TRANS(UADDW, do_addsub_wide, a, 0, false)
6453 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6454 TRANS(USUBW, do_addsub_wide, a, 0, true)
6455 
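/*
 * High-half narrowing add/sub (ADDHN etc.): add/sub the wide elements,
 * optionally round, then keep the high half of each result, written to
 * the top or bottom half of Rd as selected by Q.
 */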
6456 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6457                                  bool sub, bool round)
6458 {
6459     TCGv_i64 tcg_op0, tcg_op1;
6460     MemOp esz = a->esz;
6461     int half = 8 >> esz;
6462     bool top = a->q;
6463     int ebits = 8 << esz;
6464     uint64_t rbit = 1ull << (ebits - 1);
6465     int top_swap, top_half;
6466 
6467     /* There are no 128x128->64 bit operations. */
6468     if (esz >= MO_64) {
6469         return false;
6470     }
6471     if (!fp_access_check(s)) {
6472         return true;
6473     }
6474     tcg_op0 = tcg_temp_new_i64();
6475     tcg_op1 = tcg_temp_new_i64();
6476 
6477     /*
6478      * For top half inputs, iterate backward; forward for bottom half.
6479      * This means the store to the destination will not occur until
6480      * overlapping input elements are consumed.
6481      */
6482     top_swap = top ? half - 1 : 0;
6483     top_half = top ? half : 0;
6484 
6485     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6486         int elt = elt_fwd ^ top_swap;
6487 
6488         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6489         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6490         if (sub) {
6491             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6492         } else {
6493             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6494         }
6495         if (round) {
6496             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6497         }
6498         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6499         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6500     }
6501     clear_vec_high(s, top, a->rd);
6502     return true;
6503 }
6504 
6505 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6506 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6507 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6508 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6509 
6510 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6511 {
6512     if (fp_access_check(s)) {
6513         /* The Q field specifies lo/hi half input for these insns.  */
6514         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6515     }
6516     return true;
6517 }
6518 
6519 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6520 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6521 
6522 /*
6523  * Advanced SIMD scalar/vector x indexed element
6524  */
6525 
6526 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6527 {
6528     switch (a->esz) {
6529     case MO_64:
6530         if (fp_access_check(s)) {
6531             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6532             TCGv_i64 t1 = tcg_temp_new_i64();
6533 
6534             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6535             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6536             write_fp_dreg_merging(s, a->rd, a->rn, t0);
6537         }
6538         break;
6539     case MO_32:
6540         if (fp_access_check(s)) {
6541             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6542             TCGv_i32 t1 = tcg_temp_new_i32();
6543 
6544             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6545             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6546             write_fp_sreg_merging(s, a->rd, a->rn, t0);
6547         }
6548         break;
6549     case MO_16:
6550         if (!dc_isar_feature(aa64_fp16, s)) {
6551             return false;
6552         }
6553         if (fp_access_check(s)) {
6554             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6555             TCGv_i32 t1 = tcg_temp_new_i32();
6556 
6557             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6558             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6559             write_fp_hreg_merging(s, a->rd, a->rn, t0);
6560         }
6561         break;
6562     default:
6563         g_assert_not_reached();
6564     }
6565     return true;
6566 }
6567 
6568 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6569 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6570 
6571 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6572 {
6573     switch (a->esz) {
6574     case MO_64:
6575         if (fp_access_check(s)) {
6576             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6577             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6578             TCGv_i64 t2 = tcg_temp_new_i64();
6579 
6580             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6581             if (neg) {
6582                 gen_vfp_maybe_ah_negd(s, t1, t1);
6583             }
6584             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6585             write_fp_dreg_merging(s, a->rd, a->rd, t0);
6586         }
6587         break;
6588     case MO_32:
6589         if (fp_access_check(s)) {
6590             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6591             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6592             TCGv_i32 t2 = tcg_temp_new_i32();
6593 
6594             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6595             if (neg) {
6596                 gen_vfp_maybe_ah_negs(s, t1, t1);
6597             }
6598             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6599             write_fp_sreg_merging(s, a->rd, a->rd, t0);
6600         }
6601         break;
6602     case MO_16:
6603         if (!dc_isar_feature(aa64_fp16, s)) {
6604             return false;
6605         }
6606         if (fp_access_check(s)) {
6607             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6608             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6609             TCGv_i32 t2 = tcg_temp_new_i32();
6610 
6611             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6612             if (neg) {
6613                 gen_vfp_maybe_ah_negh(s, t1, t1);
6614             }
6615             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6616                                        fpstatus_ptr(FPST_A64_F16));
6617             write_fp_hreg_merging(s, a->rd, a->rd, t0);
6618         }
6619         break;
6620     default:
6621         g_assert_not_reached();
6622     }
6623     return true;
6624 }
6625 
6626 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6627 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6628 
6629 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6630                                   const ENVScalar2 *f)
6631 {
6632     if (a->esz < MO_16 || a->esz > MO_32) {
6633         return false;
6634     }
6635     if (fp_access_check(s)) {
6636         TCGv_i32 t0 = tcg_temp_new_i32();
6637         TCGv_i32 t1 = tcg_temp_new_i32();
6638 
6639         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6640         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6641         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6642         write_fp_sreg(s, a->rd, t0);
6643     }
6644     return true;
6645 }
6646 
6647 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6648 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6649 
6650 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6651                                   const ENVScalar3 *f)
6652 {
6653     if (a->esz < MO_16 || a->esz > MO_32) {
6654         return false;
6655     }
6656     if (fp_access_check(s)) {
6657         TCGv_i32 t0 = tcg_temp_new_i32();
6658         TCGv_i32 t1 = tcg_temp_new_i32();
6659         TCGv_i32 t2 = tcg_temp_new_i32();
6660 
6661         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6662         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6663         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6664         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6665         write_fp_sreg(s, a->rd, t0);
6666     }
6667     return true;
6668 }
6669 
6670 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6671 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6672 
6673 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6674                                           NeonGenTwo64OpFn *fn, bool acc)
6675 {
6676     if (fp_access_check(s)) {
6677         TCGv_i64 t0 = tcg_temp_new_i64();
6678         TCGv_i64 t1 = tcg_temp_new_i64();
6679         TCGv_i64 t2 = tcg_temp_new_i64();
6680 
6681         if (acc) {
6682             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6683         }
6684         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6685         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6686         fn(t0, t1, t2);
6687 
6688         /* Clear the whole register first, then store scalar. */
6689         clear_vec(s, a->rd);
6690         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6691     }
6692     return true;
6693 }
6694 
6695 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6696       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6697 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6698       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6699 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6700       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6701 
6702 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6703                               gen_helper_gvec_3_ptr * const fns[3])
6704 {
6705     MemOp esz = a->esz;
6706     int check = fp_access_check_vector_hsd(s, a->q, esz);
6707 
6708     if (check <= 0) {
6709         return check == 0;
6710     }
6711 
6712     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6713                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6714                       a->idx, fns[esz - 1]);
6715     return true;
6716 }
6717 
6718 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6719     gen_helper_gvec_fmul_idx_h,
6720     gen_helper_gvec_fmul_idx_s,
6721     gen_helper_gvec_fmul_idx_d,
6722 };
6723 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6724 
6725 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6726     gen_helper_gvec_fmulx_idx_h,
6727     gen_helper_gvec_fmulx_idx_s,
6728     gen_helper_gvec_fmulx_idx_d,
6729 };
6730 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6731 
6732 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6733 {
6734     static gen_helper_gvec_4_ptr * const fns[3][3] = {
6735         { gen_helper_gvec_fmla_idx_h,
6736           gen_helper_gvec_fmla_idx_s,
6737           gen_helper_gvec_fmla_idx_d },
6738         { gen_helper_gvec_fmls_idx_h,
6739           gen_helper_gvec_fmls_idx_s,
6740           gen_helper_gvec_fmls_idx_d },
6741         { gen_helper_gvec_ah_fmls_idx_h,
6742           gen_helper_gvec_ah_fmls_idx_s,
6743           gen_helper_gvec_ah_fmls_idx_d },
6744     };
6745     MemOp esz = a->esz;
6746     int check = fp_access_check_vector_hsd(s, a->q, esz);
6747 
6748     if (check <= 0) {
6749         return check == 0;
6750     }
6751 
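    /*
     * fns[0] is FMLA; fns[1] and fns[2] are FMLS, without and with the
     * FPCR.AH handling of the negation (with AH set, the sign of NaN
     * inputs is not flipped).
     */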
6752     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6753                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6754                       a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
6755     return true;
6756 }
6757 
6758 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6759 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6760 
6761 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6762 {
6763     if (fp_access_check(s)) {
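        /*
         * Pack the element index, the FMLAL2/FMLSL2 selector and the
         * subtract flag into the helper's simd_data field.
         */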
6764         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6765         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6766                            vec_full_reg_offset(s, a->rn),
6767                            vec_full_reg_offset(s, a->rm), tcg_env,
6768                            a->q ? 16 : 8, vec_full_reg_size(s),
6769                            data, gen_helper_gvec_fmlal_idx_a64);
6770     }
6771     return true;
6772 }
6773 
6774 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6775 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6776 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6777 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6778 
6779 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6780                                gen_helper_gvec_3 * const fns[2])
6781 {
6782     assert(a->esz == MO_16 || a->esz == MO_32);
6783     if (fp_access_check(s)) {
6784         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6785     }
6786     return true;
6787 }
6788 
6789 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6790     gen_helper_gvec_mul_idx_h,
6791     gen_helper_gvec_mul_idx_s,
6792 };
6793 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6794 
6795 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6796 {
6797     static gen_helper_gvec_4 * const fns[2][2] = {
6798         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6799         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6800     };
6801 
6802     assert(a->esz == MO_16 || a->esz == MO_32);
6803     if (fp_access_check(s)) {
6804         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6805                          a->idx, fns[a->esz - 1][sub]);
6806     }
6807     return true;
6808 }
6809 
6810 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6811 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6812 
6813 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6814                                   gen_helper_gvec_4 * const fns[2])
6815 {
6816     assert(a->esz == MO_16 || a->esz == MO_32);
6817     if (fp_access_check(s)) {
6818         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6819                            vec_full_reg_offset(s, a->rn),
6820                            vec_full_reg_offset(s, a->rm),
6821                            offsetof(CPUARMState, vfp.qc),
6822                            a->q ? 16 : 8, vec_full_reg_size(s),
6823                            a->idx, fns[a->esz - 1]);
6824     }
6825     return true;
6826 }
6827 
6828 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6829     gen_helper_neon_sqdmulh_idx_h,
6830     gen_helper_neon_sqdmulh_idx_s,
6831 };
6832 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6833 
6834 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6835     gen_helper_neon_sqrdmulh_idx_h,
6836     gen_helper_neon_sqrdmulh_idx_s,
6837 };
6838 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6839 
6840 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6841     gen_helper_neon_sqrdmlah_idx_h,
6842     gen_helper_neon_sqrdmlah_idx_s,
6843 };
6844 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6845            f_vector_idx_sqrdmlah)
6846 
6847 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6848     gen_helper_neon_sqrdmlsh_idx_h,
6849     gen_helper_neon_sqrdmlsh_idx_s,
6850 };
6851 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6852            f_vector_idx_sqrdmlsh)
6853 
6854 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6855                               gen_helper_gvec_4 *fn)
6856 {
6857     if (fp_access_check(s)) {
6858         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6859     }
6860     return true;
6861 }
6862 
6863 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6864                                   gen_helper_gvec_4_ptr *fn)
6865 {
6866     if (fp_access_check(s)) {
6867         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6868     }
6869     return true;
6870 }
6871 
6872 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6873 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6874 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6875            gen_helper_gvec_sudot_idx_b)
6876 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6877            gen_helper_gvec_usdot_idx_b)
6878 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6879            gen_helper_gvec_bfdot_idx)
6880 
6881 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6882 {
6883     if (!dc_isar_feature(aa64_bf16, s)) {
6884         return false;
6885     }
6886     if (fp_access_check(s)) {
6887         /* Q bit selects BFMLALB vs BFMLALT. */
6888         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6889                           s->fpcr_ah ? FPST_AH : FPST_A64,
6890                           (a->idx << 1) | a->q,
6891                           gen_helper_gvec_bfmlal_idx);
6892     }
6893     return true;
6894 }
6895 
6896 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6897 {
6898     gen_helper_gvec_4_ptr *fn;
6899 
6900     if (!dc_isar_feature(aa64_fcma, s)) {
6901         return false;
6902     }
6903     switch (a->esz) {
6904     case MO_16:
6905         if (!dc_isar_feature(aa64_fp16, s)) {
6906             return false;
6907         }
6908         fn = gen_helper_gvec_fcmlah_idx;
6909         break;
6910     case MO_32:
6911         fn = gen_helper_gvec_fcmlas_idx;
6912         break;
6913     default:
6914         g_assert_not_reached();
6915     }
6916     if (fp_access_check(s)) {
6917         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6918                           a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6919                           (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
6920     }
6921     return true;
6922 }
6923 
6924 /*
6925  * Advanced SIMD scalar pairwise
6926  */
6927 
6928 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6929 {
6930     switch (a->esz) {
6931     case MO_64:
6932         if (fp_access_check(s)) {
6933             TCGv_i64 t0 = tcg_temp_new_i64();
6934             TCGv_i64 t1 = tcg_temp_new_i64();
6935 
6936             read_vec_element(s, t0, a->rn, 0, MO_64);
6937             read_vec_element(s, t1, a->rn, 1, MO_64);
6938             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6939             write_fp_dreg(s, a->rd, t0);
6940         }
6941         break;
6942     case MO_32:
6943         if (fp_access_check(s)) {
6944             TCGv_i32 t0 = tcg_temp_new_i32();
6945             TCGv_i32 t1 = tcg_temp_new_i32();
6946 
6947             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6948             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6949             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6950             write_fp_sreg(s, a->rd, t0);
6951         }
6952         break;
6953     case MO_16:
6954         if (!dc_isar_feature(aa64_fp16, s)) {
6955             return false;
6956         }
6957         if (fp_access_check(s)) {
6958             TCGv_i32 t0 = tcg_temp_new_i32();
6959             TCGv_i32 t1 = tcg_temp_new_i32();
6960 
6961             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6962             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6963             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6964             write_fp_sreg(s, a->rd, t0);
6965         }
6966         break;
6967     default:
6968         g_assert_not_reached();
6969     }
6970     return true;
6971 }
6972 
6973 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
6974                                    const FPScalar *fnormal,
6975                                    const FPScalar *fah)
6976 {
6977     return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
6978 }
6979 
6980 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6981 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
6982 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
6983 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6984 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6985 
6986 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6987 {
6988     if (fp_access_check(s)) {
6989         TCGv_i64 t0 = tcg_temp_new_i64();
6990         TCGv_i64 t1 = tcg_temp_new_i64();
6991 
6992         read_vec_element(s, t0, a->rn, 0, MO_64);
6993         read_vec_element(s, t1, a->rn, 1, MO_64);
6994         tcg_gen_add_i64(t0, t0, t1);
6995         write_fp_dreg(s, a->rd, t0);
6996     }
6997     return true;
6998 }
6999 
7000 /*
7001  * Floating-point conditional select
7002  */
7003 
7004 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7005 {
7006     TCGv_i64 t_true, t_false;
7007     DisasCompare64 c;
7008     int check = fp_access_check_scalar_hsd(s, a->esz);
7009 
7010     if (check <= 0) {
7011         return check == 0;
7012     }
7013 
7014     /* Zero extend sreg & hreg inputs to 64 bits now.  */
7015     t_true = tcg_temp_new_i64();
7016     t_false = tcg_temp_new_i64();
7017     read_vec_element(s, t_true, a->rn, 0, a->esz);
7018     read_vec_element(s, t_false, a->rm, 0, a->esz);
7019 
7020     a64_test_cc(&c, a->cond);
7021     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7022                         t_true, t_false);
7023 
7024     /*
7025      * Note that sregs & hregs write back zeros to the high bits,
7026      * and we've already done the zero-extension.
7027      */
7028     write_fp_dreg(s, a->rd, t_true);
7029     return true;
7030 }
7031 
7032 /*
7033  * Advanced SIMD Extract
7034  */
7035 
7036 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7037 {
7038     if (fp_access_check(s)) {
7039         TCGv_i64 lo = read_fp_dreg(s, a->rn);
7040         if (a->imm != 0) {
7041             TCGv_i64 hi = read_fp_dreg(s, a->rm);
7042             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7043         }
7044         write_fp_dreg(s, a->rd, lo);
7045     }
7046     return true;
7047 }
7048 
7049 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7050 {
7051     TCGv_i64 lo, hi;
7052     int pos = (a->imm & 7) * 8;
7053     int elt = a->imm >> 3;
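    /*
     * The result is bytes [imm, imm+15] of the 256-bit concatenation
     * Rm:Rn (with Rn as the low half).  "elt" indexes 64-bit doublewords
     * of that concatenation: 0-1 are in Rn, 2-3 in Rm; "pos" is the bit
     * offset within the first doubleword.
     */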
7054 
7055     if (!fp_access_check(s)) {
7056         return true;
7057     }
7058 
7059     lo = tcg_temp_new_i64();
7060     hi = tcg_temp_new_i64();
7061 
7062     read_vec_element(s, lo, a->rn, elt, MO_64);
7063     elt++;
7064     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7065     elt++;
7066 
7067     if (pos != 0) {
7068         TCGv_i64 hh = tcg_temp_new_i64();
7069         tcg_gen_extract2_i64(lo, lo, hi, pos);
7070         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7071         tcg_gen_extract2_i64(hi, hi, hh, pos);
7072     }
7073 
7074     write_vec_element(s, lo, a->rd, 0, MO_64);
7075     write_vec_element(s, hi, a->rd, 1, MO_64);
7076     clear_vec_high(s, true, a->rd);
7077     return true;
7078 }
7079 
7080 /*
7081  * Floating-point data-processing (3 source)
7082  */
7083 
7084 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7085 {
7086     TCGv_ptr fpst;
7087 
7088     /*
7089      * These are fused multiply-add.  Note that doing the negations here
7090      * as separate steps is correct: an input NaN should come out with
7091      * its sign bit flipped if it came from a negated input.
7092      */
7093     switch (a->esz) {
7094     case MO_64:
7095         if (fp_access_check(s)) {
7096             TCGv_i64 tn = read_fp_dreg(s, a->rn);
7097             TCGv_i64 tm = read_fp_dreg(s, a->rm);
7098             TCGv_i64 ta = read_fp_dreg(s, a->ra);
7099 
7100             if (neg_a) {
7101                 gen_vfp_maybe_ah_negd(s, ta, ta);
7102             }
7103             if (neg_n) {
7104                 gen_vfp_maybe_ah_negd(s, tn, tn);
7105             }
7106             fpst = fpstatus_ptr(FPST_A64);
7107             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7108             write_fp_dreg_merging(s, a->rd, a->ra, ta);
7109         }
7110         break;
7111 
7112     case MO_32:
7113         if (fp_access_check(s)) {
7114             TCGv_i32 tn = read_fp_sreg(s, a->rn);
7115             TCGv_i32 tm = read_fp_sreg(s, a->rm);
7116             TCGv_i32 ta = read_fp_sreg(s, a->ra);
7117 
7118             if (neg_a) {
7119                 gen_vfp_maybe_ah_negs(s, ta, ta);
7120             }
7121             if (neg_n) {
7122                 gen_vfp_maybe_ah_negs(s, tn, tn);
7123             }
7124             fpst = fpstatus_ptr(FPST_A64);
7125             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7126             write_fp_sreg_merging(s, a->rd, a->ra, ta);
7127         }
7128         break;
7129 
7130     case MO_16:
7131         if (!dc_isar_feature(aa64_fp16, s)) {
7132             return false;
7133         }
7134         if (fp_access_check(s)) {
7135             TCGv_i32 tn = read_fp_hreg(s, a->rn);
7136             TCGv_i32 tm = read_fp_hreg(s, a->rm);
7137             TCGv_i32 ta = read_fp_hreg(s, a->ra);
7138 
7139             if (neg_a) {
7140                 gen_vfp_maybe_ah_negh(s, ta, ta);
7141             }
7142             if (neg_n) {
7143                 gen_vfp_maybe_ah_negh(s, tn, tn);
7144             }
7145             fpst = fpstatus_ptr(FPST_A64_F16);
7146             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7147             write_fp_hreg_merging(s, a->rd, a->ra, ta);
7148         }
7149         break;
7150 
7151     default:
7152         return false;
7153     }
7154     return true;
7155 }
7156 
7157 TRANS(FMADD, do_fmadd, a, false, false)
7158 TRANS(FNMADD, do_fmadd, a, true, true)
7159 TRANS(FMSUB, do_fmadd, a, false, true)
7160 TRANS(FNMSUB, do_fmadd, a, true, false)
7161 
7162 /*
7163  * Advanced SIMD Across Lanes
7164  */
7165 
7166 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7167                              MemOp src_sign, NeonGenTwo64OpFn *fn)
7168 {
7169     TCGv_i64 tcg_res, tcg_elt;
7170     MemOp src_mop = a->esz | src_sign;
7171     int elements = (a->q ? 16 : 8) >> a->esz;
7172 
7173     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7174     if (elements < 4) {
7175         return false;
7176     }
7177     if (!fp_access_check(s)) {
7178         return true;
7179     }
7180 
7181     tcg_res = tcg_temp_new_i64();
7182     tcg_elt = tcg_temp_new_i64();
7183 
7184     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7185     for (int i = 1; i < elements; i++) {
7186         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7187         fn(tcg_res, tcg_res, tcg_elt);
7188     }
7189 
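    /*
     * Truncate the accumulated value to the result element size (one
     * size larger than the source for the widening SADDLV/UADDLV) and
     * zero-extend it into the 64-bit scalar destination.
     */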
7190     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
7191     write_fp_dreg(s, a->rd, tcg_res);
7192     return true;
7193 }
7194 
7195 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7196 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7197 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7198 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7199 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7200 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7201 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7202 
7203 /*
7204  * do_fp_reduction helper
7205  *
7206  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7207  * important for correct NaN propagation that we do these
7208  * operations in exactly the order specified by the pseudocode.
7209  *
7210  * This is a recursive function.
7211  */
7212 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7213                                 int ebase, int ecount, TCGv_ptr fpst,
7214                                 NeonGenTwoSingleOpFn *fn)
7215 {
7216     if (ecount == 1) {
7217         TCGv_i32 tcg_elem = tcg_temp_new_i32();
7218         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7219         return tcg_elem;
7220     } else {
7221         int half = ecount >> 1;
7222         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7223 
7224         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7225         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7226         tcg_res = tcg_temp_new_i32();
7227 
7228         fn(tcg_res, tcg_lo, tcg_hi, fpst);
7229         return tcg_res;
7230     }
7231 }
7232 
7233 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7234                             NeonGenTwoSingleOpFn *fnormal,
7235                             NeonGenTwoSingleOpFn *fah)
7236 {
7237     if (fp_access_check(s)) {
7238         MemOp esz = a->esz;
7239         int elts = (a->q ? 16 : 8) >> esz;
7240         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7241         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7242                                        s->fpcr_ah ? fah : fnormal);
7243         write_fp_sreg(s, a->rd, res);
7244     }
7245     return true;
7246 }
7247 
7248 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7249            gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7250 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7251            gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7252 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7253            gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7254 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7255            gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7256 
7257 TRANS(FMAXNMV_s, do_fp_reduction, a,
7258       gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7259 TRANS(FMINNMV_s, do_fp_reduction, a,
7260       gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7261 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7262 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7263 
7264 /*
7265  * Floating-point Immediate
7266  */
7267 
7268 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7269 {
7270     int check = fp_access_check_scalar_hsd(s, a->esz);
7271     uint64_t imm;
7272 
7273     if (check <= 0) {
7274         return check == 0;
7275     }
7276 
7277     imm = vfp_expand_imm(a->esz, a->imm);
7278     write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7279     return true;
7280 }
7281 
7282 /*
7283  * Floating point compare, conditional compare
7284  */
7285 
7286 static void handle_fp_compare(DisasContext *s, int size,
7287                               unsigned int rn, unsigned int rm,
7288                               bool cmp_with_zero, bool signal_all_nans)
7289 {
7290     TCGv_i64 tcg_flags = tcg_temp_new_i64();
7291     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7292 
7293     if (size == MO_64) {
7294         TCGv_i64 tcg_vn, tcg_vm;
7295 
7296         tcg_vn = read_fp_dreg(s, rn);
7297         if (cmp_with_zero) {
7298             tcg_vm = tcg_constant_i64(0);
7299         } else {
7300             tcg_vm = read_fp_dreg(s, rm);
7301         }
7302         if (signal_all_nans) {
7303             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7304         } else {
7305             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7306         }
7307     } else {
7308         TCGv_i32 tcg_vn = tcg_temp_new_i32();
7309         TCGv_i32 tcg_vm = tcg_temp_new_i32();
7310 
7311         read_vec_element_i32(s, tcg_vn, rn, 0, size);
7312         if (cmp_with_zero) {
7313             tcg_gen_movi_i32(tcg_vm, 0);
7314         } else {
7315             read_vec_element_i32(s, tcg_vm, rm, 0, size);
7316         }
7317 
7318         switch (size) {
7319         case MO_32:
7320             if (signal_all_nans) {
7321                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7322             } else {
7323                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7324             }
7325             break;
7326         case MO_16:
7327             if (signal_all_nans) {
7328                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7329             } else {
7330                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7331             }
7332             break;
7333         default:
7334             g_assert_not_reached();
7335         }
7336     }
7337 
7338     gen_set_nzcv(tcg_flags);
7339 }
7340 
7341 /* FCMP, FCMPE */
7342 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7343 {
7344     int check = fp_access_check_scalar_hsd(s, a->esz);
7345 
7346     if (check <= 0) {
7347         return check == 0;
7348     }
7349 
7350     handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7351     return true;
7352 }
7353 
7354 /* FCCMP, FCCMPE */
7355 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7356 {
7357     TCGLabel *label_continue = NULL;
7358     int check = fp_access_check_scalar_hsd(s, a->esz);
7359 
7360     if (check <= 0) {
7361         return check == 0;
7362     }
7363 
7364     if (a->cond < 0x0e) { /* not always */
7365         TCGLabel *label_match = gen_new_label();
7366         label_continue = gen_new_label();
7367         arm_gen_test_cc(a->cond, label_match);
7368         /* nomatch: */
7369         gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7370         tcg_gen_br(label_continue);
7371         gen_set_label(label_match);
7372     }
7373 
7374     handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7375 
7376     if (label_continue) {
7377         gen_set_label(label_continue);
7378     }
7379     return true;
7380 }
7381 
7382 /*
7383  * Advanced SIMD Modified Immediate
7384  */
7385 
7386 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7387 {
7388     if (!dc_isar_feature(aa64_fp16, s)) {
7389         return false;
7390     }
7391     if (fp_access_check(s)) {
7392         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7393                              a->q ? 16 : 8, vec_full_reg_size(s),
7394                              vfp_expand_imm(MO_16, a->abcdefgh));
7395     }
7396     return true;
7397 }
7398 
7399 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7400                      int64_t c, uint32_t oprsz, uint32_t maxsz)
7401 {
7402     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7403 }
7404 
7405 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7406 {
7407     GVecGen2iFn *fn;
7408 
7409     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7410     if ((a->cmode & 1) && a->cmode < 12) {
7411         /* For op=1, the imm will be inverted, so BIC becomes AND. */
7412         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7413     } else {
7414         /* There is one unallocated cmode/op combination in this space */
7415         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7416             return false;
7417         }
7418         fn = gen_movi;
7419     }
7420 
7421     if (fp_access_check(s)) {
7422         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7423         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7424     }
7425     return true;
7426 }
7427 
7428 /*
7429  * Advanced SIMD Shift by Immediate
7430  */
7431 
7432 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7433 {
7434     if (fp_access_check(s)) {
7435         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7436     }
7437     return true;
7438 }
7439 
7440 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7441 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7442 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7443 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7444 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7445 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7446 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7447 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7448 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7449 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7450 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli);
7451 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7452 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7453 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7454 
7455 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7456 {
7457     TCGv_i64 tcg_rn, tcg_rd;
7458     int esz = a->esz;
7459     int esize;
7460 
7461     if (!fp_access_check(s)) {
7462         return true;
7463     }
7464 
7465     /*
7466      * For the LL variants the store is larger than the load,
7467      * so if rd == rn we would overwrite parts of our input.
7468      * So load everything right now and use shifts in the main loop.
7469      */
7470     tcg_rd = tcg_temp_new_i64();
7471     tcg_rn = tcg_temp_new_i64();
7472     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7473 
7474     esize = 8 << esz;
7475     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7476         if (is_u) {
7477             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7478         } else {
7479             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7480         }
7481         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7482         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7483     }
7484     clear_vec_high(s, true, a->rd);
7485     return true;
7486 }
7487 
7488 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7489 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7490 
7491 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7492 {
7493     assert(shift >= 0 && shift <= 64);
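    /*
     * An arithmetic shift by 64 yields the same result as a shift by
     * 63: every bit becomes a copy of the sign bit.
     */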
7494     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7495 }
7496 
7497 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7498 {
7499     assert(shift >= 0 && shift <= 64);
7500     if (shift == 64) {
7501         tcg_gen_movi_i64(dst, 0);
7502     } else {
7503         tcg_gen_shri_i64(dst, src, shift);
7504     }
7505 }
7506 
7507 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7508 {
7509     gen_sshr_d(src, src, shift);
7510     tcg_gen_add_i64(dst, dst, src);
7511 }
7512 
7513 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7514 {
7515     gen_ushr_d(src, src, shift);
7516     tcg_gen_add_i64(dst, dst, src);
7517 }
7518 
7519 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7520 {
7521     assert(shift >= 0 && shift <= 32);
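    /*
     * The 8/16/32-bit source has been widened into a 64-bit temp, so
     * the rounding addition cannot overflow and a plain 64-bit
     * add-then-shift implements the rounding shift exactly.
     */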
7522     if (shift) {
7523         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7524         tcg_gen_add_i64(dst, src, rnd);
7525         tcg_gen_sari_i64(dst, dst, shift);
7526     } else {
7527         tcg_gen_mov_i64(dst, src);
7528     }
7529 }
7530 
7531 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7532 {
7533     assert(shift >= 0 && shift <= 32);
7534     if (shift) {
7535         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7536         tcg_gen_add_i64(dst, src, rnd);
7537         tcg_gen_shri_i64(dst, dst, shift);
7538     } else {
7539         tcg_gen_mov_i64(dst, src);
7540     }
7541 }
7542 
7543 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7544 {
7545     assert(shift >= 0 && shift <= 64);
7546     if (shift == 0) {
7547         tcg_gen_mov_i64(dst, src);
7548     } else if (shift == 64) {
7549         /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7550         tcg_gen_movi_i64(dst, 0);
7551     } else {
7552         TCGv_i64 rnd = tcg_temp_new_i64();
7553         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7554         tcg_gen_sari_i64(dst, src, shift);
7555         tcg_gen_add_i64(dst, dst, rnd);
7556     }
7557 }
7558 
7559 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7560 {
7561     assert(shift >= 0 && shift <= 64);
7562     if (shift == 0) {
7563         tcg_gen_mov_i64(dst, src);
7564     } else if (shift == 64) {
7565         /* Rounding will propagate bit 63 into bit 64. */
7566         tcg_gen_shri_i64(dst, src, 63);
7567     } else {
7568         TCGv_i64 rnd = tcg_temp_new_i64();
7569         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7570         tcg_gen_shri_i64(dst, src, shift);
7571         tcg_gen_add_i64(dst, dst, rnd);
7572     }
7573 }
7574 
7575 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7576 {
7577     gen_srshr_d(src, src, shift);
7578     tcg_gen_add_i64(dst, dst, src);
7579 }
7580 
7581 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7582 {
7583     gen_urshr_d(src, src, shift);
7584     tcg_gen_add_i64(dst, dst, src);
7585 }
7586 
7587 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7588 {
7589     /* If shift is 64, dst is unchanged. */
7590     if (shift != 64) {
7591         tcg_gen_shri_i64(src, src, shift);
7592         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7593     }
7594 }
7595 
7596 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7597 {
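    /*
     * Insert the low (64 - shift) bits of src at bit position "shift";
     * the bits of dst below the insertion point are preserved.
     */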
7598     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7599 }
7600 
7601 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7602                                     WideShiftImmFn * const fns[3], MemOp sign)
7603 {
7604     TCGv_i64 tcg_rn, tcg_rd;
7605     int esz = a->esz;
7606     int esize;
7607     WideShiftImmFn *fn;
7608 
7609     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7610 
7611     if (!fp_access_check(s)) {
7612         return true;
7613     }
7614 
7615     tcg_rn = tcg_temp_new_i64();
7616     tcg_rd = tcg_temp_new_i64();
7617     tcg_gen_movi_i64(tcg_rd, 0);
7618 
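    /*
     * Narrow each wide source element in turn and deposit it into
     * tcg_rd; the assembled 64-bit result is written to the half of
     * rd selected by Q.
     */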
7619     fn = fns[esz];
7620     esize = 8 << esz;
7621     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7622         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7623         fn(tcg_rn, tcg_rn, a->imm);
7624         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7625     }
7626 
7627     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7628     clear_vec_high(s, a->q, a->rd);
7629     return true;
7630 }
7631 
7632 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7633 {
7634     tcg_gen_sari_i64(d, s, i);
7635     tcg_gen_ext16u_i64(d, d);
7636     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7637 }
7638 
7639 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7640 {
7641     tcg_gen_sari_i64(d, s, i);
7642     tcg_gen_ext32u_i64(d, d);
7643     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7644 }
7645 
7646 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7647 {
7648     gen_sshr_d(d, s, i);
7649     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7650 }
7651 
7652 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7653 {
7654     tcg_gen_shri_i64(d, s, i);
7655     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7656 }
7657 
7658 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7659 {
7660     tcg_gen_shri_i64(d, s, i);
7661     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7662 }
7663 
7664 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7665 {
7666     gen_ushr_d(d, s, i);
7667     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7668 }
7669 
7670 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7671 {
7672     tcg_gen_sari_i64(d, s, i);
7673     tcg_gen_ext16u_i64(d, d);
7674     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7675 }
7676 
7677 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7678 {
7679     tcg_gen_sari_i64(d, s, i);
7680     tcg_gen_ext32u_i64(d, d);
7681     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7682 }
7683 
7684 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7685 {
7686     gen_sshr_d(d, s, i);
7687     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7688 }
7689 
7690 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7691 {
7692     gen_srshr_bhs(d, s, i);
7693     tcg_gen_ext16u_i64(d, d);
7694     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7695 }
7696 
7697 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7698 {
7699     gen_srshr_bhs(d, s, i);
7700     tcg_gen_ext32u_i64(d, d);
7701     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7702 }
7703 
7704 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7705 {
7706     gen_srshr_d(d, s, i);
7707     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7708 }
7709 
7710 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7711 {
7712     gen_urshr_bhs(d, s, i);
7713     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7714 }
7715 
7716 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7717 {
7718     gen_urshr_bhs(d, s, i);
7719     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7720 }
7721 
7722 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7723 {
7724     gen_urshr_d(d, s, i);
7725     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7726 }
7727 
7728 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7729 {
7730     gen_srshr_bhs(d, s, i);
7731     tcg_gen_ext16u_i64(d, d);
7732     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7733 }
7734 
7735 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7736 {
7737     gen_srshr_bhs(d, s, i);
7738     tcg_gen_ext32u_i64(d, d);
7739     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7740 }
7741 
7742 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7743 {
7744     gen_srshr_d(d, s, i);
7745     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7746 }
7747 
7748 static WideShiftImmFn * const shrn_fns[] = {
7749     tcg_gen_shri_i64,
7750     tcg_gen_shri_i64,
7751     gen_ushr_d,
7752 };
7753 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7754 
7755 static WideShiftImmFn * const rshrn_fns[] = {
7756     gen_urshr_bhs,
7757     gen_urshr_bhs,
7758     gen_urshr_d,
7759 };
7760 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7761 
7762 static WideShiftImmFn * const sqshrn_fns[] = {
7763     gen_sqshrn_b,
7764     gen_sqshrn_h,
7765     gen_sqshrn_s,
7766 };
7767 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7768 
7769 static WideShiftImmFn * const uqshrn_fns[] = {
7770     gen_uqshrn_b,
7771     gen_uqshrn_h,
7772     gen_uqshrn_s,
7773 };
7774 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7775 
7776 static WideShiftImmFn * const sqshrun_fns[] = {
7777     gen_sqshrun_b,
7778     gen_sqshrun_h,
7779     gen_sqshrun_s,
7780 };
7781 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7782 
7783 static WideShiftImmFn * const sqrshrn_fns[] = {
7784     gen_sqrshrn_b,
7785     gen_sqrshrn_h,
7786     gen_sqrshrn_s,
7787 };
7788 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7789 
7790 static WideShiftImmFn * const uqrshrn_fns[] = {
7791     gen_uqrshrn_b,
7792     gen_uqrshrn_h,
7793     gen_uqrshrn_s,
7794 };
7795 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7796 
7797 static WideShiftImmFn * const sqrshrun_fns[] = {
7798     gen_sqrshrun_b,
7799     gen_sqrshrun_h,
7800     gen_sqrshrun_s,
7801 };
7802 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7803 
7804 /*
7805  * Advanced SIMD Scalar Shift by Immediate
7806  */
7807 
7808 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7809                                 WideShiftImmFn *fn, bool accumulate,
7810                                 MemOp sign)
7811 {
7812     if (fp_access_check(s)) {
7813         TCGv_i64 rd = tcg_temp_new_i64();
7814         TCGv_i64 rn = tcg_temp_new_i64();
7815 
7816         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7817         if (accumulate) {
7818             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7819         }
7820         fn(rd, rn, a->imm);
7821         write_fp_dreg(s, a->rd, rd);
7822     }
7823     return true;
7824 }
7825 
7826 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7827 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7828 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7829 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7830 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7831 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7832 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7833 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7834 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7835 
7836 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7837 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7838 
7839 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7840                               NeonGenTwoOpEnvFn *fn)
7841 {
7842     TCGv_i32 t = tcg_temp_new_i32();
7843     tcg_gen_extrl_i64_i32(t, s);
7844     fn(t, tcg_env, t, tcg_constant_i32(i));
7845     tcg_gen_extu_i32_i64(d, t);
7846 }
7847 
7848 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7849 {
7850     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7851 }
7852 
7853 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7854 {
7855     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7856 }
7857 
7858 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7859 {
7860     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7861 }
7862 
7863 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7864 {
7865     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7866 }
7867 
7868 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7869 {
7870     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7871 }
7872 
7873 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7874 {
7875     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7876 }
7877 
7878 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7879 {
7880     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7881 }
7882 
7883 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7884 {
7885     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7886 }
7887 
7888 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7889 {
7890     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7891 }
7892 
7893 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7894 {
7895     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7896 }
7897 
7898 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7899 {
7900     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7901 }
7902 
7903 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7904 {
7905     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7906 }
7907 
7908 static WideShiftImmFn * const f_scalar_sqshli[] = {
7909     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7910 };
7911 
7912 static WideShiftImmFn * const f_scalar_uqshli[] = {
7913     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7914 };
7915 
7916 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7917     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7918 };
7919 
7920 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7921 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7922 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7923 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7924 
7925 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7926                                        WideShiftImmFn * const fns[3],
7927                                        MemOp sign, bool zext)
7928 {
7929     MemOp esz = a->esz;
7930 
7931     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7932 
7933     if (fp_access_check(s)) {
7934         TCGv_i64 rd = tcg_temp_new_i64();
7935         TCGv_i64 rn = tcg_temp_new_i64();
7936 
7937         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7938         fns[esz](rd, rn, a->imm);
7939         if (zext) {
7940             tcg_gen_ext_i64(rd, rd, esz);
7941         }
7942         write_fp_dreg(s, a->rd, rd);
7943     }
7944     return true;
7945 }
7946 
7947 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7948 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7949 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7950 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7951 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7952 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7953 
7954 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7955 {
7956     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7957     tcg_rd = cpu_reg(s, a->rd);
7958 
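    /*
     * For a 32-bit signed divide the operands must be explicitly
     * sign-extended, so that the 64-bit division helper produces the
     * correct 32-bit quotient.
     */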
7959     if (!a->sf && is_signed) {
7960         tcg_n = tcg_temp_new_i64();
7961         tcg_m = tcg_temp_new_i64();
7962         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7963         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7964     } else {
7965         tcg_n = read_cpu_reg(s, a->rn, a->sf);
7966         tcg_m = read_cpu_reg(s, a->rm, a->sf);
7967     }
7968 
7969     if (is_signed) {
7970         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7971     } else {
7972         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7973     }
7974 
7975     if (!a->sf) { /* zero extend final result */
7976         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7977     }
7978     return true;
7979 }
7980 
7981 TRANS(SDIV, do_div, a, true)
7982 TRANS(UDIV, do_div, a, false)
7983 
7984 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7985  * Note that it is the caller's responsibility to ensure that the
7986  * shift amount is in range (i.e. 0..31 or 0..63) and provide the
7987  * ARM-mandated semantics for out-of-range shifts.
7988  */
7989 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7990                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7991 {
7992     switch (shift_type) {
7993     case A64_SHIFT_TYPE_LSL:
7994         tcg_gen_shl_i64(dst, src, shift_amount);
7995         break;
7996     case A64_SHIFT_TYPE_LSR:
7997         tcg_gen_shr_i64(dst, src, shift_amount);
7998         break;
7999     case A64_SHIFT_TYPE_ASR:
8000         if (!sf) {
8001             tcg_gen_ext32s_i64(dst, src);
8002         }
8003         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8004         break;
8005     case A64_SHIFT_TYPE_ROR:
8006         if (sf) {
8007             tcg_gen_rotr_i64(dst, src, shift_amount);
8008         } else {
8009             TCGv_i32 t0, t1;
8010             t0 = tcg_temp_new_i32();
8011             t1 = tcg_temp_new_i32();
8012             tcg_gen_extrl_i64_i32(t0, src);
8013             tcg_gen_extrl_i64_i32(t1, shift_amount);
8014             tcg_gen_rotr_i32(t0, t0, t1);
8015             tcg_gen_extu_i32_i64(dst, t0);
8016         }
8017         break;
8018     default:
8019         g_assert_not_reached(); /* all shift types are handled above */
8020         break;
8021     }
8022 
8023     if (!sf) { /* zero extend final result */
8024         tcg_gen_ext32u_i64(dst, dst);
8025     }
8026 }
8027 
8028 /* Shift a TCGv src by immediate, put result in dst.
8029  * The shift amount must be in range (this should always be true as the
8030  * relevant instructions will UNDEF on bad shift immediates).
8031  */
8032 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8033                           enum a64_shift_type shift_type, unsigned int shift_i)
8034 {
8035     assert(shift_i < (sf ? 64 : 32));
8036 
8037     if (shift_i == 0) {
8038         tcg_gen_mov_i64(dst, src);
8039     } else {
8040         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8041     }
8042 }
8043 
8044 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8045                          enum a64_shift_type shift_type)
8046 {
8047     TCGv_i64 tcg_shift = tcg_temp_new_i64();
8048     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8049     TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8050 
8051     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8052     shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8053     return true;
8054 }
8055 
8056 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8057 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8058 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8059 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8060 
8061 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8062 {
8063     TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8064     TCGv_i32 tcg_bytes;
8065 
8066     switch (a->esz) {
8067     case MO_8:
8068     case MO_16:
8069     case MO_32:
8070         tcg_val = tcg_temp_new_i64();
8071         tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8072         break;
8073     case MO_64:
8074         tcg_val = cpu_reg(s, a->rm);
8075         break;
8076     default:
8077         g_assert_not_reached();
8078     }
8079     tcg_acc = cpu_reg(s, a->rn);
8080     tcg_bytes = tcg_constant_i32(1 << a->esz);
8081     tcg_rd = cpu_reg(s, a->rd);
8082 
8083     if (crc32c) {
8084         gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8085     } else {
8086         gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8087     }
8088     return true;
8089 }
8090 
8091 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8092 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8093 
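/*
 * SUBP/SUBPS (FEAT_MTE): subtract the 56-bit sign-extended address
 * portions of the two pointer operands, ignoring tag and top bytes.
 * SUBPS additionally sets NZCV from the subtraction.
 */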
8094 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8095 {
8096     TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8097     TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8098     TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8099 
8100     tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8101     tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8102 
8103     if (setflag) {
8104         gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8105     } else {
8106         tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8107     }
8108     return true;
8109 }
8110 
8111 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8112 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
8113 
8114 static bool trans_IRG(DisasContext *s, arg_rrr *a)
8115 {
8116     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8117         TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8118         TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8119 
8120         if (s->ata[0]) {
8121             gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8122         } else {
8123             gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8124         }
8125         return true;
8126     }
8127     return false;
8128 }
8129 
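/*
 * GMI: extract the allocation tag from bits [59:56] of Xn|SP, form the
 * single-bit mask 1 << tag, and OR it into Xm to produce the exclusion
 * mask in Xd.
 */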
8130 static bool trans_GMI(DisasContext *s, arg_rrr *a)
8131 {
8132     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8133         TCGv_i64 t = tcg_temp_new_i64();
8134 
8135         tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8136         tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8137         tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8138         return true;
8139     }
8140     return false;
8141 }
8142 
8143 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8144 {
8145     if (dc_isar_feature(aa64_pauth, s)) {
8146         gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8147                          cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8148         return true;
8149     }
8150     return false;
8151 }
8152 
8153 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8154 
8155 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8156 {
8157     fn(cpu_reg(s, rd), cpu_reg(s, rn));
8158     return true;
8159 }
8160 
8161 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8162 {
8163     TCGv_i32 t32 = tcg_temp_new_i32();
8164 
8165     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8166     gen_helper_rbit(t32, t32);
8167     tcg_gen_extu_i32_i64(tcg_rd, t32);
8168 }
8169 
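/*
 * REV16: swap the two bytes within each 16-bit lane using the usual
 * mask-and-shift trick.  'mask' selects the low byte of every halfword:
 * those bytes are shifted up by 8 and merged with the high bytes
 * shifted down by 8.
 */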
8170 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8171 {
8172     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8173 
8174     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8175     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8176     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8177     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8178     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8179 }
8180 
8181 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8182 {
8183     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8184 }
8185 
8186 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8187 {
8188     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8189 }
8190 
8191 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8192 {
8193     tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8194 }
8195 
8196 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8197 {
8198     tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8199     tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8200 }
8201 
8202 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8203 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8204 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8205 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
8206 
8207 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8208 {
8209     TCGv_i32 t32 = tcg_temp_new_i32();
8210 
8211     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8212     tcg_gen_clzi_i32(t32, t32, 32);
8213     tcg_gen_extu_i32_i64(tcg_rd, t32);
8214 }
8215 
8216 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8217 {
8218     tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8219 }
8220 
8221 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8222 {
8223     TCGv_i32 t32 = tcg_temp_new_i32();
8224 
8225     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8226     tcg_gen_clrsb_i32(t32, t32);
8227     tcg_gen_extu_i32_i64(tcg_rd, t32);
8228 }
8229 
8230 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8231 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8232 
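/*
 * PACxx/AUTxx: the 'z' encodings (PACIZA etc.) use an implicit zero
 * modifier and are only valid with Rn == 31.  When pointer
 * authentication is not currently enabled, the destination register is
 * left unchanged.
 */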
8233 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8234 {
8235     TCGv_i64 tcg_rd, tcg_rn;
8236 
8237     if (a->z) {
8238         if (a->rn != 31) {
8239             return false;
8240         }
8241         tcg_rn = tcg_constant_i64(0);
8242     } else {
8243         tcg_rn = cpu_reg_sp(s, a->rn);
8244     }
8245     if (s->pauth_active) {
8246         tcg_rd = cpu_reg(s, a->rd);
8247         fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8248     }
8249     return true;
8250 }
8251 
8252 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8253 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8254 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8255 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8256 
8257 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8258 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8259 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8260 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8261 
8262 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8263 {
8264     if (s->pauth_active) {
8265         TCGv_i64 tcg_rd = cpu_reg(s, rd);
8266         fn(tcg_rd, tcg_env, tcg_rd);
8267     }
8268     return true;
8269 }
8270 
8271 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8272 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8273 
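/*
 * Logical (shifted register): a shift amount with bit 5 set is only
 * valid in the 64-bit form.  The N bit selects the inverted variants
 * (BIC, ORN, EON), mapped onto the andc/orc/eqv generators by the
 * callers below.
 */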
8274 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8275                          ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8276 {
8277     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8278 
8279     if (!a->sf && (a->sa & (1 << 5))) {
8280         return false;
8281     }
8282 
8283     tcg_rd = cpu_reg(s, a->rd);
8284     tcg_rn = cpu_reg(s, a->rn);
8285 
8286     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8287     if (a->sa) {
8288         shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8289     }
8290 
8291     (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8292     if (!a->sf) {
8293         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8294     }
8295     if (setflags) {
8296         gen_logic_CC(a->sf, tcg_rd);
8297     }
8298     return true;
8299 }
8300 
8301 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8302 {
8303     /*
8304      * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
8305      * register-register MOV and MVN, so it is worth special casing.
8306      */
8307     if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8308         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8309         TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8310 
8311         if (a->n) {
8312             tcg_gen_not_i64(tcg_rd, tcg_rm);
8313             if (!a->sf) {
8314                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8315             }
8316         } else {
8317             if (a->sf) {
8318                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
8319             } else {
8320                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8321             }
8322         }
8323         return true;
8324     }
8325 
8326     return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8327 }
8328 
8329 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8330 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8331 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8332 
8333 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8334                           bool sub_op, bool setflags)
8335 {
8336     TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8337 
8338     if (a->sa > 4) {
8339         return false;
8340     }
8341 
8342     /* Non-flag-setting ops may use SP as the destination */
8343     if (!setflags) {
8344         tcg_rd = cpu_reg_sp(s, a->rd);
8345     } else {
8346         tcg_rd = cpu_reg(s, a->rd);
8347     }
8348     tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8349 
8350     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8351     ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8352 
8353     tcg_result = tcg_temp_new_i64();
8354     if (!setflags) {
8355         if (sub_op) {
8356             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8357         } else {
8358             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8359         }
8360     } else {
8361         if (sub_op) {
8362             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8363         } else {
8364             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8365         }
8366     }
8367 
8368     if (a->sf) {
8369         tcg_gen_mov_i64(tcg_rd, tcg_result);
8370     } else {
8371         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8372     }
8373     return true;
8374 }
8375 
8376 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8377 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8378 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8379 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8380 
8381 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8382                           bool sub_op, bool setflags)
8383 {
8384     TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8385 
8386     if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8387         return false;
8388     }
8389 
8390     tcg_rd = cpu_reg(s, a->rd);
8391     tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8392     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8393 
8394     shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8395 
8396     tcg_result = tcg_temp_new_i64();
8397     if (!setflags) {
8398         if (sub_op) {
8399             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8400         } else {
8401             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8402         }
8403     } else {
8404         if (sub_op) {
8405             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8406         } else {
8407             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8408         }
8409     }
8410 
8411     if (a->sf) {
8412         tcg_gen_mov_i64(tcg_rd, tcg_result);
8413     } else {
8414         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8415     }
8416     return true;
8417 }
8418 
8419 TRANS(ADD_r, do_addsub_reg, a, false, false)
8420 TRANS(SUB_r, do_addsub_reg, a, true, false)
8421 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8422 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8423 
8424 static bool do_mulh(DisasContext *s, arg_rrr *a,
8425                     void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8426 {
8427     TCGv_i64 discard = tcg_temp_new_i64();
8428     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8429     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8430     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8431 
8432     fn(discard, tcg_rd, tcg_rn, tcg_rm);
8433     return true;
8434 }
8435 
8436 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8437 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8438 
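/*
 * MADD/MSUB and the widening SMADDL/SMSUBL/UMADDL/UMSUBL: 'mop' selects
 * how the sources are extended before the 64-bit multiply.  The 32-bit
 * MADD/MSUB forms pass MO_64 (no extension) because only the low 32
 * bits of the result are kept.
 */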
8439 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8440                       bool sf, bool is_sub, MemOp mop)
8441 {
8442     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8443     TCGv_i64 tcg_op1, tcg_op2;
8444 
8445     if (mop == MO_64) {
8446         tcg_op1 = cpu_reg(s, a->rn);
8447         tcg_op2 = cpu_reg(s, a->rm);
8448     } else {
8449         tcg_op1 = tcg_temp_new_i64();
8450         tcg_op2 = tcg_temp_new_i64();
8451         tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8452         tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8453     }
8454 
8455     if (a->ra == 31 && !is_sub) {
8456         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8457         tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8458     } else {
8459         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8460         TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8461 
8462         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8463         if (is_sub) {
8464             tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8465         } else {
8466             tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8467         }
8468     }
8469 
8470     if (!sf) {
8471         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8472     }
8473     return true;
8474 }
8475 
8476 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8477 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8478 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8479 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8480 
8481 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8482 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8483 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8484 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8485 
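/*
 * ADC/SBC[S]: subtraction is implemented as Rn + NOT(Rm) + C, i.e. an
 * add-with-carry of the inverted second operand, matching the
 * architectural definition of SBC.
 */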
8486 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8487                        bool is_sub, bool setflags)
8488 {
8489     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8490 
8491     tcg_rd = cpu_reg(s, a->rd);
8492     tcg_rn = cpu_reg(s, a->rn);
8493 
8494     if (is_sub) {
8495         tcg_y = tcg_temp_new_i64();
8496         tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8497     } else {
8498         tcg_y = cpu_reg(s, a->rm);
8499     }
8500 
8501     if (setflags) {
8502         gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8503     } else {
8504         gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8505     }
8506     return true;
8507 }
8508 
8509 TRANS(ADC, do_adc_sbc, a, false, false)
8510 TRANS(SBC, do_adc_sbc, a, true, false)
8511 TRANS(ADCS, do_adc_sbc, a, false, true)
8512 TRANS(SBCS, do_adc_sbc, a, true, true)
8513 
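/*
 * RMIF (FEAT_FlagM): rotate Xn right by 'imm' and move bits [3:0] of
 * the result into whichever of N, Z, C, V are selected by 'mask'.
 * The per-flag handling reflects QEMU's flag representation: NF and VF
 * hold the flag in bit 31, CF holds 0 or 1, and ZF is zero exactly
 * when Z is set.
 */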
8514 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8515 {
8516     int mask = a->mask;
8517     TCGv_i64 tcg_rn;
8518     TCGv_i32 nzcv;
8519 
8520     if (!dc_isar_feature(aa64_condm_4, s)) {
8521         return false;
8522     }
8523 
8524     tcg_rn = read_cpu_reg(s, a->rn, 1);
8525     tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8526 
8527     nzcv = tcg_temp_new_i32();
8528     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8529 
8530     if (mask & 8) { /* N */
8531         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8532     }
8533     if (mask & 4) { /* Z */
8534         tcg_gen_not_i32(cpu_ZF, nzcv);
8535         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8536     }
8537     if (mask & 2) { /* C */
8538         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8539     }
8540     if (mask & 1) { /* V */
8541         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8542     }
8543     return true;
8544 }
8545 
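/*
 * SETF8/SETF16 (FEAT_FlagM): after the shifts below, NF has operand<7>
 * (resp. <15>) in bit 31, VF bit 31 is operand<8> ^ operand<7>
 * (resp. <16> ^ <15>), and ZF is zero exactly when the low byte
 * (halfword) is zero.  C is left unchanged.
 */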
8546 static bool do_setf(DisasContext *s, int rn, int shift)
8547 {
8548     TCGv_i32 tmp = tcg_temp_new_i32();
8549 
8550     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8551     tcg_gen_shli_i32(cpu_NF, tmp, shift);
8552     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8553     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8554     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8555     return true;
8556 }
8557 
8558 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8559 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
8560 
8561 /* CCMP, CCMN */
8562 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8563 {
8564     TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8565     TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8566     TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8567     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8568     TCGv_i64 tcg_rn, tcg_y;
8569     DisasCompare c;
8570     unsigned nzcv;
8571     bool has_andc;
8572 
8573     /* Set T0 = !COND.  */
8574     arm_test_cc(&c, a->cond);
8575     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8576 
8577     /* Load the arguments for the new comparison.  */
8578     if (a->imm) {
8579         tcg_y = tcg_constant_i64(a->y);
8580     } else {
8581         tcg_y = cpu_reg(s, a->y);
8582     }
8583     tcg_rn = cpu_reg(s, a->rn);
8584 
8585     /* Set the flags for the new comparison.  */
8586     if (a->op) {
8587         gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8588     } else {
8589         gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8590     }
8591 
8592     /*
8593      * If COND was false, force the flags to #nzcv.  Compute two masks
8594      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8595      * For tcg hosts that support ANDC, we can make do with just T1.
8596      * In either case, allow the tcg optimizer to delete any unused mask.
8597      */
8598     tcg_gen_neg_i32(tcg_t1, tcg_t0);
8599     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8600 
8601     nzcv = a->nzcv;
8602     has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
8603     if (nzcv & 8) { /* N */
8604         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8605     } else {
8606         if (has_andc) {
8607             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8608         } else {
8609             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8610         }
8611     }
8612     if (nzcv & 4) { /* Z */
8613         if (has_andc) {
8614             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8615         } else {
8616             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8617         }
8618     } else {
8619         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8620     }
8621     if (nzcv & 2) { /* C */
8622         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8623     } else {
8624         if (has_andc) {
8625             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8626         } else {
8627             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8628         }
8629     }
8630     if (nzcv & 1) { /* V */
8631         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8632     } else {
8633         if (has_andc) {
8634             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8635         } else {
8636             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8637         }
8638     }
8639     return true;
8640 }
8641 
8642 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8643 {
8644     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8645     TCGv_i64 zero = tcg_constant_i64(0);
8646     DisasCompare64 c;
8647 
8648     a64_test_cc(&c, a->cond);
8649 
8650     if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8651         /* CSET & CSETM.  */
8652         if (a->else_inv) {
8653             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8654                                    tcg_rd, c.value, zero);
8655         } else {
8656             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8657                                 tcg_rd, c.value, zero);
8658         }
8659     } else {
8660         TCGv_i64 t_true = cpu_reg(s, a->rn);
8661         TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8662 
8663         if (a->else_inv && a->else_inc) {
8664             tcg_gen_neg_i64(t_false, t_false);
8665         } else if (a->else_inv) {
8666             tcg_gen_not_i64(t_false, t_false);
8667         } else if (a->else_inc) {
8668             tcg_gen_addi_i64(t_false, t_false, 1);
8669         }
8670         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8671     }
8672 
8673     if (!a->sf) {
8674         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8675     }
8676     return true;
8677 }
8678 
8679 typedef struct FPScalar1Int {
8680     void (*gen_h)(TCGv_i32, TCGv_i32);
8681     void (*gen_s)(TCGv_i32, TCGv_i32);
8682     void (*gen_d)(TCGv_i64, TCGv_i64);
8683 } FPScalar1Int;
8684 
8685 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8686                               const FPScalar1Int *f,
8687                               bool merging)
8688 {
8689     switch (a->esz) {
8690     case MO_64:
8691         if (fp_access_check(s)) {
8692             TCGv_i64 t = read_fp_dreg(s, a->rn);
8693             f->gen_d(t, t);
8694             if (merging) {
8695                 write_fp_dreg_merging(s, a->rd, a->rd, t);
8696             } else {
8697                 write_fp_dreg(s, a->rd, t);
8698             }
8699         }
8700         break;
8701     case MO_32:
8702         if (fp_access_check(s)) {
8703             TCGv_i32 t = read_fp_sreg(s, a->rn);
8704             f->gen_s(t, t);
8705             if (merging) {
8706                 write_fp_sreg_merging(s, a->rd, a->rd, t);
8707             } else {
8708                 write_fp_sreg(s, a->rd, t);
8709             }
8710         }
8711         break;
8712     case MO_16:
8713         if (!dc_isar_feature(aa64_fp16, s)) {
8714             return false;
8715         }
8716         if (fp_access_check(s)) {
8717             TCGv_i32 t = read_fp_hreg(s, a->rn);
8718             f->gen_h(t, t);
8719             if (merging) {
8720                 write_fp_hreg_merging(s, a->rd, a->rd, t);
8721             } else {
8722                 write_fp_sreg(s, a->rd, t);
8723             }
8724         }
8725         break;
8726     default:
8727         return false;
8728     }
8729     return true;
8730 }
8731 
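/*
 * Select between the normal and the FPCR.AH == 1 implementations of a
 * one-operand insn; with FEAT_AFP, AH alters how FABS and FNEG treat
 * NaN inputs, which is handled in the gen_vfp_ah_* helpers.
 */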
8732 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
8733                                   const FPScalar1Int *fnormal,
8734                                   const FPScalar1Int *fah)
8735 {
8736     return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
8737 }
8738 
8739 static const FPScalar1Int f_scalar_fmov = {
8740     tcg_gen_mov_i32,
8741     tcg_gen_mov_i32,
8742     tcg_gen_mov_i64,
8743 };
8744 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
8745 
8746 static const FPScalar1Int f_scalar_fabs = {
8747     gen_vfp_absh,
8748     gen_vfp_abss,
8749     gen_vfp_absd,
8750 };
8751 static const FPScalar1Int f_scalar_ah_fabs = {
8752     gen_vfp_ah_absh,
8753     gen_vfp_ah_abss,
8754     gen_vfp_ah_absd,
8755 };
8756 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
8757 
8758 static const FPScalar1Int f_scalar_fneg = {
8759     gen_vfp_negh,
8760     gen_vfp_negs,
8761     gen_vfp_negd,
8762 };
8763 static const FPScalar1Int f_scalar_ah_fneg = {
8764     gen_vfp_ah_negh,
8765     gen_vfp_ah_negs,
8766     gen_vfp_ah_negd,
8767 };
8768 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
8769 
8770 typedef struct FPScalar1 {
8771     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8772     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8773     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8774 } FPScalar1;
8775 
8776 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
8777                                         const FPScalar1 *f, int rmode,
8778                                         ARMFPStatusFlavour fpsttype)
8779 {
8780     TCGv_i32 tcg_rmode = NULL;
8781     TCGv_ptr fpst;
8782     TCGv_i64 t64;
8783     TCGv_i32 t32;
8784     int check = fp_access_check_scalar_hsd(s, a->esz);
8785 
8786     if (check <= 0) {
8787         return check == 0;
8788     }
8789 
8790     fpst = fpstatus_ptr(fpsttype);
8791     if (rmode >= 0) {
8792         tcg_rmode = gen_set_rmode(rmode, fpst);
8793     }
8794 
8795     switch (a->esz) {
8796     case MO_64:
8797         t64 = read_fp_dreg(s, a->rn);
8798         f->gen_d(t64, t64, fpst);
8799         write_fp_dreg_merging(s, a->rd, a->rd, t64);
8800         break;
8801     case MO_32:
8802         t32 = read_fp_sreg(s, a->rn);
8803         f->gen_s(t32, t32, fpst);
8804         write_fp_sreg_merging(s, a->rd, a->rd, t32);
8805         break;
8806     case MO_16:
8807         t32 = read_fp_hreg(s, a->rn);
8808         f->gen_h(t32, t32, fpst);
8809         write_fp_hreg_merging(s, a->rd, a->rd, t32);
8810         break;
8811     default:
8812         g_assert_not_reached();
8813     }
8814 
8815     if (rmode >= 0) {
8816         gen_restore_rmode(tcg_rmode, fpst);
8817     }
8818     return true;
8819 }
8820 
8821 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8822                           const FPScalar1 *f, int rmode)
8823 {
8824     return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
8825                                        a->esz == MO_16 ?
8826                                        FPST_A64_F16 : FPST_A64);
8827 }
8828 
8829 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
8830                              const FPScalar1 *f, int rmode)
8831 {
8832     return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
8833 }
8834 
8835 static const FPScalar1 f_scalar_fsqrt = {
8836     gen_helper_vfp_sqrth,
8837     gen_helper_vfp_sqrts,
8838     gen_helper_vfp_sqrtd,
8839 };
8840 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8841 
8842 static const FPScalar1 f_scalar_frint = {
8843     gen_helper_advsimd_rinth,
8844     gen_helper_rints,
8845     gen_helper_rintd,
8846 };
8847 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8848 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8849 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8850 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8851 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8852 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8853 
8854 static const FPScalar1 f_scalar_frintx = {
8855     gen_helper_advsimd_rinth_exact,
8856     gen_helper_rints_exact,
8857     gen_helper_rintd_exact,
8858 };
8859 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8860 
8861 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
8862 {
8863     ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
8864     TCGv_i32 t32;
8865     int check;
8866 
8867     if (!dc_isar_feature(aa64_bf16, s)) {
8868         return false;
8869     }
8870 
8871     check = fp_access_check_scalar_hsd(s, a->esz);
8872 
8873     if (check <= 0) {
8874         return check == 0;
8875     }
8876 
8877     t32 = read_fp_sreg(s, a->rn);
8878     gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
8879     write_fp_hreg_merging(s, a->rd, a->rd, t32);
8880     return true;
8881 }
8882 
8883 static const FPScalar1 f_scalar_frint32 = {
8884     NULL,
8885     gen_helper_frint32_s,
8886     gen_helper_frint32_d,
8887 };
8888 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8889            &f_scalar_frint32, FPROUNDING_ZERO)
8890 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8891 
8892 static const FPScalar1 f_scalar_frint64 = {
8893     NULL,
8894     gen_helper_frint64_s,
8895     gen_helper_frint64_d,
8896 };
8897 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8898            &f_scalar_frint64, FPROUNDING_ZERO)
8899 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8900 
8901 static const FPScalar1 f_scalar_frecpe = {
8902     gen_helper_recpe_f16,
8903     gen_helper_recpe_f32,
8904     gen_helper_recpe_f64,
8905 };
8906 static const FPScalar1 f_scalar_frecpe_rpres = {
8907     gen_helper_recpe_f16,
8908     gen_helper_recpe_rpres_f32,
8909     gen_helper_recpe_f64,
8910 };
8911 TRANS(FRECPE_s, do_fp1_scalar_ah, a,
8912       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8913       &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
8914 
8915 static const FPScalar1 f_scalar_frecpx = {
8916     gen_helper_frecpx_f16,
8917     gen_helper_frecpx_f32,
8918     gen_helper_frecpx_f64,
8919 };
8920 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
8921 
8922 static const FPScalar1 f_scalar_frsqrte = {
8923     gen_helper_rsqrte_f16,
8924     gen_helper_rsqrte_f32,
8925     gen_helper_rsqrte_f64,
8926 };
8927 static const FPScalar1 f_scalar_frsqrte_rpres = {
8928     gen_helper_rsqrte_f16,
8929     gen_helper_rsqrte_rpres_f32,
8930     gen_helper_rsqrte_f64,
8931 };
8932 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
8933       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8934       &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
8935 
8936 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8937 {
8938     if (fp_access_check(s)) {
8939         TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8940         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8941         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8942 
8943         gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8944         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
8945     }
8946     return true;
8947 }
8948 
8949 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8950 {
8951     if (fp_access_check(s)) {
8952         TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8953         TCGv_i32 ahp = get_ahp_flag();
8954         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8955 
8956         gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8957         /* write_fp_hreg_merging is OK here because top half of result is zero */
8958         write_fp_hreg_merging(s, a->rd, a->rd, tmp);
8959     }
8960     return true;
8961 }
8962 
8963 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8964 {
8965     if (fp_access_check(s)) {
8966         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8967         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8968         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8969 
8970         gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8971         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8972     }
8973     return true;
8974 }
8975 
8976 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8977 {
8978     if (fp_access_check(s)) {
8979         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8980         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8981         TCGv_i32 ahp = get_ahp_flag();
8982         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8983 
8984         gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8985         /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
8986         write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
8987     }
8988     return true;
8989 }
8990 
8991 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8992 {
8993     if (fp_access_check(s)) {
8994         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8995         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8996         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8997         TCGv_i32 tcg_ahp = get_ahp_flag();
8998 
8999         gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9000         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
9001     }
9002     return true;
9003 }
9004 
9005 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9006 {
9007     if (fp_access_check(s)) {
9008         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9009         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9010         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9011         TCGv_i32 tcg_ahp = get_ahp_flag();
9012 
9013         gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9014         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9015     }
9016     return true;
9017 }
9018 
9019 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9020                            TCGv_i64 tcg_int, bool is_signed)
9021 {
9022     TCGv_ptr tcg_fpstatus;
9023     TCGv_i32 tcg_shift, tcg_single;
9024     TCGv_i64 tcg_double;
9025 
9026     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9027     tcg_shift = tcg_constant_i32(shift);
9028 
9029     switch (esz) {
9030     case MO_64:
9031         tcg_double = tcg_temp_new_i64();
9032         if (is_signed) {
9033             gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9034         } else {
9035             gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9036         }
9037         write_fp_dreg_merging(s, rd, rd, tcg_double);
9038         break;
9039 
9040     case MO_32:
9041         tcg_single = tcg_temp_new_i32();
9042         if (is_signed) {
9043             gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9044         } else {
9045             gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9046         }
9047         write_fp_sreg_merging(s, rd, rd, tcg_single);
9048         break;
9049 
9050     case MO_16:
9051         tcg_single = tcg_temp_new_i32();
9052         if (is_signed) {
9053             gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9054         } else {
9055             gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9056         }
9057         write_fp_hreg_merging(s, rd, rd, tcg_single);
9058         break;
9059 
9060     default:
9061         g_assert_not_reached();
9062     }
9063     return true;
9064 }
9065 
9066 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9067 {
9068     TCGv_i64 tcg_int;
9069     int check = fp_access_check_scalar_hsd(s, a->esz);
9070 
9071     if (check <= 0) {
9072         return check == 0;
9073     }
9074 
9075     if (a->sf) {
9076         tcg_int = cpu_reg(s, a->rn);
9077     } else {
9078         tcg_int = read_cpu_reg(s, a->rn, true);
9079         if (is_signed) {
9080             tcg_gen_ext32s_i64(tcg_int, tcg_int);
9081         } else {
9082             tcg_gen_ext32u_i64(tcg_int, tcg_int);
9083         }
9084     }
9085     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9086 }
9087 
9088 TRANS(SCVTF_g, do_cvtf_g, a, true)
9089 TRANS(UCVTF_g, do_cvtf_g, a, false)
9090 
9091 /*
9092  * [US]CVTF (vector), scalar version.
9093  * That sounds odd, but it simply means the input comes from an FP
9094  * register rather than from a general-purpose register.  Input and
9095  * output element size are always equal.
9096  */
9097 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9098 {
9099     TCGv_i64 tcg_int;
9100     int check = fp_access_check_scalar_hsd(s, a->esz);
9101 
9102     if (check <= 0) {
9103         return check == 0;
9104     }
9105 
9106     tcg_int = tcg_temp_new_i64();
9107     read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9108     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9109 }
9110 
9111 TRANS(SCVTF_f, do_cvtf_f, a, true)
9112 TRANS(UCVTF_f, do_cvtf_f, a, false)
9113 
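/*
 * Common FP -> integer conversion: 'esz' is the size of the FP source
 * element, while 'out' encodes the integer destination size plus
 * MO_SIGN for signed conversions.  Results narrower than 64 bits are
 * zero-extended into tcg_out.
 */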
9114 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9115                            TCGv_i64 tcg_out, int shift, int rn,
9116                            ARMFPRounding rmode)
9117 {
9118     TCGv_ptr tcg_fpstatus;
9119     TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9120 
9121     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9122     tcg_shift = tcg_constant_i32(shift);
9123     tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9124 
9125     switch (esz) {
9126     case MO_64:
9127         read_vec_element(s, tcg_out, rn, 0, MO_64);
9128         switch (out) {
9129         case MO_64 | MO_SIGN:
9130             gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9131             break;
9132         case MO_64:
9133             gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9134             break;
9135         case MO_32 | MO_SIGN:
9136             gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9137             break;
9138         case MO_32:
9139             gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9140             break;
9141         default:
9142             g_assert_not_reached();
9143         }
9144         break;
9145 
9146     case MO_32:
9147         tcg_single = read_fp_sreg(s, rn);
9148         switch (out) {
9149         case MO_64 | MO_SIGN:
9150             gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9151             break;
9152         case MO_64:
9153             gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9154             break;
9155         case MO_32 | MO_SIGN:
9156             gen_helper_vfp_tosls(tcg_single, tcg_single,
9157                                  tcg_shift, tcg_fpstatus);
9158             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9159             break;
9160         case MO_32:
9161             gen_helper_vfp_touls(tcg_single, tcg_single,
9162                                  tcg_shift, tcg_fpstatus);
9163             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9164             break;
9165         default:
9166             g_assert_not_reached();
9167         }
9168         break;
9169 
9170     case MO_16:
9171         tcg_single = read_fp_hreg(s, rn);
9172         switch (out) {
9173         case MO_64 | MO_SIGN:
9174             gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9175             break;
9176         case MO_64:
9177             gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9178             break;
9179         case MO_32 | MO_SIGN:
9180             gen_helper_vfp_toslh(tcg_single, tcg_single,
9181                                  tcg_shift, tcg_fpstatus);
9182             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9183             break;
9184         case MO_32:
9185             gen_helper_vfp_toulh(tcg_single, tcg_single,
9186                                  tcg_shift, tcg_fpstatus);
9187             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9188             break;
9189         case MO_16 | MO_SIGN:
9190             gen_helper_vfp_toshh(tcg_single, tcg_single,
9191                                  tcg_shift, tcg_fpstatus);
9192             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9193             break;
9194         case MO_16:
9195             gen_helper_vfp_touhh(tcg_single, tcg_single,
9196                                  tcg_shift, tcg_fpstatus);
9197             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9198             break;
9199         default:
9200             g_assert_not_reached();
9201         }
9202         break;
9203 
9204     default:
9205         g_assert_not_reached();
9206     }
9207 
9208     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9209 }
9210 
9211 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9212                       ARMFPRounding rmode, bool is_signed)
9213 {
9214     TCGv_i64 tcg_int;
9215     int check = fp_access_check_scalar_hsd(s, a->esz);
9216 
9217     if (check <= 0) {
9218         return check == 0;
9219     }
9220 
9221     tcg_int = cpu_reg(s, a->rd);
9222     do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9223                    a->esz, tcg_int, a->shift, a->rn, rmode);
9224 
9225     if (!a->sf) {
9226         tcg_gen_ext32u_i64(tcg_int, tcg_int);
9227     }
9228     return true;
9229 }
9230 
9231 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9232 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9233 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9234 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9235 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9236 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9237 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9238 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9239 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9240 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9241 
9242 /*
9243  * FCVT* (vector), scalar version.
9244  * That sounds odd, but it simply means the output goes to an FP
9245  * register rather than to a general-purpose register.  Input and
9246  * output element size are always equal.
9247  */
9248 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9249                       ARMFPRounding rmode, bool is_signed)
9250 {
9251     TCGv_i64 tcg_int;
9252     int check = fp_access_check_scalar_hsd(s, a->esz);
9253 
9254     if (check <= 0) {
9255         return check == 0;
9256     }
9257 
9258     tcg_int = tcg_temp_new_i64();
9259     do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9260                    a->esz, tcg_int, a->shift, a->rn, rmode);
9261 
9262     if (!s->fpcr_nep) {
9263         clear_vec(s, a->rd);
9264     }
9265     write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9266     return true;
9267 }
9268 
9269 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9270 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9271 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9272 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9273 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9274 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9275 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9276 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9277 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9278 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9279 
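/*
 * FJCVTZS: convert double to a 32-bit signed integer with the
 * JavaScript conversion behaviour.  The helper returns the numeric
 * result in the low half and a value for ZF in the high half (Z is
 * set, i.e. ZF == 0, when the conversion was exact); N, C and V are
 * cleared.
 */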
9280 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9281 {
9282     if (!dc_isar_feature(aa64_jscvt, s)) {
9283         return false;
9284     }
9285     if (fp_access_check(s)) {
9286         TCGv_i64 t = read_fp_dreg(s, a->rn);
9287         TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9288 
9289         gen_helper_fjcvtzs(t, t, fpstatus);
9290 
9291         tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9292         tcg_gen_extrh_i64_i32(cpu_ZF, t);
9293         tcg_gen_movi_i32(cpu_CF, 0);
9294         tcg_gen_movi_i32(cpu_NF, 0);
9295         tcg_gen_movi_i32(cpu_VF, 0);
9296     }
9297     return true;
9298 }
9299 
9300 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9301 {
9302     if (!dc_isar_feature(aa64_fp16, s)) {
9303         return false;
9304     }
9305     if (fp_access_check(s)) {
9306         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9307         TCGv_i64 tmp = tcg_temp_new_i64();
9308         tcg_gen_ext16u_i64(tmp, tcg_rn);
9309         write_fp_dreg(s, a->rd, tmp);
9310     }
9311     return true;
9312 }
9313 
9314 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9315 {
9316     if (fp_access_check(s)) {
9317         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9318         TCGv_i64 tmp = tcg_temp_new_i64();
9319         tcg_gen_ext32u_i64(tmp, tcg_rn);
9320         write_fp_dreg(s, a->rd, tmp);
9321     }
9322     return true;
9323 }
9324 
9325 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9326 {
9327     if (fp_access_check(s)) {
9328         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9329         write_fp_dreg(s, a->rd, tcg_rn);
9330     }
9331     return true;
9332 }
9333 
9334 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9335 {
9336     if (fp_access_check(s)) {
9337         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9338         tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9339         clear_vec_high(s, true, a->rd);
9340     }
9341     return true;
9342 }
9343 
9344 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9345 {
9346     if (!dc_isar_feature(aa64_fp16, s)) {
9347         return false;
9348     }
9349     if (fp_access_check(s)) {
9350         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9351         tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9352     }
9353     return true;
9354 }
9355 
9356 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9357 {
9358     if (fp_access_check(s)) {
9359         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9360         tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9361     }
9362     return true;
9363 }
9364 
9365 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9366 {
9367     if (fp_access_check(s)) {
9368         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9369         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9370     }
9371     return true;
9372 }
9373 
9374 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9375 {
9376     if (fp_access_check(s)) {
9377         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9378         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9379     }
9380     return true;
9381 }
9382 
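/*
 * Saturating unary ops need the CPU env so the helpers can report QC:
 * gen_bhs[] holds the byte/halfword/word helpers indexed by element
 * size, gen_d the 64-bit variant.
 */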
9383 typedef struct ENVScalar1 {
9384     NeonGenOneOpEnvFn *gen_bhs[3];
9385     NeonGenOne64OpEnvFn *gen_d;
9386 } ENVScalar1;
9387 
9388 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9389 {
9390     if (!fp_access_check(s)) {
9391         return true;
9392     }
9393     if (a->esz == MO_64) {
9394         TCGv_i64 t = read_fp_dreg(s, a->rn);
9395         f->gen_d(t, tcg_env, t);
9396         write_fp_dreg(s, a->rd, t);
9397     } else {
9398         TCGv_i32 t = tcg_temp_new_i32();
9399 
9400         read_vec_element_i32(s, t, a->rn, 0, a->esz);
9401         f->gen_bhs[a->esz](t, tcg_env, t);
9402         write_fp_sreg(s, a->rd, t);
9403     }
9404     return true;
9405 }
9406 
9407 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9408 {
9409     if (a->esz == MO_64 && !a->q) {
9410         return false;
9411     }
9412     if (!fp_access_check(s)) {
9413         return true;
9414     }
9415     if (a->esz == MO_64) {
9416         TCGv_i64 t = tcg_temp_new_i64();
9417 
9418         for (int i = 0; i < 2; ++i) {
9419             read_vec_element(s, t, a->rn, i, MO_64);
9420             f->gen_d(t, tcg_env, t);
9421             write_vec_element(s, t, a->rd, i, MO_64);
9422         }
9423     } else {
9424         TCGv_i32 t = tcg_temp_new_i32();
9425         int n = (a->q ? 16 : 8) >> a->esz;
9426 
9427         for (int i = 0; i < n; ++i) {
9428             read_vec_element_i32(s, t, a->rn, i, a->esz);
9429             f->gen_bhs[a->esz](t, tcg_env, t);
9430             write_vec_element_i32(s, t, a->rd, i, a->esz);
9431         }
9432     }
9433     clear_vec_high(s, a->q, a->rd);
9434     return true;
9435 }
9436 
9437 static const ENVScalar1 f_scalar_sqabs = {
9438     { gen_helper_neon_qabs_s8,
9439       gen_helper_neon_qabs_s16,
9440       gen_helper_neon_qabs_s32 },
9441     gen_helper_neon_qabs_s64,
9442 };
9443 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9444 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9445 
9446 static const ENVScalar1 f_scalar_sqneg = {
9447     { gen_helper_neon_qneg_s8,
9448       gen_helper_neon_qneg_s16,
9449       gen_helper_neon_qneg_s32 },
9450     gen_helper_neon_qneg_s64,
9451 };
9452 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9453 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9454 
9455 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9456 {
9457     if (fp_access_check(s)) {
9458         TCGv_i64 t = read_fp_dreg(s, a->rn);
9459         f(t, t);
9460         write_fp_dreg(s, a->rd, t);
9461     }
9462     return true;
9463 }
9464 
9465 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9466 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9467 
9468 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9469 {
9470     if (fp_access_check(s)) {
9471         TCGv_i64 t = read_fp_dreg(s, a->rn);
9472         tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9473         write_fp_dreg(s, a->rd, t);
9474     }
9475     return true;
9476 }
9477 
9478 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9479 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9480 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9481 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9482 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9483 
9484 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9485                                    ArithOneOp * const fn[3])
9486 {
9487     if (a->esz == MO_64) {
9488         return false;
9489     }
9490     if (fp_access_check(s)) {
9491         TCGv_i64 t = tcg_temp_new_i64();
9492 
9493         read_vec_element(s, t, a->rn, 0, a->esz + 1);
9494         fn[a->esz](t, t);
9495         clear_vec(s, a->rd);
9496         write_vec_element(s, t, a->rd, 0, a->esz);
9497     }
9498     return true;
9499 }
9500 
9501 #define WRAP_ENV(NAME) \
9502     static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9503     { gen_helper_##NAME(d, tcg_env, n); }
9504 
9505 WRAP_ENV(neon_unarrow_sat8)
9506 WRAP_ENV(neon_unarrow_sat16)
9507 WRAP_ENV(neon_unarrow_sat32)
9508 
9509 static ArithOneOp * const f_scalar_sqxtun[] = {
9510     gen_neon_unarrow_sat8,
9511     gen_neon_unarrow_sat16,
9512     gen_neon_unarrow_sat32,
9513 };
9514 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9515 
9516 WRAP_ENV(neon_narrow_sat_s8)
9517 WRAP_ENV(neon_narrow_sat_s16)
9518 WRAP_ENV(neon_narrow_sat_s32)
9519 
9520 static ArithOneOp * const f_scalar_sqxtn[] = {
9521     gen_neon_narrow_sat_s8,
9522     gen_neon_narrow_sat_s16,
9523     gen_neon_narrow_sat_s32,
9524 };
9525 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9526 
9527 WRAP_ENV(neon_narrow_sat_u8)
9528 WRAP_ENV(neon_narrow_sat_u16)
9529 WRAP_ENV(neon_narrow_sat_u32)
9530 
9531 static ArithOneOp * const f_scalar_uqxtn[] = {
9532     gen_neon_narrow_sat_u8,
9533     gen_neon_narrow_sat_u16,
9534     gen_neon_narrow_sat_u32,
9535 };
9536 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9537 
9538 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
9539 {
9540     if (fp_access_check(s)) {
9541         /*
9542          * 64 bit to 32 bit float conversion
9543          * with von Neumann rounding (round to odd)
9544          */
9545         TCGv_i64 src = read_fp_dreg(s, a->rn);
9546         TCGv_i32 dst = tcg_temp_new_i32();
9547         gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
9548         write_fp_sreg_merging(s, a->rd, a->rd, dst);
9549     }
9550     return true;
9551 }
9552 
9553 #undef WRAP_ENV
9554 
9555 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9556 {
9557     if (!a->q && a->esz == MO_64) {
9558         return false;
9559     }
9560     if (fp_access_check(s)) {
9561         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9562     }
9563     return true;
9564 }
9565 
9566 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9567 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9568 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9569 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9570 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9571 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9572 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9573 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9574 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9575 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9576 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9577 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9578 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9579 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9580 
9581 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9582 {
9583     if (a->esz == MO_64) {
9584         return false;
9585     }
9586     if (fp_access_check(s)) {
9587         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9588     }
9589     return true;
9590 }
9591 
9592 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9593 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9594 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9595 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9596 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9597 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9598 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9599 
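/*
 * Narrowing 2-reg-misc, vector form (XTN etc.): narrow each 64-bit half
 * of the source independently and write the two 32-bit results to the
 * low or high half of Rd according to Q (the "2" forms write the high
 * half).
 */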
9600 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9601                                    ArithOneOp * const fn[3])
9602 {
9603     if (a->esz == MO_64) {
9604         return false;
9605     }
9606     if (fp_access_check(s)) {
9607         TCGv_i64 t0 = tcg_temp_new_i64();
9608         TCGv_i64 t1 = tcg_temp_new_i64();
9609 
9610         read_vec_element(s, t0, a->rn, 0, MO_64);
9611         read_vec_element(s, t1, a->rn, 1, MO_64);
9612         fn[a->esz](t0, t0);
9613         fn[a->esz](t1, t1);
9614         write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9615         write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9616         clear_vec_high(s, a->q, a->rd);
9617     }
9618     return true;
9619 }
9620 
9621 static ArithOneOp * const f_scalar_xtn[] = {
9622     gen_helper_neon_narrow_u8,
9623     gen_helper_neon_narrow_u16,
9624     tcg_gen_ext32u_i64,
9625 };
9626 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9627 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9628 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9629 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9630 
9631 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9632 {
9633     TCGv_i32 tcg_lo = tcg_temp_new_i32();
9634     TCGv_i32 tcg_hi = tcg_temp_new_i32();
9635     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9636     TCGv_i32 ahp = get_ahp_flag();
9637 
9638     tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9639     gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9640     gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9641     tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9642     tcg_gen_extu_i32_i64(d, tcg_lo);
9643 }
9644 
9645 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9646 {
9647     TCGv_i32 tmp = tcg_temp_new_i32();
9648     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9649 
9650     gen_helper_vfp_fcvtsd(tmp, n, fpst);
9651     tcg_gen_extu_i32_i64(d, tmp);
9652 }
9653 
9654 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9655 {
9656     /*
9657      * 64 bit to 32 bit float conversion
9658      * with von Neumann rounding (round to odd)
9659      */
9660     TCGv_i32 tmp = tcg_temp_new_i32();
9661     gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9662     tcg_gen_extu_i32_i64(d, tmp);
9663 }
9664 
9665 static ArithOneOp * const f_vector_fcvtn[] = {
9666     NULL,
9667     gen_fcvtn_hs,
9668     gen_fcvtn_sd,
9669 };
9670 static ArithOneOp * const f_scalar_fcvtxn[] = {
9671     NULL,
9672     NULL,
9673     gen_fcvtxn_sd,
9674 };
9675 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9676 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9677 
9678 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9679 {
9680     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9681     TCGv_i32 tmp = tcg_temp_new_i32();
9682     gen_helper_bfcvt_pair(tmp, n, fpst);
9683     tcg_gen_extu_i32_i64(d, tmp);
9684 }
9685 
9686 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
9687 {
9688     TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
9689     TCGv_i32 tmp = tcg_temp_new_i32();
9690     gen_helper_bfcvt_pair(tmp, n, fpst);
9691     tcg_gen_extu_i32_i64(d, tmp);
9692 }
9693 
9694 static ArithOneOp * const f_vector_bfcvtn[2][3] = {
9695     {
9696         NULL,
9697         gen_bfcvtn_hs,
9698         NULL,
9699     }, {
9700         NULL,
9701         gen_bfcvtn_ah_hs,
9702         NULL,
9703     }
9704 };
9705 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
9706            f_vector_bfcvtn[s->fpcr_ah])
9707 
9708 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9709 {
9710     static NeonGenWidenFn * const widenfns[3] = {
9711         gen_helper_neon_widen_u8,
9712         gen_helper_neon_widen_u16,
9713         tcg_gen_extu_i32_i64,
9714     };
9715     NeonGenWidenFn *widenfn;
9716     TCGv_i64 tcg_res[2];
9717     TCGv_i32 tcg_op;
9718     int part, pass;
9719 
9720     if (a->esz == MO_64) {
9721         return false;
9722     }
9723     if (!fp_access_check(s)) {
9724         return true;
9725     }
9726 
9727     tcg_op = tcg_temp_new_i32();
9728     widenfn = widenfns[a->esz];
9729     part = a->q ? 2 : 0;
9730 
9731     for (pass = 0; pass < 2; pass++) {
9732         read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9733         tcg_res[pass] = tcg_temp_new_i64();
9734         widenfn(tcg_res[pass], tcg_op);
9735         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9736     }
9737 
9738     for (pass = 0; pass < 2; pass++) {
9739         write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9740     }
9741     return true;
9742 }
9743 
9744 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9745 {
9746     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9747 
9748     if (check <= 0) {
9749         return check == 0;
9750     }
9751 
9752     gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9753     return true;
9754 }
9755 
9756 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9757 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9758 
9759 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9760                           const FPScalar1 *f, int rmode)
9761 {
9762     TCGv_i32 tcg_rmode = NULL;
9763     TCGv_ptr fpst;
9764     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9765 
9766     if (check <= 0) {
9767         return check == 0;
9768     }
9769 
9770     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9771     if (rmode >= 0) {
9772         tcg_rmode = gen_set_rmode(rmode, fpst);
9773     }
9774 
9775     if (a->esz == MO_64) {
9776         TCGv_i64 t64 = tcg_temp_new_i64();
9777 
9778         for (int pass = 0; pass < 2; ++pass) {
9779             read_vec_element(s, t64, a->rn, pass, MO_64);
9780             f->gen_d(t64, t64, fpst);
9781             write_vec_element(s, t64, a->rd, pass, MO_64);
9782         }
9783     } else {
9784         TCGv_i32 t32 = tcg_temp_new_i32();
9785         void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9786             = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9787 
9788         for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
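        /*
         * The element count is the vector width in bytes (16 when Q is
         * set, else 8) divided by the element size (1 << esz).
         */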
9789             read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9790             gen(t32, t32, fpst);
9791             write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9792         }
9793     }
9794     clear_vec_high(s, a->q, a->rd);
9795 
9796     if (rmode >= 0) {
9797         gen_restore_rmode(tcg_rmode, fpst);
9798     }
9799     return true;
9800 }
9801 
9802 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9803 
9804 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9805 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9806 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9807 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9808 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9809 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9810 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9811 
9812 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9813            &f_scalar_frint32, FPROUNDING_ZERO)
9814 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9815 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9816            &f_scalar_frint64, FPROUNDING_ZERO)
9817 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9818 
9819 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
9820                                            bool is_q, int rd, int rn, int data,
9821                                            gen_helper_gvec_2_ptr * const fns[3],
9822                                            ARMFPStatusFlavour fpsttype)
9823 {
9824     int check = fp_access_check_vector_hsd(s, is_q, esz);
9825     TCGv_ptr fpst;
9826 
9827     if (check <= 0) {
9828         return check == 0;
9829     }
9830 
9831     fpst = fpstatus_ptr(fpsttype);
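    /* The fns[] tables are indexed by esz - 1: MO_16, MO_32, MO_64. */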
9832     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9833                        vec_full_reg_offset(s, rn), fpst,
9834                        is_q ? 16 : 8, vec_full_reg_size(s),
9835                        data, fns[esz - 1]);
9836     return true;
9837 }
9838 
9839 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9840                              int rd, int rn, int data,
9841                              gen_helper_gvec_2_ptr * const fns[3])
9842 {
9843     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
9844                                           esz == MO_16 ? FPST_A64_F16 :
9845                                           FPST_A64);
9846 }
9847 
9848 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
9849                                 int rd, int rn, int data,
9850                                 gen_helper_gvec_2_ptr * const fns[3])
9851 {
9852     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
9853                                           fns, select_ah_fpst(s, esz));
9854 }
9855 
9856 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9857     gen_helper_gvec_vcvt_sh,
9858     gen_helper_gvec_vcvt_sf,
9859     gen_helper_gvec_vcvt_sd,
9860 };
9861 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9862       a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9863 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9864       a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9865 
9866 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9867     gen_helper_gvec_vcvt_uh,
9868     gen_helper_gvec_vcvt_uf,
9869     gen_helper_gvec_vcvt_ud,
9870 };
9871 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9872       a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9873 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9874       a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9875 
9876 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9877     gen_helper_gvec_vcvt_rz_hs,
9878     gen_helper_gvec_vcvt_rz_fs,
9879     gen_helper_gvec_vcvt_rz_ds,
9880 };
9881 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9882       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9883 
9884 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9885     gen_helper_gvec_vcvt_rz_hu,
9886     gen_helper_gvec_vcvt_rz_fu,
9887     gen_helper_gvec_vcvt_rz_du,
9888 };
9889 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9890       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9891 
9892 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9893     gen_helper_gvec_vcvt_rm_sh,
9894     gen_helper_gvec_vcvt_rm_ss,
9895     gen_helper_gvec_vcvt_rm_sd,
9896 };
9897 
9898 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9899     gen_helper_gvec_vcvt_rm_uh,
9900     gen_helper_gvec_vcvt_rm_us,
9901     gen_helper_gvec_vcvt_rm_ud,
9902 };
9903 
9904 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9905       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9906 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9907       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9908 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9909       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9910 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9911       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9912 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9913       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9914 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9915       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9916 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9917       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9918 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9919       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9920 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9921       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9922 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9923       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9924 
9925 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9926     gen_helper_gvec_fceq0_h,
9927     gen_helper_gvec_fceq0_s,
9928     gen_helper_gvec_fceq0_d,
9929 };
9930 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9931 
9932 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9933     gen_helper_gvec_fcgt0_h,
9934     gen_helper_gvec_fcgt0_s,
9935     gen_helper_gvec_fcgt0_d,
9936 };
9937 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9938 
9939 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9940     gen_helper_gvec_fcge0_h,
9941     gen_helper_gvec_fcge0_s,
9942     gen_helper_gvec_fcge0_d,
9943 };
9944 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9945 
9946 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9947     gen_helper_gvec_fclt0_h,
9948     gen_helper_gvec_fclt0_s,
9949     gen_helper_gvec_fclt0_d,
9950 };
9951 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9952 
9953 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9954     gen_helper_gvec_fcle0_h,
9955     gen_helper_gvec_fcle0_s,
9956     gen_helper_gvec_fcle0_d,
9957 };
9958 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9959 
9960 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9961     gen_helper_gvec_frecpe_h,
9962     gen_helper_gvec_frecpe_s,
9963     gen_helper_gvec_frecpe_d,
9964 };
9965 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
9966     gen_helper_gvec_frecpe_h,
9967     gen_helper_gvec_frecpe_rpres_s,
9968     gen_helper_gvec_frecpe_d,
9969 };
9970 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9971       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
9972 
9973 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9974     gen_helper_gvec_frsqrte_h,
9975     gen_helper_gvec_frsqrte_s,
9976     gen_helper_gvec_frsqrte_d,
9977 };
9978 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
9979     gen_helper_gvec_frsqrte_h,
9980     gen_helper_gvec_frsqrte_rpres_s,
9981     gen_helper_gvec_frsqrte_d,
9982 };
9983 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9984       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
9985 
9986 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9987 {
9988     /* Handle 2-reg-misc ops which are widening (so each size element
9989      * in the source becomes a 2*size element in the destination).
9990      * The only instruction like this is FCVTL.
9991      */
9992     int pass;
9993     TCGv_ptr fpst;
9994 
9995     if (!fp_access_check(s)) {
9996         return true;
9997     }
9998 
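    /* For FCVTL2 (Q == 1) the source elements come from the upper half. */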
9999     if (a->esz == MO_64) {
10000         /* 32 -> 64 bit fp conversion */
10001         TCGv_i64 tcg_res[2];
10002         TCGv_i32 tcg_op = tcg_temp_new_i32();
10003         int srcelt = a->q ? 2 : 0;
10004 
10005         fpst = fpstatus_ptr(FPST_A64);
10006 
10007         for (pass = 0; pass < 2; pass++) {
10008             tcg_res[pass] = tcg_temp_new_i64();
10009             read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10010             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10011         }
10012         for (pass = 0; pass < 2; pass++) {
10013             write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10014         }
10015     } else {
10016         /* 16 -> 32 bit fp conversion */
10017         int srcelt = a->q ? 4 : 0;
10018         TCGv_i32 tcg_res[4];
10019         TCGv_i32 ahp = get_ahp_flag();
10020 
10021         fpst = fpstatus_ptr(FPST_A64_F16);
10022 
10023         for (pass = 0; pass < 4; pass++) {
10024             tcg_res[pass] = tcg_temp_new_i32();
10025             read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10026             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10027                                            fpst, ahp);
10028         }
10029         for (pass = 0; pass < 4; pass++) {
10030             write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10031         }
10032     }
10033     clear_vec_high(s, true, a->rd);
10034     return true;
10035 }
10036 
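/*
 * trans_OK and trans_FAIL are the targets of the decode-sme-fa64
 * decoder included above: OK leaves the insn alone, while FAIL marks
 * it as one that is not valid in Streaming SVE mode, so that the
 * access checks can raise the SME trap for it.
 */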
10037 static bool trans_OK(DisasContext *s, arg_OK *a)
10038 {
10039     return true;
10040 }
10041 
10042 static bool trans_FAIL(DisasContext *s, arg_OK *a)
10043 {
10044     s->is_nonstreaming = true;
10045     return true;
10046 }
10047 
10048 /**
10049  * btype_destination_ok:
10050  * @insn: The instruction at the branch destination
10051  * @bt: SCTLR_ELx.BT
10052  * @btype: PSTATE.BTYPE, which is known to be non-zero
10053  *
10054  * On a guarded page, only a limited set of insns may be present
10055  * at the branch target:
10056  *   - branch target identifiers,
10057  *   - PACIASP, PACIBSP,
10058  *   - the BRK insn,
10059  *   - the HLT insn.
10060  * Anything else causes a Branch Target Exception.
10061  *
10062  * Return true if the branch is compatible, false to raise BTITRAP.
10063  */
10064 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10065 {
10066     if ((insn & 0xfffff01fu) == 0xd503201fu) {
10067         /* HINT space */
10068         switch (extract32(insn, 5, 7)) {
10069         case 0b011001: /* PACIASP */
10070         case 0b011011: /* PACIBSP */
10071             /*
10072              * If SCTLR_ELx.BT, then PACI*SP are not compatible
10073              * with btype == 3.  Otherwise all btype are ok.
10074              */
10075             return !bt || btype != 3;
10076         case 0b100000: /* BTI */
10077             /* Not compatible with any btype.  */
10078             return false;
10079         case 0b100010: /* BTI c */
10080             /* Not compatible with btype == 3 */
10081             return btype != 3;
10082         case 0b100100: /* BTI j */
10083             /* Not compatible with btype == 2 */
10084             return btype != 2;
10085         case 0b100110: /* BTI jc */
10086             /* Compatible with any btype.  */
10087             return true;
10088         }
10089     } else {
10090         switch (insn & 0xffe0001fu) {
10091         case 0xd4200000u: /* BRK */
10092         case 0xd4400000u: /* HLT */
10093             /* Give priority to the breakpoint exception.  */
10094             return true;
10095         }
10096     }
10097     return false;
10098 }
10099 
10100 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10101                                           CPUState *cpu)
10102 {
10103     DisasContext *dc = container_of(dcbase, DisasContext, base);
10104     CPUARMState *env = cpu_env(cpu);
10105     ARMCPU *arm_cpu = env_archcpu(env);
10106     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10107     int bound, core_mmu_idx;
10108 
10109     dc->isar = &arm_cpu->isar;
10110     dc->condjmp = 0;
10111     dc->pc_save = dc->base.pc_first;
10112     dc->aarch64 = true;
10113     dc->thumb = false;
10114     dc->sctlr_b = 0;
10115     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10116     dc->condexec_mask = 0;
10117     dc->condexec_cond = 0;
10118     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10119     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10120     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10121     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10122     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10123     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10124 #if !defined(CONFIG_USER_ONLY)
10125     dc->user = (dc->current_el == 0);
10126 #endif
10127     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10128     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10129     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10130     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10131     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10132     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10133     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10134     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10135     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10136     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10137     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10138     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10139     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10140     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10141     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10142     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10143     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10144     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10145     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10146     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10147     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10148     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10149     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10150     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10151     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10152     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
10153     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10154     dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10155     dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10156     dc->vec_len = 0;
10157     dc->vec_stride = 0;
10158     dc->cp_regs = arm_cpu->cp_regs;
10159     dc->features = env->features;
10160     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10161     dc->gm_blocksize = arm_cpu->gm_blocksize;
10162 
10163 #ifdef CONFIG_USER_ONLY
10164     /* In sve_probe_page, we assume TBI is enabled. */
10165     tcg_debug_assert(dc->tbid & 1);
10166 #endif
10167 
10168     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10169 
10170     /* Single step state. The code-generation logic here is:
10171      *  SS_ACTIVE == 0:
10172      *   generate code with no special handling for single-stepping (except
10173      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10174      *   this happens anyway because those changes are all system register or
10175      *   PSTATE writes).
10176      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10177      *   emit code for one insn
10178      *   emit code to clear PSTATE.SS
10179      *   emit code to generate software step exception for completed step
10180      *   end TB (as usual for having generated an exception)
10181      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10182      *   emit code to generate a software step exception
10183      *   end the TB
10184      */
10185     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10186     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10187     dc->is_ldex = false;
10188 
10189     /* Bound the number of insns to execute to those left on the page.  */
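    /*
     * TARGET_PAGE_MASK has all bits above the page offset set, so
     * -(pc_first | TARGET_PAGE_MASK) is the number of bytes remaining
     * on this page; dividing by the 4-byte insn size bounds the insn
     * count, e.g. 2 when pc_first is 8 bytes before the page end.
     */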
10190     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10191 
10192     /* If architectural single step active, limit to 1.  */
10193     if (dc->ss_active) {
10194         bound = 1;
10195     }
10196     dc->base.max_insns = MIN(dc->base.max_insns, bound);
10197 }
10198 
10199 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10200 {
10201 }
10202 
10203 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10204 {
10205     DisasContext *dc = container_of(dcbase, DisasContext, base);
10206     target_ulong pc_arg = dc->base.pc_next;
10207 
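    /*
     * With CF_PCREL the translated code may run at any virtual address
     * sharing the same page offset, so only the offset within the page
     * is recorded in the insn start data.
     */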
10208     if (tb_cflags(dcbase->tb) & CF_PCREL) {
10209         pc_arg &= ~TARGET_PAGE_MASK;
10210     }
10211     tcg_gen_insn_start(pc_arg, 0, 0);
10212     dc->insn_start_updated = false;
10213 }
10214 
10215 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10216 {
10217     DisasContext *s = container_of(dcbase, DisasContext, base);
10218     CPUARMState *env = cpu_env(cpu);
10219     uint64_t pc = s->base.pc_next;
10220     uint32_t insn;
10221 
10222     /* Singlestep exceptions have the highest priority. */
10223     if (s->ss_active && !s->pstate_ss) {
10224         /* Singlestep state is Active-pending.
10225          * If we're in this state at the start of a TB then either
10226          *  a) we just took an exception to an EL which is being debugged
10227          *     and this is the first insn in the exception handler
10228          *  b) debug exceptions were masked and we just unmasked them
10229          *     without changing EL (eg by clearing PSTATE.D)
10230          * In either case we're going to take a swstep exception in the
10231          * "did not step an insn" case, and so the syndrome ISV and EX
10232          * bits should be zero.
10233          */
10234         assert(s->base.num_insns == 1);
10235         gen_swstep_exception(s, 0, 0);
10236         s->base.is_jmp = DISAS_NORETURN;
10237         s->base.pc_next = pc + 4;
10238         return;
10239     }
10240 
10241     if (pc & 3) {
10242         /*
10243          * PC alignment fault.  This has priority over the instruction abort
10244          * that we would receive from a translation fault via arm_ldl_code.
10245          * This should only be possible after an indirect branch, at the
10246          * start of the TB.
10247          */
10248         assert(s->base.num_insns == 1);
10249         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
10250         s->base.is_jmp = DISAS_NORETURN;
10251         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10252         return;
10253     }
10254 
10255     s->pc_curr = pc;
10256     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10257     s->insn = insn;
10258     s->base.pc_next = pc + 4;
10259 
10260     s->fp_access_checked = false;
10261     s->sve_access_checked = false;
10262 
10263     if (s->pstate_il) {
10264         /*
10265          * Illegal execution state. This has priority over BTI
10266          * exceptions, but comes after instruction abort exceptions.
10267          */
10268         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10269         return;
10270     }
10271 
10272     if (dc_isar_feature(aa64_bti, s)) {
10273         if (s->base.num_insns == 1) {
10274             /* First insn can have btype set to non-zero.  */
10275             tcg_debug_assert(s->btype >= 0);
10276 
10277             /*
10278              * Note that the Branch Target Exception has fairly high
10279              * priority -- below debugging exceptions but above most
10280              * everything else.  This allows us to handle this now
10281              * instead of waiting until the insn is otherwise decoded.
10282              *
10283              * We can check all but the guarded page check here;
10284              * defer the latter to a helper.
10285              */
10286             if (s->btype != 0
10287                 && !btype_destination_ok(insn, s->bt, s->btype)) {
10288                 gen_helper_guarded_page_check(tcg_env);
10289             }
10290         } else {
10291             /* Not the first insn: btype must be 0.  */
10292             tcg_debug_assert(s->btype == 0);
10293         }
10294     }
10295 
10296     s->is_nonstreaming = false;
10297     if (s->sme_trap_nonstreaming) {
10298         disas_sme_fa64(s, insn);
10299     }
10300 
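    /*
     * Try the base A64, SME and SVE decoders in turn; if none of them
     * accepts the insn it is an unallocated encoding.
     */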
10301     if (!disas_a64(s, insn) &&
10302         !disas_sme(s, insn) &&
10303         !disas_sve(s, insn)) {
10304         unallocated_encoding(s);
10305     }
10306 
10307     /*
10308      * After execution of most insns, btype is reset to 0.
10309      * Note that we set btype == -1 when the insn sets btype.
10310      */
10311     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10312         reset_btype(s);
10313     }
10314 }
10315 
10316 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10317 {
10318     DisasContext *dc = container_of(dcbase, DisasContext, base);
10319 
10320     if (unlikely(dc->ss_active)) {
10321         /* Note that this means single stepping WFI doesn't halt the CPU.
10322          * For conditional branch insns this is harmless unreachable code as
10323          * gen_goto_tb() has already handled emitting the debug exception
10324          * (and thus a tb-jump is not possible when singlestepping).
10325          */
10326         switch (dc->base.is_jmp) {
10327         default:
10328             gen_a64_update_pc(dc, 4);
10329             /* fall through */
10330         case DISAS_EXIT:
10331         case DISAS_JUMP:
10332             gen_step_complete_exception(dc);
10333             break;
10334         case DISAS_NORETURN:
10335             break;
10336         }
10337     } else {
10338         switch (dc->base.is_jmp) {
10339         case DISAS_NEXT:
10340         case DISAS_TOO_MANY:
10341             gen_goto_tb(dc, 1, 4);
10342             break;
10343         default:
10344         case DISAS_UPDATE_EXIT:
10345             gen_a64_update_pc(dc, 4);
10346             /* fall through */
10347         case DISAS_EXIT:
10348             tcg_gen_exit_tb(NULL, 0);
10349             break;
10350         case DISAS_UPDATE_NOCHAIN:
10351             gen_a64_update_pc(dc, 4);
10352             /* fall through */
10353         case DISAS_JUMP:
10354             tcg_gen_lookup_and_goto_ptr();
10355             break;
10356         case DISAS_NORETURN:
10357         case DISAS_SWI:
10358             break;
10359         case DISAS_WFE:
10360             gen_a64_update_pc(dc, 4);
10361             gen_helper_wfe(tcg_env);
10362             break;
10363         case DISAS_YIELD:
10364             gen_a64_update_pc(dc, 4);
10365             gen_helper_yield(tcg_env);
10366             break;
10367         case DISAS_WFI:
10368             /*
10369              * This is a special case because we don't want to just halt
10370              * the CPU if trying to debug across a WFI.
10371              */
10372             gen_a64_update_pc(dc, 4);
10373             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10374             /*
10375              * The helper doesn't necessarily throw an exception, but we
10376              * must go back to the main loop to check for interrupts anyway.
10377              */
10378             tcg_gen_exit_tb(NULL, 0);
10379             break;
10380         }
10381     }
10382 }
10383 
10384 const TranslatorOps aarch64_translator_ops = {
10385     .init_disas_context = aarch64_tr_init_disas_context,
10386     .tb_start           = aarch64_tr_tb_start,
10387     .insn_start         = aarch64_tr_insn_start,
10388     .translate_insn     = aarch64_tr_translate_insn,
10389     .tb_stop            = aarch64_tr_tb_stop,
10390 };
10391