xref: /qemu/target/arm/tcg/translate-a64.c (revision 9c2ff9cdc9b33472333e9431cbf4417f5f228883)
1  /*
2   *  AArch64 translation
3   *
4   *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  #include "qemu/osdep.h"
20  #include "exec/exec-all.h"
21  #include "exec/target_page.h"
22  #include "translate.h"
23  #include "translate-a64.h"
24  #include "qemu/log.h"
25  #include "arm_ldst.h"
26  #include "semihosting/semihost.h"
27  #include "cpregs.h"
28  
29  static TCGv_i64 cpu_X[32];
30  static TCGv_i64 cpu_pc;
31  
32  /* Load/store exclusive handling */
33  static TCGv_i64 cpu_exclusive_high;
34  
35  static const char *regnames[] = {
36      "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37      "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38      "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39      "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40  };
41  
42  enum a64_shift_type {
43      A64_SHIFT_TYPE_LSL = 0,
44      A64_SHIFT_TYPE_LSR = 1,
45      A64_SHIFT_TYPE_ASR = 2,
46      A64_SHIFT_TYPE_ROR = 3
47  };
48  
49  /*
50   * Helpers for extracting complex instruction fields
51   */
52  
53  /*
54   * For load/store with an unsigned 12 bit immediate scaled by the element
55   * size. The input has the immediate field in bits [14:3] and the element
56   * size in [2:0].
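     * For example, a field value of 0x2b (imm == 5, size == 3) yields 5 << 3 == 40.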
57   */
58  static int uimm_scaled(DisasContext *s, int x)
59  {
60      unsigned imm = x >> 3;
61      unsigned scale = extract32(x, 0, 3);
62      return imm << scale;
63  }
64  
65  /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66  static int scale_by_log2_tag_granule(DisasContext *s, int x)
67  {
68      return x << LOG2_TAG_GRANULE;
69  }
70  
71  /*
72   * Include the generated decoders.
73   */
74  
75  #include "decode-sme-fa64.c.inc"
76  #include "decode-a64.c.inc"
77  
78  /* initialize TCG globals.  */
79  void a64_translate_init(void)
80  {
81      int i;
82  
83      cpu_pc = tcg_global_mem_new_i64(tcg_env,
84                                      offsetof(CPUARMState, pc),
85                                      "pc");
86      for (i = 0; i < 32; i++) {
87          cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
88                                            offsetof(CPUARMState, xregs[i]),
89                                            regnames[i]);
90      }
91  
92      cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
93          offsetof(CPUARMState, exclusive_high), "exclusive_high");
94  }
95  
96  /*
97   * Return the core mmu_idx to use for A64 load/store insns which
98   * have a "unprivileged load/store" variant. Those insns access
99   * EL0 if executed from an EL which has control over EL0 (usually
100   * EL1) but behave like normal loads and stores if executed from
101   * elsewhere (eg EL3).
102   *
103   * @unpriv : true for the unprivileged encoding; false for the
104   *           normal encoding (in which case we will return the same
105   *           thing as get_mem_index()).
106   */
107  static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
108  {
109      /*
110       * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
111       * which is the usual mmu_idx for this cpu state.
112       */
113      ARMMMUIdx useridx = s->mmu_idx;
114  
115      if (unpriv && s->unpriv) {
116          /*
117           * We have pre-computed the condition for AccType_UNPRIV.
118           * Therefore we should never get here with a mmu_idx for
119           * which we do not know the corresponding user mmu_idx.
120           */
121          switch (useridx) {
122          case ARMMMUIdx_E10_1:
123          case ARMMMUIdx_E10_1_PAN:
124              useridx = ARMMMUIdx_E10_0;
125              break;
126          case ARMMMUIdx_E20_2:
127          case ARMMMUIdx_E20_2_PAN:
128              useridx = ARMMMUIdx_E20_0;
129              break;
130          default:
131              g_assert_not_reached();
132          }
133      }
134      return arm_to_core_mmu_idx(useridx);
135  }
136  
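     /* Store val directly into env->btype (the cached s->btype is untouched). */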
137  static void set_btype_raw(int val)
138  {
139      tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
140                     offsetof(CPUARMState, btype));
141  }
142  
143  static void set_btype(DisasContext *s, int val)
144  {
145      /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
146      tcg_debug_assert(val >= 1 && val <= 3);
147      set_btype_raw(val);
148      s->btype = -1;
149  }
150  
151  static void reset_btype(DisasContext *s)
152  {
153      if (s->btype != 0) {
154          set_btype_raw(0);
155          s->btype = 0;
156      }
157  }
158  
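     /*
      * Set dest to the address of the current insn plus diff, using a
      * pc-relative addition from cpu_pc when CF_PCREL is in effect.
      */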
159  static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
160  {
161      assert(s->pc_save != -1);
162      if (tb_cflags(s->base.tb) & CF_PCREL) {
163          tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
164      } else {
165          tcg_gen_movi_i64(dest, s->pc_curr + diff);
166      }
167  }
168  
169  void gen_a64_update_pc(DisasContext *s, target_long diff)
170  {
171      gen_pc_plus_diff(s, cpu_pc, diff);
172      s->pc_save = s->pc_curr + diff;
173  }
174  
175  /*
176   * Handle Top Byte Ignore (TBI) bits.
177   *
178   * If address tagging is enabled via the TCR TBI bits:
179   *  + for EL2 and EL3 there is only one TBI bit, and if it is set
180   *    then the address is zero-extended, clearing bits [63:56]
181   *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
182   *    and TBI1 controls addresses with bit 55 == 1.
183   *    If the appropriate TBI bit is set for the address then
184   *    the address is sign-extended from bit 55 into bits [63:56]
185   *
186   * Here we have concatenated TBI{1,0} into tbi.
187   */
188  static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
189                                  TCGv_i64 src, int tbi)
190  {
191      if (tbi == 0) {
192          /* Load unmodified address */
193          tcg_gen_mov_i64(dst, src);
194      } else if (!regime_has_2_ranges(s->mmu_idx)) {
195          /* Force tag byte to all zero */
196          tcg_gen_extract_i64(dst, src, 0, 56);
197      } else {
198          /* Sign-extend from bit 55.  */
199          tcg_gen_sextract_i64(dst, src, 0, 56);
200  
201          switch (tbi) {
202          case 1:
203              /* tbi0 but !tbi1: only use the extension if positive */
204              tcg_gen_and_i64(dst, dst, src);
205              break;
206          case 2:
207              /* !tbi0 but tbi1: only use the extension if negative */
208              tcg_gen_or_i64(dst, dst, src);
209              break;
210          case 3:
211              /* tbi0 and tbi1: always use the extension */
212              break;
213          default:
214              g_assert_not_reached();
215          }
216      }
217  }
218  
219  static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
220  {
221      /*
222       * If address tagging is enabled for instructions via the TCR TBI bits,
223       * then loading an address into the PC will clear out any tag.
224       */
225      gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
226      s->pc_save = -1;
227  }
228  
229  /*
230   * Handle MTE and/or TBI.
231   *
232   * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
233   * for the tag to be present in the FAR_ELx register.  But for user-only
234   * mode we do not have a TLB with which to implement this, so we must
235   * remove the top byte now.
236   *
237   * Always return a fresh temporary that we can increment independently
238   * of the write-back address.
239   */
240  
241  TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
242  {
243      TCGv_i64 clean = tcg_temp_new_i64();
244  #ifdef CONFIG_USER_ONLY
245      gen_top_byte_ignore(s, clean, addr, s->tbid);
246  #else
247      tcg_gen_mov_i64(clean, addr);
248  #endif
249      return clean;
250  }
251  
252  /* Insert a zero tag into src, with the result at dst. */
253  static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
254  {
255      tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
256  }
257  
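     /* Probe one access of 1 << log2_size bytes at ptr, raising any fault now. */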
258  static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
259                               MMUAccessType acc, int log2_size)
260  {
261      gen_helper_probe_access(tcg_env, ptr,
262                              tcg_constant_i32(acc),
263                              tcg_constant_i32(get_mem_index(s)),
264                              tcg_constant_i32(1 << log2_size));
265  }
266  
267  /*
268   * For MTE, check a single logical or atomic access.  This probes a single
269   * address, the exact one specified.  The size and alignment of the access
270   * is not relevant to MTE, per se, but watchpoints do require the size,
271   * and we want to recognize those before making any other changes to state.
272   */
273  static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
274                                        bool is_write, bool tag_checked,
275                                        MemOp memop, bool is_unpriv,
276                                        int core_idx)
277  {
278      if (tag_checked && s->mte_active[is_unpriv]) {
279          TCGv_i64 ret;
280          int desc = 0;
281  
282          desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
283          desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
284          desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
285          desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
286          desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
287          desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
288  
289          ret = tcg_temp_new_i64();
290          gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
291  
292          return ret;
293      }
294      return clean_data_tbi(s, addr);
295  }
296  
297  TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
298                          bool tag_checked, MemOp memop)
299  {
300      return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
301                                   false, get_mem_index(s));
302  }
303  
304  /*
305   * For MTE, check multiple logical sequential accesses.
306   */
307  TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
308                          bool tag_checked, int total_size, MemOp single_mop)
309  {
310      if (tag_checked && s->mte_active[0]) {
311          TCGv_i64 ret;
312          int desc = 0;
313  
314          desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
315          desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
316          desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
317          desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
318          desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
319          desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
320  
321          ret = tcg_temp_new_i64();
322          gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
323  
324          return ret;
325      }
326      return clean_data_tbi(s, addr);
327  }
328  
329  /*
330   * Generate the special alignment check that applies to AccType_ATOMIC
331   * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
332   * naturally aligned, but it must not cross a 16-byte boundary.
333   * See AArch64.CheckAlignment().
334   */
335  static void check_lse2_align(DisasContext *s, int rn, int imm,
336                               bool is_write, MemOp mop)
337  {
338      TCGv_i32 tmp;
339      TCGv_i64 addr;
340      TCGLabel *over_label;
341      MMUAccessType type;
342      int mmu_idx;
343  
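         /*
          * Compute ((Rn + imm) & 15) + access size; if the result is <= 16
          * the access stays within one 16-byte granule and needs no check.
          */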
344      tmp = tcg_temp_new_i32();
345      tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
346      tcg_gen_addi_i32(tmp, tmp, imm & 15);
347      tcg_gen_andi_i32(tmp, tmp, 15);
348      tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
349  
350      over_label = gen_new_label();
351      tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
352  
353      addr = tcg_temp_new_i64();
354      tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
355  
356      type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
357      mmu_idx = get_mem_index(s);
358      gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
359                                  tcg_constant_i32(mmu_idx));
360  
361      gen_set_label(over_label);
363  }
364  
365  /* Handle the alignment check for AccType_ATOMIC instructions. */
366  static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
367  {
368      MemOp size = mop & MO_SIZE;
369  
370      if (size == MO_8) {
371          return mop;
372      }
373  
374      /*
375       * If size == MO_128, this is a LDXP, and the operation is single-copy
376       * atomic for each doubleword, not the entire quadword; it still must
377       * be quadword aligned.
378       */
379      if (size == MO_128) {
380          return finalize_memop_atom(s, MO_128 | MO_ALIGN,
381                                     MO_ATOM_IFALIGN_PAIR);
382      }
383      if (dc_isar_feature(aa64_lse2, s)) {
384          check_lse2_align(s, rn, 0, true, mop);
385      } else {
386          mop |= MO_ALIGN;
387      }
388      return finalize_memop(s, mop);
389  }
390  
391  /* Handle the alignment check for AccType_ORDERED instructions. */
392  static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
393                                   bool is_write, MemOp mop)
394  {
395      MemOp size = mop & MO_SIZE;
396  
397      if (size == MO_8) {
398          return mop;
399      }
400      if (size == MO_128) {
401          return finalize_memop_atom(s, MO_128 | MO_ALIGN,
402                                     MO_ATOM_IFALIGN_PAIR);
403      }
404      if (!dc_isar_feature(aa64_lse2, s)) {
405          mop |= MO_ALIGN;
406      } else if (!s->naa) {
407          check_lse2_align(s, rn, imm, is_write, mop);
408      }
409      return finalize_memop(s, mop);
410  }
411  
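     /* A TCG condition plus the 64-bit value to test it against (see a64_test_cc). */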
412  typedef struct DisasCompare64 {
413      TCGCond cond;
414      TCGv_i64 value;
415  } DisasCompare64;
416  
417  static void a64_test_cc(DisasCompare64 *c64, int cc)
418  {
419      DisasCompare c32;
420  
421      arm_test_cc(&c32, cc);
422  
423      /*
424       * Sign-extend the 32-bit value so that the GE/LT comparisons work
425       * properly.  The NE/EQ comparisons are also fine with this choice.
426       */
427      c64->cond = c32.cond;
428      c64->value = tcg_temp_new_i64();
429      tcg_gen_ext_i32_i64(c64->value, c32.value);
430  }
431  
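     /* Recompute the cached hflags for the current exception level. */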
432  static void gen_rebuild_hflags(DisasContext *s)
433  {
434      gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
435  }
436  
437  static void gen_exception_internal(int excp)
438  {
439      assert(excp_is_internal(excp));
440      gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
441  }
442  
443  static void gen_exception_internal_insn(DisasContext *s, int excp)
444  {
445      gen_a64_update_pc(s, 0);
446      gen_exception_internal(excp);
447      s->base.is_jmp = DISAS_NORETURN;
448  }
449  
450  static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
451  {
452      gen_a64_update_pc(s, 0);
453      gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
454      s->base.is_jmp = DISAS_NORETURN;
455  }
456  
457  static void gen_step_complete_exception(DisasContext *s)
458  {
459      /* We just completed a step of an insn. Move from Active-not-pending
460       * to Active-pending, and then also take the swstep exception.
461       * This corresponds to making the (IMPDEF) choice to prioritize
462       * swstep exceptions over asynchronous exceptions taken to an exception
463       * level where debug is disabled. This choice has the advantage that
464       * we do not need to maintain internal state corresponding to the
465       * ISV/EX syndrome bits between completion of the step and generation
466       * of the exception, and our syndrome information is always correct.
467       */
468      gen_ss_advance(s);
469      gen_swstep_exception(s, 1, s->is_ldex);
470      s->base.is_jmp = DISAS_NORETURN;
471  }
472  
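     /* Return true if we may chain to dest with goto_tb; never while single-stepping. */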
473  static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
474  {
475      if (s->ss_active) {
476          return false;
477      }
478      return translator_use_goto_tb(&s->base, dest);
479  }
480  
481  static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
482  {
483      if (use_goto_tb(s, s->pc_curr + diff)) {
484          /*
485           * For pcrel, the pc must always be up-to-date on entry to
486           * the linked TB, so that it can use simple additions for all
487           * further adjustments.  For !pcrel, the linked TB is compiled
488           * to know its full virtual address, so we can delay the
489           * update to pc to the unlinked path.  A long chain of links
490           * can thus avoid many updates to the PC.
491           */
492          if (tb_cflags(s->base.tb) & CF_PCREL) {
493              gen_a64_update_pc(s, diff);
494              tcg_gen_goto_tb(n);
495          } else {
496              tcg_gen_goto_tb(n);
497              gen_a64_update_pc(s, diff);
498          }
499          tcg_gen_exit_tb(s->base.tb, n);
500          s->base.is_jmp = DISAS_NORETURN;
501      } else {
502          gen_a64_update_pc(s, diff);
503          if (s->ss_active) {
504              gen_step_complete_exception(s);
505          } else {
506              tcg_gen_lookup_and_goto_ptr();
507              s->base.is_jmp = DISAS_NORETURN;
508          }
509      }
510  }
511  
512  /*
513   * Register access functions
514   *
515   * These functions are used for directly accessing a register in cases where
516   * changes to the final register value are likely to be made. If you
517   * need to use a register for temporary calculation (e.g. index type
518   * operations) use the read_* form.
519   *
520   * B1.2.1 Register mappings
521   *
522   * In the instruction register encoding, 31 can refer to ZR (zero register) or
523   * the SP (stack pointer) depending on context. In QEMU's case we map SP
524   * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
525   * This is the point of the _sp forms.
526   */
527  TCGv_i64 cpu_reg(DisasContext *s, int reg)
528  {
529      if (reg == 31) {
530          TCGv_i64 t = tcg_temp_new_i64();
531          tcg_gen_movi_i64(t, 0);
532          return t;
533      } else {
534          return cpu_X[reg];
535      }
536  }
537  
538  /* register access for when 31 == SP */
539  TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
540  {
541      return cpu_X[reg];
542  }
543  
544  /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
545   * representing the register contents. This TCGv is an auto-freed
546   * temporary so it need not be explicitly freed, and may be modified.
547   */
548  TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
549  {
550      TCGv_i64 v = tcg_temp_new_i64();
551      if (reg != 31) {
552          if (sf) {
553              tcg_gen_mov_i64(v, cpu_X[reg]);
554          } else {
555              tcg_gen_ext32u_i64(v, cpu_X[reg]);
556          }
557      } else {
558          tcg_gen_movi_i64(v, 0);
559      }
560      return v;
561  }
562  
563  TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
564  {
565      TCGv_i64 v = tcg_temp_new_i64();
566      if (sf) {
567          tcg_gen_mov_i64(v, cpu_X[reg]);
568      } else {
569          tcg_gen_ext32u_i64(v, cpu_X[reg]);
570      }
571      return v;
572  }
573  
574  /* Return the offset into CPUARMState of a slice (from
575   * the least significant end) of FP register Qn (ie
576   * Dn, Sn, Hn or Bn).
577   * (Note that this is not the same mapping as for A32; see cpu.h)
578   */
579  static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
580  {
581      return vec_reg_offset(s, regno, 0, size);
582  }
583  
584  /* Offset of the high half of the 128 bit vector Qn */
585  static inline int fp_reg_hi_offset(DisasContext *s, int regno)
586  {
587      return vec_reg_offset(s, regno, 1, MO_64);
588  }
589  
590  /* Convenience accessors for reading and writing single and double
591   * FP registers. Writing clears the upper parts of the associated
592   * 128 bit vector register, as required by the architecture.
593   * Like the GP register accessors, the values returned by the read
594   * functions are temporaries and need not be explicitly freed.
595   */
596  static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
597  {
598      TCGv_i64 v = tcg_temp_new_i64();
599  
600      tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
601      return v;
602  }
603  
604  static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
605  {
606      TCGv_i32 v = tcg_temp_new_i32();
607  
608      tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
609      return v;
610  }
611  
612  static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
613  {
614      TCGv_i32 v = tcg_temp_new_i32();
615  
616      tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
617      return v;
618  }
619  
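     /* Zero all of vector register Vd, including any SVE bits above 128. */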
620  static void clear_vec(DisasContext *s, int rd)
621  {
622      unsigned ofs = fp_reg_offset(s, rd, MO_64);
623      unsigned vsz = vec_full_reg_size(s);
624  
625      tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
626  }
627  
628  /*
629   * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
630   * If SVE is not enabled, then there are only 128 bits in the vector.
631   */
632  static void clear_vec_high(DisasContext *s, bool is_q, int rd)
633  {
634      unsigned ofs = fp_reg_offset(s, rd, MO_64);
635      unsigned vsz = vec_full_reg_size(s);
636  
637      /* Nop move, with side effect of clearing the tail. */
638      tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
639  }
640  
641  void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
642  {
643      unsigned ofs = fp_reg_offset(s, reg, MO_64);
644  
645      tcg_gen_st_i64(v, tcg_env, ofs);
646      clear_vec_high(s, false, reg);
647  }
648  
649  static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
650  {
651      TCGv_i64 tmp = tcg_temp_new_i64();
652  
653      tcg_gen_extu_i32_i64(tmp, v);
654      write_fp_dreg(s, reg, tmp);
655  }
656  
657  /*
658   * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
659   * - if FPCR.NEP == 0, clear the high elements of reg
660   * - if FPCR.NEP == 1, set the high elements of reg from mergereg
661   *   (i.e. merge the result with those high elements)
662   * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
663   */
664  static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
665                                    TCGv_i64 v)
666  {
667      if (!s->fpcr_nep) {
668          write_fp_dreg(s, reg, v);
669          return;
670      }
671  
672      /*
673       * Move from mergereg to reg; this sets the high elements and
674       * clears the bits above 128 as a side effect.
675       */
676      tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
677                       vec_full_reg_offset(s, mergereg),
678                       16, vec_full_reg_size(s));
679      tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
680  }
681  
682  /*
683   * Write a single-prec result, but only clear the higher elements
684   * of the destination register if FPCR.NEP is 0; otherwise preserve them.
685   */
686  static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
687                                    TCGv_i32 v)
688  {
689      if (!s->fpcr_nep) {
690          write_fp_sreg(s, reg, v);
691          return;
692      }
693  
694      tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
695                       vec_full_reg_offset(s, mergereg),
696                       16, vec_full_reg_size(s));
697      tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
698  }
699  
700  /*
701   * Write a half-prec result, but only clear the higher elements
702   * of the destination register if FPCR.NEP is 0; otherwise preserve them.
703   * The caller must ensure that the top 16 bits of v are zero.
704   */
705  static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
706                                    TCGv_i32 v)
707  {
708      if (!s->fpcr_nep) {
709          write_fp_sreg(s, reg, v);
710          return;
711      }
712  
713      tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
714                       vec_full_reg_offset(s, mergereg),
715                       16, vec_full_reg_size(s));
716      tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
717  }
718  
719  /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
720  static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
721                           GVecGen2Fn *gvec_fn, int vece)
722  {
723      gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
724              is_q ? 16 : 8, vec_full_reg_size(s));
725  }
726  
727  /* Expand a 2-operand + immediate AdvSIMD vector operation using
728   * an expander function.
729   */
730  static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
731                            int64_t imm, GVecGen2iFn *gvec_fn, int vece)
732  {
733      gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
734              imm, is_q ? 16 : 8, vec_full_reg_size(s));
735  }
736  
737  /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
738  static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
739                           GVecGen3Fn *gvec_fn, int vece)
740  {
741      gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
742              vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
743  }
744  
745  /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
746  static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
747                           int rx, GVecGen4Fn *gvec_fn, int vece)
748  {
749      gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
750              vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
751              is_q ? 16 : 8, vec_full_reg_size(s));
752  }
753  
754  /* Expand a 2-operand operation using an out-of-line helper.  */
755  static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
756                               int rn, int data, gen_helper_gvec_2 *fn)
757  {
758      tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
759                         vec_full_reg_offset(s, rn),
760                         is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
761  }
762  
763  /* Expand a 3-operand operation using an out-of-line helper.  */
764  static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
765                               int rn, int rm, int data, gen_helper_gvec_3 *fn)
766  {
767      tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
768                         vec_full_reg_offset(s, rn),
769                         vec_full_reg_offset(s, rm),
770                         is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
771  }
772  
773  /* Expand a 3-operand + fpstatus pointer + simd data value operation using
774   * an out-of-line helper.
775   */
776  static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
777                                int rm, ARMFPStatusFlavour fpsttype, int data,
778                                gen_helper_gvec_3_ptr *fn)
779  {
780      TCGv_ptr fpst = fpstatus_ptr(fpsttype);
781      tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
782                         vec_full_reg_offset(s, rn),
783                         vec_full_reg_offset(s, rm), fpst,
784                         is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
785  }
786  
787  /* Expand a 4-operand operation using an out-of-line helper.  */
788  static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
789                               int rm, int ra, int data, gen_helper_gvec_4 *fn)
790  {
791      tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
792                         vec_full_reg_offset(s, rn),
793                         vec_full_reg_offset(s, rm),
794                         vec_full_reg_offset(s, ra),
795                         is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
796  }
797  
798  /*
799   * Expand a 4-operand operation using an out-of-line helper that takes
800   * a pointer to the CPU env.
801   */
802  static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
803                               int rm, int ra, int data,
804                               gen_helper_gvec_4_ptr *fn)
805  {
806      tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
807                         vec_full_reg_offset(s, rn),
808                         vec_full_reg_offset(s, rm),
809                         vec_full_reg_offset(s, ra),
810                         tcg_env,
811                         is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
812  }
813  
814  /*
815   * Expand a 4-operand + fpstatus pointer + simd data value operation using
816   * an out-of-line helper.
817   */
818  static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
819                                int rm, int ra, ARMFPStatusFlavour fpsttype,
820                                int data,
821                                gen_helper_gvec_4_ptr *fn)
822  {
823      TCGv_ptr fpst = fpstatus_ptr(fpsttype);
824      tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
825                         vec_full_reg_offset(s, rn),
826                         vec_full_reg_offset(s, rm),
827                         vec_full_reg_offset(s, ra), fpst,
828                         is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
829  }
830  
831  /*
832   * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
833   * These functions implement
834   *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
835   * which for float32 is
836   *   d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
837   * and similarly for the other float sizes.
838   */
839  static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
840  {
841      TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
842  
843      gen_vfp_negh(chs_s, s);
844      gen_vfp_absh(abs_s, s);
845      tcg_gen_movcond_i32(TCG_COND_GTU, d,
846                          abs_s, tcg_constant_i32(0x7c00),
847                          s, chs_s);
848  }
849  
850  static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
851  {
852      TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
853  
854      gen_vfp_negs(chs_s, s);
855      gen_vfp_abss(abs_s, s);
856      tcg_gen_movcond_i32(TCG_COND_GTU, d,
857                          abs_s, tcg_constant_i32(0x7f800000UL),
858                          s, chs_s);
859  }
860  
861  static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
862  {
863      TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
864  
865      gen_vfp_negd(chs_s, s);
866      gen_vfp_absd(abs_s, s);
867      tcg_gen_movcond_i64(TCG_COND_GTU, d,
868                          abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
869                          s, chs_s);
870  }
871  
872  /*
873   * These functions implement
874   *  d = floatN_is_any_nan(s) ? s : floatN_abs(s)
875   * which for float32 is
876   *  d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
877   * and similarly for the other float sizes.
878   */
879  static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
880  {
881      TCGv_i32 abs_s = tcg_temp_new_i32();
882  
883      gen_vfp_absh(abs_s, s);
884      tcg_gen_movcond_i32(TCG_COND_GTU, d,
885                          abs_s, tcg_constant_i32(0x7c00),
886                          s, abs_s);
887  }
888  
889  static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
890  {
891      TCGv_i32 abs_s = tcg_temp_new_i32();
892  
893      gen_vfp_abss(abs_s, s);
894      tcg_gen_movcond_i32(TCG_COND_GTU, d,
895                          abs_s, tcg_constant_i32(0x7f800000UL),
896                          s, abs_s);
897  }
898  
899  static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
900  {
901      TCGv_i64 abs_s = tcg_temp_new_i64();
902  
903      gen_vfp_absd(abs_s, s);
904      tcg_gen_movcond_i64(TCG_COND_GTU, d,
905                          abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
906                          s, abs_s);
907  }
908  
909  static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
910  {
911      if (dc->fpcr_ah) {
912          gen_vfp_ah_negh(d, s);
913      } else {
914          gen_vfp_negh(d, s);
915      }
916  }
917  
918  static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
919  {
920      if (dc->fpcr_ah) {
921          gen_vfp_ah_negs(d, s);
922      } else {
923          gen_vfp_negs(d, s);
924      }
925  }
926  
927  static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
928  {
929      if (dc->fpcr_ah) {
930          gen_vfp_ah_negd(d, s);
931      } else {
932          gen_vfp_negd(d, s);
933      }
934  }
935  
936  /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
937   * than the 32 bit equivalent.
938   */
939  static inline void gen_set_NZ64(TCGv_i64 result)
940  {
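         /*
          * NF takes the high half, so its sign bit is bit 63 of the result;
          * ZF is the OR of both halves, which is zero iff the result is zero.
          */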
941      tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
942      tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
943  }
944  
945  /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
946  static inline void gen_logic_CC(int sf, TCGv_i64 result)
947  {
948      if (sf) {
949          gen_set_NZ64(result);
950      } else {
951          tcg_gen_extrl_i64_i32(cpu_ZF, result);
952          tcg_gen_mov_i32(cpu_NF, cpu_ZF);
953      }
954      tcg_gen_movi_i32(cpu_CF, 0);
955      tcg_gen_movi_i32(cpu_VF, 0);
956  }
957  
958  /* dest = T0 + T1; compute C, N, V and Z flags */
959  static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
960  {
961      TCGv_i64 result, flag, tmp;
962      result = tcg_temp_new_i64();
963      flag = tcg_temp_new_i64();
964      tmp = tcg_temp_new_i64();
965  
966      tcg_gen_movi_i64(tmp, 0);
967      tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
968  
969      tcg_gen_extrl_i64_i32(cpu_CF, flag);
970  
971      gen_set_NZ64(result);
972  
973      tcg_gen_xor_i64(flag, result, t0);
974      tcg_gen_xor_i64(tmp, t0, t1);
975      tcg_gen_andc_i64(flag, flag, tmp);
976      tcg_gen_extrh_i64_i32(cpu_VF, flag);
977  
978      tcg_gen_mov_i64(dest, result);
979  }
980  
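     /* As gen_add64_CC, but for sf == 0: flags come from the low 32 bits. */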
981  static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
982  {
983      TCGv_i32 t0_32 = tcg_temp_new_i32();
984      TCGv_i32 t1_32 = tcg_temp_new_i32();
985      TCGv_i32 tmp = tcg_temp_new_i32();
986  
987      tcg_gen_movi_i32(tmp, 0);
988      tcg_gen_extrl_i64_i32(t0_32, t0);
989      tcg_gen_extrl_i64_i32(t1_32, t1);
990      tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
991      tcg_gen_mov_i32(cpu_ZF, cpu_NF);
992      tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
993      tcg_gen_xor_i32(tmp, t0_32, t1_32);
994      tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
995      tcg_gen_extu_i32_i64(dest, cpu_NF);
996  }
997  
998  static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
999  {
1000      if (sf) {
1001          gen_add64_CC(dest, t0, t1);
1002      } else {
1003          gen_add32_CC(dest, t0, t1);
1004      }
1005  }
1006  
1007  /* dest = T0 - T1; compute C, N, V and Z flags */
1008  static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1009  {
1010      /* 64 bit arithmetic */
1011      TCGv_i64 result, flag, tmp;
1012  
1013      result = tcg_temp_new_i64();
1014      flag = tcg_temp_new_i64();
1015      tcg_gen_sub_i64(result, t0, t1);
1016  
1017      gen_set_NZ64(result);
1018  
1019      tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
1020      tcg_gen_extrl_i64_i32(cpu_CF, flag);
1021  
1022      tcg_gen_xor_i64(flag, result, t0);
1023      tmp = tcg_temp_new_i64();
1024      tcg_gen_xor_i64(tmp, t0, t1);
1025      tcg_gen_and_i64(flag, flag, tmp);
1026      tcg_gen_extrh_i64_i32(cpu_VF, flag);
1027      tcg_gen_mov_i64(dest, result);
1028  }
1029  
1030  static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1031  {
1032      /* 32 bit arithmetic */
1033      TCGv_i32 t0_32 = tcg_temp_new_i32();
1034      TCGv_i32 t1_32 = tcg_temp_new_i32();
1035      TCGv_i32 tmp;
1036  
1037      tcg_gen_extrl_i64_i32(t0_32, t0);
1038      tcg_gen_extrl_i64_i32(t1_32, t1);
1039      tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
1040      tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1041      tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
1042      tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1043      tmp = tcg_temp_new_i32();
1044      tcg_gen_xor_i32(tmp, t0_32, t1_32);
1045      tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
1046      tcg_gen_extu_i32_i64(dest, cpu_NF);
1047  }
1048  
1049  static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1050  {
1051      if (sf) {
1052          gen_sub64_CC(dest, t0, t1);
1053      } else {
1054          gen_sub32_CC(dest, t0, t1);
1055      }
1056  }
1057  
1058  /* dest = T0 + T1 + CF; do not compute flags. */
1059  static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1060  {
1061      TCGv_i64 flag = tcg_temp_new_i64();
1062      tcg_gen_extu_i32_i64(flag, cpu_CF);
1063      tcg_gen_add_i64(dest, t0, t1);
1064      tcg_gen_add_i64(dest, dest, flag);
1065  
1066      if (!sf) {
1067          tcg_gen_ext32u_i64(dest, dest);
1068      }
1069  }
1070  
1071  /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
1072  static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1073  {
1074      if (sf) {
1075          TCGv_i64 result = tcg_temp_new_i64();
1076          TCGv_i64 cf_64 = tcg_temp_new_i64();
1077          TCGv_i64 vf_64 = tcg_temp_new_i64();
1078          TCGv_i64 tmp = tcg_temp_new_i64();
1079          TCGv_i64 zero = tcg_constant_i64(0);
1080  
1081          tcg_gen_extu_i32_i64(cf_64, cpu_CF);
1082          tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
1083          tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
1084          tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
1085          gen_set_NZ64(result);
1086  
1087          tcg_gen_xor_i64(vf_64, result, t0);
1088          tcg_gen_xor_i64(tmp, t0, t1);
1089          tcg_gen_andc_i64(vf_64, vf_64, tmp);
1090          tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
1091  
1092          tcg_gen_mov_i64(dest, result);
1093      } else {
1094          TCGv_i32 t0_32 = tcg_temp_new_i32();
1095          TCGv_i32 t1_32 = tcg_temp_new_i32();
1096          TCGv_i32 tmp = tcg_temp_new_i32();
1097          TCGv_i32 zero = tcg_constant_i32(0);
1098  
1099          tcg_gen_extrl_i64_i32(t0_32, t0);
1100          tcg_gen_extrl_i64_i32(t1_32, t1);
1101          tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
1102          tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
1103  
1104          tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1105          tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1106          tcg_gen_xor_i32(tmp, t0_32, t1_32);
1107          tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1108          tcg_gen_extu_i32_i64(dest, cpu_NF);
1109      }
1110  }
1111  
1112  /*
1113   * Load/Store generators
1114   */
1115  
1116  /*
1117   * Store from GPR register to memory.
1118   */
1119  static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
1120                               TCGv_i64 tcg_addr, MemOp memop, int memidx,
1121                               bool iss_valid,
1122                               unsigned int iss_srt,
1123                               bool iss_sf, bool iss_ar)
1124  {
1125      tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
1126  
1127      if (iss_valid) {
1128          uint32_t syn;
1129  
1130          syn = syn_data_abort_with_iss(0,
1131                                        (memop & MO_SIZE),
1132                                        false,
1133                                        iss_srt,
1134                                        iss_sf,
1135                                        iss_ar,
1136                                        0, 0, 0, 0, 0, false);
1137          disas_set_insn_syndrome(s, syn);
1138      }
1139  }
1140  
1141  static void do_gpr_st(DisasContext *s, TCGv_i64 source,
1142                        TCGv_i64 tcg_addr, MemOp memop,
1143                        bool iss_valid,
1144                        unsigned int iss_srt,
1145                        bool iss_sf, bool iss_ar)
1146  {
1147      do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
1148                       iss_valid, iss_srt, iss_sf, iss_ar);
1149  }
1150  
1151  /*
1152   * Load from memory to GPR register
1153   */
1154  static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1155                               MemOp memop, bool extend, int memidx,
1156                               bool iss_valid, unsigned int iss_srt,
1157                               bool iss_sf, bool iss_ar)
1158  {
1159      tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
1160  
1161      if (extend && (memop & MO_SIGN)) {
1162          g_assert((memop & MO_SIZE) <= MO_32);
1163          tcg_gen_ext32u_i64(dest, dest);
1164      }
1165  
1166      if (iss_valid) {
1167          uint32_t syn;
1168  
1169          syn = syn_data_abort_with_iss(0,
1170                                        (memop & MO_SIZE),
1171                                        (memop & MO_SIGN) != 0,
1172                                        iss_srt,
1173                                        iss_sf,
1174                                        iss_ar,
1175                                        0, 0, 0, 0, 0, false);
1176          disas_set_insn_syndrome(s, syn);
1177      }
1178  }
1179  
1180  static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1181                        MemOp memop, bool extend,
1182                        bool iss_valid, unsigned int iss_srt,
1183                        bool iss_sf, bool iss_ar)
1184  {
1185      do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1186                       iss_valid, iss_srt, iss_sf, iss_ar);
1187  }
1188  
1189  /*
1190   * Store from FP register to memory
1191   */
1192  static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1193  {
1194      /* This writes the bottom N bits of a 128 bit wide vector to memory */
1195      TCGv_i64 tmplo = tcg_temp_new_i64();
1196  
1197      tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1198  
1199      if ((mop & MO_SIZE) < MO_128) {
1200          tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1201      } else {
1202          TCGv_i64 tmphi = tcg_temp_new_i64();
1203          TCGv_i128 t16 = tcg_temp_new_i128();
1204  
1205          tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1206          tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1207  
1208          tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1209      }
1210  }
1211  
1212  /*
1213   * Load from memory to FP register
1214   */
1215  static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1216  {
1217      /* This always zero-extends and writes to a full 128 bit wide vector */
1218      TCGv_i64 tmplo = tcg_temp_new_i64();
1219      TCGv_i64 tmphi = NULL;
1220  
1221      if ((mop & MO_SIZE) < MO_128) {
1222          tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1223      } else {
1224          TCGv_i128 t16 = tcg_temp_new_i128();
1225  
1226          tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1227  
1228          tmphi = tcg_temp_new_i64();
1229          tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1230      }
1231  
1232      tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1233  
1234      if (tmphi) {
1235          tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1236      }
1237      clear_vec_high(s, tmphi != NULL, destidx);
1238  }
1239  
1240  /*
1241   * Vector load/store helpers.
1242   *
1243   * The principal difference between this and a FP load is that we don't
1244   * zero extend as we are filling a partial chunk of the vector register.
1245   * These functions don't support 128 bit loads/stores, which would be
1246   * normal load/store operations.
1247   *
1248   * The _i32 versions are useful when operating on 32 bit quantities
1249   * (eg for floating point single or using Neon helper functions).
1250   */
1251  
1252  /* Get value of an element within a vector register */
1253  static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1254                               int element, MemOp memop)
1255  {
1256      int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1257      switch ((unsigned)memop) {
1258      case MO_8:
1259          tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1260          break;
1261      case MO_16:
1262          tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1263          break;
1264      case MO_32:
1265          tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1266          break;
1267      case MO_8|MO_SIGN:
1268          tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1269          break;
1270      case MO_16|MO_SIGN:
1271          tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1272          break;
1273      case MO_32|MO_SIGN:
1274          tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1275          break;
1276      case MO_64:
1277      case MO_64|MO_SIGN:
1278          tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1279          break;
1280      default:
1281          g_assert_not_reached();
1282      }
1283  }
1284  
1285  static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1286                                   int element, MemOp memop)
1287  {
1288      int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1289      switch (memop) {
1290      case MO_8:
1291          tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1292          break;
1293      case MO_16:
1294          tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1295          break;
1296      case MO_8|MO_SIGN:
1297          tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1298          break;
1299      case MO_16|MO_SIGN:
1300          tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1301          break;
1302      case MO_32:
1303      case MO_32|MO_SIGN:
1304          tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1305          break;
1306      default:
1307          g_assert_not_reached();
1308      }
1309  }
1310  
1311  /* Set value of an element within a vector register */
1312  static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1313                                int element, MemOp memop)
1314  {
1315      int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1316      switch (memop) {
1317      case MO_8:
1318          tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1319          break;
1320      case MO_16:
1321          tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1322          break;
1323      case MO_32:
1324          tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1325          break;
1326      case MO_64:
1327          tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1328          break;
1329      default:
1330          g_assert_not_reached();
1331      }
1332  }
1333  
1334  static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1335                                    int destidx, int element, MemOp memop)
1336  {
1337      int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1338      switch (memop) {
1339      case MO_8:
1340          tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1341          break;
1342      case MO_16:
1343          tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1344          break;
1345      case MO_32:
1346          tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1347          break;
1348      default:
1349          g_assert_not_reached();
1350      }
1351  }
1352  
1353  /* Store from vector register to memory */
1354  static void do_vec_st(DisasContext *s, int srcidx, int element,
1355                        TCGv_i64 tcg_addr, MemOp mop)
1356  {
1357      TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1358  
1359      read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1360      tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1361  }
1362  
1363  /* Load from memory to vector register */
1364  static void do_vec_ld(DisasContext *s, int destidx, int element,
1365                        TCGv_i64 tcg_addr, MemOp mop)
1366  {
1367      TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1368  
1369      tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1370      write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1371  }
1372  
1373  /* Check that FP/Neon access is enabled. If it is, return
1374   * true. If not, emit code to generate an appropriate exception,
1375   * and return false; the caller should not emit any code for
1376   * the instruction. Note that this check must happen after all
1377   * unallocated-encoding checks (otherwise the syndrome information
1378   * for the resulting exception will be incorrect).
1379   */
1380  static bool fp_access_check_only(DisasContext *s)
1381  {
1382      if (s->fp_excp_el) {
1383          assert(!s->fp_access_checked);
1384          s->fp_access_checked = -1;
1385  
1386          gen_exception_insn_el(s, 0, EXCP_UDEF,
1387                                syn_fp_access_trap(1, 0xe, false, 0),
1388                                s->fp_excp_el);
1389          return false;
1390      }
1391      s->fp_access_checked = 1;
1392      return true;
1393  }
1394  
1395  static bool fp_access_check(DisasContext *s)
1396  {
1397      if (!fp_access_check_only(s)) {
1398          return false;
1399      }
1400      if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1401          gen_exception_insn(s, 0, EXCP_UDEF,
1402                             syn_smetrap(SME_ET_Streaming, false));
1403          return false;
1404      }
1405      return true;
1406  }
1407  
1408  /*
1409   * Return <0 for non-supported element sizes, with MO_16 controlled by
1410   * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
1411   */
1412  static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
1413  {
1414      switch (esz) {
1415      case MO_64:
1416      case MO_32:
1417          break;
1418      case MO_16:
1419          if (!dc_isar_feature(aa64_fp16, s)) {
1420              return -1;
1421          }
1422          break;
1423      default:
1424          return -1;
1425      }
1426      return fp_access_check(s);
1427  }
1428  
1429  /* Likewise, but vector MO_64 must have two elements. */
1430  static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
1431  {
1432      switch (esz) {
1433      case MO_64:
1434          if (!is_q) {
1435              return -1;
1436          }
1437          break;
1438      case MO_32:
1439          break;
1440      case MO_16:
1441          if (!dc_isar_feature(aa64_fp16, s)) {
1442              return -1;
1443          }
1444          break;
1445      default:
1446          return -1;
1447      }
1448      return fp_access_check(s);
1449  }
1450  
1451  /*
1452   * Check that SVE access is enabled.  If it is, return true.
1453   * If not, emit code to generate an appropriate exception and return false.
1454   * This function corresponds to CheckSVEEnabled().
1455   */
1456  bool sve_access_check(DisasContext *s)
1457  {
1458      if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1459          bool ret;
1460  
1461          assert(dc_isar_feature(aa64_sme, s));
1462          ret = sme_sm_enabled_check(s);
1463          s->sve_access_checked = (ret ? 1 : -1);
1464          return ret;
1465      }
1466      if (s->sve_excp_el) {
1467          /* Assert that we only raise one exception per instruction. */
1468          assert(!s->sve_access_checked);
1469          gen_exception_insn_el(s, 0, EXCP_UDEF,
1470                                syn_sve_access_trap(), s->sve_excp_el);
1471          s->sve_access_checked = -1;
1472          return false;
1473      }
1474      s->sve_access_checked = 1;
1475      return fp_access_check(s);
1476  }
1477  
1478  /*
1479   * Check that SME access is enabled, raise an exception if not.
1480   * Note that this function corresponds to CheckSMEAccess and is
1481   * only used directly for cpregs.
1482   */
1483  static bool sme_access_check(DisasContext *s)
1484  {
1485      if (s->sme_excp_el) {
1486          gen_exception_insn_el(s, 0, EXCP_UDEF,
1487                                syn_smetrap(SME_ET_AccessTrap, false),
1488                                s->sme_excp_el);
1489          return false;
1490      }
1491      return true;
1492  }
1493  
1494  /* This function corresponds to CheckSMEEnabled. */
1495  bool sme_enabled_check(DisasContext *s)
1496  {
1497      /*
1498       * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1499       * to be zero when fp_excp_el has priority.  This is because we need
1500       * sme_excp_el by itself for cpregs access checks.
1501       */
1502      if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1503          bool ret = sme_access_check(s);
1504          s->fp_access_checked = (ret ? 1 : -1);
1505          return ret;
1506      }
1507      return fp_access_check_only(s);
1508  }
1509  
1510  /* Common subroutine for CheckSMEAnd*Enabled. */
1511  bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1512  {
1513      if (!sme_enabled_check(s)) {
1514          return false;
1515      }
1516      if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1517          gen_exception_insn(s, 0, EXCP_UDEF,
1518                             syn_smetrap(SME_ET_NotStreaming, false));
1519          return false;
1520      }
1521      if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1522          gen_exception_insn(s, 0, EXCP_UDEF,
1523                             syn_smetrap(SME_ET_InactiveZA, false));
1524          return false;
1525      }
1526      return true;
1527  }
1528  
1529  /*
1530   * Expanders for AdvSIMD translation functions.
1531   */
1532  
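      /*
       * Element size MO_64 with Q == 0 (a vector of one 64-bit element) is a
       * reserved encoding, so these expanders reject it before checking FP access.
       */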
1533  static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1534                              gen_helper_gvec_2 *fn)
1535  {
1536      if (!a->q && a->esz == MO_64) {
1537          return false;
1538      }
1539      if (fp_access_check(s)) {
1540          gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1541      }
1542      return true;
1543  }
1544  
1545  static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1546                              gen_helper_gvec_3 *fn)
1547  {
1548      if (!a->q && a->esz == MO_64) {
1549          return false;
1550      }
1551      if (fp_access_check(s)) {
1552          gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1553      }
1554      return true;
1555  }
1556  
1557  static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1558  {
1559      if (!a->q && a->esz == MO_64) {
1560          return false;
1561      }
1562      if (fp_access_check(s)) {
1563          gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1564      }
1565      return true;
1566  }
1567  
1568  static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1569  {
1570      if (a->esz == MO_64) {
1571          return false;
1572      }
1573      if (fp_access_check(s)) {
1574          gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1575      }
1576      return true;
1577  }
1578  
1579  static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1580  {
1581      if (a->esz == MO_8) {
1582          return false;
1583      }
1584      return do_gvec_fn3_no64(s, a, fn);
1585  }
1586  
1587  static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1588  {
1589      if (!a->q && a->esz == MO_64) {
1590          return false;
1591      }
1592      if (fp_access_check(s)) {
1593          gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1594      }
1595      return true;
1596  }
1597  
1598  /*
1599   * This utility function is for doing register extension with an
1600   * optional shift. You will likely want to pass a temporary for the
1601   * destination register. See DecodeRegExtend() in the ARM ARM.
1602   */
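      /*
       * The 3-bit 'option' field uses the usual A64 extend encoding:
       * 0..3 are UXTB, UXTH, UXTW, UXTX and 4..7 are SXTB, SXTH, SXTW,
       * SXTX; bit 2 is the sign and bits [1:0] select the source width.
       */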
1603  static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1604                                int option, unsigned int shift)
1605  {
1606      int extsize = extract32(option, 0, 2);
1607      bool is_signed = extract32(option, 2, 1);
1608  
1609      tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1610      tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1611  }
1612  
1613  static inline void gen_check_sp_alignment(DisasContext *s)
1614  {
1615      /* The AArch64 architecture mandates that (if enabled via PSTATE
1616       * or SCTLR bits) there is a check that SP is 16-aligned on every
1617       * SP-relative load or store (with an exception generated if it is not).
1618       * In line with general QEMU practice regarding misaligned accesses,
1619       * we omit these checks for the sake of guest program performance.
1620       * This function is provided as a hook so we can more easily add these
1621       * checks in future (possibly as a "favour catching guest program bugs
1622       * over speed" user selectable option).
1623       */
1624  }
1625  
1626  /*
1627   * The instruction disassembly implemented here matches
1628   * the instruction encoding classifications in chapter C4
1629   * of the ARM Architecture Reference Manual (DDI0487B_a);
1630   * classification names and decode diagrams here should generally
1631   * match up with those in the manual.
1632   */
1633  
1634  static bool trans_B(DisasContext *s, arg_i *a)
1635  {
1636      reset_btype(s);
1637      gen_goto_tb(s, 0, a->imm);
1638      return true;
1639  }
1640  
1641  static bool trans_BL(DisasContext *s, arg_i *a)
1642  {
1643      gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1644      reset_btype(s);
1645      gen_goto_tb(s, 0, a->imm);
1646      return true;
1647  }
1648  
1649  
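      /*
       * Compare-and-branch: branch to a local label on the tested
       * condition, then use the two goto_tb slots -- slot 0 for the
       * not-taken fall-through to the next insn, slot 1 for the taken
       * branch target.
       */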
1650  static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1651  {
1652      DisasLabel match;
1653      TCGv_i64 tcg_cmp;
1654  
1655      tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1656      reset_btype(s);
1657  
1658      match = gen_disas_label(s);
1659      tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1660                          tcg_cmp, 0, match.label);
1661      gen_goto_tb(s, 0, 4);
1662      set_disas_label(s, match);
1663      gen_goto_tb(s, 1, a->imm);
1664      return true;
1665  }
1666  
1667  static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1668  {
1669      DisasLabel match;
1670      TCGv_i64 tcg_cmp;
1671  
1672      tcg_cmp = tcg_temp_new_i64();
1673      tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1674  
1675      reset_btype(s);
1676  
1677      match = gen_disas_label(s);
1678      tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1679                          tcg_cmp, 0, match.label);
1680      gen_goto_tb(s, 0, 4);
1681      set_disas_label(s, match);
1682      gen_goto_tb(s, 1, a->imm);
1683      return true;
1684  }
1685  
1686  static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1687  {
1688      /* BC.cond is only present with FEAT_HBC */
1689      if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1690          return false;
1691      }
1692      reset_btype(s);
1693      if (a->cond < 0x0e) {
1694          /* genuinely conditional branches */
1695          DisasLabel match = gen_disas_label(s);
1696          arm_gen_test_cc(a->cond, match.label);
1697          gen_goto_tb(s, 0, 4);
1698          set_disas_label(s, match);
1699          gen_goto_tb(s, 1, a->imm);
1700      } else {
1701          /* 0xe and 0xf are both "always" conditions */
1702          gen_goto_tb(s, 0, a->imm);
1703      }
1704      return true;
1705  }
1706  
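      /*
       * For FEAT_BTI, indirect branches record a non-zero PSTATE.BTYPE;
       * the instruction at the branch target is then checked against that
       * value if it sits on a guarded page (BTYPE 0 means no check).
       */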
1707  static void set_btype_for_br(DisasContext *s, int rn)
1708  {
1709      if (dc_isar_feature(aa64_bti, s)) {
1710          /* BR to {x16,x17} or !guard -> 1, else 3.  */
1711          if (rn == 16 || rn == 17) {
1712              set_btype(s, 1);
1713          } else {
1714              TCGv_i64 pc = tcg_temp_new_i64();
1715              gen_pc_plus_diff(s, pc, 0);
1716              gen_helper_guarded_page_br(tcg_env, pc);
1717              s->btype = -1;
1718          }
1719      }
1720  }
1721  
1722  static void set_btype_for_blr(DisasContext *s)
1723  {
1724      if (dc_isar_feature(aa64_bti, s)) {
1725          /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1726          set_btype(s, 2);
1727      }
1728  }
1729  
1730  static bool trans_BR(DisasContext *s, arg_r *a)
1731  {
1732      set_btype_for_br(s, a->rn);
1733      gen_a64_set_pc(s, cpu_reg(s, a->rn));
1734      s->base.is_jmp = DISAS_JUMP;
1735      return true;
1736  }
1737  
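      /*
       * BLR needs care when Rn is x30: writing the link register below
       * would clobber the branch target, so copy it to a temp first.
       * The same pattern recurs in BLRAZ and BLRA.
       */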
1738  static bool trans_BLR(DisasContext *s, arg_r *a)
1739  {
1740      TCGv_i64 dst = cpu_reg(s, a->rn);
1741      TCGv_i64 lr = cpu_reg(s, 30);
1742      if (dst == lr) {
1743          TCGv_i64 tmp = tcg_temp_new_i64();
1744          tcg_gen_mov_i64(tmp, dst);
1745          dst = tmp;
1746      }
1747      gen_pc_plus_diff(s, lr, curr_insn_len(s));
1748      gen_a64_set_pc(s, dst);
1749      set_btype_for_blr(s);
1750      s->base.is_jmp = DISAS_JUMP;
1751      return true;
1752  }
1753  
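      /*
       * Unlike BR/BLR, RET leaves PSTATE.BTYPE at 0, so the return
       * target is not subject to a BTI check.
       */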
1754  static bool trans_RET(DisasContext *s, arg_r *a)
1755  {
1756      gen_a64_set_pc(s, cpu_reg(s, a->rn));
1757      s->base.is_jmp = DISAS_JUMP;
1758      return true;
1759  }
1760  
1761  static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1762                                     TCGv_i64 modifier, bool use_key_a)
1763  {
1764      TCGv_i64 truedst;
1765      /*
1766       * Return the branch target for a BRAA/RETA/etc, which is either
1767       * just the destination dst, or that value with the pauth check
1768       * done and the code removed from the high bits.
1769       */
1770      if (!s->pauth_active) {
1771          return dst;
1772      }
1773  
1774      truedst = tcg_temp_new_i64();
1775      if (use_key_a) {
1776          gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1777      } else {
1778          gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1779      }
1780      return truedst;
1781  }
1782  
1783  static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1784  {
1785      TCGv_i64 dst;
1786  
1787      if (!dc_isar_feature(aa64_pauth, s)) {
1788          return false;
1789      }
1790  
1791      dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1792      set_btype_for_br(s, a->rn);
1793      gen_a64_set_pc(s, dst);
1794      s->base.is_jmp = DISAS_JUMP;
1795      return true;
1796  }
1797  
1798  static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1799  {
1800      TCGv_i64 dst, lr;
1801  
1802      if (!dc_isar_feature(aa64_pauth, s)) {
1803          return false;
1804      }
1805  
1806      dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1807      lr = cpu_reg(s, 30);
1808      if (dst == lr) {
1809          TCGv_i64 tmp = tcg_temp_new_i64();
1810          tcg_gen_mov_i64(tmp, dst);
1811          dst = tmp;
1812      }
1813      gen_pc_plus_diff(s, lr, curr_insn_len(s));
1814      gen_a64_set_pc(s, dst);
1815      set_btype_for_blr(s);
1816      s->base.is_jmp = DISAS_JUMP;
1817      return true;
1818  }
1819  
1820  static bool trans_RETA(DisasContext *s, arg_reta *a)
1821  {
1822      TCGv_i64 dst;
1823  
1824      dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1825      gen_a64_set_pc(s, dst);
1826      s->base.is_jmp = DISAS_JUMP;
1827      return true;
1828  }
1829  
1830  static bool trans_BRA(DisasContext *s, arg_bra *a)
1831  {
1832      TCGv_i64 dst;
1833  
1834      if (!dc_isar_feature(aa64_pauth, s)) {
1835          return false;
1836      }
1837      dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1838      gen_a64_set_pc(s, dst);
1839      set_btype_for_br(s, a->rn);
1840      s->base.is_jmp = DISAS_JUMP;
1841      return true;
1842  }
1843  
1844  static bool trans_BLRA(DisasContext *s, arg_bra *a)
1845  {
1846      TCGv_i64 dst, lr;
1847  
1848      if (!dc_isar_feature(aa64_pauth, s)) {
1849          return false;
1850      }
1851      dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1852      lr = cpu_reg(s, 30);
1853      if (dst == lr) {
1854          TCGv_i64 tmp = tcg_temp_new_i64();
1855          tcg_gen_mov_i64(tmp, dst);
1856          dst = tmp;
1857      }
1858      gen_pc_plus_diff(s, lr, curr_insn_len(s));
1859      gen_a64_set_pc(s, dst);
1860      set_btype_for_blr(s);
1861      s->base.is_jmp = DISAS_JUMP;
1862      return true;
1863  }
1864  
1865  static bool trans_ERET(DisasContext *s, arg_ERET *a)
1866  {
1867      TCGv_i64 dst;
1868  
1869      if (s->current_el == 0) {
1870          return false;
1871      }
1872      if (s->trap_eret) {
1873          gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1874          return true;
1875      }
1876      dst = tcg_temp_new_i64();
1877      tcg_gen_ld_i64(dst, tcg_env,
1878                     offsetof(CPUARMState, elr_el[s->current_el]));
1879  
1880      translator_io_start(&s->base);
1881  
1882      gen_helper_exception_return(tcg_env, dst);
1883      /* Must exit loop to check un-masked IRQs */
1884      s->base.is_jmp = DISAS_EXIT;
1885      return true;
1886  }
1887  
1888  static bool trans_ERETA(DisasContext *s, arg_reta *a)
1889  {
1890      TCGv_i64 dst;
1891  
1892      if (!dc_isar_feature(aa64_pauth, s)) {
1893          return false;
1894      }
1895      if (s->current_el == 0) {
1896          return false;
1897      }
1898      /* The FGT trap takes precedence over an auth trap. */
1899      if (s->trap_eret) {
1900          gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1901          return true;
1902      }
1903      dst = tcg_temp_new_i64();
1904      tcg_gen_ld_i64(dst, tcg_env,
1905                     offsetof(CPUARMState, elr_el[s->current_el]));
1906  
1907      dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1908  
1909      translator_io_start(&s->base);
1910  
1911      gen_helper_exception_return(tcg_env, dst);
1912      /* Must exit loop to check un-masked IRQs */
1913      s->base.is_jmp = DISAS_EXIT;
1914      return true;
1915  }
1916  
1917  static bool trans_NOP(DisasContext *s, arg_NOP *a)
1918  {
1919      return true;
1920  }
1921  
1922  static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1923  {
1924      /*
1925       * When running in MTTCG we don't generate jumps to the yield and
1926       * WFE helpers as it won't affect the scheduling of other vCPUs.
1927       * If we wanted to more completely model WFE/SEV so we don't busy
1928       * spin unnecessarily we would need to do something more involved.
1929       */
1930      if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1931          s->base.is_jmp = DISAS_YIELD;
1932      }
1933      return true;
1934  }
1935  
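      /*
       * WFI is implemented purely via the DISAS_WFI exit: the actual halt
       * is performed when the TB ends, in aarch64_tr_tb_stop(), as the
       * comment in trans_WFIT below notes.
       */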
1936  static bool trans_WFI(DisasContext *s, arg_WFI *a)
1937  {
1938      s->base.is_jmp = DISAS_WFI;
1939      return true;
1940  }
1941  
1942  static bool trans_WFE(DisasContext *s, arg_WFI *a)
1943  {
1944      /*
1945       * When running in MTTCG we don't generate jumps to the yield and
1946       * WFE helpers as it won't affect the scheduling of other vCPUs.
1947       * If we wanted to more completely model WFE/SEV so we don't busy
1948       * spin unnecessarily we would need to do something more involved.
1949       */
1950      if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1951          s->base.is_jmp = DISAS_WFE;
1952      }
1953      return true;
1954  }
1955  
1956  static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1957  {
1958      if (!dc_isar_feature(aa64_wfxt, s)) {
1959          return false;
1960      }
1961  
1962      /*
1963       * Because we need to pass the register value to the helper,
1964       * it's easier to emit the code now, unlike trans_WFI which
1965       * defers it to aarch64_tr_tb_stop(). That means we need to
1966       * check ss_active so that single-stepping a WFIT doesn't halt.
1967       */
1968      if (s->ss_active) {
1969          /* Act like a NOP under architectural singlestep */
1970          return true;
1971      }
1972  
1973      gen_a64_update_pc(s, 4);
1974      gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1975      /* Go back to the main loop to check for interrupts */
1976      s->base.is_jmp = DISAS_EXIT;
1977      return true;
1978  }
1979  
1980  static bool trans_WFET(DisasContext *s, arg_WFET *a)
1981  {
1982      if (!dc_isar_feature(aa64_wfxt, s)) {
1983          return false;
1984      }
1985  
1986      /*
1987       * We rely here on our WFE implementation being a NOP, so we
1988       * don't need to do anything different to handle the WFET timeout
1989       * from what trans_WFE does.
1990       */
1991      if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1992          s->base.is_jmp = DISAS_WFE;
1993      }
1994      return true;
1995  }
1996  
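      /*
       * The PAC/AUT/XPAC instructions below live in the hint space, so
       * when pauth is not active they simply execute as NOPs; that is why
       * each of these translators returns true even when it emits nothing.
       */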
1997  static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1998  {
1999      if (s->pauth_active) {
2000          gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
2001      }
2002      return true;
2003  }
2004  
2005  static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2006  {
2007      if (s->pauth_active) {
2008          gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2009      }
2010      return true;
2011  }
2012  
2013  static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2014  {
2015      if (s->pauth_active) {
2016          gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2017      }
2018      return true;
2019  }
2020  
2021  static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2022  {
2023      if (s->pauth_active) {
2024          gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2025      }
2026      return true;
2027  }
2028  
2029  static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2030  {
2031      if (s->pauth_active) {
2032          gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2033      }
2034      return true;
2035  }
2036  
2037  static bool trans_ESB(DisasContext *s, arg_ESB *a)
2038  {
2039      /* Without RAS, we must implement this as a NOP. */
2040      if (dc_isar_feature(aa64_ras, s)) {
2041          /*
2042           * QEMU does not have a source of physical SErrors,
2043           * so we are only concerned with virtual SErrors.
2044           * The pseudocode in the ARM for this case is
2045           *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2046           *      AArch64.vESBOperation();
2047           * Most of the condition can be evaluated at translation time.
2048           * Test for EL2 present, and defer test for SEL2 to runtime.
2049           */
2050          if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2051              gen_helper_vesb(tcg_env);
2052          }
2053      }
2054      return true;
2055  }
2056  
2057  static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2058  {
2059      if (s->pauth_active) {
2060          gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2061      }
2062      return true;
2063  }
2064  
2065  static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2066  {
2067      if (s->pauth_active) {
2068          gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2069      }
2070      return true;
2071  }
2072  
2073  static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2074  {
2075      if (s->pauth_active) {
2076          gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2077      }
2078      return true;
2079  }
2080  
2081  static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2082  {
2083      if (s->pauth_active) {
2084          gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2085      }
2086      return true;
2087  }
2088  
2089  static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2090  {
2091      if (s->pauth_active) {
2092          gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2093      }
2094      return true;
2095  }
2096  
2097  static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2098  {
2099      if (s->pauth_active) {
2100          gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2101      }
2102      return true;
2103  }
2104  
2105  static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2106  {
2107      if (s->pauth_active) {
2108          gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2109      }
2110      return true;
2111  }
2112  
2113  static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2114  {
2115      if (s->pauth_active) {
2116          gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2117      }
2118      return true;
2119  }
2120  
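      /*
       * CLREX just invalidates the recorded exclusive address; -1 is the
       * "monitor not set" value used by gen_store_exclusive() below.
       */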
2121  static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2122  {
2123      tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2124      return true;
2125  }
2126  
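      /*
       * Only the access-type part of the barrier option matters here; the
       * shareability domain is irrelevant to TCG, which has a single view
       * of memory.
       */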
2127  static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2128  {
2129      /* We handle DSB and DMB the same way */
2130      TCGBar bar;
2131  
2132      switch (a->types) {
2133      case 1: /* MBReqTypes_Reads */
2134          bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2135          break;
2136      case 2: /* MBReqTypes_Writes */
2137          bar = TCG_BAR_SC | TCG_MO_ST_ST;
2138          break;
2139      default: /* MBReqTypes_All */
2140          bar = TCG_BAR_SC | TCG_MO_ALL;
2141          break;
2142      }
2143      tcg_gen_mb(bar);
2144      return true;
2145  }
2146  
2147  static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2148  {
2149      if (!dc_isar_feature(aa64_xs, s)) {
2150          return false;
2151      }
2152      tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2153      return true;
2154  }
2155  
2156  static bool trans_ISB(DisasContext *s, arg_ISB *a)
2157  {
2158      /*
2159       * We need to break the TB after this insn to execute
2160       * self-modifying code correctly and also to take
2161       * any pending interrupts immediately.
2162       */
2163      reset_btype(s);
2164      gen_goto_tb(s, 0, 4);
2165      return true;
2166  }
2167  
2168  static bool trans_SB(DisasContext *s, arg_SB *a)
2169  {
2170      if (!dc_isar_feature(aa64_sb, s)) {
2171          return false;
2172      }
2173      /*
2174       * TODO: There is no speculation barrier opcode for TCG;
2175       * MB and end the TB instead.
2176       */
2177      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2178      gen_goto_tb(s, 0, 4);
2179      return true;
2180  }
2181  
2182  static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2183  {
2184      if (!dc_isar_feature(aa64_condm_4, s)) {
2185          return false;
2186      }
2187      tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2188      return true;
2189  }
2190  
2191  static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2192  {
2193      TCGv_i32 z;
2194  
2195      if (!dc_isar_feature(aa64_condm_5, s)) {
2196          return false;
2197      }
2198  
2199      z = tcg_temp_new_i32();
2200  
2201      tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2202  
2203      /*
2204       * (!C & !Z) << 31
2205       * (!(C | Z)) << 31
2206       * ~((C | Z) << 31)
2207       * ~-(C | Z)
2208       * (C | Z) - 1
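           *
           * (Only the sign bit of NF is significant, so these rewrites
           * only need to agree in bit 31, not in the full 32-bit value.)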
2209       */
2210      tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2211      tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2212  
2213      /* !(Z & C) */
2214      tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2215      tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2216  
2217      /* (!C & Z) << 31 -> -(Z & ~C) */
2218      tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2219      tcg_gen_neg_i32(cpu_VF, cpu_VF);
2220  
2221      /* C | Z */
2222      tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2223  
2224      return true;
2225  }
2226  
2227  static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2228  {
2229      if (!dc_isar_feature(aa64_condm_5, s)) {
2230          return false;
2231      }
2232  
2233      tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2234      tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2235  
2236      /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2237      tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2238  
2239      tcg_gen_movi_i32(cpu_NF, 0);
2240      tcg_gen_movi_i32(cpu_VF, 0);
2241  
2242      return true;
2243  }
2244  
2245  static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2246  {
2247      if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2248          return false;
2249      }
2250      if (a->imm & 1) {
2251          set_pstate_bits(PSTATE_UAO);
2252      } else {
2253          clear_pstate_bits(PSTATE_UAO);
2254      }
2255      gen_rebuild_hflags(s);
2256      s->base.is_jmp = DISAS_TOO_MANY;
2257      return true;
2258  }
2259  
2260  static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2261  {
2262      if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2263          return false;
2264      }
2265      if (a->imm & 1) {
2266          set_pstate_bits(PSTATE_PAN);
2267      } else {
2268          clear_pstate_bits(PSTATE_PAN);
2269      }
2270      gen_rebuild_hflags(s);
2271      s->base.is_jmp = DISAS_TOO_MANY;
2272      return true;
2273  }
2274  
2275  static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2276  {
2277      if (s->current_el == 0) {
2278          return false;
2279      }
2280      gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2281      s->base.is_jmp = DISAS_TOO_MANY;
2282      return true;
2283  }
2284  
2285  static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2286  {
2287      if (!dc_isar_feature(aa64_ssbs, s)) {
2288          return false;
2289      }
2290      if (a->imm & 1) {
2291          set_pstate_bits(PSTATE_SSBS);
2292      } else {
2293          clear_pstate_bits(PSTATE_SSBS);
2294      }
2295      /* Don't need to rebuild hflags since SSBS is a nop */
2296      s->base.is_jmp = DISAS_TOO_MANY;
2297      return true;
2298  }
2299  
2300  static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2301  {
2302      if (!dc_isar_feature(aa64_dit, s)) {
2303          return false;
2304      }
2305      if (a->imm & 1) {
2306          set_pstate_bits(PSTATE_DIT);
2307      } else {
2308          clear_pstate_bits(PSTATE_DIT);
2309      }
2310      /* There's no need to rebuild hflags because DIT is a nop */
2311      s->base.is_jmp = DISAS_TOO_MANY;
2312      return true;
2313  }
2314  
2315  static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2316  {
2317      if (dc_isar_feature(aa64_mte, s)) {
2318          /* Full MTE is enabled -- set the TCO bit as directed. */
2319          if (a->imm & 1) {
2320              set_pstate_bits(PSTATE_TCO);
2321          } else {
2322              clear_pstate_bits(PSTATE_TCO);
2323          }
2324          gen_rebuild_hflags(s);
2325          /* Many factors, including TCO, go into MTE_ACTIVE. */
2326          s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2327          return true;
2328      } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2329          /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2330          return true;
2331      } else {
2332          /* Insn not present */
2333          return false;
2334      }
2335  }
2336  
2337  static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2338  {
2339      gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2340      s->base.is_jmp = DISAS_TOO_MANY;
2341      return true;
2342  }
2343  
2344  static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2345  {
2346      gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2347      /* Exit the cpu loop to re-evaluate pending IRQs. */
2348      s->base.is_jmp = DISAS_UPDATE_EXIT;
2349      return true;
2350  }
2351  
2352  static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2353  {
2354      if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2355          return false;
2356      }
2357  
2358      if (a->imm == 0) {
2359          clear_pstate_bits(PSTATE_ALLINT);
2360      } else if (s->current_el > 1) {
2361          set_pstate_bits(PSTATE_ALLINT);
2362      } else {
2363          gen_helper_msr_set_allint_el1(tcg_env);
2364      }
2365  
2366      /* Exit the cpu loop to re-evaluate pending IRQs. */
2367      s->base.is_jmp = DISAS_UPDATE_EXIT;
2368      return true;
2369  }
2370  
2371  static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2372  {
2373      if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2374          return false;
2375      }
2376      if (sme_access_check(s)) {
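              /*
               * SM is bit 0 and ZA is bit 1 of SVCR; a->imm (0 or 1) is
               * replicated into both bits and a->mask selects which of
               * them this MSR variant actually updates.
               */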
2377          int old = s->pstate_sm | (s->pstate_za << 1);
2378          int new = a->imm * 3;
2379  
2380          if ((old ^ new) & a->mask) {
2381              /* At least one bit changes. */
2382              gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2383                                  tcg_constant_i32(a->mask));
2384              s->base.is_jmp = DISAS_TOO_MANY;
2385          }
2386      }
2387      return true;
2388  }
2389  
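      /*
       * QEMU keeps the ARM flags unpacked: cpu_NF and cpu_VF hold N and V
       * in bit 31, cpu_CF holds C in bit 0, and Z is set whenever cpu_ZF
       * is zero.  The two helpers below convert between that form and the
       * architectural NZCV[31:28] layout used by MRS/MSR NZCV.
       */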
2390  static void gen_get_nzcv(TCGv_i64 tcg_rt)
2391  {
2392      TCGv_i32 tmp = tcg_temp_new_i32();
2393      TCGv_i32 nzcv = tcg_temp_new_i32();
2394  
2395      /* build bit 31, N */
2396      tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2397      /* build bit 30, Z */
2398      tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2399      tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2400      /* build bit 29, C */
2401      tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2402      /* build bit 28, V */
2403      tcg_gen_shri_i32(tmp, cpu_VF, 31);
2404      tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2405      /* generate result */
2406      tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2407  }
2408  
2409  static void gen_set_nzcv(TCGv_i64 tcg_rt)
2410  {
2411      TCGv_i32 nzcv = tcg_temp_new_i32();
2412  
2413      /* take NZCV from R[t] */
2414      tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2415  
2416      /* bit 31, N */
2417      tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2418      /* bit 30, Z */
2419      tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2420      tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2421      /* bit 29, C */
2422      tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2423      tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2424      /* bit 28, V */
2425      tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2426      tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2427  }
2428  
2429  static void gen_sysreg_undef(DisasContext *s, bool isread,
2430                               uint8_t op0, uint8_t op1, uint8_t op2,
2431                               uint8_t crn, uint8_t crm, uint8_t rt)
2432  {
2433      /*
2434       * Generate code to emit an UNDEF with correct syndrome
2435       * information for a failed system register access.
2436       * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2437       * but if FEAT_IDST is implemented then read accesses to registers
2438       * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2439       * syndrome.
2440       */
2441      uint32_t syndrome;
2442  
2443      if (isread && dc_isar_feature(aa64_ids, s) &&
2444          arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2445          syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2446      } else {
2447          syndrome = syn_uncategorized();
2448      }
2449      gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2450  }
2451  
2452  /* MRS - move from system register
2453   * MSR (register) - move to system register
2454   * SYS
2455   * SYSL
2456   * These are all essentially the same insn in 'read' and 'write'
2457   * versions, with varying op0 fields.
2458   */
2459  static void handle_sys(DisasContext *s, bool isread,
2460                         unsigned int op0, unsigned int op1, unsigned int op2,
2461                         unsigned int crn, unsigned int crm, unsigned int rt)
2462  {
2463      uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2464                                        crn, crm, op0, op1, op2);
2465      const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2466      bool need_exit_tb = false;
2467      bool nv_trap_to_el2 = false;
2468      bool nv_redirect_reg = false;
2469      bool skip_fp_access_checks = false;
2470      bool nv2_mem_redirect = false;
2471      TCGv_ptr tcg_ri = NULL;
2472      TCGv_i64 tcg_rt;
2473      uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2474  
2475      if (crn == 11 || crn == 15) {
2476          /*
2477           * Check for TIDCP trap, which must take precedence over
2478           * the UNDEF for "no such register" etc.
2479           */
2480          switch (s->current_el) {
2481          case 0:
2482              if (dc_isar_feature(aa64_tidcp1, s)) {
2483                  gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2484              }
2485              break;
2486          case 1:
2487              gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2488              break;
2489          }
2490      }
2491  
2492      if (!ri) {
2493          /* Unknown register; this might be a guest error or a QEMU
2494           * unimplemented feature.
2495           */
2496          qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2497                        "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2498                        isread ? "read" : "write", op0, op1, crn, crm, op2);
2499          gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2500          return;
2501      }
2502  
2503      if (s->nv2 && ri->nv2_redirect_offset) {
2504          /*
2505           * Some registers always redirect to memory; some only do so if
2506           * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2507           * pairs which share an offset; see the table in R_CSRPQ).
2508           */
2509          if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2510              nv2_mem_redirect = s->nv1;
2511          } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2512              nv2_mem_redirect = !s->nv1;
2513          } else {
2514              nv2_mem_redirect = true;
2515          }
2516      }
2517  
2518      /* Check access permissions */
2519      if (!cp_access_ok(s->current_el, ri, isread)) {
2520          /*
2521           * FEAT_NV/NV2 handling does not do the usual FP access checks
2522           * for registers only accessible at EL2 (though it *does* do them
2523           * for registers accessible at EL1).
2524           */
2525          skip_fp_access_checks = true;
2526          if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2527              /*
2528               * This is one of the few EL2 registers which should redirect
2529               * to the equivalent EL1 register. We do that after running
2530               * the EL2 register's accessfn.
2531               */
2532              nv_redirect_reg = true;
2533              assert(!nv2_mem_redirect);
2534          } else if (nv2_mem_redirect) {
2535              /*
2536               * NV2 redirect-to-memory takes precedence over trap to EL2 or
2537               * UNDEF to EL1.
2538               */
2539          } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2540              /*
2541               * This register / instruction exists and is an EL2 register, so
2542               * we must trap to EL2 if accessed in nested virtualization EL1
2543               * instead of UNDEFing. We'll do that after the usual access checks.
2544               * (This makes a difference only for a couple of registers like
2545               * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2546               * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2547               * an accessfn which does nothing when called from EL1, because
2548               * the trap-to-EL3 controls which would apply to that register
2549               * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2550               */
2551              nv_trap_to_el2 = true;
2552          } else {
2553              gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2554              return;
2555          }
2556      }
2557  
2558      if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2559          /* Emit code to perform further access permissions checks at
2560           * runtime; this may result in an exception.
2561           */
2562          gen_a64_update_pc(s, 0);
2563          tcg_ri = tcg_temp_new_ptr();
2564          gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2565                                         tcg_constant_i32(key),
2566                                         tcg_constant_i32(syndrome),
2567                                         tcg_constant_i32(isread));
2568      } else if (ri->type & ARM_CP_RAISES_EXC) {
2569          /*
2570           * The readfn or writefn might raise an exception;
2571           * synchronize the CPU state in case it does.
2572           */
2573          gen_a64_update_pc(s, 0);
2574      }
2575  
2576      if (!skip_fp_access_checks) {
2577          if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2578              return;
2579          } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2580              return;
2581          } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2582              return;
2583          }
2584      }
2585  
2586      if (nv_trap_to_el2) {
2587          gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2588          return;
2589      }
2590  
2591      if (nv_redirect_reg) {
2592          /*
2593           * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2594           * Conveniently in all cases the encoding of the EL1 register is
2595           * identical to the EL2 register except that opc1 is 0.
2596           * Get the reginfo for the EL1 register to use for the actual access.
2597           * We don't use the EL1 register's access function, and
2598           * fine-grained-traps on EL1 also do not apply here.
2599           */
2600          key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2601                                   crn, crm, op0, 0, op2);
2602          ri = get_arm_cp_reginfo(s->cp_regs, key);
2603          assert(ri);
2604          assert(cp_access_ok(s->current_el, ri, isread));
2605          /*
2606           * We might not have done an update_pc earlier, so check we don't
2607           * need it. We could support this in future if necessary.
2608           */
2609          assert(!(ri->type & ARM_CP_RAISES_EXC));
2610      }
2611  
2612      if (nv2_mem_redirect) {
2613          /*
2614           * This system register is being redirected into an EL2 memory access.
2615           * This means it is not an IO operation, doesn't change hflags,
2616           * and need not end the TB, because it has no side effects.
2617           *
2618           * The access is 64-bit single copy atomic, guaranteed aligned because
2619           * of the definition of VCNR_EL2. Its endianness depends on
2620           * SCTLR_EL2.EE, not on the data endianness of EL1.
2621           * It is done under either the EL2 translation regime or the EL2&0
2622           * translation regime, depending on HCR_EL2.E2H. It behaves as if
2623           * PSTATE.PAN is 0.
2624           */
2625          TCGv_i64 ptr = tcg_temp_new_i64();
2626          MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2627          ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2628          int memidx = arm_to_core_mmu_idx(armmemidx);
2629          uint32_t syn;
2630  
2631          mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2632  
2633          tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2634          tcg_gen_addi_i64(ptr, ptr,
2635                           (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2636          tcg_rt = cpu_reg(s, rt);
2637  
2638          syn = syn_data_abort_vncr(0, !isread, 0);
2639          disas_set_insn_syndrome(s, syn);
2640          if (isread) {
2641              tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2642          } else {
2643              tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2644          }
2645          return;
2646      }
2647  
2648      /* Handle special cases first */
2649      switch (ri->type & ARM_CP_SPECIAL_MASK) {
2650      case 0:
2651          break;
2652      case ARM_CP_NOP:
2653          return;
2654      case ARM_CP_NZCV:
2655          tcg_rt = cpu_reg(s, rt);
2656          if (isread) {
2657              gen_get_nzcv(tcg_rt);
2658          } else {
2659              gen_set_nzcv(tcg_rt);
2660          }
2661          return;
2662      case ARM_CP_CURRENTEL:
2663      {
2664          /*
2665           * Reads as current EL value from pstate, which is
2666           * guaranteed to be constant by the tb flags.
2667           * For nested virt we should report EL2.
2668           */
2669          int el = s->nv ? 2 : s->current_el;
2670          tcg_rt = cpu_reg(s, rt);
2671          tcg_gen_movi_i64(tcg_rt, el << 2);
2672          return;
2673      }
2674      case ARM_CP_DC_ZVA:
2675          /* Writes clear the aligned block of memory which rt points into. */
2676          if (s->mte_active[0]) {
2677              int desc = 0;
2678  
2679              desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2680              desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2681              desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2682  
2683              tcg_rt = tcg_temp_new_i64();
2684              gen_helper_mte_check_zva(tcg_rt, tcg_env,
2685                                       tcg_constant_i32(desc), cpu_reg(s, rt));
2686          } else {
2687              tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2688          }
2689          gen_helper_dc_zva(tcg_env, tcg_rt);
2690          return;
2691      case ARM_CP_DC_GVA:
2692          {
2693              TCGv_i64 clean_addr, tag;
2694  
2695              /*
2696               * DC_GVA, like DC_ZVA, requires that we supply the original
2697               * pointer for an invalid page.  Probe that address first.
2698               */
2699              tcg_rt = cpu_reg(s, rt);
2700              clean_addr = clean_data_tbi(s, tcg_rt);
2701              gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2702  
2703              if (s->ata[0]) {
2704                  /* Extract the tag from the register to match STZGM.  */
2705                  tag = tcg_temp_new_i64();
2706                  tcg_gen_shri_i64(tag, tcg_rt, 56);
2707                  gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2708              }
2709          }
2710          return;
2711      case ARM_CP_DC_GZVA:
2712          {
2713              TCGv_i64 clean_addr, tag;
2714  
2715              /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2716              tcg_rt = cpu_reg(s, rt);
2717              clean_addr = clean_data_tbi(s, tcg_rt);
2718              gen_helper_dc_zva(tcg_env, clean_addr);
2719  
2720              if (s->ata[0]) {
2721                  /* Extract the tag from the register to match STZGM.  */
2722                  tag = tcg_temp_new_i64();
2723                  tcg_gen_shri_i64(tag, tcg_rt, 56);
2724                  gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2725              }
2726          }
2727          return;
2728      default:
2729          g_assert_not_reached();
2730      }
2731  
2732      if (ri->type & ARM_CP_IO) {
2733          /* I/O operations must end the TB here (whether read or write) */
2734          need_exit_tb = translator_io_start(&s->base);
2735      }
2736  
2737      tcg_rt = cpu_reg(s, rt);
2738  
2739      if (isread) {
2740          if (ri->type & ARM_CP_CONST) {
2741              tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2742          } else if (ri->readfn) {
2743              if (!tcg_ri) {
2744                  tcg_ri = gen_lookup_cp_reg(key);
2745              }
2746              gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2747          } else {
2748              tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2749          }
2750      } else {
2751          if (ri->type & ARM_CP_CONST) {
2752              /* If not forbidden by access permissions, treat as WI */
2753              return;
2754          } else if (ri->writefn) {
2755              if (!tcg_ri) {
2756                  tcg_ri = gen_lookup_cp_reg(key);
2757              }
2758              gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2759          } else {
2760              tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2761          }
2762      }
2763  
2764      if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2765          /*
2766           * A write to any coprocessor register that ends a TB
2767           * must rebuild the hflags for the next TB.
2768           */
2769          gen_rebuild_hflags(s);
2770          /*
2771           * We default to ending the TB on a coprocessor register write,
2772           * but allow this to be suppressed by the register definition
2773           * (usually only necessary to work around guest bugs).
2774           */
2775          need_exit_tb = true;
2776      }
2777      if (need_exit_tb) {
2778          s->base.is_jmp = DISAS_UPDATE_EXIT;
2779      }
2780  }
2781  
2782  static bool trans_SYS(DisasContext *s, arg_SYS *a)
2783  {
2784      handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2785      return true;
2786  }
2787  
2788  static bool trans_SVC(DisasContext *s, arg_i *a)
2789  {
2790      /*
2791       * For SVC, HVC and SMC we advance the single-step state
2792       * machine before taking the exception. This is architecturally
2793       * mandated, to ensure that single-stepping a system call
2794       * instruction works properly.
2795       */
2796      uint32_t syndrome = syn_aa64_svc(a->imm);
2797      if (s->fgt_svc) {
2798          gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2799          return true;
2800      }
2801      gen_ss_advance(s);
2802      gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2803      return true;
2804  }
2805  
2806  static bool trans_HVC(DisasContext *s, arg_i *a)
2807  {
2808      int target_el = s->current_el == 3 ? 3 : 2;
2809  
2810      if (s->current_el == 0) {
2811          unallocated_encoding(s);
2812          return true;
2813      }
2814      /*
2815       * The pre HVC helper handles cases when HVC gets trapped
2816       * as an undefined insn by runtime configuration.
2817       */
2818      gen_a64_update_pc(s, 0);
2819      gen_helper_pre_hvc(tcg_env);
2820      /* Architecture requires ss advance before we do the actual work */
2821      gen_ss_advance(s);
2822      gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2823      return true;
2824  }
2825  
2826  static bool trans_SMC(DisasContext *s, arg_i *a)
2827  {
2828      if (s->current_el == 0) {
2829          unallocated_encoding(s);
2830          return true;
2831      }
2832      gen_a64_update_pc(s, 0);
2833      gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2834      /* Architecture requires ss advance before we do the actual work */
2835      gen_ss_advance(s);
2836      gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2837      return true;
2838  }
2839  
2840  static bool trans_BRK(DisasContext *s, arg_i *a)
2841  {
2842      gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2843      return true;
2844  }
2845  
2846  static bool trans_HLT(DisasContext *s, arg_i *a)
2847  {
2848      /*
2849       * HLT. This has two purposes.
2850       * Architecturally, it is an external halting debug instruction.
2851       * Since QEMU doesn't implement external debug, we treat this as the
2852       * architecture requires with halting debug disabled: it will UNDEF.
2853       * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2854       */
2855      if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2856          gen_exception_internal_insn(s, EXCP_SEMIHOST);
2857      } else {
2858          unallocated_encoding(s);
2859      }
2860      return true;
2861  }
2862  
2863  /*
2864   * Load/Store exclusive instructions are implemented by remembering
2865   * the value/address loaded, and seeing if these are the same
2866   * when the store is performed. This is not actually the architecturally
2867   * mandated semantics, but it works for typical guest code sequences
2868   * and avoids having to monitor regular stores.
2869   *
2870   * The store exclusive uses the atomic cmpxchg primitives to avoid
2871   * races in multi-threaded linux-user and when MTTCG softmmu is
2872   * enabled.
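       *
       * For example, a typical guest retry loop
       *
       *     retry:
       *         ldxr    x0, [x1]
       *         add     x0, x0, #1
       *         stxr    w2, x0, [x1]
       *         cbnz    w2, retry
       *
       * works because the load-exclusive records the address and value and
       * the store-exclusive then performs a cmpxchg against that remembered
       * value, writing 0 (success) or 1 (failure) into the status register.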
2873   */
2874  static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2875                                 int size, bool is_pair)
2876  {
2877      int idx = get_mem_index(s);
2878      TCGv_i64 dirty_addr, clean_addr;
2879      MemOp memop = check_atomic_align(s, rn, size + is_pair);
2880  
2881      s->is_ldex = true;
2882      dirty_addr = cpu_reg_sp(s, rn);
2883      clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2884  
2885      g_assert(size <= 3);
2886      if (is_pair) {
2887          g_assert(size >= 2);
2888          if (size == 2) {
2889              tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2890              if (s->be_data == MO_LE) {
2891                  tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2892                  tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2893              } else {
2894                  tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2895                  tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2896              }
2897          } else {
2898              TCGv_i128 t16 = tcg_temp_new_i128();
2899  
2900              tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2901  
2902              if (s->be_data == MO_LE) {
2903                  tcg_gen_extr_i128_i64(cpu_exclusive_val,
2904                                        cpu_exclusive_high, t16);
2905              } else {
2906                  tcg_gen_extr_i128_i64(cpu_exclusive_high,
2907                                        cpu_exclusive_val, t16);
2908              }
2909              tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2910              tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2911          }
2912      } else {
2913          tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2914          tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2915      }
2916      tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2917  }
2918  
2919  static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2920                                  int rn, int size, int is_pair)
2921  {
2922      /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2923       *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2924       *     [addr] = {Rt};
2925       *     if (is_pair) {
2926       *         [addr + datasize] = {Rt2};
2927       *     }
2928       *     {Rd} = 0;
2929       * } else {
2930       *     {Rd} = 1;
2931       * }
2932       * env->exclusive_addr = -1;
2933       */
2934      TCGLabel *fail_label = gen_new_label();
2935      TCGLabel *done_label = gen_new_label();
2936      TCGv_i64 tmp, clean_addr;
2937      MemOp memop;
2938  
2939      /*
2940       * FIXME: We are out of spec here.  We have recorded only the address
2941       * from load_exclusive, not the entire range, and we assume that the
2942       * size of the access on both sides match.  The architecture allows the
2943       * store to be smaller than the load, so long as the stored bytes are
2944       * within the range recorded by the load.
2945       */
2946  
2947      /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2948      clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2949      tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2950  
2951      /*
2952       * The write, and any associated faults, only happen if the virtual
2953       * and physical addresses pass the exclusive monitor check.  These
2954       * faults are exceedingly unlikely, because normally the guest uses
2955       * the exact same address register for the load_exclusive, and we
2956       * would have recognized these faults there.
2957       *
2958       * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2959       * unaligned 4-byte write within the range of an aligned 8-byte load.
2960       * With LSE2, the store would need to cross a 16-byte boundary when the
2961       * load did not, which would mean the store is outside the range
2962       * recorded for the monitor, which would have failed a corrected monitor
2963       * check above.  For now, we assume no size change and retain the
2964       * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2965       *
2966       * It is possible to trigger an MTE fault, by performing the load with
2967       * a virtual address with a valid tag and performing the store with the
2968       * same virtual address and a different invalid tag.
2969       */
2970      memop = size + is_pair;
2971      if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2972          memop |= MO_ALIGN;
2973      }
2974      memop = finalize_memop(s, memop);
2975      gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2976  
2977      tmp = tcg_temp_new_i64();
2978      if (is_pair) {
2979          if (size == 2) {
2980              if (s->be_data == MO_LE) {
2981                  tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2982              } else {
2983                  tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2984              }
2985              tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2986                                         cpu_exclusive_val, tmp,
2987                                         get_mem_index(s), memop);
2988              tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2989          } else {
2990              TCGv_i128 t16 = tcg_temp_new_i128();
2991              TCGv_i128 c16 = tcg_temp_new_i128();
2992              TCGv_i64 a, b;
2993  
2994              if (s->be_data == MO_LE) {
2995                  tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2996                  tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2997                                          cpu_exclusive_high);
2998              } else {
2999                  tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
3000                  tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
3001                                          cpu_exclusive_val);
3002              }
3003  
3004              tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3005                                          get_mem_index(s), memop);
3006  
3007              a = tcg_temp_new_i64();
3008              b = tcg_temp_new_i64();
3009              if (s->be_data == MO_LE) {
3010                  tcg_gen_extr_i128_i64(a, b, t16);
3011              } else {
3012                  tcg_gen_extr_i128_i64(b, a, t16);
3013              }
3014  
3015              tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3016              tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3017              tcg_gen_or_i64(tmp, a, b);
3018  
3019              tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3020          }
3021      } else {
3022          tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3023                                     cpu_reg(s, rt), get_mem_index(s), memop);
3024          tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3025      }
3026      tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3027      tcg_gen_br(done_label);
3028  
3029      gen_set_label(fail_label);
3030      tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3031      gen_set_label(done_label);
3032      tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3033  }
3034  
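      /*
       * CAS: compare Rs with memory at [Rn]; if they match, store Rt.
       * Either way the old memory contents end up in Rs, which is exactly
       * what the TCG atomic cmpxchg primitive provides.
       */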
3035  static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3036                                   int rn, int size)
3037  {
3038      TCGv_i64 tcg_rs = cpu_reg(s, rs);
3039      TCGv_i64 tcg_rt = cpu_reg(s, rt);
3040      int memidx = get_mem_index(s);
3041      TCGv_i64 clean_addr;
3042      MemOp memop;
3043  
3044      if (rn == 31) {
3045          gen_check_sp_alignment(s);
3046      }
3047      memop = check_atomic_align(s, rn, size);
3048      clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3049      tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3050                                 memidx, memop);
3051  }
3052  
3053  static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3054                                        int rn, int size)
3055  {
3056      TCGv_i64 s1 = cpu_reg(s, rs);
3057      TCGv_i64 s2 = cpu_reg(s, rs + 1);
3058      TCGv_i64 t1 = cpu_reg(s, rt);
3059      TCGv_i64 t2 = cpu_reg(s, rt + 1);
3060      TCGv_i64 clean_addr;
3061      int memidx = get_mem_index(s);
3062      MemOp memop;
3063  
3064      if (rn == 31) {
3065          gen_check_sp_alignment(s);
3066      }
3067  
3068      /* This is a single atomic access, despite the "pair". */
3069      memop = check_atomic_align(s, rn, size + 1);
3070      clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3071  
3072      if (size == 2) {
3073          TCGv_i64 cmp = tcg_temp_new_i64();
3074          TCGv_i64 val = tcg_temp_new_i64();
3075  
3076          if (s->be_data == MO_LE) {
3077              tcg_gen_concat32_i64(val, t1, t2);
3078              tcg_gen_concat32_i64(cmp, s1, s2);
3079          } else {
3080              tcg_gen_concat32_i64(val, t2, t1);
3081              tcg_gen_concat32_i64(cmp, s2, s1);
3082          }
3083  
3084          tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3085  
3086          if (s->be_data == MO_LE) {
3087              tcg_gen_extr32_i64(s1, s2, cmp);
3088          } else {
3089              tcg_gen_extr32_i64(s2, s1, cmp);
3090          }
3091      } else {
3092          TCGv_i128 cmp = tcg_temp_new_i128();
3093          TCGv_i128 val = tcg_temp_new_i128();
3094  
3095          if (s->be_data == MO_LE) {
3096              tcg_gen_concat_i64_i128(val, t1, t2);
3097              tcg_gen_concat_i64_i128(cmp, s1, s2);
3098          } else {
3099              tcg_gen_concat_i64_i128(val, t2, t1);
3100              tcg_gen_concat_i64_i128(cmp, s2, s1);
3101          }
3102  
3103          tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3104  
3105          if (s->be_data == MO_LE) {
3106              tcg_gen_extr_i128_i64(s1, s2, cmp);
3107          } else {
3108              tcg_gen_extr_i128_i64(s2, s1, cmp);
3109          }
3110      }
3111  }
3112  
3113  /*
3114   * Compute the ISS.SF bit for syndrome information if an exception
3115   * is taken on a load or store. This indicates whether the instruction
3116   * is accessing a 32-bit or 64-bit register. This logic is derived
3117   * from the ARMv8 specs for LDR (Shared decode for all encodings).
3118   */
3119  static bool ldst_iss_sf(int size, bool sign, bool ext)
3120  {
3121  
3122      if (sign) {
3123          /*
3124           * Signed loads are 64 bit results if we are not going to
3125           * do a zero-extend from 32 to 64 after the load.
3126           * (For a store, sign and ext are always false.)
3127           */
3128          return !ext;
3129      } else {
3130          /* Unsigned loads/stores work at the specified size */
3131          return size == MO_64;
3132      }
3133  }
3134  
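      /*
       * For the acquire/release forms in the functions below, the required
       * ordering is produced with an explicit TCG barrier: before the
       * store for store-release, after the load for load-acquire.
       */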
3135  static bool trans_STXR(DisasContext *s, arg_stxr *a)
3136  {
3137      if (a->rn == 31) {
3138          gen_check_sp_alignment(s);
3139      }
3140      if (a->lasr) {
3141          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3142      }
3143      gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3144      return true;
3145  }
3146  
3147  static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3148  {
3149      if (a->rn == 31) {
3150          gen_check_sp_alignment(s);
3151      }
3152      gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3153      if (a->lasr) {
3154          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3155      }
3156      return true;
3157  }
3158  
3159  static bool trans_STLR(DisasContext *s, arg_stlr *a)
3160  {
3161      TCGv_i64 clean_addr;
3162      MemOp memop;
3163      bool iss_sf = ldst_iss_sf(a->sz, false, false);
3164  
3165      /*
3166       * StoreLORelease is the same as Store-Release for QEMU, but
3167       * needs the feature-test.
3168       */
3169      if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3170          return false;
3171      }
3172      /* Generate ISS for non-exclusive accesses including LASR.  */
3173      if (a->rn == 31) {
3174          gen_check_sp_alignment(s);
3175      }
3176      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3177      memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3178      clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3179                                  true, a->rn != 31, memop);
3180      do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3181                iss_sf, a->lasr);
3182      return true;
3183  }
3184  
3185  static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3186  {
3187      TCGv_i64 clean_addr;
3188      MemOp memop;
3189      bool iss_sf = ldst_iss_sf(a->sz, false, false);
3190  
3191      /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3192      if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3193          return false;
3194      }
3195      /* Generate ISS for non-exclusive accesses including LASR.  */
3196      if (a->rn == 31) {
3197          gen_check_sp_alignment(s);
3198      }
3199      memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3200      clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3201                                  false, a->rn != 31, memop);
3202      do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3203                a->rt, iss_sf, a->lasr);
3204      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3205      return true;
3206  }
3207  
3208  static bool trans_STXP(DisasContext *s, arg_stxr *a)
3209  {
3210      if (a->rn == 31) {
3211          gen_check_sp_alignment(s);
3212      }
3213      if (a->lasr) {
3214          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3215      }
3216      gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3217      return true;
3218  }
3219  
3220  static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3221  {
3222      if (a->rn == 31) {
3223          gen_check_sp_alignment(s);
3224      }
3225      gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3226      if (a->lasr) {
3227          tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3228      }
3229      return true;
3230  }
3231  
3232  static bool trans_CASP(DisasContext *s, arg_CASP *a)
3233  {
3234      if (!dc_isar_feature(aa64_atomics, s)) {
3235          return false;
3236      }
3237      if (((a->rt | a->rs) & 1) != 0) {
3238          return false;
3239      }
3240  
3241      gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3242      return true;
3243  }
3244  
3245  static bool trans_CAS(DisasContext *s, arg_CAS *a)
3246  {
3247      if (!dc_isar_feature(aa64_atomics, s)) {
3248          return false;
3249      }
3250      gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3251      return true;
3252  }
3253  
3254  static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3255  {
3256      bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3257      TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3258      TCGv_i64 clean_addr = tcg_temp_new_i64();
3259      MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3260  
3261      gen_pc_plus_diff(s, clean_addr, a->imm);
3262      do_gpr_ld(s, tcg_rt, clean_addr, memop,
3263                false, true, a->rt, iss_sf, false);
3264      return true;
3265  }
3266  
3267  static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3268  {
3269      /* Load register (literal), vector version */
3270      TCGv_i64 clean_addr;
3271      MemOp memop;
3272  
3273      if (!fp_access_check(s)) {
3274          return true;
3275      }
3276      memop = finalize_memop_asimd(s, a->sz);
3277      clean_addr = tcg_temp_new_i64();
3278      gen_pc_plus_diff(s, clean_addr, a->imm);
3279      do_fp_ld(s, a->rt, clean_addr, memop);
3280      return true;
3281  }
3282  
3283  static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3284                                   TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3285                                   uint64_t offset, bool is_store, MemOp mop)
3286  {
3287      if (a->rn == 31) {
3288          gen_check_sp_alignment(s);
3289      }
3290  
3291      *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3292      if (!a->p) {
3293          tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3294      }
3295  
3296      *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3297                                   (a->w || a->rn != 31), 2 << a->sz, mop);
3298  }
3299  
3300  static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3301                                    TCGv_i64 dirty_addr, uint64_t offset)
3302  {
3303      if (a->w) {
3304          if (a->p) {
3305              tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3306          }
3307          tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3308      }
3309  }
3310  
3311  static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3312  {
3313      uint64_t offset = a->imm << a->sz;
3314      TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3315      MemOp mop = finalize_memop(s, a->sz);
3316  
3317      op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3318      tcg_rt = cpu_reg(s, a->rt);
3319      tcg_rt2 = cpu_reg(s, a->rt2);
3320      /*
3321       * We built mop above for the single logical access -- rebuild it
3322       * now for the paired operation.
3323       *
3324       * With LSE2, non-sign-extending pairs are treated atomically if
3325       * aligned, and if unaligned one of the pair will be completely
3326       * within a 16-byte block and that element will be atomic.
3327       * Otherwise each element is separately atomic.
3328       * In all cases, issue one operation with the correct atomicity.
3329       */
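    /* E.g. a->sz == MO_32 becomes a single MO_64 store of both registers. */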
3330      mop = a->sz + 1;
3331      if (s->align_mem) {
3332          mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3333      }
3334      mop = finalize_memop_pair(s, mop);
3335      if (a->sz == 2) {
3336          TCGv_i64 tmp = tcg_temp_new_i64();
3337  
3338          if (s->be_data == MO_LE) {
3339              tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3340          } else {
3341              tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3342          }
3343          tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3344      } else {
3345          TCGv_i128 tmp = tcg_temp_new_i128();
3346  
3347          if (s->be_data == MO_LE) {
3348              tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3349          } else {
3350              tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3351          }
3352          tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3353      }
3354      op_addr_ldstpair_post(s, a, dirty_addr, offset);
3355      return true;
3356  }
3357  
3358  static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3359  {
3360      uint64_t offset = a->imm << a->sz;
3361      TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3362      MemOp mop = finalize_memop(s, a->sz);
3363  
3364      op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3365      tcg_rt = cpu_reg(s, a->rt);
3366      tcg_rt2 = cpu_reg(s, a->rt2);
3367  
3368      /*
3369       * We built mop above for the single logical access -- rebuild it
3370       * now for the paired operation.
3371       *
3372       * With LSE2, non-sign-extending pairs are treated atomically if
3373       * aligned, and if unaligned one of the pair will be completely
3374       * within a 16-byte block and that element will be atomic.
3375       * Otherwise each element is separately atomic.
3376       * In all cases, issue one operation with the correct atomicity.
3377       *
3378       * This treats sign-extending loads like zero-extending loads,
3379       * since that reuses the most code below.
3380       */
3381      mop = a->sz + 1;
3382      if (s->align_mem) {
3383          mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3384      }
3385      mop = finalize_memop_pair(s, mop);
3386      if (a->sz == 2) {
3387          int o2 = s->be_data == MO_LE ? 32 : 0;
3388          int o1 = o2 ^ 32;
3389  
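        /*
         * o1/o2 are the bit offsets of rt/rt2 within the 64-bit load,
         * chosen so that rt always receives the lower-addressed word.
         */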
3390          tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3391          if (a->sign) {
3392              tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3393              tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3394          } else {
3395              tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3396              tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3397          }
3398      } else {
3399          TCGv_i128 tmp = tcg_temp_new_i128();
3400  
3401          tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3402          if (s->be_data == MO_LE) {
3403              tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3404          } else {
3405              tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3406          }
3407      }
3408      op_addr_ldstpair_post(s, a, dirty_addr, offset);
3409      return true;
3410  }
3411  
3412  static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3413  {
3414      uint64_t offset = a->imm << a->sz;
3415      TCGv_i64 clean_addr, dirty_addr;
3416      MemOp mop;
3417  
3418      if (!fp_access_check(s)) {
3419          return true;
3420      }
3421  
3422      /* LSE2 does not merge FP pairs; leave these as separate operations. */
3423      mop = finalize_memop_asimd(s, a->sz);
3424      op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3425      do_fp_st(s, a->rt, clean_addr, mop);
3426      tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3427      do_fp_st(s, a->rt2, clean_addr, mop);
3428      op_addr_ldstpair_post(s, a, dirty_addr, offset);
3429      return true;
3430  }
3431  
3432  static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3433  {
3434      uint64_t offset = a->imm << a->sz;
3435      TCGv_i64 clean_addr, dirty_addr;
3436      MemOp mop;
3437  
3438      if (!fp_access_check(s)) {
3439          return true;
3440      }
3441  
3442      /* LSE2 does not merge FP pairs; leave these as separate operations. */
3443      mop = finalize_memop_asimd(s, a->sz);
3444      op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3445      do_fp_ld(s, a->rt, clean_addr, mop);
3446      tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3447      do_fp_ld(s, a->rt2, clean_addr, mop);
3448      op_addr_ldstpair_post(s, a, dirty_addr, offset);
3449      return true;
3450  }
3451  
3452  static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3453  {
3454      TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3455      uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3456      MemOp mop;
3457      TCGv_i128 tmp;
3458  
3459      /* STGP only comes in one size. */
3460      tcg_debug_assert(a->sz == MO_64);
3461  
3462      if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3463          return false;
3464      }
3465  
3466      if (a->rn == 31) {
3467          gen_check_sp_alignment(s);
3468      }
3469  
3470      dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3471      if (!a->p) {
3472          tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3473      }
3474  
3475      clean_addr = clean_data_tbi(s, dirty_addr);
3476      tcg_rt = cpu_reg(s, a->rt);
3477      tcg_rt2 = cpu_reg(s, a->rt2);
3478  
3479      /*
3480       * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3481       * and one tag operation.  We implement it as one single aligned 16-byte
3482       * memory operation for convenience.  Note that the alignment ensures
3483       * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3484       */
3485      mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3486  
3487      tmp = tcg_temp_new_i128();
3488      if (s->be_data == MO_LE) {
3489          tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3490      } else {
3491          tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3492      }
3493      tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3494  
3495      /* Perform the tag store, if tag access enabled. */
3496      if (s->ata[0]) {
3497          if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3498              gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3499          } else {
3500              gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3501          }
3502      }
3503  
3504      op_addr_ldstpair_post(s, a, dirty_addr, offset);
3505      return true;
3506  }
3507  
3508  static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3509                                   TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3510                                   uint64_t offset, bool is_store, MemOp mop)
3511  {
3512      int memidx;
3513  
3514      if (a->rn == 31) {
3515          gen_check_sp_alignment(s);
3516      }
3517  
3518      *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3519      if (!a->p) {
3520          tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3521      }
3522      memidx = get_a64_user_mem_index(s, a->unpriv);
3523      *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3524                                          a->w || a->rn != 31,
3525                                          mop, a->unpriv, memidx);
3526  }
3527  
3528  static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3529                                    TCGv_i64 dirty_addr, uint64_t offset)
3530  {
3531      if (a->w) {
3532          if (a->p) {
3533              tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3534          }
3535          tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3536      }
3537  }
3538  
3539  static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3540  {
3541      bool iss_sf, iss_valid = !a->w;
3542      TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3543      int memidx = get_a64_user_mem_index(s, a->unpriv);
3544      MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3545  
3546      op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3547  
3548      tcg_rt = cpu_reg(s, a->rt);
3549      iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3550  
3551      do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3552                       iss_valid, a->rt, iss_sf, false);
3553      op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3554      return true;
3555  }
3556  
3557  static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3558  {
3559      bool iss_sf, iss_valid = !a->w;
3560      TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3561      int memidx = get_a64_user_mem_index(s, a->unpriv);
3562      MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3563  
3564      op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3565  
3566      tcg_rt = cpu_reg(s, a->rt);
3567      iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3568  
3569      do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3570                       a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3571      op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3572      return true;
3573  }
3574  
3575  static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3576  {
3577      TCGv_i64 clean_addr, dirty_addr;
3578      MemOp mop;
3579  
3580      if (!fp_access_check(s)) {
3581          return true;
3582      }
3583      mop = finalize_memop_asimd(s, a->sz);
3584      op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3585      do_fp_st(s, a->rt, clean_addr, mop);
3586      op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3587      return true;
3588  }
3589  
3590  static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3591  {
3592      TCGv_i64 clean_addr, dirty_addr;
3593      MemOp mop;
3594  
3595      if (!fp_access_check(s)) {
3596          return true;
3597      }
3598      mop = finalize_memop_asimd(s, a->sz);
3599      op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3600      do_fp_ld(s, a->rt, clean_addr, mop);
3601      op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3602      return true;
3603  }
3604  
3605  static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3606                               TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3607                               bool is_store, MemOp memop)
3608  {
3609      TCGv_i64 tcg_rm;
3610  
3611      if (a->rn == 31) {
3612          gen_check_sp_alignment(s);
3613      }
3614      *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3615  
3616      tcg_rm = read_cpu_reg(s, a->rm, 1);
3617      ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3618  
3619      tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3620      *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3621  }
3622  
3623  static bool trans_LDR(DisasContext *s, arg_ldst *a)
3624  {
3625      TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3626      bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3627      MemOp memop;
3628  
3629      if (extract32(a->opt, 1, 1) == 0) {
3630          return false;
3631      }
3632  
3633      memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3634      op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3635      tcg_rt = cpu_reg(s, a->rt);
3636      do_gpr_ld(s, tcg_rt, clean_addr, memop,
3637                a->ext, true, a->rt, iss_sf, false);
3638      return true;
3639  }
3640  
3641  static bool trans_STR(DisasContext *s, arg_ldst *a)
3642  {
3643      TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3644      bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3645      MemOp memop;
3646  
3647      if (extract32(a->opt, 1, 1) == 0) {
3648          return false;
3649      }
3650  
3651      memop = finalize_memop(s, a->sz);
3652      op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3653      tcg_rt = cpu_reg(s, a->rt);
3654      do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3655      return true;
3656  }
3657  
3658  static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3659  {
3660      TCGv_i64 clean_addr, dirty_addr;
3661      MemOp memop;
3662  
3663      if (extract32(a->opt, 1, 1) == 0) {
3664          return false;
3665      }
3666  
3667      if (!fp_access_check(s)) {
3668          return true;
3669      }
3670  
3671      memop = finalize_memop_asimd(s, a->sz);
3672      op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3673      do_fp_ld(s, a->rt, clean_addr, memop);
3674      return true;
3675  }
3676  
3677  static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3678  {
3679      TCGv_i64 clean_addr, dirty_addr;
3680      MemOp memop;
3681  
3682      if (extract32(a->opt, 1, 1) == 0) {
3683          return false;
3684      }
3685  
3686      if (!fp_access_check(s)) {
3687          return true;
3688      }
3689  
3690      memop = finalize_memop_asimd(s, a->sz);
3691      op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3692      do_fp_st(s, a->rt, clean_addr, memop);
3693      return true;
3694  }
3695  
3697  static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3698                           int sign, bool invert)
3699  {
3700      MemOp mop = a->sz | sign;
3701      TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3702  
3703      if (a->rn == 31) {
3704          gen_check_sp_alignment(s);
3705      }
3706      mop = check_atomic_align(s, a->rn, mop);
3707      clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3708                                  a->rn != 31, mop);
3709      tcg_rs = read_cpu_reg(s, a->rs, true);
3710      tcg_rt = cpu_reg(s, a->rt);
3711      if (invert) {
3712          tcg_gen_not_i64(tcg_rs, tcg_rs);
3713      }
3714      /*
3715       * The tcg atomic primitives are all full barriers.  Therefore we
3716       * can ignore the Acquire and Release bits of this instruction.
3717       */
3718      fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3719  
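    /*
     * MO_SIGN was only needed so the atomic helper compares operands as
     * signed (LDSMAX/LDSMIN); the value written back to Rt must be the
     * old memory value zero-extended, so strip the sign extension again.
     */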
3720      if (mop & MO_SIGN) {
3721          switch (a->sz) {
3722          case MO_8:
3723              tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3724              break;
3725          case MO_16:
3726              tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3727              break;
3728          case MO_32:
3729              tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3730              break;
3731          case MO_64:
3732              break;
3733          default:
3734              g_assert_not_reached();
3735          }
3736      }
3737      return true;
3738  }
3739  
3740  TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3741  TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3742  TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3743  TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3744  TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3745  TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3746  TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3747  TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3748  TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3749  
3750  static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3751  {
3752      bool iss_sf = ldst_iss_sf(a->sz, false, false);
3753      TCGv_i64 clean_addr;
3754      MemOp mop;
3755  
3756      if (!dc_isar_feature(aa64_atomics, s) ||
3757          !dc_isar_feature(aa64_rcpc_8_3, s)) {
3758          return false;
3759      }
3760      if (a->rn == 31) {
3761          gen_check_sp_alignment(s);
3762      }
3763      mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3764      clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3765                                  a->rn != 31, mop);
3766      /*
3767       * LDAPR* are a special case because they are a simple load, not a
3768       * fetch-and-do-something op.
3769       * The architectural consistency requirements here are weaker than
3770       * full load-acquire (we only need "load-acquire processor consistent"),
3771       * but we choose to implement them as full LDAQ.
3772       */
3773      do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3774                true, a->rt, iss_sf, true);
3775      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3776      return true;
3777  }
3778  
3779  static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3780  {
3781      TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3782      MemOp memop;
3783  
3784      /* Load with pointer authentication */
3785      if (!dc_isar_feature(aa64_pauth, s)) {
3786          return false;
3787      }
3788  
3789      if (a->rn == 31) {
3790          gen_check_sp_alignment(s);
3791      }
3792      dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3793  
3794      if (s->pauth_active) {
3795          if (!a->m) {
3796              gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3797                                        tcg_constant_i64(0));
3798          } else {
3799              gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3800                                        tcg_constant_i64(0));
3801          }
3802      }
3803  
3804      tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3805  
3806      memop = finalize_memop(s, MO_64);
3807  
3808      /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3809      clean_addr = gen_mte_check1(s, dirty_addr, false,
3810                                  a->w || a->rn != 31, memop);
3811  
3812      tcg_rt = cpu_reg(s, a->rt);
3813      do_gpr_ld(s, tcg_rt, clean_addr, memop,
3814                /* extend */ false, /* iss_valid */ !a->w,
3815                /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3816  
3817      if (a->w) {
3818          tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3819      }
3820      return true;
3821  }
3822  
3823  static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3824  {
3825      TCGv_i64 clean_addr, dirty_addr;
3826      MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3827      bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3828  
3829      if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3830          return false;
3831      }
3832  
3833      if (a->rn == 31) {
3834          gen_check_sp_alignment(s);
3835      }
3836  
3837      mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3838      dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3839      tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3840      clean_addr = clean_data_tbi(s, dirty_addr);
3841  
3842      /*
3843       * Load-AcquirePC semantics; we implement as the slightly more
3844       * restrictive Load-Acquire.
3845       */
3846      do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3847                a->rt, iss_sf, true);
3848      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3849      return true;
3850  }
3851  
3852  static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3853  {
3854      TCGv_i64 clean_addr, dirty_addr;
3855      MemOp mop = a->sz;
3856      bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3857  
3858      if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3859          return false;
3860      }
3861  
3862      /* TODO: ARMv8.4-LSE SCTLR.nAA */
3863  
3864      if (a->rn == 31) {
3865          gen_check_sp_alignment(s);
3866      }
3867  
3868      mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3869      dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3870      tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3871      clean_addr = clean_data_tbi(s, dirty_addr);
3872  
3873      /* Store-Release semantics */
3874      tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3875      do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3876      return true;
3877  }
3878  
3879  static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3880  {
3881      TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3882      MemOp endian, align, mop;
3883  
3884      int total;    /* total bytes */
3885      int elements; /* elements per vector */
3886      int r;
3887      int size = a->sz;
3888  
3889      if (!a->p && a->rm != 0) {
3890          /* For non-postindexed accesses the Rm field must be 0 */
3891          return false;
3892      }
3893      if (size == 3 && !a->q && a->selem != 1) {
3894          return false;
3895      }
3896      if (!fp_access_check(s)) {
3897          return true;
3898      }
3899  
3900      if (a->rn == 31) {
3901          gen_check_sp_alignment(s);
3902      }
3903  
3904      /* For our purposes, bytes are always little-endian.  */
3905      endian = s->be_data;
3906      if (size == 0) {
3907          endian = MO_LE;
3908      }
3909  
3910      total = a->rpt * a->selem * (a->q ? 16 : 8);
3911      tcg_rn = cpu_reg_sp(s, a->rn);
3912  
3913      /*
3914       * Issue the MTE check vs the logical repeat count, before we
3915       * promote consecutive little-endian elements below.
3916       */
3917      clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3918                                  finalize_memop_asimd(s, size));
3919  
3920      /*
3921       * Consecutive little-endian elements from a single register
3922       * can be promoted to a larger little-endian operation.
3923       */
3924      align = MO_ALIGN;
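    /*
     * E.g. LD1 {v0.16b}, [x0] (selem == 1) is then issued as two 8-byte
     * loads rather than sixteen single-byte loads.
     */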
3925      if (a->selem == 1 && endian == MO_LE) {
3926          align = pow2_align(size);
3927          size = 3;
3928      }
3929      if (!s->align_mem) {
3930          align = 0;
3931      }
3932      mop = endian | size | align;
3933  
3934      elements = (a->q ? 16 : 8) >> size;
3935      tcg_ebytes = tcg_constant_i64(1 << size);
3936      for (r = 0; r < a->rpt; r++) {
3937          int e;
3938          for (e = 0; e < elements; e++) {
3939              int xs;
3940              for (xs = 0; xs < a->selem; xs++) {
3941                  int tt = (a->rt + r + xs) % 32;
3942                  do_vec_ld(s, tt, e, clean_addr, mop);
3943                  tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3944              }
3945          }
3946      }
3947  
3948      /*
3949       * For non-quad operations, setting a slice of the low 64 bits of
3950       * the register clears the high 64 bits (in the ARM ARM pseudocode
3951       * this is implicit in the fact that 'rval' is a 64 bit wide
3952       * variable).  For quad operations, we might still need to zero
3953       * the high bits of SVE.
3954       * the high bits of the SVE register.
3955      for (r = 0; r < a->rpt * a->selem; r++) {
3956          int tt = (a->rt + r) % 32;
3957          clear_vec_high(s, a->q, tt);
3958      }
3959  
3960      if (a->p) {
3961          if (a->rm == 31) {
3962              tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3963          } else {
3964              tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3965          }
3966      }
3967      return true;
3968  }
3969  
3970  static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3971  {
3972      TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3973      MemOp endian, align, mop;
3974  
3975      int total;    /* total bytes */
3976      int elements; /* elements per vector */
3977      int r;
3978      int size = a->sz;
3979  
3980      if (!a->p && a->rm != 0) {
3981          /* For non-postindexed accesses the Rm field must be 0 */
3982          return false;
3983      }
3984      if (size == 3 && !a->q && a->selem != 1) {
3985          return false;
3986      }
3987      if (!fp_access_check(s)) {
3988          return true;
3989      }
3990  
3991      if (a->rn == 31) {
3992          gen_check_sp_alignment(s);
3993      }
3994  
3995      /* For our purposes, bytes are always little-endian.  */
3996      endian = s->be_data;
3997      if (size == 0) {
3998          endian = MO_LE;
3999      }
4000  
4001      total = a->rpt * a->selem * (a->q ? 16 : 8);
4002      tcg_rn = cpu_reg_sp(s, a->rn);
4003  
4004      /*
4005       * Issue the MTE check vs the logical repeat count, before we
4006       * promote consecutive little-endian elements below.
4007       */
4008      clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4009                                  finalize_memop_asimd(s, size));
4010  
4011      /*
4012       * Consecutive little-endian elements from a single register
4013       * can be promoted to a larger little-endian operation.
4014       */
4015      align = MO_ALIGN;
4016      if (a->selem == 1 && endian == MO_LE) {
4017          align = pow2_align(size);
4018          size = 3;
4019      }
4020      if (!s->align_mem) {
4021          align = 0;
4022      }
4023      mop = endian | size | align;
4024  
4025      elements = (a->q ? 16 : 8) >> size;
4026      tcg_ebytes = tcg_constant_i64(1 << size);
4027      for (r = 0; r < a->rpt; r++) {
4028          int e;
4029          for (e = 0; e < elements; e++) {
4030              int xs;
4031              for (xs = 0; xs < a->selem; xs++) {
4032                  int tt = (a->rt + r + xs) % 32;
4033                  do_vec_st(s, tt, e, clean_addr, mop);
4034                  tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4035              }
4036          }
4037      }
4038  
4039      if (a->p) {
4040          if (a->rm == 31) {
4041              tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4042          } else {
4043              tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4044          }
4045      }
4046      return true;
4047  }
4048  
4049  static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4050  {
4051      int xs, total, rt;
4052      TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4053      MemOp mop;
4054  
4055      if (!a->p && a->rm != 0) {
4056          return false;
4057      }
4058      if (!fp_access_check(s)) {
4059          return true;
4060      }
4061  
4062      if (a->rn == 31) {
4063          gen_check_sp_alignment(s);
4064      }
4065  
4066      total = a->selem << a->scale;
4067      tcg_rn = cpu_reg_sp(s, a->rn);
4068  
4069      mop = finalize_memop_asimd(s, a->scale);
4070      clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4071                                  total, mop);
4072  
4073      tcg_ebytes = tcg_constant_i64(1 << a->scale);
4074      for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4075          do_vec_st(s, rt, a->index, clean_addr, mop);
4076          tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4077      }
4078  
4079      if (a->p) {
4080          if (a->rm == 31) {
4081              tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4082          } else {
4083              tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4084          }
4085      }
4086      return true;
4087  }
4088  
4089  static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4090  {
4091      int xs, total, rt;
4092      TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4093      MemOp mop;
4094  
4095      if (!a->p && a->rm != 0) {
4096          return false;
4097      }
4098      if (!fp_access_check(s)) {
4099          return true;
4100      }
4101  
4102      if (a->rn == 31) {
4103          gen_check_sp_alignment(s);
4104      }
4105  
4106      total = a->selem << a->scale;
4107      tcg_rn = cpu_reg_sp(s, a->rn);
4108  
4109      mop = finalize_memop_asimd(s, a->scale);
4110      clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4111                                  total, mop);
4112  
4113      tcg_ebytes = tcg_constant_i64(1 << a->scale);
4114      for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4115          do_vec_ld(s, rt, a->index, clean_addr, mop);
4116          tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4117      }
4118  
4119      if (a->p) {
4120          if (a->rm == 31) {
4121              tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4122          } else {
4123              tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4124          }
4125      }
4126      return true;
4127  }
4128  
4129  static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4130  {
4131      int xs, total, rt;
4132      TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4133      MemOp mop;
4134  
4135      if (!a->p && a->rm != 0) {
4136          return false;
4137      }
4138      if (!fp_access_check(s)) {
4139          return true;
4140      }
4141  
4142      if (a->rn == 31) {
4143          gen_check_sp_alignment(s);
4144      }
4145  
4146      total = a->selem << a->scale;
4147      tcg_rn = cpu_reg_sp(s, a->rn);
4148  
4149      mop = finalize_memop_asimd(s, a->scale);
4150      clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4151                                  total, mop);
4152  
4153      tcg_ebytes = tcg_constant_i64(1 << a->scale);
4154      for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4155          /* Load and replicate to all elements */
4156          TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4157  
4158          tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
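        /*
         * (a->q + 1) * 8 is the operation size in bytes (8 for a D reg,
         * 16 for a Q reg); the element is replicated across that span.
         */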
4159          tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4160                               (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4161          tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4162      }
4163  
4164      if (a->p) {
4165          if (a->rm == 31) {
4166              tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4167          } else {
4168              tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4169          }
4170      }
4171      return true;
4172  }
4173  
4174  static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4175  {
4176      TCGv_i64 addr, clean_addr, tcg_rt;
4177      int size = 4 << s->dcz_blocksize;
4178  
4179      if (!dc_isar_feature(aa64_mte, s)) {
4180          return false;
4181      }
4182      if (s->current_el == 0) {
4183          return false;
4184      }
4185  
4186      if (a->rn == 31) {
4187          gen_check_sp_alignment(s);
4188      }
4189  
4190      addr = read_cpu_reg_sp(s, a->rn, true);
4191      tcg_gen_addi_i64(addr, addr, a->imm);
4192      tcg_rt = cpu_reg(s, a->rt);
4193  
4194      if (s->ata[0]) {
4195          gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4196      }
4197      /*
4198       * The non-tags portion of STZGM is mostly like DC_ZVA,
4199       * except the alignment happens before the access.
4200       */
4201      clean_addr = clean_data_tbi(s, addr);
4202      tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4203      gen_helper_dc_zva(tcg_env, clean_addr);
4204      return true;
4205  }
4206  
4207  static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4208  {
4209      TCGv_i64 addr, clean_addr, tcg_rt;
4210  
4211      if (!dc_isar_feature(aa64_mte, s)) {
4212          return false;
4213      }
4214      if (s->current_el == 0) {
4215          return false;
4216      }
4217  
4218      if (a->rn == 31) {
4219          gen_check_sp_alignment(s);
4220      }
4221  
4222      addr = read_cpu_reg_sp(s, a->rn, true);
4223      tcg_gen_addi_i64(addr, addr, a->imm);
4224      tcg_rt = cpu_reg(s, a->rt);
4225  
4226      if (s->ata[0]) {
4227          gen_helper_stgm(tcg_env, addr, tcg_rt);
4228      } else {
4229          MMUAccessType acc = MMU_DATA_STORE;
4230          int size = 4 << s->gm_blocksize;
4231  
4232          clean_addr = clean_data_tbi(s, addr);
4233          tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4234          gen_probe_access(s, clean_addr, acc, size);
4235      }
4236      return true;
4237  }
4238  
4239  static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4240  {
4241      TCGv_i64 addr, clean_addr, tcg_rt;
4242  
4243      if (!dc_isar_feature(aa64_mte, s)) {
4244          return false;
4245      }
4246      if (s->current_el == 0) {
4247          return false;
4248      }
4249  
4250      if (a->rn == 31) {
4251          gen_check_sp_alignment(s);
4252      }
4253  
4254      addr = read_cpu_reg_sp(s, a->rn, true);
4255      tcg_gen_addi_i64(addr, addr, a->imm);
4256      tcg_rt = cpu_reg(s, a->rt);
4257  
4258      if (s->ata[0]) {
4259          gen_helper_ldgm(tcg_rt, tcg_env, addr);
4260      } else {
4261          MMUAccessType acc = MMU_DATA_LOAD;
4262          int size = 4 << s->gm_blocksize;
4263  
4264          clean_addr = clean_data_tbi(s, addr);
4265          tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4266          gen_probe_access(s, clean_addr, acc, size);
4267          /* The result tags are zeros.  */
4268          tcg_gen_movi_i64(tcg_rt, 0);
4269      }
4270      return true;
4271  }
4272  
4273  static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4274  {
4275      TCGv_i64 addr, clean_addr, tcg_rt;
4276  
4277      if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4278          return false;
4279      }
4280  
4281      if (a->rn == 31) {
4282          gen_check_sp_alignment(s);
4283      }
4284  
4285      addr = read_cpu_reg_sp(s, a->rn, true);
4286      if (!a->p) {
4287          /* pre-index or signed offset */
4288          tcg_gen_addi_i64(addr, addr, a->imm);
4289      }
4290  
4291      tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4292      tcg_rt = cpu_reg(s, a->rt);
4293      if (s->ata[0]) {
4294          gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4295      } else {
4296          /*
4297           * Tag access disabled: we must check for aborts on the load
4298           * from [rn+offset], and then insert a 0 tag into rt.
4299           */
4300          clean_addr = clean_data_tbi(s, addr);
4301          gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4302          gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4303      }
4304  
4305      if (a->w) {
4306          /* pre-index or post-index */
4307          if (a->p) {
4308              /* post-index */
4309              tcg_gen_addi_i64(addr, addr, a->imm);
4310          }
4311          tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4312      }
4313      return true;
4314  }
4315  
4316  static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4317  {
4318      TCGv_i64 addr, tcg_rt;
4319  
4320      if (a->rn == 31) {
4321          gen_check_sp_alignment(s);
4322      }
4323  
4324      addr = read_cpu_reg_sp(s, a->rn, true);
4325      if (!a->p) {
4326          /* pre-index or signed offset */
4327          tcg_gen_addi_i64(addr, addr, a->imm);
4328      }
4329      tcg_rt = cpu_reg_sp(s, a->rt);
4330      if (!s->ata[0]) {
4331          /*
4332           * For STG and ST2G, we need to check alignment and probe memory.
4333           * TODO: For STZG and STZ2G, we could rely on the stores below,
4334           * at least for system mode; user-only won't enforce alignment.
4335           */
4336          if (is_pair) {
4337              gen_helper_st2g_stub(tcg_env, addr);
4338          } else {
4339              gen_helper_stg_stub(tcg_env, addr);
4340          }
4341      } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4342          if (is_pair) {
4343              gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4344          } else {
4345              gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4346          }
4347      } else {
4348          if (is_pair) {
4349              gen_helper_st2g(tcg_env, addr, tcg_rt);
4350          } else {
4351              gen_helper_stg(tcg_env, addr, tcg_rt);
4352          }
4353      }
4354  
4355      if (is_zero) {
4356          TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4357          TCGv_i64 zero64 = tcg_constant_i64(0);
4358          TCGv_i128 zero128 = tcg_temp_new_i128();
4359          int mem_index = get_mem_index(s);
4360          MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4361  
4362          tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4363  
4364          /* This is 1 or 2 atomic 16-byte operations. */
4365          tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4366          if (is_pair) {
4367              tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4368              tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4369          }
4370      }
4371  
4372      if (a->w) {
4373          /* pre-index or post-index */
4374          if (a->p) {
4375              /* post-index */
4376              tcg_gen_addi_i64(addr, addr, a->imm);
4377          }
4378          tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4379      }
4380      return true;
4381  }
4382  
4383  TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4384  TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4385  TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4386  TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4387  
4388  typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4389  
4390  static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4391                     bool is_setg, SetFn fn)
4392  {
4393      int memidx;
4394      uint32_t syndrome, desc = 0;
4395  
4396      if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4397          return false;
4398      }
4399  
4400      /*
4401       * UNPREDICTABLE cases: we choose to UNDEF, which allows
4402       * us to pull this check before the CheckMOPSEnabled() test
4403       * (which we do in the helper function)
4404       */
4405      if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4406          a->rd == 31 || a->rn == 31) {
4407          return false;
4408      }
4409  
4410      memidx = get_a64_user_mem_index(s, a->unpriv);
4411  
4412      /*
4413       * We pass option_a == true, matching our implementation;
4414       * we pass wrong_option == false: helper function may set that bit.
4415       */
4416      syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4417                         is_epilogue, false, true, a->rd, a->rs, a->rn);
4418  
4419      if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4420          /* We may need to do MTE tag checking, so assemble the descriptor */
4421          desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4422          desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4423          desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4424          /* SIZEM1 and ALIGN we leave 0 (byte write) */
4425      }
4426      /* The helper function always needs the memidx even with MTE disabled */
4427      desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4428  
4429      /*
4430       * The helper needs the register numbers, but since they're in
4431       * the syndrome anyway, we let it extract them from there rather
4432       * than passing in an extra three integer arguments.
4433       */
4434      fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4435      return true;
4436  }
4437  
4438  TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4439  TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4440  TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4441  TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4442  TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4443  TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4444  
4445  typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4446  
4447  static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4448  {
4449      int rmemidx, wmemidx;
4450      uint32_t syndrome, rdesc = 0, wdesc = 0;
4451      bool wunpriv = extract32(a->options, 0, 1);
4452      bool runpriv = extract32(a->options, 1, 1);
4453  
4454      /*
4455       * UNPREDICTABLE cases: we choose to UNDEF, which allows
4456       * us to pull this check before the CheckMOPSEnabled() test
4457       * (which we do in the helper function)
4458       */
4459      if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4460          a->rd == 31 || a->rs == 31 || a->rn == 31) {
4461          return false;
4462      }
4463  
4464      rmemidx = get_a64_user_mem_index(s, runpriv);
4465      wmemidx = get_a64_user_mem_index(s, wunpriv);
4466  
4467      /*
4468       * We pass option_a == true, matching our implementation;
4469       * we pass wrong_option == false: helper function may set that bit.
4470       */
4471      syndrome = syn_mop(false, false, a->options, is_epilogue,
4472                         false, true, a->rd, a->rs, a->rn);
4473  
4474      /* If we need to do MTE tag checking, assemble the descriptors */
4475      if (s->mte_active[runpriv]) {
4476          rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4477          rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4478      }
4479      if (s->mte_active[wunpriv]) {
4480          wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4481          wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4482          wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4483      }
4484      /* The helper function needs these parts of the descriptor regardless */
4485      rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4486      wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4487  
4488      /*
4489       * The helper needs the register numbers, but since they're in
4490       * the syndrome anyway, we let it extract them from there rather
4491       * than passing in an extra three integer arguments.
4492       */
4493      fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4494         tcg_constant_i32(rdesc));
4495      return true;
4496  }
4497  
4498  TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4499  TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4500  TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4501  TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4502  TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4503  TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4504  
4505  typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4506  
4507  static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4508                      bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4509  {
4510      TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4511      TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4512      TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4513  
4514      fn(tcg_rd, tcg_rn, tcg_imm);
4515      if (!a->sf) {
4516          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4517      }
4518      return true;
4519  }
4520  
4521  /*
4522   * PC-rel. addressing
4523   */
4524  
4525  static bool trans_ADR(DisasContext *s, arg_ri *a)
4526  {
4527      gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4528      return true;
4529  }
4530  
4531  static bool trans_ADRP(DisasContext *s, arg_ri *a)
4532  {
4533      int64_t offset = (int64_t)a->imm << 12;
4534  
4535      /* The page offset is ok for CF_PCREL. */
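    /*
     * Subtracting the PC's in-page offset here means that adding the PC
     * back in gen_pc_plus_diff yields (pc & ~0xfff) + (imm << 12).
     */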
4536      offset -= s->pc_curr & 0xfff;
4537      gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4538      return true;
4539  }
4540  
4541  /*
4542   * Add/subtract (immediate)
4543   */
4544  TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4545  TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4546  TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4547  TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4548  
4549  /*
4550   * Add/subtract (immediate, with tags)
4551   */
4552  
4553  static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4554                                        bool sub_op)
4555  {
4556      TCGv_i64 tcg_rn, tcg_rd;
4557      int imm;
4558  
4559      imm = a->uimm6 << LOG2_TAG_GRANULE;
4560      if (sub_op) {
4561          imm = -imm;
4562      }
4563  
4564      tcg_rn = cpu_reg_sp(s, a->rn);
4565      tcg_rd = cpu_reg_sp(s, a->rd);
4566  
4567      if (s->ata[0]) {
4568          gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4569                             tcg_constant_i32(imm),
4570                             tcg_constant_i32(a->uimm4));
4571      } else {
4572          tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4573          gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4574      }
4575      return true;
4576  }
4577  
4578  TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4579  TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4580  
4581  /* The input should be a value in the bottom e bits (with higher
4582   * bits zero); returns that value replicated into every element
4583   * of size e in a 64 bit integer.
4584   */
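/* E.g. bitfield_replicate(0x3, 8) returns 0x0303030303030303. */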
4585  static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4586  {
4587      assert(e != 0);
4588      while (e < 64) {
4589          mask |= mask << e;
4590          e *= 2;
4591      }
4592      return mask;
4593  }
4594  
4595  /*
4596   * Logical (immediate)
4597   */
4598  
4599  /*
4600   * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4601   * only require the wmask. Returns false if the imms/immr/immn are a reserved
4602   * value (ie should cause a guest UNDEF exception), and true if they are
4603   * valid, in which case the decoded bit pattern is written to result.
4604   */
4605  bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4606                              unsigned int imms, unsigned int immr)
4607  {
4608      uint64_t mask;
4609      unsigned e, levels, s, r;
4610      int len;
4611  
4612      assert(immn < 2 && imms < 64 && immr < 64);
4613  
4614      /* The bit patterns we create here are 64 bit patterns which
4615       * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4616       * 64 bits each. Each element contains the same value: a run
4617       * of between 1 and e-1 non-zero bits, rotated within the
4618       * element by between 0 and e-1 bits.
4619       *
4620       * The element size and run length are encoded into immn (1 bit)
4621       * and imms (6 bits) as follows:
4622       * 64 bit elements: immn = 1, imms = <length of run - 1>
4623       * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4624       * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4625       *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4626       *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4627       *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4628       * Notice that immn = 0, imms = 11111x is the only combination
4629       * not covered by one of the above options; this is reserved.
4630       * Further, <length of run - 1> all-ones is a reserved pattern.
4631       *
4632       * In all cases the rotation is by immr % e (and immr is 6 bits).
4633       */
4634  
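    /*
     * Worked examples: immn = 0, imms = 0b100000, immr = 0 selects 16-bit
     * elements with a one-bit run, giving 0x0001000100010001; immn = 0,
     * imms = 0b111100, immr = 0 selects 2-bit elements, giving
     * 0x5555555555555555.
     */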
4635      /* First determine the element size */
4636      len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4637      if (len < 1) {
4638          /* This is the immn == 0, imms == 0b11111x case */
4639          return false;
4640      }
4641      e = 1 << len;
4642  
4643      levels = e - 1;
4644      s = imms & levels;
4645      r = immr & levels;
4646  
4647      if (s == levels) {
4648          /* <length of run - 1> mustn't be all-ones. */
4649          return false;
4650      }
4651  
4652      /* Create the value of one element: s+1 set bits rotated
4653       * by r within the element (which is e bits wide)...
4654       */
4655      mask = MAKE_64BIT_MASK(0, s + 1);
4656      if (r) {
4657          mask = (mask >> r) | (mask << (e - r));
4658          mask &= MAKE_64BIT_MASK(0, e);
4659      }
4660      /* ...then replicate the element over the whole 64 bit value */
4661      mask = bitfield_replicate(mask, e);
4662      *result = mask;
4663      return true;
4664  }
4665  
4666  static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4667                          void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4668  {
4669      TCGv_i64 tcg_rd, tcg_rn;
4670      uint64_t imm;
4671  
4672      /* Some immediate field values are reserved. */
4673      if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4674                                  extract32(a->dbm, 0, 6),
4675                                  extract32(a->dbm, 6, 6))) {
4676          return false;
4677      }
4678      if (!a->sf) {
4679          imm &= 0xffffffffull;
4680      }
4681  
4682      tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4683      tcg_rn = cpu_reg(s, a->rn);
4684  
4685      fn(tcg_rd, tcg_rn, imm);
4686      if (set_cc) {
4687          gen_logic_CC(a->sf, tcg_rd);
4688      }
4689      if (!a->sf) {
4690          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4691      }
4692      return true;
4693  }
4694  
4695  TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4696  TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4697  TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4698  TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4699  
4700  /*
4701   * Move wide (immediate)
4702   */
4703  
4704  static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4705  {
4706      int pos = a->hw << 4;
4707      tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4708      return true;
4709  }
4710  
4711  static bool trans_MOVN(DisasContext *s, arg_movw *a)
4712  {
4713      int pos = a->hw << 4;
4714      uint64_t imm = a->imm;
4715  
4716      imm = ~(imm << pos);
4717      if (!a->sf) {
4718          imm = (uint32_t)imm;
4719      }
4720      tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4721      return true;
4722  }
4723  
4724  static bool trans_MOVK(DisasContext *s, arg_movw *a)
4725  {
4726      int pos = a->hw << 4;
4727      TCGv_i64 tcg_rd, tcg_im;
4728  
4729      tcg_rd = cpu_reg(s, a->rd);
4730      tcg_im = tcg_constant_i64(a->imm);
4731      tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4732      if (!a->sf) {
4733          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4734      }
4735      return true;
4736  }
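
/*
 * Illustrative example (not from the source): MOVZ x0, #0x1234, LSL #16
 * sets x0 = 0x12340000, and a following MOVK x0, #0x5678 replaces only
 * bits [15:0] via the deposit above, giving 0x12345678.
 */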
4737  
4738  /*
4739   * Bitfield
4740   */
4741  
4742  static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4743  {
4744      TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4745      TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4746      unsigned int bitsize = a->sf ? 64 : 32;
4747      unsigned int ri = a->immr;
4748      unsigned int si = a->imms;
4749      unsigned int pos, len;
4750  
4751      if (si >= ri) {
4752          /* Wd<s-r:0> = Wn<s:r> */
4753          len = (si - ri) + 1;
4754          tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4755          if (!a->sf) {
4756              tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4757          }
4758      } else {
4759          /* Wd<32+s-r,32-r> = Wn<s:0> */
4760          len = si + 1;
4761          pos = (bitsize - ri) & (bitsize - 1);
4762  
4763          if (len < ri) {
4764              /*
4765               * Sign extend the destination field from len to fill the
4766               * balance of the word.  Let the deposit below insert all
4767               * of those sign bits.
4768               */
4769              tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4770              len = ri;
4771          }
4772  
4773          /*
4774           * We start with zero, and we haven't modified any bits outside
4775           * bitsize, therefore no final zero-extension is needed for !sf.
4776           */
4777          tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4778      }
4779      return true;
4780  }
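
/*
 * As an illustration (not from the source): SXTB Xd, Wn is SBFM with
 * immr = 0, imms = 7, which takes the si >= ri path and becomes a single
 * sextract of bits [7:0]; ASR Xd, Xn, #sh is SBFM with immr = sh,
 * imms = 63.
 */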
4781  
4782  static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4783  {
4784      TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4785      TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4786      unsigned int bitsize = a->sf ? 64 : 32;
4787      unsigned int ri = a->immr;
4788      unsigned int si = a->imms;
4789      unsigned int pos, len;
4790  
4794      if (si >= ri) {
4795          /* Wd<s-r:0> = Wn<s:r> */
4796          len = (si - ri) + 1;
4797          tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4798      } else {
4799          /* Wd<32+s-r,32-r> = Wn<s:0> */
4800          len = si + 1;
4801          pos = (bitsize - ri) & (bitsize - 1);
4802          tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4803      }
4804      return true;
4805  }
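
/*
 * Likewise for the unsigned form (illustrative): LSR Xd, Xn, #sh is UBFM
 * with immr = sh, imms = 63 and uses the extract path, while
 * LSL Xd, Xn, #sh (sh != 0) is UBFM with immr = 64 - sh, imms = 63 - sh,
 * which lands in the deposit_z path with pos = sh.
 */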
4806  
4807  static bool trans_BFM(DisasContext *s, arg_BFM *a)
4808  {
4809      TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4810      TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4811      unsigned int bitsize = a->sf ? 64 : 32;
4812      unsigned int ri = a->immr;
4813      unsigned int si = a->imms;
4814      unsigned int pos, len;
4815  
4819      if (si >= ri) {
4820          /* Wd<s-r:0> = Wn<s:r> */
4821          tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4822          len = (si - ri) + 1;
4823          pos = 0;
4824      } else {
4825          /* Wd<32+s-r,32-r> = Wn<s:0> */
4826          len = si + 1;
4827          pos = (bitsize - ri) & (bitsize - 1);
4828      }
4829  
4830      tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4831      if (!a->sf) {
4832          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4833      }
4834      return true;
4835  }
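
/*
 * Illustrative mapping for the insert form: BFI Xd, Xn, #lsb, #w is BFM
 * with immr = (64 - lsb) % 64 and imms = w - 1, so w bits from Xn<w-1:0>
 * are deposited at Xd<lsb+w-1:lsb> while the rest of Xd is preserved.
 */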
4836  
4837  static bool trans_EXTR(DisasContext *s, arg_extract *a)
4838  {
4839      TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4840  
4841      tcg_rd = cpu_reg(s, a->rd);
4842  
4843      if (unlikely(a->imm == 0)) {
4844          /*
4845           * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4846           * so an extract from bit 0 is a special case.
4847           */
4848          if (a->sf) {
4849              tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4850          } else {
4851              tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4852          }
4853      } else {
4854          tcg_rm = cpu_reg(s, a->rm);
4855          tcg_rn = cpu_reg(s, a->rn);
4856  
4857          if (a->sf) {
4858              /* Specialization to ROR happens in EXTRACT2.  */
4859              tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4860          } else {
4861              TCGv_i32 t0 = tcg_temp_new_i32();
4862  
4863              tcg_gen_extrl_i64_i32(t0, tcg_rm);
4864              if (a->rm == a->rn) {
4865                  tcg_gen_rotri_i32(t0, t0, a->imm);
4866              } else {
4867                  TCGv_i32 t1 = tcg_temp_new_i32();
4868                  tcg_gen_extrl_i64_i32(t1, tcg_rn);
4869                  tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4870              }
4871              tcg_gen_extu_i32_i64(tcg_rd, t0);
4872          }
4873      }
4874      return true;
4875  }
4876  
4877  static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4878  {
4879      if (fp_access_check(s)) {
4880          int len = (a->len + 1) * 16;
4881  
4882          tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4883                             vec_full_reg_offset(s, a->rm), tcg_env,
4884                             a->q ? 16 : 8, vec_full_reg_size(s),
4885                             (len << 6) | (a->tbx << 5) | a->rn,
4886                             gen_helper_simd_tblx);
4887      }
4888      return true;
4889  }
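
/*
 * Everything the helper needs is packed into the simd_data argument:
 * the table length in bytes, the TBX flag (merge rather than zero the
 * destination for out-of-range indices) and the first table register.
 */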
4890  
4891  typedef int simd_permute_idx_fn(int i, int part, int elements);
4892  
4893  static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4894                              simd_permute_idx_fn *fn, int part)
4895  {
4896      MemOp esz = a->esz;
4897      int datasize = a->q ? 16 : 8;
4898      int elements = datasize >> esz;
4899      TCGv_i64 tcg_res[2], tcg_ele;
4900  
4901      if (esz == MO_64 && !a->q) {
4902          return false;
4903      }
4904      if (!fp_access_check(s)) {
4905          return true;
4906      }
4907  
4908      tcg_res[0] = tcg_temp_new_i64();
4909      tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4910      tcg_ele = tcg_temp_new_i64();
4911  
4912      for (int i = 0; i < elements; i++) {
4913          int o, w, idx;
4914  
4915          idx = fn(i, part, elements);
4916          read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4917                           idx & (elements - 1), esz);
4918  
4919          w = (i << (esz + 3)) / 64;
4920          o = (i << (esz + 3)) % 64;
4921          if (o == 0) {
4922              tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4923          } else {
4924              tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4925          }
4926      }
4927  
4928      for (int i = a->q; i >= 0; --i) {
4929          write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4930      }
4931      clear_vec_high(s, a->q, a->rd);
4932      return true;
4933  }
4934  
4935  static int permute_load_uzp(int i, int part, int elements)
4936  {
4937      return 2 * i + part;
4938  }
4939  
4940  TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4941  TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4942  
4943  static int permute_load_trn(int i, int part, int elements)
4944  {
4945      return (i & 1) * elements + (i & ~1) + part;
4946  }
4947  
4948  TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4949  TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4950  
4951  static int permute_load_zip(int i, int part, int elements)
4952  {
4953      return (i & 1) * elements + ((part * elements + i) >> 1);
4954  }
4955  
4956  TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4957  TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
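
/*
 * Worked example of the index functions (illustrative): with elements = 4
 * the source element order is
 *   UZP1 {0,2,4,6}  UZP2 {1,3,5,7}
 *   TRN1 {0,4,2,6}  TRN2 {1,5,3,7}
 *   ZIP1 {0,4,1,5}  ZIP2 {2,6,3,7}
 * where indices >= 4 select Vm and the others select Vn, per the
 * "idx & elements" test in do_simd_permute above.
 */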
4958  
4959  /*
4960   * Cryptographic AES, SHA, SHA512
4961   */
4962  
4963  TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4964  TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4965  TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4966  TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4967  
4968  TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4969  TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4970  TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4971  TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4972  
4973  TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4974  TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4975  TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4976  
4977  TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4978  TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4979  TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4980  
4981  TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4982  TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4983  TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4984  TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4985  TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4986  TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4987  TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4988  
4989  TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4990  TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4991  
4992  TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4993  TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4994  
4995  static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4996  {
4997      if (!dc_isar_feature(aa64_sm3, s)) {
4998          return false;
4999      }
5000      if (fp_access_check(s)) {
5001          TCGv_i32 tcg_op1 = tcg_temp_new_i32();
5002          TCGv_i32 tcg_op2 = tcg_temp_new_i32();
5003          TCGv_i32 tcg_op3 = tcg_temp_new_i32();
5004          TCGv_i32 tcg_res = tcg_temp_new_i32();
5005  
5006          read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5007          read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5008          read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5009  
5010          tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5011          tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5012          tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5013          tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5014  
5015          /* Clear the whole register first, then store bits [127:96]. */
5016          clear_vec(s, a->rd);
5017          write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5018      }
5019      return true;
5020  }
5021  
5022  static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5023  {
5024      if (fp_access_check(s)) {
5025          gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5026      }
5027      return true;
5028  }
5029  TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5030  TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5031  TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5032  TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5033  
5034  static bool trans_XAR(DisasContext *s, arg_XAR *a)
5035  {
5036      if (!dc_isar_feature(aa64_sha3, s)) {
5037          return false;
5038      }
5039      if (fp_access_check(s)) {
5040          gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5041                       vec_full_reg_offset(s, a->rn),
5042                       vec_full_reg_offset(s, a->rm), a->imm, 16,
5043                       vec_full_reg_size(s));
5044      }
5045      return true;
5046  }
5047  
5048  /*
5049   * Advanced SIMD copy
5050   */
5051  
5052  static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5053  {
5054      unsigned esz = ctz32(imm);
5055      if (esz <= MO_64) {
5056          *pesz = esz;
5057          *pidx = imm >> (esz + 1);
5058          return true;
5059      }
5060      return false;
5061  }
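
/*
 * For example (illustrative): imm = 0b01010 has ctz32(imm) = 1, so the
 * element size is MO_16 and the index is imm >> 2 = 2.  An imm whose
 * lowest set bit is above bit 3 (or zero) is a reserved encoding and is
 * rejected.
 */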
5062  
5063  static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5064  {
5065      MemOp esz;
5066      unsigned idx;
5067  
5068      if (!decode_esz_idx(a->imm, &esz, &idx)) {
5069          return false;
5070      }
5071      if (fp_access_check(s)) {
5072          /*
5073           * This instruction just extracts the specified element and
5074           * zero-extends it into the bottom of the destination register.
5075           */
5076          TCGv_i64 tmp = tcg_temp_new_i64();
5077          read_vec_element(s, tmp, a->rn, idx, esz);
5078          write_fp_dreg(s, a->rd, tmp);
5079      }
5080      return true;
5081  }
5082  
5083  static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5084  {
5085      MemOp esz;
5086      unsigned idx;
5087  
5088      if (!decode_esz_idx(a->imm, &esz, &idx)) {
5089          return false;
5090      }
5091      if (esz == MO_64 && !a->q) {
5092          return false;
5093      }
5094      if (fp_access_check(s)) {
5095          tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5096                               vec_reg_offset(s, a->rn, idx, esz),
5097                               a->q ? 16 : 8, vec_full_reg_size(s));
5098      }
5099      return true;
5100  }
5101  
5102  static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5103  {
5104      MemOp esz;
5105      unsigned idx;
5106  
5107      if (!decode_esz_idx(a->imm, &esz, &idx)) {
5108          return false;
5109      }
5110      if (esz == MO_64 && !a->q) {
5111          return false;
5112      }
5113      if (fp_access_check(s)) {
5114          tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5115                               a->q ? 16 : 8, vec_full_reg_size(s),
5116                               cpu_reg(s, a->rn));
5117      }
5118      return true;
5119  }
5120  
5121  static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5122  {
5123      MemOp esz;
5124      unsigned idx;
5125  
5126      if (!decode_esz_idx(a->imm, &esz, &idx)) {
5127          return false;
5128      }
5129      if (is_signed) {
5130          if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5131              return false;
5132          }
5133      } else {
5134          if (esz == MO_64 ? !a->q : a->q) {
5135              return false;
5136          }
5137      }
5138      if (fp_access_check(s)) {
5139          TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5140          read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5141          if (is_signed && !a->q) {
5142              tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5143          }
5144      }
5145      return true;
5146  }
5147  
5148  TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5149  TRANS(UMOV, do_smov_umov, a, 0)
5150  
5151  static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5152  {
5153      MemOp esz;
5154      unsigned idx;
5155  
5156      if (!decode_esz_idx(a->imm, &esz, &idx)) {
5157          return false;
5158      }
5159      if (fp_access_check(s)) {
5160          write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5161          clear_vec_high(s, true, a->rd);
5162      }
5163      return true;
5164  }
5165  
5166  static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5167  {
5168      MemOp esz;
5169      unsigned didx, sidx;
5170  
5171      if (!decode_esz_idx(a->di, &esz, &didx)) {
5172          return false;
5173      }
5174      sidx = a->si >> esz;
5175      if (fp_access_check(s)) {
5176          TCGv_i64 tmp = tcg_temp_new_i64();
5177  
5178          read_vec_element(s, tmp, a->rn, sidx, esz);
5179          write_vec_element(s, tmp, a->rd, didx, esz);
5180  
5181          /* INS is considered a 128-bit write for SVE. */
5182          clear_vec_high(s, true, a->rd);
5183      }
5184      return true;
5185  }
5186  
5187  /*
5188   * Advanced SIMD three same
5189   */
5190  
5191  typedef struct FPScalar {
5192      void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5193      void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5194      void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5195  } FPScalar;
5196  
5197  static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5198                                          const FPScalar *f, int mergereg,
5199                                          ARMFPStatusFlavour fpsttype)
5200  {
5201      switch (a->esz) {
5202      case MO_64:
5203          if (fp_access_check(s)) {
5204              TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5205              TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5206              f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5207              write_fp_dreg_merging(s, a->rd, mergereg, t0);
5208          }
5209          break;
5210      case MO_32:
5211          if (fp_access_check(s)) {
5212              TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5213              TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5214              f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5215              write_fp_sreg_merging(s, a->rd, mergereg, t0);
5216          }
5217          break;
5218      case MO_16:
5219          if (!dc_isar_feature(aa64_fp16, s)) {
5220              return false;
5221          }
5222          if (fp_access_check(s)) {
5223              TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5224              TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5225              f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5226              write_fp_hreg_merging(s, a->rd, mergereg, t0);
5227          }
5228          break;
5229      default:
5230          return false;
5231      }
5232      return true;
5233  }
5234  
5235  static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5236                            int mergereg)
5237  {
5238      return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5239                                         a->esz == MO_16 ?
5240                                         FPST_A64_F16 : FPST_A64);
5241  }
5242  
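/*
 * Some insns need both a different helper and the alternate-handling
 * float_status when FPCR.AH == 1 (e.g. FRECPS and FRSQRTS below);
 * select_ah_fpst() makes that choice.
 */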
5243  static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5244                                   const FPScalar *fnormal, const FPScalar *fah,
5245                                   int mergereg)
5246  {
5247      return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5248                                         mergereg, select_ah_fpst(s, a->esz));
5249  }
5250  
5251  /* Some insns need to call different helpers when FPCR.AH == 1 */
5252  static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5253                                const FPScalar *fnormal,
5254                                const FPScalar *fah,
5255                                int mergereg)
5256  {
5257      return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5258  }
5259  
5260  static const FPScalar f_scalar_fadd = {
5261      gen_helper_vfp_addh,
5262      gen_helper_vfp_adds,
5263      gen_helper_vfp_addd,
5264  };
5265  TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5266  
5267  static const FPScalar f_scalar_fsub = {
5268      gen_helper_vfp_subh,
5269      gen_helper_vfp_subs,
5270      gen_helper_vfp_subd,
5271  };
5272  TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5273  
5274  static const FPScalar f_scalar_fdiv = {
5275      gen_helper_vfp_divh,
5276      gen_helper_vfp_divs,
5277      gen_helper_vfp_divd,
5278  };
5279  TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5280  
5281  static const FPScalar f_scalar_fmul = {
5282      gen_helper_vfp_mulh,
5283      gen_helper_vfp_muls,
5284      gen_helper_vfp_muld,
5285  };
5286  TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5287  
5288  static const FPScalar f_scalar_fmax = {
5289      gen_helper_vfp_maxh,
5290      gen_helper_vfp_maxs,
5291      gen_helper_vfp_maxd,
5292  };
5293  static const FPScalar f_scalar_fmax_ah = {
5294      gen_helper_vfp_ah_maxh,
5295      gen_helper_vfp_ah_maxs,
5296      gen_helper_vfp_ah_maxd,
5297  };
5298  TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5299  
5300  static const FPScalar f_scalar_fmin = {
5301      gen_helper_vfp_minh,
5302      gen_helper_vfp_mins,
5303      gen_helper_vfp_mind,
5304  };
5305  static const FPScalar f_scalar_fmin_ah = {
5306      gen_helper_vfp_ah_minh,
5307      gen_helper_vfp_ah_mins,
5308      gen_helper_vfp_ah_mind,
5309  };
5310  TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5311  
5312  static const FPScalar f_scalar_fmaxnm = {
5313      gen_helper_vfp_maxnumh,
5314      gen_helper_vfp_maxnums,
5315      gen_helper_vfp_maxnumd,
5316  };
5317  TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5318  
5319  static const FPScalar f_scalar_fminnm = {
5320      gen_helper_vfp_minnumh,
5321      gen_helper_vfp_minnums,
5322      gen_helper_vfp_minnumd,
5323  };
5324  TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5325  
5326  static const FPScalar f_scalar_fmulx = {
5327      gen_helper_advsimd_mulxh,
5328      gen_helper_vfp_mulxs,
5329      gen_helper_vfp_mulxd,
5330  };
5331  TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5332  
5333  static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5334  {
5335      gen_helper_vfp_mulh(d, n, m, s);
5336      gen_vfp_negh(d, d);
5337  }
5338  
5339  static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5340  {
5341      gen_helper_vfp_muls(d, n, m, s);
5342      gen_vfp_negs(d, d);
5343  }
5344  
5345  static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5346  {
5347      gen_helper_vfp_muld(d, n, m, s);
5348      gen_vfp_negd(d, d);
5349  }
5350  
5351  static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5352  {
5353      gen_helper_vfp_mulh(d, n, m, s);
5354      gen_vfp_ah_negh(d, d);
5355  }
5356  
5357  static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5358  {
5359      gen_helper_vfp_muls(d, n, m, s);
5360      gen_vfp_ah_negs(d, d);
5361  }
5362  
5363  static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5364  {
5365      gen_helper_vfp_muld(d, n, m, s);
5366      gen_vfp_ah_negd(d, d);
5367  }
5368  
5369  static const FPScalar f_scalar_fnmul = {
5370      gen_fnmul_h,
5371      gen_fnmul_s,
5372      gen_fnmul_d,
5373  };
5374  static const FPScalar f_scalar_ah_fnmul = {
5375      gen_fnmul_ah_h,
5376      gen_fnmul_ah_s,
5377      gen_fnmul_ah_d,
5378  };
5379  TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5380  
5381  static const FPScalar f_scalar_fcmeq = {
5382      gen_helper_advsimd_ceq_f16,
5383      gen_helper_neon_ceq_f32,
5384      gen_helper_neon_ceq_f64,
5385  };
5386  TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5387  
5388  static const FPScalar f_scalar_fcmge = {
5389      gen_helper_advsimd_cge_f16,
5390      gen_helper_neon_cge_f32,
5391      gen_helper_neon_cge_f64,
5392  };
5393  TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5394  
5395  static const FPScalar f_scalar_fcmgt = {
5396      gen_helper_advsimd_cgt_f16,
5397      gen_helper_neon_cgt_f32,
5398      gen_helper_neon_cgt_f64,
5399  };
5400  TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5401  
5402  static const FPScalar f_scalar_facge = {
5403      gen_helper_advsimd_acge_f16,
5404      gen_helper_neon_acge_f32,
5405      gen_helper_neon_acge_f64,
5406  };
5407  TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5408  
5409  static const FPScalar f_scalar_facgt = {
5410      gen_helper_advsimd_acgt_f16,
5411      gen_helper_neon_acgt_f32,
5412      gen_helper_neon_acgt_f64,
5413  };
5414  TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5415  
5416  static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5417  {
5418      gen_helper_vfp_subh(d, n, m, s);
5419      gen_vfp_absh(d, d);
5420  }
5421  
5422  static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5423  {
5424      gen_helper_vfp_subs(d, n, m, s);
5425      gen_vfp_abss(d, d);
5426  }
5427  
5428  static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5429  {
5430      gen_helper_vfp_subd(d, n, m, s);
5431      gen_vfp_absd(d, d);
5432  }
5433  
5434  static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5435  {
5436      gen_helper_vfp_subh(d, n, m, s);
5437      gen_vfp_ah_absh(d, d);
5438  }
5439  
5440  static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5441  {
5442      gen_helper_vfp_subs(d, n, m, s);
5443      gen_vfp_ah_abss(d, d);
5444  }
5445  
5446  static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5447  {
5448      gen_helper_vfp_subd(d, n, m, s);
5449      gen_vfp_ah_absd(d, d);
5450  }
5451  
5452  static const FPScalar f_scalar_fabd = {
5453      gen_fabd_h,
5454      gen_fabd_s,
5455      gen_fabd_d,
5456  };
5457  static const FPScalar f_scalar_ah_fabd = {
5458      gen_fabd_ah_h,
5459      gen_fabd_ah_s,
5460      gen_fabd_ah_d,
5461  };
5462  TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5463  
5464  static const FPScalar f_scalar_frecps = {
5465      gen_helper_recpsf_f16,
5466      gen_helper_recpsf_f32,
5467      gen_helper_recpsf_f64,
5468  };
5469  static const FPScalar f_scalar_ah_frecps = {
5470      gen_helper_recpsf_ah_f16,
5471      gen_helper_recpsf_ah_f32,
5472      gen_helper_recpsf_ah_f64,
5473  };
5474  TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5475        &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5476  
5477  static const FPScalar f_scalar_frsqrts = {
5478      gen_helper_rsqrtsf_f16,
5479      gen_helper_rsqrtsf_f32,
5480      gen_helper_rsqrtsf_f64,
5481  };
5482  static const FPScalar f_scalar_ah_frsqrts = {
5483      gen_helper_rsqrtsf_ah_f16,
5484      gen_helper_rsqrtsf_ah_f32,
5485      gen_helper_rsqrtsf_ah_f64,
5486  };
5487  TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5488        &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5489  
5490  static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5491                         const FPScalar *f, bool swap)
5492  {
5493      switch (a->esz) {
5494      case MO_64:
5495          if (fp_access_check(s)) {
5496              TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5497              TCGv_i64 t1 = tcg_constant_i64(0);
5498              if (swap) {
5499                  f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5500              } else {
5501                  f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5502              }
5503              write_fp_dreg(s, a->rd, t0);
5504          }
5505          break;
5506      case MO_32:
5507          if (fp_access_check(s)) {
5508              TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5509              TCGv_i32 t1 = tcg_constant_i32(0);
5510              if (swap) {
5511                  f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5512              } else {
5513                  f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5514              }
5515              write_fp_sreg(s, a->rd, t0);
5516          }
5517          break;
5518      case MO_16:
5519          if (!dc_isar_feature(aa64_fp16, s)) {
5520              return false;
5521          }
5522          if (fp_access_check(s)) {
5523              TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5524              TCGv_i32 t1 = tcg_constant_i32(0);
5525              if (swap) {
5526                  f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5527              } else {
5528                  f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5529              }
5530              write_fp_sreg(s, a->rd, t0);
5531          }
5532          break;
5533      default:
5534          return false;
5535      }
5536      return true;
5537  }
5538  
5539  TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5540  TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5541  TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5542  TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5543  TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
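
/*
 * FCMLT0 and FCMLE0 have no dedicated helpers: they reuse the GT/GE
 * helpers with the operands swapped, since (a < 0) == (0 > a).
 */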
5544  
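/*
 * Scalar saturating add/subtract/accumulate.  The sticky QC flag is
 * loaded from vfp.qc, updated by the generator callback on saturation,
 * and stored back afterwards.
 */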
5545  static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5546                  MemOp sgn_n, MemOp sgn_m,
5547                  void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5548                  void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5549  {
5550      TCGv_i64 t0, t1, t2, qc;
5551      MemOp esz = a->esz;
5552  
5553      if (!fp_access_check(s)) {
5554          return true;
5555      }
5556  
5557      t0 = tcg_temp_new_i64();
5558      t1 = tcg_temp_new_i64();
5559      t2 = tcg_temp_new_i64();
5560      qc = tcg_temp_new_i64();
5561      read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5562      read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5563      tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5564  
5565      if (esz == MO_64) {
5566          gen_d(t0, qc, t1, t2);
5567      } else {
5568          gen_bhs(t0, qc, t1, t2, esz);
5569          tcg_gen_ext_i64(t0, t0, esz);
5570      }
5571  
5572      write_fp_dreg(s, a->rd, t0);
5573      tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5574      return true;
5575  }
5576  
5577  TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5578  TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5579  TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5580  TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5581  TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5582  TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5583  
5584  static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5585                               void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5586  {
5587      if (fp_access_check(s)) {
5588          TCGv_i64 t0 = tcg_temp_new_i64();
5589          TCGv_i64 t1 = tcg_temp_new_i64();
5590  
5591          read_vec_element(s, t0, a->rn, 0, MO_64);
5592          read_vec_element(s, t1, a->rm, 0, MO_64);
5593          fn(t0, t0, t1);
5594          write_fp_dreg(s, a->rd, t0);
5595      }
5596      return true;
5597  }
5598  
5599  TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5600  TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5601  TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5602  TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5603  TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5604  TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5605  
5606  typedef struct ENVScalar2 {
5607      NeonGenTwoOpEnvFn *gen_bhs[3];
5608      NeonGenTwo64OpEnvFn *gen_d;
5609  } ENVScalar2;
5610  
5611  static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5612  {
5613      if (!fp_access_check(s)) {
5614          return true;
5615      }
5616      if (a->esz == MO_64) {
5617          TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5618          TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5619          f->gen_d(t0, tcg_env, t0, t1);
5620          write_fp_dreg(s, a->rd, t0);
5621      } else {
5622          TCGv_i32 t0 = tcg_temp_new_i32();
5623          TCGv_i32 t1 = tcg_temp_new_i32();
5624  
5625          read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5626          read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5627          f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5628          write_fp_sreg(s, a->rd, t0);
5629      }
5630      return true;
5631  }
5632  
5633  static const ENVScalar2 f_scalar_sqshl = {
5634      { gen_helper_neon_qshl_s8,
5635        gen_helper_neon_qshl_s16,
5636        gen_helper_neon_qshl_s32 },
5637      gen_helper_neon_qshl_s64,
5638  };
5639  TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5640  
5641  static const ENVScalar2 f_scalar_uqshl = {
5642      { gen_helper_neon_qshl_u8,
5643        gen_helper_neon_qshl_u16,
5644        gen_helper_neon_qshl_u32 },
5645      gen_helper_neon_qshl_u64,
5646  };
5647  TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5648  
5649  static const ENVScalar2 f_scalar_sqrshl = {
5650      { gen_helper_neon_qrshl_s8,
5651        gen_helper_neon_qrshl_s16,
5652        gen_helper_neon_qrshl_s32 },
5653      gen_helper_neon_qrshl_s64,
5654  };
5655  TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5656  
5657  static const ENVScalar2 f_scalar_uqrshl = {
5658      { gen_helper_neon_qrshl_u8,
5659        gen_helper_neon_qrshl_u16,
5660        gen_helper_neon_qrshl_u32 },
5661      gen_helper_neon_qrshl_u64,
5662  };
5663  TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5664  
5665  static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5666                                const ENVScalar2 *f)
5667  {
5668      if (a->esz == MO_16 || a->esz == MO_32) {
5669          return do_env_scalar2(s, a, f);
5670      }
5671      return false;
5672  }
5673  
5674  static const ENVScalar2 f_scalar_sqdmulh = {
5675      { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5676  };
5677  TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5678  
5679  static const ENVScalar2 f_scalar_sqrdmulh = {
5680      { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5681  };
5682  TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5683  
5684  typedef struct ENVScalar3 {
5685      NeonGenThreeOpEnvFn *gen_hs[2];
5686  } ENVScalar3;
5687  
5688  static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5689                                const ENVScalar3 *f)
5690  {
5691      TCGv_i32 t0, t1, t2;
5692  
5693      if (a->esz != MO_16 && a->esz != MO_32) {
5694          return false;
5695      }
5696      if (!fp_access_check(s)) {
5697          return true;
5698      }
5699  
5700      t0 = tcg_temp_new_i32();
5701      t1 = tcg_temp_new_i32();
5702      t2 = tcg_temp_new_i32();
5703      read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5704      read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5705      read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5706      f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5707      write_fp_sreg(s, a->rd, t0);
5708      return true;
5709  }
5710  
5711  static const ENVScalar3 f_scalar_sqrdmlah = {
5712      { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5713  };
5714  TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5715  
5716  static const ENVScalar3 f_scalar_sqrdmlsh = {
5717      { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5718  };
5719  TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5720  
5721  static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5722  {
5723      if (fp_access_check(s)) {
5724          TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5725          TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5726          tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5727          write_fp_dreg(s, a->rd, t0);
5728      }
5729      return true;
5730  }
5731  
5732  TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5733  TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5734  TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5735  TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5736  TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5737  TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5738  
5739  static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
5740                                          int data,
5741                                          gen_helper_gvec_3_ptr * const fns[3],
5742                                          ARMFPStatusFlavour fpsttype)
5743  {
5744      MemOp esz = a->esz;
5745      int check = fp_access_check_vector_hsd(s, a->q, esz);
5746  
5747      if (check <= 0) {
5748          return check == 0;
5749      }
5750  
5751      gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
5752                        data, fns[esz - 1]);
5753      return true;
5754  }
5755  
5756  static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5757                            gen_helper_gvec_3_ptr * const fns[3])
5758  {
5759      return do_fp3_vector_with_fpsttype(s, a, data, fns,
5760                                         a->esz == MO_16 ?
5761                                         FPST_A64_F16 : FPST_A64);
5762  }
5763  
5764  static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5765                                gen_helper_gvec_3_ptr * const fnormal[3],
5766                                gen_helper_gvec_3_ptr * const fah[3])
5767  {
5768      return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
5769  }
5770  
5771  static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5772                                   gen_helper_gvec_3_ptr * const fnormal[3],
5773                                   gen_helper_gvec_3_ptr * const fah[3])
5774  {
5775      return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
5776                                         select_ah_fpst(s, a->esz));
5777  }
5778  
5779  static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5780      gen_helper_gvec_fadd_h,
5781      gen_helper_gvec_fadd_s,
5782      gen_helper_gvec_fadd_d,
5783  };
5784  TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5785  
5786  static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5787      gen_helper_gvec_fsub_h,
5788      gen_helper_gvec_fsub_s,
5789      gen_helper_gvec_fsub_d,
5790  };
5791  TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5792  
5793  static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5794      gen_helper_gvec_fdiv_h,
5795      gen_helper_gvec_fdiv_s,
5796      gen_helper_gvec_fdiv_d,
5797  };
5798  TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5799  
5800  static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5801      gen_helper_gvec_fmul_h,
5802      gen_helper_gvec_fmul_s,
5803      gen_helper_gvec_fmul_d,
5804  };
5805  TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5806  
5807  static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5808      gen_helper_gvec_fmax_h,
5809      gen_helper_gvec_fmax_s,
5810      gen_helper_gvec_fmax_d,
5811  };
5812  static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
5813      gen_helper_gvec_ah_fmax_h,
5814      gen_helper_gvec_ah_fmax_s,
5815      gen_helper_gvec_ah_fmax_d,
5816  };
5817  TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
5818  
5819  static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5820      gen_helper_gvec_fmin_h,
5821      gen_helper_gvec_fmin_s,
5822      gen_helper_gvec_fmin_d,
5823  };
5824  static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
5825      gen_helper_gvec_ah_fmin_h,
5826      gen_helper_gvec_ah_fmin_s,
5827      gen_helper_gvec_ah_fmin_d,
5828  };
5829  TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
5830  
5831  static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5832      gen_helper_gvec_fmaxnum_h,
5833      gen_helper_gvec_fmaxnum_s,
5834      gen_helper_gvec_fmaxnum_d,
5835  };
5836  TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5837  
5838  static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5839      gen_helper_gvec_fminnum_h,
5840      gen_helper_gvec_fminnum_s,
5841      gen_helper_gvec_fminnum_d,
5842  };
5843  TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5844  
5845  static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5846      gen_helper_gvec_fmulx_h,
5847      gen_helper_gvec_fmulx_s,
5848      gen_helper_gvec_fmulx_d,
5849  };
5850  TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5851  
5852  static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5853      gen_helper_gvec_vfma_h,
5854      gen_helper_gvec_vfma_s,
5855      gen_helper_gvec_vfma_d,
5856  };
5857  TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5858  
5859  static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5860      gen_helper_gvec_vfms_h,
5861      gen_helper_gvec_vfms_s,
5862      gen_helper_gvec_vfms_d,
5863  };
5864  static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
5865      gen_helper_gvec_ah_vfms_h,
5866      gen_helper_gvec_ah_vfms_s,
5867      gen_helper_gvec_ah_vfms_d,
5868  };
5869  TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
5870  
5871  static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5872      gen_helper_gvec_fceq_h,
5873      gen_helper_gvec_fceq_s,
5874      gen_helper_gvec_fceq_d,
5875  };
5876  TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5877  
5878  static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5879      gen_helper_gvec_fcge_h,
5880      gen_helper_gvec_fcge_s,
5881      gen_helper_gvec_fcge_d,
5882  };
5883  TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5884  
5885  static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5886      gen_helper_gvec_fcgt_h,
5887      gen_helper_gvec_fcgt_s,
5888      gen_helper_gvec_fcgt_d,
5889  };
5890  TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5891  
5892  static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5893      gen_helper_gvec_facge_h,
5894      gen_helper_gvec_facge_s,
5895      gen_helper_gvec_facge_d,
5896  };
5897  TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5898  
5899  static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5900      gen_helper_gvec_facgt_h,
5901      gen_helper_gvec_facgt_s,
5902      gen_helper_gvec_facgt_d,
5903  };
5904  TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5905  
5906  static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5907      gen_helper_gvec_fabd_h,
5908      gen_helper_gvec_fabd_s,
5909      gen_helper_gvec_fabd_d,
5910  };
5911  static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
5912      gen_helper_gvec_ah_fabd_h,
5913      gen_helper_gvec_ah_fabd_s,
5914      gen_helper_gvec_ah_fabd_d,
5915  };
5916  TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
5917  
5918  static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5919      gen_helper_gvec_recps_h,
5920      gen_helper_gvec_recps_s,
5921      gen_helper_gvec_recps_d,
5922  };
5923  static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
5924      gen_helper_gvec_ah_recps_h,
5925      gen_helper_gvec_ah_recps_s,
5926      gen_helper_gvec_ah_recps_d,
5927  };
5928  TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
5929  
5930  static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5931      gen_helper_gvec_rsqrts_h,
5932      gen_helper_gvec_rsqrts_s,
5933      gen_helper_gvec_rsqrts_d,
5934  };
5935  static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
5936      gen_helper_gvec_ah_rsqrts_h,
5937      gen_helper_gvec_ah_rsqrts_s,
5938      gen_helper_gvec_ah_rsqrts_d,
5939  };
5940  TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
5941  
5942  static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5943      gen_helper_gvec_faddp_h,
5944      gen_helper_gvec_faddp_s,
5945      gen_helper_gvec_faddp_d,
5946  };
5947  TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5948  
5949  static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5950      gen_helper_gvec_fmaxp_h,
5951      gen_helper_gvec_fmaxp_s,
5952      gen_helper_gvec_fmaxp_d,
5953  };
5954  static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
5955      gen_helper_gvec_ah_fmaxp_h,
5956      gen_helper_gvec_ah_fmaxp_s,
5957      gen_helper_gvec_ah_fmaxp_d,
5958  };
5959  TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
5960  
5961  static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5962      gen_helper_gvec_fminp_h,
5963      gen_helper_gvec_fminp_s,
5964      gen_helper_gvec_fminp_d,
5965  };
5966  static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
5967      gen_helper_gvec_ah_fminp_h,
5968      gen_helper_gvec_ah_fminp_s,
5969      gen_helper_gvec_ah_fminp_d,
5970  };
5971  TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
5972  
5973  static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5974      gen_helper_gvec_fmaxnump_h,
5975      gen_helper_gvec_fmaxnump_s,
5976      gen_helper_gvec_fmaxnump_d,
5977  };
5978  TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5979  
5980  static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5981      gen_helper_gvec_fminnump_h,
5982      gen_helper_gvec_fminnump_s,
5983      gen_helper_gvec_fminnump_d,
5984  };
5985  TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5986  
5987  static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5988  {
5989      if (fp_access_check(s)) {
5990          int data = (is_2 << 1) | is_s;
5991          tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5992                             vec_full_reg_offset(s, a->rn),
5993                             vec_full_reg_offset(s, a->rm), tcg_env,
5994                             a->q ? 16 : 8, vec_full_reg_size(s),
5995                             data, gen_helper_gvec_fmlal_a64);
5996      }
5997      return true;
5998  }
5999  
6000  TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
6001  TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
6002  TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
6003  TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
6004  
6005  TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6006  TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6007  TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6008  TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6009  TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6010  
6011  TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6012  TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6013  TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6014  TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6015  TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6016  
6017  static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6018  {
6019      if (fp_access_check(s)) {
6020          gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6021      }
6022      return true;
6023  }
6024  
6025  TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6026  TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6027  TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
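
/*
 * BSL, BIT and BIF above all map onto the same bitwise select; only the
 * operand roles differ.  The operand following the destination is the
 * selector, so e.g. BIT computes rd = (rn & rm) | (rd & ~rm).
 */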
6028  
6029  TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6030  TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6031  TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6032  TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6033  TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6034  TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6035  
6036  TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6037  TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6038  TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6039  TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6040  TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6041  TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6042  TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6043  TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6044  
6045  TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6046  TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6047  TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6048  TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6049  TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6050  TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6051  TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6052  TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6053  TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6054  TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6055  TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6056  TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6057  TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6058  TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6059  TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6060  TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6061  TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6062  TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6063  TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6064  TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6065  
6066  static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6067  {
6068      if (a->esz == MO_64 && !a->q) {
6069          return false;
6070      }
6071      if (fp_access_check(s)) {
6072          tcg_gen_gvec_cmp(cond, a->esz,
6073                           vec_full_reg_offset(s, a->rd),
6074                           vec_full_reg_offset(s, a->rn),
6075                           vec_full_reg_offset(s, a->rm),
6076                           a->q ? 16 : 8, vec_full_reg_size(s));
6077      }
6078      return true;
6079  }
6080  
6081  TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6082  TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6083  TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6084  TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6085  TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6086  TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6087  
6088  TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6089  TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6090  TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6091  TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6092  
6093  static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6094                            gen_helper_gvec_4 *fn)
6095  {
6096      if (fp_access_check(s)) {
6097          gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6098      }
6099      return true;
6100  }
6101  
6102  static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6103                                gen_helper_gvec_4_ptr *fn)
6104  {
6105      if (fp_access_check(s)) {
6106          gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6107      }
6108      return true;
6109  }
6110  
6111  TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
6112  TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
6113  TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
6114  TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6115  TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6116  TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6117  TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6118  TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6119  
6120  static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6121  {
6122      if (!dc_isar_feature(aa64_bf16, s)) {
6123          return false;
6124      }
6125      if (fp_access_check(s)) {
6126          /* Q bit selects BFMLALB vs BFMLALT. */
6127          gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6128                            s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6129                            gen_helper_gvec_bfmlal);
6130      }
6131      return true;
6132  }
6133  
6134  static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6135      gen_helper_gvec_fcaddh,
6136      gen_helper_gvec_fcadds,
6137      gen_helper_gvec_fcaddd,
6138  };
6139  /*
6140   * Encode FPCR.AH into the data so the helper knows whether the
6141   * negations it does should avoid flipping the sign bit on a NaN
6142   */
6143  TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6144             f_vector_fcadd)
6145  TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6146             f_vector_fcadd)
6147  
6148  static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6149  {
6150      static gen_helper_gvec_4_ptr * const fn[] = {
6151          [MO_16] = gen_helper_gvec_fcmlah,
6152          [MO_32] = gen_helper_gvec_fcmlas,
6153          [MO_64] = gen_helper_gvec_fcmlad,
6154      };
6155      int check;
6156  
6157      if (!dc_isar_feature(aa64_fcma, s)) {
6158          return false;
6159      }
6160  
6161      check = fp_access_check_vector_hsd(s, a->q, a->esz);
6162      if (check <= 0) {
6163          return check == 0;
6164      }
6165  
6166      gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6167                        a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6168                        a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6169      return true;
6170  }
6171  
6172  /*
6173   * Widening vector x vector/indexed.
6174   *
6175   * These read from the top or bottom half of a 128-bit vector.
6176   * After widening, optionally accumulate with a 128-bit vector.
6177   * Implement these inline, as the number of elements is limited
6178   * and the related SVE and SME operations on larger vectors use
6179   * even/odd elements instead of top/bottom half.
6180   *
6181   * If idx >= 0, operand 2 is indexed, otherwise vector.
6182   * If acc, operand 0 is loaded with rd.
6183   */
6184  
6185  /* For low half, iterating up. */
6186  static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6187                              int rd, int rn, int rm, int idx,
6188                              NeonGenTwo64OpFn *fn, bool acc)
6189  {
6190      TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6191      TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6192      TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6193      MemOp esz = memop & MO_SIZE;
6194      int half = 8 >> esz;
6195      int top_swap, top_half;
6196  
6197      /* There are no 64x64->128 bit operations. */
6198      if (esz >= MO_64) {
6199          return false;
6200      }
6201      if (!fp_access_check(s)) {
6202          return true;
6203      }
6204  
6205      if (idx >= 0) {
6206          read_vec_element(s, tcg_op2, rm, idx, memop);
6207      }
6208  
6209      /*
6210       * For top half inputs, iterate forward; backward for bottom half.
6211       * This means the store to the destination will not occur until
6212       * overlapping inputs are consumed.
6213       * Use top_swap to conditionally invert the forward iteration index.
6214       */
6215      top_swap = top ? 0 : half - 1;
6216      top_half = top ? half : 0;
6217  
6218      for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6219          int elt = elt_fwd ^ top_swap;
6220  
6221          read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6222          if (idx < 0) {
6223              read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6224          }
6225          if (acc) {
6226              read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6227          }
6228          fn(tcg_op0, tcg_op1, tcg_op2);
6229          write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6230      }
6231      clear_vec_high(s, 1, rd);
6232      return true;
6233  }
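
/*
 * Iteration order, by example (illustrative): for MO_16 inputs half = 4,
 * so the bottom-half forms read source elements 3,2,1,0 and the top-half
 * forms read 4,5,6,7, writing the 32-bit result element in each pass.
 * A result element is therefore only stored after every source element
 * it overlaps (when rd == rn or rd == rm) has already been read.
 */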
6234  
6235  static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6236  {
6237      TCGv_i64 t = tcg_temp_new_i64();
6238      tcg_gen_mul_i64(t, n, m);
6239      tcg_gen_add_i64(d, d, t);
6240  }
6241  
6242  static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6243  {
6244      TCGv_i64 t = tcg_temp_new_i64();
6245      tcg_gen_mul_i64(t, n, m);
6246      tcg_gen_sub_i64(d, d, t);
6247  }
6248  
6249  TRANS(SMULL_v, do_3op_widening,
6250        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6251        tcg_gen_mul_i64, false)
6252  TRANS(UMULL_v, do_3op_widening,
6253        a->esz, a->q, a->rd, a->rn, a->rm, -1,
6254        tcg_gen_mul_i64, false)
6255  TRANS(SMLAL_v, do_3op_widening,
6256        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6257        gen_muladd_i64, true)
6258  TRANS(UMLAL_v, do_3op_widening,
6259        a->esz, a->q, a->rd, a->rn, a->rm, -1,
6260        gen_muladd_i64, true)
6261  TRANS(SMLSL_v, do_3op_widening,
6262        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6263        gen_mulsub_i64, true)
6264  TRANS(UMLSL_v, do_3op_widening,
6265        a->esz, a->q, a->rd, a->rn, a->rm, -1,
6266        gen_mulsub_i64, true)
6267  
6268  TRANS(SMULL_vi, do_3op_widening,
6269        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6270        tcg_gen_mul_i64, false)
6271  TRANS(UMULL_vi, do_3op_widening,
6272        a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6273        tcg_gen_mul_i64, false)
6274  TRANS(SMLAL_vi, do_3op_widening,
6275        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6276        gen_muladd_i64, true)
6277  TRANS(UMLAL_vi, do_3op_widening,
6278        a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6279        gen_muladd_i64, true)
6280  TRANS(SMLSL_vi, do_3op_widening,
6281        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6282        gen_mulsub_i64, true)
6283  TRANS(UMLSL_vi, do_3op_widening,
6284        a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6285        gen_mulsub_i64, true)
6286  
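/* Absolute difference: d = |n - m|, comparing signed (sabd) or unsigned (uabd). */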
6287  static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6288  {
6289      TCGv_i64 t1 = tcg_temp_new_i64();
6290      TCGv_i64 t2 = tcg_temp_new_i64();
6291  
6292      tcg_gen_sub_i64(t1, n, m);
6293      tcg_gen_sub_i64(t2, m, n);
6294      tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6295  }
6296  
6297  static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6298  {
6299      TCGv_i64 t1 = tcg_temp_new_i64();
6300      TCGv_i64 t2 = tcg_temp_new_i64();
6301  
6302      tcg_gen_sub_i64(t1, n, m);
6303      tcg_gen_sub_i64(t2, m, n);
6304      tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6305  }
6306  
6307  static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6308  {
6309      TCGv_i64 t = tcg_temp_new_i64();
6310      gen_sabd_i64(t, n, m);
6311      tcg_gen_add_i64(d, d, t);
6312  }
6313  
6314  static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6315  {
6316      TCGv_i64 t = tcg_temp_new_i64();
6317      gen_uabd_i64(t, n, m);
6318      tcg_gen_add_i64(d, d, t);
6319  }
6320  
6321  TRANS(SADDL_v, do_3op_widening,
6322        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6323        tcg_gen_add_i64, false)
6324  TRANS(UADDL_v, do_3op_widening,
6325        a->esz, a->q, a->rd, a->rn, a->rm, -1,
6326        tcg_gen_add_i64, false)
6327  TRANS(SSUBL_v, do_3op_widening,
6328        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6329        tcg_gen_sub_i64, false)
6330  TRANS(USUBL_v, do_3op_widening,
6331        a->esz, a->q, a->rd, a->rn, a->rm, -1,
6332        tcg_gen_sub_i64, false)
6333  TRANS(SABDL_v, do_3op_widening,
6334        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6335        gen_sabd_i64, false)
6336  TRANS(UABDL_v, do_3op_widening,
6337        a->esz, a->q, a->rd, a->rn, a->rm, -1,
6338        gen_uabd_i64, false)
6339  TRANS(SABAL_v, do_3op_widening,
6340        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6341        gen_saba_i64, true)
6342  TRANS(UABAL_v, do_3op_widening,
6343        a->esz, a->q, a->rd, a->rn, a->rm, -1,
6344        gen_uaba_i64, true)
6345  
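/*
 * Saturating doubling multiply long: the widening product itself cannot
 * overflow, so the doubling and its saturation are done via a saturating
 * self-addition of the product.
 */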
6346  static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6347  {
6348      tcg_gen_mul_i64(d, n, m);
6349      gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6350  }
6351  
6352  static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6353  {
6354      tcg_gen_mul_i64(d, n, m);
6355      gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6356  }
6357  
6358  static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6359  {
6360      TCGv_i64 t = tcg_temp_new_i64();
6361  
6362      tcg_gen_mul_i64(t, n, m);
6363      gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6364      gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6365  }
6366  
6367  static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6368  {
6369      TCGv_i64 t = tcg_temp_new_i64();
6370  
6371      tcg_gen_mul_i64(t, n, m);
6372      gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6373      gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6374  }
6375  
6376  static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6377  {
6378      TCGv_i64 t = tcg_temp_new_i64();
6379  
6380      tcg_gen_mul_i64(t, n, m);
6381      gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6382      tcg_gen_neg_i64(t, t);
6383      gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6384  }
6385  
6386  static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6387  {
6388      TCGv_i64 t = tcg_temp_new_i64();
6389  
6390      tcg_gen_mul_i64(t, n, m);
6391      gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6392      tcg_gen_neg_i64(t, t);
6393      gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6394  }
6395  
6396  TRANS(SQDMULL_v, do_3op_widening,
6397        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6398        a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6399  TRANS(SQDMLAL_v, do_3op_widening,
6400        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6401        a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6402  TRANS(SQDMLSL_v, do_3op_widening,
6403        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6404        a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6405  
6406  TRANS(SQDMULL_vi, do_3op_widening,
6407        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6408        a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6409  TRANS(SQDMLAL_vi, do_3op_widening,
6410        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6411        a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6412  TRANS(SQDMLSL_vi, do_3op_widening,
6413        a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6414        a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6415  
6416  static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6417                             MemOp sign, bool sub)
6418  {
6419      TCGv_i64 tcg_op0, tcg_op1;
6420      MemOp esz = a->esz;
6421      int half = 8 >> esz;
6422      bool top = a->q;
6423      int top_swap = top ? 0 : half - 1;
6424      int top_half = top ? half : 0;
6425  
6426      /* There are no 64x64->128 bit operations. */
6427      if (esz >= MO_64) {
6428          return false;
6429      }
6430      if (!fp_access_check(s)) {
6431          return true;
6432      }
6433      tcg_op0 = tcg_temp_new_i64();
6434      tcg_op1 = tcg_temp_new_i64();
6435  
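    /*
     * As in do_3op_widening, iterate so that the double-width store to
     * rd cannot clobber a narrow rm element that has yet to be read.
     */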
6436      for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6437          int elt = elt_fwd ^ top_swap;
6438  
6439          read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6440          read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6441          if (sub) {
6442              tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6443          } else {
6444              tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6445          }
6446          write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6447      }
6448      clear_vec_high(s, 1, a->rd);
6449      return true;
6450  }
6451  
6452  TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6453  TRANS(UADDW, do_addsub_wide, a, 0, false)
6454  TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6455  TRANS(USUBW, do_addsub_wide, a, 0, true)
6456  
6457  static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6458                                   bool sub, bool round)
6459  {
6460      TCGv_i64 tcg_op0, tcg_op1;
6461      MemOp esz = a->esz;
6462      int half = 8 >> esz;
6463      bool top = a->q;
6464      int ebits = 8 << esz;
6465      uint64_t rbit = 1ull << (ebits - 1);
6466      int top_swap, top_half;
6467  
6468      /* There are no 128x128->64 bit operations. */
6469      if (esz >= MO_64) {
6470          return false;
6471      }
6472      if (!fp_access_check(s)) {
6473          return true;
6474      }
6475      tcg_op0 = tcg_temp_new_i64();
6476      tcg_op1 = tcg_temp_new_i64();
6477  
6478      /*
6479       * For top half inputs, iterate backward; forward for bottom half.
6480       * This means the store to the destination will not occur until
6481       * overlapping inputs are consumed.
6482       */
6483      top_swap = top ? half - 1 : 0;
6484      top_half = top ? half : 0;
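    /*
     * E.g. for esz == MO_8: ebits == 8, rbit == 0x80.  The rounding forms
     * add half a unit in the last place of the narrowed result before the
     * shift right by ebits discards the low half.
     */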
6485  
6486      for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6487          int elt = elt_fwd ^ top_swap;
6488  
6489          read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6490          read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6491          if (sub) {
6492              tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6493          } else {
6494              tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6495          }
6496          if (round) {
6497              tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6498          }
6499          tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6500          write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6501      }
6502      clear_vec_high(s, top, a->rd);
6503      return true;
6504  }
6505  
6506  TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6507  TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6508  TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6509  TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6510  
6511  static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6512  {
6513      if (fp_access_check(s)) {
6514          /* The Q field specifies lo/hi half input for these insns.  */
6515          gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6516      }
6517      return true;
6518  }
6519  
6520  TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6521  TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6522  
6523  /*
6524   * Advanced SIMD scalar/vector x indexed element
6525   */
6526  
6527  static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6528  {
6529      switch (a->esz) {
6530      case MO_64:
6531          if (fp_access_check(s)) {
6532              TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6533              TCGv_i64 t1 = tcg_temp_new_i64();
6534  
6535              read_vec_element(s, t1, a->rm, a->idx, MO_64);
6536              f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6537              write_fp_dreg_merging(s, a->rd, a->rn, t0);
6538          }
6539          break;
6540      case MO_32:
6541          if (fp_access_check(s)) {
6542              TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6543              TCGv_i32 t1 = tcg_temp_new_i32();
6544  
6545              read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6546              f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6547              write_fp_sreg_merging(s, a->rd, a->rn, t0);
6548          }
6549          break;
6550      case MO_16:
6551          if (!dc_isar_feature(aa64_fp16, s)) {
6552              return false;
6553          }
6554          if (fp_access_check(s)) {
6555              TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6556              TCGv_i32 t1 = tcg_temp_new_i32();
6557  
6558              read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6559              f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6560              write_fp_hreg_merging(s, a->rd, a->rn, t0);
6561          }
6562          break;
6563      default:
6564          g_assert_not_reached();
6565      }
6566      return true;
6567  }
6568  
6569  TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6570  TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6571  
6572  static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6573  {
6574      switch (a->esz) {
6575      case MO_64:
6576          if (fp_access_check(s)) {
6577              TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6578              TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6579              TCGv_i64 t2 = tcg_temp_new_i64();
6580  
6581              read_vec_element(s, t2, a->rm, a->idx, MO_64);
6582              if (neg) {
6583                  gen_vfp_maybe_ah_negd(s, t1, t1);
6584              }
6585              gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6586              write_fp_dreg_merging(s, a->rd, a->rd, t0);
6587          }
6588          break;
6589      case MO_32:
6590          if (fp_access_check(s)) {
6591              TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6592              TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6593              TCGv_i32 t2 = tcg_temp_new_i32();
6594  
6595              read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6596              if (neg) {
6597                  gen_vfp_maybe_ah_negs(s, t1, t1);
6598              }
6599              gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6600              write_fp_sreg_merging(s, a->rd, a->rd, t0);
6601          }
6602          break;
6603      case MO_16:
6604          if (!dc_isar_feature(aa64_fp16, s)) {
6605              return false;
6606          }
6607          if (fp_access_check(s)) {
6608              TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6609              TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6610              TCGv_i32 t2 = tcg_temp_new_i32();
6611  
6612              read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6613              if (neg) {
6614                  gen_vfp_maybe_ah_negh(s, t1, t1);
6615              }
6616              gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6617                                         fpstatus_ptr(FPST_A64_F16));
6618              write_fp_hreg_merging(s, a->rd, a->rd, t0);
6619          }
6620          break;
6621      default:
6622          g_assert_not_reached();
6623      }
6624      return true;
6625  }
6626  
6627  TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6628  TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6629  
6630  static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6631                                    const ENVScalar2 *f)
6632  {
6633      if (a->esz < MO_16 || a->esz > MO_32) {
6634          return false;
6635      }
6636      if (fp_access_check(s)) {
6637          TCGv_i32 t0 = tcg_temp_new_i32();
6638          TCGv_i32 t1 = tcg_temp_new_i32();
6639  
6640          read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6641          read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6642          f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6643          write_fp_sreg(s, a->rd, t0);
6644      }
6645      return true;
6646  }
6647  
6648  TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6649  TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6650  
6651  static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6652                                    const ENVScalar3 *f)
6653  {
6654      if (a->esz < MO_16 || a->esz > MO_32) {
6655          return false;
6656      }
6657      if (fp_access_check(s)) {
6658          TCGv_i32 t0 = tcg_temp_new_i32();
6659          TCGv_i32 t1 = tcg_temp_new_i32();
6660          TCGv_i32 t2 = tcg_temp_new_i32();
6661  
6662          read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6663          read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6664          read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6665          f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6666          write_fp_sreg(s, a->rd, t0);
6667      }
6668      return true;
6669  }
6670  
6671  TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6672  TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6673  
6674  static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6675                                            NeonGenTwo64OpFn *fn, bool acc)
6676  {
6677      if (fp_access_check(s)) {
6678          TCGv_i64 t0 = tcg_temp_new_i64();
6679          TCGv_i64 t1 = tcg_temp_new_i64();
6680          TCGv_i64 t2 = tcg_temp_new_i64();
6681  
6682          if (acc) {
6683              read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6684          }
6685          read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6686          read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6687          fn(t0, t1, t2);
6688  
6689          /* Clear the whole register first, then store scalar. */
6690          clear_vec(s, a->rd);
6691          write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6692      }
6693      return true;
6694  }
6695  
6696  TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6697        a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6698  TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6699        a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6700  TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6701        a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6702  
6703  static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6704                                gen_helper_gvec_3_ptr * const fns[3])
6705  {
6706      MemOp esz = a->esz;
6707      int check = fp_access_check_vector_hsd(s, a->q, esz);
6708  
6709      if (check <= 0) {
6710          return check == 0;
6711      }
6712  
6713      gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6714                        esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6715                        a->idx, fns[esz - 1]);
6716      return true;
6717  }
6718  
6719  static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6720      gen_helper_gvec_fmul_idx_h,
6721      gen_helper_gvec_fmul_idx_s,
6722      gen_helper_gvec_fmul_idx_d,
6723  };
6724  TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6725  
6726  static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6727      gen_helper_gvec_fmulx_idx_h,
6728      gen_helper_gvec_fmulx_idx_s,
6729      gen_helper_gvec_fmulx_idx_d,
6730  };
6731  TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6732  
6733  static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6734  {
6735      static gen_helper_gvec_4_ptr * const fns[3][3] = {
6736          { gen_helper_gvec_fmla_idx_h,
6737            gen_helper_gvec_fmla_idx_s,
6738            gen_helper_gvec_fmla_idx_d },
6739          { gen_helper_gvec_fmls_idx_h,
6740            gen_helper_gvec_fmls_idx_s,
6741            gen_helper_gvec_fmls_idx_d },
6742          { gen_helper_gvec_ah_fmls_idx_h,
6743            gen_helper_gvec_ah_fmls_idx_s,
6744            gen_helper_gvec_ah_fmls_idx_d },
6745      };
6746      MemOp esz = a->esz;
6747      int check = fp_access_check_vector_hsd(s, a->q, esz);
6748  
6749      if (check <= 0) {
6750          return check == 0;
6751      }
6752  
6753      gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6754                        esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6755                        a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
6756      return true;
6757  }
6758  
6759  TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6760  TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6761  
6762  static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6763  {
6764      if (fp_access_check(s)) {
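        /* Pack the index and the is_2/is_s flags into the helper data. */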
6765          int data = (a->idx << 2) | (is_2 << 1) | is_s;
6766          tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6767                             vec_full_reg_offset(s, a->rn),
6768                             vec_full_reg_offset(s, a->rm), tcg_env,
6769                             a->q ? 16 : 8, vec_full_reg_size(s),
6770                             data, gen_helper_gvec_fmlal_idx_a64);
6771      }
6772      return true;
6773  }
6774  
6775  TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6776  TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6777  TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6778  TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6779  
6780  static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6781                                 gen_helper_gvec_3 * const fns[2])
6782  {
6783      assert(a->esz == MO_16 || a->esz == MO_32);
6784      if (fp_access_check(s)) {
6785          gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6786      }
6787      return true;
6788  }
6789  
6790  static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6791      gen_helper_gvec_mul_idx_h,
6792      gen_helper_gvec_mul_idx_s,
6793  };
6794  TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6795  
6796  static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6797  {
6798      static gen_helper_gvec_4 * const fns[2][2] = {
6799          { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6800          { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6801      };
6802  
6803      assert(a->esz == MO_16 || a->esz == MO_32);
6804      if (fp_access_check(s)) {
6805          gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6806                           a->idx, fns[a->esz - 1][sub]);
6807      }
6808      return true;
6809  }
6810  
6811  TRANS(MLA_vi, do_mla_vector_idx, a, false)
6812  TRANS(MLS_vi, do_mla_vector_idx, a, true)
6813  
6814  static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6815                                    gen_helper_gvec_4 * const fns[2])
6816  {
6817      assert(a->esz == MO_16 || a->esz == MO_32);
6818      if (fp_access_check(s)) {
6819          tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6820                             vec_full_reg_offset(s, a->rn),
6821                             vec_full_reg_offset(s, a->rm),
6822                             offsetof(CPUARMState, vfp.qc),
6823                             a->q ? 16 : 8, vec_full_reg_size(s),
6824                             a->idx, fns[a->esz - 1]);
6825      }
6826      return true;
6827  }
6828  
6829  static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6830      gen_helper_neon_sqdmulh_idx_h,
6831      gen_helper_neon_sqdmulh_idx_s,
6832  };
6833  TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6834  
6835  static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6836      gen_helper_neon_sqrdmulh_idx_h,
6837      gen_helper_neon_sqrdmulh_idx_s,
6838  };
6839  TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6840  
6841  static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6842      gen_helper_neon_sqrdmlah_idx_h,
6843      gen_helper_neon_sqrdmlah_idx_s,
6844  };
6845  TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6846             f_vector_idx_sqrdmlah)
6847  
6848  static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6849      gen_helper_neon_sqrdmlsh_idx_h,
6850      gen_helper_neon_sqrdmlsh_idx_s,
6851  };
6852  TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6853             f_vector_idx_sqrdmlsh)
6854  
6855  static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6856                                gen_helper_gvec_4 *fn)
6857  {
6858      if (fp_access_check(s)) {
6859          gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6860      }
6861      return true;
6862  }
6863  
6864  static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6865                                    gen_helper_gvec_4_ptr *fn)
6866  {
6867      if (fp_access_check(s)) {
6868          gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6869      }
6870      return true;
6871  }
6872  
6873  TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6874  TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6875  TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6876             gen_helper_gvec_sudot_idx_b)
6877  TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6878             gen_helper_gvec_usdot_idx_b)
6879  TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6880             gen_helper_gvec_bfdot_idx)
6881  
6882  static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6883  {
6884      if (!dc_isar_feature(aa64_bf16, s)) {
6885          return false;
6886      }
6887      if (fp_access_check(s)) {
6888          /* Q bit selects BFMLALB vs BFMLALT. */
6889          gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6890                            s->fpcr_ah ? FPST_AH : FPST_A64,
6891                            (a->idx << 1) | a->q,
6892                            gen_helper_gvec_bfmlal_idx);
6893      }
6894      return true;
6895  }
6896  
6897  static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6898  {
6899      gen_helper_gvec_4_ptr *fn;
6900  
6901      if (!dc_isar_feature(aa64_fcma, s)) {
6902          return false;
6903      }
6904      switch (a->esz) {
6905      case MO_16:
6906          if (!dc_isar_feature(aa64_fp16, s)) {
6907              return false;
6908          }
6909          fn = gen_helper_gvec_fcmlah_idx;
6910          break;
6911      case MO_32:
6912          fn = gen_helper_gvec_fcmlas_idx;
6913          break;
6914      default:
6915          g_assert_not_reached();
6916      }
6917      if (fp_access_check(s)) {
6918          gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6919                            a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6920                            (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
6921      }
6922      return true;
6923  }
6924  
6925  /*
6926   * Advanced SIMD scalar pairwise
6927   */
6928  
6929  static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6930  {
6931      switch (a->esz) {
6932      case MO_64:
6933          if (fp_access_check(s)) {
6934              TCGv_i64 t0 = tcg_temp_new_i64();
6935              TCGv_i64 t1 = tcg_temp_new_i64();
6936  
6937              read_vec_element(s, t0, a->rn, 0, MO_64);
6938              read_vec_element(s, t1, a->rn, 1, MO_64);
6939              f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6940              write_fp_dreg(s, a->rd, t0);
6941          }
6942          break;
6943      case MO_32:
6944          if (fp_access_check(s)) {
6945              TCGv_i32 t0 = tcg_temp_new_i32();
6946              TCGv_i32 t1 = tcg_temp_new_i32();
6947  
6948              read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6949              read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6950              f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6951              write_fp_sreg(s, a->rd, t0);
6952          }
6953          break;
6954      case MO_16:
6955          if (!dc_isar_feature(aa64_fp16, s)) {
6956              return false;
6957          }
6958          if (fp_access_check(s)) {
6959              TCGv_i32 t0 = tcg_temp_new_i32();
6960              TCGv_i32 t1 = tcg_temp_new_i32();
6961  
6962              read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6963              read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6964              f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6965              write_fp_sreg(s, a->rd, t0);
6966          }
6967          break;
6968      default:
6969          g_assert_not_reached();
6970      }
6971      return true;
6972  }
6973  
6974  static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
6975                                     const FPScalar *fnormal,
6976                                     const FPScalar *fah)
6977  {
6978      return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
6979  }
6980  
6981  TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6982  TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
6983  TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
6984  TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6985  TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6986  
6987  static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6988  {
6989      if (fp_access_check(s)) {
6990          TCGv_i64 t0 = tcg_temp_new_i64();
6991          TCGv_i64 t1 = tcg_temp_new_i64();
6992  
6993          read_vec_element(s, t0, a->rn, 0, MO_64);
6994          read_vec_element(s, t1, a->rn, 1, MO_64);
6995          tcg_gen_add_i64(t0, t0, t1);
6996          write_fp_dreg(s, a->rd, t0);
6997      }
6998      return true;
6999  }
7000  
7001  /*
7002   * Floating-point conditional select
7003   */
7004  
7005  static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7006  {
7007      TCGv_i64 t_true, t_false;
7008      DisasCompare64 c;
7009      int check = fp_access_check_scalar_hsd(s, a->esz);
7010  
7011      if (check <= 0) {
7012          return check == 0;
7013      }
7014  
7015      /* Zero extend sreg & hreg inputs to 64 bits now.  */
7016      t_true = tcg_temp_new_i64();
7017      t_false = tcg_temp_new_i64();
7018      read_vec_element(s, t_true, a->rn, 0, a->esz);
7019      read_vec_element(s, t_false, a->rm, 0, a->esz);
7020  
7021      a64_test_cc(&c, a->cond);
7022      tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7023                          t_true, t_false);
7024  
7025      /*
7026       * Note that sregs & hregs write back zeros to the high bits,
7027       * and we've already done the zero-extension.
7028       */
7029      write_fp_dreg(s, a->rd, t_true);
7030      return true;
7031  }
7032  
7033  /*
7034   * Advanced SIMD Extract
7035   */
7036  
7037  static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7038  {
7039      if (fp_access_check(s)) {
7040          TCGv_i64 lo = read_fp_dreg(s, a->rn);
7041          if (a->imm != 0) {
7042              TCGv_i64 hi = read_fp_dreg(s, a->rm);
7043              tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7044          }
7045          write_fp_dreg(s, a->rd, lo);
7046      }
7047      return true;
7048  }
7049  
7050  static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7051  {
7052      TCGv_i64 lo, hi;
7053      int pos = (a->imm & 7) * 8;
7054      int elt = a->imm >> 3;
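    /*
     * The imm byte offset selects 128 bits from the 256-bit Vm:Vn pair.
     * elt indexes 64-bit chunks of that pair (0,1 from Vn; 2,3 from Vm),
     * pos is the bit offset within a chunk.
     */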
7055  
7056      if (!fp_access_check(s)) {
7057          return true;
7058      }
7059  
7060      lo = tcg_temp_new_i64();
7061      hi = tcg_temp_new_i64();
7062  
7063      read_vec_element(s, lo, a->rn, elt, MO_64);
7064      elt++;
7065      read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7066      elt++;
7067  
7068      if (pos != 0) {
7069          TCGv_i64 hh = tcg_temp_new_i64();
7070          tcg_gen_extract2_i64(lo, lo, hi, pos);
7071          read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7072          tcg_gen_extract2_i64(hi, hi, hh, pos);
7073      }
7074  
7075      write_vec_element(s, lo, a->rd, 0, MO_64);
7076      write_vec_element(s, hi, a->rd, 1, MO_64);
7077      clear_vec_high(s, true, a->rd);
7078      return true;
7079  }
7080  
7081  /*
7082   * Floating-point data-processing (3 source)
7083   */
7084  
7085  static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7086  {
7087      TCGv_ptr fpst;
7088  
7089      /*
7090       * These are fused multiply-add.  Note that doing the negations here
7091       * as separate steps is correct: an input NaN should come out with
7092   * its sign bit flipped if it is a negated input.
7093       */
7094      switch (a->esz) {
7095      case MO_64:
7096          if (fp_access_check(s)) {
7097              TCGv_i64 tn = read_fp_dreg(s, a->rn);
7098              TCGv_i64 tm = read_fp_dreg(s, a->rm);
7099              TCGv_i64 ta = read_fp_dreg(s, a->ra);
7100  
7101              if (neg_a) {
7102                  gen_vfp_maybe_ah_negd(s, ta, ta);
7103              }
7104              if (neg_n) {
7105                  gen_vfp_maybe_ah_negd(s, tn, tn);
7106              }
7107              fpst = fpstatus_ptr(FPST_A64);
7108              gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7109              write_fp_dreg_merging(s, a->rd, a->ra, ta);
7110          }
7111          break;
7112  
7113      case MO_32:
7114          if (fp_access_check(s)) {
7115              TCGv_i32 tn = read_fp_sreg(s, a->rn);
7116              TCGv_i32 tm = read_fp_sreg(s, a->rm);
7117              TCGv_i32 ta = read_fp_sreg(s, a->ra);
7118  
7119              if (neg_a) {
7120                  gen_vfp_maybe_ah_negs(s, ta, ta);
7121              }
7122              if (neg_n) {
7123                  gen_vfp_maybe_ah_negs(s, tn, tn);
7124              }
7125              fpst = fpstatus_ptr(FPST_A64);
7126              gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7127              write_fp_sreg_merging(s, a->rd, a->ra, ta);
7128          }
7129          break;
7130  
7131      case MO_16:
7132          if (!dc_isar_feature(aa64_fp16, s)) {
7133              return false;
7134          }
7135          if (fp_access_check(s)) {
7136              TCGv_i32 tn = read_fp_hreg(s, a->rn);
7137              TCGv_i32 tm = read_fp_hreg(s, a->rm);
7138              TCGv_i32 ta = read_fp_hreg(s, a->ra);
7139  
7140              if (neg_a) {
7141                  gen_vfp_maybe_ah_negh(s, ta, ta);
7142              }
7143              if (neg_n) {
7144                  gen_vfp_maybe_ah_negh(s, tn, tn);
7145              }
7146              fpst = fpstatus_ptr(FPST_A64_F16);
7147              gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7148              write_fp_hreg_merging(s, a->rd, a->ra, ta);
7149          }
7150          break;
7151  
7152      default:
7153          return false;
7154      }
7155      return true;
7156  }
7157  
7158  TRANS(FMADD, do_fmadd, a, false, false)
7159  TRANS(FNMADD, do_fmadd, a, true, true)
7160  TRANS(FMSUB, do_fmadd, a, false, true)
7161  TRANS(FNMSUB, do_fmadd, a, true, false)
7162  
7163  /*
7164   * Advanced SIMD Across Lanes
7165   */
7166  
7167  static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7168                               MemOp src_sign, NeonGenTwo64OpFn *fn)
7169  {
7170      TCGv_i64 tcg_res, tcg_elt;
7171      MemOp src_mop = a->esz | src_sign;
7172      int elements = (a->q ? 16 : 8) >> a->esz;
7173  
7174      /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7175      if (elements < 4) {
7176          return false;
7177      }
7178      if (!fp_access_check(s)) {
7179          return true;
7180      }
7181  
7182      tcg_res = tcg_temp_new_i64();
7183      tcg_elt = tcg_temp_new_i64();
7184  
7185      read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7186      for (int i = 1; i < elements; i++) {
7187          read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7188          fn(tcg_res, tcg_res, tcg_elt);
7189      }
7190  
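    /* SADDLV/UADDLV widen the result; the others keep the element size. */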
7191      tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
7192      write_fp_dreg(s, a->rd, tcg_res);
7193      return true;
7194  }
7195  
7196  TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7197  TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7198  TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7199  TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7200  TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7201  TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7202  TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7203  
7204  /*
7205   * do_reduction_op: helper for do_fp_reduction
7206   *
7207   * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7208   * important for correct NaN propagation that we do these
7209   * operations in exactly the order specified by the pseudocode.
7210   *
7211   * This is a recursive function.
7212   */
7213  static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7214                                  int ebase, int ecount, TCGv_ptr fpst,
7215                                  NeonGenTwoSingleOpFn *fn)
7216  {
7217      if (ecount == 1) {
7218          TCGv_i32 tcg_elem = tcg_temp_new_i32();
7219          read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7220          return tcg_elem;
7221      } else {
7222          int half = ecount >> 1;
7223          TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7224  
7225          tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7226          tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7227          tcg_res = tcg_temp_new_i32();
7228  
7229          fn(tcg_res, tcg_lo, tcg_hi, fpst);
7230          return tcg_res;
7231      }
7232  }
7233  
7234  static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7235                              NeonGenTwoSingleOpFn *fnormal,
7236                              NeonGenTwoSingleOpFn *fah)
7237  {
7238      if (fp_access_check(s)) {
7239          MemOp esz = a->esz;
7240          int elts = (a->q ? 16 : 8) >> esz;
7241          TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7242          TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7243                                         s->fpcr_ah ? fah : fnormal);
7244          write_fp_sreg(s, a->rd, res);
7245      }
7246      return true;
7247  }
7248  
7249  TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7250             gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7251  TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7252             gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7253  TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7254             gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7255  TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7256             gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7257  
7258  TRANS(FMAXNMV_s, do_fp_reduction, a,
7259        gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7260  TRANS(FMINNMV_s, do_fp_reduction, a,
7261        gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7262  TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7263  TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7264  
7265  /*
7266   * Floating-point Immediate
7267   */
7268  
7269  static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7270  {
7271      int check = fp_access_check_scalar_hsd(s, a->esz);
7272      uint64_t imm;
7273  
7274      if (check <= 0) {
7275          return check == 0;
7276      }
7277  
7278      imm = vfp_expand_imm(a->esz, a->imm);
7279      write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7280      return true;
7281  }
7282  
7283  /*
7284   * Floating point compare, conditional compare
7285   */
7286  
7287  static void handle_fp_compare(DisasContext *s, int size,
7288                                unsigned int rn, unsigned int rm,
7289                                bool cmp_with_zero, bool signal_all_nans)
7290  {
7291      TCGv_i64 tcg_flags = tcg_temp_new_i64();
7292      TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7293  
7294      if (size == MO_64) {
7295          TCGv_i64 tcg_vn, tcg_vm;
7296  
7297          tcg_vn = read_fp_dreg(s, rn);
7298          if (cmp_with_zero) {
7299              tcg_vm = tcg_constant_i64(0);
7300          } else {
7301              tcg_vm = read_fp_dreg(s, rm);
7302          }
7303          if (signal_all_nans) {
7304              gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7305          } else {
7306              gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7307          }
7308      } else {
7309          TCGv_i32 tcg_vn = tcg_temp_new_i32();
7310          TCGv_i32 tcg_vm = tcg_temp_new_i32();
7311  
7312          read_vec_element_i32(s, tcg_vn, rn, 0, size);
7313          if (cmp_with_zero) {
7314              tcg_gen_movi_i32(tcg_vm, 0);
7315          } else {
7316              read_vec_element_i32(s, tcg_vm, rm, 0, size);
7317          }
7318  
7319          switch (size) {
7320          case MO_32:
7321              if (signal_all_nans) {
7322                  gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7323              } else {
7324                  gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7325              }
7326              break;
7327          case MO_16:
7328              if (signal_all_nans) {
7329                  gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7330              } else {
7331                  gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7332              }
7333              break;
7334          default:
7335              g_assert_not_reached();
7336          }
7337      }
7338  
7339      gen_set_nzcv(tcg_flags);
7340  }
7341  
7342  /* FCMP, FCMPE */
7343  static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7344  {
7345      int check = fp_access_check_scalar_hsd(s, a->esz);
7346  
7347      if (check <= 0) {
7348          return check == 0;
7349      }
7350  
7351      handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7352      return true;
7353  }
7354  
7355  /* FCCMP, FCCMPE */
7356  static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7357  {
7358      TCGLabel *label_continue = NULL;
7359      int check = fp_access_check_scalar_hsd(s, a->esz);
7360  
7361      if (check <= 0) {
7362          return check == 0;
7363      }
7364  
7365      if (a->cond < 0x0e) { /* not always */
7366          TCGLabel *label_match = gen_new_label();
7367          label_continue = gen_new_label();
7368          arm_gen_test_cc(a->cond, label_match);
7369          /* nomatch: */
7370          gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7371          tcg_gen_br(label_continue);
7372          gen_set_label(label_match);
7373      }
7374  
7375      handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7376  
7377      if (label_continue) {
7378          gen_set_label(label_continue);
7379      }
7380      return true;
7381  }
7382  
7383  /*
7384   * Advanced SIMD Modified Immediate
7385   */
7386  
7387  static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7388  {
7389      if (!dc_isar_feature(aa64_fp16, s)) {
7390          return false;
7391      }
7392      if (fp_access_check(s)) {
7393          tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7394                               a->q ? 16 : 8, vec_full_reg_size(s),
7395                               vfp_expand_imm(MO_16, a->abcdefgh));
7396      }
7397      return true;
7398  }
7399  
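/*
 * GVecGen2iFn-compatible wrapper: replicate the 64-bit immediate,
 * ignoring vece and the unused source operand.
 */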
7400  static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7401                       int64_t c, uint32_t oprsz, uint32_t maxsz)
7402  {
7403      tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7404  }
7405  
7406  static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7407  {
7408      GVecGen2iFn *fn;
7409  
7410      /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7411      if ((a->cmode & 1) && a->cmode < 12) {
7412          /* For op=1, the imm will be inverted, so BIC becomes AND. */
7413          fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7414      } else {
7415          /* There is one unallocated cmode/op combination in this space */
7416          if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7417              return false;
7418          }
7419          fn = gen_movi;
7420      }
7421  
7422      if (fp_access_check(s)) {
7423          uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7424          gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7425      }
7426      return true;
7427  }
7428  
7429  /*
7430   * Advanced SIMD Shift by Immediate
7431   */
7432  
7433  static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7434  {
7435      if (fp_access_check(s)) {
7436          gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7437      }
7438      return true;
7439  }
7440  
7441  TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7442  TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7443  TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7444  TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7445  TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7446  TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7447  TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7448  TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7449  TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7450  TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7451  TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7452  TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7453  TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7454  TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7455  
7456  static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7457  {
7458      TCGv_i64 tcg_rn, tcg_rd;
7459      int esz = a->esz;
7460      int esize;
7461  
7462      if (!fp_access_check(s)) {
7463          return true;
7464      }
7465  
7466      /*
7467       * For the LL variants the store is larger than the load,
7468       * so if rd == rn we would overwrite parts of our input.
7469   * Load everything up front, then extract and shift in the main loop.
7470       */
7471      tcg_rd = tcg_temp_new_i64();
7472      tcg_rn = tcg_temp_new_i64();
7473      read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7474  
7475      esize = 8 << esz;
7476      for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7477          if (is_u) {
7478              tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7479          } else {
7480              tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7481          }
7482          tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7483          write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7484      }
7485      clear_vec_high(s, true, a->rd);
7486      return true;
7487  }
7488  
7489  TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7490  TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7491  
7492  static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7493  {
7494      assert(shift >= 0 && shift <= 64);
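    /* A shift by 64 yields all sign bits, the same as a shift by 63. */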
7495      tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7496  }
7497  
7498  static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7499  {
7500      assert(shift >= 0 && shift <= 64);
7501      if (shift == 64) {
7502          tcg_gen_movi_i64(dst, 0);
7503      } else {
7504          tcg_gen_shri_i64(dst, src, shift);
7505      }
7506  }
7507  
7508  static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7509  {
7510      gen_sshr_d(src, src, shift);
7511      tcg_gen_add_i64(dst, dst, src);
7512  }
7513  
7514  static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7515  {
7516      gen_ushr_d(src, src, shift);
7517      tcg_gen_add_i64(dst, dst, src);
7518  }
7519  
7520  static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7521  {
7522      assert(shift >= 0 && shift <= 32);
7523      if (shift) {
7524          TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7525          tcg_gen_add_i64(dst, src, rnd);
7526          tcg_gen_sari_i64(dst, dst, shift);
7527      } else {
7528          tcg_gen_mov_i64(dst, src);
7529      }
7530  }
7531  
7532  static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7533  {
7534      assert(shift >= 0 && shift <= 32);
7535      if (shift) {
7536          TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7537          tcg_gen_add_i64(dst, src, rnd);
7538          tcg_gen_shri_i64(dst, dst, shift);
7539      } else {
7540          tcg_gen_mov_i64(dst, src);
7541      }
7542  }
7543  
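/*
 * Unlike the _bhs forms, adding 1 << (shift - 1) before a 64-bit shift
 * could overflow, so the most significant discarded bit is extracted
 * and added after the shift instead.
 */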
7544  static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7545  {
7546      assert(shift >= 0 && shift <= 64);
7547      if (shift == 0) {
7548          tcg_gen_mov_i64(dst, src);
7549      } else if (shift == 64) {
7550          /* Sign extension (0/-1) plus rounding bit (0/1) is always zero. */
7551          tcg_gen_movi_i64(dst, 0);
7552      } else {
7553          TCGv_i64 rnd = tcg_temp_new_i64();
7554          tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7555          tcg_gen_sari_i64(dst, src, shift);
7556          tcg_gen_add_i64(dst, dst, rnd);
7557      }
7558  }
7559  
7560  static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7561  {
7562      assert(shift >= 0 && shift <= 64);
7563      if (shift == 0) {
7564          tcg_gen_mov_i64(dst, src);
7565      } else if (shift == 64) {
7566          /* Only the rounding carry survives; the result is bit 63 of the source. */
7567          tcg_gen_shri_i64(dst, src, 63);
7568      } else {
7569          TCGv_i64 rnd = tcg_temp_new_i64();
7570          tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7571          tcg_gen_shri_i64(dst, src, shift);
7572          tcg_gen_add_i64(dst, dst, rnd);
7573      }
7574  }
7575  
7576  static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7577  {
7578      gen_srshr_d(src, src, shift);
7579      tcg_gen_add_i64(dst, dst, src);
7580  }
7581  
7582  static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7583  {
7584      gen_urshr_d(src, src, shift);
7585      tcg_gen_add_i64(dst, dst, src);
7586  }
7587  
7588  static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7589  {
7590      /* If shift is 64, dst is unchanged. */
7591      if (shift != 64) {
7592          tcg_gen_shri_i64(src, src, shift);
7593          tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7594      }
7595  }
7596  
7597  static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7598  {
7599      tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7600  }
7601  
7602  static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7603                                      WideShiftImmFn * const fns[3], MemOp sign)
7604  {
7605      TCGv_i64 tcg_rn, tcg_rd;
7606      int esz = a->esz;
7607      int esize;
7608      WideShiftImmFn *fn;
7609  
7610      tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7611  
7612      if (!fp_access_check(s)) {
7613          return true;
7614      }
7615  
7616      tcg_rn = tcg_temp_new_i64();
7617      tcg_rd = tcg_temp_new_i64();
7618      tcg_gen_movi_i64(tcg_rd, 0);
7619  
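    /*
     * Shift/narrow each double-width source element and pack the results
     * into tcg_rd, which is then written to the low (Q=0) or high (Q=1)
     * 64-bit half of rd.
     */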
7620      fn = fns[esz];
7621      esize = 8 << esz;
7622      for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7623          read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7624          fn(tcg_rn, tcg_rn, a->imm);
7625          tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7626      }
7627  
7628      write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7629      clear_vec_high(s, a->q, a->rd);
7630      return true;
7631  }
7632  
7633  static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7634  {
7635      tcg_gen_sari_i64(d, s, i);
7636      tcg_gen_ext16u_i64(d, d);
7637      gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7638  }
7639  
7640  static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7641  {
7642      tcg_gen_sari_i64(d, s, i);
7643      tcg_gen_ext32u_i64(d, d);
7644      gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7645  }
7646  
7647  static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7648  {
7649      gen_sshr_d(d, s, i);
7650      gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7651  }
7652  
7653  static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7654  {
7655      tcg_gen_shri_i64(d, s, i);
7656      gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7657  }
7658  
7659  static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7660  {
7661      tcg_gen_shri_i64(d, s, i);
7662      gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7663  }
7664  
7665  static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7666  {
7667      gen_ushr_d(d, s, i);
7668      gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7669  }
7670  
7671  static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7672  {
7673      tcg_gen_sari_i64(d, s, i);
7674      tcg_gen_ext16u_i64(d, d);
7675      gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7676  }
7677  
7678  static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7679  {
7680      tcg_gen_sari_i64(d, s, i);
7681      tcg_gen_ext32u_i64(d, d);
7682      gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7683  }
7684  
7685  static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7686  {
7687      gen_sshr_d(d, s, i);
7688      gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7689  }
7690  
7691  static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7692  {
7693      gen_srshr_bhs(d, s, i);
7694      tcg_gen_ext16u_i64(d, d);
7695      gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7696  }
7697  
7698  static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7699  {
7700      gen_srshr_bhs(d, s, i);
7701      tcg_gen_ext32u_i64(d, d);
7702      gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7703  }
7704  
7705  static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7706  {
7707      gen_srshr_d(d, s, i);
7708      gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7709  }
7710  
7711  static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7712  {
7713      gen_urshr_bhs(d, s, i);
7714      gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7715  }
7716  
7717  static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7718  {
7719      gen_urshr_bhs(d, s, i);
7720      gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7721  }
7722  
7723  static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7724  {
7725      gen_urshr_d(d, s, i);
7726      gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7727  }
7728  
7729  static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7730  {
7731      gen_srshr_bhs(d, s, i);
7732      tcg_gen_ext16u_i64(d, d);
7733      gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7734  }
7735  
7736  static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7737  {
7738      gen_srshr_bhs(d, s, i);
7739      tcg_gen_ext32u_i64(d, d);
7740      gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7741  }
7742  
7743  static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7744  {
7745      gen_srshr_d(d, s, i);
7746      gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7747  }
7748  
7749  static WideShiftImmFn * const shrn_fns[] = {
7750      tcg_gen_shri_i64,
7751      tcg_gen_shri_i64,
7752      gen_ushr_d,
7753  };
7754  TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7755  
7756  static WideShiftImmFn * const rshrn_fns[] = {
7757      gen_urshr_bhs,
7758      gen_urshr_bhs,
7759      gen_urshr_d,
7760  };
7761  TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7762  
7763  static WideShiftImmFn * const sqshrn_fns[] = {
7764      gen_sqshrn_b,
7765      gen_sqshrn_h,
7766      gen_sqshrn_s,
7767  };
7768  TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7769  
7770  static WideShiftImmFn * const uqshrn_fns[] = {
7771      gen_uqshrn_b,
7772      gen_uqshrn_h,
7773      gen_uqshrn_s,
7774  };
7775  TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7776  
7777  static WideShiftImmFn * const sqshrun_fns[] = {
7778      gen_sqshrun_b,
7779      gen_sqshrun_h,
7780      gen_sqshrun_s,
7781  };
7782  TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7783  
7784  static WideShiftImmFn * const sqrshrn_fns[] = {
7785      gen_sqrshrn_b,
7786      gen_sqrshrn_h,
7787      gen_sqrshrn_s,
7788  };
7789  TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7790  
7791  static WideShiftImmFn * const uqrshrn_fns[] = {
7792      gen_uqrshrn_b,
7793      gen_uqrshrn_h,
7794      gen_uqrshrn_s,
7795  };
7796  TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7797  
7798  static WideShiftImmFn * const sqrshrun_fns[] = {
7799      gen_sqrshrun_b,
7800      gen_sqrshrun_h,
7801      gen_sqrshrun_s,
7802  };
7803  TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7804  
7805  /*
7806   * Advanced SIMD Scalar Shift by Immediate
7807   */
7808  
7809  static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7810                                  WideShiftImmFn *fn, bool accumulate,
7811                                  MemOp sign)
7812  {
7813      if (fp_access_check(s)) {
7814          TCGv_i64 rd = tcg_temp_new_i64();
7815          TCGv_i64 rn = tcg_temp_new_i64();
7816  
7817          read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7818          if (accumulate) {
7819              read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7820          }
7821          fn(rd, rn, a->imm);
7822          write_fp_dreg(s, a->rd, rd);
7823      }
7824      return true;
7825  }
7826  
7827  TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7828  TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7829  TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7830  TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7831  TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7832  TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7833  TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7834  TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7835  TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7836  
7837  TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7838  TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7839  
7840  static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7841                                NeonGenTwoOpEnvFn *fn)
7842  {
7843      TCGv_i32 t = tcg_temp_new_i32();
7844      tcg_gen_extrl_i64_i32(t, s);
7845      fn(t, tcg_env, t, tcg_constant_i32(i));
7846      tcg_gen_extu_i32_i64(d, t);
7847  }
7848  
7849  static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7850  {
7851      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7852  }
7853  
7854  static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7855  {
7856      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7857  }
7858  
7859  static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7860  {
7861      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7862  }
7863  
7864  static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7865  {
7866      gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7867  }
7868  
7869  static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7870  {
7871      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7872  }
7873  
7874  static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7875  {
7876      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7877  }
7878  
7879  static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7880  {
7881      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7882  }
7883  
7884  static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7885  {
7886      gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7887  }
7888  
7889  static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7890  {
7891      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7892  }
7893  
7894  static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7895  {
7896      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7897  }
7898  
7899  static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7900  {
7901      trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7902  }
7903  
7904  static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7905  {
7906      gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7907  }
7908  
7909  static WideShiftImmFn * const f_scalar_sqshli[] = {
7910      gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7911  };
7912  
7913  static WideShiftImmFn * const f_scalar_uqshli[] = {
7914      gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7915  };
7916  
7917  static WideShiftImmFn * const f_scalar_sqshlui[] = {
7918      gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7919  };
7920  
7921  /* Note that the helpers sign-extend their inputs, so don't do it here. */
7922  TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7923  TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7924  TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
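
      /*
       * Illustrative sketch (for exposition; not part of the original file):
       * ignoring the sticky FPSR.QC bit that the helpers also update, a
       * saturating left shift of one signed byte behaves like:
       *
       *   int8_t sqshl8_ref(int8_t v, unsigned shift)    /* shift in 0..7 */
       *   {
       *       int32_t r = (int32_t)v << shift;
       *       return r > INT8_MAX ? INT8_MAX : r < INT8_MIN ? INT8_MIN : r;
       *   }
       *
       * Because the helpers only look at (and sign-extend) the low 8/16/32
       * bits themselves, the "sign" argument passed above is 0.
       */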
7925  
7926  static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7927                                         WideShiftImmFn * const fns[3],
7928                                         MemOp sign, bool zext)
7929  {
7930      MemOp esz = a->esz;
7931  
7932      tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7933  
7934      if (fp_access_check(s)) {
7935          TCGv_i64 rd = tcg_temp_new_i64();
7936          TCGv_i64 rn = tcg_temp_new_i64();
7937  
7938          read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7939          fns[esz](rd, rn, a->imm);
7940          if (zext) {
7941              tcg_gen_ext_i64(rd, rd, esz);
7942          }
7943          write_fp_dreg(s, a->rd, rd);
7944      }
7945      return true;
7946  }
7947  
7948  TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7949  TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7950  TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7951  TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7952  TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7953  TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7954  
7955  static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7956  {
7957      TCGv_i64 tcg_n, tcg_m, tcg_rd;
7958      tcg_rd = cpu_reg(s, a->rd);
7959  
7960      if (!a->sf && is_signed) {
7961          tcg_n = tcg_temp_new_i64();
7962          tcg_m = tcg_temp_new_i64();
7963          tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7964          tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7965      } else {
7966          tcg_n = read_cpu_reg(s, a->rn, a->sf);
7967          tcg_m = read_cpu_reg(s, a->rm, a->sf);
7968      }
7969  
7970      if (is_signed) {
7971          gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7972      } else {
7973          gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7974      }
7975  
7976      if (!a->sf) { /* zero extend final result */
7977          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7978      }
7979      return true;
7980  }
7981  
7982  TRANS(SDIV, do_div, a, true)
7983  TRANS(UDIV, do_div, a, false)
7984  
7985  /* Shift a TCGv src by a TCGv shift_amount, putting the result in dst.
7986   * Note that it is the caller's responsibility to ensure that the
7987   * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
7988   * ARM-mandated semantics for out-of-range shifts.
7989   */
7990  static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7991                        enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7992  {
7993      switch (shift_type) {
7994      case A64_SHIFT_TYPE_LSL:
7995          tcg_gen_shl_i64(dst, src, shift_amount);
7996          break;
7997      case A64_SHIFT_TYPE_LSR:
7998          tcg_gen_shr_i64(dst, src, shift_amount);
7999          break;
8000      case A64_SHIFT_TYPE_ASR:
8001          if (!sf) {
8002              tcg_gen_ext32s_i64(dst, src);
8003          }
8004          tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8005          break;
8006      case A64_SHIFT_TYPE_ROR:
8007          if (sf) {
8008              tcg_gen_rotr_i64(dst, src, shift_amount);
8009          } else {
8010              TCGv_i32 t0, t1;
8011              t0 = tcg_temp_new_i32();
8012              t1 = tcg_temp_new_i32();
8013              tcg_gen_extrl_i64_i32(t0, src);
8014              tcg_gen_extrl_i64_i32(t1, shift_amount);
8015              tcg_gen_rotr_i32(t0, t0, t1);
8016              tcg_gen_extu_i32_i64(dst, t0);
8017          }
8018          break;
8019      default:
8020          g_assert_not_reached(); /* all shift types handled above */
8021          break;
8022      }
8023  
8024      if (!sf) { /* zero extend final result */
8025          tcg_gen_ext32u_i64(dst, dst);
8026      }
8027  }
8028  
8029  /* Shift a TCGv src by immediate, put result in dst.
8030   * The shift amount must be in range (this should always be true as the
8031   * relevant instructions will UNDEF on bad shift immediates).
8032   */
8033  static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8034                            enum a64_shift_type shift_type, unsigned int shift_i)
8035  {
8036      assert(shift_i < (sf ? 64 : 32));
8037  
8038      if (shift_i == 0) {
8039          tcg_gen_mov_i64(dst, src);
8040      } else {
8041          shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8042      }
8043  }
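
      /*
       * Illustrative sketch (for exposition; not part of the original file):
       * the 32-bit ROR path in shift_reg() above is equivalent to rotating
       * the low 32 bits and zero-extending the result, e.g.:
       *
       *   uint64_t ror32_ref(uint64_t src, unsigned amount)  /* amount 0..31 */
       *   {
       *       uint32_t lo = (uint32_t)src;
       *       return amount ? (uint32_t)((lo >> amount) | (lo << (32 - amount)))
       *                     : lo;
       *   }
       */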
8044  
8045  static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8046                           enum a64_shift_type shift_type)
8047  {
8048      TCGv_i64 tcg_shift = tcg_temp_new_i64();
8049      TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8050      TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8051  
8052      tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8053      shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8054      return true;
8055  }
8056  
8057  TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8058  TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8059  TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8060  TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8061  
8062  static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8063  {
8064      TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8065      TCGv_i32 tcg_bytes;
8066  
8067      switch (a->esz) {
8068      case MO_8:
8069      case MO_16:
8070      case MO_32:
8071          tcg_val = tcg_temp_new_i64();
8072          tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8073          break;
8074      case MO_64:
8075          tcg_val = cpu_reg(s, a->rm);
8076          break;
8077      default:
8078          g_assert_not_reached();
8079      }
8080      tcg_acc = cpu_reg(s, a->rn);
8081      tcg_bytes = tcg_constant_i32(1 << a->esz);
8082      tcg_rd = cpu_reg(s, a->rd);
8083  
8084      if (crc32c) {
8085          gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8086      } else {
8087          gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8088      }
8089      return true;
8090  }
8091  
8092  TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8093  TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8094  
8095  static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8096  {
8097      TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8098      TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8099      TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8100  
8101      tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8102      tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8103  
8104      if (setflag) {
8105          gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8106      } else {
8107          tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8108      }
8109      return true;
8110  }
8111  
8112  TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8113  TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
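
      /*
       * Illustrative sketch (for exposition; not part of the original file):
       * leaving flag setting aside, SUBP subtracts the two pointers after
       * sign-extending each from bit 55, which discards the MTE tag bits:
       *
       *   int64_t subp_ref(uint64_t xn, uint64_t xm)
       *   {
       *       int64_t n = (int64_t)(xn << 8) >> 8;   /* sign-extend bits [55:0] */
       *       int64_t m = (int64_t)(xm << 8) >> 8;
       *       return n - m;
       *   }
       */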
8114  
8115  static bool trans_IRG(DisasContext *s, arg_rrr *a)
8116  {
8117      if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8118          TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8119          TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8120  
8121          if (s->ata[0]) {
8122              gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8123          } else {
8124              gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8125          }
8126          return true;
8127      }
8128      return false;
8129  }
8130  
8131  static bool trans_GMI(DisasContext *s, arg_rrr *a)
8132  {
8133      if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8134          TCGv_i64 t = tcg_temp_new_i64();
8135  
8136          tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8137          tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8138          tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8139          return true;
8140      }
8141      return false;
8142  }
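
      /*
       * Illustrative sketch (for exposition; not part of the original file):
       * GMI adds the allocation tag of the address in Xn|SP to the exclude
       * mask in Xm:
       *
       *   uint64_t gmi_ref(uint64_t xn_sp, uint64_t xm)
       *   {
       *       unsigned tag = (xn_sp >> 56) & 0xf;  /* allocation tag, bits [59:56] */
       *       return xm | (1ull << tag);
       *   }
       */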
8143  
8144  static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8145  {
8146      if (dc_isar_feature(aa64_pauth, s)) {
8147          gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8148                           cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8149          return true;
8150      }
8151      return false;
8152  }
8153  
8154  typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8155  
8156  static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8157  {
8158      fn(cpu_reg(s, rd), cpu_reg(s, rn));
8159      return true;
8160  }
8161  
8162  static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8163  {
8164      TCGv_i32 t32 = tcg_temp_new_i32();
8165  
8166      tcg_gen_extrl_i64_i32(t32, tcg_rn);
8167      gen_helper_rbit(t32, t32);
8168      tcg_gen_extu_i32_i64(tcg_rd, t32);
8169  }
8170  
8171  static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8172  {
8173      TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8174  
8175      tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8176      tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8177      tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8178      tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8179      tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8180  }
8181  
8182  static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8183  {
8184      gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8185  }
8186  
8187  static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8188  {
8189      gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8190  }
8191  
8192  static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8193  {
8194      tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8195  }
8196  
8197  static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8198  {
8199      tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8200      tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8201  }
8202  
8203  TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8204  TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8205  TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8206  TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
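
      /*
       * Illustrative sketch (for exposition; not part of the original file):
       * gen_rev16_xx() swaps the two bytes inside every halfword with the
       * usual mask-and-shift trick, e.g. for the 32-bit form:
       *
       *   uint32_t rev16_32_ref(uint32_t x)
       *   {
       *       return ((x >> 8) & 0x00ff00ff) | ((x & 0x00ff00ff) << 8);
       *   }
       *
       * gen_rev32() likewise byte-reverses each 32-bit half of the register:
       * bswap64 reverses all eight bytes and the rotate by 32 puts the two
       * halves back in their original positions.
       */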
8207  
8208  static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8209  {
8210      TCGv_i32 t32 = tcg_temp_new_i32();
8211  
8212      tcg_gen_extrl_i64_i32(t32, tcg_rn);
8213      tcg_gen_clzi_i32(t32, t32, 32);
8214      tcg_gen_extu_i32_i64(tcg_rd, t32);
8215  }
8216  
8217  static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8218  {
8219      tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8220  }
8221  
8222  static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8223  {
8224      TCGv_i32 t32 = tcg_temp_new_i32();
8225  
8226      tcg_gen_extrl_i64_i32(t32, tcg_rn);
8227      tcg_gen_clrsb_i32(t32, t32);
8228      tcg_gen_extu_i32_i64(tcg_rd, t32);
8229  }
8230  
8231  TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8232  TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8233  
8234  static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8235  {
8236      TCGv_i64 tcg_rd, tcg_rn;
8237  
8238      if (a->z) {
8239          if (a->rn != 31) {
8240              return false;
8241          }
8242          tcg_rn = tcg_constant_i64(0);
8243      } else {
8244          tcg_rn = cpu_reg_sp(s, a->rn);
8245      }
8246      if (s->pauth_active) {
8247          tcg_rd = cpu_reg(s, a->rd);
8248          fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8249      }
8250      return true;
8251  }
8252  
8253  TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8254  TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8255  TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8256  TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8257  
8258  TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8259  TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8260  TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8261  TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8262  
8263  static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8264  {
8265      if (s->pauth_active) {
8266          TCGv_i64 tcg_rd = cpu_reg(s, rd);
8267          fn(tcg_rd, tcg_env, tcg_rd);
8268      }
8269      return true;
8270  }
8271  
8272  TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8273  TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8274  
8275  static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8276                           ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8277  {
8278      TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8279  
8280      if (!a->sf && (a->sa & (1 << 5))) {
8281          return false;
8282      }
8283  
8284      tcg_rd = cpu_reg(s, a->rd);
8285      tcg_rn = cpu_reg(s, a->rn);
8286  
8287      tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8288      if (a->sa) {
8289          shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8290      }
8291  
8292      (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8293      if (!a->sf) {
8294          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8295      }
8296      if (setflags) {
8297          gen_logic_CC(a->sf, tcg_rd);
8298      }
8299      return true;
8300  }
8301  
8302  static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8303  {
8304      /*
8305       * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
8306       * register-register MOV and MVN, so it is worth special casing.
8307       */
8308      if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8309          TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8310          TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8311  
8312          if (a->n) {
8313              tcg_gen_not_i64(tcg_rd, tcg_rm);
8314              if (!a->sf) {
8315                  tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8316              }
8317          } else {
8318              if (a->sf) {
8319                  tcg_gen_mov_i64(tcg_rd, tcg_rm);
8320              } else {
8321                  tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8322              }
8323          }
8324          return true;
8325      }
8326  
8327      return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8328  }
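
      /*
       * For example (assembler view, for exposition):
       *     MOV  X0, X1   is encoded as   ORR  X0, XZR, X1
       *     MVN  W0, W1   is encoded as   ORN  W0, WZR, W1
       * so the fast path above emits a plain move or not instead of going
       * through do_logic_reg().
       */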
8329  
8330  TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8331  TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8332  TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8333  
8334  static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8335                            bool sub_op, bool setflags)
8336  {
8337      TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8338  
8339      if (a->sa > 4) {
8340          return false;
8341      }
8342  
8343      /* non-flag setting ops may use SP */
8344      if (!setflags) {
8345          tcg_rd = cpu_reg_sp(s, a->rd);
8346      } else {
8347          tcg_rd = cpu_reg(s, a->rd);
8348      }
8349      tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8350  
8351      tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8352      ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8353  
8354      tcg_result = tcg_temp_new_i64();
8355      if (!setflags) {
8356          if (sub_op) {
8357              tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8358          } else {
8359              tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8360          }
8361      } else {
8362          if (sub_op) {
8363              gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8364          } else {
8365              gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8366          }
8367      }
8368  
8369      if (a->sf) {
8370          tcg_gen_mov_i64(tcg_rd, tcg_result);
8371      } else {
8372          tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8373      }
8374      return true;
8375  }
8376  
8377  TRANS(ADD_ext, do_addsub_ext, a, false, false)
8378  TRANS(SUB_ext, do_addsub_ext, a, true, false)
8379  TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8380  TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8381  
8382  static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8383                            bool sub_op, bool setflags)
8384  {
8385      TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8386  
8387      if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8388          return false;
8389      }
8390  
8391      tcg_rd = cpu_reg(s, a->rd);
8392      tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8393      tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8394  
8395      shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8396  
8397      tcg_result = tcg_temp_new_i64();
8398      if (!setflags) {
8399          if (sub_op) {
8400              tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8401          } else {
8402              tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8403          }
8404      } else {
8405          if (sub_op) {
8406              gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8407          } else {
8408              gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8409          }
8410      }
8411  
8412      if (a->sf) {
8413          tcg_gen_mov_i64(tcg_rd, tcg_result);
8414      } else {
8415          tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8416      }
8417      return true;
8418  }
8419  
8420  TRANS(ADD_r, do_addsub_reg, a, false, false)
8421  TRANS(SUB_r, do_addsub_reg, a, true, false)
8422  TRANS(ADDS_r, do_addsub_reg, a, false, true)
8423  TRANS(SUBS_r, do_addsub_reg, a, true, true)
8424  
8425  static bool do_mulh(DisasContext *s, arg_rrr *a,
8426                      void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8427  {
8428      TCGv_i64 discard = tcg_temp_new_i64();
8429      TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8430      TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8431      TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8432  
8433      fn(discard, tcg_rd, tcg_rn, tcg_rm);
8434      return true;
8435  }
8436  
8437  TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8438  TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
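
      /*
       * Illustrative sketch (for exposition; not part of the original file):
       * the muls2/mulu2 ops compute the full 128-bit product and the low
       * half is simply discarded, i.e. with a compiler providing __int128:
       *
       *   uint64_t umulh_ref(uint64_t a, uint64_t b)
       *   {
       *       return (uint64_t)(((unsigned __int128)a * b) >> 64);
       *   }
       */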
8439  
8440  static bool do_muladd(DisasContext *s, arg_rrrr *a,
8441                        bool sf, bool is_sub, MemOp mop)
8442  {
8443      TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8444      TCGv_i64 tcg_op1, tcg_op2;
8445  
8446      if (mop == MO_64) {
8447          tcg_op1 = cpu_reg(s, a->rn);
8448          tcg_op2 = cpu_reg(s, a->rm);
8449      } else {
8450          tcg_op1 = tcg_temp_new_i64();
8451          tcg_op2 = tcg_temp_new_i64();
8452          tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8453          tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8454      }
8455  
8456      if (a->ra == 31 && !is_sub) {
8457          /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8458          tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8459      } else {
8460          TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8461          TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8462  
8463          tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8464          if (is_sub) {
8465              tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8466          } else {
8467              tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8468          }
8469      }
8470  
8471      if (!sf) {
8472          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8473      }
8474      return true;
8475  }
8476  
8477  TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8478  TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8479  TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8480  TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8481  
8482  TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8483  TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8484  TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8485  TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
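
      /*
       * Illustrative sketch (for exposition; not part of the original file):
       * for the widening forms the 32-bit sources are first sign- or
       * zero-extended (the MO_SL/MO_UL argument above) and only then
       * multiplied and accumulated in 64 bits, e.g.:
       *
       *   int64_t smaddl_ref(int32_t wn, int32_t wm, int64_t xa)
       *   {
       *       return xa + (int64_t)wn * wm;
       *   }
       */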
8486  
8487  static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8488                         bool is_sub, bool setflags)
8489  {
8490      TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8491  
8492      tcg_rd = cpu_reg(s, a->rd);
8493      tcg_rn = cpu_reg(s, a->rn);
8494  
8495      if (is_sub) {
8496          tcg_y = tcg_temp_new_i64();
8497          tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8498      } else {
8499          tcg_y = cpu_reg(s, a->rm);
8500      }
8501  
8502      if (setflags) {
8503          gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8504      } else {
8505          gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8506      }
8507      return true;
8508  }
8509  
8510  TRANS(ADC, do_adc_sbc, a, false, false)
8511  TRANS(SBC, do_adc_sbc, a, true, false)
8512  TRANS(ADCS, do_adc_sbc, a, false, true)
8513  TRANS(SBCS, do_adc_sbc, a, true, true)
8514  
8515  static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8516  {
8517      int mask = a->mask;
8518      TCGv_i64 tcg_rn;
8519      TCGv_i32 nzcv;
8520  
8521      if (!dc_isar_feature(aa64_condm_4, s)) {
8522          return false;
8523      }
8524  
8525      tcg_rn = read_cpu_reg(s, a->rn, 1);
8526      tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8527  
8528      nzcv = tcg_temp_new_i32();
8529      tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8530  
8531      if (mask & 8) { /* N */
8532          tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8533      }
8534      if (mask & 4) { /* Z */
8535          tcg_gen_not_i32(cpu_ZF, nzcv);
8536          tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8537      }
8538      if (mask & 2) { /* C */
8539          tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8540      }
8541      if (mask & 1) { /* V */
8542          tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8543      }
8544      return true;
8545  }
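
      /*
       * Worked example (for exposition): after the rotate, bits [3:0] of the
       * low word hold the candidate N, Z, C and V values.  QEMU keeps NF and
       * VF as "the flag is the sign bit", ZF as "Z is set iff ZF == 0" and
       * CF as a 0/1 value, which is why each selected bit needs its own
       * shift, not-and or extract above rather than a single insert.
       */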
8546  
8547  static bool do_setf(DisasContext *s, int rn, int shift)
8548  {
8549      TCGv_i32 tmp = tcg_temp_new_i32();
8550  
8551      tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8552      tcg_gen_shli_i32(cpu_NF, tmp, shift);
8553      tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8554      tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8555      tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8556      return true;
8557  }
8558  
8559  TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8560  TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
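
      /*
       * Worked example (for exposition), taking SETF8 (shift == 24):
       * NF = Xn << 24 puts Xn[7] in the sign bit, so N = Xn[7]; ZF is a copy
       * of NF and QEMU's Z is "ZF == 0", so Z is set iff Xn[7:0] == 0; the
       * xor of the two shifted copies leaves Xn[8] ^ Xn[7] in VF's sign bit,
       * so V flags signed overflow of the 8-bit view.  C is left unchanged,
       * as the architecture requires.
       */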
8561  
8562  /* CCMP, CCMN */
8563  static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8564  {
8565      TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8566      TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8567      TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8568      TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8569      TCGv_i64 tcg_rn, tcg_y;
8570      DisasCompare c;
8571      unsigned nzcv;
8572      bool has_andc;
8573  
8574      /* Set T0 = !COND.  */
8575      arm_test_cc(&c, a->cond);
8576      tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8577  
8578      /* Load the arguments for the new comparison.  */
8579      if (a->imm) {
8580          tcg_y = tcg_constant_i64(a->y);
8581      } else {
8582          tcg_y = cpu_reg(s, a->y);
8583      }
8584      tcg_rn = cpu_reg(s, a->rn);
8585  
8586      /* Set the flags for the new comparison.  */
8587      if (a->op) {
8588          gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8589      } else {
8590          gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8591      }
8592  
8593      /*
8594       * If COND was false, force the flags to #nzcv.  Compute two masks
8595       * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8596       * For tcg hosts that support ANDC, we can make do with just T1.
8597       * In either case, allow the tcg optimizer to delete any unused mask.
8598       */
8599      tcg_gen_neg_i32(tcg_t1, tcg_t0);
8600      tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8601  
8602      nzcv = a->nzcv;
8603      has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
8604      if (nzcv & 8) { /* N */
8605          tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8606      } else {
8607          if (has_andc) {
8608              tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8609          } else {
8610              tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8611          }
8612      }
8613      if (nzcv & 4) { /* Z */
8614          if (has_andc) {
8615              tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8616          } else {
8617              tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8618          }
8619      } else {
8620          tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8621      }
8622      if (nzcv & 2) { /* C */
8623          tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8624      } else {
8625          if (has_andc) {
8626              tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8627          } else {
8628              tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8629          }
8630      }
8631      if (nzcv & 1) { /* V */
8632          tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8633      } else {
8634          if (has_andc) {
8635              tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8636          } else {
8637              tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8638          }
8639      }
8640      return true;
8641  }
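
      /*
       * Worked example (for exposition): when COND is false, t0 = 1,
       * t1 = -1 (all ones) and t2 = 0.  ORing NF/VF with t1 (flag in the
       * sign bit) or CF with t0 (a 0/1 value) forces the flag set, while
       * and-with-t2 / andc-with-t1 forces it clear; ZF has the inverted
       * sense (Z is set iff ZF == 0), so it is ANDed to zero for a set Z
       * bit and ORed with t0 for a clear one.  When COND is true, t0 = 0,
       * t1 = 0 and t2 = -1, and every one of these operations is a no-op,
       * preserving the flags from the comparison just computed.
       */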
8642  
8643  static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8644  {
8645      TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8646      TCGv_i64 zero = tcg_constant_i64(0);
8647      DisasCompare64 c;
8648  
8649      a64_test_cc(&c, a->cond);
8650  
8651      if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8652          /* CSET & CSETM.  */
8653          if (a->else_inv) {
8654              tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8655                                     tcg_rd, c.value, zero);
8656          } else {
8657              tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8658                                  tcg_rd, c.value, zero);
8659          }
8660      } else {
8661          TCGv_i64 t_true = cpu_reg(s, a->rn);
8662          TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8663  
8664          if (a->else_inv && a->else_inc) {
8665              tcg_gen_neg_i64(t_false, t_false);
8666          } else if (a->else_inv) {
8667              tcg_gen_not_i64(t_false, t_false);
8668          } else if (a->else_inc) {
8669              tcg_gen_addi_i64(t_false, t_false, 1);
8670          }
8671          tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8672      }
8673  
8674      if (!a->sf) {
8675          tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8676      }
8677      return true;
8678  }
8679  
8680  typedef struct FPScalar1Int {
8681      void (*gen_h)(TCGv_i32, TCGv_i32);
8682      void (*gen_s)(TCGv_i32, TCGv_i32);
8683      void (*gen_d)(TCGv_i64, TCGv_i64);
8684  } FPScalar1Int;
8685  
8686  static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8687                                const FPScalar1Int *f,
8688                                bool merging)
8689  {
8690      switch (a->esz) {
8691      case MO_64:
8692          if (fp_access_check(s)) {
8693              TCGv_i64 t = read_fp_dreg(s, a->rn);
8694              f->gen_d(t, t);
8695              if (merging) {
8696                  write_fp_dreg_merging(s, a->rd, a->rd, t);
8697              } else {
8698                  write_fp_dreg(s, a->rd, t);
8699              }
8700          }
8701          break;
8702      case MO_32:
8703          if (fp_access_check(s)) {
8704              TCGv_i32 t = read_fp_sreg(s, a->rn);
8705              f->gen_s(t, t);
8706              if (merging) {
8707                  write_fp_sreg_merging(s, a->rd, a->rd, t);
8708              } else {
8709                  write_fp_sreg(s, a->rd, t);
8710              }
8711          }
8712          break;
8713      case MO_16:
8714          if (!dc_isar_feature(aa64_fp16, s)) {
8715              return false;
8716          }
8717          if (fp_access_check(s)) {
8718              TCGv_i32 t = read_fp_hreg(s, a->rn);
8719              f->gen_h(t, t);
8720              if (merging) {
8721                  write_fp_hreg_merging(s, a->rd, a->rd, t);
8722              } else {
8723                  write_fp_sreg(s, a->rd, t);
8724              }
8725          }
8726          break;
8727      default:
8728          return false;
8729      }
8730      return true;
8731  }
8732  
8733  static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
8734                                    const FPScalar1Int *fnormal,
8735                                    const FPScalar1Int *fah)
8736  {
8737      return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
8738  }
8739  
8740  static const FPScalar1Int f_scalar_fmov = {
8741      tcg_gen_mov_i32,
8742      tcg_gen_mov_i32,
8743      tcg_gen_mov_i64,
8744  };
8745  TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
8746  
8747  static const FPScalar1Int f_scalar_fabs = {
8748      gen_vfp_absh,
8749      gen_vfp_abss,
8750      gen_vfp_absd,
8751  };
8752  static const FPScalar1Int f_scalar_ah_fabs = {
8753      gen_vfp_ah_absh,
8754      gen_vfp_ah_abss,
8755      gen_vfp_ah_absd,
8756  };
8757  TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
8758  
8759  static const FPScalar1Int f_scalar_fneg = {
8760      gen_vfp_negh,
8761      gen_vfp_negs,
8762      gen_vfp_negd,
8763  };
8764  static const FPScalar1Int f_scalar_ah_fneg = {
8765      gen_vfp_ah_negh,
8766      gen_vfp_ah_negs,
8767      gen_vfp_ah_negd,
8768  };
8769  TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
8770  
8771  typedef struct FPScalar1 {
8772      void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8773      void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8774      void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8775  } FPScalar1;
8776  
8777  static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
8778                                          const FPScalar1 *f, int rmode,
8779                                          ARMFPStatusFlavour fpsttype)
8780  {
8781      TCGv_i32 tcg_rmode = NULL;
8782      TCGv_ptr fpst;
8783      TCGv_i64 t64;
8784      TCGv_i32 t32;
8785      int check = fp_access_check_scalar_hsd(s, a->esz);
8786  
8787      if (check <= 0) {
8788          return check == 0;
8789      }
8790  
8791      fpst = fpstatus_ptr(fpsttype);
8792      if (rmode >= 0) {
8793          tcg_rmode = gen_set_rmode(rmode, fpst);
8794      }
8795  
8796      switch (a->esz) {
8797      case MO_64:
8798          t64 = read_fp_dreg(s, a->rn);
8799          f->gen_d(t64, t64, fpst);
8800          write_fp_dreg_merging(s, a->rd, a->rd, t64);
8801          break;
8802      case MO_32:
8803          t32 = read_fp_sreg(s, a->rn);
8804          f->gen_s(t32, t32, fpst);
8805          write_fp_sreg_merging(s, a->rd, a->rd, t32);
8806          break;
8807      case MO_16:
8808          t32 = read_fp_hreg(s, a->rn);
8809          f->gen_h(t32, t32, fpst);
8810          write_fp_hreg_merging(s, a->rd, a->rd, t32);
8811          break;
8812      default:
8813          g_assert_not_reached();
8814      }
8815  
8816      if (rmode >= 0) {
8817          gen_restore_rmode(tcg_rmode, fpst);
8818      }
8819      return true;
8820  }
8821  
8822  static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8823                            const FPScalar1 *f, int rmode)
8824  {
8825      return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
8826                                         a->esz == MO_16 ?
8827                                         FPST_A64_F16 : FPST_A64);
8828  }
8829  
8830  static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
8831                               const FPScalar1 *f, int rmode)
8832  {
8833      return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
8834  }
8835  
8836  static const FPScalar1 f_scalar_fsqrt = {
8837      gen_helper_vfp_sqrth,
8838      gen_helper_vfp_sqrts,
8839      gen_helper_vfp_sqrtd,
8840  };
8841  TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8842  
8843  static const FPScalar1 f_scalar_frint = {
8844      gen_helper_advsimd_rinth,
8845      gen_helper_rints,
8846      gen_helper_rintd,
8847  };
8848  TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8849  TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8850  TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8851  TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8852  TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8853  TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8854  
8855  static const FPScalar1 f_scalar_frintx = {
8856      gen_helper_advsimd_rinth_exact,
8857      gen_helper_rints_exact,
8858      gen_helper_rintd_exact,
8859  };
8860  TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8861  
8862  static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
8863  {
8864      ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
8865      TCGv_i32 t32;
8866      int check;
8867  
8868      if (!dc_isar_feature(aa64_bf16, s)) {
8869          return false;
8870      }
8871  
8872      check = fp_access_check_scalar_hsd(s, a->esz);
8873  
8874      if (check <= 0) {
8875          return check == 0;
8876      }
8877  
8878      t32 = read_fp_sreg(s, a->rn);
8879      gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
8880      write_fp_hreg_merging(s, a->rd, a->rd, t32);
8881      return true;
8882  }
8883  
8884  static const FPScalar1 f_scalar_frint32 = {
8885      NULL,
8886      gen_helper_frint32_s,
8887      gen_helper_frint32_d,
8888  };
8889  TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8890             &f_scalar_frint32, FPROUNDING_ZERO)
8891  TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8892  
8893  static const FPScalar1 f_scalar_frint64 = {
8894      NULL,
8895      gen_helper_frint64_s,
8896      gen_helper_frint64_d,
8897  };
8898  TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8899             &f_scalar_frint64, FPROUNDING_ZERO)
8900  TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8901  
8902  static const FPScalar1 f_scalar_frecpe = {
8903      gen_helper_recpe_f16,
8904      gen_helper_recpe_f32,
8905      gen_helper_recpe_f64,
8906  };
8907  static const FPScalar1 f_scalar_frecpe_rpres = {
8908      gen_helper_recpe_f16,
8909      gen_helper_recpe_rpres_f32,
8910      gen_helper_recpe_f64,
8911  };
8912  TRANS(FRECPE_s, do_fp1_scalar_ah, a,
8913        s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8914        &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
8915  
8916  static const FPScalar1 f_scalar_frecpx = {
8917      gen_helper_frecpx_f16,
8918      gen_helper_frecpx_f32,
8919      gen_helper_frecpx_f64,
8920  };
8921  TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
8922  
8923  static const FPScalar1 f_scalar_frsqrte = {
8924      gen_helper_rsqrte_f16,
8925      gen_helper_rsqrte_f32,
8926      gen_helper_rsqrte_f64,
8927  };
8928  static const FPScalar1 f_scalar_frsqrte_rpres = {
8929      gen_helper_rsqrte_f16,
8930      gen_helper_rsqrte_rpres_f32,
8931      gen_helper_rsqrte_f64,
8932  };
8933  TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
8934        s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8935        &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
8936  
8937  static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8938  {
8939      if (fp_access_check(s)) {
8940          TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8941          TCGv_i64 tcg_rd = tcg_temp_new_i64();
8942          TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8943  
8944          gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8945          write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
8946      }
8947      return true;
8948  }
8949  
8950  static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8951  {
8952      if (fp_access_check(s)) {
8953          TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8954          TCGv_i32 ahp = get_ahp_flag();
8955          TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8956  
8957          gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8958          /* write_fp_hreg_merging is OK here because top half of result is zero */
8959          write_fp_hreg_merging(s, a->rd, a->rd, tmp);
8960      }
8961      return true;
8962  }
8963  
8964  static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8965  {
8966      if (fp_access_check(s)) {
8967          TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8968          TCGv_i32 tcg_rd = tcg_temp_new_i32();
8969          TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8970  
8971          gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8972          write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8973      }
8974      return true;
8975  }
8976  
8977  static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8978  {
8979      if (fp_access_check(s)) {
8980          TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8981          TCGv_i32 tcg_rd = tcg_temp_new_i32();
8982          TCGv_i32 ahp = get_ahp_flag();
8983          TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8984  
8985          gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8986          /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
8987          write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
8988      }
8989      return true;
8990  }
8991  
8992  static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8993  {
8994      if (fp_access_check(s)) {
8995          TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8996          TCGv_i32 tcg_rd = tcg_temp_new_i32();
8997          TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8998          TCGv_i32 tcg_ahp = get_ahp_flag();
8999  
9000          gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9001          write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
9002      }
9003      return true;
9004  }
9005  
9006  static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9007  {
9008      if (fp_access_check(s)) {
9009          TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9010          TCGv_i64 tcg_rd = tcg_temp_new_i64();
9011          TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9012          TCGv_i32 tcg_ahp = get_ahp_flag();
9013  
9014          gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9015          write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9016      }
9017      return true;
9018  }
9019  
9020  static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9021                             TCGv_i64 tcg_int, bool is_signed)
9022  {
9023      TCGv_ptr tcg_fpstatus;
9024      TCGv_i32 tcg_shift, tcg_single;
9025      TCGv_i64 tcg_double;
9026  
9027      tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9028      tcg_shift = tcg_constant_i32(shift);
9029  
9030      switch (esz) {
9031      case MO_64:
9032          tcg_double = tcg_temp_new_i64();
9033          if (is_signed) {
9034              gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9035          } else {
9036              gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9037          }
9038          write_fp_dreg_merging(s, rd, rd, tcg_double);
9039          break;
9040  
9041      case MO_32:
9042          tcg_single = tcg_temp_new_i32();
9043          if (is_signed) {
9044              gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9045          } else {
9046              gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9047          }
9048          write_fp_sreg_merging(s, rd, rd, tcg_single);
9049          break;
9050  
9051      case MO_16:
9052          tcg_single = tcg_temp_new_i32();
9053          if (is_signed) {
9054              gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9055          } else {
9056              gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9057          }
9058          write_fp_hreg_merging(s, rd, rd, tcg_single);
9059          break;
9060  
9061      default:
9062          g_assert_not_reached();
9063      }
9064      return true;
9065  }
9066  
9067  static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9068  {
9069      TCGv_i64 tcg_int;
9070      int check = fp_access_check_scalar_hsd(s, a->esz);
9071  
9072      if (check <= 0) {
9073          return check == 0;
9074      }
9075  
9076      if (a->sf) {
9077          tcg_int = cpu_reg(s, a->rn);
9078      } else {
9079          tcg_int = read_cpu_reg(s, a->rn, true);
9080          if (is_signed) {
9081              tcg_gen_ext32s_i64(tcg_int, tcg_int);
9082          } else {
9083              tcg_gen_ext32u_i64(tcg_int, tcg_int);
9084          }
9085      }
9086      return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9087  }
9088  
9089  TRANS(SCVTF_g, do_cvtf_g, a, true)
9090  TRANS(UCVTF_g, do_cvtf_g, a, false)
9091  
9092  /*
9093   * [US]CVTF (vector), scalar version.
9094   * Which sounds weird, but really just means input from fp register
9095   * instead of input from general register.  Input and output element
9096   * sizes are always equal.
9097   */
9098  static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9099  {
9100      TCGv_i64 tcg_int;
9101      int check = fp_access_check_scalar_hsd(s, a->esz);
9102  
9103      if (check <= 0) {
9104          return check == 0;
9105      }
9106  
9107      tcg_int = tcg_temp_new_i64();
9108      read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9109      return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9110  }
9111  
9112  TRANS(SCVTF_f, do_cvtf_f, a, true)
9113  TRANS(UCVTF_f, do_cvtf_f, a, false)
9114  
9115  static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9116                             TCGv_i64 tcg_out, int shift, int rn,
9117                             ARMFPRounding rmode)
9118  {
9119      TCGv_ptr tcg_fpstatus;
9120      TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9121  
9122      tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9123      tcg_shift = tcg_constant_i32(shift);
9124      tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9125  
9126      switch (esz) {
9127      case MO_64:
9128          read_vec_element(s, tcg_out, rn, 0, MO_64);
9129          switch (out) {
9130          case MO_64 | MO_SIGN:
9131              gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9132              break;
9133          case MO_64:
9134              gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9135              break;
9136          case MO_32 | MO_SIGN:
9137              gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9138              break;
9139          case MO_32:
9140              gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9141              break;
9142          default:
9143              g_assert_not_reached();
9144          }
9145          break;
9146  
9147      case MO_32:
9148          tcg_single = read_fp_sreg(s, rn);
9149          switch (out) {
9150          case MO_64 | MO_SIGN:
9151              gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9152              break;
9153          case MO_64:
9154              gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9155              break;
9156          case MO_32 | MO_SIGN:
9157              gen_helper_vfp_tosls(tcg_single, tcg_single,
9158                                   tcg_shift, tcg_fpstatus);
9159              tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9160              break;
9161          case MO_32:
9162              gen_helper_vfp_touls(tcg_single, tcg_single,
9163                                   tcg_shift, tcg_fpstatus);
9164              tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9165              break;
9166          default:
9167              g_assert_not_reached();
9168          }
9169          break;
9170  
9171      case MO_16:
9172          tcg_single = read_fp_hreg(s, rn);
9173          switch (out) {
9174          case MO_64 | MO_SIGN:
9175              gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9176              break;
9177          case MO_64:
9178              gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9179              break;
9180          case MO_32 | MO_SIGN:
9181              gen_helper_vfp_toslh(tcg_single, tcg_single,
9182                                   tcg_shift, tcg_fpstatus);
9183              tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9184              break;
9185          case MO_32:
9186              gen_helper_vfp_toulh(tcg_single, tcg_single,
9187                                   tcg_shift, tcg_fpstatus);
9188              tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9189              break;
9190          case MO_16 | MO_SIGN:
9191              gen_helper_vfp_toshh(tcg_single, tcg_single,
9192                                   tcg_shift, tcg_fpstatus);
9193              tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9194              break;
9195          case MO_16:
9196              gen_helper_vfp_touhh(tcg_single, tcg_single,
9197                                   tcg_shift, tcg_fpstatus);
9198              tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9199              break;
9200          default:
9201              g_assert_not_reached();
9202          }
9203          break;
9204  
9205      default:
9206          g_assert_not_reached();
9207      }
9208  
9209      gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9210  }
9211  
9212  static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9213                        ARMFPRounding rmode, bool is_signed)
9214  {
9215      TCGv_i64 tcg_int;
9216      int check = fp_access_check_scalar_hsd(s, a->esz);
9217  
9218      if (check <= 0) {
9219          return check == 0;
9220      }
9221  
9222      tcg_int = cpu_reg(s, a->rd);
9223      do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9224                     a->esz, tcg_int, a->shift, a->rn, rmode);
9225  
9226      if (!a->sf) {
9227          tcg_gen_ext32u_i64(tcg_int, tcg_int);
9228      }
9229      return true;
9230  }
9231  
9232  TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9233  TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9234  TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9235  TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9236  TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9237  TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9238  TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9239  TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9240  TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9241  TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9242  
9243  /*
9244   * FCVT* (vector), scalar version.
9245   * Which sounds weird, but really just means output to fp register
9246   * instead of output to general register.  Input and output element
9247   * sizes are always equal.
9248   */
9249  static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9250                        ARMFPRounding rmode, bool is_signed)
9251  {
9252      TCGv_i64 tcg_int;
9253      int check = fp_access_check_scalar_hsd(s, a->esz);
9254  
9255      if (check <= 0) {
9256          return check == 0;
9257      }
9258  
9259      tcg_int = tcg_temp_new_i64();
9260      do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9261                     a->esz, tcg_int, a->shift, a->rn, rmode);
9262  
9263      if (!s->fpcr_nep) {
9264          clear_vec(s, a->rd);
9265      }
9266      write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9267      return true;
9268  }
9269  
9270  TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9271  TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9272  TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9273  TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9274  TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9275  TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9276  TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9277  TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9278  TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9279  TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9280  
9281  static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9282  {
9283      if (!dc_isar_feature(aa64_jscvt, s)) {
9284          return false;
9285      }
9286      if (fp_access_check(s)) {
9287          TCGv_i64 t = read_fp_dreg(s, a->rn);
9288          TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9289  
9290          gen_helper_fjcvtzs(t, t, fpstatus);
9291  
9292          tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9293          tcg_gen_extrh_i64_i32(cpu_ZF, t);
9294          tcg_gen_movi_i32(cpu_CF, 0);
9295          tcg_gen_movi_i32(cpu_NF, 0);
9296          tcg_gen_movi_i32(cpu_VF, 0);
9297      }
9298      return true;
9299  }
9300  
9301  static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9302  {
9303      if (!dc_isar_feature(aa64_fp16, s)) {
9304          return false;
9305      }
9306      if (fp_access_check(s)) {
9307          TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9308          TCGv_i64 tmp = tcg_temp_new_i64();
9309          tcg_gen_ext16u_i64(tmp, tcg_rn);
9310          write_fp_dreg(s, a->rd, tmp);
9311      }
9312      return true;
9313  }
9314  
9315  static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9316  {
9317      if (fp_access_check(s)) {
9318          TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9319          TCGv_i64 tmp = tcg_temp_new_i64();
9320          tcg_gen_ext32u_i64(tmp, tcg_rn);
9321          write_fp_dreg(s, a->rd, tmp);
9322      }
9323      return true;
9324  }
9325  
9326  static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9327  {
9328      if (fp_access_check(s)) {
9329          TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9330          write_fp_dreg(s, a->rd, tcg_rn);
9331      }
9332      return true;
9333  }
9334  
9335  static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9336  {
9337      if (fp_access_check(s)) {
9338          TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9339          tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9340          clear_vec_high(s, true, a->rd);
9341      }
9342      return true;
9343  }
9344  
9345  static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9346  {
9347      if (!dc_isar_feature(aa64_fp16, s)) {
9348          return false;
9349      }
9350      if (fp_access_check(s)) {
9351          TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9352          tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9353      }
9354      return true;
9355  }
9356  
9357  static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9358  {
9359      if (fp_access_check(s)) {
9360          TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9361          tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9362      }
9363      return true;
9364  }
9365  
9366  static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9367  {
9368      if (fp_access_check(s)) {
9369          TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9370          tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9371      }
9372      return true;
9373  }
9374  
9375  static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9376  {
9377      if (fp_access_check(s)) {
9378          TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9379          tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9380      }
9381      return true;
9382  }
9383  
9384  typedef struct ENVScalar1 {
9385      NeonGenOneOpEnvFn *gen_bhs[3];
9386      NeonGenOne64OpEnvFn *gen_d;
9387  } ENVScalar1;
9388  
9389  static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9390  {
9391      if (!fp_access_check(s)) {
9392          return true;
9393      }
9394      if (a->esz == MO_64) {
9395          TCGv_i64 t = read_fp_dreg(s, a->rn);
9396          f->gen_d(t, tcg_env, t);
9397          write_fp_dreg(s, a->rd, t);
9398      } else {
9399          TCGv_i32 t = tcg_temp_new_i32();
9400  
9401          read_vec_element_i32(s, t, a->rn, 0, a->esz);
9402          f->gen_bhs[a->esz](t, tcg_env, t);
9403          write_fp_sreg(s, a->rd, t);
9404      }
9405      return true;
9406  }
9407  
9408  static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9409  {
9410      if (a->esz == MO_64 && !a->q) {
9411          return false;
9412      }
9413      if (!fp_access_check(s)) {
9414          return true;
9415      }
9416      if (a->esz == MO_64) {
9417          TCGv_i64 t = tcg_temp_new_i64();
9418  
9419          for (int i = 0; i < 2; ++i) {
9420              read_vec_element(s, t, a->rn, i, MO_64);
9421              f->gen_d(t, tcg_env, t);
9422              write_vec_element(s, t, a->rd, i, MO_64);
9423          }
9424      } else {
9425          TCGv_i32 t = tcg_temp_new_i32();
9426          int n = (a->q ? 16 : 8) >> a->esz;
9427  
9428          for (int i = 0; i < n; ++i) {
9429              read_vec_element_i32(s, t, a->rn, i, a->esz);
9430              f->gen_bhs[a->esz](t, tcg_env, t);
9431              write_vec_element_i32(s, t, a->rd, i, a->esz);
9432          }
9433      }
9434      clear_vec_high(s, a->q, a->rd);
9435      return true;
9436  }
9437  
9438  static const ENVScalar1 f_scalar_sqabs = {
9439      { gen_helper_neon_qabs_s8,
9440        gen_helper_neon_qabs_s16,
9441        gen_helper_neon_qabs_s32 },
9442      gen_helper_neon_qabs_s64,
9443  };
9444  TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9445  TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9446  
9447  static const ENVScalar1 f_scalar_sqneg = {
9448      { gen_helper_neon_qneg_s8,
9449        gen_helper_neon_qneg_s16,
9450        gen_helper_neon_qneg_s32 },
9451      gen_helper_neon_qneg_s64,
9452  };
9453  TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9454  TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9455  
9456  static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9457  {
9458      if (fp_access_check(s)) {
9459          TCGv_i64 t = read_fp_dreg(s, a->rn);
9460          f(t, t);
9461          write_fp_dreg(s, a->rd, t);
9462      }
9463      return true;
9464  }
9465  
9466  TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9467  TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9468  
9469  static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9470  {
9471      if (fp_access_check(s)) {
9472          TCGv_i64 t = read_fp_dreg(s, a->rn);
9473          tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9474          write_fp_dreg(s, a->rd, t);
9475      }
9476      return true;
9477  }
9478  
9479  TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9480  TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9481  TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9482  TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9483  TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9484  
9485  static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9486                                     ArithOneOp * const fn[3])
9487  {
9488      if (a->esz == MO_64) {
9489          return false;
9490      }
9491      if (fp_access_check(s)) {
9492          TCGv_i64 t = tcg_temp_new_i64();
9493  
9494          read_vec_element(s, t, a->rn, 0, a->esz + 1);
9495          fn[a->esz](t, t);
9496          clear_vec(s, a->rd);
9497          write_vec_element(s, t, a->rd, 0, a->esz);
9498      }
9499      return true;
9500  }
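/*
 * Note the ordering above: the source element is read at double width
 * (a->esz + 1), the narrowing helper produces an a->esz-sized result,
 * and clear_vec() runs before the write so the narrowed value ends up
 * zero-extended in the 128-bit destination register.
 */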
9501  
9502  #define WRAP_ENV(NAME) \
9503      static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9504      { gen_helper_##NAME(d, tcg_env, n); }
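/*
 * WRAP_ENV adapts an env-taking helper to the two-operand ArithOneOp
 * signature used by the narrowing tables below: the generated wrapper
 * just re-supplies tcg_env, e.g. gen_neon_unarrow_sat8(d, n) calls
 * gen_helper_neon_unarrow_sat8(d, tcg_env, n).
 */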
9505  
9506  WRAP_ENV(neon_unarrow_sat8)
9507  WRAP_ENV(neon_unarrow_sat16)
9508  WRAP_ENV(neon_unarrow_sat32)
9509  
9510  static ArithOneOp * const f_scalar_sqxtun[] = {
9511      gen_neon_unarrow_sat8,
9512      gen_neon_unarrow_sat16,
9513      gen_neon_unarrow_sat32,
9514  };
9515  TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9516  
9517  WRAP_ENV(neon_narrow_sat_s8)
9518  WRAP_ENV(neon_narrow_sat_s16)
9519  WRAP_ENV(neon_narrow_sat_s32)
9520  
9521  static ArithOneOp * const f_scalar_sqxtn[] = {
9522      gen_neon_narrow_sat_s8,
9523      gen_neon_narrow_sat_s16,
9524      gen_neon_narrow_sat_s32,
9525  };
9526  TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9527  
9528  WRAP_ENV(neon_narrow_sat_u8)
9529  WRAP_ENV(neon_narrow_sat_u16)
9530  WRAP_ENV(neon_narrow_sat_u32)
9531  
9532  static ArithOneOp * const f_scalar_uqxtn[] = {
9533      gen_neon_narrow_sat_u8,
9534      gen_neon_narrow_sat_u16,
9535      gen_neon_narrow_sat_u32,
9536  };
9537  TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9538  
9539  static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
9540  {
9541      if (fp_access_check(s)) {
9542          /*
9543           * 64 bit to 32 bit float conversion
9544           * with von Neumann rounding (round to odd)
9545           */
9546          TCGv_i64 src = read_fp_dreg(s, a->rn);
9547          TCGv_i32 dst = tcg_temp_new_i32();
9548          gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
9549          write_fp_sreg_merging(s, a->rd, a->rd, dst);
9550      }
9551      return true;
9552  }
9553  
9554  #undef WRAP_ENV
9555  
9556  static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9557  {
9558      if (!a->q && a->esz == MO_64) {
9559          return false;
9560      }
9561      if (fp_access_check(s)) {
9562          gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9563      }
9564      return true;
9565  }
9566  
9567  TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9568  TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9569  TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9570  TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9571  TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9572  TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9573  TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9574  TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9575  TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9576  TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9577  TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9578  TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9579  TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9580  TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9581  
9582  static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9583  {
9584      if (a->esz == MO_64) {
9585          return false;
9586      }
9587      if (fp_access_check(s)) {
9588          gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9589      }
9590      return true;
9591  }
9592  
9593  TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9594  TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9595  TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9596  TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9597  TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9598  TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9599  TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9600  
9601  static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9602                                     ArithOneOp * const fn[3])
9603  {
9604      if (a->esz == MO_64) {
9605          return false;
9606      }
9607      if (fp_access_check(s)) {
9608          TCGv_i64 t0 = tcg_temp_new_i64();
9609          TCGv_i64 t1 = tcg_temp_new_i64();
9610  
9611          read_vec_element(s, t0, a->rn, 0, MO_64);
9612          read_vec_element(s, t1, a->rn, 1, MO_64);
9613          fn[a->esz](t0, t0);
9614          fn[a->esz](t1, t1);
9615          write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9616          write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9617          clear_vec_high(s, a->q, a->rd);
9618      }
9619      return true;
9620  }
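/*
 * Both 64-bit halves of the source are narrowed to 32 bits each; with
 * q == 0 they are written as MO_32 elements 0 and 1 (the low half of
 * Vd), and with q == 1 as elements 2 and 3 (the high half), which is
 * what the "2" forms such as XTN2 and SQXTN2 require.
 */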
9621  
9622  static ArithOneOp * const f_scalar_xtn[] = {
9623      gen_helper_neon_narrow_u8,
9624      gen_helper_neon_narrow_u16,
9625      tcg_gen_ext32u_i64,
9626  };
9627  TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9628  TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9629  TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9630  TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9631  
9632  static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9633  {
9634      TCGv_i32 tcg_lo = tcg_temp_new_i32();
9635      TCGv_i32 tcg_hi = tcg_temp_new_i32();
9636      TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9637      TCGv_i32 ahp = get_ahp_flag();
9638  
9639      tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9640      gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9641      gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9642      tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9643      tcg_gen_extu_i32_i64(d, tcg_lo);
9644  }
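/*
 * Packing sketch for the helper above: the two f32 inputs come from
 * the low and high words of n, each conversion leaves an f16 in the
 * low 16 bits of tcg_lo / tcg_hi, and the deposit places tcg_hi at
 * bits [31:16], so d carries { hi16, lo16 } in its low 32 bits for
 * the narrowing write done by do_2misc_narrow_vector().
 */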
9645  
9646  static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9647  {
9648      TCGv_i32 tmp = tcg_temp_new_i32();
9649      TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9650  
9651      gen_helper_vfp_fcvtsd(tmp, n, fpst);
9652      tcg_gen_extu_i32_i64(d, tmp);
9653  }
9654  
9655  static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9656  {
9657      /*
9658       * 64 bit to 32 bit float conversion
9659       * with von Neumann rounding (round to odd)
9660       */
9661      TCGv_i32 tmp = tcg_temp_new_i32();
9662      gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9663      tcg_gen_extu_i32_i64(d, tmp);
9664  }
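/*
 * Round-to-odd forces the low bit of the result to 1 whenever any
 * discarded bits were non-zero, so a later conversion to a still
 * narrower format cannot be double-rounded; that is the reason
 * FCVTXN exists alongside FCVTN.
 */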
9665  
9666  static ArithOneOp * const f_vector_fcvtn[] = {
9667      NULL,
9668      gen_fcvtn_hs,
9669      gen_fcvtn_sd,
9670  };
9671  static ArithOneOp * const f_scalar_fcvtxn[] = {
9672      NULL,
9673      NULL,
9674      gen_fcvtxn_sd,
9675  };
9676  TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9677  TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9678  
9679  static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9680  {
9681      TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9682      TCGv_i32 tmp = tcg_temp_new_i32();
9683      gen_helper_bfcvt_pair(tmp, n, fpst);
9684      tcg_gen_extu_i32_i64(d, tmp);
9685  }
9686  
9687  static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
9688  {
9689      TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
9690      TCGv_i32 tmp = tcg_temp_new_i32();
9691      gen_helper_bfcvt_pair(tmp, n, fpst);
9692      tcg_gen_extu_i32_i64(d, tmp);
9693  }
9694  
9695  static ArithOneOp * const f_vector_bfcvtn[2][3] = {
9696      {
9697          NULL,
9698          gen_bfcvtn_hs,
9699          NULL,
9700      }, {
9701          NULL,
9702          gen_bfcvtn_ah_hs,
9703          NULL,
9704      }
9705  };
9706  TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
9707             f_vector_bfcvtn[s->fpcr_ah])
9708  
9709  static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9710  {
9711      static NeonGenWidenFn * const widenfns[3] = {
9712          gen_helper_neon_widen_u8,
9713          gen_helper_neon_widen_u16,
9714          tcg_gen_extu_i32_i64,
9715      };
9716      NeonGenWidenFn *widenfn;
9717      TCGv_i64 tcg_res[2];
9718      TCGv_i32 tcg_op;
9719      int part, pass;
9720  
9721      if (a->esz == MO_64) {
9722          return false;
9723      }
9724      if (!fp_access_check(s)) {
9725          return true;
9726      }
9727  
9728      tcg_op = tcg_temp_new_i32();
9729      widenfn = widenfns[a->esz];
9730      part = a->q ? 2 : 0;
9731  
9732      for (pass = 0; pass < 2; pass++) {
9733          read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9734          tcg_res[pass] = tcg_temp_new_i64();
9735          widenfn(tcg_res[pass], tcg_op);
9736          tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9737      }
9738  
9739      for (pass = 0; pass < 2; pass++) {
9740          write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9741      }
9742      return true;
9743  }
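/*
 * The single 64-bit shift above is lane-safe: widening leaves the top
 * half of each lane zero, so shifting the whole register left by
 * 8 << esz cannot carry bits across lane boundaries.  For example,
 * bytes {a, b, c, d} widen to 0x000d000c000b000a and the shift by 8
 * gives 0x0d000c000b000a00, i.e. every element shifted individually.
 */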
9744  
9745  static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9746  {
9747      int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9748  
9749      if (check <= 0) {
9750          return check == 0;
9751      }
9752  
9753      gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9754      return true;
9755  }
9756  
9757  TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9758  TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9759  
9760  static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9761                            const FPScalar1 *f, int rmode)
9762  {
9763      TCGv_i32 tcg_rmode = NULL;
9764      TCGv_ptr fpst;
9765      int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9766  
9767      if (check <= 0) {
9768          return check == 0;
9769      }
9770  
9771      fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9772      if (rmode >= 0) {
9773          tcg_rmode = gen_set_rmode(rmode, fpst);
9774      }
9775  
9776      if (a->esz == MO_64) {
9777          TCGv_i64 t64 = tcg_temp_new_i64();
9778  
9779          for (int pass = 0; pass < 2; ++pass) {
9780              read_vec_element(s, t64, a->rn, pass, MO_64);
9781              f->gen_d(t64, t64, fpst);
9782              write_vec_element(s, t64, a->rd, pass, MO_64);
9783          }
9784      } else {
9785          TCGv_i32 t32 = tcg_temp_new_i32();
9786          void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9787              = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9788  
9789          for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9790              read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9791              gen(t32, t32, fpst);
9792              write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9793          }
9794      }
9795      clear_vec_high(s, a->q, a->rd);
9796  
9797      if (rmode >= 0) {
9798          gen_restore_rmode(tcg_rmode, fpst);
9799      }
9800      return true;
9801  }
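/*
 * rmode >= 0 selects an explicit FPROUNDING_* mode for the duration
 * of the loop (installed and undone by gen_set_rmode / gen_restore_rmode);
 * rmode == -1 means "use the rounding mode already in FPCR", which is
 * why FRINTI_v and FRINTX_v pass -1 below.
 */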
9802  
9803  TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9804  
9805  TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9806  TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9807  TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9808  TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9809  TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9810  TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9811  TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9812  
9813  TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9814             &f_scalar_frint32, FPROUNDING_ZERO)
9815  TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9816  TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9817             &f_scalar_frint64, FPROUNDING_ZERO)
9818  TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9819  
9820  static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
9821                                             bool is_q, int rd, int rn, int data,
9822                                             gen_helper_gvec_2_ptr * const fns[3],
9823                                             ARMFPStatusFlavour fpsttype)
9824  {
9825      int check = fp_access_check_vector_hsd(s, is_q, esz);
9826      TCGv_ptr fpst;
9827  
9828      if (check <= 0) {
9829          return check == 0;
9830      }
9831  
9832      fpst = fpstatus_ptr(fpsttype);
9833      tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9834                         vec_full_reg_offset(s, rn), fpst,
9835                         is_q ? 16 : 8, vec_full_reg_size(s),
9836                         data, fns[esz - 1]);
9837      return true;
9838  }
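/*
 * The helper tables passed in here have three entries indexed by
 * esz - 1 (MO_16 -> 0, MO_32 -> 1, MO_64 -> 2); MO_8 never reaches the
 * table because fp_access_check_vector_hsd() is expected to reject
 * anything other than the half/single/double element sizes.
 */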
9839  
9840  static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9841                               int rd, int rn, int data,
9842                               gen_helper_gvec_2_ptr * const fns[3])
9843  {
9844      return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
9845                                            esz == MO_16 ? FPST_A64_F16 :
9846                                            FPST_A64);
9847  }
9848  
9849  static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
9850                                  int rd, int rn, int data,
9851                                  gen_helper_gvec_2_ptr * const fns[3])
9852  {
9853      return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
9854                                            fns, select_ah_fpst(s, esz));
9855  }
9856  
9857  static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9858      gen_helper_gvec_vcvt_sh,
9859      gen_helper_gvec_vcvt_sf,
9860      gen_helper_gvec_vcvt_sd,
9861  };
9862  TRANS(SCVTF_vi, do_gvec_op2_fpst,
9863        a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9864  TRANS(SCVTF_vf, do_gvec_op2_fpst,
9865        a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9866  
9867  static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9868      gen_helper_gvec_vcvt_uh,
9869      gen_helper_gvec_vcvt_uf,
9870      gen_helper_gvec_vcvt_ud,
9871  };
9872  TRANS(UCVTF_vi, do_gvec_op2_fpst,
9873        a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9874  TRANS(UCVTF_vf, do_gvec_op2_fpst,
9875        a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9876  
9877  static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9878      gen_helper_gvec_vcvt_rz_hs,
9879      gen_helper_gvec_vcvt_rz_fs,
9880      gen_helper_gvec_vcvt_rz_ds,
9881  };
9882  TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9883        a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9884  
9885  static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9886      gen_helper_gvec_vcvt_rz_hu,
9887      gen_helper_gvec_vcvt_rz_fu,
9888      gen_helper_gvec_vcvt_rz_du,
9889  };
9890  TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9891        a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9892  
9893  static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9894      gen_helper_gvec_vcvt_rm_sh,
9895      gen_helper_gvec_vcvt_rm_ss,
9896      gen_helper_gvec_vcvt_rm_sd,
9897  };
9898  
9899  static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9900      gen_helper_gvec_vcvt_rm_uh,
9901      gen_helper_gvec_vcvt_rm_us,
9902      gen_helper_gvec_vcvt_rm_ud,
9903  };
9904  
9905  TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9906        a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9907  TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9908        a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9909  TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9910        a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9911  TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9912        a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9913  TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9914        a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9915  TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9916        a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9917  TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9918        a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9919  TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9920        a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9921  TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9922        a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9923  TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9924        a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9925  
9926  static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9927      gen_helper_gvec_fceq0_h,
9928      gen_helper_gvec_fceq0_s,
9929      gen_helper_gvec_fceq0_d,
9930  };
9931  TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9932  
9933  static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9934      gen_helper_gvec_fcgt0_h,
9935      gen_helper_gvec_fcgt0_s,
9936      gen_helper_gvec_fcgt0_d,
9937  };
9938  TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9939  
9940  static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9941      gen_helper_gvec_fcge0_h,
9942      gen_helper_gvec_fcge0_s,
9943      gen_helper_gvec_fcge0_d,
9944  };
9945  TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9946  
9947  static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9948      gen_helper_gvec_fclt0_h,
9949      gen_helper_gvec_fclt0_s,
9950      gen_helper_gvec_fclt0_d,
9951  };
9952  TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9953  
9954  static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9955      gen_helper_gvec_fcle0_h,
9956      gen_helper_gvec_fcle0_s,
9957      gen_helper_gvec_fcle0_d,
9958  };
9959  TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9960  
9961  static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9962      gen_helper_gvec_frecpe_h,
9963      gen_helper_gvec_frecpe_s,
9964      gen_helper_gvec_frecpe_d,
9965  };
9966  static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
9967      gen_helper_gvec_frecpe_h,
9968      gen_helper_gvec_frecpe_rpres_s,
9969      gen_helper_gvec_frecpe_d,
9970  };
9971  TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9972        s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
9973  
9974  static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9975      gen_helper_gvec_frsqrte_h,
9976      gen_helper_gvec_frsqrte_s,
9977      gen_helper_gvec_frsqrte_d,
9978  };
9979  static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
9980      gen_helper_gvec_frsqrte_h,
9981      gen_helper_gvec_frsqrte_rpres_s,
9982      gen_helper_gvec_frsqrte_d,
9983  };
9984  TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9985        s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
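/*
 * With FPCR.AH set and FEAT_RPRES present, the single-precision
 * estimate helpers above are swapped for the increased-precision
 * "rpres" variants; the half- and double-precision entries stay
 * the same.
 */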
9986  
9987  static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9988  {
9989      /* Handle 2-reg-misc ops which are widening (so each size element
9990       * in the source becomes a 2*size element in the destination).
9991       * The only instruction like this is FCVTL.
9992       */
9993      int pass;
9994      TCGv_ptr fpst;
9995  
9996      if (!fp_access_check(s)) {
9997          return true;
9998      }
9999  
10000      if (a->esz == MO_64) {
10001          /* 32 -> 64 bit fp conversion */
10002          TCGv_i64 tcg_res[2];
10003          TCGv_i32 tcg_op = tcg_temp_new_i32();
10004          int srcelt = a->q ? 2 : 0;
10005  
10006          fpst = fpstatus_ptr(FPST_A64);
10007  
10008          for (pass = 0; pass < 2; pass++) {
10009              tcg_res[pass] = tcg_temp_new_i64();
10010              read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10011              gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10012          }
10013          for (pass = 0; pass < 2; pass++) {
10014              write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10015          }
10016      } else {
10017          /* 16 -> 32 bit fp conversion */
10018          int srcelt = a->q ? 4 : 0;
10019          TCGv_i32 tcg_res[4];
10020          TCGv_i32 ahp = get_ahp_flag();
10021  
10022          fpst = fpstatus_ptr(FPST_A64_F16);
10023  
10024          for (pass = 0; pass < 4; pass++) {
10025              tcg_res[pass] = tcg_temp_new_i32();
10026              read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10027              gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10028                                             fpst, ahp);
10029          }
10030          for (pass = 0; pass < 4; pass++) {
10031              write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10032          }
10033      }
10034      clear_vec_high(s, true, a->rd);
10035      return true;
10036  }
10037  
10038  static bool trans_OK(DisasContext *s, arg_OK *a)
10039  {
10040      return true;
10041  }
10042  
10043  static bool trans_FAIL(DisasContext *s, arg_OK *a)
10044  {
10045      s->is_nonstreaming = true;
10046      return true;
10047  }
10048  
10049  /**
10050   * btype_destination_ok:
10051   * @insn: The instruction at the branch destination
10052   * @bt: SCTLR_ELx.BT
10053   * @btype: PSTATE.BTYPE, which is known to be non-zero
10054   *
10055   * On a guarded page, there are a limited number of insns
10056   * that may be present at the branch target:
10057   *   - branch target identifiers,
10058   *   - paciasp, pacibsp,
10059   *   - BRK insn,
10060   *   - HLT insn.
10061   * Anything else causes a Branch Target Exception.
10062   *
10063   * Return true if the branch is compatible, false to raise BTITRAP.
10064   */
10065  static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10066  {
10067      if ((insn & 0xfffff01fu) == 0xd503201fu) {
10068          /* HINT space */
10069          switch (extract32(insn, 5, 7)) {
10070          case 0b011001: /* PACIASP */
10071          case 0b011011: /* PACIBSP */
10072              /*
10073               * If SCTLR_ELx.BT, then PACI*SP are not compatible
10074               * with btype == 3.  Otherwise all btype are ok.
10075               */
10076              return !bt || btype != 3;
10077          case 0b100000: /* BTI */
10078              /* Not compatible with any btype.  */
10079              return false;
10080          case 0b100010: /* BTI c */
10081              /* Not compatible with btype == 3 */
10082              return btype != 3;
10083          case 0b100100: /* BTI j */
10084              /* Not compatible with btype == 2 */
10085              return btype != 2;
10086          case 0b100110: /* BTI jc */
10087              /* Compatible with any btype.  */
10088              return true;
10089          }
10090      } else {
10091          switch (insn & 0xffe0001fu) {
10092          case 0xd4200000u: /* BRK */
10093          case 0xd4400000u: /* HLT */
10094              /* Give priority to the breakpoint exception.  */
10095              return true;
10096          }
10097      }
10098      return false;
10099  }
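/*
 * Worked example: BTI c is HINT #34, i.e. 0xd503201f | (34 << 5) ==
 * 0xd503245f.  That matches the 0xfffff01f mask above,
 * extract32(insn, 5, 7) recovers 0b0100010, and the function returns
 * btype != 3, as the architecture requires for BTI c.
 */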
10100  
10101  static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10102                                            CPUState *cpu)
10103  {
10104      DisasContext *dc = container_of(dcbase, DisasContext, base);
10105      CPUARMState *env = cpu_env(cpu);
10106      ARMCPU *arm_cpu = env_archcpu(env);
10107      CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10108      int bound, core_mmu_idx;
10109  
10110      dc->isar = &arm_cpu->isar;
10111      dc->condjmp = 0;
10112      dc->pc_save = dc->base.pc_first;
10113      dc->aarch64 = true;
10114      dc->thumb = false;
10115      dc->sctlr_b = 0;
10116      dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10117      dc->condexec_mask = 0;
10118      dc->condexec_cond = 0;
10119      core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10120      dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10121      dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10122      dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10123      dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10124      dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10125  #if !defined(CONFIG_USER_ONLY)
10126      dc->user = (dc->current_el == 0);
10127  #endif
10128      dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10129      dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10130      dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10131      dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10132      dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10133      dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10134      dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10135      dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10136      dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10137      dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10138      dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10139      dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10140      dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10141      dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10142      dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10143      dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10144      dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10145      dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10146      dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10147      dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10148      dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10149      dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10150      dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10151      dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10152      dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10153      dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
10154      dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10155      dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10156      dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10157      dc->vec_len = 0;
10158      dc->vec_stride = 0;
10159      dc->cp_regs = arm_cpu->cp_regs;
10160      dc->features = env->features;
10161      dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10162      dc->gm_blocksize = arm_cpu->gm_blocksize;
10163  
10164  #ifdef CONFIG_USER_ONLY
10165      /* In sve_probe_page, we assume TBI is enabled. */
10166      tcg_debug_assert(dc->tbid & 1);
10167  #endif
10168  
10169      dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10170  
10171      /* Single step state. The code-generation logic here is:
10172       *  SS_ACTIVE == 0:
10173       *   generate code with no special handling for single-stepping (except
10174       *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10175       *   this happens anyway because those changes are all system register or
10176       *   PSTATE writes).
10177       *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10178       *   emit code for one insn
10179       *   emit code to clear PSTATE.SS
10180       *   emit code to generate software step exception for completed step
10181       *   end TB (as usual for having generated an exception)
10182       *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10183       *   emit code to generate a software step exception
10184       *   end the TB
10185       */
10186      dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10187      dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10188      dc->is_ldex = false;
10189  
10190      /* Bound the number of insns to execute to those left on the page.  */
10191      bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
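    /*
     * Worked example: with 4 KiB pages, pc_first | TARGET_PAGE_MASK is
     * (page offset - page size) as a signed value, so an insn at page
     * offset 0xffc gives -(-4) / 4 == 1 instruction left on the page.
     */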
10192  
10193      /* If architectural single step active, limit to 1.  */
10194      if (dc->ss_active) {
10195          bound = 1;
10196      }
10197      dc->base.max_insns = MIN(dc->base.max_insns, bound);
10198  }
10199  
10200  static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10201  {
10202  }
10203  
10204  static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10205  {
10206      DisasContext *dc = container_of(dcbase, DisasContext, base);
10207      target_ulong pc_arg = dc->base.pc_next;
10208  
10209      if (tb_cflags(dcbase->tb) & CF_PCREL) {
10210          pc_arg &= ~TARGET_PAGE_MASK;
10211      }
10212      tcg_gen_insn_start(pc_arg, 0, 0);
10213      dc->insn_start_updated = false;
10214  }
10215  
10216  static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10217  {
10218      DisasContext *s = container_of(dcbase, DisasContext, base);
10219      CPUARMState *env = cpu_env(cpu);
10220      uint64_t pc = s->base.pc_next;
10221      uint32_t insn;
10222  
10223      /* Singlestep exceptions have the highest priority. */
10224      if (s->ss_active && !s->pstate_ss) {
10225          /* Singlestep state is Active-pending.
10226           * If we're in this state at the start of a TB then either
10227           *  a) we just took an exception to an EL which is being debugged
10228           *     and this is the first insn in the exception handler
10229           *  b) debug exceptions were masked and we just unmasked them
10230           *     without changing EL (eg by clearing PSTATE.D)
10231           * In either case we're going to take a swstep exception in the
10232           * "did not step an insn" case, and so the syndrome ISV and EX
10233           * bits should be zero.
10234           */
10235          assert(s->base.num_insns == 1);
10236          gen_swstep_exception(s, 0, 0);
10237          s->base.is_jmp = DISAS_NORETURN;
10238          s->base.pc_next = pc + 4;
10239          return;
10240      }
10241  
10242      if (pc & 3) {
10243          /*
10244           * PC alignment fault.  This has priority over the instruction abort
10245           * that we would receive from a translation fault via arm_ldl_code.
10246           * This should only be possible after an indirect branch, at the
10247           * start of the TB.
10248           */
10249          assert(s->base.num_insns == 1);
10250          gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
10251          s->base.is_jmp = DISAS_NORETURN;
10252          s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10253          return;
10254      }
10255  
10256      s->pc_curr = pc;
10257      insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10258      s->insn = insn;
10259      s->base.pc_next = pc + 4;
10260  
10261      s->fp_access_checked = 0;
10262      s->sve_access_checked = 0;
10263  
10264      if (s->pstate_il) {
10265          /*
10266           * Illegal execution state. This has priority over BTI
10267           * exceptions, but comes after instruction abort exceptions.
10268           */
10269          gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10270          return;
10271      }
10272  
10273      if (dc_isar_feature(aa64_bti, s)) {
10274          if (s->base.num_insns == 1) {
10275              /* First insn can have btype set to non-zero.  */
10276              tcg_debug_assert(s->btype >= 0);
10277  
10278              /*
10279               * Note that the Branch Target Exception has fairly high
10280               * priority -- below debugging exceptions but above almost
10281               * everything else.  This allows us to handle it now
10282               * instead of waiting until the insn is otherwise decoded.
10283               *
10284               * We can check all but the guarded page check here;
10285               * defer the latter to a helper.
10286               */
10287              if (s->btype != 0
10288                  && !btype_destination_ok(insn, s->bt, s->btype)) {
10289                  gen_helper_guarded_page_check(tcg_env);
10290              }
10291          } else {
10292              /* Not the first insn: btype must be 0.  */
10293              tcg_debug_assert(s->btype == 0);
10294          }
10295      }
10296  
10297      s->is_nonstreaming = false;
10298      if (s->sme_trap_nonstreaming) {
10299          disas_sme_fa64(s, insn);
10300      }
10301  
10302      if (!disas_a64(s, insn) &&
10303          !disas_sme(s, insn) &&
10304          !disas_sve(s, insn)) {
10305          unallocated_encoding(s);
10306      }
10307  
10308      /*
10309       * After execution of most insns, btype is reset to 0.
10310       * Note that we set btype == -1 when the insn sets btype.
10311       */
10312      if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10313          reset_btype(s);
10314      }
10315  }
10316  
10317  static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10318  {
10319      DisasContext *dc = container_of(dcbase, DisasContext, base);
10320  
10321      if (unlikely(dc->ss_active)) {
10322          /* Note that this means single stepping WFI doesn't halt the CPU.
10323           * For conditional branch insns this is harmless unreachable code as
10324           * gen_goto_tb() has already handled emitting the debug exception
10325           * (and thus a tb-jump is not possible when singlestepping).
10326           */
10327          switch (dc->base.is_jmp) {
10328          default:
10329              gen_a64_update_pc(dc, 4);
10330              /* fall through */
10331          case DISAS_EXIT:
10332          case DISAS_JUMP:
10333              gen_step_complete_exception(dc);
10334              break;
10335          case DISAS_NORETURN:
10336              break;
10337          }
10338      } else {
10339          switch (dc->base.is_jmp) {
10340          case DISAS_NEXT:
10341          case DISAS_TOO_MANY:
10342              gen_goto_tb(dc, 1, 4);
10343              break;
10344          default:
10345          case DISAS_UPDATE_EXIT:
10346              gen_a64_update_pc(dc, 4);
10347              /* fall through */
10348          case DISAS_EXIT:
10349              tcg_gen_exit_tb(NULL, 0);
10350              break;
10351          case DISAS_UPDATE_NOCHAIN:
10352              gen_a64_update_pc(dc, 4);
10353              /* fall through */
10354          case DISAS_JUMP:
10355              tcg_gen_lookup_and_goto_ptr();
10356              break;
10357          case DISAS_NORETURN:
10358          case DISAS_SWI:
10359              break;
10360          case DISAS_WFE:
10361              gen_a64_update_pc(dc, 4);
10362              gen_helper_wfe(tcg_env);
10363              break;
10364          case DISAS_YIELD:
10365              gen_a64_update_pc(dc, 4);
10366              gen_helper_yield(tcg_env);
10367              break;
10368          case DISAS_WFI:
10369              /*
10370               * This is a special case because we don't want to just halt
10371               * the CPU if trying to debug across a WFI.
10372               */
10373              gen_a64_update_pc(dc, 4);
10374              gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10375              /*
10376               * The helper doesn't necessarily throw an exception, but we
10377               * must go back to the main loop to check for interrupts anyway.
10378               */
10379              tcg_gen_exit_tb(NULL, 0);
10380              break;
10381          }
10382      }
10383  }
10384  
10385  const TranslatorOps aarch64_translator_ops = {
10386      .init_disas_context = aarch64_tr_init_disas_context,
10387      .tb_start           = aarch64_tr_tb_start,
10388      .insn_start         = aarch64_tr_insn_start,
10389      .translate_insn     = aarch64_tr_translate_insn,
10390      .tb_stop            = aarch64_tr_tb_stop,
10391  };
10392