/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "exec/target_page.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
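 * For example, x == (4 << 3) | 2 decodes as imm == 4 with a 4-byte
 * element size, giving a byte offset of 16.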
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
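/* The MTE tag granule is 16 bytes, so e.g. an encoded offset of 3 scales to 48. */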
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
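    /*
     * With CF_PCREL, the pc global is only known to match s->pc_save,
     * so the target must be computed relative to that; otherwise the
     * absolute address can be used directly.
     */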
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
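 * For example, with tbi == 3 a user address 0x0a007fffeeee0000 (bit 55
 * clear) becomes 0x00007fffeeee0000, while a kernel address
 * 0x0aff800000001000 (bit 55 set) becomes 0xffff800000001000.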
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * are not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
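 * The alignment requirement is taken from the memop of a single access,
 * while SIZEM1 describes the total byte count of the whole sequence.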
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

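    /*
     * Compute ((addr + imm) & 15) + access size; the access crosses a
     * 16-byte boundary exactly when this sum exceeds 16.
     */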
    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If
 * you need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the read functions
 * always return a fresh temporary.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/*
 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
 * - if FPCR.NEP == 0, clear the high elements of reg
 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
 *   (i.e. merge the result with those high elements)
 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
 */
static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i64 v)
{
    if (!s->fpcr_nep) {
        write_fp_dreg(s, reg, v);
        return;
    }

    /*
     * Move from mergereg to reg; this sets the high elements and
     * clears the bits above 128 as a side effect.
     */
    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
}

/*
 * Write a single-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 */
static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
}

/*
 * Write a half-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 * The caller must ensure that the top 16 bits of v are zero.
 */
static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, ARMFPStatusFlavour fpsttype, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, ARMFPStatusFlavour fpsttype,
                              int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
 * which for float32 is
 *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negh(chs_s, s);
    gen_vfp_absh(abs_s, s);
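    /*
     * 0x7c00 is the float16 +infinity bit pattern: abs(s) compares
     * greater than infinity (unsigned) exactly when s is a NaN.
     */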
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, chs_s);
}

static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negs(chs_s, s);
    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, chs_s);
}

static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();

    gen_vfp_negd(chs_s, s);
    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, chs_s);
}

/*
 * These functions implement
 *  d = floatN_is_any_nan(s) ? s : floatN_abs(s)
 * which for float32 is
 *  d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, abs_s);
}

static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, abs_s);
}

static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64();

    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, abs_s);
}

static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negh(d, s);
    } else {
        gen_vfp_negh(d, s);
    }
}

static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negs(d, s);
    } else {
        gen_vfp_negs(d, s);
    }
}

static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negd(d, s);
    } else {
        gen_vfp_negd(d, s);
    }
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

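    /*
     * Signed overflow occurs when the operands have the same sign but
     * the result's sign differs: VF = (result ^ t0) & ~(t0 ^ t1), bit 63.
     */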
    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

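    /*
     * For a signed load into a W register (extend == true), the sign
     * extension is only valid within the low 32 bits; zero the upper half.
     */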
    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
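    /*
     * fp_access_checked is a tri-state: 0 means not yet checked for
     * this insn, 1 checked and allowed, -1 checked and trapped.
     */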
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = -1;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = 1;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

1446 /*
1447  * Check that SVE access is enabled.  If it is, return true.
1448  * If not, emit code to generate an appropriate exception and return false.
1449  * This function corresponds to CheckSVEEnabled().
1450  */
sve_access_check(DisasContext * s)1451 bool sve_access_check(DisasContext *s)
1452 {
1453     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1454         bool ret;
1455 
1456         assert(dc_isar_feature(aa64_sme, s));
1457         ret = sme_sm_enabled_check(s);
1458         s->sve_access_checked = (ret ? 1 : -1);
1459         return ret;
1460     }
1461     if (s->sve_excp_el) {
1462         /* Assert that we only raise one exception per instruction. */
1463         assert(!s->sve_access_checked);
1464         gen_exception_insn_el(s, 0, EXCP_UDEF,
1465                               syn_sve_access_trap(), s->sve_excp_el);
1466         s->sve_access_checked = -1;
1467         return false;
1468     }
1469     s->sve_access_checked = 1;
1470     return fp_access_check(s);
1471 }
1472 
1473 /*
1474  * Check that SME access is enabled, raise an exception if not.
1475  * Note that this function corresponds to CheckSMEAccess and is
1476  * only used directly for cpregs.
1477  */
1478 static bool sme_access_check(DisasContext *s)
1479 {
1480     if (s->sme_excp_el) {
1481         gen_exception_insn_el(s, 0, EXCP_UDEF,
1482                               syn_smetrap(SME_ET_AccessTrap, false),
1483                               s->sme_excp_el);
1484         return false;
1485     }
1486     return true;
1487 }
1488 
1489 /* This function corresponds to CheckSMEEnabled. */
1490 bool sme_enabled_check(DisasContext *s)
1491 {
1492     /*
1493      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1494      * to be zero when fp_excp_el has priority.  This is because we need
1495      * sme_excp_el by itself for cpregs access checks.
1496      */
1497     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1498         bool ret = sme_access_check(s);
1499         s->fp_access_checked = (ret ? 1 : -1);
1500         return ret;
1501     }
1502     return fp_access_check_only(s);
1503 }
1504 
1505 /* Common subroutine for CheckSMEAnd*Enabled. */
1506 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1507 {
1508     if (!sme_enabled_check(s)) {
1509         return false;
1510     }
1511     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1512         gen_exception_insn(s, 0, EXCP_UDEF,
1513                            syn_smetrap(SME_ET_NotStreaming, false));
1514         return false;
1515     }
1516     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1517         gen_exception_insn(s, 0, EXCP_UDEF,
1518                            syn_smetrap(SME_ET_InactiveZA, false));
1519         return false;
1520     }
1521     return true;
1522 }
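/*
 * Hypothetical usage sketch, assuming the R_SVCR_*_MASK names from the
 * SVCR FIELD definitions: a CheckSMEAndZAEnabled() equivalent would
 * request only the ZA bit, and CheckStreamingSVEAndZAEnabled() both:
 *
 *     sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
 *     sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
 */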
1523 
1524 /*
1525  * Expanders for AdvSIMD translation functions.
1526  */
1527 
1528 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1529                             gen_helper_gvec_2 *fn)
1530 {
1531     if (!a->q && a->esz == MO_64) {
1532         return false;
1533     }
1534     if (fp_access_check(s)) {
1535         gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1536     }
1537     return true;
1538 }
1539 
1540 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1541                             gen_helper_gvec_3 *fn)
1542 {
1543     if (!a->q && a->esz == MO_64) {
1544         return false;
1545     }
1546     if (fp_access_check(s)) {
1547         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1548     }
1549     return true;
1550 }
1551 
1552 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1553 {
1554     if (!a->q && a->esz == MO_64) {
1555         return false;
1556     }
1557     if (fp_access_check(s)) {
1558         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1559     }
1560     return true;
1561 }
1562 
1563 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1564 {
1565     if (a->esz == MO_64) {
1566         return false;
1567     }
1568     if (fp_access_check(s)) {
1569         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1570     }
1571     return true;
1572 }
1573 
1574 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1575 {
1576     if (a->esz == MO_8) {
1577         return false;
1578     }
1579     return do_gvec_fn3_no64(s, a, fn);
1580 }
1581 
1582 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1583 {
1584     if (!a->q && a->esz == MO_64) {
1585         return false;
1586     }
1587     if (fp_access_check(s)) {
1588         gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1589     }
1590     return true;
1591 }
1592 
1593 /*
1594  * This utility function is for doing register extension with an
1595  * optional shift. You will likely want to pass a temporary for the
1596  * destination register. See DecodeRegExtend() in the ARM ARM.
1597  */
1598 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1599                               int option, unsigned int shift)
1600 {
1601     int extsize = extract32(option, 0, 2);
1602     bool is_signed = extract32(option, 2, 1);
1603 
1604     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1605     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1606 }
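/*
 * Worked example of the DecodeRegExtend() mapping (illustrative): for
 * an "add x0, x1, w2, uxtw #2" operand, option = 0b010 (UXTW) yields
 * extsize = MO_32 and is_signed = false, so tcg_out becomes
 * (uint64_t)(uint32_t)tcg_in << 2; option = 0b111 (SXTX) sign-extends
 * all 64 bits before the shift.
 */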
1607 
1608 static inline void gen_check_sp_alignment(DisasContext *s)
1609 {
1610     /* The AArch64 architecture mandates that (if enabled via PSTATE
1611      * or SCTLR bits) there is a check that SP is 16-aligned on every
1612      * SP-relative load or store (with an exception generated if it is not).
1613      * In line with general QEMU practice regarding misaligned accesses,
1614      * we omit these checks for the sake of guest program performance.
1615      * This function is provided as a hook so we can more easily add these
1616      * checks in future (possibly as a "favour catching guest program bugs
1617      * over speed" user selectable option).
1618      */
1619 }
1620 
1621 /*
1622  * The instruction disassembly implemented here matches
1623  * the instruction encoding classifications in chapter C4
1624  * of the ARM Architecture Reference Manual (DDI0487B_a);
1625  * classification names and decode diagrams here should generally
1626  * match up with those in the manual.
1627  */
1628 
1629 static bool trans_B(DisasContext *s, arg_i *a)
1630 {
1631     reset_btype(s);
1632     gen_goto_tb(s, 0, a->imm);
1633     return true;
1634 }
1635 
1636 static bool trans_BL(DisasContext *s, arg_i *a)
1637 {
1638     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1639     reset_btype(s);
1640     gen_goto_tb(s, 0, a->imm);
1641     return true;
1642 }
1643 
1644 
1645 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1646 {
1647     DisasLabel match;
1648     TCGv_i64 tcg_cmp;
1649 
1650     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1651     reset_btype(s);
1652 
1653     match = gen_disas_label(s);
1654     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1655                         tcg_cmp, 0, match.label);
1656     gen_goto_tb(s, 0, 4);
1657     set_disas_label(s, match);
1658     gen_goto_tb(s, 1, a->imm);
1659     return true;
1660 }
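/*
 * Sketch of the two-exit pattern above (pseudo-TCG, for illustration):
 * a guest "cbnz x0, target" lowers to roughly
 *
 *     brcondi_i64 ne, x0, 0 -> match
 *     goto_tb 0, pc + 4           -- not taken: fall through
 *   match:
 *     goto_tb 1, pc + imm         -- taken
 */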
1661 
1662 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1663 {
1664     DisasLabel match;
1665     TCGv_i64 tcg_cmp;
1666 
1667     tcg_cmp = tcg_temp_new_i64();
1668     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1669 
1670     reset_btype(s);
1671 
1672     match = gen_disas_label(s);
1673     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1674                         tcg_cmp, 0, match.label);
1675     gen_goto_tb(s, 0, 4);
1676     set_disas_label(s, match);
1677     gen_goto_tb(s, 1, a->imm);
1678     return true;
1679 }
1680 
1681 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1682 {
1683     /* BC.cond is only present with FEAT_HBC */
1684     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1685         return false;
1686     }
1687     reset_btype(s);
1688     if (a->cond < 0x0e) {
1689         /* genuinely conditional branches */
1690         DisasLabel match = gen_disas_label(s);
1691         arm_gen_test_cc(a->cond, match.label);
1692         gen_goto_tb(s, 0, 4);
1693         set_disas_label(s, match);
1694         gen_goto_tb(s, 1, a->imm);
1695     } else {
1696         /* 0xe and 0xf are both "always" conditions */
1697         gen_goto_tb(s, 0, a->imm);
1698     }
1699     return true;
1700 }
1701 
1702 static void set_btype_for_br(DisasContext *s, int rn)
1703 {
1704     if (dc_isar_feature(aa64_bti, s)) {
1705         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1706         if (rn == 16 || rn == 17) {
1707             set_btype(s, 1);
1708         } else {
1709             TCGv_i64 pc = tcg_temp_new_i64();
1710             gen_pc_plus_diff(s, pc, 0);
1711             gen_helper_guarded_page_br(tcg_env, pc);
1712             s->btype = -1;
1713         }
1714     }
1715 }
1716 
1717 static void set_btype_for_blr(DisasContext *s)
1718 {
1719     if (dc_isar_feature(aa64_bti, s)) {
1720         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1721         set_btype(s, 2);
1722     }
1723 }
1724 
1725 static bool trans_BR(DisasContext *s, arg_r *a)
1726 {
1727     set_btype_for_br(s, a->rn);
1728     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1729     s->base.is_jmp = DISAS_JUMP;
1730     return true;
1731 }
1732 
1733 static bool trans_BLR(DisasContext *s, arg_r *a)
1734 {
1735     TCGv_i64 dst = cpu_reg(s, a->rn);
1736     TCGv_i64 lr = cpu_reg(s, 30);
1737     if (dst == lr) {
1738         TCGv_i64 tmp = tcg_temp_new_i64();
1739         tcg_gen_mov_i64(tmp, dst);
1740         dst = tmp;
1741     }
1742     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1743     gen_a64_set_pc(s, dst);
1744     set_btype_for_blr(s);
1745     s->base.is_jmp = DISAS_JUMP;
1746     return true;
1747 }
1748 
1749 static bool trans_RET(DisasContext *s, arg_r *a)
1750 {
1751     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1752     s->base.is_jmp = DISAS_JUMP;
1753     return true;
1754 }
1755 
1756 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1757                                    TCGv_i64 modifier, bool use_key_a)
1758 {
1759     TCGv_i64 truedst;
1760     /*
1761      * Return the branch target for a BRAA/RETA/etc, which is either
1762      * just the destination dst, or that value with the pauth check
1763      * done and the code removed from the high bits.
1764      */
1765     if (!s->pauth_active) {
1766         return dst;
1767     }
1768 
1769     truedst = tcg_temp_new_i64();
1770     if (use_key_a) {
1771         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1772     } else {
1773         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1774     }
1775     return truedst;
1776 }
1777 
1778 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1779 {
1780     TCGv_i64 dst;
1781 
1782     if (!dc_isar_feature(aa64_pauth, s)) {
1783         return false;
1784     }
1785 
1786     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1787     set_btype_for_br(s, a->rn);
1788     gen_a64_set_pc(s, dst);
1789     s->base.is_jmp = DISAS_JUMP;
1790     return true;
1791 }
1792 
1793 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1794 {
1795     TCGv_i64 dst, lr;
1796 
1797     if (!dc_isar_feature(aa64_pauth, s)) {
1798         return false;
1799     }
1800 
1801     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1802     lr = cpu_reg(s, 30);
1803     if (dst == lr) {
1804         TCGv_i64 tmp = tcg_temp_new_i64();
1805         tcg_gen_mov_i64(tmp, dst);
1806         dst = tmp;
1807     }
1808     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1809     gen_a64_set_pc(s, dst);
1810     set_btype_for_blr(s);
1811     s->base.is_jmp = DISAS_JUMP;
1812     return true;
1813 }
1814 
1815 static bool trans_RETA(DisasContext *s, arg_reta *a)
1816 {
1817     TCGv_i64 dst;
1818 
1819     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1820     gen_a64_set_pc(s, dst);
1821     s->base.is_jmp = DISAS_JUMP;
1822     return true;
1823 }
1824 
1825 static bool trans_BRA(DisasContext *s, arg_bra *a)
1826 {
1827     TCGv_i64 dst;
1828 
1829     if (!dc_isar_feature(aa64_pauth, s)) {
1830         return false;
1831     }
1832     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1833     gen_a64_set_pc(s, dst);
1834     set_btype_for_br(s, a->rn);
1835     s->base.is_jmp = DISAS_JUMP;
1836     return true;
1837 }
1838 
1839 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1840 {
1841     TCGv_i64 dst, lr;
1842 
1843     if (!dc_isar_feature(aa64_pauth, s)) {
1844         return false;
1845     }
1846     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1847     lr = cpu_reg(s, 30);
1848     if (dst == lr) {
1849         TCGv_i64 tmp = tcg_temp_new_i64();
1850         tcg_gen_mov_i64(tmp, dst);
1851         dst = tmp;
1852     }
1853     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1854     gen_a64_set_pc(s, dst);
1855     set_btype_for_blr(s);
1856     s->base.is_jmp = DISAS_JUMP;
1857     return true;
1858 }
1859 
1860 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1861 {
1862     TCGv_i64 dst;
1863 
1864     if (s->current_el == 0) {
1865         return false;
1866     }
1867     if (s->trap_eret) {
1868         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1869         return true;
1870     }
1871     dst = tcg_temp_new_i64();
1872     tcg_gen_ld_i64(dst, tcg_env,
1873                    offsetof(CPUARMState, elr_el[s->current_el]));
1874 
1875     translator_io_start(&s->base);
1876 
1877     gen_helper_exception_return(tcg_env, dst);
1878     /* Must exit loop to check unmasked IRQs */
1879     s->base.is_jmp = DISAS_EXIT;
1880     return true;
1881 }
1882 
1883 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1884 {
1885     TCGv_i64 dst;
1886 
1887     if (!dc_isar_feature(aa64_pauth, s)) {
1888         return false;
1889     }
1890     if (s->current_el == 0) {
1891         return false;
1892     }
1893     /* The FGT trap takes precedence over an auth trap. */
1894     if (s->trap_eret) {
1895         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1896         return true;
1897     }
1898     dst = tcg_temp_new_i64();
1899     tcg_gen_ld_i64(dst, tcg_env,
1900                    offsetof(CPUARMState, elr_el[s->current_el]));
1901 
1902     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1903 
1904     translator_io_start(&s->base);
1905 
1906     gen_helper_exception_return(tcg_env, dst);
1907     /* Must exit loop to check unmasked IRQs */
1908     s->base.is_jmp = DISAS_EXIT;
1909     return true;
1910 }
1911 
1912 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1913 {
1914     return true;
1915 }
1916 
1917 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1918 {
1919     /*
1920      * When running in MTTCG we don't generate jumps to the yield and
1921      * WFE helpers as it won't affect the scheduling of other vCPUs.
1922      * If we wanted to more completely model WFE/SEV so we don't busy
1923      * spin unnecessarily we would need to do something more involved.
1924      */
1925     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1926         s->base.is_jmp = DISAS_YIELD;
1927     }
1928     return true;
1929 }
1930 
1931 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1932 {
1933     s->base.is_jmp = DISAS_WFI;
1934     return true;
1935 }
1936 
1937 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1938 {
1939     /*
1940      * When running in MTTCG we don't generate jumps to the yield and
1941      * WFE helpers as it won't affect the scheduling of other vCPUs.
1942      * If we wanted to more completely model WFE/SEV so we don't busy
1943      * spin unnecessarily we would need to do something more involved.
1944      */
1945     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1946         s->base.is_jmp = DISAS_WFE;
1947     }
1948     return true;
1949 }
1950 
1951 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1952 {
1953     if (!dc_isar_feature(aa64_wfxt, s)) {
1954         return false;
1955     }
1956 
1957     /*
1958      * Because we need to pass the register value to the helper,
1959      * it's easier to emit the code now, unlike trans_WFI which
1960      * defers it to aarch64_tr_tb_stop(). That means we need to
1961      * check ss_active so that single-stepping a WFIT doesn't halt.
1962      */
1963     if (s->ss_active) {
1964         /* Act like a NOP under architectural singlestep */
1965         return true;
1966     }
1967 
1968     gen_a64_update_pc(s, 4);
1969     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1970     /* Go back to the main loop to check for interrupts */
1971     s->base.is_jmp = DISAS_EXIT;
1972     return true;
1973 }
1974 
1975 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1976 {
1977     if (!dc_isar_feature(aa64_wfxt, s)) {
1978         return false;
1979     }
1980 
1981     /*
1982      * We rely here on our WFE implementation being a NOP, so we
1983      * don't need to do anything different to handle the WFET timeout
1984      * from what trans_WFE does.
1985      */
1986     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1987         s->base.is_jmp = DISAS_WFE;
1988     }
1989     return true;
1990 }
1991 
1992 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1993 {
1994     if (s->pauth_active) {
1995         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1996     }
1997     return true;
1998 }
1999 
2000 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2001 {
2002     if (s->pauth_active) {
2003         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2004     }
2005     return true;
2006 }
2007 
2008 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2009 {
2010     if (s->pauth_active) {
2011         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2012     }
2013     return true;
2014 }
2015 
2016 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2017 {
2018     if (s->pauth_active) {
2019         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2020     }
2021     return true;
2022 }
2023 
2024 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2025 {
2026     if (s->pauth_active) {
2027         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2028     }
2029     return true;
2030 }
2031 
2032 static bool trans_ESB(DisasContext *s, arg_ESB *a)
2033 {
2034     /* Without RAS, we must implement this as a NOP. */
2035     if (dc_isar_feature(aa64_ras, s)) {
2036         /*
2037          * QEMU does not have a source of physical SErrors,
2038          * so we are only concerned with virtual SErrors.
2039          * The pseudocode in the ARM ARM for this case is
2040          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2041          *      AArch64.vESBOperation();
2042          * Most of the condition can be evaluated at translation time.
2043          * Test for EL2 present, and defer test for SEL2 to runtime.
2044          */
2045         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2046             gen_helper_vesb(tcg_env);
2047         }
2048     }
2049     return true;
2050 }
2051 
2052 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2053 {
2054     if (s->pauth_active) {
2055         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2056     }
2057     return true;
2058 }
2059 
2060 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2061 {
2062     if (s->pauth_active) {
2063         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2064     }
2065     return true;
2066 }
2067 
2068 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2069 {
2070     if (s->pauth_active) {
2071         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2072     }
2073     return true;
2074 }
2075 
2076 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2077 {
2078     if (s->pauth_active) {
2079         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2080     }
2081     return true;
2082 }
2083 
2084 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2085 {
2086     if (s->pauth_active) {
2087         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2088     }
2089     return true;
2090 }
2091 
2092 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2093 {
2094     if (s->pauth_active) {
2095         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2096     }
2097     return true;
2098 }
2099 
2100 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2101 {
2102     if (s->pauth_active) {
2103         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2104     }
2105     return true;
2106 }
2107 
2108 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2109 {
2110     if (s->pauth_active) {
2111         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2112     }
2113     return true;
2114 }
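/*
 * Guest-usage sketch for the HINT-space PAC insns above (illustrative
 * assembly, not from this file): a typical signed prologue/epilogue is
 *
 *     paciasp                  -- sign LR with SP as modifier
 *     stp  x29, x30, [sp, #-16]!
 *     ...
 *     ldp  x29, x30, [sp], #16
 *     autiasp                  -- authenticate LR before returning
 *     ret
 *
 * which is why all of these must behave as NOPs when pauth is
 * inactive.
 */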
2115 
2116 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2117 {
2118     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2119     return true;
2120 }
2121 
2122 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2123 {
2124     /* We handle DSB and DMB the same way */
2125     TCGBar bar;
2126 
2127     switch (a->types) {
2128     case 1: /* MBReqTypes_Reads */
2129         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2130         break;
2131     case 2: /* MBReqTypes_Writes */
2132         bar = TCG_BAR_SC | TCG_MO_ST_ST;
2133         break;
2134     default: /* MBReqTypes_All */
2135         bar = TCG_BAR_SC | TCG_MO_ALL;
2136         break;
2137     }
2138     tcg_gen_mb(bar);
2139     return true;
2140 }
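/*
 * Example mapping, assuming the usual CRm<1:0> encoding of the barrier
 * types field: "dmb ishld" decodes to MBReqTypes_Reads and becomes a
 * load-load + load-store barrier; "dmb ishst" decodes to
 * MBReqTypes_Writes and becomes a store-store barrier; "dmb ish" takes
 * the MBReqTypes_All path.
 */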
2141 
2142 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2143 {
2144     if (!dc_isar_feature(aa64_xs, s)) {
2145         return false;
2146     }
2147     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2148     return true;
2149 }
2150 
2151 static bool trans_ISB(DisasContext *s, arg_ISB *a)
2152 {
2153     /*
2154      * We need to break the TB after this insn to execute
2155      * self-modifying code correctly and also to take
2156      * any pending interrupts immediately.
2157      */
2158     reset_btype(s);
2159     gen_goto_tb(s, 0, 4);
2160     return true;
2161 }
2162 
2163 static bool trans_SB(DisasContext *s, arg_SB *a)
2164 {
2165     if (!dc_isar_feature(aa64_sb, s)) {
2166         return false;
2167     }
2168     /*
2169      * TODO: There is no speculation barrier opcode for TCG;
2170      * MB and end the TB instead.
2171      */
2172     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2173     gen_goto_tb(s, 0, 4);
2174     return true;
2175 }
2176 
2177 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2178 {
2179     if (!dc_isar_feature(aa64_condm_4, s)) {
2180         return false;
2181     }
2182     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2183     return true;
2184 }
2185 
2186 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2187 {
2188     TCGv_i32 z;
2189 
2190     if (!dc_isar_feature(aa64_condm_5, s)) {
2191         return false;
2192     }
2193 
2194     z = tcg_temp_new_i32();
2195 
2196     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2197 
2198     /*
2199      * (!C & !Z) << 31
2200      * (!(C | Z)) << 31
2201      * ~((C | Z) << 31)
2202      * ~-(C | Z)
2203      * (C | Z) - 1
2204      */
2205     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2206     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2207 
2208     /* !(Z & C) */
2209     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2210     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2211 
2212     /* (!C & Z) << 31 -> -(Z & ~C) */
2213     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2214     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2215 
2216     /* C | Z */
2217     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2218 
2219     return true;
2220 }
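/*
 * Worked check of the NF identity above: with C = 0 and Z = 0,
 * (C | Z) - 1 = -1 and bit 31 is set, matching (!C & !Z) << 31; with
 * either flag set, (C | Z) - 1 = 0 and bit 31 is clear.
 */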
2221 
2222 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2223 {
2224     if (!dc_isar_feature(aa64_condm_5, s)) {
2225         return false;
2226     }
2227 
2228     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2229     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2230 
2231     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2232     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2233 
2234     tcg_gen_movi_i32(cpu_NF, 0);
2235     tcg_gen_movi_i32(cpu_VF, 0);
2236 
2237     return true;
2238 }
2239 
2240 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2241 {
2242     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2243         return false;
2244     }
2245     if (a->imm & 1) {
2246         set_pstate_bits(PSTATE_UAO);
2247     } else {
2248         clear_pstate_bits(PSTATE_UAO);
2249     }
2250     gen_rebuild_hflags(s);
2251     s->base.is_jmp = DISAS_TOO_MANY;
2252     return true;
2253 }
2254 
2255 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2256 {
2257     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2258         return false;
2259     }
2260     if (a->imm & 1) {
2261         set_pstate_bits(PSTATE_PAN);
2262     } else {
2263         clear_pstate_bits(PSTATE_PAN);
2264     }
2265     gen_rebuild_hflags(s);
2266     s->base.is_jmp = DISAS_TOO_MANY;
2267     return true;
2268 }
2269 
2270 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2271 {
2272     if (s->current_el == 0) {
2273         return false;
2274     }
2275     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2276     s->base.is_jmp = DISAS_TOO_MANY;
2277     return true;
2278 }
2279 
2280 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2281 {
2282     if (!dc_isar_feature(aa64_ssbs, s)) {
2283         return false;
2284     }
2285     if (a->imm & 1) {
2286         set_pstate_bits(PSTATE_SSBS);
2287     } else {
2288         clear_pstate_bits(PSTATE_SSBS);
2289     }
2290     /* Don't need to rebuild hflags since SSBS is a nop */
2291     s->base.is_jmp = DISAS_TOO_MANY;
2292     return true;
2293 }
2294 
2295 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2296 {
2297     if (!dc_isar_feature(aa64_dit, s)) {
2298         return false;
2299     }
2300     if (a->imm & 1) {
2301         set_pstate_bits(PSTATE_DIT);
2302     } else {
2303         clear_pstate_bits(PSTATE_DIT);
2304     }
2305     /* There's no need to rebuild hflags because DIT is a nop */
2306     s->base.is_jmp = DISAS_TOO_MANY;
2307     return true;
2308 }
2309 
2310 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2311 {
2312     if (dc_isar_feature(aa64_mte, s)) {
2313         /* Full MTE is enabled -- set the TCO bit as directed. */
2314         if (a->imm & 1) {
2315             set_pstate_bits(PSTATE_TCO);
2316         } else {
2317             clear_pstate_bits(PSTATE_TCO);
2318         }
2319         gen_rebuild_hflags(s);
2320         /* Many factors, including TCO, go into MTE_ACTIVE. */
2321         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2322         return true;
2323     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2324         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2325         return true;
2326     } else {
2327         /* Insn not present */
2328         return false;
2329     }
2330 }
2331 
2332 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2333 {
2334     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2335     s->base.is_jmp = DISAS_TOO_MANY;
2336     return true;
2337 }
2338 
2339 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2340 {
2341     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2342     /* Exit the cpu loop to re-evaluate pending IRQs. */
2343     s->base.is_jmp = DISAS_UPDATE_EXIT;
2344     return true;
2345 }
2346 
2347 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2348 {
2349     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2350         return false;
2351     }
2352 
2353     if (a->imm == 0) {
2354         clear_pstate_bits(PSTATE_ALLINT);
2355     } else if (s->current_el > 1) {
2356         set_pstate_bits(PSTATE_ALLINT);
2357     } else {
2358         gen_helper_msr_set_allint_el1(tcg_env);
2359     }
2360 
2361     /* Exit the cpu loop to re-evaluate pending IRQs. */
2362     s->base.is_jmp = DISAS_UPDATE_EXIT;
2363     return true;
2364 }
2365 
2366 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2367 {
2368     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2369         return false;
2370     }
2371     if (sme_access_check(s)) {
2372         int old = s->pstate_sm | (s->pstate_za << 1);
2373         int new = a->imm * 3;
2374 
2375         if ((old ^ new) & a->mask) {
2376             /* At least one bit changes. */
2377             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2378                                 tcg_constant_i32(a->mask));
2379             s->base.is_jmp = DISAS_TOO_MANY;
2380         }
2381     }
2382     return true;
2383 }
2384 
2385 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2386 {
2387     TCGv_i32 tmp = tcg_temp_new_i32();
2388     TCGv_i32 nzcv = tcg_temp_new_i32();
2389 
2390     /* build bit 31, N */
2391     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2392     /* build bit 30, Z */
2393     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2394     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2395     /* build bit 29, C */
2396     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2397     /* build bit 28, V */
2398     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2399     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2400     /* generate result */
2401     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2402 }
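/*
 * Layout of the value built above (bits 31..28 = N, Z, C, V, all other
 * bits zero): e.g. flags N=1 Z=0 C=1 V=0 read back as
 * 0x00000000a0000000.
 */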
2403 
2404 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2405 {
2406     TCGv_i32 nzcv = tcg_temp_new_i32();
2407 
2408     /* take NZCV from R[t] */
2409     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2410 
2411     /* bit 31, N */
2412     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2413     /* bit 30, Z */
2414     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2415     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2416     /* bit 29, C */
2417     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2418     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2419     /* bit 28, V */
2420     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2421     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2422 }
2423 
2424 static void gen_sysreg_undef(DisasContext *s, bool isread,
2425                              uint8_t op0, uint8_t op1, uint8_t op2,
2426                              uint8_t crn, uint8_t crm, uint8_t rt)
2427 {
2428     /*
2429      * Generate code to emit an UNDEF with correct syndrome
2430      * information for a failed system register access.
2431      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2432      * but if FEAT_IDST is implemented then read accesses to registers
2433      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2434      * syndrome.
2435      */
2436     uint32_t syndrome;
2437 
2438     if (isread && dc_isar_feature(aa64_ids, s) &&
2439         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2440         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2441     } else {
2442         syndrome = syn_uncategorized();
2443     }
2444     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2445 }
2446 
2447 /* MRS - move from system register
2448  * MSR (register) - move to system register
2449  * SYS
2450  * SYSL
2451  * These are all essentially the same insn in 'read' and 'write'
2452  * versions, with varying op0 fields.
2453  */
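/*
 * Encoding example (illustrative): "mrs x0, ctr_el0" arrives here with
 * isread = true, op0 = 3, op1 = 3, crn = 0, crm = 0, op2 = 1 and
 * rt = 0, i.e. the S3_3_C0_C0_1 system register encoding.
 */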
2454 static void handle_sys(DisasContext *s, bool isread,
2455                        unsigned int op0, unsigned int op1, unsigned int op2,
2456                        unsigned int crn, unsigned int crm, unsigned int rt)
2457 {
2458     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2459                                       crn, crm, op0, op1, op2);
2460     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2461     bool need_exit_tb = false;
2462     bool nv_trap_to_el2 = false;
2463     bool nv_redirect_reg = false;
2464     bool skip_fp_access_checks = false;
2465     bool nv2_mem_redirect = false;
2466     TCGv_ptr tcg_ri = NULL;
2467     TCGv_i64 tcg_rt;
2468     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2469 
2470     if (crn == 11 || crn == 15) {
2471         /*
2472          * Check for TIDCP trap, which must take precedence over
2473          * the UNDEF for "no such register" etc.
2474          */
2475         switch (s->current_el) {
2476         case 0:
2477             if (dc_isar_feature(aa64_tidcp1, s)) {
2478                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2479             }
2480             break;
2481         case 1:
2482             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2483             break;
2484         }
2485     }
2486 
2487     if (!ri) {
2488         /* Unknown register; this might be a guest error or a QEMU
2489          * unimplemented feature.
2490          */
2491         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2492                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2493                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2494         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2495         return;
2496     }
2497 
2498     if (s->nv2 && ri->nv2_redirect_offset) {
2499         /*
2500          * Some registers always redirect to memory; some only do so if
2501          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2502          * pairs which share an offset; see the table in R_CSRPQ).
2503          */
2504         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2505             nv2_mem_redirect = s->nv1;
2506         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2507             nv2_mem_redirect = !s->nv1;
2508         } else {
2509             nv2_mem_redirect = true;
2510         }
2511     }
2512 
2513     /* Check access permissions */
2514     if (!cp_access_ok(s->current_el, ri, isread)) {
2515         /*
2516          * FEAT_NV/NV2 handling does not do the usual FP access checks
2517          * for registers only accessible at EL2 (though it *does* do them
2518          * for registers accessible at EL1).
2519          */
2520         skip_fp_access_checks = true;
2521         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2522             /*
2523              * This is one of the few EL2 registers which should redirect
2524              * to the equivalent EL1 register. We do that after running
2525              * the EL2 register's accessfn.
2526              */
2527             nv_redirect_reg = true;
2528             assert(!nv2_mem_redirect);
2529         } else if (nv2_mem_redirect) {
2530             /*
2531              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2532              * UNDEF to EL1.
2533              */
2534         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2535             /*
2536              * This register / instruction exists and is an EL2 register, so
2537              * we must trap to EL2 if accessed in nested virtualization EL1
2538              * instead of UNDEFing. We'll do that after the usual access checks.
2539              * (This makes a difference only for a couple of registers like
2540              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2541              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2542              * an accessfn which does nothing when called from EL1, because
2543              * the trap-to-EL3 controls which would apply to that register
2544              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2545              */
2546             nv_trap_to_el2 = true;
2547         } else {
2548             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2549             return;
2550         }
2551     }
2552 
2553     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2554         /* Emit code to perform further access permissions checks at
2555          * runtime; this may result in an exception.
2556          */
2557         gen_a64_update_pc(s, 0);
2558         tcg_ri = tcg_temp_new_ptr();
2559         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2560                                        tcg_constant_i32(key),
2561                                        tcg_constant_i32(syndrome),
2562                                        tcg_constant_i32(isread));
2563     } else if (ri->type & ARM_CP_RAISES_EXC) {
2564         /*
2565          * The readfn or writefn might raise an exception;
2566          * synchronize the CPU state in case it does.
2567          */
2568         gen_a64_update_pc(s, 0);
2569     }
2570 
2571     if (!skip_fp_access_checks) {
2572         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2573             return;
2574         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2575             return;
2576         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2577             return;
2578         }
2579     }
2580 
2581     if (nv_trap_to_el2) {
2582         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2583         return;
2584     }
2585 
2586     if (nv_redirect_reg) {
2587         /*
2588          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2589          * Conveniently in all cases the encoding of the EL1 register is
2590          * identical to the EL2 register except that opc1 is 0.
2591          * Get the reginfo for the EL1 register to use for the actual access.
2592          * We don't use the EL1 register's access function, and
2593          * fine-grained-traps on EL1 also do not apply here.
2594          */
2595         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2596                                  crn, crm, op0, 0, op2);
2597         ri = get_arm_cp_reginfo(s->cp_regs, key);
2598         assert(ri);
2599         assert(cp_access_ok(s->current_el, ri, isread));
2600         /*
2601          * We might not have done an update_pc earlier, so check we don't
2602          * need it. We could support this in future if necessary.
2603          */
2604         assert(!(ri->type & ARM_CP_RAISES_EXC));
2605     }
2606 
2607     if (nv2_mem_redirect) {
2608         /*
2609          * This system register is being redirected into an EL2 memory access.
2610          * This means it is not an IO operation, doesn't change hflags,
2611          * and need not end the TB, because it has no side effects.
2612          *
2613          * The access is 64-bit single copy atomic, guaranteed aligned because
2614          * of the definition of VCNR_EL2. Its endianness depends on
2615          * SCTLR_EL2.EE, not on the data endianness of EL1.
2616          * It is done under either the EL2 translation regime or the EL2&0
2617          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2618          * PSTATE.PAN is 0.
2619          */
2620         TCGv_i64 ptr = tcg_temp_new_i64();
2621         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2622         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2623         int memidx = arm_to_core_mmu_idx(armmemidx);
2624         uint32_t syn;
2625 
2626         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2627 
2628         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2629         tcg_gen_addi_i64(ptr, ptr,
2630                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2631         tcg_rt = cpu_reg(s, rt);
2632 
2633         syn = syn_data_abort_vncr(0, !isread, 0);
2634         disas_set_insn_syndrome(s, syn);
2635         if (isread) {
2636             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2637         } else {
2638             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2639         }
2640         return;
2641     }
2642 
2643     /* Handle special cases first */
2644     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2645     case 0:
2646         break;
2647     case ARM_CP_NOP:
2648         return;
2649     case ARM_CP_NZCV:
2650         tcg_rt = cpu_reg(s, rt);
2651         if (isread) {
2652             gen_get_nzcv(tcg_rt);
2653         } else {
2654             gen_set_nzcv(tcg_rt);
2655         }
2656         return;
2657     case ARM_CP_CURRENTEL:
2658     {
2659         /*
2660          * Reads as current EL value from pstate, which is
2661          * guaranteed to be constant by the tb flags.
2662          * For nested virt we should report EL2.
2663          */
2664         int el = s->nv ? 2 : s->current_el;
2665         tcg_rt = cpu_reg(s, rt);
2666         tcg_gen_movi_i64(tcg_rt, el << 2);
2667         return;
2668     }
2669     case ARM_CP_DC_ZVA:
2670         /* Writes clear the aligned block of memory which rt points into. */
2671         if (s->mte_active[0]) {
2672             int desc = 0;
2673 
2674             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2675             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2676             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2677 
2678             tcg_rt = tcg_temp_new_i64();
2679             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2680                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2681         } else {
2682             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2683         }
2684         gen_helper_dc_zva(tcg_env, tcg_rt);
2685         return;
2686     case ARM_CP_DC_GVA:
2687         {
2688             TCGv_i64 clean_addr, tag;
2689 
2690             /*
2691              * DC_GVA, like DC_ZVA, requires that we supply the original
2692              * pointer for an invalid page.  Probe that address first.
2693              */
2694             tcg_rt = cpu_reg(s, rt);
2695             clean_addr = clean_data_tbi(s, tcg_rt);
2696             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2697 
2698             if (s->ata[0]) {
2699                 /* Extract the tag from the register to match STZGM.  */
2700                 tag = tcg_temp_new_i64();
2701                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2702                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2703             }
2704         }
2705         return;
2706     case ARM_CP_DC_GZVA:
2707         {
2708             TCGv_i64 clean_addr, tag;
2709 
2710             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2711             tcg_rt = cpu_reg(s, rt);
2712             clean_addr = clean_data_tbi(s, tcg_rt);
2713             gen_helper_dc_zva(tcg_env, clean_addr);
2714 
2715             if (s->ata[0]) {
2716                 /* Extract the tag from the register to match STZGM.  */
2717                 tag = tcg_temp_new_i64();
2718                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2719                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2720             }
2721         }
2722         return;
2723     default:
2724         g_assert_not_reached();
2725     }
2726 
2727     if (ri->type & ARM_CP_IO) {
2728         /* I/O operations must end the TB here (whether read or write) */
2729         need_exit_tb = translator_io_start(&s->base);
2730     }
2731 
2732     tcg_rt = cpu_reg(s, rt);
2733 
2734     if (isread) {
2735         if (ri->type & ARM_CP_CONST) {
2736             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2737         } else if (ri->readfn) {
2738             if (!tcg_ri) {
2739                 tcg_ri = gen_lookup_cp_reg(key);
2740             }
2741             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2742         } else {
2743             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2744         }
2745     } else {
2746         if (ri->type & ARM_CP_CONST) {
2747             /* If not forbidden by access permissions, treat as WI */
2748             return;
2749         } else if (ri->writefn) {
2750             if (!tcg_ri) {
2751                 tcg_ri = gen_lookup_cp_reg(key);
2752             }
2753             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2754         } else {
2755             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2756         }
2757     }
2758 
2759     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2760         /*
2761          * A write to any coprocessor register that ends a TB
2762          * must rebuild the hflags for the next TB.
2763          */
2764         gen_rebuild_hflags(s);
2765         /*
2766          * We default to ending the TB on a coprocessor register write,
2767          * but allow this to be suppressed by the register definition
2768          * (usually only necessary to work around guest bugs).
2769          */
2770         need_exit_tb = true;
2771     }
2772     if (need_exit_tb) {
2773         s->base.is_jmp = DISAS_UPDATE_EXIT;
2774     }
2775 }
2776 
2777 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2778 {
2779     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2780     return true;
2781 }
2782 
2783 static bool trans_SVC(DisasContext *s, arg_i *a)
2784 {
2785     /*
2786      * For SVC, HVC and SMC we advance the single-step state
2787      * machine before taking the exception. This is architecturally
2788      * mandated, to ensure that single-stepping a system call
2789      * instruction works properly.
2790      */
2791     uint32_t syndrome = syn_aa64_svc(a->imm);
2792     if (s->fgt_svc) {
2793         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2794         return true;
2795     }
2796     gen_ss_advance(s);
2797     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2798     return true;
2799 }
2800 
2801 static bool trans_HVC(DisasContext *s, arg_i *a)
2802 {
2803     int target_el = s->current_el == 3 ? 3 : 2;
2804 
2805     if (s->current_el == 0) {
2806         unallocated_encoding(s);
2807         return true;
2808     }
2809     /*
2810      * The pre HVC helper handles cases when HVC gets trapped
2811      * as an undefined insn by runtime configuration.
2812      */
2813     gen_a64_update_pc(s, 0);
2814     gen_helper_pre_hvc(tcg_env);
2815     /* Architecture requires ss advance before we do the actual work */
2816     gen_ss_advance(s);
2817     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2818     return true;
2819 }
2820 
2821 static bool trans_SMC(DisasContext *s, arg_i *a)
2822 {
2823     if (s->current_el == 0) {
2824         unallocated_encoding(s);
2825         return true;
2826     }
2827     gen_a64_update_pc(s, 0);
2828     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2829     /* Architecture requires ss advance before we do the actual work */
2830     gen_ss_advance(s);
2831     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2832     return true;
2833 }
2834 
2835 static bool trans_BRK(DisasContext *s, arg_i *a)
2836 {
2837     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2838     return true;
2839 }
2840 
2841 static bool trans_HLT(DisasContext *s, arg_i *a)
2842 {
2843     /*
2844      * HLT. This has two purposes.
2845      * Architecturally, it is an external halting debug instruction.
2846      * Since QEMU doesn't implement external debug, we treat this as
2847      * required when halting debug is disabled: it will UNDEF.
2848      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2849      */
2850     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2851         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2852     } else {
2853         unallocated_encoding(s);
2854     }
2855     return true;
2856 }
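/*
 * Semihosting usage sketch (per the Arm semihosting spec; illustrative
 * assembly, not from this file): the guest puts the operation number
 * in W0 and the parameter in X1 before the trap insn:
 *
 *     mov  w0, #0x04           -- SYS_WRITE0
 *     adr  x1, message
 *     hlt  #0xf000
 */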
2857 
2858 /*
2859  * Load/Store exclusive instructions are implemented by remembering
2860  * the value/address loaded, and seeing if these are the same
2861  * when the store is performed. This is not actually the architecturally
2862  * mandated semantics, but it works for typical guest code sequences
2863  * and avoids having to monitor regular stores.
2864  *
2865  * The store exclusive uses the atomic cmpxchg primitives to avoid
2866  * races in multi-threaded linux-user and when MTTCG softmmu is
2867  * enabled.
2868  */
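/*
 * Typical guest sequence this models (illustrative): an atomic
 * increment retry loop,
 *
 *     loop: ldxr  x0, [x1]
 *           add   x0, x0, #1
 *           stxr  w2, x0, [x1]
 *           cbnz  w2, loop
 *
 * The cmpxchg in the store half succeeds only if memory still holds
 * the value recorded by the load, which is exactly what such retry
 * loops rely on.
 */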
2869 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2870                                int size, bool is_pair)
2871 {
2872     int idx = get_mem_index(s);
2873     TCGv_i64 dirty_addr, clean_addr;
2874     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2875 
2876     s->is_ldex = true;
2877     dirty_addr = cpu_reg_sp(s, rn);
2878     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2879 
2880     g_assert(size <= 3);
2881     if (is_pair) {
2882         g_assert(size >= 2);
2883         if (size == 2) {
2884             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2885             if (s->be_data == MO_LE) {
2886                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2887                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2888             } else {
2889                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2890                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2891             }
2892         } else {
2893             TCGv_i128 t16 = tcg_temp_new_i128();
2894 
2895             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2896 
2897             if (s->be_data == MO_LE) {
2898                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2899                                       cpu_exclusive_high, t16);
2900             } else {
2901                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2902                                       cpu_exclusive_val, t16);
2903             }
2904             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2905             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2906         }
2907     } else {
2908         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2909         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2910     }
2911     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2912 }
2913 
2914 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2915                                 int rn, int size, int is_pair)
2916 {
2917     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2918      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2919      *     [addr] = {Rt};
2920      *     if (is_pair) {
2921      *         [addr + datasize] = {Rt2};
2922      *     }
2923      *     {Rd} = 0;
2924      * } else {
2925      *     {Rd} = 1;
2926      * }
2927      * env->exclusive_addr = -1;
2928      */
2929     TCGLabel *fail_label = gen_new_label();
2930     TCGLabel *done_label = gen_new_label();
2931     TCGv_i64 tmp, clean_addr;
2932     MemOp memop;
2933 
2934     /*
2935      * FIXME: We are out of spec here.  We have recorded only the address
2936      * from load_exclusive, not the entire range, and we assume that the
2937      * size of the access on both sides match.  The architecture allows the
2938      * store to be smaller than the load, so long as the stored bytes are
2939      * within the range recorded by the load.
2940      */
2941 
2942     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2943     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2944     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2945 
2946     /*
2947      * The write, and any associated faults, only happen if the virtual
2948      * and physical addresses pass the exclusive monitor check.  These
2949      * faults are exceedingly unlikely, because normally the guest uses
2950      * the exact same address register for the load_exclusive, and we
2951      * would have recognized these faults there.
2952      *
2953      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2954      * unaligned 4-byte write within the range of an aligned 8-byte load.
2955      * With LSE2, the store would need to cross a 16-byte boundary when the
2956      * load did not, which would mean the store is outside the range
2957      * recorded for the monitor, which would have failed a corrected monitor
2958      * check above.  For now, we assume no size change and retain the
2959      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2960      *
2961      * It is possible to trigger an MTE fault, by performing the load with
2962      * a virtual address with a valid tag and performing the store with the
2963      * same virtual address and a different invalid tag.
2964      */
2965     memop = size + is_pair;
2966     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2967         memop |= MO_ALIGN;
2968     }
2969     memop = finalize_memop(s, memop);
2970     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2971 
2972     tmp = tcg_temp_new_i64();
2973     if (is_pair) {
2974         if (size == 2) {
2975             if (s->be_data == MO_LE) {
2976                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2977             } else {
2978                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2979             }
2980             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2981                                        cpu_exclusive_val, tmp,
2982                                        get_mem_index(s), memop);
2983             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2984         } else {
2985             TCGv_i128 t16 = tcg_temp_new_i128();
2986             TCGv_i128 c16 = tcg_temp_new_i128();
2987             TCGv_i64 a, b;
2988 
2989             if (s->be_data == MO_LE) {
2990                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2991                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2992                                         cpu_exclusive_high);
2993             } else {
2994                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2995                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2996                                         cpu_exclusive_val);
2997             }
2998 
2999             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3000                                         get_mem_index(s), memop);
3001 
3002             a = tcg_temp_new_i64();
3003             b = tcg_temp_new_i64();
3004             if (s->be_data == MO_LE) {
3005                 tcg_gen_extr_i128_i64(a, b, t16);
3006             } else {
3007                 tcg_gen_extr_i128_i64(b, a, t16);
3008             }
3009 
3010             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3011             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3012             tcg_gen_or_i64(tmp, a, b);
3013 
3014             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3015         }
3016     } else {
3017         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3018                                    cpu_reg(s, rt), get_mem_index(s), memop);
3019         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3020     }
3021     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3022     tcg_gen_br(done_label);
3023 
3024     gen_set_label(fail_label);
3025     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3026     gen_set_label(done_label);
3027     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3028 }
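
/*
 * Illustrative sketch, not part of the translator: a typical guest
 * retry loop that exercises gen_load_exclusive/gen_store_exclusive
 * (register choices arbitrary):
 *
 *     retry:
 *         ldaxr   x0, [x2]        ; records address and value
 *         add     x0, x0, #1
 *         stlxr   w1, x0, [x2]    ; cmpxchg against recorded value
 *         cbnz    w1, retry       ; w1 == 1: monitor check failed
 *
 * Because the monitor is implemented as a cmpxchg on the recorded
 * value, an ABA change of [x2] between the two instructions passes
 * the check; this is a known, accepted approximation of the
 * cmpxchg-based approach.
 */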
3029 
3030 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3031                                  int rn, int size)
3032 {
3033     TCGv_i64 tcg_rs = cpu_reg(s, rs);
3034     TCGv_i64 tcg_rt = cpu_reg(s, rt);
3035     int memidx = get_mem_index(s);
3036     TCGv_i64 clean_addr;
3037     MemOp memop;
3038 
3039     if (rn == 31) {
3040         gen_check_sp_alignment(s);
3041     }
3042     memop = check_atomic_align(s, rn, size);
3043     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3044     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3045                                memidx, memop);
3046 }
3047 
3048 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3049                                       int rn, int size)
3050 {
3051     TCGv_i64 s1 = cpu_reg(s, rs);
3052     TCGv_i64 s2 = cpu_reg(s, rs + 1);
3053     TCGv_i64 t1 = cpu_reg(s, rt);
3054     TCGv_i64 t2 = cpu_reg(s, rt + 1);
3055     TCGv_i64 clean_addr;
3056     int memidx = get_mem_index(s);
3057     MemOp memop;
3058 
3059     if (rn == 31) {
3060         gen_check_sp_alignment(s);
3061     }
3062 
3063     /* This is a single atomic access, despite the "pair". */
3064     memop = check_atomic_align(s, rn, size + 1);
3065     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3066 
3067     if (size == 2) {
3068         TCGv_i64 cmp = tcg_temp_new_i64();
3069         TCGv_i64 val = tcg_temp_new_i64();
3070 
3071         if (s->be_data == MO_LE) {
3072             tcg_gen_concat32_i64(val, t1, t2);
3073             tcg_gen_concat32_i64(cmp, s1, s2);
3074         } else {
3075             tcg_gen_concat32_i64(val, t2, t1);
3076             tcg_gen_concat32_i64(cmp, s2, s1);
3077         }
3078 
3079         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3080 
3081         if (s->be_data == MO_LE) {
3082             tcg_gen_extr32_i64(s1, s2, cmp);
3083         } else {
3084             tcg_gen_extr32_i64(s2, s1, cmp);
3085         }
3086     } else {
3087         TCGv_i128 cmp = tcg_temp_new_i128();
3088         TCGv_i128 val = tcg_temp_new_i128();
3089 
3090         if (s->be_data == MO_LE) {
3091             tcg_gen_concat_i64_i128(val, t1, t2);
3092             tcg_gen_concat_i64_i128(cmp, s1, s2);
3093         } else {
3094             tcg_gen_concat_i64_i128(val, t2, t1);
3095             tcg_gen_concat_i64_i128(cmp, s2, s1);
3096         }
3097 
3098         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3099 
3100         if (s->be_data == MO_LE) {
3101             tcg_gen_extr_i128_i64(s1, s2, cmp);
3102         } else {
3103             tcg_gen_extr_i128_i64(s2, s1, cmp);
3104         }
3105     }
3106 }
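
/*
 * Worked example, purely illustrative, of the lane ordering above:
 * for a little-endian CASP x0, x1, x2, x3, [x4] (size == MO_64) the
 * 128-bit compare value is the pair {x1:x0} with x0 in the low half,
 * and the store value is {x3:x2}.  On return the old memory value is
 * split back so that x0 receives the low eight bytes.  Big-endian
 * swaps the halves, which is what the s->be_data tests select.
 */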
3107 
3108 /*
3109  * Compute the ISS.SF bit for syndrome information if an exception
3110  * is taken on a load or store. This indicates whether the instruction
3111  * is accessing a 32-bit or 64-bit register. This logic is derived
3112  * from the ARMv8 specs for LDR (Shared decode for all encodings).
3113  */
3114 static bool ldst_iss_sf(int size, bool sign, bool ext)
3115 {
3116 
3117     if (sign) {
3118         /*
3119          * Signed loads are 64 bit results if we are not going to
3120          * do a zero-extend from 32 to 64 after the load.
3121          * (For a store, sign and ext are always false.)
3122          */
3123         return !ext;
3124     } else {
3125         /* Unsigned loads/stores work at the specified size */
3126         return size == MO_64;
3127     }
3128 }
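
/*
 * Illustrative mappings (instruction mnemonics for orientation only):
 *   LDR   w0, [x1]   size <  MO_64, !sign        -> SF == 0
 *   LDR   x0, [x1]   size == MO_64               -> SF == 1
 *   LDRSW x0, [x1]   sign, no 32-bit zero-extend -> SF == 1
 *   LDRSH w0, [x1]   sign, ext (to a W register) -> SF == 0
 */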
3129 
3130 static bool trans_STXR(DisasContext *s, arg_stxr *a)
3131 {
3132     if (a->rn == 31) {
3133         gen_check_sp_alignment(s);
3134     }
3135     if (a->lasr) {
3136         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3137     }
3138     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3139     return true;
3140 }
3141 
3142 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3143 {
3144     if (a->rn == 31) {
3145         gen_check_sp_alignment(s);
3146     }
3147     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3148     if (a->lasr) {
3149         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3150     }
3151     return true;
3152 }
3153 
3154 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3155 {
3156     TCGv_i64 clean_addr;
3157     MemOp memop;
3158     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3159 
3160     /*
3161      * StoreLORelease is the same as Store-Release for QEMU, but
3162      * needs the feature-test.
3163      */
3164     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3165         return false;
3166     }
3167     /* Generate ISS for non-exclusive accesses including LASR.  */
3168     if (a->rn == 31) {
3169         gen_check_sp_alignment(s);
3170     }
3171     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3172     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3173     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3174                                 true, a->rn != 31, memop);
3175     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3176               iss_sf, a->lasr);
3177     return true;
3178 }
3179 
3180 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3181 {
3182     TCGv_i64 clean_addr;
3183     MemOp memop;
3184     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3185 
3186     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3187     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3188         return false;
3189     }
3190     /* Generate ISS for non-exclusive accesses including LASR.  */
3191     if (a->rn == 31) {
3192         gen_check_sp_alignment(s);
3193     }
3194     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3195     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3196                                 false, a->rn != 31, memop);
3197     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3198               a->rt, iss_sf, a->lasr);
3199     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3200     return true;
3201 }
3202 
3203 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3204 {
3205     if (a->rn == 31) {
3206         gen_check_sp_alignment(s);
3207     }
3208     if (a->lasr) {
3209         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3210     }
3211     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3212     return true;
3213 }
3214 
3215 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3216 {
3217     if (a->rn == 31) {
3218         gen_check_sp_alignment(s);
3219     }
3220     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3221     if (a->lasr) {
3222         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3223     }
3224     return true;
3225 }
3226 
3227 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3228 {
3229     if (!dc_isar_feature(aa64_atomics, s)) {
3230         return false;
3231     }
3232     if (((a->rt | a->rs) & 1) != 0) {
3233         return false;
3234     }
3235 
3236     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3237     return true;
3238 }
3239 
3240 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3241 {
3242     if (!dc_isar_feature(aa64_atomics, s)) {
3243         return false;
3244     }
3245     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3246     return true;
3247 }
3248 
3249 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3250 {
3251     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3252     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3253     TCGv_i64 clean_addr = tcg_temp_new_i64();
3254     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3255 
3256     gen_pc_plus_diff(s, clean_addr, a->imm);
3257     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3258               false, true, a->rt, iss_sf, false);
3259     return true;
3260 }
3261 
3262 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3263 {
3264     /* Load register (literal), vector version */
3265     TCGv_i64 clean_addr;
3266     MemOp memop;
3267 
3268     if (!fp_access_check(s)) {
3269         return true;
3270     }
3271     memop = finalize_memop_asimd(s, a->sz);
3272     clean_addr = tcg_temp_new_i64();
3273     gen_pc_plus_diff(s, clean_addr, a->imm);
3274     do_fp_ld(s, a->rt, clean_addr, memop);
3275     return true;
3276 }
3277 
3278 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3279                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3280                                  uint64_t offset, bool is_store, MemOp mop)
3281 {
3282     if (a->rn == 31) {
3283         gen_check_sp_alignment(s);
3284     }
3285 
3286     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3287     if (!a->p) {
3288         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3289     }
3290 
3291     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3292                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3293 }
3294 
3295 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3296                                   TCGv_i64 dirty_addr, uint64_t offset)
3297 {
3298     if (a->w) {
3299         if (a->p) {
3300             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3301         }
3302         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3303     }
3304 }
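
/*
 * Illustrative decode of the three pair addressing forms the two
 * helpers above implement (guest syntax; registers arbitrary):
 *   stp x0, x1, [sp, #16]     signed offset:  !a->p, !a->w
 *   stp x0, x1, [sp, #16]!    pre-index:      !a->p,  a->w
 *   stp x0, x1, [sp], #16     post-index:      a->p,  a->w
 * For the first two forms the offset is folded into dirty_addr before
 * the access; for post-index it is added only at writeback time.
 */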
3305 
3306 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3307 {
3308     uint64_t offset = a->imm << a->sz;
3309     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3310     MemOp mop = finalize_memop(s, a->sz);
3311 
3312     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3313     tcg_rt = cpu_reg(s, a->rt);
3314     tcg_rt2 = cpu_reg(s, a->rt2);
3315     /*
3316      * We built mop above for the single logical access -- rebuild it
3317      * now for the paired operation.
3318      *
3319      * With LSE2, non-sign-extending pairs are treated atomically if
3320      * aligned, and if unaligned one of the pair will be completely
3321      * within a 16-byte block and that element will be atomic.
3322      * Otherwise each element is separately atomic.
3323      * In all cases, issue one operation with the correct atomicity.
3324      */
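    /*
     * For example (illustrative): an STP of two X registers has
     * a->sz == MO_64, so the rebuilt mop requests one MO_128 access,
     * aligned to 8 bytes when alignment checking is enabled, and
     * finalize_memop_pair supplies the pair atomicity matching the
     * LSE2 rules described above.
     */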
3325     mop = a->sz + 1;
3326     if (s->align_mem) {
3327         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3328     }
3329     mop = finalize_memop_pair(s, mop);
3330     if (a->sz == 2) {
3331         TCGv_i64 tmp = tcg_temp_new_i64();
3332 
3333         if (s->be_data == MO_LE) {
3334             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3335         } else {
3336             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3337         }
3338         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3339     } else {
3340         TCGv_i128 tmp = tcg_temp_new_i128();
3341 
3342         if (s->be_data == MO_LE) {
3343             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3344         } else {
3345             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3346         }
3347         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3348     }
3349     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3350     return true;
3351 }
3352 
3353 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3354 {
3355     uint64_t offset = a->imm << a->sz;
3356     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3357     MemOp mop = finalize_memop(s, a->sz);
3358 
3359     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3360     tcg_rt = cpu_reg(s, a->rt);
3361     tcg_rt2 = cpu_reg(s, a->rt2);
3362 
3363     /*
3364      * We built mop above for the single logical access -- rebuild it
3365      * now for the paired operation.
3366      *
3367      * With LSE2, non-sign-extending pairs are treated atomically if
3368      * aligned, and if unaligned one of the pair will be completely
3369      * within a 16-byte block and that element will be atomic.
3370      * Otherwise each element is separately atomic.
3371      * In all cases, issue one operation with the correct atomicity.
3372      *
3373      * This treats sign-extending loads like zero-extending loads,
3374      * since that reuses the most code below.
3375      */
3376     mop = a->sz + 1;
3377     if (s->align_mem) {
3378         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3379     }
3380     mop = finalize_memop_pair(s, mop);
3381     if (a->sz == 2) {
3382         int o2 = s->be_data == MO_LE ? 32 : 0;
3383         int o1 = o2 ^ 32;
3384 
3385         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3386         if (a->sign) {
3387             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3388             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3389         } else {
3390             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3391             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3392         }
3393     } else {
3394         TCGv_i128 tmp = tcg_temp_new_i128();
3395 
3396         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3397         if (s->be_data == MO_LE) {
3398             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3399         } else {
3400             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3401         }
3402     }
3403     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3404     return true;
3405 }
3406 
3407 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3408 {
3409     uint64_t offset = a->imm << a->sz;
3410     TCGv_i64 clean_addr, dirty_addr;
3411     MemOp mop;
3412 
3413     if (!fp_access_check(s)) {
3414         return true;
3415     }
3416 
3417     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3418     mop = finalize_memop_asimd(s, a->sz);
3419     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3420     do_fp_st(s, a->rt, clean_addr, mop);
3421     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3422     do_fp_st(s, a->rt2, clean_addr, mop);
3423     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3424     return true;
3425 }
3426 
3427 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3428 {
3429     uint64_t offset = a->imm << a->sz;
3430     TCGv_i64 clean_addr, dirty_addr;
3431     MemOp mop;
3432 
3433     if (!fp_access_check(s)) {
3434         return true;
3435     }
3436 
3437     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3438     mop = finalize_memop_asimd(s, a->sz);
3439     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3440     do_fp_ld(s, a->rt, clean_addr, mop);
3441     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3442     do_fp_ld(s, a->rt2, clean_addr, mop);
3443     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3444     return true;
3445 }
3446 
3447 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3448 {
3449     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3450     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3451     MemOp mop;
3452     TCGv_i128 tmp;
3453 
3454     /* STGP only comes in one size. */
3455     tcg_debug_assert(a->sz == MO_64);
3456 
3457     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3458         return false;
3459     }
3460 
3461     if (a->rn == 31) {
3462         gen_check_sp_alignment(s);
3463     }
3464 
3465     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3466     if (!a->p) {
3467         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3468     }
3469 
3470     clean_addr = clean_data_tbi(s, dirty_addr);
3471     tcg_rt = cpu_reg(s, a->rt);
3472     tcg_rt2 = cpu_reg(s, a->rt2);
3473 
3474     /*
3475      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3476      * and one tag operation.  We implement it as one single aligned 16-byte
3477      * memory operation for convenience.  Note that the alignment ensures
3478      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3479      */
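    /*
     * Illustrative example: "stgp x0, x1, [sp]" stores the 16 data
     * bytes {x0, x1} and sets the allocation tag of the single
     * TAG_GRANULE covered, taking the tag from the tag bits of the
     * address in the base register.
     */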
3480     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3481 
3482     tmp = tcg_temp_new_i128();
3483     if (s->be_data == MO_LE) {
3484         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3485     } else {
3486         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3487     }
3488     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3489 
3490     /* Perform the tag store, if tag access enabled. */
3491     if (s->ata[0]) {
3492         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3493             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3494         } else {
3495             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3496         }
3497     }
3498 
3499     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3500     return true;
3501 }
3502 
3503 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3504                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3505                                  uint64_t offset, bool is_store, MemOp mop)
3506 {
3507     int memidx;
3508 
3509     if (a->rn == 31) {
3510         gen_check_sp_alignment(s);
3511     }
3512 
3513     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3514     if (!a->p) {
3515         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3516     }
3517     memidx = get_a64_user_mem_index(s, a->unpriv);
3518     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3519                                         a->w || a->rn != 31,
3520                                         mop, a->unpriv, memidx);
3521 }
3522 
3523 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3524                                   TCGv_i64 dirty_addr, uint64_t offset)
3525 {
3526     if (a->w) {
3527         if (a->p) {
3528             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3529         }
3530         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3531     }
3532 }
3533 
3534 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3535 {
3536     bool iss_sf, iss_valid = !a->w;
3537     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3538     int memidx = get_a64_user_mem_index(s, a->unpriv);
3539     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3540 
3541     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3542 
3543     tcg_rt = cpu_reg(s, a->rt);
3544     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3545 
3546     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3547                      iss_valid, a->rt, iss_sf, false);
3548     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3549     return true;
3550 }
3551 
3552 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3553 {
3554     bool iss_sf, iss_valid = !a->w;
3555     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3556     int memidx = get_a64_user_mem_index(s, a->unpriv);
3557     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3558 
3559     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3560 
3561     tcg_rt = cpu_reg(s, a->rt);
3562     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3563 
3564     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3565                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3566     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3567     return true;
3568 }
3569 
3570 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3571 {
3572     TCGv_i64 clean_addr, dirty_addr;
3573     MemOp mop;
3574 
3575     if (!fp_access_check(s)) {
3576         return true;
3577     }
3578     mop = finalize_memop_asimd(s, a->sz);
3579     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3580     do_fp_st(s, a->rt, clean_addr, mop);
3581     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3582     return true;
3583 }
3584 
3585 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3586 {
3587     TCGv_i64 clean_addr, dirty_addr;
3588     MemOp mop;
3589 
3590     if (!fp_access_check(s)) {
3591         return true;
3592     }
3593     mop = finalize_memop_asimd(s, a->sz);
3594     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3595     do_fp_ld(s, a->rt, clean_addr, mop);
3596     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3597     return true;
3598 }
3599 
3600 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3601                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3602                              bool is_store, MemOp memop)
3603 {
3604     TCGv_i64 tcg_rm;
3605 
3606     if (a->rn == 31) {
3607         gen_check_sp_alignment(s);
3608     }
3609     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3610 
3611     tcg_rm = read_cpu_reg(s, a->rm, 1);
3612     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3613 
3614     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3615     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3616 }
3617 
3618 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3619 {
3620     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3621     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3622     MemOp memop;
3623 
3624     if (extract32(a->opt, 1, 1) == 0) {
3625         return false;
3626     }
3627 
3628     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3629     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3630     tcg_rt = cpu_reg(s, a->rt);
3631     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3632               a->ext, true, a->rt, iss_sf, false);
3633     return true;
3634 }
3635 
3636 static bool trans_STR(DisasContext *s, arg_ldst *a)
3637 {
3638     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3639     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3640     MemOp memop;
3641 
3642     if (extract32(a->opt, 1, 1) == 0) {
3643         return false;
3644     }
3645 
3646     memop = finalize_memop(s, a->sz);
3647     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3648     tcg_rt = cpu_reg(s, a->rt);
3649     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3650     return true;
3651 }
3652 
3653 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3654 {
3655     TCGv_i64 clean_addr, dirty_addr;
3656     MemOp memop;
3657 
3658     if (extract32(a->opt, 1, 1) == 0) {
3659         return false;
3660     }
3661 
3662     if (!fp_access_check(s)) {
3663         return true;
3664     }
3665 
3666     memop = finalize_memop_asimd(s, a->sz);
3667     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3668     do_fp_ld(s, a->rt, clean_addr, memop);
3669     return true;
3670 }
3671 
3672 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3673 {
3674     TCGv_i64 clean_addr, dirty_addr;
3675     MemOp memop;
3676 
3677     if (extract32(a->opt, 1, 1) == 0) {
3678         return false;
3679     }
3680 
3681     if (!fp_access_check(s)) {
3682         return true;
3683     }
3684 
3685     memop = finalize_memop_asimd(s, a->sz);
3686     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3687     do_fp_st(s, a->rt, clean_addr, memop);
3688     return true;
3689 }
3690 
3691 
3692 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3693                          int sign, bool invert)
3694 {
3695     MemOp mop = a->sz | sign;
3696     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3697 
3698     if (a->rn == 31) {
3699         gen_check_sp_alignment(s);
3700     }
3701     mop = check_atomic_align(s, a->rn, mop);
3702     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3703                                 a->rn != 31, mop);
3704     tcg_rs = read_cpu_reg(s, a->rs, true);
3705     tcg_rt = cpu_reg(s, a->rt);
3706     if (invert) {
3707         tcg_gen_not_i64(tcg_rs, tcg_rs);
3708     }
3709     /*
3710      * The tcg atomic primitives are all full barriers.  Therefore we
3711      * can ignore the Acquire and Release bits of this instruction.
3712      */
3713     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3714 
3715     if (mop & MO_SIGN) {
3716         switch (a->sz) {
3717         case MO_8:
3718             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3719             break;
3720         case MO_16:
3721             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3722             break;
3723         case MO_32:
3724             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3725             break;
3726         case MO_64:
3727             break;
3728         default:
3729             g_assert_not_reached();
3730         }
3731     }
3732     return true;
3733 }
3734 
3735 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3736 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3737 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3738 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3739 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3740 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3741 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3742 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3743 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
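
/*
 * Illustrative guest usage (registers arbitrary):
 *     ldadd w1, w0, [x2]    ; atomically { w0 = [x2]; [x2] += w1; }
 * The old memory value is always returned in Rt.  LDCLR passes
 * invert == true so the fetch-and-AND helper implements bit-clear.
 */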
3744 
3745 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3746 {
3747     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3748     TCGv_i64 clean_addr;
3749     MemOp mop;
3750 
3751     if (!dc_isar_feature(aa64_atomics, s) ||
3752         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3753         return false;
3754     }
3755     if (a->rn == 31) {
3756         gen_check_sp_alignment(s);
3757     }
3758     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3759     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3760                                 a->rn != 31, mop);
3761     /*
3762      * LDAPR* are a special case because they are a simple load, not a
3763      * fetch-and-do-something op.
3764      * The architectural consistency requirements here are weaker than
3765      * full load-acquire (we only need "load-acquire processor consistent"),
3766      * but we choose to implement them as full LDAQ.
3767      */
3768     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3769               true, a->rt, iss_sf, true);
3770     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3771     return true;
3772 }
3773 
3774 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3775 {
3776     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3777     MemOp memop;
3778 
3779     /* Load with pointer authentication */
3780     if (!dc_isar_feature(aa64_pauth, s)) {
3781         return false;
3782     }
3783 
3784     if (a->rn == 31) {
3785         gen_check_sp_alignment(s);
3786     }
3787     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3788 
3789     if (s->pauth_active) {
3790         if (!a->m) {
3791             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3792                                       tcg_constant_i64(0));
3793         } else {
3794             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3795                                       tcg_constant_i64(0));
3796         }
3797     }
3798 
3799     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3800 
3801     memop = finalize_memop(s, MO_64);
3802 
3803     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3804     clean_addr = gen_mte_check1(s, dirty_addr, false,
3805                                 a->w || a->rn != 31, memop);
3806 
3807     tcg_rt = cpu_reg(s, a->rt);
3808     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3809               /* extend */ false, /* iss_valid */ !a->w,
3810               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3811 
3812     if (a->w) {
3813         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3814     }
3815     return true;
3816 }
3817 
3818 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3819 {
3820     TCGv_i64 clean_addr, dirty_addr;
3821     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3822     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3823 
3824     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3825         return false;
3826     }
3827 
3828     if (a->rn == 31) {
3829         gen_check_sp_alignment(s);
3830     }
3831 
3832     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3833     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3834     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3835     clean_addr = clean_data_tbi(s, dirty_addr);
3836 
3837     /*
3838      * Load-AcquirePC semantics; we implement as the slightly more
3839      * restrictive Load-Acquire.
3840      */
3841     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3842               a->rt, iss_sf, true);
3843     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3844     return true;
3845 }
3846 
3847 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3848 {
3849     TCGv_i64 clean_addr, dirty_addr;
3850     MemOp mop = a->sz;
3851     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3852 
3853     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3854         return false;
3855     }
3856 
3857     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3858 
3859     if (a->rn == 31) {
3860         gen_check_sp_alignment(s);
3861     }
3862 
3863     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3864     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3865     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3866     clean_addr = clean_data_tbi(s, dirty_addr);
3867 
3868     /* Store-Release semantics */
3869     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3870     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3871     return true;
3872 }
3873 
3874 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3875 {
3876     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3877     MemOp endian, align, mop;
3878 
3879     int total;    /* total bytes */
3880     int elements; /* elements per vector */
3881     int r;
3882     int size = a->sz;
3883 
3884     if (!a->p && a->rm != 0) {
3885         /* For non-postindexed accesses the Rm field must be 0 */
3886         return false;
3887     }
3888     if (size == 3 && !a->q && a->selem != 1) {
3889         return false;
3890     }
3891     if (!fp_access_check(s)) {
3892         return true;
3893     }
3894 
3895     if (a->rn == 31) {
3896         gen_check_sp_alignment(s);
3897     }
3898 
3899     /* For our purposes, bytes are always little-endian.  */
3900     endian = s->be_data;
3901     if (size == 0) {
3902         endian = MO_LE;
3903     }
3904 
3905     total = a->rpt * a->selem * (a->q ? 16 : 8);
3906     tcg_rn = cpu_reg_sp(s, a->rn);
3907 
3908     /*
3909      * Issue the MTE check vs the logical repeat count, before we
3910      * promote consecutive little-endian elements below.
3911      */
3912     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3913                                 finalize_memop_asimd(s, size));
3914 
3915     /*
3916      * Consecutive little-endian elements from a single register
3917      * can be promoted to a larger little-endian operation.
3918      */
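    /*
     * For example (illustrative): "ld1 {v0.4s}, [x0]" has selem == 1,
     * so on a little-endian guest the four 32-bit elements are loaded
     * as two 64-bit operations rather than four 32-bit ones, with the
     * original element alignment preserved via pow2_align().
     */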
3919     align = MO_ALIGN;
3920     if (a->selem == 1 && endian == MO_LE) {
3921         align = pow2_align(size);
3922         size = 3;
3923     }
3924     if (!s->align_mem) {
3925         align = 0;
3926     }
3927     mop = endian | size | align;
3928 
3929     elements = (a->q ? 16 : 8) >> size;
3930     tcg_ebytes = tcg_constant_i64(1 << size);
3931     for (r = 0; r < a->rpt; r++) {
3932         int e;
3933         for (e = 0; e < elements; e++) {
3934             int xs;
3935             for (xs = 0; xs < a->selem; xs++) {
3936                 int tt = (a->rt + r + xs) % 32;
3937                 do_vec_ld(s, tt, e, clean_addr, mop);
3938                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3939             }
3940         }
3941     }
3942 
3943     /*
3944      * For non-quad operations, setting a slice of the low 64 bits of
3945      * the register clears the high 64 bits (in the ARM ARM pseudocode
3946      * this is implicit in the fact that 'rval' is a 64 bit wide
3947      * variable).  For quad operations, we might still need to zero
3948      * the high bits of SVE.
3949      */
3950     for (r = 0; r < a->rpt * a->selem; r++) {
3951         int tt = (a->rt + r) % 32;
3952         clear_vec_high(s, a->q, tt);
3953     }
3954 
3955     if (a->p) {
3956         if (a->rm == 31) {
3957             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3958         } else {
3959             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3960         }
3961     }
3962     return true;
3963 }
3964 
3965 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3966 {
3967     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3968     MemOp endian, align, mop;
3969 
3970     int total;    /* total bytes */
3971     int elements; /* elements per vector */
3972     int r;
3973     int size = a->sz;
3974 
3975     if (!a->p && a->rm != 0) {
3976         /* For non-postindexed accesses the Rm field must be 0 */
3977         return false;
3978     }
3979     if (size == 3 && !a->q && a->selem != 1) {
3980         return false;
3981     }
3982     if (!fp_access_check(s)) {
3983         return true;
3984     }
3985 
3986     if (a->rn == 31) {
3987         gen_check_sp_alignment(s);
3988     }
3989 
3990     /* For our purposes, bytes are always little-endian.  */
3991     endian = s->be_data;
3992     if (size == 0) {
3993         endian = MO_LE;
3994     }
3995 
3996     total = a->rpt * a->selem * (a->q ? 16 : 8);
3997     tcg_rn = cpu_reg_sp(s, a->rn);
3998 
3999     /*
4000      * Issue the MTE check vs the logical repeat count, before we
4001      * promote consecutive little-endian elements below.
4002      */
4003     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4004                                 finalize_memop_asimd(s, size));
4005 
4006     /*
4007      * Consecutive little-endian elements from a single register
4008      * can be promoted to a larger little-endian operation.
4009      */
4010     align = MO_ALIGN;
4011     if (a->selem == 1 && endian == MO_LE) {
4012         align = pow2_align(size);
4013         size = 3;
4014     }
4015     if (!s->align_mem) {
4016         align = 0;
4017     }
4018     mop = endian | size | align;
4019 
4020     elements = (a->q ? 16 : 8) >> size;
4021     tcg_ebytes = tcg_constant_i64(1 << size);
4022     for (r = 0; r < a->rpt; r++) {
4023         int e;
4024         for (e = 0; e < elements; e++) {
4025             int xs;
4026             for (xs = 0; xs < a->selem; xs++) {
4027                 int tt = (a->rt + r + xs) % 32;
4028                 do_vec_st(s, tt, e, clean_addr, mop);
4029                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4030             }
4031         }
4032     }
4033 
4034     if (a->p) {
4035         if (a->rm == 31) {
4036             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4037         } else {
4038             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4039         }
4040     }
4041     return true;
4042 }
4043 
4044 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4045 {
4046     int xs, total, rt;
4047     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4048     MemOp mop;
4049 
4050     if (!a->p && a->rm != 0) {
4051         return false;
4052     }
4053     if (!fp_access_check(s)) {
4054         return true;
4055     }
4056 
4057     if (a->rn == 31) {
4058         gen_check_sp_alignment(s);
4059     }
4060 
4061     total = a->selem << a->scale;
4062     tcg_rn = cpu_reg_sp(s, a->rn);
4063 
4064     mop = finalize_memop_asimd(s, a->scale);
4065     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4066                                 total, mop);
4067 
4068     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4069     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4070         do_vec_st(s, rt, a->index, clean_addr, mop);
4071         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4072     }
4073 
4074     if (a->p) {
4075         if (a->rm == 31) {
4076             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4077         } else {
4078             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4079         }
4080     }
4081     return true;
4082 }
4083 
4084 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4085 {
4086     int xs, total, rt;
4087     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4088     MemOp mop;
4089 
4090     if (!a->p && a->rm != 0) {
4091         return false;
4092     }
4093     if (!fp_access_check(s)) {
4094         return true;
4095     }
4096 
4097     if (a->rn == 31) {
4098         gen_check_sp_alignment(s);
4099     }
4100 
4101     total = a->selem << a->scale;
4102     tcg_rn = cpu_reg_sp(s, a->rn);
4103 
4104     mop = finalize_memop_asimd(s, a->scale);
4105     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4106                                 total, mop);
4107 
4108     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4109     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4110         do_vec_ld(s, rt, a->index, clean_addr, mop);
4111         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4112     }
4113 
4114     if (a->p) {
4115         if (a->rm == 31) {
4116             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4117         } else {
4118             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4119         }
4120     }
4121     return true;
4122 }
4123 
4124 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4125 {
4126     int xs, total, rt;
4127     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4128     MemOp mop;
4129 
4130     if (!a->p && a->rm != 0) {
4131         return false;
4132     }
4133     if (!fp_access_check(s)) {
4134         return true;
4135     }
4136 
4137     if (a->rn == 31) {
4138         gen_check_sp_alignment(s);
4139     }
4140 
4141     total = a->selem << a->scale;
4142     tcg_rn = cpu_reg_sp(s, a->rn);
4143 
4144     mop = finalize_memop_asimd(s, a->scale);
4145     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4146                                 total, mop);
4147 
4148     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4149     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4150         /* Load and replicate to all elements */
4151         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4152 
4153         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4154         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4155                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4156         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4157     }
4158 
4159     if (a->p) {
4160         if (a->rm == 31) {
4161             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4162         } else {
4163             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4164         }
4165     }
4166     return true;
4167 }
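
/*
 * Illustrative example: "ld1r {v0.4s}, [x0]" performs a single 32-bit
 * load and the gvec dup above replicates it into all four elements of
 * v0; with a->q == 0 only the low eight bytes are written and the
 * high half is cleared.
 */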
4168 
4169 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4170 {
4171     TCGv_i64 addr, clean_addr, tcg_rt;
4172     int size = 4 << s->dcz_blocksize;
4173 
4174     if (!dc_isar_feature(aa64_mte, s)) {
4175         return false;
4176     }
4177     if (s->current_el == 0) {
4178         return false;
4179     }
4180 
4181     if (a->rn == 31) {
4182         gen_check_sp_alignment(s);
4183     }
4184 
4185     addr = read_cpu_reg_sp(s, a->rn, true);
4186     tcg_gen_addi_i64(addr, addr, a->imm);
4187     tcg_rt = cpu_reg(s, a->rt);
4188 
4189     if (s->ata[0]) {
4190         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4191     }
4192     /*
4193      * The non-tags portion of STZGM is mostly like DC_ZVA,
4194      * except the alignment happens before the access.
4195      */
4196     clean_addr = clean_data_tbi(s, addr);
4197     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4198     gen_helper_dc_zva(tcg_env, clean_addr);
4199     return true;
4200 }
4201 
4202 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4203 {
4204     TCGv_i64 addr, clean_addr, tcg_rt;
4205 
4206     if (!dc_isar_feature(aa64_mte, s)) {
4207         return false;
4208     }
4209     if (s->current_el == 0) {
4210         return false;
4211     }
4212 
4213     if (a->rn == 31) {
4214         gen_check_sp_alignment(s);
4215     }
4216 
4217     addr = read_cpu_reg_sp(s, a->rn, true);
4218     tcg_gen_addi_i64(addr, addr, a->imm);
4219     tcg_rt = cpu_reg(s, a->rt);
4220 
4221     if (s->ata[0]) {
4222         gen_helper_stgm(tcg_env, addr, tcg_rt);
4223     } else {
4224         MMUAccessType acc = MMU_DATA_STORE;
4225         int size = 4 << s->gm_blocksize;
4226 
4227         clean_addr = clean_data_tbi(s, addr);
4228         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4229         gen_probe_access(s, clean_addr, acc, size);
4230     }
4231     return true;
4232 }
4233 
4234 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4235 {
4236     TCGv_i64 addr, clean_addr, tcg_rt;
4237 
4238     if (!dc_isar_feature(aa64_mte, s)) {
4239         return false;
4240     }
4241     if (s->current_el == 0) {
4242         return false;
4243     }
4244 
4245     if (a->rn == 31) {
4246         gen_check_sp_alignment(s);
4247     }
4248 
4249     addr = read_cpu_reg_sp(s, a->rn, true);
4250     tcg_gen_addi_i64(addr, addr, a->imm);
4251     tcg_rt = cpu_reg(s, a->rt);
4252 
4253     if (s->ata[0]) {
4254         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4255     } else {
4256         MMUAccessType acc = MMU_DATA_LOAD;
4257         int size = 4 << s->gm_blocksize;
4258 
4259         clean_addr = clean_data_tbi(s, addr);
4260         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4261         gen_probe_access(s, clean_addr, acc, size);
4262         /* The result tags are zeros.  */
4263         tcg_gen_movi_i64(tcg_rt, 0);
4264     }
4265     return true;
4266 }
4267 
4268 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4269 {
4270     TCGv_i64 addr, clean_addr, tcg_rt;
4271 
4272     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4273         return false;
4274     }
4275 
4276     if (a->rn == 31) {
4277         gen_check_sp_alignment(s);
4278     }
4279 
4280     addr = read_cpu_reg_sp(s, a->rn, true);
4281     if (!a->p) {
4282         /* pre-index or signed offset */
4283         tcg_gen_addi_i64(addr, addr, a->imm);
4284     }
4285 
4286     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4287     tcg_rt = cpu_reg(s, a->rt);
4288     if (s->ata[0]) {
4289         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4290     } else {
4291         /*
4292          * Tag access disabled: we must check for aborts on the
4293          * load from [rn+offset], and then insert a 0 tag into rt.
4294          */
4295         clean_addr = clean_data_tbi(s, addr);
4296         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4297         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4298     }
4299 
4300     if (a->w) {
4301         /* pre-index or post-index */
4302         if (a->p) {
4303             /* post-index */
4304             tcg_gen_addi_i64(addr, addr, a->imm);
4305         }
4306         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4307     }
4308     return true;
4309 }
4310 
4311 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4312 {
4313     TCGv_i64 addr, tcg_rt;
4314 
4315     if (a->rn == 31) {
4316         gen_check_sp_alignment(s);
4317     }
4318 
4319     addr = read_cpu_reg_sp(s, a->rn, true);
4320     if (!a->p) {
4321         /* pre-index or signed offset */
4322         tcg_gen_addi_i64(addr, addr, a->imm);
4323     }
4324     tcg_rt = cpu_reg_sp(s, a->rt);
4325     if (!s->ata[0]) {
4326         /*
4327          * For STG and ST2G, we need to check alignment and probe memory.
4328          * TODO: For STZG and STZ2G, we could rely on the stores below,
4329          * at least for system mode; user-only won't enforce alignment.
4330          */
4331         if (is_pair) {
4332             gen_helper_st2g_stub(tcg_env, addr);
4333         } else {
4334             gen_helper_stg_stub(tcg_env, addr);
4335         }
4336     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4337         if (is_pair) {
4338             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4339         } else {
4340             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4341         }
4342     } else {
4343         if (is_pair) {
4344             gen_helper_st2g(tcg_env, addr, tcg_rt);
4345         } else {
4346             gen_helper_stg(tcg_env, addr, tcg_rt);
4347         }
4348     }
4349 
4350     if (is_zero) {
4351         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4352         TCGv_i64 zero64 = tcg_constant_i64(0);
4353         TCGv_i128 zero128 = tcg_temp_new_i128();
4354         int mem_index = get_mem_index(s);
4355         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4356 
4357         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4358 
4359         /* This is 1 or 2 atomic 16-byte operations. */
4360         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4361         if (is_pair) {
4362             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4363             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4364         }
4365     }
4366 
4367     if (a->w) {
4368         /* pre-index or post-index */
4369         if (a->p) {
4370             /* post-index */
4371             tcg_gen_addi_i64(addr, addr, a->imm);
4372         }
4373         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4374     }
4375     return true;
4376 }
4377 
4378 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4379 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4380 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4381 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4382 
4383 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4384 
4385 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4386                    bool is_setg, SetFn fn)
4387 {
4388     int memidx;
4389     uint32_t syndrome, desc = 0;
4390 
4391     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4392         return false;
4393     }
4394 
4395     /*
4396      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4397      * us to pull this check before the CheckMOPSEnabled() test
4398      * (which we do in the helper function)
4399      */
4400     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4401         a->rd == 31 || a->rn == 31) {
4402         return false;
4403     }
4404 
4405     memidx = get_a64_user_mem_index(s, a->unpriv);
4406 
4407     /*
4408      * We pass option_a == true, matching our implementation;
4409      * we pass wrong_option == false: the helper function may set that bit.
4410      */
4411     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4412                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4413 
4414     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4415         /* We may need to do MTE tag checking, so assemble the descriptor */
4416         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4417         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4418         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4419         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4420     }
4421     /* The helper function always needs the memidx even with MTE disabled */
4422     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4423 
4424     /*
4425      * The helper needs the register numbers, but since they're in
4426      * the syndrome anyway, we let it extract them from there rather
4427      * than passing in an extra three integer arguments.
4428      */
4429     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4430     return true;
4431 }
4432 
4433 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4434 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4435 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4436 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4437 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4438 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
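
/*
 * Illustrative guest sequence for a FEAT_MOPS memset (registers
 * arbitrary):
 *     setp [x0]!, x1!, x2    ; prologue
 *     setm [x0]!, x1!, x2    ; main body
 *     sete [x0]!, x1!, x2    ; epilogue
 * x0 is the destination, x1 the remaining byte count and x2 holds
 * the fill byte; the address and count registers are updated as the
 * sequence progresses, with the exact representation of progress
 * depending on which implementation "option" is reported.
 */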
4439 
4440 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4441 
4442 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4443 {
4444     int rmemidx, wmemidx;
4445     uint32_t syndrome, rdesc = 0, wdesc = 0;
4446     bool wunpriv = extract32(a->options, 0, 1);
4447     bool runpriv = extract32(a->options, 1, 1);
4448 
4449     /*
4450      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4451      * us to pull this check before the CheckMOPSEnabled() test
4452      * (which we do in the helper function)
4453      */
4454     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4455         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4456         return false;
4457     }
4458 
4459     rmemidx = get_a64_user_mem_index(s, runpriv);
4460     wmemidx = get_a64_user_mem_index(s, wunpriv);
4461 
4462     /*
4463      * We pass option_a == true, matching our implementation;
4464      * we pass wrong_option == false: the helper function may set that bit.
4465      */
4466     syndrome = syn_mop(false, false, a->options, is_epilogue,
4467                        false, true, a->rd, a->rs, a->rn);
4468 
4469     /* If we need to do MTE tag checking, assemble the descriptors */
4470     if (s->mte_active[runpriv]) {
4471         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4472         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4473     }
4474     if (s->mte_active[wunpriv]) {
4475         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4476         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4477         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4478     }
4479     /* The helper function needs these parts of the descriptor regardless */
4480     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4481     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4482 
4483     /*
4484      * The helper needs the register numbers, but since they're in
4485      * the syndrome anyway, we let it extract them from there rather
4486      * than passing in an extra three integer arguments.
4487      */
4488     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4489        tcg_constant_i32(rdesc));
4490     return true;
4491 }
4492 
4493 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4494 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4495 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4496 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4497 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4498 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
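
/*
 * The CPYP/CPYM/CPYE triple is the memcpy analogue of the SET
 * sequence above; an illustrative forward-only copy would be:
 *     cpyfp [x0]!, [x1]!, x2!
 *     cpyfm [x0]!, [x1]!, x2!
 *     cpyfe [x0]!, [x1]!, x2!
 * with x0/x1 the destination/source pointers and x2 the remaining
 * byte count.  The plain CPY* forms may also copy backwards to
 * handle overlapping buffers.
 */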
4499 
4500 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4501 
4502 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4503                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4504 {
4505     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4506     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4507     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4508 
4509     fn(tcg_rd, tcg_rn, tcg_imm);
4510     if (!a->sf) {
4511         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4512     }
4513     return true;
4514 }
4515 
4516 /*
4517  * PC-rel. addressing
4518  */
4519 
4520 static bool trans_ADR(DisasContext *s, arg_ri *a)
4521 {
4522     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4523     return true;
4524 }
4525 
4526 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4527 {
4528     int64_t offset = (int64_t)a->imm << 12;
4529 
4530     /* The page offset is ok for CF_PCREL. */
4531     offset -= s->pc_curr & 0xfff;
4532     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4533     return true;
4534 }
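/*
 * Worked example: an ADRP at pc 0x10000234 with a->imm == 1 computes
 * offset = 0x1000 - 0x234 = 0xdcc, so Xd = 0x10000234 + 0xdcc
 * = 0x10001000: the base of the next 4K page, regardless of where
 * within its own page the ADRP sits.
 */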
4535 
4536 /*
4537  * Add/subtract (immediate)
4538  */
4539 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4540 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4541 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4542 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
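/*
 * The plain forms accept SP for both Rd and Rn, but the flag-setting
 * forms pass rd_sp == 0 because register 31 is XZR there: e.g.
 * CMN x1, #4 is ADDS xzr, x1, #4.
 */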
4543 
4544 /*
4545  * Add/subtract (immediate, with tags)
4546  */
4547 
4548 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4549                                       bool sub_op)
4550 {
4551     TCGv_i64 tcg_rn, tcg_rd;
4552     int imm;
4553 
4554     imm = a->uimm6 << LOG2_TAG_GRANULE;
4555     if (sub_op) {
4556         imm = -imm;
4557     }
4558 
4559     tcg_rn = cpu_reg_sp(s, a->rn);
4560     tcg_rd = cpu_reg_sp(s, a->rd);
4561 
4562     if (s->ata[0]) {
4563         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4564                            tcg_constant_i32(imm),
4565                            tcg_constant_i32(a->uimm4));
4566     } else {
4567         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4568         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4569     }
4570     return true;
4571 }
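/*
 * E.g. ADDG x0, x1, #32, #3 arrives here with uimm6 == 2 and
 * uimm4 == 3: the byte offset is 2 << LOG2_TAG_GRANULE == 32, and
 * with MTE enabled the helper also advances the allocation tag by
 * uimm4 (skipping any tags excluded via GCR_EL1); without MTE the
 * tag bits [59:56] are simply cleared.
 */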
4572 
4573 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4574 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4575 
4576 /* The input should be a value in the bottom e bits (with higher
4577  * bits zero); returns that value replicated into every element
4578  * of size e in a 64 bit integer.
4579  */
4580 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4581 {
4582     assert(e != 0);
4583     while (e < 64) {
4584         mask |= mask << e;
4585         e *= 2;
4586     }
4587     return mask;
4588 }
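/* E.g. bitfield_replicate(0x03, 8) == 0x0303030303030303. */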
4589 
4590 /*
4591  * Logical (immediate)
4592  */
4593 
4594 /*
4595  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4596  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4597  * value (ie should cause a guest UNDEF exception), and true if they are
4598  * valid, in which case the decoded bit pattern is written to result.
4599  */
4600 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4601                             unsigned int imms, unsigned int immr)
4602 {
4603     uint64_t mask;
4604     unsigned e, levels, s, r;
4605     int len;
4606 
4607     assert(immn < 2 && imms < 64 && immr < 64);
4608 
4609     /* The bit patterns we create here are 64 bit patterns which
4610      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4611      * 64 bits each. Each element contains the same value: a run
4612      * of between 1 and e-1 non-zero bits, rotated within the
4613      * element by between 0 and e-1 bits.
4614      *
4615      * The element size and run length are encoded into immn (1 bit)
4616      * and imms (6 bits) as follows:
4617      * 64 bit elements: immn = 1, imms = <length of run - 1>
4618      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4619      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4620      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4621      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4622      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4623      * Notice that immn = 0, imms = 11111x is the only combination
4624      * not covered by one of the above options; this is reserved.
4625      * Further, <length of run - 1> all-ones is a reserved pattern.
4626      *
4627      * In all cases the rotation is by immr % e (and immr is 6 bits).
4628      */
4629 
4630     /* First determine the element size */
4631     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4632     if (len < 1) {
4633         /* This is the immn == 0, imms == 0b11111x case */
4634         return false;
4635     }
4636     e = 1 << len;
4637 
4638     levels = e - 1;
4639     s = imms & levels;
4640     r = immr & levels;
4641 
4642     if (s == levels) {
4643         /* <length of run - 1> mustn't be all-ones. */
4644         return false;
4645     }
4646 
4647     /* Create the value of one element: s+1 set bits rotated
4648      * by r within the element (which is e bits wide)...
4649      */
4650     mask = MAKE_64BIT_MASK(0, s + 1);
4651     if (r) {
4652         mask = (mask >> r) | (mask << (e - r));
4653         mask &= MAKE_64BIT_MASK(0, e);
4654     }
4655     /* ...then replicate the element over the whole 64 bit value */
4656     mask = bitfield_replicate(mask, e);
4657     *result = mask;
4658     return true;
4659 }
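/*
 * Worked example: immn == 0, imms == 0b110001, immr == 0b000001.
 * len = 31 - clz32(0b001110) == 3, so e == 8 (byte elements);
 * s == 1 gives a run of two set bits (0b00000011), r == 1 rotates
 * that to 0b10000001, and replication yields the final wmask
 * 0x8181818181818181.
 */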
4660 
4661 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4662                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4663 {
4664     TCGv_i64 tcg_rd, tcg_rn;
4665     uint64_t imm;
4666 
4667     /* Some immediate field values are reserved. */
4668     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4669                                 extract32(a->dbm, 0, 6),
4670                                 extract32(a->dbm, 6, 6))) {
4671         return false;
4672     }
4673     if (!a->sf) {
4674         imm &= 0xffffffffull;
4675     }
4676 
4677     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4678     tcg_rn = cpu_reg(s, a->rn);
4679 
4680     fn(tcg_rd, tcg_rn, imm);
4681     if (set_cc) {
4682         gen_logic_CC(a->sf, tcg_rd);
4683     }
4684     if (!a->sf) {
4685         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4686     }
4687     return true;
4688 }
4689 
4690 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4691 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4692 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4693 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4694 
4695 /*
4696  * Move wide (immediate)
4697  */
4698 
4699 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4700 {
4701     int pos = a->hw << 4;
4702     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4703     return true;
4704 }
4705 
4706 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4707 {
4708     int pos = a->hw << 4;
4709     uint64_t imm = a->imm;
4710 
4711     imm = ~(imm << pos);
4712     if (!a->sf) {
4713         imm = (uint32_t)imm;
4714     }
4715     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4716     return true;
4717 }
4718 
4719 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4720 {
4721     int pos = a->hw << 4;
4722     TCGv_i64 tcg_rd, tcg_im;
4723 
4724     tcg_rd = cpu_reg(s, a->rd);
4725     tcg_im = tcg_constant_i64(a->imm);
4726     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4727     if (!a->sf) {
4728         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4729     }
4730     return true;
4731 }
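/*
 * E.g. building 0x12345678 in w0:
 *   MOVZ w0, #0x1234, LSL #16    -> 0x12340000 (other bits zeroed)
 *   MOVK w0, #0x5678             -> 0x12345678 (other bits kept)
 * while MOVN w0, #0 gives 0xffffffff (inverted, truncated for !sf).
 */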
4732 
4733 /*
4734  * Bitfield
4735  */
4736 
4737 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4738 {
4739     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4740     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4741     unsigned int bitsize = a->sf ? 64 : 32;
4742     unsigned int ri = a->immr;
4743     unsigned int si = a->imms;
4744     unsigned int pos, len;
4745 
4746     if (si >= ri) {
4747         /* Wd<s-r:0> = Wn<s:r> */
4748         len = (si - ri) + 1;
4749         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4750         if (!a->sf) {
4751             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4752         }
4753     } else {
4754         /* Wd<32+s-r,32-r> = Wn<s:0> */
4755         len = si + 1;
4756         pos = (bitsize - ri) & (bitsize - 1);
4757 
4758         if (len < ri) {
4759             /*
4760              * Sign extend the destination field from len to fill the
4761              * balance of the word.  Let the deposit below insert all
4762              * of those sign bits.
4763              */
4764             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4765             len = ri;
4766         }
4767 
4768         /*
4769          * We start with zero, and we haven't modified any bits outside
4770          * bitsize, therefore no final zero-extension is needed for !sf.
4771          */
4772         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4773     }
4774     return true;
4775 }
4776 
4777 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4778 {
4779     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4780     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4781     unsigned int bitsize = a->sf ? 64 : 32;
4782     unsigned int ri = a->immr;
4783     unsigned int si = a->imms;
4784     unsigned int pos, len;
4785 
4786 
4790         /* Wd<s-r:0> = Wn<s:r> */
4791         len = (si - ri) + 1;
4792         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4793     } else {
4794         /* Wd<32+s-r,32-r> = Wn<s:0> */
4795         len = si + 1;
4796         pos = (bitsize - ri) & (bitsize - 1);
4797         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4798     }
4799     return true;
4800 }
4801 
4802 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4803 {
4804     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4805     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4806     unsigned int bitsize = a->sf ? 64 : 32;
4807     unsigned int ri = a->immr;
4808     unsigned int si = a->imms;
4809     unsigned int pos, len;
4810 
4811 
4815         /* Wd<s-r:0> = Wn<s:r> */
4816         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4817         len = (si - ri) + 1;
4818         pos = 0;
4819     } else {
4820         /* Wd<32+s-r,32-r> = Wn<s:0> */
4821         len = si + 1;
4822         pos = (bitsize - ri) & (bitsize - 1);
4823     }
4824 
4825     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4826     if (!a->sf) {
4827         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4828     }
4829     return true;
4830 }
4831 
4832 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4833 {
4834     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4835 
4836     tcg_rd = cpu_reg(s, a->rd);
4837 
4838     if (unlikely(a->imm == 0)) {
4839         /*
4840          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4841          * so an extract from bit 0 is a special case.
4842          */
4843         if (a->sf) {
4844             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4845         } else {
4846             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4847         }
4848     } else {
4849         tcg_rm = cpu_reg(s, a->rm);
4850         tcg_rn = cpu_reg(s, a->rn);
4851 
4852         if (a->sf) {
4853             /* Specialization to ROR happens in EXTRACT2.  */
4854             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4855         } else {
4856             TCGv_i32 t0 = tcg_temp_new_i32();
4857 
4858             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4859             if (a->rm == a->rn) {
4860                 tcg_gen_rotri_i32(t0, t0, a->imm);
4861             } else {
4862                 TCGv_i32 t1 = tcg_temp_new_i32();
4863                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4864                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4865             }
4866             tcg_gen_extu_i32_i64(tcg_rd, t0);
4867         }
4868     }
4869     return true;
4870 }
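/*
 * EXTR extracts 64 (or 32) bits starting at bit <imm> of the
 * concatenation Rn:Rm, e.g. EXTR x0, x1, x2, #8 computes
 * (x1:x2) >> 8; with Rn == Rm this degenerates to ROR.
 */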
4871 
4872 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4873 {
4874     if (fp_access_check(s)) {
4875         int len = (a->len + 1) * 16;
4876 
4877         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4878                            vec_full_reg_offset(s, a->rm), tcg_env,
4879                            a->q ? 16 : 8, vec_full_reg_size(s),
4880                            (len << 6) | (a->tbx << 5) | a->rn,
4881                            gen_helper_simd_tblx);
4882     }
4883     return true;
4884 }
4885 
4886 typedef int simd_permute_idx_fn(int i, int part, int elements);
4887 
4888 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4889                             simd_permute_idx_fn *fn, int part)
4890 {
4891     MemOp esz = a->esz;
4892     int datasize = a->q ? 16 : 8;
4893     int elements = datasize >> esz;
4894     TCGv_i64 tcg_res[2], tcg_ele;
4895 
4896     if (esz == MO_64 && !a->q) {
4897         return false;
4898     }
4899     if (!fp_access_check(s)) {
4900         return true;
4901     }
4902 
4903     tcg_res[0] = tcg_temp_new_i64();
4904     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4905     tcg_ele = tcg_temp_new_i64();
4906 
4907     for (int i = 0; i < elements; i++) {
4908         int o, w, idx;
4909 
4910         idx = fn(i, part, elements);
4911         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4912                          idx & (elements - 1), esz);
4913 
4914         w = (i << (esz + 3)) / 64;
4915         o = (i << (esz + 3)) % 64;
4916         if (o == 0) {
4917             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4918         } else {
4919             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4920         }
4921     }
4922 
4923     for (int i = a->q; i >= 0; --i) {
4924         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4925     }
4926     clear_vec_high(s, a->q, a->rd);
4927     return true;
4928 }
4929 
4930 static int permute_load_uzp(int i, int part, int elements)
4931 {
4932     return 2 * i + part;
4933 }
4934 
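/*
 * UZP treats Rm:Rn as one vector of 2*elements and keeps the
 * even-numbered (UZP1) or odd-numbered (UZP2) elements; e.g. with
 * four elements, UZP1 loads indices 0,2,4,6, i.e. Rn[0], Rn[2],
 * Rm[0], Rm[2].
 */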
4935 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4936 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4937 
4938 static int permute_load_trn(int i, int part, int elements)
4939 {
4940     return (i & 1) * elements + (i & ~1) + part;
4941 }
4942 
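/*
 * TRN interleaves corresponding even (TRN1) or odd (TRN2) element
 * pairs; e.g. with four elements, TRN1 loads indices 0,4,2,6,
 * i.e. Rn[0], Rm[0], Rn[2], Rm[2].
 */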
4943 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4944 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4945 
4946 static int permute_load_zip(int i, int part, int elements)
4947 {
4948     return (i & 1) * elements + ((part * elements + i) >> 1);
4949 }
4950 
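/*
 * ZIP interleaves the low (ZIP1) or high (ZIP2) halves of Rn and
 * Rm; e.g. with four elements, ZIP1 loads indices 0,4,1,5,
 * i.e. Rn[0], Rm[0], Rn[1], Rm[1].
 */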
4951 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4952 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4953 
4954 /*
4955  * Cryptographic AES, SHA, SHA512
4956  */
4957 
4958 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4959 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4960 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4961 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4962 
4963 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4964 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4965 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4966 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4967 
4968 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4969 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4970 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4971 
4972 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4973 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4974 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4975 
4976 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4977 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4978 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4979 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4980 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4981 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4982 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4983 
4984 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4985 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4986 
4987 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4988 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4989 
4990 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4991 {
4992     if (!dc_isar_feature(aa64_sm3, s)) {
4993         return false;
4994     }
4995     if (fp_access_check(s)) {
4996         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4997         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4998         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4999         TCGv_i32 tcg_res = tcg_temp_new_i32();
5000 
5001         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5002         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5003         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5004 
5005         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5006         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5007         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5008         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5009 
5010         /* Clear the whole register first, then store bits [127:96]. */
5011         clear_vec(s, a->rd);
5012         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5013     }
5014     return true;
5015 }
5016 
5017 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5018 {
5019     if (fp_access_check(s)) {
5020         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5021     }
5022     return true;
5023 }
5024 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5025 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5026 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5027 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5028 
5029 static bool trans_XAR(DisasContext *s, arg_XAR *a)
5030 {
5031     if (!dc_isar_feature(aa64_sha3, s)) {
5032         return false;
5033     }
5034     if (fp_access_check(s)) {
5035         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5036                      vec_full_reg_offset(s, a->rn),
5037                      vec_full_reg_offset(s, a->rm), a->imm, 16,
5038                      vec_full_reg_size(s));
5039     }
5040     return true;
5041 }
5042 
5043 /*
5044  * Advanced SIMD copy
5045  */
5046 
5047 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5048 {
5049     unsigned esz = ctz32(imm);
5050     if (esz <= MO_64) {
5051         *pesz = esz;
5052         *pidx = imm >> (esz + 1);
5053         return true;
5054     }
5055     return false;
5056 }
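/*
 * The immediate encodes the element size in its lowest set bit and
 * the index in the bits above it: e.g. imm == 0b10010 decodes as
 * esz == MO_16 (bit 1 set) with idx == 0b10010 >> 2 == 4.
 */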
5057 
5058 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5059 {
5060     MemOp esz;
5061     unsigned idx;
5062 
5063     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5064         return false;
5065     }
5066     if (fp_access_check(s)) {
5067         /*
5068          * This instruction just extracts the specified element and
5069          * zero-extends it into the bottom of the destination register.
5070          */
5071         TCGv_i64 tmp = tcg_temp_new_i64();
5072         read_vec_element(s, tmp, a->rn, idx, esz);
5073         write_fp_dreg(s, a->rd, tmp);
5074     }
5075     return true;
5076 }
5077 
5078 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5079 {
5080     MemOp esz;
5081     unsigned idx;
5082 
5083     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5084         return false;
5085     }
5086     if (esz == MO_64 && !a->q) {
5087         return false;
5088     }
5089     if (fp_access_check(s)) {
5090         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5091                              vec_reg_offset(s, a->rn, idx, esz),
5092                              a->q ? 16 : 8, vec_full_reg_size(s));
5093     }
5094     return true;
5095 }
5096 
5097 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5098 {
5099     MemOp esz;
5100     unsigned idx;
5101 
5102     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5103         return false;
5104     }
5105     if (esz == MO_64 && !a->q) {
5106         return false;
5107     }
5108     if (fp_access_check(s)) {
5109         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5110                              a->q ? 16 : 8, vec_full_reg_size(s),
5111                              cpu_reg(s, a->rn));
5112     }
5113     return true;
5114 }
5115 
5116 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5117 {
5118     MemOp esz;
5119     unsigned idx;
5120 
5121     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5122         return false;
5123     }
5124     if (is_signed) {
5125         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5126             return false;
5127         }
5128     } else {
5129         if (esz == MO_64 ? !a->q : a->q) {
5130             return false;
5131         }
5132     }
5133     if (fp_access_check(s)) {
5134         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5135         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5136         if (is_signed && !a->q) {
5137             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5138         }
5139     }
5140     return true;
5141 }
5142 
5143 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5144 TRANS(UMOV, do_smov_umov, a, 0)
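/*
 * E.g. SMOV w0, v1.b[4] sign-extends byte 4 into w0 and zeroes the
 * upper half of x0, while UMOV x0, v1.d[1] moves out the high
 * doubleword; the q/esz checks above reject unallocated encodings
 * such as a 64-bit UMOV into a W register.
 */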
5145 
5146 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5147 {
5148     MemOp esz;
5149     unsigned idx;
5150 
5151     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5152         return false;
5153     }
5154     if (fp_access_check(s)) {
5155         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5156         clear_vec_high(s, true, a->rd);
5157     }
5158     return true;
5159 }
5160 
5161 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5162 {
5163     MemOp esz;
5164     unsigned didx, sidx;
5165 
5166     if (!decode_esz_idx(a->di, &esz, &didx)) {
5167         return false;
5168     }
5169     sidx = a->si >> esz;
5170     if (fp_access_check(s)) {
5171         TCGv_i64 tmp = tcg_temp_new_i64();
5172 
5173         read_vec_element(s, tmp, a->rn, sidx, esz);
5174         write_vec_element(s, tmp, a->rd, didx, esz);
5175 
5176         /* INS is considered a 128-bit write for SVE. */
5177         clear_vec_high(s, true, a->rd);
5178     }
5179     return true;
5180 }
5181 
5182 /*
5183  * Advanced SIMD three same
5184  */
5185 
5186 typedef struct FPScalar {
5187     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5188     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5189     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5190 } FPScalar;
5191 
5192 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5193                                         const FPScalar *f, int mergereg,
5194                                         ARMFPStatusFlavour fpsttype)
5195 {
5196     switch (a->esz) {
5197     case MO_64:
5198         if (fp_access_check(s)) {
5199             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5200             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5201             f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5202             write_fp_dreg_merging(s, a->rd, mergereg, t0);
5203         }
5204         break;
5205     case MO_32:
5206         if (fp_access_check(s)) {
5207             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5208             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5209             f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5210             write_fp_sreg_merging(s, a->rd, mergereg, t0);
5211         }
5212         break;
5213     case MO_16:
5214         if (!dc_isar_feature(aa64_fp16, s)) {
5215             return false;
5216         }
5217         if (fp_access_check(s)) {
5218             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5219             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5220             f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5221             write_fp_hreg_merging(s, a->rd, mergereg, t0);
5222         }
5223         break;
5224     default:
5225         return false;
5226     }
5227     return true;
5228 }
5229 
5230 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5231                           int mergereg)
5232 {
5233     return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5234                                        a->esz == MO_16 ?
5235                                        FPST_A64_F16 : FPST_A64);
5236 }
5237 
5238 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5239                                  const FPScalar *fnormal, const FPScalar *fah,
5240                                  int mergereg)
5241 {
5242     return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5243                                        mergereg, select_ah_fpst(s, a->esz));
5244 }
5245 
5246 /* Some insns need to call different helpers when FPCR.AH == 1 */
5247 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5248                               const FPScalar *fnormal,
5249                               const FPScalar *fah,
5250                               int mergereg)
5251 {
5252     return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5253 }
5254 
5255 static const FPScalar f_scalar_fadd = {
5256     gen_helper_vfp_addh,
5257     gen_helper_vfp_adds,
5258     gen_helper_vfp_addd,
5259 };
5260 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5261 
5262 static const FPScalar f_scalar_fsub = {
5263     gen_helper_vfp_subh,
5264     gen_helper_vfp_subs,
5265     gen_helper_vfp_subd,
5266 };
5267 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5268 
5269 static const FPScalar f_scalar_fdiv = {
5270     gen_helper_vfp_divh,
5271     gen_helper_vfp_divs,
5272     gen_helper_vfp_divd,
5273 };
5274 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5275 
5276 static const FPScalar f_scalar_fmul = {
5277     gen_helper_vfp_mulh,
5278     gen_helper_vfp_muls,
5279     gen_helper_vfp_muld,
5280 };
5281 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5282 
5283 static const FPScalar f_scalar_fmax = {
5284     gen_helper_vfp_maxh,
5285     gen_helper_vfp_maxs,
5286     gen_helper_vfp_maxd,
5287 };
5288 static const FPScalar f_scalar_fmax_ah = {
5289     gen_helper_vfp_ah_maxh,
5290     gen_helper_vfp_ah_maxs,
5291     gen_helper_vfp_ah_maxd,
5292 };
5293 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5294 
5295 static const FPScalar f_scalar_fmin = {
5296     gen_helper_vfp_minh,
5297     gen_helper_vfp_mins,
5298     gen_helper_vfp_mind,
5299 };
5300 static const FPScalar f_scalar_fmin_ah = {
5301     gen_helper_vfp_ah_minh,
5302     gen_helper_vfp_ah_mins,
5303     gen_helper_vfp_ah_mind,
5304 };
5305 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5306 
5307 static const FPScalar f_scalar_fmaxnm = {
5308     gen_helper_vfp_maxnumh,
5309     gen_helper_vfp_maxnums,
5310     gen_helper_vfp_maxnumd,
5311 };
5312 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5313 
5314 static const FPScalar f_scalar_fminnm = {
5315     gen_helper_vfp_minnumh,
5316     gen_helper_vfp_minnums,
5317     gen_helper_vfp_minnumd,
5318 };
5319 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5320 
5321 static const FPScalar f_scalar_fmulx = {
5322     gen_helper_advsimd_mulxh,
5323     gen_helper_vfp_mulxs,
5324     gen_helper_vfp_mulxd,
5325 };
5326 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5327 
5328 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5329 {
5330     gen_helper_vfp_mulh(d, n, m, s);
5331     gen_vfp_negh(d, d);
5332 }
5333 
5334 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5335 {
5336     gen_helper_vfp_muls(d, n, m, s);
5337     gen_vfp_negs(d, d);
5338 }
5339 
5340 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5341 {
5342     gen_helper_vfp_muld(d, n, m, s);
5343     gen_vfp_negd(d, d);
5344 }
5345 
5346 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5347 {
5348     gen_helper_vfp_mulh(d, n, m, s);
5349     gen_vfp_ah_negh(d, d);
5350 }
5351 
5352 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5353 {
5354     gen_helper_vfp_muls(d, n, m, s);
5355     gen_vfp_ah_negs(d, d);
5356 }
5357 
5358 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5359 {
5360     gen_helper_vfp_muld(d, n, m, s);
5361     gen_vfp_ah_negd(d, d);
5362 }
5363 
5364 static const FPScalar f_scalar_fnmul = {
5365     gen_fnmul_h,
5366     gen_fnmul_s,
5367     gen_fnmul_d,
5368 };
5369 static const FPScalar f_scalar_ah_fnmul = {
5370     gen_fnmul_ah_h,
5371     gen_fnmul_ah_s,
5372     gen_fnmul_ah_d,
5373 };
5374 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5375 
5376 static const FPScalar f_scalar_fcmeq = {
5377     gen_helper_advsimd_ceq_f16,
5378     gen_helper_neon_ceq_f32,
5379     gen_helper_neon_ceq_f64,
5380 };
5381 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5382 
5383 static const FPScalar f_scalar_fcmge = {
5384     gen_helper_advsimd_cge_f16,
5385     gen_helper_neon_cge_f32,
5386     gen_helper_neon_cge_f64,
5387 };
5388 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5389 
5390 static const FPScalar f_scalar_fcmgt = {
5391     gen_helper_advsimd_cgt_f16,
5392     gen_helper_neon_cgt_f32,
5393     gen_helper_neon_cgt_f64,
5394 };
5395 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5396 
5397 static const FPScalar f_scalar_facge = {
5398     gen_helper_advsimd_acge_f16,
5399     gen_helper_neon_acge_f32,
5400     gen_helper_neon_acge_f64,
5401 };
5402 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5403 
5404 static const FPScalar f_scalar_facgt = {
5405     gen_helper_advsimd_acgt_f16,
5406     gen_helper_neon_acgt_f32,
5407     gen_helper_neon_acgt_f64,
5408 };
5409 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5410 
5411 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5412 {
5413     gen_helper_vfp_subh(d, n, m, s);
5414     gen_vfp_absh(d, d);
5415 }
5416 
5417 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5418 {
5419     gen_helper_vfp_subs(d, n, m, s);
5420     gen_vfp_abss(d, d);
5421 }
5422 
5423 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5424 {
5425     gen_helper_vfp_subd(d, n, m, s);
5426     gen_vfp_absd(d, d);
5427 }
5428 
5429 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5430 {
5431     gen_helper_vfp_subh(d, n, m, s);
5432     gen_vfp_ah_absh(d, d);
5433 }
5434 
5435 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5436 {
5437     gen_helper_vfp_subs(d, n, m, s);
5438     gen_vfp_ah_abss(d, d);
5439 }
5440 
5441 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5442 {
5443     gen_helper_vfp_subd(d, n, m, s);
5444     gen_vfp_ah_absd(d, d);
5445 }
5446 
5447 static const FPScalar f_scalar_fabd = {
5448     gen_fabd_h,
5449     gen_fabd_s,
5450     gen_fabd_d,
5451 };
5452 static const FPScalar f_scalar_ah_fabd = {
5453     gen_fabd_ah_h,
5454     gen_fabd_ah_s,
5455     gen_fabd_ah_d,
5456 };
5457 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5458 
5459 static const FPScalar f_scalar_frecps = {
5460     gen_helper_recpsf_f16,
5461     gen_helper_recpsf_f32,
5462     gen_helper_recpsf_f64,
5463 };
5464 static const FPScalar f_scalar_ah_frecps = {
5465     gen_helper_recpsf_ah_f16,
5466     gen_helper_recpsf_ah_f32,
5467     gen_helper_recpsf_ah_f64,
5468 };
5469 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5470       &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5471 
5472 static const FPScalar f_scalar_frsqrts = {
5473     gen_helper_rsqrtsf_f16,
5474     gen_helper_rsqrtsf_f32,
5475     gen_helper_rsqrtsf_f64,
5476 };
5477 static const FPScalar f_scalar_ah_frsqrts = {
5478     gen_helper_rsqrtsf_ah_f16,
5479     gen_helper_rsqrtsf_ah_f32,
5480     gen_helper_rsqrtsf_ah_f64,
5481 };
5482 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5483       &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5484 
5485 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5486                        const FPScalar *f, bool swap)
5487 {
5488     switch (a->esz) {
5489     case MO_64:
5490         if (fp_access_check(s)) {
5491             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5492             TCGv_i64 t1 = tcg_constant_i64(0);
5493             if (swap) {
5494                 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5495             } else {
5496                 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5497             }
5498             write_fp_dreg(s, a->rd, t0);
5499         }
5500         break;
5501     case MO_32:
5502         if (fp_access_check(s)) {
5503             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5504             TCGv_i32 t1 = tcg_constant_i32(0);
5505             if (swap) {
5506                 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5507             } else {
5508                 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5509             }
5510             write_fp_sreg(s, a->rd, t0);
5511         }
5512         break;
5513     case MO_16:
5514         if (!dc_isar_feature(aa64_fp16, s)) {
5515             return false;
5516         }
5517         if (fp_access_check(s)) {
5518             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5519             TCGv_i32 t1 = tcg_constant_i32(0);
5520             if (swap) {
5521                 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5522             } else {
5523                 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5524             }
5525             write_fp_sreg(s, a->rd, t0);
5526         }
5527         break;
5528     default:
5529         return false;
5530     }
5531     return true;
5532 }
5533 
5534 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5535 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5536 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5537 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5538 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
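/*
 * FCMLT0 and FCMLE0 have no helpers of their own: they reuse the
 * GT/GE comparisons with the operands swapped, since x < 0 == 0 > x.
 */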
5539 
5540 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5541                 MemOp sgn_n, MemOp sgn_m,
5542                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5543                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5544 {
5545     TCGv_i64 t0, t1, t2, qc;
5546     MemOp esz = a->esz;
5547 
5548     if (!fp_access_check(s)) {
5549         return true;
5550     }
5551 
5552     t0 = tcg_temp_new_i64();
5553     t1 = tcg_temp_new_i64();
5554     t2 = tcg_temp_new_i64();
5555     qc = tcg_temp_new_i64();
5556     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5557     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5558     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5559 
5560     if (esz == MO_64) {
5561         gen_d(t0, qc, t1, t2);
5562     } else {
5563         gen_bhs(t0, qc, t1, t2, esz);
5564         tcg_gen_ext_i64(t0, t0, esz);
5565     }
5566 
5567     write_fp_dreg(s, a->rd, t0);
5568     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5569     return true;
5570 }
5571 
5572 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5573 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5574 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5575 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5576 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5577 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5578 
5579 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5580                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5581 {
5582     if (fp_access_check(s)) {
5583         TCGv_i64 t0 = tcg_temp_new_i64();
5584         TCGv_i64 t1 = tcg_temp_new_i64();
5585 
5586         read_vec_element(s, t0, a->rn, 0, MO_64);
5587         read_vec_element(s, t1, a->rm, 0, MO_64);
5588         fn(t0, t0, t1);
5589         write_fp_dreg(s, a->rd, t0);
5590     }
5591     return true;
5592 }
5593 
5594 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5595 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5596 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5597 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5598 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5599 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5600 
5601 typedef struct ENVScalar2 {
5602     NeonGenTwoOpEnvFn *gen_bhs[3];
5603     NeonGenTwo64OpEnvFn *gen_d;
5604 } ENVScalar2;
5605 
5606 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5607 {
5608     if (!fp_access_check(s)) {
5609         return true;
5610     }
5611     if (a->esz == MO_64) {
5612         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5613         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5614         f->gen_d(t0, tcg_env, t0, t1);
5615         write_fp_dreg(s, a->rd, t0);
5616     } else {
5617         TCGv_i32 t0 = tcg_temp_new_i32();
5618         TCGv_i32 t1 = tcg_temp_new_i32();
5619 
5620         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5621         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5622         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5623         write_fp_sreg(s, a->rd, t0);
5624     }
5625     return true;
5626 }
5627 
5628 static const ENVScalar2 f_scalar_sqshl = {
5629     { gen_helper_neon_qshl_s8,
5630       gen_helper_neon_qshl_s16,
5631       gen_helper_neon_qshl_s32 },
5632     gen_helper_neon_qshl_s64,
5633 };
5634 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5635 
5636 static const ENVScalar2 f_scalar_uqshl = {
5637     { gen_helper_neon_qshl_u8,
5638       gen_helper_neon_qshl_u16,
5639       gen_helper_neon_qshl_u32 },
5640     gen_helper_neon_qshl_u64,
5641 };
5642 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5643 
5644 static const ENVScalar2 f_scalar_sqrshl = {
5645     { gen_helper_neon_qrshl_s8,
5646       gen_helper_neon_qrshl_s16,
5647       gen_helper_neon_qrshl_s32 },
5648     gen_helper_neon_qrshl_s64,
5649 };
5650 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5651 
5652 static const ENVScalar2 f_scalar_uqrshl = {
5653     { gen_helper_neon_qrshl_u8,
5654       gen_helper_neon_qrshl_u16,
5655       gen_helper_neon_qrshl_u32 },
5656     gen_helper_neon_qrshl_u64,
5657 };
5658 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5659 
5660 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5661                               const ENVScalar2 *f)
5662 {
5663     if (a->esz == MO_16 || a->esz == MO_32) {
5664         return do_env_scalar2(s, a, f);
5665     }
5666     return false;
5667 }
5668 
5669 static const ENVScalar2 f_scalar_sqdmulh = {
5670     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5671 };
5672 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5673 
5674 static const ENVScalar2 f_scalar_sqrdmulh = {
5675     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5676 };
5677 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5678 
5679 typedef struct ENVScalar3 {
5680     NeonGenThreeOpEnvFn *gen_hs[2];
5681 } ENVScalar3;
5682 
5683 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5684                               const ENVScalar3 *f)
5685 {
5686     TCGv_i32 t0, t1, t2;
5687 
5688     if (a->esz != MO_16 && a->esz != MO_32) {
5689         return false;
5690     }
5691     if (!fp_access_check(s)) {
5692         return true;
5693     }
5694 
5695     t0 = tcg_temp_new_i32();
5696     t1 = tcg_temp_new_i32();
5697     t2 = tcg_temp_new_i32();
5698     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5699     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5700     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5701     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5702     write_fp_sreg(s, a->rd, t0);
5703     return true;
5704 }
5705 
5706 static const ENVScalar3 f_scalar_sqrdmlah = {
5707     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5708 };
5709 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5710 
5711 static const ENVScalar3 f_scalar_sqrdmlsh = {
5712     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5713 };
5714 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5715 
5716 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5717 {
5718     if (fp_access_check(s)) {
5719         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5720         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5721         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5722         write_fp_dreg(s, a->rd, t0);
5723     }
5724     return true;
5725 }
5726 
5727 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5728 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5729 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5730 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5731 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5732 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5733 
5734 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
5735                                         int data,
5736                                         gen_helper_gvec_3_ptr * const fns[3],
5737                                         ARMFPStatusFlavour fpsttype)
5738 {
5739     MemOp esz = a->esz;
5740     int check = fp_access_check_vector_hsd(s, a->q, esz);
5741 
5742     if (check <= 0) {
5743         return check == 0;
5744     }
5745 
5746     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
5747                       data, fns[esz - 1]);
5748     return true;
5749 }
5750 
5751 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5752                           gen_helper_gvec_3_ptr * const fns[3])
5753 {
5754     return do_fp3_vector_with_fpsttype(s, a, data, fns,
5755                                        a->esz == MO_16 ?
5756                                        FPST_A64_F16 : FPST_A64);
5757 }
5758 
5759 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5760                               gen_helper_gvec_3_ptr * const fnormal[3],
5761                               gen_helper_gvec_3_ptr * const fah[3])
5762 {
5763     return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
5764 }
5765 
5766 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5767                                  gen_helper_gvec_3_ptr * const fnormal[3],
5768                                  gen_helper_gvec_3_ptr * const fah[3])
5769 {
5770     return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
5771                                        select_ah_fpst(s, a->esz));
5772 }
5773 
5774 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5775     gen_helper_gvec_fadd_h,
5776     gen_helper_gvec_fadd_s,
5777     gen_helper_gvec_fadd_d,
5778 };
5779 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5780 
5781 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5782     gen_helper_gvec_fsub_h,
5783     gen_helper_gvec_fsub_s,
5784     gen_helper_gvec_fsub_d,
5785 };
5786 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5787 
5788 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5789     gen_helper_gvec_fdiv_h,
5790     gen_helper_gvec_fdiv_s,
5791     gen_helper_gvec_fdiv_d,
5792 };
5793 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5794 
5795 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5796     gen_helper_gvec_fmul_h,
5797     gen_helper_gvec_fmul_s,
5798     gen_helper_gvec_fmul_d,
5799 };
5800 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5801 
5802 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5803     gen_helper_gvec_fmax_h,
5804     gen_helper_gvec_fmax_s,
5805     gen_helper_gvec_fmax_d,
5806 };
5807 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
5808     gen_helper_gvec_ah_fmax_h,
5809     gen_helper_gvec_ah_fmax_s,
5810     gen_helper_gvec_ah_fmax_d,
5811 };
5812 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
5813 
5814 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5815     gen_helper_gvec_fmin_h,
5816     gen_helper_gvec_fmin_s,
5817     gen_helper_gvec_fmin_d,
5818 };
5819 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
5820     gen_helper_gvec_ah_fmin_h,
5821     gen_helper_gvec_ah_fmin_s,
5822     gen_helper_gvec_ah_fmin_d,
5823 };
5824 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
5825 
5826 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5827     gen_helper_gvec_fmaxnum_h,
5828     gen_helper_gvec_fmaxnum_s,
5829     gen_helper_gvec_fmaxnum_d,
5830 };
5831 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5832 
5833 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5834     gen_helper_gvec_fminnum_h,
5835     gen_helper_gvec_fminnum_s,
5836     gen_helper_gvec_fminnum_d,
5837 };
5838 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5839 
5840 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5841     gen_helper_gvec_fmulx_h,
5842     gen_helper_gvec_fmulx_s,
5843     gen_helper_gvec_fmulx_d,
5844 };
5845 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5846 
5847 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5848     gen_helper_gvec_vfma_h,
5849     gen_helper_gvec_vfma_s,
5850     gen_helper_gvec_vfma_d,
5851 };
5852 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5853 
5854 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5855     gen_helper_gvec_vfms_h,
5856     gen_helper_gvec_vfms_s,
5857     gen_helper_gvec_vfms_d,
5858 };
5859 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
5860     gen_helper_gvec_ah_vfms_h,
5861     gen_helper_gvec_ah_vfms_s,
5862     gen_helper_gvec_ah_vfms_d,
5863 };
5864 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
5865 
5866 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5867     gen_helper_gvec_fceq_h,
5868     gen_helper_gvec_fceq_s,
5869     gen_helper_gvec_fceq_d,
5870 };
5871 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5872 
5873 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5874     gen_helper_gvec_fcge_h,
5875     gen_helper_gvec_fcge_s,
5876     gen_helper_gvec_fcge_d,
5877 };
5878 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5879 
5880 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5881     gen_helper_gvec_fcgt_h,
5882     gen_helper_gvec_fcgt_s,
5883     gen_helper_gvec_fcgt_d,
5884 };
5885 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5886 
5887 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5888     gen_helper_gvec_facge_h,
5889     gen_helper_gvec_facge_s,
5890     gen_helper_gvec_facge_d,
5891 };
5892 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5893 
5894 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5895     gen_helper_gvec_facgt_h,
5896     gen_helper_gvec_facgt_s,
5897     gen_helper_gvec_facgt_d,
5898 };
5899 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5900 
5901 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5902     gen_helper_gvec_fabd_h,
5903     gen_helper_gvec_fabd_s,
5904     gen_helper_gvec_fabd_d,
5905 };
5906 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
5907     gen_helper_gvec_ah_fabd_h,
5908     gen_helper_gvec_ah_fabd_s,
5909     gen_helper_gvec_ah_fabd_d,
5910 };
5911 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
5912 
5913 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5914     gen_helper_gvec_recps_h,
5915     gen_helper_gvec_recps_s,
5916     gen_helper_gvec_recps_d,
5917 };
5918 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
5919     gen_helper_gvec_ah_recps_h,
5920     gen_helper_gvec_ah_recps_s,
5921     gen_helper_gvec_ah_recps_d,
5922 };
5923 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
5924 
5925 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5926     gen_helper_gvec_rsqrts_h,
5927     gen_helper_gvec_rsqrts_s,
5928     gen_helper_gvec_rsqrts_d,
5929 };
5930 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
5931     gen_helper_gvec_ah_rsqrts_h,
5932     gen_helper_gvec_ah_rsqrts_s,
5933     gen_helper_gvec_ah_rsqrts_d,
5934 };
5935 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
5936 
5937 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5938     gen_helper_gvec_faddp_h,
5939     gen_helper_gvec_faddp_s,
5940     gen_helper_gvec_faddp_d,
5941 };
5942 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5943 
5944 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5945     gen_helper_gvec_fmaxp_h,
5946     gen_helper_gvec_fmaxp_s,
5947     gen_helper_gvec_fmaxp_d,
5948 };
5949 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
5950     gen_helper_gvec_ah_fmaxp_h,
5951     gen_helper_gvec_ah_fmaxp_s,
5952     gen_helper_gvec_ah_fmaxp_d,
5953 };
5954 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
5955 
5956 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5957     gen_helper_gvec_fminp_h,
5958     gen_helper_gvec_fminp_s,
5959     gen_helper_gvec_fminp_d,
5960 };
5961 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
5962     gen_helper_gvec_ah_fminp_h,
5963     gen_helper_gvec_ah_fminp_s,
5964     gen_helper_gvec_ah_fminp_d,
5965 };
5966 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
5967 
5968 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5969     gen_helper_gvec_fmaxnump_h,
5970     gen_helper_gvec_fmaxnump_s,
5971     gen_helper_gvec_fmaxnump_d,
5972 };
5973 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5974 
5975 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5976     gen_helper_gvec_fminnump_h,
5977     gen_helper_gvec_fminnump_s,
5978     gen_helper_gvec_fminnump_d,
5979 };
5980 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5981 
5982 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5983 {
5984     if (fp_access_check(s)) {
5985         int data = (is_2 << 1) | is_s;
5986         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5987                            vec_full_reg_offset(s, a->rn),
5988                            vec_full_reg_offset(s, a->rm), tcg_env,
5989                            a->q ? 16 : 8, vec_full_reg_size(s),
5990                            data, gen_helper_gvec_fmlal_a64);
5991     }
5992     return true;
5993 }
5994 
5995 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5996 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5997 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5998 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5999 
6000 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6001 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6002 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6003 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6004 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6005 
6006 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6007 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6008 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6009 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6010 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6011 
6012 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6013 {
6014     if (fp_access_check(s)) {
6015         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6016     }
6017     return true;
6018 }
6019 
6020 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6021 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6022 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
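/*
 * BSL, BIT and BIF are all the same bitwise select, differing only
 * in which operand supplies the selector mask: BSL computes
 * d = (n & d) | (m & ~d), BIT inserts Vn bits where Vm is set,
 * and BIF where Vm is clear.
 */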
6023 
6024 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6025 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6026 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6027 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6028 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6029 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6030 
6031 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6032 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6033 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6034 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6035 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6036 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6037 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6038 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6039 
6040 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6041 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6042 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6043 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6044 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6045 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6046 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6047 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6048 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6049 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6050 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6051 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6052 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6053 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6054 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6055 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6056 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6057 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6058 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6059 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6060 
6061 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6062 {
6063     if (a->esz == MO_64 && !a->q) {
6064         return false;
6065     }
6066     if (fp_access_check(s)) {
6067         tcg_gen_gvec_cmp(cond, a->esz,
6068                          vec_full_reg_offset(s, a->rd),
6069                          vec_full_reg_offset(s, a->rn),
6070                          vec_full_reg_offset(s, a->rm),
6071                          a->q ? 16 : 8, vec_full_reg_size(s));
6072     }
6073     return true;
6074 }
6075 
6076 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6077 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6078 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6079 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6080 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6081 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6082 
6083 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6084 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6085 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6086 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6087 
6088 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6089                           gen_helper_gvec_4 *fn)
6090 {
6091     if (fp_access_check(s)) {
6092         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6093     }
6094     return true;
6095 }
6096 
6097 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6098                               gen_helper_gvec_4_ptr *fn)
6099 {
6100     if (fp_access_check(s)) {
6101         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6102     }
6103     return true;
6104 }
6105 
6106 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
6107 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
6108 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
6109 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6110 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6111 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6112 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6113 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6114 
6115 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6116 {
6117     if (!dc_isar_feature(aa64_bf16, s)) {
6118         return false;
6119     }
6120     if (fp_access_check(s)) {
6121         /* Q bit selects BFMLALB vs BFMLALT. */
6122         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6123                           s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6124                           gen_helper_gvec_bfmlal);
6125     }
6126     return true;
6127 }
6128 
6129 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6130     gen_helper_gvec_fcaddh,
6131     gen_helper_gvec_fcadds,
6132     gen_helper_gvec_fcaddd,
6133 };
6134 /*
6135  * Encode FPCR.AH into bit 1 of the data, so the helper knows whether
6136  * the negations it does should avoid flipping the sign bit on a NaN.
6137  */
6138 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6139            f_vector_fcadd)
6140 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6141            f_vector_fcadd)
6142 
6143 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6144 {
6145     static gen_helper_gvec_4_ptr * const fn[] = {
6146         [MO_16] = gen_helper_gvec_fcmlah,
6147         [MO_32] = gen_helper_gvec_fcmlas,
6148         [MO_64] = gen_helper_gvec_fcmlad,
6149     };
6150     int check;
6151 
6152     if (!dc_isar_feature(aa64_fcma, s)) {
6153         return false;
6154     }
6155 
6156     check = fp_access_check_vector_hsd(s, a->q, a->esz);
6157     if (check <= 0) {
6158         return check == 0;
6159     }
6160 
6161     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6162                       a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6163                       a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6164     return true;
6165 }
6166 
6167 /*
6168  * Widening vector x vector/indexed.
6169  *
6170  * These read from the top or bottom half of a 128-bit vector.
6171  * After widening, optionally accumulate with a 128-bit vector.
6172  * Implement these inline, as the number of elements is limited
6173  * and the related SVE and SME operations on larger vectors use
6174  * even/odd elements instead of top/bottom half.
6175  *
6176  * If idx >= 0, operand 2 is indexed, otherwise vector.
6177  * If acc, operand 0 is loaded with rd.
6178  */
6179 
6180 /* For low half, iterating up. */
6181 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6182                             int rd, int rn, int rm, int idx,
6183                             NeonGenTwo64OpFn *fn, bool acc)
6184 {
6185     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6186     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6187     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6188     MemOp esz = memop & MO_SIZE;
6189     int half = 8 >> esz;
6190     int top_swap, top_half;
6191 
6192     /* There are no 64x64->128 bit operations. */
6193     if (esz >= MO_64) {
6194         return false;
6195     }
6196     if (!fp_access_check(s)) {
6197         return true;
6198     }
6199 
6200     if (idx >= 0) {
6201         read_vec_element(s, tcg_op2, rm, idx, memop);
6202     }
6203 
6204     /*
6205      * For top half inputs, iterate forward; backward for bottom half.
6206      * This means the store to the destination will not occur until
6207      * overlapping inputs are consumed.
6208      * Use top_swap to conditionally invert the forward iteration index.
6209      */
6210     top_swap = top ? 0 : half - 1;
6211     top_half = top ? half : 0;
6212 
6213     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6214         int elt = elt_fwd ^ top_swap;
6215 
6216         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6217         if (idx < 0) {
6218             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6219         }
6220         if (acc) {
6221             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6222         }
6223         fn(tcg_op0, tcg_op1, tcg_op2);
6224         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6225     }
6226     clear_vec_high(s, 1, rd);
6227     return true;
6228 }
6229 
6230 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6231 {
6232     TCGv_i64 t = tcg_temp_new_i64();
6233     tcg_gen_mul_i64(t, n, m);
6234     tcg_gen_add_i64(d, d, t);
6235 }
6236 
6237 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6238 {
6239     TCGv_i64 t = tcg_temp_new_i64();
6240     tcg_gen_mul_i64(t, n, m);
6241     tcg_gen_sub_i64(d, d, t);
6242 }
6243 
6244 TRANS(SMULL_v, do_3op_widening,
6245       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6246       tcg_gen_mul_i64, false)
6247 TRANS(UMULL_v, do_3op_widening,
6248       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6249       tcg_gen_mul_i64, false)
6250 TRANS(SMLAL_v, do_3op_widening,
6251       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6252       gen_muladd_i64, true)
6253 TRANS(UMLAL_v, do_3op_widening,
6254       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6255       gen_muladd_i64, true)
6256 TRANS(SMLSL_v, do_3op_widening,
6257       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6258       gen_mulsub_i64, true)
6259 TRANS(UMLSL_v, do_3op_widening,
6260       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6261       gen_mulsub_i64, true)
6262 
6263 TRANS(SMULL_vi, do_3op_widening,
6264       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6265       tcg_gen_mul_i64, false)
6266 TRANS(UMULL_vi, do_3op_widening,
6267       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6268       tcg_gen_mul_i64, false)
6269 TRANS(SMLAL_vi, do_3op_widening,
6270       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6271       gen_muladd_i64, true)
6272 TRANS(UMLAL_vi, do_3op_widening,
6273       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6274       gen_muladd_i64, true)
6275 TRANS(SMLSL_vi, do_3op_widening,
6276       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6277       gen_mulsub_i64, true)
6278 TRANS(UMLSL_vi, do_3op_widening,
6279       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6280       gen_mulsub_i64, true)
6281 
6282 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6283 {
6284     TCGv_i64 t1 = tcg_temp_new_i64();
6285     TCGv_i64 t2 = tcg_temp_new_i64();
6286 
6287     tcg_gen_sub_i64(t1, n, m);
6288     tcg_gen_sub_i64(t2, m, n);
6289     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6290 }
6291 
6292 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6293 {
6294     TCGv_i64 t1 = tcg_temp_new_i64();
6295     TCGv_i64 t2 = tcg_temp_new_i64();
6296 
6297     tcg_gen_sub_i64(t1, n, m);
6298     tcg_gen_sub_i64(t2, m, n);
6299     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6300 }
6301 
6302 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6303 {
6304     TCGv_i64 t = tcg_temp_new_i64();
6305     gen_sabd_i64(t, n, m);
6306     tcg_gen_add_i64(d, d, t);
6307 }
6308 
6309 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6310 {
6311     TCGv_i64 t = tcg_temp_new_i64();
6312     gen_uabd_i64(t, n, m);
6313     tcg_gen_add_i64(d, d, t);
6314 }
6315 
6316 TRANS(SADDL_v, do_3op_widening,
6317       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6318       tcg_gen_add_i64, false)
6319 TRANS(UADDL_v, do_3op_widening,
6320       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6321       tcg_gen_add_i64, false)
6322 TRANS(SSUBL_v, do_3op_widening,
6323       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6324       tcg_gen_sub_i64, false)
6325 TRANS(USUBL_v, do_3op_widening,
6326       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6327       tcg_gen_sub_i64, false)
6328 TRANS(SABDL_v, do_3op_widening,
6329       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6330       gen_sabd_i64, false)
6331 TRANS(UABDL_v, do_3op_widening,
6332       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6333       gen_uabd_i64, false)
6334 TRANS(SABAL_v, do_3op_widening,
6335       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6336       gen_saba_i64, true)
6337 TRANS(UABAL_v, do_3op_widening,
6338       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6339       gen_uaba_i64, true)
6340 
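/*
 * SQDMULL and friends double the product via a saturating add of the
 * product to itself; e.g. (-0x8000) * (-0x8000) at MO_16 doubles to
 * 0x80000000, which saturates to INT32_MAX instead of wrapping.
 */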
6341 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6342 {
6343     tcg_gen_mul_i64(d, n, m);
6344     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6345 }
6346 
6347 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6348 {
6349     tcg_gen_mul_i64(d, n, m);
6350     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6351 }
6352 
6353 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6354 {
6355     TCGv_i64 t = tcg_temp_new_i64();
6356 
6357     tcg_gen_mul_i64(t, n, m);
6358     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6359     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6360 }
6361 
6362 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6363 {
6364     TCGv_i64 t = tcg_temp_new_i64();
6365 
6366     tcg_gen_mul_i64(t, n, m);
6367     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6368     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6369 }
6370 
6371 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6372 {
6373     TCGv_i64 t = tcg_temp_new_i64();
6374 
6375     tcg_gen_mul_i64(t, n, m);
6376     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6377     tcg_gen_neg_i64(t, t);
6378     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6379 }
6380 
6381 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6382 {
6383     TCGv_i64 t = tcg_temp_new_i64();
6384 
6385     tcg_gen_mul_i64(t, n, m);
6386     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6387     tcg_gen_neg_i64(t, t);
6388     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6389 }
6390 
6391 TRANS(SQDMULL_v, do_3op_widening,
6392       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6393       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6394 TRANS(SQDMLAL_v, do_3op_widening,
6395       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6396       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6397 TRANS(SQDMLSL_v, do_3op_widening,
6398       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6399       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6400 
6401 TRANS(SQDMULL_vi, do_3op_widening,
6402       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6403       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6404 TRANS(SQDMLAL_vi, do_3op_widening,
6405       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6406       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6407 TRANS(SQDMLSL_vi, do_3op_widening,
6408       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6409       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6410 
6411 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6412                            MemOp sign, bool sub)
6413 {
6414     TCGv_i64 tcg_op0, tcg_op1;
6415     MemOp esz = a->esz;
6416     int half = 8 >> esz;
6417     bool top = a->q;
6418     int top_swap = top ? 0 : half - 1;
6419     int top_half = top ? half : 0;
6420 
6421     /* There are no 64x64->128 bit operations. */
6422     if (esz >= MO_64) {
6423         return false;
6424     }
6425     if (!fp_access_check(s)) {
6426         return true;
6427     }
6428     tcg_op0 = tcg_temp_new_i64();
6429     tcg_op1 = tcg_temp_new_i64();
6430 
6431     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6432         int elt = elt_fwd ^ top_swap;
6433 
6434         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6435         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6436         if (sub) {
6437             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6438         } else {
6439             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6440         }
6441         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6442     }
6443     clear_vec_high(s, 1, a->rd);
6444     return true;
6445 }
6446 
6447 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6448 TRANS(UADDW, do_addsub_wide, a, 0, false)
6449 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6450 TRANS(USUBW, do_addsub_wide, a, 0, true)
6451 
6452 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6453                                  bool sub, bool round)
6454 {
6455     TCGv_i64 tcg_op0, tcg_op1;
6456     MemOp esz = a->esz;
6457     int half = 8 >> esz;
6458     bool top = a->q;
6459     int ebits = 8 << esz;
6460     uint64_t rbit = 1ull << (ebits - 1);
6461     int top_swap, top_half;
6462 
6463     /* There are no 128x128->64 bit operations. */
6464     if (esz >= MO_64) {
6465         return false;
6466     }
6467     if (!fp_access_check(s)) {
6468         return true;
6469     }
6470     tcg_op0 = tcg_temp_new_i64();
6471     tcg_op1 = tcg_temp_new_i64();
6472 
6473     /*
6474      * For top half inputs, iterate backward; forward for bottom half.
6475      * This means the store to the destination will not occur until
6476      * overlapping inputs are consumed.
6477      */
6478     top_swap = top ? half - 1 : 0;
6479     top_half = top ? half : 0;
6480 
6481     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6482         int elt = elt_fwd ^ top_swap;
6483 
6484         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6485         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6486         if (sub) {
6487             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6488         } else {
6489             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6490         }
6491         if (round) {
6492             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6493         }
6494         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6495         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6496     }
6497     clear_vec_high(s, top, a->rd);
6498     return true;
6499 }
6500 
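/*
 * For the rounding (R*) forms, rbit is half the weight of the discarded
 * low half: e.g. RADDHN with esz = MO_8 adds 0x80 to each 16-bit sum
 * before taking bits [15:8].
 */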
6501 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6502 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6503 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6504 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6505 
6506 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6507 {
6508     if (fp_access_check(s)) {
6509         /* The Q field specifies lo/hi half input for these insns.  */
6510         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6511     }
6512     return true;
6513 }
6514 
6515 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6516 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6517 
6518 /*
6519  * Advanced SIMD scalar/vector x indexed element
6520  */
6521 
6522 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6523 {
6524     switch (a->esz) {
6525     case MO_64:
6526         if (fp_access_check(s)) {
6527             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6528             TCGv_i64 t1 = tcg_temp_new_i64();
6529 
6530             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6531             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6532             write_fp_dreg_merging(s, a->rd, a->rn, t0);
6533         }
6534         break;
6535     case MO_32:
6536         if (fp_access_check(s)) {
6537             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6538             TCGv_i32 t1 = tcg_temp_new_i32();
6539 
6540             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6541             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6542             write_fp_sreg_merging(s, a->rd, a->rn, t0);
6543         }
6544         break;
6545     case MO_16:
6546         if (!dc_isar_feature(aa64_fp16, s)) {
6547             return false;
6548         }
6549         if (fp_access_check(s)) {
6550             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6551             TCGv_i32 t1 = tcg_temp_new_i32();
6552 
6553             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6554             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6555             write_fp_hreg_merging(s, a->rd, a->rn, t0);
6556         }
6557         break;
6558     default:
6559         g_assert_not_reached();
6560     }
6561     return true;
6562 }
6563 
6564 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6565 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6566 
6567 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6568 {
6569     switch (a->esz) {
6570     case MO_64:
6571         if (fp_access_check(s)) {
6572             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6573             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6574             TCGv_i64 t2 = tcg_temp_new_i64();
6575 
6576             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6577             if (neg) {
6578                 gen_vfp_maybe_ah_negd(s, t1, t1);
6579             }
6580             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6581             write_fp_dreg_merging(s, a->rd, a->rd, t0);
6582         }
6583         break;
6584     case MO_32:
6585         if (fp_access_check(s)) {
6586             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6587             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6588             TCGv_i32 t2 = tcg_temp_new_i32();
6589 
6590             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6591             if (neg) {
6592                 gen_vfp_maybe_ah_negs(s, t1, t1);
6593             }
6594             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6595             write_fp_sreg_merging(s, a->rd, a->rd, t0);
6596         }
6597         break;
6598     case MO_16:
6599         if (!dc_isar_feature(aa64_fp16, s)) {
6600             return false;
6601         }
6602         if (fp_access_check(s)) {
6603             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6604             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6605             TCGv_i32 t2 = tcg_temp_new_i32();
6606 
6607             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6608             if (neg) {
6609                 gen_vfp_maybe_ah_negh(s, t1, t1);
6610             }
6611             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6612                                        fpstatus_ptr(FPST_A64_F16));
6613             write_fp_hreg_merging(s, a->rd, a->rd, t0);
6614         }
6615         break;
6616     default:
6617         g_assert_not_reached();
6618     }
6619     return true;
6620 }
6621 
6622 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6623 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6624 
6625 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6626                                   const ENVScalar2 *f)
6627 {
6628     if (a->esz < MO_16 || a->esz > MO_32) {
6629         return false;
6630     }
6631     if (fp_access_check(s)) {
6632         TCGv_i32 t0 = tcg_temp_new_i32();
6633         TCGv_i32 t1 = tcg_temp_new_i32();
6634 
6635         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6636         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6637         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6638         write_fp_sreg(s, a->rd, t0);
6639     }
6640     return true;
6641 }
6642 
6643 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6644 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6645 
6646 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6647                                   const ENVScalar3 *f)
6648 {
6649     if (a->esz < MO_16 || a->esz > MO_32) {
6650         return false;
6651     }
6652     if (fp_access_check(s)) {
6653         TCGv_i32 t0 = tcg_temp_new_i32();
6654         TCGv_i32 t1 = tcg_temp_new_i32();
6655         TCGv_i32 t2 = tcg_temp_new_i32();
6656 
6657         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6658         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6659         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6660         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6661         write_fp_sreg(s, a->rd, t0);
6662     }
6663     return true;
6664 }
6665 
6666 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6667 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6668 
6669 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6670                                           NeonGenTwo64OpFn *fn, bool acc)
6671 {
6672     if (fp_access_check(s)) {
6673         TCGv_i64 t0 = tcg_temp_new_i64();
6674         TCGv_i64 t1 = tcg_temp_new_i64();
6675         TCGv_i64 t2 = tcg_temp_new_i64();
6676 
6677         if (acc) {
6678             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6679         }
6680         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6681         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6682         fn(t0, t1, t2);
6683 
6684         /* Clear the whole register first, then store scalar. */
6685         clear_vec(s, a->rd);
6686         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6687     }
6688     return true;
6689 }
6690 
6691 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6692       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6693 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6694       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6695 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6696       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6697 
6698 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6699                               gen_helper_gvec_3_ptr * const fns[3])
6700 {
6701     MemOp esz = a->esz;
6702     int check = fp_access_check_vector_hsd(s, a->q, esz);
6703 
6704     if (check <= 0) {
6705         return check == 0;
6706     }
6707 
6708     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6709                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6710                       a->idx, fns[esz - 1]);
6711     return true;
6712 }
6713 
6714 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6715     gen_helper_gvec_fmul_idx_h,
6716     gen_helper_gvec_fmul_idx_s,
6717     gen_helper_gvec_fmul_idx_d,
6718 };
6719 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6720 
6721 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6722     gen_helper_gvec_fmulx_idx_h,
6723     gen_helper_gvec_fmulx_idx_s,
6724     gen_helper_gvec_fmulx_idx_d,
6725 };
6726 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6727 
6728 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6729 {
6730     static gen_helper_gvec_4_ptr * const fns[3][3] = {
6731         { gen_helper_gvec_fmla_idx_h,
6732           gen_helper_gvec_fmla_idx_s,
6733           gen_helper_gvec_fmla_idx_d },
6734         { gen_helper_gvec_fmls_idx_h,
6735           gen_helper_gvec_fmls_idx_s,
6736           gen_helper_gvec_fmls_idx_d },
6737         { gen_helper_gvec_ah_fmls_idx_h,
6738           gen_helper_gvec_ah_fmls_idx_s,
6739           gen_helper_gvec_ah_fmls_idx_d },
6740     };
6741     MemOp esz = a->esz;
6742     int check = fp_access_check_vector_hsd(s, a->q, esz);
6743 
6744     if (check <= 0) {
6745         return check == 0;
6746     }
6747 
6748     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6749                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6750                       a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
6751     return true;
6752 }
6753 
6754 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6755 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6756 
6757 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6758 {
6759     if (fp_access_check(s)) {
6760         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6761         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6762                            vec_full_reg_offset(s, a->rn),
6763                            vec_full_reg_offset(s, a->rm), tcg_env,
6764                            a->q ? 16 : 8, vec_full_reg_size(s),
6765                            data, gen_helper_gvec_fmlal_idx_a64);
6766     }
6767     return true;
6768 }
6769 
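/*
 * As with the non-indexed forms, data bit 0 selects the FMLSL variants
 * and bit 1 the "2" (top half) variants; the element index is packed
 * into the bits above them.
 */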
6770 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6771 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6772 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6773 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6774 
6775 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6776                                gen_helper_gvec_3 * const fns[2])
6777 {
6778     assert(a->esz == MO_16 || a->esz == MO_32);
6779     if (fp_access_check(s)) {
6780         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6781     }
6782     return true;
6783 }
6784 
6785 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6786     gen_helper_gvec_mul_idx_h,
6787     gen_helper_gvec_mul_idx_s,
6788 };
6789 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6790 
6791 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6792 {
6793     static gen_helper_gvec_4 * const fns[2][2] = {
6794         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6795         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6796     };
6797 
6798     assert(a->esz == MO_16 || a->esz == MO_32);
6799     if (fp_access_check(s)) {
6800         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6801                          a->idx, fns[a->esz - 1][sub]);
6802     }
6803     return true;
6804 }
6805 
6806 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6807 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6808 
6809 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6810                                   gen_helper_gvec_4 * const fns[2])
6811 {
6812     assert(a->esz == MO_16 || a->esz == MO_32);
6813     if (fp_access_check(s)) {
6814         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6815                            vec_full_reg_offset(s, a->rn),
6816                            vec_full_reg_offset(s, a->rm),
6817                            offsetof(CPUARMState, vfp.qc),
6818                            a->q ? 16 : 8, vec_full_reg_size(s),
6819                            a->idx, fns[a->esz - 1]);
6820     }
6821     return true;
6822 }
6823 
6824 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6825     gen_helper_neon_sqdmulh_idx_h,
6826     gen_helper_neon_sqdmulh_idx_s,
6827 };
6828 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6829 
6830 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6831     gen_helper_neon_sqrdmulh_idx_h,
6832     gen_helper_neon_sqrdmulh_idx_s,
6833 };
6834 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6835 
6836 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6837     gen_helper_neon_sqrdmlah_idx_h,
6838     gen_helper_neon_sqrdmlah_idx_s,
6839 };
6840 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6841            f_vector_idx_sqrdmlah)
6842 
6843 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6844     gen_helper_neon_sqrdmlsh_idx_h,
6845     gen_helper_neon_sqrdmlsh_idx_s,
6846 };
6847 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6848            f_vector_idx_sqrdmlsh)
6849 
6850 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6851                               gen_helper_gvec_4 *fn)
6852 {
6853     if (fp_access_check(s)) {
6854         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6855     }
6856     return true;
6857 }
6858 
6859 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6860                                   gen_helper_gvec_4_ptr *fn)
6861 {
6862     if (fp_access_check(s)) {
6863         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6864     }
6865     return true;
6866 }
6867 
6868 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6869 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6870 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6871            gen_helper_gvec_sudot_idx_b)
6872 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6873            gen_helper_gvec_usdot_idx_b)
6874 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6875            gen_helper_gvec_bfdot_idx)
6876 
6877 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6878 {
6879     if (!dc_isar_feature(aa64_bf16, s)) {
6880         return false;
6881     }
6882     if (fp_access_check(s)) {
6883         /* Q bit selects BFMLALB vs BFMLALT. */
6884         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6885                           s->fpcr_ah ? FPST_AH : FPST_A64,
6886                           (a->idx << 1) | a->q,
6887                           gen_helper_gvec_bfmlal_idx);
6888     }
6889     return true;
6890 }
6891 
6892 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6893 {
6894     gen_helper_gvec_4_ptr *fn;
6895 
6896     if (!dc_isar_feature(aa64_fcma, s)) {
6897         return false;
6898     }
6899     switch (a->esz) {
6900     case MO_16:
6901         if (!dc_isar_feature(aa64_fp16, s)) {
6902             return false;
6903         }
6904         fn = gen_helper_gvec_fcmlah_idx;
6905         break;
6906     case MO_32:
6907         fn = gen_helper_gvec_fcmlas_idx;
6908         break;
6909     default:
6910         g_assert_not_reached();
6911     }
6912     if (fp_access_check(s)) {
6913         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6914                           a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6915                           (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
6916     }
6917     return true;
6918 }
6919 
6920 /*
6921  * Advanced SIMD scalar pairwise
6922  */
6923 
6924 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6925 {
6926     switch (a->esz) {
6927     case MO_64:
6928         if (fp_access_check(s)) {
6929             TCGv_i64 t0 = tcg_temp_new_i64();
6930             TCGv_i64 t1 = tcg_temp_new_i64();
6931 
6932             read_vec_element(s, t0, a->rn, 0, MO_64);
6933             read_vec_element(s, t1, a->rn, 1, MO_64);
6934             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6935             write_fp_dreg(s, a->rd, t0);
6936         }
6937         break;
6938     case MO_32:
6939         if (fp_access_check(s)) {
6940             TCGv_i32 t0 = tcg_temp_new_i32();
6941             TCGv_i32 t1 = tcg_temp_new_i32();
6942 
6943             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6944             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6945             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6946             write_fp_sreg(s, a->rd, t0);
6947         }
6948         break;
6949     case MO_16:
6950         if (!dc_isar_feature(aa64_fp16, s)) {
6951             return false;
6952         }
6953         if (fp_access_check(s)) {
6954             TCGv_i32 t0 = tcg_temp_new_i32();
6955             TCGv_i32 t1 = tcg_temp_new_i32();
6956 
6957             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6958             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6959             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6960             write_fp_sreg(s, a->rd, t0);
6961         }
6962         break;
6963     default:
6964         g_assert_not_reached();
6965     }
6966     return true;
6967 }
6968 
6969 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
6970                                    const FPScalar *fnormal,
6971                                    const FPScalar *fah)
6972 {
6973     return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
6974 }
6975 
6976 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6977 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
6978 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
6979 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6980 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6981 
6982 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6983 {
6984     if (fp_access_check(s)) {
6985         TCGv_i64 t0 = tcg_temp_new_i64();
6986         TCGv_i64 t1 = tcg_temp_new_i64();
6987 
6988         read_vec_element(s, t0, a->rn, 0, MO_64);
6989         read_vec_element(s, t1, a->rn, 1, MO_64);
6990         tcg_gen_add_i64(t0, t0, t1);
6991         write_fp_dreg(s, a->rd, t0);
6992     }
6993     return true;
6994 }
6995 
6996 /*
6997  * Floating-point conditional select
6998  */
6999 
7000 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7001 {
7002     TCGv_i64 t_true, t_false;
7003     DisasCompare64 c;
7004     int check = fp_access_check_scalar_hsd(s, a->esz);
7005 
7006     if (check <= 0) {
7007         return check == 0;
7008     }
7009 
7010     /* Zero extend sreg & hreg inputs to 64 bits now.  */
7011     t_true = tcg_temp_new_i64();
7012     t_false = tcg_temp_new_i64();
7013     read_vec_element(s, t_true, a->rn, 0, a->esz);
7014     read_vec_element(s, t_false, a->rm, 0, a->esz);
7015 
7016     a64_test_cc(&c, a->cond);
7017     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7018                         t_true, t_false);
7019 
7020     /*
7021      * Note that sregs & hregs write back zeros to the high bits,
7022      * and we've already done the zero-extension.
7023      */
7024     write_fp_dreg(s, a->rd, t_true);
7025     return true;
7026 }
7027 
7028 /*
7029  * Advanced SIMD Extract
7030  */
7031 
7032 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7033 {
7034     if (fp_access_check(s)) {
7035         TCGv_i64 lo = read_fp_dreg(s, a->rn);
7036         if (a->imm != 0) {
7037             TCGv_i64 hi = read_fp_dreg(s, a->rm);
7038             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7039         }
7040         write_fp_dreg(s, a->rd, lo);
7041     }
7042     return true;
7043 }
7044 
7045 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7046 {
7047     TCGv_i64 lo, hi;
7048     int pos = (a->imm & 7) * 8;
7049     int elt = a->imm >> 3;
7050 
7051     if (!fp_access_check(s)) {
7052         return true;
7053     }
7054 
7055     lo = tcg_temp_new_i64();
7056     hi = tcg_temp_new_i64();
7057 
7058     read_vec_element(s, lo, a->rn, elt, MO_64);
7059     elt++;
7060     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7061     elt++;
7062 
7063     if (pos != 0) {
7064         TCGv_i64 hh = tcg_temp_new_i64();
7065         tcg_gen_extract2_i64(lo, lo, hi, pos);
7066         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7067         tcg_gen_extract2_i64(hi, hi, hh, pos);
7068     }
7069 
7070     write_vec_element(s, lo, a->rd, 0, MO_64);
7071     write_vec_element(s, hi, a->rd, 1, MO_64);
7072     clear_vec_high(s, true, a->rd);
7073     return true;
7074 }
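/*
 * Worked example: imm == 11 gives pos == 24 and elt == 1, so lo is
 * assembled from rn[1]:rm[0] and hi from rm[0]:rm[1], yielding bytes
 * 11..26 of the rn:rm pair, as the EXT pseudocode requires.
 */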
7075 
7076 /*
7077  * Floating-point data-processing (3 source)
7078  */
7079 
7080 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7081 {
7082     TCGv_ptr fpst;
7083 
7084     /*
7085      * These are fused multiply-add.  Note that doing the negations here
7086      * as separate steps is correct: an input NaN should come out with
7087      * its sign bit flipped if it is a negated-input.
7088      */
7089     switch (a->esz) {
7090     case MO_64:
7091         if (fp_access_check(s)) {
7092             TCGv_i64 tn = read_fp_dreg(s, a->rn);
7093             TCGv_i64 tm = read_fp_dreg(s, a->rm);
7094             TCGv_i64 ta = read_fp_dreg(s, a->ra);
7095 
7096             if (neg_a) {
7097                 gen_vfp_maybe_ah_negd(s, ta, ta);
7098             }
7099             if (neg_n) {
7100                 gen_vfp_maybe_ah_negd(s, tn, tn);
7101             }
7102             fpst = fpstatus_ptr(FPST_A64);
7103             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7104             write_fp_dreg_merging(s, a->rd, a->ra, ta);
7105         }
7106         break;
7107 
7108     case MO_32:
7109         if (fp_access_check(s)) {
7110             TCGv_i32 tn = read_fp_sreg(s, a->rn);
7111             TCGv_i32 tm = read_fp_sreg(s, a->rm);
7112             TCGv_i32 ta = read_fp_sreg(s, a->ra);
7113 
7114             if (neg_a) {
7115                 gen_vfp_maybe_ah_negs(s, ta, ta);
7116             }
7117             if (neg_n) {
7118                 gen_vfp_maybe_ah_negs(s, tn, tn);
7119             }
7120             fpst = fpstatus_ptr(FPST_A64);
7121             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7122             write_fp_sreg_merging(s, a->rd, a->ra, ta);
7123         }
7124         break;
7125 
7126     case MO_16:
7127         if (!dc_isar_feature(aa64_fp16, s)) {
7128             return false;
7129         }
7130         if (fp_access_check(s)) {
7131             TCGv_i32 tn = read_fp_hreg(s, a->rn);
7132             TCGv_i32 tm = read_fp_hreg(s, a->rm);
7133             TCGv_i32 ta = read_fp_hreg(s, a->ra);
7134 
7135             if (neg_a) {
7136                 gen_vfp_maybe_ah_negh(s, ta, ta);
7137             }
7138             if (neg_n) {
7139                 gen_vfp_maybe_ah_negh(s, tn, tn);
7140             }
7141             fpst = fpstatus_ptr(FPST_A64_F16);
7142             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7143             write_fp_hreg_merging(s, a->rd, a->ra, ta);
7144         }
7145         break;
7146 
7147     default:
7148         return false;
7149     }
7150     return true;
7151 }
7152 
7153 TRANS(FMADD, do_fmadd, a, false, false)
7154 TRANS(FNMADD, do_fmadd, a, true, true)
7155 TRANS(FMSUB, do_fmadd, a, false, true)
7156 TRANS(FNMSUB, do_fmadd, a, true, false)
7157 
7158 /*
7159  * Advanced SIMD Across Lanes
7160  */
7161 
7162 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7163                              MemOp src_sign, NeonGenTwo64OpFn *fn)
7164 {
7165     TCGv_i64 tcg_res, tcg_elt;
7166     MemOp src_mop = a->esz | src_sign;
7167     int elements = (a->q ? 16 : 8) >> a->esz;
7168 
7169     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7170     if (elements < 4) {
7171         return false;
7172     }
7173     if (!fp_access_check(s)) {
7174         return true;
7175     }
7176 
7177     tcg_res = tcg_temp_new_i64();
7178     tcg_elt = tcg_temp_new_i64();
7179 
7180     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7181     for (int i = 1; i < elements; i++) {
7182         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7183         fn(tcg_res, tcg_res, tcg_elt);
7184     }
7185 
7186     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
7187     write_fp_dreg(s, a->rd, tcg_res);
7188     return true;
7189 }
7190 
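/*
 * The *ADDLV forms pass widen = true, so the final tcg_gen_ext_i64
 * keeps a result one element size wider: e.g. UADDLV over 16 bytes
 * produces a 16-bit total in the destination.
 */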
7191 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7192 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7193 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7194 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7195 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7196 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7197 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7198 
7199 /*
7200  * do_fp_reduction helper
7201  *
7202  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7203  * important for correct NaN propagation that we do these
7204  * operations in exactly the order specified by the pseudocode.
7205  *
7206  * This is a recursive function.
7207  */
7208 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7209                                 int ebase, int ecount, TCGv_ptr fpst,
7210                                 NeonGenTwoSingleOpFn *fn)
7211 {
7212     if (ecount == 1) {
7213         TCGv_i32 tcg_elem = tcg_temp_new_i32();
7214         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7215         return tcg_elem;
7216     } else {
7217         int half = ecount >> 1;
7218         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7219 
7220         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7221         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7222         tcg_res = tcg_temp_new_i32();
7223 
7224         fn(tcg_res, tcg_lo, tcg_hi, fpst);
7225         return tcg_res;
7226     }
7227 }
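/*
 * E.g. for ecount == 4 this computes fn(fn(e0, e1), fn(e2, e3)):
 * a pairwise tree rather than a linear left fold.
 */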
7228 
7229 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7230                             NeonGenTwoSingleOpFn *fnormal,
7231                             NeonGenTwoSingleOpFn *fah)
7232 {
7233     if (fp_access_check(s)) {
7234         MemOp esz = a->esz;
7235         int elts = (a->q ? 16 : 8) >> esz;
7236         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7237         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7238                                        s->fpcr_ah ? fah : fnormal);
7239         write_fp_sreg(s, a->rd, res);
7240     }
7241     return true;
7242 }
7243 
7244 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7245            gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7246 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7247            gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7248 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7249            gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7250 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7251            gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7252 
7253 TRANS(FMAXNMV_s, do_fp_reduction, a,
7254       gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7255 TRANS(FMINNMV_s, do_fp_reduction, a,
7256       gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7257 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7258 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7259 
7260 /*
7261  * Floating-point Immediate
7262  */
7263 
7264 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7265 {
7266     int check = fp_access_check_scalar_hsd(s, a->esz);
7267     uint64_t imm;
7268 
7269     if (check <= 0) {
7270         return check == 0;
7271     }
7272 
7273     imm = vfp_expand_imm(a->esz, a->imm);
7274     write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7275     return true;
7276 }
7277 
7278 /*
7279  * Floating point compare, conditional compare
7280  */
7281 
7282 static void handle_fp_compare(DisasContext *s, int size,
7283                               unsigned int rn, unsigned int rm,
7284                               bool cmp_with_zero, bool signal_all_nans)
7285 {
7286     TCGv_i64 tcg_flags = tcg_temp_new_i64();
7287     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7288 
7289     if (size == MO_64) {
7290         TCGv_i64 tcg_vn, tcg_vm;
7291 
7292         tcg_vn = read_fp_dreg(s, rn);
7293         if (cmp_with_zero) {
7294             tcg_vm = tcg_constant_i64(0);
7295         } else {
7296             tcg_vm = read_fp_dreg(s, rm);
7297         }
7298         if (signal_all_nans) {
7299             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7300         } else {
7301             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7302         }
7303     } else {
7304         TCGv_i32 tcg_vn = tcg_temp_new_i32();
7305         TCGv_i32 tcg_vm = tcg_temp_new_i32();
7306 
7307         read_vec_element_i32(s, tcg_vn, rn, 0, size);
7308         if (cmp_with_zero) {
7309             tcg_gen_movi_i32(tcg_vm, 0);
7310         } else {
7311             read_vec_element_i32(s, tcg_vm, rm, 0, size);
7312         }
7313 
7314         switch (size) {
7315         case MO_32:
7316             if (signal_all_nans) {
7317                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7318             } else {
7319                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7320             }
7321             break;
7322         case MO_16:
7323             if (signal_all_nans) {
7324                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7325             } else {
7326                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7327             }
7328             break;
7329         default:
7330             g_assert_not_reached();
7331         }
7332     }
7333 
7334     gen_set_nzcv(tcg_flags);
7335 }
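/*
 * The cmp helpers return the flags in the usual NZCV layout (bits
 * [31:28]); e.g. an unordered comparison yields C and V set, which
 * gen_set_nzcv copies directly into PSTATE.
 */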
7336 
7337 /* FCMP, FCMPE */
7338 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7339 {
7340     int check = fp_access_check_scalar_hsd(s, a->esz);
7341 
7342     if (check <= 0) {
7343         return check == 0;
7344     }
7345 
7346     handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7347     return true;
7348 }
7349 
7350 /* FCCMP, FCCMPE */
7351 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7352 {
7353     TCGLabel *label_continue = NULL;
7354     int check = fp_access_check_scalar_hsd(s, a->esz);
7355 
7356     if (check <= 0) {
7357         return check == 0;
7358     }
7359 
7360     if (a->cond < 0x0e) { /* not always */
7361         TCGLabel *label_match = gen_new_label();
7362         label_continue = gen_new_label();
7363         arm_gen_test_cc(a->cond, label_match);
7364         /* nomatch: */
7365         gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7366         tcg_gen_br(label_continue);
7367         gen_set_label(label_match);
7368     }
7369 
7370     handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7371 
7372     if (label_continue) {
7373         gen_set_label(label_continue);
7374     }
7375     return true;
7376 }
7377 
7378 /*
7379  * Advanced SIMD Modified Immediate
7380  */
7381 
7382 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7383 {
7384     if (!dc_isar_feature(aa64_fp16, s)) {
7385         return false;
7386     }
7387     if (fp_access_check(s)) {
7388         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7389                              a->q ? 16 : 8, vec_full_reg_size(s),
7390                              vfp_expand_imm(MO_16, a->abcdefgh));
7391     }
7392     return true;
7393 }
7394 
7395 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7396                      int64_t c, uint32_t oprsz, uint32_t maxsz)
7397 {
7398     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7399 }
7400 
7401 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7402 {
7403     GVecGen2iFn *fn;
7404 
7405     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7406     if ((a->cmode & 1) && a->cmode < 12) {
7407         /* For op=1, the imm will be inverted, so BIC becomes AND. */
7408         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7409     } else {
7410         /* There is one unallocated cmode/op combination in this space */
7411         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7412             return false;
7413         }
7414         fn = gen_movi;
7415     }
7416 
7417     if (fp_access_check(s)) {
7418         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7419         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7420     }
7421     return true;
7422 }
7423 
7424 /*
7425  * Advanced SIMD Shift by Immediate
7426  */
7427 
7428 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7429 {
7430     if (fp_access_check(s)) {
7431         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7432     }
7433     return true;
7434 }
7435 
7436 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7437 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7438 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7439 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7440 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7441 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7442 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7443 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7444 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7445 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7446 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7447 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7448 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7449 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7450 
7451 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7452 {
7453     TCGv_i64 tcg_rn, tcg_rd;
7454     int esz = a->esz;
7455     int esize;
7456 
7457     if (!fp_access_check(s)) {
7458         return true;
7459     }
7460 
7461     /*
7462      * For the LL variants the store is larger than the load,
7463      * so if rd == rn we would overwrite parts of our input.
7464      * So load everything right now and use shifts in the main loop.
7465      */
7466     tcg_rd = tcg_temp_new_i64();
7467     tcg_rn = tcg_temp_new_i64();
7468     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7469 
7470     esize = 8 << esz;
7471     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7472         if (is_u) {
7473             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7474         } else {
7475             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7476         }
7477         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7478         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7479     }
7480     clear_vec_high(s, true, a->rd);
7481     return true;
7482 }
7483 
7484 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7485 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7486 
7487 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7488 {
7489     assert(shift >= 0 && shift <= 64);
7490     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7491 }
7492 
7493 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7494 {
7495     assert(shift >= 0 && shift <= 64);
7496     if (shift == 64) {
7497         tcg_gen_movi_i64(dst, 0);
7498     } else {
7499         tcg_gen_shri_i64(dst, src, shift);
7500     }
7501 }
7502 
7503 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7504 {
7505     gen_sshr_d(src, src, shift);
7506     tcg_gen_add_i64(dst, dst, src);
7507 }
7508 
7509 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7510 {
7511     gen_ushr_d(src, src, shift);
7512     tcg_gen_add_i64(dst, dst, src);
7513 }
7514 
7515 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7516 {
7517     assert(shift >= 0 && shift <= 32);
7518     if (shift) {
7519         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7520         tcg_gen_add_i64(dst, src, rnd);
7521         tcg_gen_sari_i64(dst, dst, shift);
7522     } else {
7523         tcg_gen_mov_i64(dst, src);
7524     }
7525 }
7526 
7527 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7528 {
7529     assert(shift >= 0 && shift <= 32);
7530     if (shift) {
7531         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7532         tcg_gen_add_i64(dst, src, rnd);
7533         tcg_gen_shri_i64(dst, dst, shift);
7534     } else {
7535         tcg_gen_mov_i64(dst, src);
7536     }
7537 }
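/*
 * Worked example (illustrative): rounding adds 1 << (shift - 1) before
 * shifting, so a rounding right shift of 7 by 2 computes
 * (7 + 2) >> 2 = 2, where the truncating 7 >> 2 would give 1.  The
 * _bhs forms may add the rounding constant directly because 8/16/32-bit
 * inputs cannot overflow the 64-bit temporary.
 */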
7538 
7539 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7540 {
7541     assert(shift >= 0 && shift <= 64);
7542     if (shift == 0) {
7543         tcg_gen_mov_i64(dst, src);
7544     } else if (shift == 64) {
7545         /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7546         tcg_gen_movi_i64(dst, 0);
7547     } else {
7548         TCGv_i64 rnd = tcg_temp_new_i64();
7549         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7550         tcg_gen_sari_i64(dst, src, shift);
7551         tcg_gen_add_i64(dst, dst, rnd);
7552     }
7553 }
7554 
7555 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7556 {
7557     assert(shift >= 0 && shift <= 64);
7558     if (shift == 0) {
7559         tcg_gen_mov_i64(dst, src);
7560     } else if (shift == 64) {
7561         /* Rounding will propagate bit 63 into bit 64. */
7562         tcg_gen_shri_i64(dst, src, 63);
7563     } else {
7564         TCGv_i64 rnd = tcg_temp_new_i64();
7565         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7566         tcg_gen_shri_i64(dst, src, shift);
7567         tcg_gen_add_i64(dst, dst, rnd);
7568     }
7569 }
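/*
 * Note (illustrative): the 64-bit forms cannot pre-add the rounding
 * constant without risking 64-bit overflow, so the rounding bit is
 * extracted and added after the shift.  E.g. URSHR #1 of ~0ull:
 * (src >> 1) + bit0 = 0x7fffffffffffffff + 1 = 0x8000000000000000,
 * matching the architectural 65-bit intermediate.
 */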
7570 
7571 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7572 {
7573     gen_srshr_d(src, src, shift);
7574     tcg_gen_add_i64(dst, dst, src);
7575 }
7576 
7577 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7578 {
7579     gen_urshr_d(src, src, shift);
7580     tcg_gen_add_i64(dst, dst, src);
7581 }
7582 
7583 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7584 {
7585     /* If shift is 64, dst is unchanged. */
7586     if (shift != 64) {
7587         tcg_gen_shri_i64(src, src, shift);
7588         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7589     }
7590 }
7591 
7592 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7593 {
7594     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7595 }
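/*
 * Note (illustrative): SRI keeps the top 'shift' bits of dst and
 * deposits the shifted src into bits [63 - shift:0]; SLI keeps the low
 * 'shift' bits of dst and deposits src into bits [63:shift].  SLI
 * should never see shift == 64, since the encoding restricts its shift
 * to 0..esize-1, while SRI's 1..esize range needs the explicit check
 * above.
 */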
7596 
7597 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7598                                     WideShiftImmFn * const fns[3], MemOp sign)
7599 {
7600     TCGv_i64 tcg_rn, tcg_rd;
7601     int esz = a->esz;
7602     int esize;
7603     WideShiftImmFn *fn;
7604 
7605     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7606 
7607     if (!fp_access_check(s)) {
7608         return true;
7609     }
7610 
7611     tcg_rn = tcg_temp_new_i64();
7612     tcg_rd = tcg_temp_new_i64();
7613     tcg_gen_movi_i64(tcg_rd, 0);
7614 
7615     fn = fns[esz];
7616     esize = 8 << esz;
7617     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7618         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7619         fn(tcg_rn, tcg_rn, a->imm);
7620         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7621     }
7622 
7623     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7624     clear_vec_high(s, a->q, a->rd);
7625     return true;
7626 }
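/*
 * Walk-through (illustrative): for SHRN with esz = MO_16, four source
 * elements are each read at the widened size (esz + 1 = MO_32),
 * shifted, and their low 16 bits deposited at offset 16 * i of tcg_rd.
 * The MO_64 write then fills the low (q = 0) or high (q = 1, the "2"
 * form) half of Vd, and clear_vec_high() zeroes the bits above the
 * 128-bit vector (and, for q = 0, the upper 64 bits as well).
 */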
7627 
7628 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7629 {
7630     tcg_gen_sari_i64(d, s, i);
7631     tcg_gen_ext16u_i64(d, d);
7632     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7633 }
7634 
7635 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7636 {
7637     tcg_gen_sari_i64(d, s, i);
7638     tcg_gen_ext32u_i64(d, d);
7639     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7640 }
7641 
7642 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7643 {
7644     gen_sshr_d(d, s, i);
7645     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7646 }
7647 
7648 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7649 {
7650     tcg_gen_shri_i64(d, s, i);
7651     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7652 }
7653 
7654 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7655 {
7656     tcg_gen_shri_i64(d, s, i);
7657     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7658 }
7659 
7660 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7661 {
7662     gen_ushr_d(d, s, i);
7663     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7664 }
7665 
7666 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7667 {
7668     tcg_gen_sari_i64(d, s, i);
7669     tcg_gen_ext16u_i64(d, d);
7670     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7671 }
7672 
7673 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7674 {
7675     tcg_gen_sari_i64(d, s, i);
7676     tcg_gen_ext32u_i64(d, d);
7677     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7678 }
7679 
7680 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7681 {
7682     gen_sshr_d(d, s, i);
7683     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7684 }
7685 
7686 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7687 {
7688     gen_srshr_bhs(d, s, i);
7689     tcg_gen_ext16u_i64(d, d);
7690     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7691 }
7692 
7693 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7694 {
7695     gen_srshr_bhs(d, s, i);
7696     tcg_gen_ext32u_i64(d, d);
7697     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7698 }
7699 
7700 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7701 {
7702     gen_srshr_d(d, s, i);
7703     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7704 }
7705 
7706 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7707 {
7708     gen_urshr_bhs(d, s, i);
7709     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7710 }
7711 
7712 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7713 {
7714     gen_urshr_bhs(d, s, i);
7715     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7716 }
7717 
7718 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7719 {
7720     gen_urshr_d(d, s, i);
7721     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7722 }
7723 
7724 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7725 {
7726     gen_srshr_bhs(d, s, i);
7727     tcg_gen_ext16u_i64(d, d);
7728     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7729 }
7730 
7731 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7732 {
7733     gen_srshr_bhs(d, s, i);
7734     tcg_gen_ext32u_i64(d, d);
7735     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7736 }
7737 
7738 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7739 {
7740     gen_srshr_d(d, s, i);
7741     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7742 }
7743 
7744 static WideShiftImmFn * const shrn_fns[] = {
7745     tcg_gen_shri_i64,
7746     tcg_gen_shri_i64,
7747     gen_ushr_d,
7748 };
7749 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
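/*
 * Note (illustrative): the byte and halfword entries can share the
 * plain 64-bit shift because do_vec_shift_imm_narrow reads each source
 * element zero-extended and deposits only the low esize bits of the
 * result, so no per-width masking is required.
 */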
7750 
7751 static WideShiftImmFn * const rshrn_fns[] = {
7752     gen_urshr_bhs,
7753     gen_urshr_bhs,
7754     gen_urshr_d,
7755 };
7756 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7757 
7758 static WideShiftImmFn * const sqshrn_fns[] = {
7759     gen_sqshrn_b,
7760     gen_sqshrn_h,
7761     gen_sqshrn_s,
7762 };
7763 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7764 
7765 static WideShiftImmFn * const uqshrn_fns[] = {
7766     gen_uqshrn_b,
7767     gen_uqshrn_h,
7768     gen_uqshrn_s,
7769 };
7770 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7771 
7772 static WideShiftImmFn * const sqshrun_fns[] = {
7773     gen_sqshrun_b,
7774     gen_sqshrun_h,
7775     gen_sqshrun_s,
7776 };
7777 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7778 
7779 static WideShiftImmFn * const sqrshrn_fns[] = {
7780     gen_sqrshrn_b,
7781     gen_sqrshrn_h,
7782     gen_sqrshrn_s,
7783 };
7784 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7785 
7786 static WideShiftImmFn * const uqrshrn_fns[] = {
7787     gen_uqrshrn_b,
7788     gen_uqrshrn_h,
7789     gen_uqrshrn_s,
7790 };
7791 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7792 
7793 static WideShiftImmFn * const sqrshrun_fns[] = {
7794     gen_sqrshrun_b,
7795     gen_sqrshrun_h,
7796     gen_sqrshrun_s,
7797 };
7798 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7799 
7800 /*
7801  * Advanced SIMD Scalar Shift by Immediate
7802  */
7803 
7804 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7805                                 WideShiftImmFn *fn, bool accumulate,
7806                                 MemOp sign)
7807 {
7808     if (fp_access_check(s)) {
7809         TCGv_i64 rd = tcg_temp_new_i64();
7810         TCGv_i64 rn = tcg_temp_new_i64();
7811 
7812         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7813         if (accumulate) {
7814             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7815         }
7816         fn(rd, rn, a->imm);
7817         write_fp_dreg(s, a->rd, rd);
7818     }
7819     return true;
7820 }
7821 
7822 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7823 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7824 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7825 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7826 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7827 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7828 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7829 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7830 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7831 
7832 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7833 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7834 
7835 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7836                               NeonGenTwoOpEnvFn *fn)
7837 {
7838     TCGv_i32 t = tcg_temp_new_i32();
7839     tcg_gen_extrl_i64_i32(t, s);
7840     fn(t, tcg_env, t, tcg_constant_i32(i));
7841     tcg_gen_extu_i32_i64(d, t);
7842 }
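/*
 * Note (illustrative): the 8/16/32-bit Neon saturating-shift helpers
 * take and return 32-bit values, so the scalar operand is truncated to
 * i32, run through the env helper (which can set FPSR.QC on
 * saturation), and zero-extended back for the 64-bit write.
 */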
7843 
7844 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7845 {
7846     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7847 }
7848 
7849 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7850 {
7851     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7852 }
7853 
7854 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7855 {
7856     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7857 }
7858 
7859 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7860 {
7861     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7862 }
7863 
7864 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7865 {
7866     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7867 }
7868 
7869 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7870 {
7871     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7872 }
7873 
7874 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7875 {
7876     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7877 }
7878 
7879 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7880 {
7881     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7882 }
7883 
7884 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7885 {
7886     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7887 }
7888 
7889 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7890 {
7891     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7892 }
7893 
7894 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7895 {
7896     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7897 }
7898 
7899 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7900 {
7901     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7902 }
7903 
7904 static WideShiftImmFn * const f_scalar_sqshli[] = {
7905     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7906 };
7907 
7908 static WideShiftImmFn * const f_scalar_uqshli[] = {
7909     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7910 };
7911 
7912 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7913     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7914 };
7915 
7916 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7917 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7918 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7919 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7920 
7921 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7922                                        WideShiftImmFn * const fns[3],
7923                                        MemOp sign, bool zext)
7924 {
7925     MemOp esz = a->esz;
7926 
7927     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7928 
7929     if (fp_access_check(s)) {
7930         TCGv_i64 rd = tcg_temp_new_i64();
7931         TCGv_i64 rn = tcg_temp_new_i64();
7932 
7933         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7934         fns[esz](rd, rn, a->imm);
7935         if (zext) {
7936             tcg_gen_ext_i64(rd, rd, esz);
7937         }
7938         write_fp_dreg(s, a->rd, rd);
7939     }
7940     return true;
7941 }
7942 
7943 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7944 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7945 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7946 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7947 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7948 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7949 
7950 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7951 {
7952     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7953     tcg_rd = cpu_reg(s, a->rd);
7954 
7955     if (!a->sf && is_signed) {
7956         tcg_n = tcg_temp_new_i64();
7957         tcg_m = tcg_temp_new_i64();
7958         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7959         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7960     } else {
7961         tcg_n = read_cpu_reg(s, a->rn, a->sf);
7962         tcg_m = read_cpu_reg(s, a->rm, a->sf);
7963     }
7964 
7965     if (is_signed) {
7966         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7967     } else {
7968         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7969     }
7970 
7971     if (!a->sf) { /* zero extend final result */
7972         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7973     }
7974     return true;
7975 }
7976 
7977 TRANS(SDIV, do_div, a, true)
7978 TRANS(UDIV, do_div, a, false)
7979 
7980 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7981  * Note that it is the caller's responsibility to ensure that the
7982  * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
7983  * ARM-mandated semantics for out-of-range shifts.
7984  */
7985 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7986                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7987 {
7988     switch (shift_type) {
7989     case A64_SHIFT_TYPE_LSL:
7990         tcg_gen_shl_i64(dst, src, shift_amount);
7991         break;
7992     case A64_SHIFT_TYPE_LSR:
7993         tcg_gen_shr_i64(dst, src, shift_amount);
7994         break;
7995     case A64_SHIFT_TYPE_ASR:
7996         if (!sf) {
7997             tcg_gen_ext32s_i64(dst, src);
7998         }
7999         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8000         break;
8001     case A64_SHIFT_TYPE_ROR:
8002         if (sf) {
8003             tcg_gen_rotr_i64(dst, src, shift_amount);
8004         } else {
8005             TCGv_i32 t0, t1;
8006             t0 = tcg_temp_new_i32();
8007             t1 = tcg_temp_new_i32();
8008             tcg_gen_extrl_i64_i32(t0, src);
8009             tcg_gen_extrl_i64_i32(t1, shift_amount);
8010             tcg_gen_rotr_i32(t0, t0, t1);
8011             tcg_gen_extu_i32_i64(dst, t0);
8012         }
8013         break;
8014     default:
8015         assert(FALSE); /* all shift types should be handled */
8016         break;
8017     }
8018 
8019     if (!sf) { /* zero extend final result */
8020         tcg_gen_ext32u_i64(dst, dst);
8021     }
8022 }
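/*
 * Note (illustrative): the 32-bit ROR case must wrap within 32 bits,
 * so the value and amount are truncated to i32 and rotated there; a
 * 64-bit rotate of the zero-extended value would rotate zeros from the
 * high half into the low word instead.
 */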
8023 
8024 /* Shift a TCGv src by immediate, put result in dst.
8025  * The shift amount must be in range (this should always be true as the
8026  * relevant instructions will UNDEF on bad shift immediates).
8027  */
8028 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8029                           enum a64_shift_type shift_type, unsigned int shift_i)
8030 {
8031     assert(shift_i < (sf ? 64 : 32));
8032 
8033     if (shift_i == 0) {
8034         tcg_gen_mov_i64(dst, src);
8035     } else {
8036         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8037     }
8038 }
8039 
8040 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8041                          enum a64_shift_type shift_type)
8042 {
8043     TCGv_i64 tcg_shift = tcg_temp_new_i64();
8044     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8045     TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8046 
8047     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8048     shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8049     return true;
8050 }
8051 
8052 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8053 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8054 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8055 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8056 
8057 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8058 {
8059     TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8060     TCGv_i32 tcg_bytes;
8061 
8062     switch (a->esz) {
8063     case MO_8:
8064     case MO_16:
8065     case MO_32:
8066         tcg_val = tcg_temp_new_i64();
8067         tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8068         break;
8069     case MO_64:
8070         tcg_val = cpu_reg(s, a->rm);
8071         break;
8072     default:
8073         g_assert_not_reached();
8074     }
8075     tcg_acc = cpu_reg(s, a->rn);
8076     tcg_bytes = tcg_constant_i32(1 << a->esz);
8077     tcg_rd = cpu_reg(s, a->rd);
8078 
8079     if (crc32c) {
8080         gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8081     } else {
8082         gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8083     }
8084     return true;
8085 }
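/*
 * Example (illustrative): CRC32W has esz = MO_32, so bits [31:0] of Xm
 * are extracted and tcg_bytes = 4; CRC32X passes Xm whole with
 * tcg_bytes = 8.  The helper consumes exactly that many bytes.
 */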
8086 
8087 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8088 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8089 
8090 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8091 {
8092     TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8093     TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8094     TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8095 
8096     tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8097     tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8098 
8099     if (setflag) {
8100         gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8101     } else {
8102         tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8103     }
8104     return true;
8105 }
8106 
8107 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8108 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
8109 
8110 static bool trans_IRG(DisasContext *s, arg_rrr *a)
8111 {
8112     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8113         TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8114         TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8115 
8116         if (s->ata[0]) {
8117             gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8118         } else {
8119             gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8120         }
8121         return true;
8122     }
8123     return false;
8124 }
8125 
8126 static bool trans_GMI(DisasContext *s, arg_rrr *a)
8127 {
8128     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8129         TCGv_i64 t = tcg_temp_new_i64();
8130 
8131         tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8132         tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8133         tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8134         return true;
8135     }
8136     return false;
8137 }
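/*
 * Example (illustrative): if the allocation tag in bits [59:56] of Xn
 * is 5, then t = 1 << 5 = 0x20 and Xd = Xm | 0x20, adding tag 5 to the
 * exclusion mask accumulated in Xm.
 */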
8138 
8139 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8140 {
8141     if (dc_isar_feature(aa64_pauth, s)) {
8142         gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8143                          cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8144         return true;
8145     }
8146     return false;
8147 }
8148 
8149 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8150 
8151 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8152 {
8153     fn(cpu_reg(s, rd), cpu_reg(s, rn));
8154     return true;
8155 }
8156 
8157 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8158 {
8159     TCGv_i32 t32 = tcg_temp_new_i32();
8160 
8161     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8162     gen_helper_rbit(t32, t32);
8163     tcg_gen_extu_i32_i64(tcg_rd, t32);
8164 }
8165 
8166 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8167 {
8168     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8169 
8170     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8171     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8172     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8173     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8174     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8175 }
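/*
 * Worked example (illustrative, 32-bit mask 0x00ff00ff): input
 * 0xaabbccdd gives (input & mask) << 8 = 0xbb00dd00 and
 * (input >> 8) & mask = 0x00aa00cc, which OR to 0xbbaaddcc -- each
 * 16-bit halfword has its bytes swapped.
 */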
8176 
8177 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8178 {
8179     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8180 }
8181 
8182 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8183 {
8184     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8185 }
8186 
8187 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8188 {
8189     tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8190 }
8191 
8192 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8193 {
8194     tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8195     tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8196 }
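/*
 * Worked example (illustrative): bswap64(0x0011223344556677) is
 * 0x7766554433221100, and rotating right by 32 gives
 * 0x3322110077665544 -- each 32-bit word byte-reversed in place, which
 * is REV32 on a 64-bit source.
 */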
8197 
8198 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8199 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8200 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8201 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
8202 
8203 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8204 {
8205     TCGv_i32 t32 = tcg_temp_new_i32();
8206 
8207     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8208     tcg_gen_clzi_i32(t32, t32, 32);
8209     tcg_gen_extu_i32_i64(tcg_rd, t32);
8210 }
8211 
8212 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8213 {
8214     tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8215 }
8216 
8217 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8218 {
8219     TCGv_i32 t32 = tcg_temp_new_i32();
8220 
8221     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8222     tcg_gen_clrsb_i32(t32, t32);
8223     tcg_gen_extu_i32_i64(tcg_rd, t32);
8224 }
8225 
8226 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8227 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8228 
8229 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8230 {
8231     TCGv_i64 tcg_rd, tcg_rn;
8232 
8233     if (a->z) {
8234         if (a->rn != 31) {
8235             return false;
8236         }
8237         tcg_rn = tcg_constant_i64(0);
8238     } else {
8239         tcg_rn = cpu_reg_sp(s, a->rn);
8240     }
8241     if (s->pauth_active) {
8242         tcg_rd = cpu_reg(s, a->rd);
8243         fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8244     }
8245     return true;
8246 }
8247 
8248 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8249 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8250 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8251 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8252 
8253 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8254 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8255 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8256 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8257 
8258 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8259 {
8260     if (s->pauth_active) {
8261         TCGv_i64 tcg_rd = cpu_reg(s, rd);
8262         fn(tcg_rd, tcg_env, tcg_rd);
8263     }
8264     return true;
8265 }
8266 
8267 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8268 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8269 
8270 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8271                          ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8272 {
8273     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8274 
8275     if (!a->sf && (a->sa & (1 << 5))) {
8276         return false;
8277     }
8278 
8279     tcg_rd = cpu_reg(s, a->rd);
8280     tcg_rn = cpu_reg(s, a->rn);
8281 
8282     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8283     if (a->sa) {
8284         shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8285     }
8286 
8287     (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8288     if (!a->sf) {
8289         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8290     }
8291     if (setflags) {
8292         gen_logic_CC(a->sf, tcg_rd);
8293     }
8294     return true;
8295 }
8296 
8297 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8298 {
8299     /*
8300      * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
8301      * register-register MOV and MVN, so it is worth special casing.
8302      */
8303     if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8304         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8305         TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8306 
8307         if (a->n) {
8308             tcg_gen_not_i64(tcg_rd, tcg_rm);
8309             if (!a->sf) {
8310                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8311             }
8312         } else {
8313             if (a->sf) {
8314                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
8315             } else {
8316                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8317             }
8318         }
8319         return true;
8320     }
8321 
8322     return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8323 }
8324 
8325 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8326 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8327 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8328 
8329 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8330                           bool sub_op, bool setflags)
8331 {
8332     TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8333 
8334     if (a->sa > 4) {
8335         return false;
8336     }
8337 
8338     /* non-flag setting ops may use SP */
8339     if (!setflags) {
8340         tcg_rd = cpu_reg_sp(s, a->rd);
8341     } else {
8342         tcg_rd = cpu_reg(s, a->rd);
8343     }
8344     tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8345 
8346     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8347     ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8348 
8349     tcg_result = tcg_temp_new_i64();
8350     if (!setflags) {
8351         if (sub_op) {
8352             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8353         } else {
8354             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8355         }
8356     } else {
8357         if (sub_op) {
8358             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8359         } else {
8360             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8361         }
8362     }
8363 
8364     if (a->sf) {
8365         tcg_gen_mov_i64(tcg_rd, tcg_result);
8366     } else {
8367         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8368     }
8369     return true;
8370 }
8371 
8372 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8373 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8374 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8375 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8376 
8377 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8378                           bool sub_op, bool setflags)
8379 {
8380     TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8381 
8382     if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8383         return false;
8384     }
8385 
8386     tcg_rd = cpu_reg(s, a->rd);
8387     tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8388     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8389 
8390     shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8391 
8392     tcg_result = tcg_temp_new_i64();
8393     if (!setflags) {
8394         if (sub_op) {
8395             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8396         } else {
8397             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8398         }
8399     } else {
8400         if (sub_op) {
8401             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8402         } else {
8403             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8404         }
8405     }
8406 
8407     if (a->sf) {
8408         tcg_gen_mov_i64(tcg_rd, tcg_result);
8409     } else {
8410         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8411     }
8412     return true;
8413 }
8414 
8415 TRANS(ADD_r, do_addsub_reg, a, false, false)
8416 TRANS(SUB_r, do_addsub_reg, a, true, false)
8417 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8418 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8419 
8420 static bool do_mulh(DisasContext *s, arg_rrr *a,
8421                     void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8422 {
8423     TCGv_i64 discard = tcg_temp_new_i64();
8424     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8425     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8426     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8427 
8428     fn(discard, tcg_rd, tcg_rn, tcg_rm);
8429     return true;
8430 }
8431 
8432 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8433 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8434 
8435 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8436                       bool sf, bool is_sub, MemOp mop)
8437 {
8438     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8439     TCGv_i64 tcg_op1, tcg_op2;
8440 
8441     if (mop == MO_64) {
8442         tcg_op1 = cpu_reg(s, a->rn);
8443         tcg_op2 = cpu_reg(s, a->rm);
8444     } else {
8445         tcg_op1 = tcg_temp_new_i64();
8446         tcg_op2 = tcg_temp_new_i64();
8447         tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8448         tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8449     }
8450 
8451     if (a->ra == 31 && !is_sub) {
8452         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8453         tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8454     } else {
8455         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8456         TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8457 
8458         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8459         if (is_sub) {
8460             tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8461         } else {
8462             tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8463         }
8464     }
8465 
8466     if (!sf) {
8467         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8468     }
8469     return true;
8470 }
8471 
8472 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8473 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8474 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8475 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8476 
8477 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8478 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8479 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8480 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8481 
8482 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8483                        bool is_sub, bool setflags)
8484 {
8485     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8486 
8487     tcg_rd = cpu_reg(s, a->rd);
8488     tcg_rn = cpu_reg(s, a->rn);
8489 
8490     if (is_sub) {
8491         tcg_y = tcg_temp_new_i64();
8492         tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8493     } else {
8494         tcg_y = cpu_reg(s, a->rm);
8495     }
8496 
8497     if (setflags) {
8498         gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8499     } else {
8500         gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8501     }
8502     return true;
8503 }
8504 
8505 TRANS(ADC, do_adc_sbc, a, false, false)
8506 TRANS(SBC, do_adc_sbc, a, true, false)
8507 TRANS(ADCS, do_adc_sbc, a, false, true)
8508 TRANS(SBCS, do_adc_sbc, a, true, true)
8509 
8510 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8511 {
8512     int mask = a->mask;
8513     TCGv_i64 tcg_rn;
8514     TCGv_i32 nzcv;
8515 
8516     if (!dc_isar_feature(aa64_condm_4, s)) {
8517         return false;
8518     }
8519 
8520     tcg_rn = read_cpu_reg(s, a->rn, 1);
8521     tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8522 
8523     nzcv = tcg_temp_new_i32();
8524     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8525 
8526     if (mask & 8) { /* N */
8527         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8528     }
8529     if (mask & 4) { /* Z */
8530         tcg_gen_not_i32(cpu_ZF, nzcv);
8531         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8532     }
8533     if (mask & 2) { /* C */
8534         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8535     }
8536     if (mask & 1) { /* V */
8537         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8538     }
8539     return true;
8540 }
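/*
 * Note (illustrative): after the rotate, bits [3:0] of Xn hold the new
 * NZCV.  Bit 3 is shifted to bit 31 of NF and bit 0 to bit 31 of VF
 * (QEMU keeps N and V in the sign bit), CF gets bit 1 directly, and ZF
 * is computed as ~nzcv & 4 so that it is zero exactly when the
 * selected Z bit is set.
 */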
8541 
8542 static bool do_setf(DisasContext *s, int rn, int shift)
8543 {
8544     TCGv_i32 tmp = tcg_temp_new_i32();
8545 
8546     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8547     tcg_gen_shli_i32(cpu_NF, tmp, shift);
8548     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8549     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8550     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8551     return true;
8552 }
8553 
8554 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8555 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
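/*
 * Note (illustrative): for SETF8 the shift of 24 puts bit 7 of Wn into
 * the sign bit of NF (and of ZF, which is zero iff Wn<7:0> is zero),
 * and VF's sign bit becomes Wn<8> XOR Wn<7>, i.e. V flags a value that
 * does not fit in 8 signed bits.
 */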
8556 
8557 /* CCMP, CCMN */
8558 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8559 {
8560     TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8561     TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8562     TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8563     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8564     TCGv_i64 tcg_rn, tcg_y;
8565     DisasCompare c;
8566     unsigned nzcv;
8567     bool has_andc;
8568 
8569     /* Set T0 = !COND.  */
8570     arm_test_cc(&c, a->cond);
8571     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8572 
8573     /* Load the arguments for the new comparison.  */
8574     if (a->imm) {
8575         tcg_y = tcg_constant_i64(a->y);
8576     } else {
8577         tcg_y = cpu_reg(s, a->y);
8578     }
8579     tcg_rn = cpu_reg(s, a->rn);
8580 
8581     /* Set the flags for the new comparison.  */
8582     if (a->op) {
8583         gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8584     } else {
8585         gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8586     }
8587 
8588     /*
8589      * If COND was false, force the flags to #nzcv.  Compute two masks
8590      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8591      * For tcg hosts that support ANDC, we can make do with just T1.
8592      * In either case, allow the tcg optimizer to delete any unused mask.
8593      */
8594     tcg_gen_neg_i32(tcg_t1, tcg_t0);
8595     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8596 
8597     nzcv = a->nzcv;
8598     has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0);
8599     if (nzcv & 8) { /* N */
8600         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8601     } else {
8602         if (has_andc) {
8603             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8604         } else {
8605             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8606         }
8607     }
8608     if (nzcv & 4) { /* Z */
8609         if (has_andc) {
8610             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8611         } else {
8612             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8613         }
8614     } else {
8615         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8616     }
8617     if (nzcv & 2) { /* C */
8618         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8619     } else {
8620         if (has_andc) {
8621             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8622         } else {
8623             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8624         }
8625     }
8626     if (nzcv & 1) { /* V */
8627         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8628     } else {
8629         if (has_andc) {
8630             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8631         } else {
8632             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8633         }
8634     }
8635     return true;
8636 }
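/*
 * Worked example (illustrative): with COND false, T0 = 1, T1 = -1 and
 * T2 = 0, so "flag | T1" forces a flag on and "flag & T2" (or andc
 * with T1) forces it off, steering each flag to its #nzcv bit; with
 * COND true, T1 = 0 and T2 = -1 leave the freshly computed flags
 * untouched.
 */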
8637 
8638 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8639 {
8640     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8641     TCGv_i64 zero = tcg_constant_i64(0);
8642     DisasCompare64 c;
8643 
8644     a64_test_cc(&c, a->cond);
8645 
8646     if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8647         /* CSET & CSETM.  */
8648         if (a->else_inv) {
8649             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8650                                    tcg_rd, c.value, zero);
8651         } else {
8652             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8653                                 tcg_rd, c.value, zero);
8654         }
8655     } else {
8656         TCGv_i64 t_true = cpu_reg(s, a->rn);
8657         TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8658 
8659         if (a->else_inv && a->else_inc) {
8660             tcg_gen_neg_i64(t_false, t_false);
8661         } else if (a->else_inv) {
8662             tcg_gen_not_i64(t_false, t_false);
8663         } else if (a->else_inc) {
8664             tcg_gen_addi_i64(t_false, t_false, 1);
8665         }
8666         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8667     }
8668 
8669     if (!a->sf) {
8670         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8671     }
8672     return true;
8673 }
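/*
 * Note (illustrative): the special case above turns CSET/CSETM (CSINC
 * or CSINV of XZR with XZR) into a single setcond/negsetcond on the
 * inverted condition, avoiding the movcond of the general path.
 */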
8674 
8675 typedef struct FPScalar1Int {
8676     void (*gen_h)(TCGv_i32, TCGv_i32);
8677     void (*gen_s)(TCGv_i32, TCGv_i32);
8678     void (*gen_d)(TCGv_i64, TCGv_i64);
8679 } FPScalar1Int;
8680 
8681 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8682                               const FPScalar1Int *f,
8683                               bool merging)
8684 {
8685     switch (a->esz) {
8686     case MO_64:
8687         if (fp_access_check(s)) {
8688             TCGv_i64 t = read_fp_dreg(s, a->rn);
8689             f->gen_d(t, t);
8690             if (merging) {
8691                 write_fp_dreg_merging(s, a->rd, a->rd, t);
8692             } else {
8693                 write_fp_dreg(s, a->rd, t);
8694             }
8695         }
8696         break;
8697     case MO_32:
8698         if (fp_access_check(s)) {
8699             TCGv_i32 t = read_fp_sreg(s, a->rn);
8700             f->gen_s(t, t);
8701             if (merging) {
8702                 write_fp_sreg_merging(s, a->rd, a->rd, t);
8703             } else {
8704                 write_fp_sreg(s, a->rd, t);
8705             }
8706         }
8707         break;
8708     case MO_16:
8709         if (!dc_isar_feature(aa64_fp16, s)) {
8710             return false;
8711         }
8712         if (fp_access_check(s)) {
8713             TCGv_i32 t = read_fp_hreg(s, a->rn);
8714             f->gen_h(t, t);
8715             if (merging) {
8716                 write_fp_hreg_merging(s, a->rd, a->rd, t);
8717             } else {
8718                 write_fp_sreg(s, a->rd, t);
8719             }
8720         }
8721         break;
8722     default:
8723         return false;
8724     }
8725     return true;
8726 }
8727 
8728 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
8729                                   const FPScalar1Int *fnormal,
8730                                   const FPScalar1Int *fah)
8731 {
8732     return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
8733 }
8734 
8735 static const FPScalar1Int f_scalar_fmov = {
8736     tcg_gen_mov_i32,
8737     tcg_gen_mov_i32,
8738     tcg_gen_mov_i64,
8739 };
8740 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
8741 
8742 static const FPScalar1Int f_scalar_fabs = {
8743     gen_vfp_absh,
8744     gen_vfp_abss,
8745     gen_vfp_absd,
8746 };
8747 static const FPScalar1Int f_scalar_ah_fabs = {
8748     gen_vfp_ah_absh,
8749     gen_vfp_ah_abss,
8750     gen_vfp_ah_absd,
8751 };
8752 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
8753 
8754 static const FPScalar1Int f_scalar_fneg = {
8755     gen_vfp_negh,
8756     gen_vfp_negs,
8757     gen_vfp_negd,
8758 };
8759 static const FPScalar1Int f_scalar_ah_fneg = {
8760     gen_vfp_ah_negh,
8761     gen_vfp_ah_negs,
8762     gen_vfp_ah_negd,
8763 };
8764 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
8765 
8766 typedef struct FPScalar1 {
8767     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8768     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8769     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8770 } FPScalar1;
8771 
8772 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
8773                                         const FPScalar1 *f, int rmode,
8774                                         ARMFPStatusFlavour fpsttype)
8775 {
8776     TCGv_i32 tcg_rmode = NULL;
8777     TCGv_ptr fpst;
8778     TCGv_i64 t64;
8779     TCGv_i32 t32;
8780     int check = fp_access_check_scalar_hsd(s, a->esz);
8781 
8782     if (check <= 0) {
8783         return check == 0;
8784     }
8785 
8786     fpst = fpstatus_ptr(fpsttype);
8787     if (rmode >= 0) {
8788         tcg_rmode = gen_set_rmode(rmode, fpst);
8789     }
8790 
8791     switch (a->esz) {
8792     case MO_64:
8793         t64 = read_fp_dreg(s, a->rn);
8794         f->gen_d(t64, t64, fpst);
8795         write_fp_dreg_merging(s, a->rd, a->rd, t64);
8796         break;
8797     case MO_32:
8798         t32 = read_fp_sreg(s, a->rn);
8799         f->gen_s(t32, t32, fpst);
8800         write_fp_sreg_merging(s, a->rd, a->rd, t32);
8801         break;
8802     case MO_16:
8803         t32 = read_fp_hreg(s, a->rn);
8804         f->gen_h(t32, t32, fpst);
8805         write_fp_hreg_merging(s, a->rd, a->rd, t32);
8806         break;
8807     default:
8808         g_assert_not_reached();
8809     }
8810 
8811     if (rmode >= 0) {
8812         gen_restore_rmode(tcg_rmode, fpst);
8813     }
8814     return true;
8815 }
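/*
 * Note (illustrative): a non-negative rmode temporarily forces the FP
 * rounding mode around the helper call and restores it afterwards;
 * e.g. FRINTP_s passes FPROUNDING_POSINF below, while FRINTI_s passes
 * -1 and rounds with the ambient FPCR mode.
 */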
8816 
8817 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8818                           const FPScalar1 *f, int rmode)
8819 {
8820     return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
8821                                        a->esz == MO_16 ?
8822                                        FPST_A64_F16 : FPST_A64);
8823 }
8824 
8825 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
8826                              const FPScalar1 *f, int rmode)
8827 {
8828     return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
8829 }
8830 
8831 static const FPScalar1 f_scalar_fsqrt = {
8832     gen_helper_vfp_sqrth,
8833     gen_helper_vfp_sqrts,
8834     gen_helper_vfp_sqrtd,
8835 };
8836 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8837 
8838 static const FPScalar1 f_scalar_frint = {
8839     gen_helper_advsimd_rinth,
8840     gen_helper_rints,
8841     gen_helper_rintd,
8842 };
8843 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8844 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8845 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8846 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8847 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8848 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8849 
8850 static const FPScalar1 f_scalar_frintx = {
8851     gen_helper_advsimd_rinth_exact,
8852     gen_helper_rints_exact,
8853     gen_helper_rintd_exact,
8854 };
8855 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8856 
8857 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
8858 {
8859     ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
8860     TCGv_i32 t32;
8861     int check;
8862 
8863     if (!dc_isar_feature(aa64_bf16, s)) {
8864         return false;
8865     }
8866 
8867     check = fp_access_check_scalar_hsd(s, a->esz);
8868 
8869     if (check <= 0) {
8870         return check == 0;
8871     }
8872 
8873     t32 = read_fp_sreg(s, a->rn);
8874     gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
8875     write_fp_hreg_merging(s, a->rd, a->rd, t32);
8876     return true;
8877 }
8878 
8879 static const FPScalar1 f_scalar_frint32 = {
8880     NULL,
8881     gen_helper_frint32_s,
8882     gen_helper_frint32_d,
8883 };
8884 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8885            &f_scalar_frint32, FPROUNDING_ZERO)
8886 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8887 
8888 static const FPScalar1 f_scalar_frint64 = {
8889     NULL,
8890     gen_helper_frint64_s,
8891     gen_helper_frint64_d,
8892 };
8893 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8894            &f_scalar_frint64, FPROUNDING_ZERO)
8895 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8896 
8897 static const FPScalar1 f_scalar_frecpe = {
8898     gen_helper_recpe_f16,
8899     gen_helper_recpe_f32,
8900     gen_helper_recpe_f64,
8901 };
8902 static const FPScalar1 f_scalar_frecpe_rpres = {
8903     gen_helper_recpe_f16,
8904     gen_helper_recpe_rpres_f32,
8905     gen_helper_recpe_f64,
8906 };
8907 TRANS(FRECPE_s, do_fp1_scalar_ah, a,
8908       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8909       &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
8910 
8911 static const FPScalar1 f_scalar_frecpx = {
8912     gen_helper_frecpx_f16,
8913     gen_helper_frecpx_f32,
8914     gen_helper_frecpx_f64,
8915 };
8916 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
8917 
8918 static const FPScalar1 f_scalar_frsqrte = {
8919     gen_helper_rsqrte_f16,
8920     gen_helper_rsqrte_f32,
8921     gen_helper_rsqrte_f64,
8922 };
8923 static const FPScalar1 f_scalar_frsqrte_rpres = {
8924     gen_helper_rsqrte_f16,
8925     gen_helper_rsqrte_rpres_f32,
8926     gen_helper_rsqrte_f64,
8927 };
8928 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
8929       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8930       &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
8931 
8932 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8933 {
8934     if (fp_access_check(s)) {
8935         TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8936         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8937         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8938 
8939         gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8940         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
8941     }
8942     return true;
8943 }
8944 
8945 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8946 {
8947     if (fp_access_check(s)) {
8948         TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8949         TCGv_i32 ahp = get_ahp_flag();
8950         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8951 
8952         gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8953         /* write_fp_hreg_merging is OK here because top half of result is zero */
8954         write_fp_hreg_merging(s, a->rd, a->rd, tmp);
8955     }
8956     return true;
8957 }
8958 
8959 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8960 {
8961     if (fp_access_check(s)) {
8962         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8963         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8964         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8965 
8966         gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8967         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8968     }
8969     return true;
8970 }
8971 
8972 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8973 {
8974     if (fp_access_check(s)) {
8975         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8976         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8977         TCGv_i32 ahp = get_ahp_flag();
8978         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8979 
8980         gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8981         /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
8982         write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
8983     }
8984     return true;
8985 }
8986 
8987 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8988 {
8989     if (fp_access_check(s)) {
8990         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8991         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8992         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8993         TCGv_i32 tcg_ahp = get_ahp_flag();
8994 
8995         gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8996         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8997     }
8998     return true;
8999 }
9000 
9001 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9002 {
9003     if (fp_access_check(s)) {
9004         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9005         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9006         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9007         TCGv_i32 tcg_ahp = get_ahp_flag();
9008 
9009         gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9010         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9011     }
9012     return true;
9013 }
9014 
9015 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9016                            TCGv_i64 tcg_int, bool is_signed)
9017 {
9018     TCGv_ptr tcg_fpstatus;
9019     TCGv_i32 tcg_shift, tcg_single;
9020     TCGv_i64 tcg_double;
9021 
9022     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9023     tcg_shift = tcg_constant_i32(shift);
9024 
9025     switch (esz) {
9026     case MO_64:
9027         tcg_double = tcg_temp_new_i64();
9028         if (is_signed) {
9029             gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9030         } else {
9031             gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9032         }
9033         write_fp_dreg_merging(s, rd, rd, tcg_double);
9034         break;
9035 
9036     case MO_32:
9037         tcg_single = tcg_temp_new_i32();
9038         if (is_signed) {
9039             gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9040         } else {
9041             gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9042         }
9043         write_fp_sreg_merging(s, rd, rd, tcg_single);
9044         break;
9045 
9046     case MO_16:
9047         tcg_single = tcg_temp_new_i32();
9048         if (is_signed) {
9049             gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9050         } else {
9051             gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9052         }
9053         write_fp_hreg_merging(s, rd, rd, tcg_single);
9054         break;
9055 
9056     default:
9057         g_assert_not_reached();
9058     }
9059     return true;
9060 }
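/*
 * Note (illustrative): the shift operand carries the fixed-point
 * fraction bits, e.g. SCVTF Dd, Xn, #8 scales the converted integer by
 * 2^-8; the plain integer conversions come through with shift = 0.
 */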
9061 
9062 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9063 {
9064     TCGv_i64 tcg_int;
9065     int check = fp_access_check_scalar_hsd(s, a->esz);
9066 
9067     if (check <= 0) {
9068         return check == 0;
9069     }
9070 
9071     if (a->sf) {
9072         tcg_int = cpu_reg(s, a->rn);
9073     } else {
9074         tcg_int = read_cpu_reg(s, a->rn, true);
9075         if (is_signed) {
9076             tcg_gen_ext32s_i64(tcg_int, tcg_int);
9077         } else {
9078             tcg_gen_ext32u_i64(tcg_int, tcg_int);
9079         }
9080     }
9081     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9082 }
9083 
9084 TRANS(SCVTF_g, do_cvtf_g, a, true)
9085 TRANS(UCVTF_g, do_cvtf_g, a, false)
9086 
9087 /*
9088  * [US]CVTF (vector), scalar version.
9089  * Which sounds weird, but really just means input from fp register
9090  * instead of input from general register.  Input and output element
9091  * sizes are always equal.
9092  */
9093 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9094 {
9095     TCGv_i64 tcg_int;
9096     int check = fp_access_check_scalar_hsd(s, a->esz);
9097 
9098     if (check <= 0) {
9099         return check == 0;
9100     }
9101 
9102     tcg_int = tcg_temp_new_i64();
9103     read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9104     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9105 }
9106 
9107 TRANS(SCVTF_f, do_cvtf_f, a, true)
9108 TRANS(UCVTF_f, do_cvtf_f, a, false)
9109 
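/*
 * Common scalar FP-to-integer path.  @esz is the source float size and
 * @out the destination integer size, with MO_SIGN selecting a signed
 * conversion.  Results narrower than 64 bits are zero-extended into
 * @tcg_out; the requested rounding mode is installed before the helper
 * call and restored afterwards.
 */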
9110 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9111                            TCGv_i64 tcg_out, int shift, int rn,
9112                            ARMFPRounding rmode)
9113 {
9114     TCGv_ptr tcg_fpstatus;
9115     TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9116 
9117     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9118     tcg_shift = tcg_constant_i32(shift);
9119     tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9120 
9121     switch (esz) {
9122     case MO_64:
9123         read_vec_element(s, tcg_out, rn, 0, MO_64);
9124         switch (out) {
9125         case MO_64 | MO_SIGN:
9126             gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9127             break;
9128         case MO_64:
9129             gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9130             break;
9131         case MO_32 | MO_SIGN:
9132             gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9133             break;
9134         case MO_32:
9135             gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9136             break;
9137         default:
9138             g_assert_not_reached();
9139         }
9140         break;
9141 
9142     case MO_32:
9143         tcg_single = read_fp_sreg(s, rn);
9144         switch (out) {
9145         case MO_64 | MO_SIGN:
9146             gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9147             break;
9148         case MO_64:
9149             gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9150             break;
9151         case MO_32 | MO_SIGN:
9152             gen_helper_vfp_tosls(tcg_single, tcg_single,
9153                                  tcg_shift, tcg_fpstatus);
9154             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9155             break;
9156         case MO_32:
9157             gen_helper_vfp_touls(tcg_single, tcg_single,
9158                                  tcg_shift, tcg_fpstatus);
9159             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9160             break;
9161         default:
9162             g_assert_not_reached();
9163         }
9164         break;
9165 
9166     case MO_16:
9167         tcg_single = read_fp_hreg(s, rn);
9168         switch (out) {
9169         case MO_64 | MO_SIGN:
9170             gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9171             break;
9172         case MO_64:
9173             gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9174             break;
9175         case MO_32 | MO_SIGN:
9176             gen_helper_vfp_toslh(tcg_single, tcg_single,
9177                                  tcg_shift, tcg_fpstatus);
9178             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9179             break;
9180         case MO_32:
9181             gen_helper_vfp_toulh(tcg_single, tcg_single,
9182                                  tcg_shift, tcg_fpstatus);
9183             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9184             break;
9185         case MO_16 | MO_SIGN:
9186             gen_helper_vfp_toshh(tcg_single, tcg_single,
9187                                  tcg_shift, tcg_fpstatus);
9188             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9189             break;
9190         case MO_16:
9191             gen_helper_vfp_touhh(tcg_single, tcg_single,
9192                                  tcg_shift, tcg_fpstatus);
9193             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9194             break;
9195         default:
9196             g_assert_not_reached();
9197         }
9198         break;
9199 
9200     default:
9201         g_assert_not_reached();
9202     }
9203 
9204     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9205 }
9206 
9207 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9208                       ARMFPRounding rmode, bool is_signed)
9209 {
9210     TCGv_i64 tcg_int;
9211     int check = fp_access_check_scalar_hsd(s, a->esz);
9212 
9213     if (check <= 0) {
9214         return check == 0;
9215     }
9216 
9217     tcg_int = cpu_reg(s, a->rd);
9218     do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9219                    a->esz, tcg_int, a->shift, a->rn, rmode);
9220 
9221     if (!a->sf) {
9222         tcg_gen_ext32u_i64(tcg_int, tcg_int);
9223     }
9224     return true;
9225 }
9226 
9227 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9228 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9229 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9230 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9231 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9232 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9233 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9234 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9235 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9236 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9237 
9238 /*
9239  * FCVT* (vector), scalar version.
9240  * Which sounds weird, but really just means output to fp register
9241  * instead of output to general register.  Input and output element
9242  * sizes are always equal.
9243  */
9244 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9245                       ARMFPRounding rmode, bool is_signed)
9246 {
9247     TCGv_i64 tcg_int;
9248     int check = fp_access_check_scalar_hsd(s, a->esz);
9249 
9250     if (check <= 0) {
9251         return check == 0;
9252     }
9253 
9254     tcg_int = tcg_temp_new_i64();
9255     do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9256                    a->esz, tcg_int, a->shift, a->rn, rmode);
9257 
9258     if (!s->fpcr_nep) {
9259         clear_vec(s, a->rd);
9260     }
9261     write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9262     return true;
9263 }
9264 
9265 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9266 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9267 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9268 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9269 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9270 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9271 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9272 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9273 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9274 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9275 
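/*
 * FJCVTZS (FEAT_JSCVT): double to 32-bit integer with JavaScript
 * conversion semantics.  The helper packs the 32-bit result into the
 * low half of its return value and the new value for cpu_ZF into the
 * high half; N, C and V are all cleared.
 */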
9276 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9277 {
9278     if (!dc_isar_feature(aa64_jscvt, s)) {
9279         return false;
9280     }
9281     if (fp_access_check(s)) {
9282         TCGv_i64 t = read_fp_dreg(s, a->rn);
9283         TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9284 
9285         gen_helper_fjcvtzs(t, t, fpstatus);
9286 
9287         tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9288         tcg_gen_extrh_i64_i32(cpu_ZF, t);
9289         tcg_gen_movi_i32(cpu_CF, 0);
9290         tcg_gen_movi_i32(cpu_NF, 0);
9291         tcg_gen_movi_i32(cpu_VF, 0);
9292     }
9293     return true;
9294 }
9295 
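/*
 * FMOV between general and FP registers.  The suffix names the
 * destination and then the source: h/s/d are FP half/single/double,
 * w/x are 32/64-bit general registers, and u is the upper 64 bits of
 * the 128-bit vector register (the FMOV Vd.D[1], Xn forms).
 */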
9296 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9297 {
9298     if (!dc_isar_feature(aa64_fp16, s)) {
9299         return false;
9300     }
9301     if (fp_access_check(s)) {
9302         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9303         TCGv_i64 tmp = tcg_temp_new_i64();
9304         tcg_gen_ext16u_i64(tmp, tcg_rn);
9305         write_fp_dreg(s, a->rd, tmp);
9306     }
9307     return true;
9308 }
9309 
9310 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9311 {
9312     if (fp_access_check(s)) {
9313         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9314         TCGv_i64 tmp = tcg_temp_new_i64();
9315         tcg_gen_ext32u_i64(tmp, tcg_rn);
9316         write_fp_dreg(s, a->rd, tmp);
9317     }
9318     return true;
9319 }
9320 
9321 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9322 {
9323     if (fp_access_check(s)) {
9324         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9325         write_fp_dreg(s, a->rd, tcg_rn);
9326     }
9327     return true;
9328 }
9329 
9330 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9331 {
9332     if (fp_access_check(s)) {
9333         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9334         tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9335         clear_vec_high(s, true, a->rd);
9336     }
9337     return true;
9338 }
9339 
9340 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9341 {
9342     if (!dc_isar_feature(aa64_fp16, s)) {
9343         return false;
9344     }
9345     if (fp_access_check(s)) {
9346         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9347         tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9348     }
9349     return true;
9350 }
9351 
9352 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9353 {
9354     if (fp_access_check(s)) {
9355         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9356         tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9357     }
9358     return true;
9359 }
9360 
9361 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9362 {
9363     if (fp_access_check(s)) {
9364         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9365         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9366     }
9367     return true;
9368 }
9369 
9370 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9371 {
9372     if (fp_access_check(s)) {
9373         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9374         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9375     }
9376     return true;
9377 }
9378 
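/*
 * One-operand saturating ops: the helpers take tcg_env so that they
 * can set QC on saturation.  gen_bhs is indexed by esz for the
 * 8/16/32-bit cases; gen_d handles the 64-bit case.
 */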
9379 typedef struct ENVScalar1 {
9380     NeonGenOneOpEnvFn *gen_bhs[3];
9381     NeonGenOne64OpEnvFn *gen_d;
9382 } ENVScalar1;
9383 
9384 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9385 {
9386     if (!fp_access_check(s)) {
9387         return true;
9388     }
9389     if (a->esz == MO_64) {
9390         TCGv_i64 t = read_fp_dreg(s, a->rn);
9391         f->gen_d(t, tcg_env, t);
9392         write_fp_dreg(s, a->rd, t);
9393     } else {
9394         TCGv_i32 t = tcg_temp_new_i32();
9395 
9396         read_vec_element_i32(s, t, a->rn, 0, a->esz);
9397         f->gen_bhs[a->esz](t, tcg_env, t);
9398         write_fp_sreg(s, a->rd, t);
9399     }
9400     return true;
9401 }
9402 
9403 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9404 {
9405     if (a->esz == MO_64 && !a->q) {
9406         return false;
9407     }
9408     if (!fp_access_check(s)) {
9409         return true;
9410     }
9411     if (a->esz == MO_64) {
9412         TCGv_i64 t = tcg_temp_new_i64();
9413 
9414         for (int i = 0; i < 2; ++i) {
9415             read_vec_element(s, t, a->rn, i, MO_64);
9416             f->gen_d(t, tcg_env, t);
9417             write_vec_element(s, t, a->rd, i, MO_64);
9418         }
9419     } else {
9420         TCGv_i32 t = tcg_temp_new_i32();
9421         int n = (a->q ? 16 : 8) >> a->esz;
9422 
9423         for (int i = 0; i < n; ++i) {
9424             read_vec_element_i32(s, t, a->rn, i, a->esz);
9425             f->gen_bhs[a->esz](t, tcg_env, t);
9426             write_vec_element_i32(s, t, a->rd, i, a->esz);
9427         }
9428     }
9429     clear_vec_high(s, a->q, a->rd);
9430     return true;
9431 }
9432 
9433 static const ENVScalar1 f_scalar_sqabs = {
9434     { gen_helper_neon_qabs_s8,
9435       gen_helper_neon_qabs_s16,
9436       gen_helper_neon_qabs_s32 },
9437     gen_helper_neon_qabs_s64,
9438 };
9439 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9440 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9441 
9442 static const ENVScalar1 f_scalar_sqneg = {
9443     { gen_helper_neon_qneg_s8,
9444       gen_helper_neon_qneg_s16,
9445       gen_helper_neon_qneg_s32 },
9446     gen_helper_neon_qneg_s64,
9447 };
9448 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9449 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9450 
9451 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9452 {
9453     if (fp_access_check(s)) {
9454         TCGv_i64 t = read_fp_dreg(s, a->rn);
9455         f(t, t);
9456         write_fp_dreg(s, a->rd, t);
9457     }
9458     return true;
9459 }
9460 
9461 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9462 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9463 
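/*
 * Scalar compare against zero: negsetcond writes -1 when the condition
 * holds and 0 otherwise, giving the all-ones/all-zeros mask that the
 * SIMD compare insns require.
 */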
9464 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9465 {
9466     if (fp_access_check(s)) {
9467         TCGv_i64 t = read_fp_dreg(s, a->rn);
9468         tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9469         write_fp_dreg(s, a->rd, t);
9470     }
9471     return true;
9472 }
9473 
9474 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9475 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9476 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9477 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9478 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9479 
9480 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9481                                    ArithOneOp * const fn[3])
9482 {
9483     if (a->esz == MO_64) {
9484         return false;
9485     }
9486     if (fp_access_check(s)) {
9487         TCGv_i64 t = tcg_temp_new_i64();
9488 
9489         read_vec_element(s, t, a->rn, 0, a->esz + 1);
9490         fn[a->esz](t, t);
9491         clear_vec(s, a->rd);
9492         write_vec_element(s, t, a->rd, 0, a->esz);
9493     }
9494     return true;
9495 }
9496 
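/* Adapt helpers that need tcg_env to the two-operand ArithOneOp form. */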
9497 #define WRAP_ENV(NAME) \
9498     static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9499     { gen_helper_##NAME(d, tcg_env, n); }
9500 
9501 WRAP_ENV(neon_unarrow_sat8)
9502 WRAP_ENV(neon_unarrow_sat16)
9503 WRAP_ENV(neon_unarrow_sat32)
9504 
9505 static ArithOneOp * const f_scalar_sqxtun[] = {
9506     gen_neon_unarrow_sat8,
9507     gen_neon_unarrow_sat16,
9508     gen_neon_unarrow_sat32,
9509 };
9510 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9511 
9512 WRAP_ENV(neon_narrow_sat_s8)
9513 WRAP_ENV(neon_narrow_sat_s16)
9514 WRAP_ENV(neon_narrow_sat_s32)
9515 
9516 static ArithOneOp * const f_scalar_sqxtn[] = {
9517     gen_neon_narrow_sat_s8,
9518     gen_neon_narrow_sat_s16,
9519     gen_neon_narrow_sat_s32,
9520 };
9521 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9522 
9523 WRAP_ENV(neon_narrow_sat_u8)
9524 WRAP_ENV(neon_narrow_sat_u16)
9525 WRAP_ENV(neon_narrow_sat_u32)
9526 
9527 static ArithOneOp * const f_scalar_uqxtn[] = {
9528     gen_neon_narrow_sat_u8,
9529     gen_neon_narrow_sat_u16,
9530     gen_neon_narrow_sat_u32,
9531 };
9532 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9533 
9534 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
9535 {
9536     if (fp_access_check(s)) {
9537         /*
9538          * 64 bit to 32 bit float conversion
9539          * with von Neumann rounding (round to odd)
9540          */
9541         TCGv_i64 src = read_fp_dreg(s, a->rn);
9542         TCGv_i32 dst = tcg_temp_new_i32();
9543         gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
9544         write_fp_sreg_merging(s, a->rd, a->rd, dst);
9545     }
9546     return true;
9547 }
9548 
9549 #undef WRAP_ENV
9550 
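/*
 * Generic two-operand gvec expansion.  A 64-bit element size without
 * Q (a single 64-bit element) is an unallocated encoding.
 */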
9551 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9552 {
9553     if (!a->q && a->esz == MO_64) {
9554         return false;
9555     }
9556     if (fp_access_check(s)) {
9557         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9558     }
9559     return true;
9560 }
9561 
9562 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9563 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9564 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9565 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9566 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9567 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9568 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9569 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9570 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9571 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9572 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9573 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9574 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9575 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9576 
9577 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9578 {
9579     if (a->esz == MO_64) {
9580         return false;
9581     }
9582     if (fp_access_check(s)) {
9583         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9584     }
9585     return true;
9586 }
9587 
9588 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9589 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9590 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9591 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9592 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9593 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9594 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9595 
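/*
 * Narrowing ops process the 128-bit source as two 64-bit halves, each
 * of which narrows to 32 bits.  Q=0 forms write the low half of Rd and
 * clear the high half; the Q=1 "2" forms (e.g. XTN2) write the high
 * half and preserve the low half.
 */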
9596 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9597                                    ArithOneOp * const fn[3])
9598 {
9599     if (a->esz == MO_64) {
9600         return false;
9601     }
9602     if (fp_access_check(s)) {
9603         TCGv_i64 t0 = tcg_temp_new_i64();
9604         TCGv_i64 t1 = tcg_temp_new_i64();
9605 
9606         read_vec_element(s, t0, a->rn, 0, MO_64);
9607         read_vec_element(s, t1, a->rn, 1, MO_64);
9608         fn[a->esz](t0, t0);
9609         fn[a->esz](t1, t1);
9610         write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9611         write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9612         clear_vec_high(s, a->q, a->rd);
9613     }
9614     return true;
9615 }
9616 
9617 static ArithOneOp * const f_scalar_xtn[] = {
9618     gen_helper_neon_narrow_u8,
9619     gen_helper_neon_narrow_u16,
9620     tcg_gen_ext32u_i64,
9621 };
9622 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9623 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9624 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9625 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9626 
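/*
 * Narrow one 64-bit chunk holding two f32 values into two f16 values
 * packed in the low 32 bits of the result.
 */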
9627 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9628 {
9629     TCGv_i32 tcg_lo = tcg_temp_new_i32();
9630     TCGv_i32 tcg_hi = tcg_temp_new_i32();
9631     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9632     TCGv_i32 ahp = get_ahp_flag();
9633 
9634     tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9635     gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9636     gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9637     tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9638     tcg_gen_extu_i32_i64(d, tcg_lo);
9639 }
9640 
9641 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9642 {
9643     TCGv_i32 tmp = tcg_temp_new_i32();
9644     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9645 
9646     gen_helper_vfp_fcvtsd(tmp, n, fpst);
9647     tcg_gen_extu_i32_i64(d, tmp);
9648 }
9649 
9650 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9651 {
9652     /*
9653      * 64 bit to 32 bit float conversion
9654      * with von Neumann rounding (round to odd)
9655      */
9656     TCGv_i32 tmp = tcg_temp_new_i32();
9657     gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9658     tcg_gen_extu_i32_i64(d, tmp);
9659 }
9660 
9661 static ArithOneOp * const f_vector_fcvtn[] = {
9662     NULL,
9663     gen_fcvtn_hs,
9664     gen_fcvtn_sd,
9665 };
9666 static ArithOneOp * const f_scalar_fcvtxn[] = {
9667     NULL,
9668     NULL,
9669     gen_fcvtxn_sd,
9670 };
9671 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9672 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9673 
9674 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9675 {
9676     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9677     TCGv_i32 tmp = tcg_temp_new_i32();
9678     gen_helper_bfcvt_pair(tmp, n, fpst);
9679     tcg_gen_extu_i32_i64(d, tmp);
9680 }
9681 
9682 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
9683 {
9684     TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
9685     TCGv_i32 tmp = tcg_temp_new_i32();
9686     gen_helper_bfcvt_pair(tmp, n, fpst);
9687     tcg_gen_extu_i32_i64(d, tmp);
9688 }
9689 
9690 static ArithOneOp * const f_vector_bfcvtn[2][3] = {
9691     {
9692         NULL,
9693         gen_bfcvtn_hs,
9694         NULL,
9695     }, {
9696         NULL,
9697         gen_bfcvtn_ah_hs,
9698         NULL,
9699     }
9700 };
9701 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
9702            f_vector_bfcvtn[s->fpcr_ah])
9703 
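/*
 * SHLL zero-extends each element to double width and shifts it left by
 * the original element size in bits.  Since the widened lanes have
 * zero high halves, a single 64-bit shift handles all lanes at once.
 */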
9704 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9705 {
9706     static NeonGenWidenFn * const widenfns[3] = {
9707         gen_helper_neon_widen_u8,
9708         gen_helper_neon_widen_u16,
9709         tcg_gen_extu_i32_i64,
9710     };
9711     NeonGenWidenFn *widenfn;
9712     TCGv_i64 tcg_res[2];
9713     TCGv_i32 tcg_op;
9714     int part, pass;
9715 
9716     if (a->esz == MO_64) {
9717         return false;
9718     }
9719     if (!fp_access_check(s)) {
9720         return true;
9721     }
9722 
9723     tcg_op = tcg_temp_new_i32();
9724     widenfn = widenfns[a->esz];
9725     part = a->q ? 2 : 0;
9726 
9727     for (pass = 0; pass < 2; pass++) {
9728         read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9729         tcg_res[pass] = tcg_temp_new_i64();
9730         widenfn(tcg_res[pass], tcg_op);
9731         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9732     }
9733 
9734     for (pass = 0; pass < 2; pass++) {
9735         write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9736     }
9737     return true;
9738 }
9739 
9740 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9741 {
9742     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9743 
9744     if (check <= 0) {
9745         return check == 0;
9746     }
9747 
9748     gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9749     return true;
9750 }
9751 
9752 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9753 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9754 
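/*
 * One-operand FP vector ops.  rmode < 0 means "use the rounding mode
 * currently in FPCR"; otherwise the requested mode is installed around
 * the operation and restored afterwards.
 */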
9755 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9756                           const FPScalar1 *f, int rmode)
9757 {
9758     TCGv_i32 tcg_rmode = NULL;
9759     TCGv_ptr fpst;
9760     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9761 
9762     if (check <= 0) {
9763         return check == 0;
9764     }
9765 
9766     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9767     if (rmode >= 0) {
9768         tcg_rmode = gen_set_rmode(rmode, fpst);
9769     }
9770 
9771     if (a->esz == MO_64) {
9772         TCGv_i64 t64 = tcg_temp_new_i64();
9773 
9774         for (int pass = 0; pass < 2; ++pass) {
9775             read_vec_element(s, t64, a->rn, pass, MO_64);
9776             f->gen_d(t64, t64, fpst);
9777             write_vec_element(s, t64, a->rd, pass, MO_64);
9778         }
9779     } else {
9780         TCGv_i32 t32 = tcg_temp_new_i32();
9781         void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9782             = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9783 
9784         for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9785             read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9786             gen(t32, t32, fpst);
9787             write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9788         }
9789     }
9790     clear_vec_high(s, a->q, a->rd);
9791 
9792     if (rmode >= 0) {
9793         gen_restore_rmode(tcg_rmode, fpst);
9794     }
9795     return true;
9796 }
9797 
9798 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9799 
9800 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9801 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9802 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9803 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9804 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9805 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9806 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9807 
9808 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9809            &f_scalar_frint32, FPROUNDING_ZERO)
9810 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9811 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9812            &f_scalar_frint64, FPROUNDING_ZERO)
9813 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9814 
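/*
 * Two-operand gvec expansion with an explicit FP status flavour.
 * fns[] is indexed by esz - 1, so its three entries cover the
 * half/single/double element sizes.
 */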
9815 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
9816                                            bool is_q, int rd, int rn, int data,
9817                                            gen_helper_gvec_2_ptr * const fns[3],
9818                                            ARMFPStatusFlavour fpsttype)
9819 {
9820     int check = fp_access_check_vector_hsd(s, is_q, esz);
9821     TCGv_ptr fpst;
9822 
9823     if (check <= 0) {
9824         return check == 0;
9825     }
9826 
9827     fpst = fpstatus_ptr(fpsttype);
9828     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9829                        vec_full_reg_offset(s, rn), fpst,
9830                        is_q ? 16 : 8, vec_full_reg_size(s),
9831                        data, fns[esz - 1]);
9832     return true;
9833 }
9834 
9835 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9836                              int rd, int rn, int data,
9837                              gen_helper_gvec_2_ptr * const fns[3])
9838 {
9839     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
9840                                           esz == MO_16 ? FPST_A64_F16 :
9841                                           FPST_A64);
9842 }
9843 
9844 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
9845                                 int rd, int rn, int data,
9846                                 gen_helper_gvec_2_ptr * const fns[3])
9847 {
9848     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
9849                                           fns, select_ah_fpst(s, esz));
9850 }
9851 
9852 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9853     gen_helper_gvec_vcvt_sh,
9854     gen_helper_gvec_vcvt_sf,
9855     gen_helper_gvec_vcvt_sd,
9856 };
9857 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9858       a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9859 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9860       a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9861 
9862 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9863     gen_helper_gvec_vcvt_uh,
9864     gen_helper_gvec_vcvt_uf,
9865     gen_helper_gvec_vcvt_ud,
9866 };
9867 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9868       a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9869 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9870       a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9871 
9872 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9873     gen_helper_gvec_vcvt_rz_hs,
9874     gen_helper_gvec_vcvt_rz_fs,
9875     gen_helper_gvec_vcvt_rz_ds,
9876 };
9877 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9878       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9879 
9880 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9881     gen_helper_gvec_vcvt_rz_hu,
9882     gen_helper_gvec_vcvt_rz_fu,
9883     gen_helper_gvec_vcvt_rz_du,
9884 };
9885 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9886       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9887 
9888 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9889     gen_helper_gvec_vcvt_rm_sh,
9890     gen_helper_gvec_vcvt_rm_ss,
9891     gen_helper_gvec_vcvt_rm_sd,
9892 };
9893 
9894 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9895     gen_helper_gvec_vcvt_rm_uh,
9896     gen_helper_gvec_vcvt_rm_us,
9897     gen_helper_gvec_vcvt_rm_ud,
9898 };
9899 
9900 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9901       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9902 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9903       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9904 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9905       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9906 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9907       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9908 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9909       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9910 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9911       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9912 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9913       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9914 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9915       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9916 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9917       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9918 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9919       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9920 
9921 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9922     gen_helper_gvec_fceq0_h,
9923     gen_helper_gvec_fceq0_s,
9924     gen_helper_gvec_fceq0_d,
9925 };
9926 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9927 
9928 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9929     gen_helper_gvec_fcgt0_h,
9930     gen_helper_gvec_fcgt0_s,
9931     gen_helper_gvec_fcgt0_d,
9932 };
9933 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9934 
9935 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9936     gen_helper_gvec_fcge0_h,
9937     gen_helper_gvec_fcge0_s,
9938     gen_helper_gvec_fcge0_d,
9939 };
9940 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9941 
9942 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9943     gen_helper_gvec_fclt0_h,
9944     gen_helper_gvec_fclt0_s,
9945     gen_helper_gvec_fclt0_d,
9946 };
9947 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9948 
9949 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9950     gen_helper_gvec_fcle0_h,
9951     gen_helper_gvec_fcle0_s,
9952     gen_helper_gvec_fcle0_d,
9953 };
9954 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9955 
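/*
 * FEAT_RPRES increases the precision of the single-precision FRECPE
 * and FRSQRTE estimates when FPCR.AH == 1, so select the
 * higher-precision helpers in that case.
 */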
9956 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9957     gen_helper_gvec_frecpe_h,
9958     gen_helper_gvec_frecpe_s,
9959     gen_helper_gvec_frecpe_d,
9960 };
9961 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
9962     gen_helper_gvec_frecpe_h,
9963     gen_helper_gvec_frecpe_rpres_s,
9964     gen_helper_gvec_frecpe_d,
9965 };
9966 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9967       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
9968 
9969 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9970     gen_helper_gvec_frsqrte_h,
9971     gen_helper_gvec_frsqrte_s,
9972     gen_helper_gvec_frsqrte_d,
9973 };
9974 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
9975     gen_helper_gvec_frsqrte_h,
9976     gen_helper_gvec_frsqrte_rpres_s,
9977     gen_helper_gvec_frsqrte_d,
9978 };
9979 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9980       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
9981 
9982 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9983 {
9984     /* Handle 2-reg-misc ops which are widening (so each size element
9985      * in the source becomes a 2*size element in the destination).
9986      * The only instruction like this is FCVTL.
9987      */
9988     int pass;
9989     TCGv_ptr fpst;
9990 
9991     if (!fp_access_check(s)) {
9992         return true;
9993     }
9994 
9995     if (a->esz == MO_64) {
9996         /* 32 -> 64 bit fp conversion */
9997         TCGv_i64 tcg_res[2];
9998         TCGv_i32 tcg_op = tcg_temp_new_i32();
9999         int srcelt = a->q ? 2 : 0;
10000 
10001         fpst = fpstatus_ptr(FPST_A64);
10002 
10003         for (pass = 0; pass < 2; pass++) {
10004             tcg_res[pass] = tcg_temp_new_i64();
10005             read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10006             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10007         }
10008         for (pass = 0; pass < 2; pass++) {
10009             write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10010         }
10011     } else {
10012         /* 16 -> 32 bit fp conversion */
10013         int srcelt = a->q ? 4 : 0;
10014         TCGv_i32 tcg_res[4];
10015         TCGv_i32 ahp = get_ahp_flag();
10016 
10017         fpst = fpstatus_ptr(FPST_A64_F16);
10018 
10019         for (pass = 0; pass < 4; pass++) {
10020             tcg_res[pass] = tcg_temp_new_i32();
10021             read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10022             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10023                                            fpst, ahp);
10024         }
10025         for (pass = 0; pass < 4; pass++) {
10026             write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10027         }
10028     }
10029     clear_vec_high(s, true, a->rd);
10030     return true;
10031 }
10032 
10033 static bool trans_OK(DisasContext *s, arg_OK *a)
10034 {
10035     return true;
10036 }
10037 
10038 static bool trans_FAIL(DisasContext *s, arg_OK *a)
10039 {
10040     s->is_nonstreaming = true;
10041     return true;
10042 }
10043 
10044 /**
10045  * btype_destination_ok:
10046  * @insn: The instruction at the branch destination
10047  * @bt: SCTLR_ELx.BT
10048  * @btype: PSTATE.BTYPE, and is non-zero
10049  *
10050  * On a guarded page, there are a limited number of insns
10051  * that may be present at the branch target:
10052  *   - branch target identifiers,
10053  *   - paciasp, pacibsp,
10054  *   - BRK insn
10055  *   - HLT insn
10056  * Anything else causes a Branch Target Exception.
10057  *
10058  * Return true if the branch is compatible, false to raise BTITRAP.
10059  */
10060 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10061 {
10062     if ((insn & 0xfffff01fu) == 0xd503201fu) {
10063         /* HINT space */
10064         switch (extract32(insn, 5, 7)) {
10065         case 0b011001: /* PACIASP */
10066         case 0b011011: /* PACIBSP */
10067             /*
10068              * If SCTLR_ELx.BT, then PACI*SP are not compatible
10069              * with btype == 3.  Otherwise all btype are ok.
10070              */
10071             return !bt || btype != 3;
10072         case 0b100000: /* BTI */
10073             /* Not compatible with any btype.  */
10074             return false;
10075         case 0b100010: /* BTI c */
10076             /* Not compatible with btype == 3 */
10077             return btype != 3;
10078         case 0b100100: /* BTI j */
10079             /* Not compatible with btype == 2 */
10080             return btype != 2;
10081         case 0b100110: /* BTI jc */
10082             /* Compatible with any btype.  */
10083             return true;
10084         }
10085     } else {
10086         switch (insn & 0xffe0001fu) {
10087         case 0xd4200000u: /* BRK */
10088         case 0xd4400000u: /* HLT */
10089             /* Give priority to the breakpoint exception.  */
10090             return true;
10091         }
10092     }
10093     return false;
10094 }
10095 
10096 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10097                                           CPUState *cpu)
10098 {
10099     DisasContext *dc = container_of(dcbase, DisasContext, base);
10100     CPUARMState *env = cpu_env(cpu);
10101     ARMCPU *arm_cpu = env_archcpu(env);
10102     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10103     int bound, core_mmu_idx;
10104 
10105     dc->isar = &arm_cpu->isar;
10106     dc->condjmp = 0;
10107     dc->pc_save = dc->base.pc_first;
10108     dc->aarch64 = true;
10109     dc->thumb = false;
10110     dc->sctlr_b = 0;
10111     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10112     dc->condexec_mask = 0;
10113     dc->condexec_cond = 0;
10114     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10115     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10116     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10117     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10118     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10119     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10120 #if !defined(CONFIG_USER_ONLY)
10121     dc->user = (dc->current_el == 0);
10122 #endif
10123     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10124     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10125     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10126     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10127     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10128     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10129     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10130     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10131     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10132     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10133     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10134     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10135     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10136     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10137     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10138     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10139     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10140     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10141     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10142     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10143     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10144     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10145     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10146     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10147     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10148     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
10149     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10150     dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10151     dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10152     dc->vec_len = 0;
10153     dc->vec_stride = 0;
10154     dc->cp_regs = arm_cpu->cp_regs;
10155     dc->features = env->features;
10156     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10157     dc->gm_blocksize = arm_cpu->gm_blocksize;
10158 
10159 #ifdef CONFIG_USER_ONLY
10160     /* In sve_probe_page, we assume TBI is enabled. */
10161     tcg_debug_assert(dc->tbid & 1);
10162 #endif
10163 
10164     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10165 
10166     /* Single step state. The code-generation logic here is:
10167      *  SS_ACTIVE == 0:
10168      *   generate code with no special handling for single-stepping (except
10169      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10170      *   this happens anyway because those changes are all system register or
10171      *   PSTATE writes).
10172      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10173      *   emit code for one insn
10174      *   emit code to clear PSTATE.SS
10175      *   emit code to generate software step exception for completed step
10176      *   end TB (as usual for having generated an exception)
10177      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10178      *   emit code to generate a software step exception
10179      *   end the TB
10180      */
10181     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10182     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10183     dc->is_ldex = false;
10184 
10185     /* Bound the number of insns to execute to those left on the page.  */
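    /* -(pc | TARGET_PAGE_MASK) is the byte distance from pc to the end
     * of the page: e.g. a pc 0xff4 bytes into a 4K page leaves
     * (0x1000 - 0xff4) / 4 = 3 insns.
     */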
10186     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10187 
10188     /* If architectural single step active, limit to 1.  */
10189     if (dc->ss_active) {
10190         bound = 1;
10191     }
10192     dc->base.max_insns = MIN(dc->base.max_insns, bound);
10193 }
10194 
10195 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10196 {
10197 }
10198 
10199 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10200 {
10201     DisasContext *dc = container_of(dcbase, DisasContext, base);
10202     target_ulong pc_arg = dc->base.pc_next;
10203 
10204     if (tb_cflags(dcbase->tb) & CF_PCREL) {
10205         pc_arg &= ~TARGET_PAGE_MASK;
10206     }
10207     tcg_gen_insn_start(pc_arg, 0, 0);
10208     dc->insn_start_updated = false;
10209 }
10210 
10211 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10212 {
10213     DisasContext *s = container_of(dcbase, DisasContext, base);
10214     CPUARMState *env = cpu_env(cpu);
10215     uint64_t pc = s->base.pc_next;
10216     uint32_t insn;
10217 
10218     /* Singlestep exceptions have the highest priority. */
10219     if (s->ss_active && !s->pstate_ss) {
10220         /* Singlestep state is Active-pending.
10221          * If we're in this state at the start of a TB then either
10222          *  a) we just took an exception to an EL which is being debugged
10223          *     and this is the first insn in the exception handler
10224          *  b) debug exceptions were masked and we just unmasked them
10225          *     without changing EL (eg by clearing PSTATE.D)
10226          * In either case we're going to take a swstep exception in the
10227          * "did not step an insn" case, and so the syndrome ISV and EX
10228          * bits should be zero.
10229          */
10230         assert(s->base.num_insns == 1);
10231         gen_swstep_exception(s, 0, 0);
10232         s->base.is_jmp = DISAS_NORETURN;
10233         s->base.pc_next = pc + 4;
10234         return;
10235     }
10236 
10237     if (pc & 3) {
10238         /*
10239          * PC alignment fault.  This has priority over the instruction abort
10240          * that we would receive from a translation fault via arm_ldl_code.
10241          * This should only be possible after an indirect branch, at the
10242          * start of the TB.
10243          */
10244         assert(s->base.num_insns == 1);
10245         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
10246         s->base.is_jmp = DISAS_NORETURN;
10247         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10248         return;
10249     }
10250 
10251     s->pc_curr = pc;
10252     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10253     s->insn = insn;
10254     s->base.pc_next = pc + 4;
10255 
10256     s->fp_access_checked = 0;
10257     s->sve_access_checked = 0;
10258 
10259     if (s->pstate_il) {
10260         /*
10261          * Illegal execution state. This has priority over BTI
10262          * exceptions, but comes after instruction abort exceptions.
10263          */
10264         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10265         return;
10266     }
10267 
10268     if (dc_isar_feature(aa64_bti, s)) {
10269         if (s->base.num_insns == 1) {
10270             /* First insn can have btype set to non-zero.  */
10271             tcg_debug_assert(s->btype >= 0);
10272 
10273             /*
10274              * Note that the Branch Target Exception has fairly high
10275              * priority -- below debugging exceptions but above most
10276              * everything else.  This allows us to handle this now
10277              * instead of waiting until the insn is otherwise decoded.
10278              *
10279              * We can check all but the guarded page check here;
10280              * defer the latter to a helper.
10281              */
10282             if (s->btype != 0
10283                 && !btype_destination_ok(insn, s->bt, s->btype)) {
10284                 gen_helper_guarded_page_check(tcg_env);
10285             }
10286         } else {
10287             /* Not the first insn: btype must be 0.  */
10288             tcg_debug_assert(s->btype == 0);
10289         }
10290     }
10291 
10292     s->is_nonstreaming = false;
10293     if (s->sme_trap_nonstreaming) {
10294         disas_sme_fa64(s, insn);
10295     }
10296 
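    /*
     * Try the decoders in order: base A64 first, then SME, then SVE;
     * if none of them accepts the insn it is unallocated.
     */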
10297     if (!disas_a64(s, insn) &&
10298         !disas_sme(s, insn) &&
10299         !disas_sve(s, insn)) {
10300         unallocated_encoding(s);
10301     }
10302 
10303     /*
10304      * After execution of most insns, btype is reset to 0.
10305      * Note that we set btype == -1 when the insn sets btype.
10306      */
10307     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10308         reset_btype(s);
10309     }
10310 }
10311 
10312 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10313 {
10314     DisasContext *dc = container_of(dcbase, DisasContext, base);
10315 
10316     if (unlikely(dc->ss_active)) {
10317         /* Note that this means single stepping WFI doesn't halt the CPU.
10318          * For conditional branch insns this is harmless unreachable code as
10319          * gen_goto_tb() has already handled emitting the debug exception
10320          * (and thus a tb-jump is not possible when singlestepping).
10321          */
10322         switch (dc->base.is_jmp) {
10323         default:
10324             gen_a64_update_pc(dc, 4);
10325             /* fall through */
10326         case DISAS_EXIT:
10327         case DISAS_JUMP:
10328             gen_step_complete_exception(dc);
10329             break;
10330         case DISAS_NORETURN:
10331             break;
10332         }
10333     } else {
10334         switch (dc->base.is_jmp) {
10335         case DISAS_NEXT:
10336         case DISAS_TOO_MANY:
10337             gen_goto_tb(dc, 1, 4);
10338             break;
10339         default:
10340         case DISAS_UPDATE_EXIT:
10341             gen_a64_update_pc(dc, 4);
10342             /* fall through */
10343         case DISAS_EXIT:
10344             tcg_gen_exit_tb(NULL, 0);
10345             break;
10346         case DISAS_UPDATE_NOCHAIN:
10347             gen_a64_update_pc(dc, 4);
10348             /* fall through */
10349         case DISAS_JUMP:
10350             tcg_gen_lookup_and_goto_ptr();
10351             break;
10352         case DISAS_NORETURN:
10353         case DISAS_SWI:
10354             break;
10355         case DISAS_WFE:
10356             gen_a64_update_pc(dc, 4);
10357             gen_helper_wfe(tcg_env);
10358             break;
10359         case DISAS_YIELD:
10360             gen_a64_update_pc(dc, 4);
10361             gen_helper_yield(tcg_env);
10362             break;
10363         case DISAS_WFI:
10364             /*
10365              * This is a special case because we don't want to just halt
10366              * the CPU if trying to debug across a WFI.
10367              */
10368             gen_a64_update_pc(dc, 4);
10369             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10370             /*
10371              * The helper doesn't necessarily throw an exception, but we
10372              * must go back to the main loop to check for interrupts anyway.
10373              */
10374             tcg_gen_exit_tb(NULL, 0);
10375             break;
10376         }
10377     }
10378 }
10379 
10380 const TranslatorOps aarch64_translator_ops = {
10381     .init_disas_context = aarch64_tr_init_disas_context,
10382     .tb_start           = aarch64_tr_tb_start,
10383     .insn_start         = aarch64_tr_insn_start,
10384     .translate_insn     = aarch64_tr_translate_insn,
10385     .tb_stop            = aarch64_tr_tb_stop,
10386 };
10387